Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions livekit-plugins/livekit-plugins-reson8/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Reson8 plugin for LiveKit Agents

Support for streaming speech-to-text with [Reson8](https://reson8.dev).

See [docs.reson8.dev](https://docs.reson8.dev/documentation/integrations/livekit/) for more information.

## Installation

```bash
pip install livekit-plugins-reson8
```

## Setup

An API key is required. Sign up at [console.reson8.dev](https://console.reson8.dev) and set the `RESON8_API_KEY` environment variable.
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Reson8 plugin for LiveKit Agents

Support for speech-to-text with [Reson8](https://reson8.dev/).

See https://docs.reson8.dev for more information.
"""

from .stt import STT, SpeechStream
from .version import __version__

__all__ = ["STT", "SpeechStream", "__version__"]


from livekit.agents import Plugin

from .log import logger


class Reson8Plugin(Plugin):
def __init__(self) -> None:
super().__init__(__name__, __version__, __package__, logger)


Plugin.register_plugin(Reson8Plugin())

_module = dir()
NOT_IN_ALL = [m for m in _module if m not in __all__]

__pdoc__ = {}

for n in NOT_IN_ALL:
__pdoc__[n] = False
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import logging

logger = logging.getLogger("livekit.plugins.reson8")
Empty file.
221 changes: 221 additions & 0 deletions livekit-plugins/livekit-plugins-reson8/livekit/plugins/reson8/stt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
from __future__ import annotations

import asyncio
import contextlib
import json
import os
import uuid
from dataclasses import dataclass, replace
from urllib.parse import urlencode

import httpx
import websockets
from websockets.asyncio.client import ClientConnection

from livekit import rtc
from livekit.agents import stt, utils
from livekit.agents._exceptions import APIConnectionError, APIStatusError, APITimeoutError
from livekit.agents.language import LanguageCode
from livekit.agents.types import (
DEFAULT_API_CONNECT_OPTIONS,
NOT_GIVEN,
APIConnectOptions,
NotGivenOr,
)
from livekit.agents.utils import is_given


@dataclass
class STTOptions:
api_key: str
api_url: str
language: str | None
custom_model_id: str | None
sample_rate: int
include_timestamps: bool
include_words: bool
include_confidence: bool

def query_params(self, *, interim: bool) -> dict[str, str]:
params: dict[str, str] = {
"encoding": "pcm_s16le",
"sample_rate": str(self.sample_rate),
"channels": "1",
}
if self.language:
params["language"] = self.language
if interim:
params["include_interim"] = "true"
if self.custom_model_id:
params["custom_model_id"] = self.custom_model_id
if self.include_timestamps:
params["include_timestamps"] = "true"
if self.include_words:
params["include_words"] = "true"
if self.include_confidence:
params["include_confidence"] = "true"
return params


class STT(stt.STT):
def __init__(
self,
*,
api_key: str | None = None,
api_url: str | None = None,
language: str | None = None,
custom_model_id: str | None = None,
sample_rate: int = 16000,
include_timestamps: bool = False,
include_words: bool = False,
include_confidence: bool = False,
) -> None:
super().__init__(capabilities=stt.STTCapabilities(streaming=True, interim_results=True))
resolved_key = api_key or os.environ.get("RESON8_API_KEY", "")
if not resolved_key:
raise ValueError("Reson8 API key is required. Set RESON8_API_KEY or pass api_key=")
self._opts = STTOptions(
api_key=resolved_key,
api_url=api_url or os.environ.get("RESON8_API_URL", "https://api.reson8.dev"),
language=language,
custom_model_id=custom_model_id,
sample_rate=sample_rate,
include_timestamps=include_timestamps,
include_words=include_words,
include_confidence=include_confidence,
)

@property
def model(self) -> str:
return "reson8-stt"

@property
def provider(self) -> str:
return "reson8"

def _resolve_opts(self, language: NotGivenOr[str]) -> STTOptions:
if is_given(language):
return replace(self._opts, language=language)
return self._opts

def stream(
self,
*,
language: NotGivenOr[str] = NOT_GIVEN,
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> SpeechStream:
return SpeechStream(stt=self, opts=self._resolve_opts(language), conn_options=conn_options)

async def _recognize_impl(
self,
buffer: utils.AudioBuffer,
*,
language: NotGivenOr[str] = NOT_GIVEN,
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
) -> stt.SpeechEvent:
opts = self._resolve_opts(language)
audio = rtc.combine_audio_frames(buffer).data.tobytes()
url = f"{opts.api_url.rstrip('/')}/v1/speech-to-text/prerecorded?{urlencode(opts.query_params(interim=False))}"

try:
async with httpx.AsyncClient(timeout=conn_options.timeout) as client:
resp = await client.post(
url,
content=audio,
headers={
"Authorization": f"ApiKey {opts.api_key}",
"Content-Type": "application/octet-stream",
},
)
except httpx.TimeoutException as e:
raise APITimeoutError() from e
except httpx.HTTPError as e:
raise APIConnectionError(str(e)) from e

if resp.status_code != 200:
raise APIStatusError(
message=f"Reson8 prerecorded error {resp.status_code}",
status_code=resp.status_code,
body=resp.text,
)

return stt.SpeechEvent(
type=stt.SpeechEventType.FINAL_TRANSCRIPT,
alternatives=[
stt.SpeechData(
text=resp.json().get("text", ""), language=LanguageCode(opts.language or "")
)
],
)


class SpeechStream(stt.RecognizeStream):
def __init__(self, *, stt: STT, opts: STTOptions, conn_options: APIConnectOptions) -> None:
super().__init__(stt=stt, conn_options=conn_options, sample_rate=opts.sample_rate)
self._opts = opts

async def _run(self) -> None:
base = (
self._opts.api_url.rstrip("/").replace("https://", "wss://").replace("http://", "ws://")
)
url = (
f"{base}/v1/speech-to-text/realtime?{urlencode(self._opts.query_params(interim=True))}"
)

try:
ws = await websockets.connect(
url,
additional_headers={"Authorization": f"ApiKey {self._opts.api_key}"},
proxy=None,
)
except websockets.exceptions.InvalidStatus as e:
raise APIStatusError(
message=f"Reson8 WebSocket rejected: {e.response.status_code}",
status_code=e.response.status_code,
) from e
except Exception as e:
raise APIConnectionError(f"Failed to connect to Reson8: {e}") from e

async with ws:
send_task = asyncio.create_task(self._send_loop(ws))
recv_task = asyncio.create_task(self._recv_loop(ws))
tasks = asyncio.gather(send_task, recv_task)
try:
await tasks
except Exception:
raise
finally:
tasks.cancel()
with contextlib.suppress(asyncio.CancelledError):
await tasks
Comment thread
raoulritter marked this conversation as resolved.
Comment thread
raoulritter marked this conversation as resolved.

async def _send_loop(self, ws: ClientConnection) -> None:
async for data in self._input_ch:
if isinstance(data, rtc.AudioFrame):
await ws.send(data.data.tobytes())
elif isinstance(data, self._FlushSentinel):
await ws.send(json.dumps({"type": "flush_request", "id": str(uuid.uuid4())}))
await ws.close()

async def _recv_loop(self, ws: ClientConnection) -> None:
async for raw in ws:
if isinstance(raw, bytes):
continue
msg = json.loads(raw)
if msg.get("type") != "transcript":
continue
event_type = (
stt.SpeechEventType.FINAL_TRANSCRIPT
if msg.get("is_final", True)
else stt.SpeechEventType.INTERIM_TRANSCRIPT
)
self._event_ch.send_nowait(
stt.SpeechEvent(
type=event_type,
alternatives=[
stt.SpeechData(
text=msg.get("text", ""), language=LanguageCode(self._opts.language or "")
),
],
)
)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = "0.1.1"
41 changes: 41 additions & 0 deletions livekit-plugins/livekit-plugins-reson8/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "livekit-plugins-reson8"
dynamic = ["version"]
description = "Agent Framework plugin for speech-to-text using Reson8's API."
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.10.0"
authors = [{ name = "Reson8 Labs", email = "support@reson8.dev" }]
keywords = ["webrtc", "realtime", "audio", "livekit", "reson8", "stt", "speech-to-text"]
classifiers = [
"Intended Audience :: Developers",
"License :: OSI Approved :: Apache Software License",
"Topic :: Multimedia :: Sound/Audio",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3 :: Only",
]
dependencies = ["livekit-agents[codecs]>=1.5.2", "websockets>=13.0", "httpx>=0.28"]

[project.urls]
Documentation = "https://docs.reson8.dev"
Website = "https://reson8.dev"
Source = "https://github.com/livekit/agents"

[tool.hatch.version]
path = "livekit/plugins/reson8/version.py"

[tool.hatch.build.targets.wheel]
packages = ["livekit"]

[tool.hatch.build.targets.sdist]
include = ["/livekit"]

[tool.uv]
exclude-newer = "7 days"
exclude-newer-package = { livekit-agents = "0 days" }
Loading
Loading