Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 21 additions & 6 deletions plugins/_kokoro_tts/api/status.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import importlib.metadata

from helpers.api import ApiHandler, Request, Response
Expand All @@ -8,24 +10,37 @@ class Status(ApiHandler):
async def process(self, input: dict, request: Request) -> dict | Response:
migration.ensure_migrated()

cfg = runtime.get_config()
remote_url = cfg.get("remote_url", "")

# Local model status (always reported)
package_version = ""
package_error = ""
try:
package_version = importlib.metadata.version("kokoro")
except Exception as e:
package_error = str(e)

return {
result = {
"plugin": "_kokoro_tts",
"enabled": runtime.is_globally_enabled(),
"config": runtime.get_config(),
"config": cfg,
"model": {
"ready": await runtime.is_downloaded(),
"loading": await runtime.is_downloading(),
},
"package": {
"loading": runtime.is_updating_model,
"version": package_version,
"error": package_error,
"error": package_error or None,
},
"fallback": "Browser-native speechSynthesis remains the fallback when Kokoro is disabled.",
}

# Remote health status (only if configured)
if remote_url:
remote_healthy, remote_error = await runtime.is_remote_healthy()
result["remote"] = {
"url": remote_url,
"healthy": remote_healthy,
"error": remote_error or None,
}

return result
6 changes: 4 additions & 2 deletions plugins/_kokoro_tts/api/synthesize.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from helpers.api import ApiHandler, Request, Response
from plugins._kokoro_tts.helpers import runtime

Expand All @@ -12,11 +14,11 @@ async def process(self, input: dict, request: Request) -> dict | Response:
return Response(status=400, response="Missing text")

try:
audio = await runtime.synthesize_sentences([text])
audio, mime_type = await runtime.synthesize_sentences([text])
return {
"success": True,
"audio": audio,
"mime_type": "audio/wav",
"mime_type": mime_type,
}
except Exception as e:
return {"success": False, "error": str(e)}
2 changes: 2 additions & 0 deletions plugins/_kokoro_tts/default_config.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
voice: am_puck,am_onyx
speed: 1.1
remote_url:
response_format: mp3
102 changes: 95 additions & 7 deletions plugins/_kokoro_tts/helpers/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import warnings
from typing import Any

import aiohttp
import soundfile as sf

from helpers import plugins
Expand All @@ -26,6 +27,16 @@
# Baseline plugin settings.
# NOTE(review): presumably the starting point that normalize_config() overlays
# user values onto — confirm against the part of normalize_config not shown here.
# An empty "remote_url" means no remote service is configured.
DEFAULT_CONFIG = {
"voice": "am_puck,am_onyx",
"speed": 1.1,
"remote_url": "",
"response_format": "mp3",
}

# Accepted "response_format" values; normalize_config() only applies a
# configured format when it is a member of this set.
VALID_FORMATS = {"wav", "mp3", "opus", "flac"}
# Response format -> MIME type reported alongside the synthesized audio.
# _synthesize_remote() looks formats up here and falls back to "audio/mpeg".
MIME_TYPES = {
"wav": "audio/wav",
"mp3": "audio/mpeg",
"opus": "audio/opus",
"flac": "audio/flac",
}

_pipeline = None
Expand All @@ -48,6 +59,14 @@ def normalize_config(config: dict[str, Any] | None) -> dict[str, Any]:
except (TypeError, ValueError):
pass

remote_url = str(config.get("remote_url", normalized["remote_url"]) or "").strip()
if remote_url:
normalized["remote_url"] = remote_url.rstrip("/")

response_format = str(config.get("response_format", normalized["response_format"]) or "").strip().lower()
if response_format in VALID_FORMATS:
normalized["response_format"] = response_format

return normalized


Expand Down Expand Up @@ -106,20 +125,89 @@ async def is_downloaded() -> bool:
return _pipeline is not None


async def is_remote_healthy() -> tuple[bool, str]:
    """Check whether the configured remote Kokoro-FastAPI server is reachable.

    Sends a GET request to ``{remote_url}/health`` with a 5-second total
    timeout.

    Returns:
        A ``(healthy, error_message)`` tuple. ``healthy`` is True only when
        the server answers with HTTP 200, in which case ``error_message`` is
        empty. If no ``remote_url`` is configured the result is
        ``(False, "Not configured")``. For any other HTTP status the message
        is ``"HTTP <status>"``; for a failed request it is the exception text,
        or the exception class name when the exception carries no text.
    """
    remote_url = get_config().get("remote_url", "")
    if not remote_url:
        return False, "Not configured"

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(
                f"{remote_url}/health",
                timeout=aiohttp.ClientTimeout(total=5),
            ) as resp:
                if resp.status == 200:
                    return True, ""
                return False, f"HTTP {resp.status}"
    except Exception as e:
        # Best-effort probe: never raise, always report. Some exceptions
        # (e.g. asyncio.TimeoutError raised when the timeout elapses) have an
        # empty str(), which would make an unhealthy remote look error-free to
        # callers; fall back to the exception class name in that case.
        return False, str(e) or type(e).__name__


async def synthesize_sentences(
sentences: list[str], config: dict[str, Any] | None = None
) -> str:
) -> tuple[str, str]:
cfg = normalize_config(config or get_config())
return await _synthesize_sentences(
remote_url = str(cfg.get("remote_url", ""))

if remote_url:
return await _synthesize_remote(
sentences,
voice=str(cfg["voice"]),
speed=float(cfg["speed"]),
remote_url=remote_url,
response_format=str(cfg["response_format"]),
)

return await _synthesize_local(
sentences,
voice=str(cfg["voice"]),
speed=float(cfg["speed"]),
)


async def _synthesize_sentences(
async def _synthesize_remote(
    sentences: list[str],
    *,
    voice: str,
    speed: float,
    remote_url: str,
    response_format: str,
) -> tuple[str, str]:
    """Synthesize speech through a remote Kokoro-FastAPI service.

    The non-blank sentences are stripped and joined with single spaces, then
    sent as one JSON POST to ``{remote_url}/v1/audio/speech`` with a 30-second
    total timeout.

    Args:
        sentences: Text fragments to speak; blank entries are ignored.
        voice: Voice identifier forwarded to the service.
        speed: Speed value forwarded to the service.
        remote_url: Base URL of the remote service.
        response_format: Requested audio format, also used to pick the MIME
            type (``"audio/mpeg"`` when the format is not in ``MIME_TYPES``).

    Returns:
        ``(base64_audio, mime_type)``. ``base64_audio`` is an empty string
        when there is no text to synthesize.

    Raises:
        Exception: Any error raised while performing the request (including
            non-success HTTP statuses via ``raise_for_status``) is logged
            and re-raised unchanged.
    """
    cleaned = (sentence.strip() for sentence in sentences)
    text = " ".join(fragment for fragment in cleaned if fragment)
    if not text:
        return "", MIME_TYPES.get(response_format, "audio/mpeg")

    try:
        endpoint = f"{remote_url}/v1/audio/speech"
        payload = {
            "model": "kokoro",
            "input": text,
            "voice": voice,
            "response_format": response_format,
            "speed": speed,
        }
        async with aiohttp.ClientSession() as session, session.post(
            endpoint,
            json=payload,
            timeout=aiohttp.ClientTimeout(total=30),
        ) as resp:
            resp.raise_for_status()
            audio_bytes = await resp.read()
            mime_type = MIME_TYPES.get(response_format, "audio/mpeg")
            encoded_audio = base64.b64encode(audio_bytes).decode("utf-8")
            return encoded_audio, mime_type
    except Exception as e:
        PrintStyle.error(f"Error in remote Kokoro TTS synthesis: {e}")
        raise


async def _synthesize_local(
sentences: list[str], *, voice: str, speed: float
) -> str:
) -> tuple[str, str]:
await _preload()

combined_audio: list[float] = []
Expand All @@ -136,11 +224,11 @@ async def _synthesize_sentences(
combined_audio.extend(audio_numpy.tolist())

if not combined_audio:
return ""
return "", "audio/wav"

buffer = io.BytesIO()
sf.write(buffer, combined_audio, 24000, format="WAV")
return base64.b64encode(buffer.getvalue()).decode("utf-8")
return base64.b64encode(buffer.getvalue()).decode("utf-8"), "audio/wav"
except Exception as e:
PrintStyle.error(f"Error in Kokoro TTS synthesis: {e}")
PrintStyle.error(f"Error in local Kokoro TTS synthesis: {e}")
raise
34 changes: 30 additions & 4 deletions plugins/_kokoro_tts/webui/config.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,46 @@
<div class="plugin-config-page">
<div class="section-title">Kokoro TTS</div>
<div class="section-description">
Configure the built-in Kokoro voice provider. When this plugin is disabled,
spoken output falls back to the browser speech API.
Configure the Kokoro voice provider. Supports both local model synthesis
and a remote Kokoro-FastAPI service. When disabled, spoken output falls
back to the browser speech API.
</div>

<div class="field">
<div class="field-label">
<div class="field-title">Remote URL</div>
<div class="field-description">URL of a remote Kokoro-FastAPI service (e.g. http://localhost:18890). Leave empty to use local model synthesis.</div>
</div>
<div class="field-control">
<input type="text" x-model="config.remote_url" />
</div>
</div>

<div class="field">
<div class="field-label">
<div class="field-title">Voice</div>
<div class="field-description">Kokoro voice identifier passed to the backend pipeline.</div>
<div class="field-description">Kokoro voice identifier (e.g. am_puck, am_onyx, or blend voices with + like am_puck+am_onyx).</div>
</div>
<div class="field-control">
<input type="text" x-model="config.voice" />
</div>
</div>

<div class="field">
<div class="field-label">
<div class="field-title">Audio Format</div>
<div class="field-description">Output format for remote synthesis. Local synthesis always outputs WAV.</div>
</div>
<div class="field-control">
<select x-model="config.response_format">
<option value="mp3">MP3 (recommended)</option>
<option value="wav">WAV (uncompressed)</option>
<option value="opus">Opus (low bitrate)</option>
<option value="flac">FLAC (lossless)</option>
</select>
</div>
</div>

<div class="field">
<div class="field-label">
<div class="field-title">Speed</div>
Expand All @@ -36,4 +62,4 @@
</template>
</div>
</body>
</html>
</html>
10 changes: 7 additions & 3 deletions plugins/_kokoro_tts/webui/kokoro-tts-store.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ const model = {
config: {
voice: "",
speed: 1.1,
remote_url: "",
response_format: "mp3",
},
modelReady: false,
modelLoading: false,
packageVersion: "",
remoteHealthy: false,
providerCleanup: null,

async initRuntime() {
Expand All @@ -42,10 +44,12 @@ const model = {
this.config = {
voice: status?.config?.voice || "",
speed: Number(status?.config?.speed || 1.1),
remote_url: status?.config?.remote_url || "",
response_format: status?.config?.response_format || "mp3",
};
this.modelReady = !!status?.model?.ready;
this.modelLoading = !!status?.model?.loading;
this.packageVersion = status?.package?.version || "";
this.remoteHealthy = !!status?.remote?.healthy;

if (this.enabled) {
this.registerProvider();
Expand Down Expand Up @@ -77,7 +81,7 @@ const model = {

return {
audioBase64: result.audio || "",
mimeType: result.mime_type || "audio/wav",
mimeType: result.mime_type || "audio/mpeg",
};
},
});
Expand Down
31 changes: 22 additions & 9 deletions plugins/_kokoro_tts/webui/main.html
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@
<div>
<div class="section-title">Kokoro TTS</div>
<div class="section-description">
Built-in Kokoro speech synthesis. Dependency installation remains on the
Docker/bootstrap path; disabling this plugin returns spoken output to the
browser fallback.
Built-in Kokoro speech synthesis. When disabled, spoken output falls back
to the browser speech API.
</div>

<div class="speech-plugin-grid">
Expand All @@ -29,13 +28,15 @@
<span class="status-badge" :class="$store.kokoroTts.enabled ? 'ok' : 'warn'" x-text="$store.kokoroTts.enabled ? 'Yes' : 'No'"></span>
</div>
<div class="status-row">
<span class="status-key">Model</span>
<span class="status-key">Local Model</span>
<span class="status-badge" :class="$store.kokoroTts.statusClass" x-text="$store.kokoroTts.statusText"></span>
</div>
<div class="status-row" x-show="$store.kokoroTts.packageVersion">
<span class="status-key">Package</span>
<span class="status-value" x-text="$store.kokoroTts.packageVersion"></span>
</div>
<template x-if="$store.kokoroTts.config.remote_url">
<div class="status-row">
<span class="status-key">Remote</span>
<span class="status-badge" :class="$store.kokoroTts.remoteHealthy ? 'ok' : 'warn'" x-text="$store.kokoroTts.remoteHealthy ? 'Healthy' : 'Unreachable'"></span>
</div>
</template>
</div>

<div class="speech-plugin-card">
Expand All @@ -48,6 +49,18 @@
<span class="status-key">Speed</span>
<span class="status-value" x-text="$store.kokoroTts.config.speed"></span>
</div>
<template x-if="$store.kokoroTts.config.remote_url">
<div>
<div class="status-row">
<span class="status-key">Remote URL</span>
<span class="status-value mono" x-text="$store.kokoroTts.config.remote_url"></span>
</div>
<div class="status-row">
<span class="status-key">Format</span>
<span class="status-value" x-text="$store.kokoroTts.config.response_format?.toUpperCase()"></span>
</div>
</div>
</template>
</div>
</div>

Expand Down Expand Up @@ -130,4 +143,4 @@
}
</style>
</body>
</html>
</html>