From dc6c04fa709f860cdd9c84f88cac1eda6215bdef Mon Sep 17 00:00:00 2001
From: Filip Lindqvist <buggfille@gmail.com>
Date: Thu, 5 Mar 2026 16:11:28 +0100
Subject: [PATCH] Add OpenAI TTS provider

- New TTS provider using OpenAI's API for text-to-speech synthesis
- Caches synthesized audio files to disk
- Requires openaiKey setting in settings.json
- Supports language detection and voice customization
---
 README.md                   | 30 ++++++++++++++
 lib/tts-providers/openai.js | 83 +++++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+)
 create mode 100644 lib/tts-providers/openai.js

diff --git a/README.md b/README.md
index 30cb8063..d3be143a 100644
--- a/README.md
+++ b/README.md
@@ -404,6 +404,7 @@ Experimental support for TTS. Today the following providers are available:
 * Google (default)
 * macOS say command
 * Elevenlabs
+* OpenAI
 
 It will use the one you configure in settings.json. If you define settings for multiple TTS services, it will not be guaranteed which one it will choose!
 
@@ -653,6 +654,35 @@ Full:
 	}
 ```
 
+#### OpenAI
+
+This REQUIRES a registered API key from OpenAI! See https://platform.openai.com/docs/overview
+
+You need to add this to a file called settings.json (create if it doesn't exist), like this:
+
+```
+{
+  "openaiKey": "sk-12822720jhskjhs9879879879"
+}
+```
+
+Replace the key above (it is just a made-up example) with the API key you received after registering.
+
+Action is:
+
+	/[Room name]/say/[phrase][/[language_code]][/[announce volume]]
+	/sayall/[phrase][/[language_code]][/[announce volume]]
+
+Example:
+
+	/Office/say/Hello, dinner is ready
+	/Office/say/Hej, maten är klar/sv-se
+	/sayall/Hello, dinner is ready
+	/Office/say/Hello, dinner is ready/90
+	/Office/say/Hej, maten är klar/sv-se/90
+
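+The language code does not affect the synthesized speech: it is not sent to OpenAI, which determines the language from the text itself (the code only becomes part of the cached file name). The detection may not always be correct, but it becomes more reliable with longer texts.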
+
 #### Google (default if no other has been configured)
 
 Does not require any API keys. Please note that Google has been known in the past to change the requirements for its Text-to-Speech API, and this may stop working in the future. There is also limiations to how many requests one is allowed to do in a specific time period.
diff --git a/lib/tts-providers/openai.js b/lib/tts-providers/openai.js
new file mode 100644
index 00000000..bf91a006
--- /dev/null
+++ b/lib/tts-providers/openai.js
@@ -0,0 +1,83 @@
+'use strict';
+const crypto = require('crypto');
+const fs = require('fs');
+const http = require('http');
+const https = require('https');
+const path = require('path');
+const fileDuration = require('../helpers/file-duration');
+const settings = require('../../settings');
+const logger = require('sonos-discovery/lib/helpers/logger');
+
+function openai(phrase, language, voice = 'alloy', model = 'tts-1') {
+    if (!language) {
+        language = 'en';
+    }
+
+    // Construct a filesystem neutral filename
+    const phraseHash = crypto.createHash('sha1').update(phrase).digest('hex');
+    const filename = `openai-${phraseHash}-${language}.mp3`;
+    const filepath = path.resolve(settings.webroot, 'tts', filename);
+
+    const expectedUri = `/tts/${filename}`;
+    try {
+        fs.accessSync(filepath, fs.R_OK);
+        return fileDuration(filepath)
+            .then((duration) => {
+                return {
+                    duration,
+                    uri: expectedUri
+                };
+            });
+    } catch (err) {
+        logger.info(`announce file for phrase "${phrase}" does not seem to exist, downloading from OpenAI TTS`);
+    }
+
+    return new Promise((resolve, reject) => {
+        const postData = JSON.stringify({
+            model: model,
+            input: phrase,
+            voice: voice
+        });
+        const options = {
+            hostname: 'api.openai.com',
+            path: '/v1/audio/speech',
+            method: 'POST',
+            headers: {
+                'Authorization': `Bearer ${settings.openaiKey}`,
+                'Content-Type': 'application/json',
+                'Content-Length': postData.length
+            }
+        };
+
+        const req = https.request(options, (res) => {
+            if (res.statusCode >= 200 && res.statusCode < 300) {
+                const file = fs.createWriteStream(filepath);
+                res.pipe(file);
+                file.on('finish', function () {
+                    file.end();
+                    resolve(expectedUri);
+                });
+            } else {
+                reject(new Error(`Download from OpenAI TTS failed with status ${res.statusCode}, ${res.statusMessage}`));
+            }
+        });
+
+        req.on('error', (err) => {
+            reject(err);
+        });
+
+        req.write(postData);
+        req.end();
+    })
+        .then(() => {
+            return fileDuration(filepath);
+        })
+        .then((duration) => {
+            return {
+                duration,
+                uri: expectedUri
+            };
+        });
+}
+
+module.exports = openai;