diff --git a/.gitignore b/.gitignore index 330f1d4..9e9a4d7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ /target/ FnKey.app/ .DS_Store +.venv/ +__pycache__/ diff --git a/AppIcon.icns b/AppIcon.icns new file mode 100644 index 0000000..f66f944 Binary files /dev/null and b/AppIcon.icns differ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..2a75cbe --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,113 @@ +# FnKey — Agent Instructions + +## What is this + +FnKey is a macOS menu bar app (Rust) that records audio while a hotkey is held, transcribes it via Whisper, sanitizes the text with a small LLM, and pastes the result at the cursor. Single file: `src/main.rs`. + +## Architecture + +``` +Hotkey held → mic records → release → WAV encoded → Whisper STT → LLM sanitizer → clipboard → ⌘V paste +``` + +- **Transcription**: Any OpenAI-compatible `/v1/audio/transcriptions` endpoint +- **Sanitization**: Any OpenAI-compatible `/v1/chat/completions` endpoint +- **Config**: `~/.config/fnkey/config.toml` (TOML, deserialized into `Config` struct) +- **No runtime dependencies** — single static binary, no frameworks beyond macOS system libs + +## Text Sanitization Pipeline + +The sanitizer is the key differentiator. Raw Whisper output is messy — filler words, repeated words, misheard technical terms. A small LLM (0.6B–3B params) cleans it up in real time (<200ms). + +### Setup for contributors + +After cloning this repo, set up local inference for development: + +1. **Whisper STT** — download and serve a Whisper model: + ```bash + # Option A: whisper.cpp (any platform with Vulkan/CUDA/Metal) + whisper-server -m ggml-large-v3-turbo.bin --port 8100 + + # Option B: faster-whisper-server (CUDA) + pip install faster-whisper-server + faster-whisper-server --model large-v3-turbo --port 8100 + + # Option C: MLX (macOS Apple Silicon) + pip install mlx-whisper + # (use via mlx-whisper Python API, or wrap in a server) + ``` + +2. 
**Text sanitizer** — download and serve a small LLM: + ```bash + # Option A: llama.cpp + llama-server -m Qwen3-0.6B-Q6_K.gguf --port 8108 -ngl 999 --jinja -c 4096 + + # Option B: MLX + pip install mlx-lm + mlx_lm.server --model mlx-community/Qwen3-0.6B-4bit --port 8108 + ``` + +3. **Configure** `~/.config/fnkey/config.toml`: + ```toml + api_key = "not-needed" + transcription_url = "http://localhost:8100/v1/audio/transcriptions" + whisper_model = "whisper-large-v3-turbo" + polish_url = "http://localhost:8108/v1/chat/completions" + polish_model = "qwen3-0.6b" + always_polish = true + ``` + +### How the system prompt works + +The `polish_prompt` config field overrides the built-in system prompt. For 0.6B models, the prompt must be: +- **Short** — the model has limited instruction-following at this scale +- **Explicit** — use a replacement dictionary for domain terms the STT engine misrecognizes +- **End with `/no_think`** — disables Qwen3 reasoning mode, keeping latency under 200ms + +Example for a codebase that uses llama.cpp, Claude, and Tailscale: + +```toml +polish_prompt = """Fix dictation. Remove filler words. Fix grammar and punctuation. +Replace misheard terms: +- clod dot MD/cloud dot MD → CLAUDE.md +- agents dot MD → agents.md +- lama dot CPP → llama.cpp +- quan three/qan three → Qwen3 +- tail scale → Tailscale +Output ONLY the corrected text. /no_think""" +``` + +**When helping users customize their prompt**: identify the technical terms they use frequently, figure out how Whisper is likely to misrecognize them, and add those as `misheard → correct` pairs. This is the single most impactful thing you can do to improve dictation accuracy. + +## Build & test + +```bash +cargo build --release # Build +./build-app.sh # Create .app bundle +cargo check # Type check only +``` + +After rebuilding, macOS invalidates permissions — user must re-add the app in System Settings (Input Monitoring, Accessibility). 
+ +## Key code locations + +- `Config` struct and `load_config()` — config parsing and defaults +- `polish_text()` — LLM sanitization call +- `transcribe_and_paste()` — main pipeline: encode WAV → call Whisper → sanitize → paste +- `run_event_tap()` — hotkey detection via CGEventTap +- `enhance_audio()` — audio preprocessing (DC removal, high-pass filter, normalization) + +## Config fields + +| Field | Default | Purpose | +|-------|---------|---------| +| `api_key` | `""` | Bearer token for Whisper transcription (also used for sanitizer if `polish_api_key` is empty) | +| `polish_api_key` | `""` | Separate bearer token for sanitizer endpoint (empty = falls back to `api_key`) | +| `transcription_url` | Groq | Whisper endpoint | +| `polish_url` | Groq | Chat completions endpoint for sanitizer | +| `whisper_model` | `whisper-large-v3` | Model name sent to STT endpoint | +| `polish_model` | `llama-3.3-70b-versatile` | Model name sent to sanitizer endpoint | +| `hotkey` | `fn` | Trigger key (fn/option/control/shift/command) | +| `language` | `""` (auto) | ISO-639-1 hint for Whisper | +| `always_polish` | `true` | Sanitize every dictation by default | +| `polish_prompt` | `""` (built-in) | Custom system prompt for sanitizer | diff --git a/Cargo.lock b/Cargo.lock index 3a45e69..ea5c74c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -466,6 +466,7 @@ dependencies = [ "reqwest", "serde", "serde_json", + "toml", ] [[package]] @@ -1422,7 +1423,7 @@ version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" dependencies = [ - "toml_edit", + "toml_edit 0.23.7", ] [[package]] @@ -1713,6 +1714,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + [[package]] name = 
"serde_urlencoded" version = "0.7.1" @@ -1927,6 +1937,27 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime 0.6.11", + "toml_edit 0.22.27", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] + [[package]] name = "toml_datetime" version = "0.7.3" @@ -1936,6 +1967,20 @@ dependencies = [ "serde_core", ] +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime 0.6.11", + "toml_write", + "winnow", +] + [[package]] name = "toml_edit" version = "0.23.7" @@ -1943,7 +1988,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" dependencies = [ "indexmap", - "toml_datetime", + "toml_datetime 0.7.3", "toml_parser", "winnow", ] @@ -1957,6 +2002,12 @@ dependencies = [ "winnow", ] +[[package]] +name = "toml_write" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + [[package]] name = "tower" version = "0.5.2" diff --git a/Cargo.toml b/Cargo.toml index 03692fc..e0c4f37 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,3 +28,6 @@ hound = "3.5" # JSON parsing for LLM API serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" + +# Config file parsing +toml = "0.8" diff --git a/Info.plist b/Info.plist index b3eee15..5a15892 100644 --- 
a/Info.plist +++ b/Info.plist @@ -14,6 +14,8 @@ <string>1.0.0</string> <key>CFBundleShortVersionString</key> <string>1.0.0</string> + <key>CFBundleIconFile</key> + <string>AppIcon</string> <key>CFBundlePackageType</key> <string>APPL</string> <key>LSMinimumSystemVersion</key> diff --git a/README.md b/README.md index bd0e945..7134725 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # fnkey.ai -Hold Fn key, speak, paste transcribed text. +Hold a hotkey, speak, paste transcribed text. Works with any OpenAI-compatible speech-to-text API. ## Install @@ -14,66 +14,260 @@ Hold Fn key, speak, paste transcribed text. mv FnKey.app /Applications/ ``` -3. Set your Groq API key: - ```bash - mkdir -p ~/.config/fnkey - echo 'your-groq-api-key' > ~/.config/fnkey/api_key - ``` - Get a key at [console.groq.com](https://console.groq.com) +3. Grant macOS permissions (see [Permissions](#permissions) below) 4. Launch: ```bash open /Applications/FnKey.app ``` -5. Grant permissions in **System Settings → Privacy & Security**: +5. Click the **○** menu bar icon → **Settings...** to configure your API endpoint + +## Configuration + +FnKey is configured via `~/.config/fnkey/config.toml`. A template is created automatically on first launch. Click **Settings...** in the menu bar to open it. + +```toml +# API keys (optional — some local servers don't need one) +api_key = "gsk_..." # Used for Whisper transcription +polish_api_key = "" # Used for sanitizer (empty = use api_key) + +# API endpoints (default: Groq — any OpenAI-compatible API works) +transcription_url = "https://api.groq.com/openai/v1/audio/transcriptions" +polish_url = "https://api.groq.com/openai/v1/chat/completions" + +# Models (sent as-is in the API request — use whatever your server expects) +whisper_model = "whisper-large-v3" +polish_model = "llama-3.3-70b-versatile" + +# Hotkey: fn | option | control | shift | command +hotkey = "fn" + +# Language hint (ISO-639-1 code: "en", "sk", "de", "fr", etc. 
Empty = auto-detect) +language = "" + +# Always run text sanitization on every dictation (default: true) +# When true, hold the polish modifier to get RAW Whisper output instead +always_polish = true + +# Custom system prompt for text sanitization (empty = use built-in) +polish_prompt = "" +``` + +### Custom API endpoints + +FnKey works with any OpenAI-compatible API. Examples: + +```toml +# OpenAI for both +api_key = "sk-..." +transcription_url = "https://api.openai.com/v1/audio/transcriptions" +polish_url = "https://api.openai.com/v1/chat/completions" +whisper_model = "whisper-1" +polish_model = "gpt-4o-mini" + +# Mixed: Groq for Whisper, OpenAI for sanitizer +api_key = "gsk_..." +polish_api_key = "sk-..." +transcription_url = "https://api.groq.com/openai/v1/audio/transcriptions" +polish_url = "https://api.openai.com/v1/chat/completions" +whisper_model = "whisper-large-v3" +polish_model = "gpt-4o-mini" + +# Local / self-hosted (vLLM, faster-whisper-server, etc.) +api_key = "not-needed" +transcription_url = "http://localhost:8000/v1/audio/transcriptions" +whisper_model = "my-model-name" +``` + +Both plain-text and JSON transcription responses are handled automatically. + +### Hotkey options + +| Hotkey | Config value | Polish modifier | +|--------|-------------|-----------------| +| Fn | `"fn"` (default) | Ctrl | +| Option/Alt | `"option"` | Ctrl | +| Control | `"control"` | Shift | +| Shift | `"shift"` | Ctrl | +| Command | `"command"` | Ctrl | - | Permission | Purpose | How to Grant | - |------------|---------|--------------| - | **Input Monitoring** | Detect Fn key press | Add FnKey.app via + button | - | **Microphone** | Record voice | Prompted on first use, or add manually | - | **Accessibility** | Auto-paste text | Add FnKey.app via + button | +When `hotkey = "control"`, the polish modifier switches to Shift to avoid conflict. - Note: After rebuilding the app, you may need to remove and re-add it in these settings. 
+### Backward compatibility + +FnKey checks for configuration in this order: +1. `~/.config/fnkey/config.toml` +2. `~/.config/fnkey/api_key` (legacy — plain text API key) +3. `GROQ_API_KEY` environment variable ## Usage -- Hold **Fn** and speak → raw transcription -- Hold **Fn+Ctrl** and speak → polished transcription (removes filler words, improves sentence structure) -- Release to transcribe and paste -- Click menu bar icon (○) → Quit to exit +- Hold **hotkey** → speak → release → cleaned text pasted at cursor +- Hold **hotkey + polish modifier** → speak → release → raw Whisper output (bypasses sanitization) +- Click menu bar icon **○** → **Settings...** to edit config, **Quit** to exit The icon changes: ○ (idle) → ● (recording) -## Build from source +When `always_polish = false`, the behavior is inverted: hotkey gives raw output, hotkey + modifier gives polished output. + +## Text Sanitization + +FnKey includes an LLM-powered text sanitization step that runs after Whisper transcription. It fixes the common artifacts of speech-to-text: filler words, repeated words, broken grammar, and misheard terms. + +### How it works + +``` +Voice → [Whisper STT] → raw text → [LLM sanitizer] → clean text → clipboard → paste +``` + +The sanitizer is a lightweight LLM (as small as 0.6B parameters) that receives the raw Whisper output and a system prompt, then returns cleaned text. It uses any OpenAI-compatible chat completions endpoint. + +### Running locally + +For real-time dictation, the sanitizer must be fast. A small model (0.6B–3B) running locally can sanitize a sentence in under 200ms. 
Two recommended setups: + +#### llama.cpp (Linux/macOS, GPU or CPU) + +Download a small model like [Qwen3-0.6B](https://huggingface.co/Qwen/Qwen3-0.6B-GGUF) and serve it: ```bash -./build-app.sh -cp -r FnKey.app /Applications/ +llama-server \ + -m Qwen3-0.6B-Q6_K.gguf \ + --port 8108 \ + --host 0.0.0.0 \ + -ngl 999 \ + --jinja \ + -c 4096 + +# On macOS with Metal: +llama-server -m Qwen3-0.6B-Q6_K.gguf --port 8108 -ngl 999 --jinja -c 4096 ``` -Note: If cargo isn't found, run with login shell: `/bin/bash -l -c './build-app.sh'` +Then configure FnKey: +```toml +polish_url = "http://localhost:8108/v1/chat/completions" +polish_model = "Qwen3-0.6B-Q6_K.gguf" +api_key = "not-needed" +``` -## Features +#### MLX (macOS Apple Silicon) -- **Whisper large-v3** - Full model for best accuracy -- **Audio enhancement** - DC offset removal, high-pass filter, peak normalization -- **Config file** - API key stored in `~/.config/fnkey/api_key` -- **Auto sample rate** - Uses device's native sample rate +```bash +pip install mlx-lm +mlx_lm.server --model mlx-community/Qwen3-0.6B-4bit --port 8108 +``` -## TODO +Then configure FnKey the same way. 
-Features from Ito not yet implemented: +#### Whisper locally -- **Vocabulary hints** - Send prompt with proper nouns/technical terms to improve accuracy -- **No-speech detection** - Use `verbose_json` response format and check `no_speech_prob` to skip silent recordings -- **Custom dictionary** - User-configurable word list for domain-specific terms +For the transcription side, run Whisper via [faster-whisper-server](https://github.com/fedirz/faster-whisper-server), [vLLM](https://docs.vllm.ai/), or [whisper.cpp](https://github.com/ggml-org/whisper.cpp)'s `whisper-server`: -## Notes +```bash +# faster-whisper-server (CUDA) +pip install faster-whisper-server +faster-whisper-server --model large-v3-turbo --port 8100 + +# vLLM (CUDA) +vllm serve openai/whisper-large-v3-turbo --port 8100 + +# whisper.cpp +whisper-server -m ggml-large-v3-turbo.bin --port 8100 +``` + +### Custom system prompt + +The built-in prompt handles general dictation cleanup. For domain-specific accuracy, set `polish_prompt` in your config with a replacement dictionary for terms your STT engine commonly misrecognizes: + +```toml +polish_prompt = """Fix dictation. Remove filler words. Fix grammar and punctuation. +Replace misheard terms: +- clod dot MD/cloud dot MD → CLAUDE.md +- agents dot MD → agents.md +- lama dot CPP/llama dot CPP → llama.cpp +- quan three/qan three → Qwen3 +- M L X → MLX +- tailscale/tail scale → Tailscale +Output ONLY the corrected text. /no_think""" +``` + +The `/no_think` suffix disables reasoning on Qwen3 models, keeping response time under 200ms. + +**Adapt this to your codebase.** If you dictate about Kubernetes, add `cooper netties → Kubernetes`. If you work on a project called "Nexus", add `nexus/next us → Nexus`. The replacement dictionary is the key to making 0.6B models accurate for your domain. 
+ +### Recommended models + +| Model | Size | Speed | Notes | +|-------|------|-------|-------| +| Qwen3-0.6B | 600MB | ~270 t/s | Best speed, needs explicit replacement dictionary | +| Qwen2.5-1.5B | 1.5GB | ~150 t/s | Better understanding, less dictionary needed | +| Qwen3-1.7B | 1.7GB | ~120 t/s | Good balance of speed and quality | + +For the sanitizer, smaller is better — the task is simple pattern matching and cleanup, not reasoning. Use `/no_think` with Qwen3 models to disable chain-of-thought and keep latency low. + +## Permissions + +FnKey requires three macOS permissions. All are configured in **System Settings → Privacy & Security**. + +| Permission | Why | How to grant | +|------------|-----|--------------| +| **Input Monitoring** | Detect hotkey press/release | System Settings → Input Monitoring → click **+** → select FnKey.app | +| **Microphone** | Record voice while hotkey is held | Prompted automatically on first recording, or add manually | +| **Accessibility** | Simulate ⌘V to paste transcribed text | System Settings → Accessibility → click **+** → select FnKey.app | + +### After rebuilding from source + +When you rebuild and re-codesign the app, macOS **invalidates all previously granted permissions** because the binary signature changes. You must: + +1. Open **System Settings → Privacy & Security** +2. For **Input Monitoring** and **Accessibility**: remove FnKey, then re-add `/Applications/FnKey.app` +3. Relaunch the app + +The **Microphone** permission is usually re-prompted automatically. 
+ +### Troubleshooting permissions + +| Symptom | Cause | Fix | +|---------|-------|-----| +| App launches but hotkey does nothing | Input Monitoring not granted | Add FnKey to Input Monitoring | +| Hotkey records but text doesn't paste | Accessibility not granted | Add FnKey to Accessibility | +| No microphone indicator when holding hotkey | Microphone not granted | Add FnKey to Microphone, or approve the prompt | +| Permissions are granted but app still doesn't work | Stale permission after rebuild | Remove and re-add FnKey in each permission category | + +## Build from source + +```bash +cargo build --release +``` + +To create an .app bundle: + +```bash +./build-app.sh +cp -r FnKey.app /Applications/ +``` + +To regenerate the app icon (requires Python + Pillow): + +```bash +python3 -m venv .venv && source .venv/bin/activate && pip install Pillow +python3 gen-icon.py +``` + +Note: If cargo isn't found, run with login shell: `/bin/bash -l -c './build-app.sh'` + +## Features -- Falls back to Option key if Fn not detected after 5s -- Floating red dot appears during recording +- **Text sanitization** — LLM-powered cleanup of filler words, repeated words, grammar, and misheard terms +- **Configurable hotkey** — Fn, Option, Control, Shift, or Command +- **Custom API endpoints** — any OpenAI-compatible transcription/chat API (Groq, OpenAI, vLLM, faster-whisper, etc.) +- **Custom system prompt** — domain-specific replacement dictionaries for accurate technical dictation +- **Audio enhancement** — DC offset removal, high-pass filter, peak normalization +- **TOML config** — `~/.config/fnkey/config.toml` with Settings menu item +- **Auto sample rate** — uses device's native sample rate +- **JSON response handling** — works with servers that return JSON instead of plain text ## Known Limitations -**Slight recording delay**: There's a brief moment when you start speaking before audio capture begins. 
This is a deliberate tradeoff — eliminating this delay would require the microphone to be always active, showing the yellow indicator constantly. The current design prioritizes privacy: the microphone only activates when you press the Fn key. +**Slight recording delay**: There's a brief moment when you start speaking before audio capture begins. This is a deliberate tradeoff — eliminating this delay would require the microphone to be always active, showing the yellow indicator constantly. The current design prioritizes privacy: the microphone only activates when you press the hotkey. diff --git a/build-app.sh b/build-app.sh index fd90dd6..7f745a1 100755 --- a/build-app.sh +++ b/build-app.sh @@ -17,6 +17,9 @@ mkdir -p "$BUNDLE_DIR/Contents/Resources" cp target/release/fnkey "$BUNDLE_DIR/Contents/MacOS/" cp Info.plist "$BUNDLE_DIR/Contents/" +if [ -f AppIcon.icns ]; then + cp AppIcon.icns "$BUNDLE_DIR/Contents/Resources/" +fi echo "Signing app..." codesign --force --deep --sign "FnKey Dev" "$BUNDLE_DIR" diff --git a/gen-icon.py b/gen-icon.py new file mode 100644 index 0000000..cb6bb51 --- /dev/null +++ b/gen-icon.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python3 +"""Generate FnKey app icon using Pillow.""" +import math +import os +import shutil +import subprocess +from PIL import Image, ImageDraw, ImageFont + + +def draw_icon(size): + """Draw the FnKey icon at given pixel size.""" + img = Image.new("RGBA", (size, size), (0, 0, 0, 0)) + draw = ImageDraw.Draw(img) + + s = size + pad = int(s * 0.08) + corner_r = int(s * 0.22) + + # === Background: dark rounded square === + draw.rounded_rectangle( + [pad, pad, s - pad, s - pad], + radius=corner_r, + fill=(28, 28, 32, 255), + ) + + # Subtle border + inset = pad + max(1, int(s * 0.006)) + draw.rounded_rectangle( + [inset, inset, s - inset, s - inset], + radius=corner_r - max(1, int(s * 0.006)), + outline=(60, 60, 72, 130), + width=max(1, int(s * 0.004)), + ) + + cx = s / 2 + cy = s / 2 + + # === Sound wave arcs === + for 
radius_frac, alpha in [(0.30, 35), (0.23, 55), (0.16, 80)]: + r = s * radius_frac + arc_w = max(1, int(s * 0.013)) + arc_color = (100, 190, 255, alpha) + # Right arcs + bbox = [cx + s*0.02 - r, cy + s*0.04 - r, cx + s*0.02 + r, cy + s*0.04 + r] + draw.arc(bbox, start=-50, end=50, fill=arc_color, width=arc_w) + # Left arcs + bbox = [cx - s*0.02 - r, cy + s*0.04 - r, cx - s*0.02 + r, cy + s*0.04 + r] + draw.arc(bbox, start=130, end=230, fill=arc_color, width=arc_w) + + # === Microphone === + mic_w = s * 0.14 + mic_h = s * 0.24 + mic_x = cx - mic_w / 2 + mic_y = cy - mic_h * 0.15 + + mic_color = (100, 195, 255, 240) + + # Mic capsule (pill shape) + mic_r = mic_w / 2 + draw.rounded_rectangle( + [mic_x, mic_y, mic_x + mic_w, mic_y + mic_h], + radius=int(mic_r), + fill=mic_color, + ) + + # Grille lines on mic + grille_color = (35, 100, 160, 100) + num_lines = 4 + grille_top = mic_y + mic_h * 0.28 + grille_bot = mic_y + mic_h * 0.82 + line_w = max(1, int(s * 0.005)) + for i in range(num_lines): + ly = grille_top + i * (grille_bot - grille_top) / (num_lines - 1) + lx1 = mic_x + mic_w * 0.2 + lx2 = mic_x + mic_w * 0.8 + draw.line([(lx1, ly), (lx2, ly)], fill=grille_color, width=line_w) + + # === Mic stand === + stand_color = (100, 195, 255, 200) + stand_w = max(1, int(s * 0.016)) + + # U-cradle arc + cradle_r = mic_w * 0.85 + cradle_cy = mic_y + mic_h * 0.08 + bbox = [cx - cradle_r, cradle_cy - cradle_r, cx + cradle_r, cradle_cy + cradle_r] + draw.arc(bbox, start=0, end=180, fill=stand_color, width=stand_w) + + # Vertical stem + stem_top = cradle_cy + cradle_r + stem_bottom = stem_top + s * 0.07 + draw.line([(cx, stem_top), (cx, stem_bottom)], fill=stand_color, width=stand_w) + + # Base + base_w = s * 0.12 + draw.line( + [(cx - base_w/2, stem_bottom), (cx + base_w/2, stem_bottom)], + fill=stand_color, + width=stand_w, + ) + + # === "fn" text at bottom === + font_size = int(s * 0.16) + try: + font = ImageFont.truetype("/System/Library/Fonts/HelveticaNeue.ttc", font_size, 
index=8) # Bold + except (OSError, IndexError): + try: + font = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", font_size) + except OSError: + font = ImageFont.load_default() + + text = "fn" + text_bbox = draw.textbbox((0, 0), text, font=font) + tw = text_bbox[2] - text_bbox[0] + th = text_bbox[3] - text_bbox[1] + + text_x = cx - tw / 2 - text_bbox[0] + text_y = pad + s * 0.06 + + draw.text((text_x, text_y), text, fill=(255, 255, 255, 230), font=font) + + return img + + +def main(): + script_dir = os.path.dirname(os.path.abspath(__file__)) + iconset_dir = os.path.join(script_dir, "AppIcon.iconset") + os.makedirs(iconset_dir, exist_ok=True) + + sizes = [ + (16, 1), (16, 2), + (32, 1), (32, 2), + (128, 1), (128, 2), + (256, 1), (256, 2), + (512, 1), (512, 2), + ] + + for base_size, scale in sizes: + px = base_size * scale + if scale == 1: + name = f"icon_{base_size}x{base_size}.png" + else: + name = f"icon_{base_size}x{base_size}@{scale}x.png" + + path = os.path.join(iconset_dir, name) + img = draw_icon(px) + img.save(path, "PNG") + print(f" {name} ({px}x{px})") + + # Convert to .icns + icns_path = os.path.join(script_dir, "AppIcon.icns") + subprocess.run(["iconutil", "-c", "icns", iconset_dir, "-o", icns_path], check=True) + print(f"\nCreated {icns_path}") + + shutil.rmtree(iconset_dir) + + +if __name__ == "__main__": + main() diff --git a/src/main.rs b/src/main.rs index 8fa3492..13edfd3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -26,7 +26,8 @@ use core_graphics::event_source::{CGEventSource, CGEventSourceStateID}; use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; use cpal::Stream; use hound::{WavSpec, WavWriter}; -use objc::runtime::Object; +use objc::declare::ClassDecl; +use objc::runtime::{Object, Sel}; use objc::{class, msg_send, sel, sel_impl}; // ============================================================================ @@ -135,63 +136,210 @@ fn get_paste_keycode() -> u16 { // Main application // 
============================================================================ -// Fn key flag in CGEventFlags +// Modifier key flags in CGEventFlags const FN_KEY_FLAG: u64 = 0x800000; -// Option/Alt key flag const OPTION_KEY_FLAG: u64 = 0x80000; -// Control key flag const CONTROL_KEY_FLAG: u64 = 0x40000; +const SHIFT_KEY_FLAG: u64 = 0x20000; +const COMMAND_KEY_FLAG: u64 = 0x100000; -struct AppState { - audio_buffer: Arc>>, +// ============================================================================ +// Configuration +// ============================================================================ + +fn default_api_key() -> String { + String::new() +} +fn default_transcription_url() -> String { + "https://api.groq.com/openai/v1/audio/transcriptions".to_string() +} +fn default_polish_url() -> String { + "https://api.groq.com/openai/v1/chat/completions".to_string() +} +fn default_whisper_model() -> String { + "whisper-large-v3".to_string() +} +fn default_polish_model() -> String { + "llama-3.3-70b-versatile".to_string() +} +fn default_hotkey() -> String { + "fn".to_string() +} +fn default_language() -> String { + String::new() // empty = auto-detect +} +fn default_always_polish() -> bool { + true +} +fn default_polish_prompt() -> String { + String::new() // empty = use built-in prompt +} +fn default_polish_api_key() -> String { + String::new() // empty = use api_key +} + +#[derive(serde::Deserialize, Clone)] +struct Config { + #[serde(default = "default_api_key")] api_key: String, - use_fn_key: AtomicBool, - sample_rate: std::sync::atomic::AtomicU32, + #[serde(default = "default_transcription_url")] + transcription_url: String, + #[serde(default = "default_polish_url")] + polish_url: String, + #[serde(default = "default_whisper_model")] + whisper_model: String, + #[serde(default = "default_polish_model")] + polish_model: String, + #[serde(default = "default_hotkey")] + hotkey: String, + #[serde(default = "default_language")] + language: String, + #[serde(default = 
"default_always_polish")] + always_polish: bool, + #[serde(default = "default_polish_prompt")] + polish_prompt: String, + #[serde(default = "default_polish_api_key")] + polish_api_key: String, } -// Global status item pointer for updating from callbacks -static mut STATUS_ITEM: *mut Object = std::ptr::null_mut(); -// Global audio stream (not Send, so can't be in Arc) -static mut AUDIO_STREAM: Option = None; +impl Config { + /// Returns the CGEventFlags bitmask for the configured hotkey + fn hotkey_flag(&self) -> u64 { + match self.hotkey.as_str() { + "option" => OPTION_KEY_FLAG, + "control" => CONTROL_KEY_FLAG, + "shift" => SHIFT_KEY_FLAG, + "command" => COMMAND_KEY_FLAG, + _ => FN_KEY_FLAG, // "fn" or any unrecognized value + } + } -/// Get API key from config file or environment variable. -/// Checks ~/.config/fnkey/api_key first, then GROQ_API_KEY env var. -fn get_api_key() -> Option { - // Try config file first + /// Returns the modifier flag used to trigger polish mode. + /// Normally Ctrl, but if hotkey is already Ctrl, use Shift instead. + fn polish_flag(&self) -> u64 { + if self.hotkey == "control" { + SHIFT_KEY_FLAG + } else { + CONTROL_KEY_FLAG + } + } + + /// API key for the polish/sanitizer endpoint. + /// Falls back to the main api_key when polish_api_key is not set. + fn polish_key(&self) -> &str { + if self.polish_api_key.is_empty() { + &self.api_key + } else { + &self.polish_api_key + } + } +} + +/// Load configuration from TOML file, legacy api_key file, or environment variable. +/// Always returns a Config — creates a default config.toml if nothing exists. 
+fn load_config() -> Config { if let Some(home) = env::var_os("HOME") { - let config_path = std::path::Path::new(&home) - .join(".config") - .join("fnkey") - .join("api_key"); - if let Ok(key) = std::fs::read_to_string(&config_path) { + let config_dir = std::path::Path::new(&home).join(".config").join("fnkey"); + + // Try config.toml first + let toml_path = config_dir.join("config.toml"); + if let Ok(contents) = std::fs::read_to_string(&toml_path) { + if let Ok(config) = toml::from_str::<Config>(&contents) { + return config; + } + } + + // Try legacy api_key file + let key_path = config_dir.join("api_key"); + if let Ok(key) = std::fs::read_to_string(&key_path) { let key = key.trim(); if !key.is_empty() { - return Some(key.to_string()); + return Config { + api_key: key.to_string(), + transcription_url: default_transcription_url(), + polish_url: default_polish_url(), + whisper_model: default_whisper_model(), + polish_model: default_polish_model(), + hotkey: default_hotkey(), + language: default_language(), + always_polish: default_always_polish(), + polish_prompt: default_polish_prompt(), + polish_api_key: default_polish_api_key(), + }; } } + + // Try environment variable (trimmed; a blank value counts as unset, like the legacy key file) + if let Some(key) = env::var("GROQ_API_KEY").ok().map(|k| k.trim().to_string()).filter(|k| !k.is_empty()) { + return Config { + api_key: key, + transcription_url: default_transcription_url(), + polish_url: default_polish_url(), + whisper_model: default_whisper_model(), + polish_model: default_polish_model(), + hotkey: default_hotkey(), + language: default_language(), + always_polish: default_always_polish(), + polish_prompt: default_polish_prompt(), + polish_api_key: default_polish_api_key(), + }; + } + + // No usable config — write a template config.toml, but never clobber a file that already exists (e.g. one that failed to parse) + let _ = std::fs::create_dir_all(&config_dir); + let default_toml = r#"# FnKey configuration — edit and relaunch +# api_key = "your-api-key" +# polish_api_key = "" # Separate key for sanitizer (empty = use api_key) +# transcription_url = "https://your-server/v1/audio/transcriptions" +# polish_url 
= "https://your-server/v1/chat/completions" +# whisper_model = "whisper-large-v3" +# polish_model = "llama-3.3-70b-versatile" +# hotkey = "fn" +# language = "" # ISO-639-1 code: "en", "sk", "de", "fr", etc. Empty = auto-detect +# always_polish = true # Always run LLM cleanup on transcriptions (Ctrl modifier skips it) +# polish_prompt = "" # Custom system prompt for polish mode (empty = use built-in) +"#; + if !toml_path.exists() { let _ = std::fs::write(&toml_path, default_toml); } } - // Fall back to environment variable - env::var("GROQ_API_KEY").ok() + + // Return defaults — app will launch but transcription won't work until configured + Config { + api_key: default_api_key(), + transcription_url: default_transcription_url(), + polish_url: default_polish_url(), + whisper_model: default_whisper_model(), + polish_model: default_polish_model(), + hotkey: default_hotkey(), + language: default_language(), + always_polish: default_always_polish(), + polish_prompt: default_polish_prompt(), + polish_api_key: default_polish_api_key(), + } +} + +struct AppState { + audio_buffer: Arc>>, + config: Config, + sample_rate: std::sync::atomic::AtomicU32, } +// Global status item pointer for updating from callbacks +static mut STATUS_ITEM: *mut Object = std::ptr::null_mut(); +// Global audio stream (not Send, so can't be in Arc) +static mut AUDIO_STREAM: Option<Stream> = None; + fn main() { - let api_key = get_api_key().unwrap_or_else(|| { - show_alert( - "GROQ_API_KEY not configured", - "Please create ~/.config/fnkey/api_key with your Groq API key.\n\nExample:\n mkdir -p ~/.config/fnkey\n echo 'gsk_your_key_here' > ~/.config/fnkey/api_key" - ); - std::process::exit(1); - }); + let config = load_config(); - // Check Input Monitoring permission - if !check_input_monitoring_permission() { - std::process::exit(1); - } + // Eagerly build keycode map on main thread — Carbon TIS APIs require main thread + let _ = get_paste_keycode(); + + // Request Input Monitoring permission (non-blocking — app continues either way) + 
check_input_monitoring_permission(); let state = Arc::new(AppState { audio_buffer: Arc::new(Mutex::new(Vec::new())), - api_key, - use_fn_key: AtomicBool::new(true), + config, sample_rate: std::sync::atomic::AtomicU32::new(48000), // Default, will be updated }); @@ -209,29 +357,18 @@ fn main() { run_event_tap(state); } -fn check_input_monitoring_permission() -> bool { +fn check_input_monitoring_permission() { unsafe { - // CGPreflightListenEventAccess and CGRequestListenEventAccess #[link(name = "CoreGraphics", kind = "framework")] extern "C" { fn CGPreflightListenEventAccess() -> bool; fn CGRequestListenEventAccess() -> bool; } - if CGPreflightListenEventAccess() { - return true; + if !CGPreflightListenEventAccess() { + // Request permission - shows system dialog on first run + CGRequestListenEventAccess(); } - - // Request permission - this shows system dialog - if CGRequestListenEventAccess() { - return true; - } - - show_alert( - "Input Monitoring Required", - "FnKey needs Input Monitoring permission to detect the Fn key.\n\nPlease grant access in System Settings → Privacy & Security → Input Monitoring, then relaunch FnKey.", - ); - false } } @@ -249,6 +386,53 @@ fn show_alert(title: &str, message: &str) { } } +/// Objective-C callback: open config.toml in default editor +extern "C" fn open_settings(_this: &Object, _cmd: Sel, _sender: id) { + if let Some(home) = env::var_os("HOME") { + let config_path = std::path::Path::new(&home) + .join(".config") + .join("fnkey") + .join("config.toml"); + // Ensure file exists + let _ = std::fs::create_dir_all(config_path.parent().unwrap()); + if !config_path.exists() { + let default_toml = r#"# FnKey configuration — edit and relaunch +# api_key = "your-api-key" +# polish_api_key = "" # Separate key for sanitizer (empty = use api_key) +# transcription_url = "https://your-server/v1/audio/transcriptions" +# polish_url = "https://your-server/v1/chat/completions" +# whisper_model = "whisper-large-v3" +# polish_model = 
"llama-3.3-70b-versatile" +# hotkey = "fn" +# language = "" # ISO-639-1 code: "en", "sk", "de", "fr", etc. Empty = auto-detect +# always_polish = true # Always run LLM cleanup on transcriptions (Ctrl modifier skips it) +# polish_prompt = "" # Custom system prompt for polish mode (empty = use built-in) +"#; + let _ = std::fs::write(&config_path, default_toml); + } + unsafe { + let workspace: id = msg_send![class!(NSWorkspace), sharedWorkspace]; + let path_str = NSString::alloc(nil).init_str(config_path.to_str().unwrap()); + let url: id = msg_send![class!(NSURL), fileURLWithPath: path_str]; + let _: bool = msg_send![workspace, openURL: url]; + } + } +} + +/// Register a helper class with an openSettings: action +unsafe fn register_menu_delegate() -> id { + let superclass = class!(NSObject); + let mut decl = ClassDecl::new("FnKeyMenuDelegate", superclass).unwrap(); + decl.add_method( + sel!(openSettings:), + open_settings as extern "C" fn(&Object, Sel, id), + ); + let cls = decl.register(); + let obj: id = msg_send![cls, new]; + let _: () = msg_send![obj, retain]; + obj +} + unsafe fn create_status_item() { let status_bar: id = msg_send![class!(NSStatusBar), systemStatusBar]; let status_item: id = msg_send![status_bar, statusItemWithLength: -1.0_f64]; // NSVariableStatusItemLength @@ -260,9 +444,24 @@ unsafe fn create_status_item() { let button: id = msg_send![status_item, button]; let _: () = msg_send![button, setTitle: title]; + // Register menu delegate for Settings action + let delegate = register_menu_delegate(); + // Create menu let menu: id = NSMenu::new(nil); + // Settings item + let settings_title = NSString::alloc(nil).init_str("Settings..."); + let settings_key = NSString::alloc(nil).init_str(","); + let settings_item: id = msg_send![class!(NSMenuItem), alloc]; + let settings_item: id = msg_send![settings_item, initWithTitle: settings_title action: sel!(openSettings:) keyEquivalent: settings_key]; + let _: () = msg_send![settings_item, setTarget: delegate]; 
+ let _: () = msg_send![menu, addItem: settings_item]; + + // Separator + let separator: id = msg_send![class!(NSMenuItem), separatorItem]; + let _: () = msg_send![menu, addItem: separator]; + // Quit item let quit_title = NSString::alloc(nil).init_str("Quit FnKey"); let quit_key = NSString::alloc(nil).init_str("q"); @@ -288,13 +487,14 @@ fn update_status_icon(recording: bool) { fn run_event_tap(state: Arc) { let state_for_callback = Arc::clone(&state); - let fn_detected = Arc::new(AtomicBool::new(false)); let was_pressed = Arc::new(AtomicBool::new(false)); - let ctrl_was_held = Arc::new(AtomicBool::new(false)); + let polish_latched = Arc::new(AtomicBool::new(false)); - let fn_detected_clone = Arc::clone(&fn_detected); let was_pressed_clone = Arc::clone(&was_pressed); - let ctrl_latched_clone = Arc::clone(&ctrl_was_held); // Latches true if Ctrl pressed anytime during recording + let polish_latched_clone = Arc::clone(&polish_latched); + + let hotkey_flag = state.config.hotkey_flag(); + let polish_flag = state.config.polish_flag(); let tap = CGEventTap::new( CGEventTapLocation::HID, @@ -304,65 +504,54 @@ fn run_event_tap(state: Arc) { move |_, _, event| { let flags = event.get_flags().bits(); - // Check Fn key first, then Option as fallback - let fn_pressed = (flags & FN_KEY_FLAG) != 0; - let option_pressed = (flags & OPTION_KEY_FLAG) != 0; - let ctrl_pressed = (flags & CONTROL_KEY_FLAG) != 0; - - let use_fn = state_for_callback.use_fn_key.load(Ordering::SeqCst); - let key_pressed = if use_fn { fn_pressed } else { option_pressed }; - - // Detect if Fn key works (first time detection) - if fn_pressed && !fn_detected_clone.load(Ordering::SeqCst) { - fn_detected_clone.store(true, Ordering::SeqCst); - } + let key_pressed = (flags & hotkey_flag) != 0; + let polish_held = (flags & polish_flag) != 0; let prev_pressed = was_pressed_clone.load(Ordering::SeqCst); - // Handle key state changes if key_pressed && !prev_pressed { - // Key pressed - start recording, reset Ctrl 
latch - ctrl_latched_clone.store(false, Ordering::SeqCst); + // Key pressed - start recording, reset polish latch + polish_latched_clone.store(false, Ordering::SeqCst); start_recording(&state_for_callback); } else if !key_pressed && prev_pressed { // Key released - stop recording and transcribe - let polish = ctrl_latched_clone.load(Ordering::SeqCst); + let polish = polish_latched_clone.load(Ordering::SeqCst); stop_recording(&state_for_callback, polish); } - // Latch Ctrl if pressed anytime during recording - if key_pressed && ctrl_pressed { - ctrl_latched_clone.store(true, Ordering::SeqCst); + // Latch polish modifier if held anytime during recording + if key_pressed && polish_held { + polish_latched_clone.store(true, Ordering::SeqCst); } was_pressed_clone.store(key_pressed, Ordering::SeqCst); None }, - ) - .expect("Failed to create event tap - check Input Monitoring permissions"); + ); - let source = tap - .mach_port - .create_runloop_source(0) - .expect("Failed to create runloop source"); + match tap { + Ok(tap) => { + let source = tap + .mach_port + .create_runloop_source(0) + .expect("Failed to create runloop source"); - let run_loop = CFRunLoop::get_current(); - run_loop.add_source(&source, unsafe { kCFRunLoopCommonModes }); + let run_loop = CFRunLoop::get_current(); + run_loop.add_source(&source, unsafe { kCFRunLoopCommonModes }); - tap.enable(); + tap.enable(); - // Fallback timer: if no Fn detected in 5 seconds, switch to Option - let state_fallback = Arc::clone(&state); - let fn_detected_fallback = Arc::clone(&fn_detected); - thread::spawn(move || { - thread::sleep(Duration::from_secs(5)); - if !fn_detected_fallback.load(Ordering::SeqCst) && state_fallback.use_fn_key.load(Ordering::SeqCst) { - state_fallback.use_fn_key.store(false, Ordering::SeqCst); + // tap + source must stay alive while the run loop is running + unsafe { NSApp().run(); } + } + Err(_) => { + show_alert( + "Input Monitoring Required", + "FnKey can't detect hotkey presses.\n\nGo to System 
Settings → Privacy & Security → Input Monitoring, remove FnKey, re-add it, then relaunch.", + ); + // Still run the app so the menu bar icon (Settings/Quit) is usable + unsafe { NSApp().run(); } } - }); - - unsafe { - NSApp().run(); } } @@ -451,48 +640,76 @@ fn stop_recording(state: &Arc, polish: bool) { } // Transcribe in background - let api_key = state.api_key.clone(); + let config = state.config.clone(); let sample_rate = state.sample_rate.load(Ordering::SeqCst); thread::spawn(move || { - transcribe_and_paste(audio_data, sample_rate, &api_key, polish); + transcribe_and_paste(audio_data, sample_rate, &config, polish); }); } -fn transcribe_and_paste(audio: Vec, sample_rate: u32, api_key: &str, polish: bool) { +fn transcribe_and_paste(audio: Vec, sample_rate: u32, config: &Config, polish: bool) { + let duration_secs = audio.len() as f32 / sample_rate as f32; + eprintln!("[fnkey] audio: {:.1}s, {} samples, {}Hz, {:.0}KB raw", + duration_secs, audio.len(), sample_rate, audio.len() as f32 * 4.0 / 1024.0); + let wav_data = match encode_wav(&audio, sample_rate) { Ok(data) => data, Err(_) => return, }; + eprintln!("[fnkey] wav: {:.0}KB", wav_data.len() as f32 / 1024.0); let client = reqwest::blocking::Client::new(); - let form = reqwest::blocking::multipart::Form::new() - .text("model", "whisper-large-v3") // Full model for better accuracy (vs turbo) - .text("response_format", "text") - .part( - "file", - reqwest::blocking::multipart::Part::bytes(wav_data) - .file_name("audio.wav") - .mime_str("audio/wav") - .unwrap(), - ); + let mut form = reqwest::blocking::multipart::Form::new() + .text("model", config.whisper_model.clone()) + .text("response_format", "text"); + + // Send language hint to Whisper if configured (ISO-639-1 code) + if !config.language.is_empty() { + form = form.text("language", config.language.clone()); + } + + let form = form.part( + "file", + reqwest::blocking::multipart::Part::bytes(wav_data) + .file_name("audio.wav") + .mime_str("audio/wav") + 
.unwrap(), + ); let response = client - .post("https://api.groq.com/openai/v1/audio/transcriptions") - .header("Authorization", format!("Bearer {}", api_key)) + .post(&config.transcription_url) + .header("Authorization", format!("Bearer {}", config.api_key)) .multipart(form) .timeout(Duration::from_secs(30)) .send(); if let Ok(resp) = response { if resp.status().is_success() { - if let Ok(text) = resp.text() { - let text = text.trim(); + if let Ok(raw) = resp.text() { + eprintln!("[fnkey] whisper raw response ({} bytes): {:.200}", raw.len(), raw); + // Handle both plain text and JSON responses + // Some servers (e.g. vLLM) return {"text":"..."} even with response_format=text + let text = if raw.trim_start().starts_with('{') { + serde_json::from_str::(raw.trim()) + .ok() + .and_then(|v| v.get("text")?.as_str().map(String::from)) + .unwrap_or_else(|| raw.trim().to_string()) + } else { + raw.trim().to_string() + }; + if !text.is_empty() { - // Apply polish if requested, fallback to raw on error - let final_text = if polish { - polish_text(text, api_key).unwrap_or_else(|| text.to_string()) + eprintln!("[fnkey] whisper text: {}", text); + // When always_polish is on: polish by default, Ctrl modifier = raw + // When always_polish is off: raw by default, Ctrl modifier = polish + let should_polish = if config.always_polish { !polish } else { polish }; + let final_text = if should_polish { + let polished = polish_text(&text, config).unwrap_or_else(|| text.clone()); + eprintln!("[fnkey] polished: {}", polished); + polished } else { - text.to_string() + eprintln!("[fnkey] raw (no polish)"); + text }; if let Ok(mut clipboard) = Clipboard::new() { @@ -546,27 +763,49 @@ struct ChatMessage { /// Polish transcribed text using LLM to convert spoken style to written prose. /// Returns None on any error (caller should fall back to raw text). 
-fn polish_text(text: &str, api_key: &str) -> Option { +fn polish_text(text: &str, config: &Config) -> Option { let client = reqwest::blocking::Client::new(); + let system_prompt = if !config.polish_prompt.is_empty() { + config.polish_prompt.clone() + } else if config.language.is_empty() || config.language == "en" { + "Fix dictation. Remove filler words (um, uh, like, you know, basically). \ + Remove repeated words. Fix grammar and punctuation. \ + Keep the same tone and meaning. Output ONLY the corrected text. /no_think".to_string() + } else { + format!( + "Fix dictation in language \"{}\". Remove filler words and hesitations. \ + Remove repeated words. Fix grammar and punctuation. \ + Keep the same language, tone and meaning. Do NOT translate. \ + Output ONLY the corrected text. /no_think", + config.language + ) + }; + + // Cap output tokens: rough estimate of input tokens (words * 1.3) doubled as headroom, + // with a floor of 64 and ceiling of 1024. Prevents hallucination runaway on small models. + let estimated_tokens = (text.split_whitespace().count() as f32 * 1.3 * 2.0) as u64; + let max_tokens = estimated_tokens.clamp(64, 1024); + let body = serde_json::json!({ - "model": "llama-3.3-70b-versatile", + "model": config.polish_model, "messages": [ { "role": "system", - "content": "Clean up this voice message for texting. Remove filler words (um, uh, like, you know). Fix punctuation and sentence structure. Break up run-on sentences. Keep it casual. No trailing period. Output ONLY the cleaned text - no explanations, no quotes." 
+ "content": system_prompt }, { "role": "user", "content": text } ], - "temperature": 0.2 + "temperature": 0.2, + "max_tokens": max_tokens }); let response = client - .post("https://api.groq.com/openai/v1/chat/completions") - .header("Authorization", format!("Bearer {}", api_key)) + .post(&config.polish_url) + .header("Authorization", format!("Bearer {}", config.polish_key())) .header("Content-Type", "application/json") .json(&body) .timeout(Duration::from_secs(30))