diff --git a/.gitignore b/.gitignore
index 330f1d4..9e9a4d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 /target/
 FnKey.app/
 .DS_Store
+.venv/
+__pycache__/
diff --git a/AppIcon.icns b/AppIcon.icns
new file mode 100644
index 0000000..f66f944
Binary files /dev/null and b/AppIcon.icns differ
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..2a75cbe
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,113 @@
+# FnKey — Agent Instructions
+
+## What is this
+
+FnKey is a macOS menu bar app (Rust) that records audio while a hotkey is held, transcribes it via Whisper, sanitizes the text with a small LLM, and pastes the result at the cursor. Single file: `src/main.rs`.
+
+## Architecture
+
+```
+Hotkey held → mic records → release → WAV encoded → Whisper STT → LLM sanitizer → clipboard → ⌘V paste
+```
+
+- **Transcription**: Any OpenAI-compatible `/v1/audio/transcriptions` endpoint
+- **Sanitization**: Any OpenAI-compatible `/v1/chat/completions` endpoint
+- **Config**: `~/.config/fnkey/config.toml` (TOML, deserialized into `Config` struct)
+- **No runtime dependencies** — single static binary, no frameworks beyond macOS system libs
+
+## Text Sanitization Pipeline
+
+The sanitizer is the key differentiator. Raw Whisper output is messy — filler words, repeated words, misheard technical terms. A small LLM (0.6B–3B params) cleans it up in real time (<200ms).
+
+### Setup for contributors
+
+After cloning this repo, set up local inference for development:
+
+1. **Whisper STT** — download and serve a Whisper model:
+   ```bash
+   # Option A: whisper.cpp (any platform with Vulkan/CUDA/Metal)
+   whisper-server -m ggml-large-v3-turbo.bin --port 8100
+
+   # Option B: faster-whisper-server (CUDA)
+   pip install faster-whisper-server
+   faster-whisper-server --model large-v3-turbo --port 8100
+
+   # Option C: MLX (macOS Apple Silicon)
+   pip install mlx-whisper
+   # (use via mlx-whisper Python API, or wrap in a server)
+   ```
+
+2. **Text sanitizer** — download and serve a small LLM:
+   ```bash
+   # Option A: llama.cpp
+   llama-server -m Qwen3-0.6B-Q6_K.gguf --port 8108 -ngl 999 --jinja -c 4096
+
+   # Option B: MLX
+   pip install mlx-lm
+   mlx_lm.server --model mlx-community/Qwen3-0.6B-4bit --port 8108
+   ```
+
+3. **Configure** `~/.config/fnkey/config.toml`:
+   ```toml
+   api_key = "not-needed"
+   transcription_url = "http://localhost:8100/v1/audio/transcriptions"
+   whisper_model = "whisper-large-v3-turbo"
+   polish_url = "http://localhost:8108/v1/chat/completions"
+   polish_model = "qwen3-0.6b"
+   always_polish = true
+   ```
+
+### How the system prompt works
+
+The `polish_prompt` config field overrides the built-in system prompt. For 0.6B models, the prompt must be:
+- **Short** — the model has limited instruction-following at this scale
+- **Explicit** — use a replacement dictionary for domain terms the STT engine misrecognizes
+- **End with `/no_think`** — disables Qwen3 reasoning mode, keeping latency under 200ms
+
+Example for a codebase that uses llama.cpp, Claude, and Tailscale:
+
+```toml
+polish_prompt = """Fix dictation. Remove filler words. Fix grammar and punctuation.
+Replace misheard terms:
+- clod dot MD/cloud dot MD → CLAUDE.md
+- agents dot MD → agents.md
+- lama dot CPP → llama.cpp
+- quan three/qan three → Qwen3
+- tail scale → Tailscale
+Output ONLY the corrected text. /no_think"""
+```
+
+**When helping users customize their prompt**: identify the technical terms they use frequently, figure out how Whisper is likely to misrecognize them, and add those as `misheard → correct` pairs. This is the single most impactful thing you can do to improve dictation accuracy.
+
+## Build & test
+
+```bash
+cargo build --release          # Build
+./build-app.sh                 # Create .app bundle
+cargo check                    # Type check only
+```
+
+After rebuilding, macOS invalidates permissions — user must re-add the app in System Settings (Input Monitoring, Accessibility).
+
+## Key code locations
+
+- `Config` struct and `load_config()` — config parsing and defaults
+- `polish_text()` — LLM sanitization call
+- `transcribe_and_paste()` — main pipeline: encode WAV → call Whisper → sanitize → paste
+- `run_event_tap()` — hotkey detection via CGEventTap
+- `enhance_audio()` — audio preprocessing (DC removal, high-pass filter, normalization)
+
+## Config fields
+
+| Field | Default | Purpose |
+|-------|---------|---------|
+| `api_key` | `""` | Bearer token for Whisper transcription (also used for sanitizer if `polish_api_key` is empty) |
+| `polish_api_key` | `""` | Separate bearer token for sanitizer endpoint (empty = falls back to `api_key`) |
+| `transcription_url` | Groq | Whisper endpoint |
+| `polish_url` | Groq | Chat completions endpoint for sanitizer |
+| `whisper_model` | `whisper-large-v3` | Model name sent to STT endpoint |
+| `polish_model` | `llama-3.3-70b-versatile` | Model name sent to sanitizer endpoint |
+| `hotkey` | `fn` | Trigger key (fn/option/control/shift/command) |
+| `language` | `""` (auto) | ISO-639-1 hint for Whisper |
+| `always_polish` | `true` | Sanitize every dictation by default |
+| `polish_prompt` | `""` (built-in) | Custom system prompt for sanitizer |
diff --git a/Cargo.lock b/Cargo.lock
index 3a45e69..ea5c74c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -466,6 +466,7 @@ dependencies = [
  "reqwest",
  "serde",
  "serde_json",
+ "toml",
 ]
 
 [[package]]
@@ -1422,7 +1423,7 @@ version = "3.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983"
 dependencies = [
- "toml_edit",
+ "toml_edit 0.23.7",
 ]
 
 [[package]]
@@ -1713,6 +1714,15 @@ dependencies = [
  "serde_core",
 ]
 
+[[package]]
+name = "serde_spanned"
+version = "0.6.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "serde_urlencoded"
 version = "0.7.1"
@@ -1927,6 +1937,27 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "toml"
+version = "0.8.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
+dependencies = [
+ "serde",
+ "serde_spanned",
+ "toml_datetime 0.6.11",
+ "toml_edit 0.22.27",
+]
+
+[[package]]
+name = "toml_datetime"
+version = "0.6.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "toml_datetime"
 version = "0.7.3"
@@ -1936,6 +1967,20 @@ dependencies = [
  "serde_core",
 ]
 
+[[package]]
+name = "toml_edit"
+version = "0.22.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
+dependencies = [
+ "indexmap",
+ "serde",
+ "serde_spanned",
+ "toml_datetime 0.6.11",
+ "toml_write",
+ "winnow",
+]
+
 [[package]]
 name = "toml_edit"
 version = "0.23.7"
@@ -1943,7 +1988,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d"
 dependencies = [
  "indexmap",
- "toml_datetime",
+ "toml_datetime 0.7.3",
  "toml_parser",
  "winnow",
 ]
@@ -1957,6 +2002,12 @@ dependencies = [
  "winnow",
 ]
 
+[[package]]
+name = "toml_write"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
+
 [[package]]
 name = "tower"
 version = "0.5.2"
diff --git a/Cargo.toml b/Cargo.toml
index 03692fc..e0c4f37 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -28,3 +28,6 @@ hound = "3.5"
 # JSON parsing for LLM API
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
+
+# Config file parsing
+toml = "0.8"
diff --git a/Info.plist b/Info.plist
index b3eee15..5a15892 100644
--- a/Info.plist
+++ b/Info.plist
@@ -14,6 +14,8 @@
     <string>1.0.0</string>
     <key>CFBundleShortVersionString</key>
     <string>1.0.0</string>
+    <key>CFBundleIconFile</key>
+    <string>AppIcon</string>
     <key>CFBundlePackageType</key>
     <string>APPL</string>
     <key>LSMinimumSystemVersion</key>
diff --git a/README.md b/README.md
index bd0e945..7134725 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # fnkey.ai
 
-Hold Fn key, speak, paste transcribed text.
+Hold a hotkey, speak, paste transcribed text. Works with any OpenAI-compatible speech-to-text API.
 
 ## Install
 
@@ -14,66 +14,260 @@ Hold Fn key, speak, paste transcribed text.
    mv FnKey.app /Applications/
    ```
 
-3. Set your Groq API key:
-   ```bash
-   mkdir -p ~/.config/fnkey
-   echo 'your-groq-api-key' > ~/.config/fnkey/api_key
-   ```
-   Get a key at [console.groq.com](https://console.groq.com)
+3. Grant macOS permissions (see [Permissions](#permissions) below)
 
 4. Launch:
    ```bash
    open /Applications/FnKey.app
    ```
 
-5. Grant permissions in **System Settings → Privacy & Security**:
+5. Click the **○** menu bar icon → **Settings...** to configure your API endpoint
+
+## Configuration
+
+FnKey is configured via `~/.config/fnkey/config.toml`. A template is created automatically on first launch. Click **Settings...** in the menu bar to open it.
+
+```toml
+# API keys (optional — some local servers don't need one)
+api_key = "gsk_..."           # Used for Whisper transcription
+polish_api_key = ""           # Used for sanitizer (empty = use api_key)
+
+# API endpoints (default: Groq — any OpenAI-compatible API works)
+transcription_url = "https://api.groq.com/openai/v1/audio/transcriptions"
+polish_url = "https://api.groq.com/openai/v1/chat/completions"
+
+# Models (sent as-is in the API request — use whatever your server expects)
+whisper_model = "whisper-large-v3"
+polish_model = "llama-3.3-70b-versatile"
+
+# Hotkey: fn | option | control | shift | command
+hotkey = "fn"
+
+# Language hint (ISO-639-1 code: "en", "sk", "de", "fr", etc. Empty = auto-detect)
+language = ""
+
+# Always run text sanitization on every dictation (default: true)
+# When true, hold the polish modifier to get RAW Whisper output instead
+always_polish = true
+
+# Custom system prompt for text sanitization (empty = use built-in)
+polish_prompt = ""
+```
+
+### Custom API endpoints
+
+FnKey works with any OpenAI-compatible API. Examples:
+
+```toml
+# OpenAI for both
+api_key = "sk-..."
+transcription_url = "https://api.openai.com/v1/audio/transcriptions"
+polish_url = "https://api.openai.com/v1/chat/completions"
+whisper_model = "whisper-1"
+polish_model = "gpt-4o-mini"
+
+# Mixed: Groq for Whisper, OpenAI for sanitizer
+api_key = "gsk_..."
+polish_api_key = "sk-..."
+transcription_url = "https://api.groq.com/openai/v1/audio/transcriptions"
+polish_url = "https://api.openai.com/v1/chat/completions"
+whisper_model = "whisper-large-v3"
+polish_model = "gpt-4o-mini"
+
+# Local / self-hosted (vLLM, faster-whisper-server, etc.)
+api_key = "not-needed"
+transcription_url = "http://localhost:8000/v1/audio/transcriptions"
+whisper_model = "my-model-name"
+```
+
+Both plain-text and JSON transcription responses are handled automatically.
+
+### Hotkey options
+
+| Hotkey | Config value | Polish modifier |
+|--------|-------------|-----------------|
+| Fn | `"fn"` (default) | Ctrl |
+| Option/Alt | `"option"` | Ctrl |
+| Control | `"control"` | Shift |
+| Shift | `"shift"` | Ctrl |
+| Command | `"command"` | Ctrl |
 
-   | Permission | Purpose | How to Grant |
-   |------------|---------|--------------|
-   | **Input Monitoring** | Detect Fn key press | Add FnKey.app via + button |
-   | **Microphone** | Record voice | Prompted on first use, or add manually |
-   | **Accessibility** | Auto-paste text | Add FnKey.app via + button |
+When `hotkey = "control"`, the polish modifier switches to Shift to avoid conflict.
 
-   Note: After rebuilding the app, you may need to remove and re-add it in these settings.
+### Backward compatibility
+
+FnKey checks for configuration in this order:
+1. `~/.config/fnkey/config.toml`
+2. `~/.config/fnkey/api_key` (legacy — plain text API key)
+3. `GROQ_API_KEY` environment variable
 
 ## Usage
 
-- Hold **Fn** and speak → raw transcription
-- Hold **Fn+Ctrl** and speak → polished transcription (removes filler words, improves sentence structure)
-- Release to transcribe and paste
-- Click menu bar icon (○) → Quit to exit
+- Hold **hotkey** → speak → release → cleaned text pasted at cursor
+- Hold **hotkey + polish modifier** → speak → release → raw Whisper output (bypasses sanitization)
+- Click menu bar icon **○** → **Settings...** to edit config, **Quit** to exit
 
 The icon changes: ○ (idle) → ● (recording)
 
-## Build from source
+When `always_polish = false`, the behavior is inverted: hotkey gives raw output, hotkey + modifier gives polished output.
+
+## Text Sanitization
+
+FnKey includes an LLM-powered text sanitization step that runs after Whisper transcription. It fixes the common artifacts of speech-to-text: filler words, repeated words, broken grammar, and misheard terms.
+
+### How it works
+
+```
+Voice → [Whisper STT] → raw text → [LLM sanitizer] → clean text → clipboard → paste
+```
+
+The sanitizer is a lightweight LLM (as small as 0.6B parameters) that receives the raw Whisper output and a system prompt, then returns cleaned text. It uses any OpenAI-compatible chat completions endpoint.
+
+### Running locally
+
+For real-time dictation, the sanitizer must be fast. A small model (0.6B–3B) running locally can sanitize a sentence in under 200ms. Two recommended setups:
+
+#### llama.cpp (Linux/macOS, GPU or CPU)
+
+Download a small model like [Qwen3-0.6B](https://huggingface.co/Qwen/Qwen3-0.6B-GGUF) and serve it:
 
 ```bash
-./build-app.sh
-cp -r FnKey.app /Applications/
+llama-server \
+  -m Qwen3-0.6B-Q6_K.gguf \
+  --port 8108 \
+  --host 0.0.0.0 \
+  -ngl 999 \
+  --jinja \
+  -c 4096
+
+# On macOS with Metal:
+llama-server -m Qwen3-0.6B-Q6_K.gguf --port 8108 -ngl 999 --jinja -c 4096
 ```
 
-Note: If cargo isn't found, run with login shell: `/bin/bash -l -c './build-app.sh'`
+Then configure FnKey:
+```toml
+polish_url = "http://localhost:8108/v1/chat/completions"
+polish_model = "Qwen3-0.6B-Q6_K.gguf"
+api_key = "not-needed"
+```
 
-## Features
+#### MLX (macOS Apple Silicon)
 
-- **Whisper large-v3** - Full model for best accuracy
-- **Audio enhancement** - DC offset removal, high-pass filter, peak normalization
-- **Config file** - API key stored in `~/.config/fnkey/api_key`
-- **Auto sample rate** - Uses device's native sample rate
+```bash
+pip install mlx-lm
+mlx_lm.server --model mlx-community/Qwen3-0.6B-4bit --port 8108
+```
 
-## TODO
+Then configure FnKey the same way.
 
-Features from Ito not yet implemented:
+#### Whisper locally
 
-- **Vocabulary hints** - Send prompt with proper nouns/technical terms to improve accuracy
-- **No-speech detection** - Use `verbose_json` response format and check `no_speech_prob` to skip silent recordings
-- **Custom dictionary** - User-configurable word list for domain-specific terms
+For the transcription side, run Whisper via [faster-whisper-server](https://github.com/fedirz/faster-whisper-server), [vLLM](https://docs.vllm.ai/), or [whisper.cpp](https://github.com/ggml-org/whisper.cpp)'s `whisper-server`:
 
-## Notes
+```bash
+# faster-whisper-server (CUDA)
+pip install faster-whisper-server
+faster-whisper-server --model large-v3-turbo --port 8100
+
+# vLLM (CUDA)
+vllm serve openai/whisper-large-v3-turbo --port 8100
+
+# whisper.cpp
+whisper-server -m ggml-large-v3-turbo.bin --port 8100
+```
+
+### Custom system prompt
+
+The built-in prompt handles general dictation cleanup. For domain-specific accuracy, set `polish_prompt` in your config with a replacement dictionary for terms your STT engine commonly misrecognizes:
+
+```toml
+polish_prompt = """Fix dictation. Remove filler words. Fix grammar and punctuation.
+Replace misheard terms:
+- clod dot MD/cloud dot MD → CLAUDE.md
+- agents dot MD → agents.md
+- lama dot CPP/llama dot CPP → llama.cpp
+- quan three/qan three → Qwen3
+- M L X → MLX
+- tailscale/tail scale → Tailscale
+Output ONLY the corrected text. /no_think"""
+```
+
+The `/no_think` suffix disables reasoning on Qwen3 models, keeping response time under 200ms.
+
+**Adapt this to your codebase.** If you dictate about Kubernetes, add `cooper netties → Kubernetes`. If you work on a project called "Nexus", add `nexus/next us → Nexus`. The replacement dictionary is the key to making 0.6B models accurate for your domain.
+
+### Recommended models
+
+| Model | Size | Speed | Notes |
+|-------|------|-------|-------|
+| Qwen3-0.6B | 600MB | ~270 t/s | Best speed, needs explicit replacement dictionary |
+| Qwen2.5-1.5B | 1.5GB | ~150 t/s | Better understanding, less dictionary needed |
+| Qwen3-1.7B | 1.7GB | ~120 t/s | Good balance of speed and quality |
+
+For the sanitizer, smaller is better — the task is simple pattern matching and cleanup, not reasoning. Use `/no_think` with Qwen3 models to disable chain-of-thought and keep latency low.
+
+## Permissions
+
+FnKey requires three macOS permissions. All are configured in **System Settings → Privacy & Security**.
+
+| Permission | Why | How to grant |
+|------------|-----|--------------|
+| **Input Monitoring** | Detect hotkey press/release | System Settings → Input Monitoring → click **+** → select FnKey.app |
+| **Microphone** | Record voice while hotkey is held | Prompted automatically on first recording, or add manually |
+| **Accessibility** | Simulate ⌘V to paste transcribed text | System Settings → Accessibility → click **+** → select FnKey.app |
+
+### After rebuilding from source
+
+When you rebuild and re-codesign the app, macOS **invalidates all previously granted permissions** because the binary signature changes. You must:
+
+1. Open **System Settings → Privacy & Security**
+2. For **Input Monitoring** and **Accessibility**: remove FnKey, then re-add `/Applications/FnKey.app`
+3. Relaunch the app
+
+The **Microphone** permission is usually re-prompted automatically.
+
+### Troubleshooting permissions
+
+| Symptom | Cause | Fix |
+|---------|-------|-----|
+| App launches but hotkey does nothing | Input Monitoring not granted | Add FnKey to Input Monitoring |
+| Hotkey records but text doesn't paste | Accessibility not granted | Add FnKey to Accessibility |
+| No microphone indicator when holding hotkey | Microphone not granted | Add FnKey to Microphone, or approve the prompt |
+| Permissions are granted but app still doesn't work | Stale permission after rebuild | Remove and re-add FnKey in each permission category |
+
+## Build from source
+
+```bash
+cargo build --release
+```
+
+To create an .app bundle:
+
+```bash
+./build-app.sh
+cp -r FnKey.app /Applications/
+```
+
+To regenerate the app icon (requires Python + Pillow):
+
+```bash
+python3 -m venv .venv && source .venv/bin/activate && pip install Pillow
+python3 gen-icon.py
+```
+
+Note: If cargo isn't found, run with login shell: `/bin/bash -l -c './build-app.sh'`
+
+## Features
 
-- Falls back to Option key if Fn not detected after 5s
-- Floating red dot appears during recording
+- **Text sanitization** — LLM-powered cleanup of filler words, repeated words, grammar, and misheard terms
+- **Configurable hotkey** — Fn, Option, Control, Shift, or Command
+- **Custom API endpoints** — any OpenAI-compatible transcription/chat API (Groq, OpenAI, vLLM, faster-whisper, etc.)
+- **Custom system prompt** — domain-specific replacement dictionaries for accurate technical dictation
+- **Audio enhancement** — DC offset removal, high-pass filter, peak normalization
+- **TOML config** — `~/.config/fnkey/config.toml` with Settings menu item
+- **Auto sample rate** — uses device's native sample rate
+- **JSON response handling** — works with servers that return JSON instead of plain text
 
 ## Known Limitations
 
-**Slight recording delay**: There's a brief moment when you start speaking before audio capture begins. This is a deliberate tradeoff — eliminating this delay would require the microphone to be always active, showing the yellow indicator constantly. The current design prioritizes privacy: the microphone only activates when you press the Fn key.
+**Slight recording delay**: There's a brief moment when you start speaking before audio capture begins. This is a deliberate tradeoff — eliminating this delay would require the microphone to be always active, showing the yellow indicator constantly. The current design prioritizes privacy: the microphone only activates when you press the hotkey.
diff --git a/build-app.sh b/build-app.sh
index fd90dd6..7f745a1 100755
--- a/build-app.sh
+++ b/build-app.sh
@@ -17,6 +17,9 @@ mkdir -p "$BUNDLE_DIR/Contents/Resources"
 
 cp target/release/fnkey "$BUNDLE_DIR/Contents/MacOS/"
 cp Info.plist "$BUNDLE_DIR/Contents/"
+if [ -f AppIcon.icns ]; then
+    cp AppIcon.icns "$BUNDLE_DIR/Contents/Resources/"
+fi
 
 echo "Signing app..."
 codesign --force --deep --sign "FnKey Dev" "$BUNDLE_DIR"
diff --git a/gen-icon.py b/gen-icon.py
new file mode 100644
index 0000000..cb6bb51
--- /dev/null
+++ b/gen-icon.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+"""Generate FnKey app icon using Pillow."""
+import math
+import os
+import shutil
+import subprocess
+from PIL import Image, ImageDraw, ImageFont
+
+
+def draw_icon(size):
+    """Draw the FnKey icon at given pixel size."""
+    img = Image.new("RGBA", (size, size), (0, 0, 0, 0))  # fully transparent RGBA canvas
+    draw = ImageDraw.Draw(img)
+
+    s = size  # every measurement below is a fraction of s, so the design scales with the size
+    pad = int(s * 0.08)  # transparent margin between the canvas edge and the tile
+    corner_r = int(s * 0.22)
+
+    # === Background: dark rounded square ===
+    draw.rounded_rectangle(
+        [pad, pad, s - pad, s - pad],
+        radius=corner_r,
+        fill=(28, 28, 32, 255),
+    )
+
+    # Subtle border, inset slightly from the tile edge (max(1, ...) keeps it >= 1 px at tiny sizes)
+    inset = pad + max(1, int(s * 0.006))
+    draw.rounded_rectangle(
+        [inset, inset, s - inset, s - inset],
+        radius=corner_r - max(1, int(s * 0.006)),
+        outline=(60, 60, 72, 130),
+        width=max(1, int(s * 0.004)),
+    )
+
+    cx = s / 2  # canvas centre; kept as floats
+    cy = s / 2
+
+    # === Sound wave arcs: three mirrored pairs, fainter (lower alpha) as the radius grows ===
+    for radius_frac, alpha in [(0.30, 35), (0.23, 55), (0.16, 80)]:
+        r = s * radius_frac
+        arc_w = max(1, int(s * 0.013))
+        arc_color = (100, 190, 255, alpha)
+        # Right arcs: circle centre nudged right by 2% and down by 4% of the size
+        bbox = [cx + s*0.02 - r, cy + s*0.04 - r, cx + s*0.02 + r, cy + s*0.04 + r]
+        draw.arc(bbox, start=-50, end=50, fill=arc_color, width=arc_w)
+        # Left arcs: mirror image, centre nudged left by 2%
+        bbox = [cx - s*0.02 - r, cy + s*0.04 - r, cx - s*0.02 + r, cy + s*0.04 + r]
+        draw.arc(bbox, start=130, end=230, fill=arc_color, width=arc_w)
+
+    # === Microphone ===
+    mic_w = s * 0.14
+    mic_h = s * 0.24
+    mic_x = cx - mic_w / 2  # horizontally centred
+    mic_y = cy - mic_h * 0.15  # capsule top sits slightly above the canvas centre
+
+    mic_color = (100, 195, 255, 240)
+
+    # Mic capsule (pill shape: corner radius is half the capsule width)
+    mic_r = mic_w / 2
+    draw.rounded_rectangle(
+        [mic_x, mic_y, mic_x + mic_w, mic_y + mic_h],
+        radius=int(mic_r),
+        fill=mic_color,
+    )
+
+    # Grille lines on mic: num_lines evenly spaced horizontal strokes between grille_top and grille_bot
+    grille_color = (35, 100, 160, 100)
+    num_lines = 4
+    grille_top = mic_y + mic_h * 0.28
+    grille_bot = mic_y + mic_h * 0.82
+    line_w = max(1, int(s * 0.005))
+    for i in range(num_lines):
+        ly = grille_top + i * (grille_bot - grille_top) / (num_lines - 1)
+        lx1 = mic_x + mic_w * 0.2
+        lx2 = mic_x + mic_w * 0.8
+        draw.line([(lx1, ly), (lx2, ly)], fill=grille_color, width=line_w)
+
+    # === Mic stand ===
+    stand_color = (100, 195, 255, 200)
+    stand_w = max(1, int(s * 0.016))
+
+    # U-cradle arc: the 0-180 degree sweep is the lower half of the circle (Pillow angles run clockwise from 3 o'clock)
+    cradle_r = mic_w * 0.85
+    cradle_cy = mic_y + mic_h * 0.08
+    bbox = [cx - cradle_r, cradle_cy - cradle_r, cx + cradle_r, cradle_cy + cradle_r]
+    draw.arc(bbox, start=0, end=180, fill=stand_color, width=stand_w)
+
+    # Vertical stem, starting at the lowest point of the cradle arc
+    stem_top = cradle_cy + cradle_r
+    stem_bottom = stem_top + s * 0.07
+    draw.line([(cx, stem_top), (cx, stem_bottom)], fill=stand_color, width=stand_w)
+
+    # Base: short horizontal bar centred under the stem
+    base_w = s * 0.12
+    draw.line(
+        [(cx - base_w/2, stem_bottom), (cx + base_w/2, stem_bottom)],
+        fill=stand_color,
+        width=stand_w,
+    )
+
+    # === "fn" label near the top of the tile (text_y is measured down from the top padding) ===
+    font_size = int(s * 0.16)
+    try:
+        font = ImageFont.truetype("/System/Library/Fonts/HelveticaNeue.ttc", font_size, index=8)  # presumably the Bold face; index is OS-dependent, TODO confirm
+    except (OSError, IndexError):
+        try:
+            font = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", font_size)
+        except OSError:
+            font = ImageFont.load_default()  # last resort when neither system font can be loaded
+
+    text = "fn"
+    text_bbox = draw.textbbox((0, 0), text, font=font)
+    tw = text_bbox[2] - text_bbox[0]
+    th = text_bbox[3] - text_bbox[1]  # NOTE(review): th is computed but never used below
+
+    text_x = cx - tw / 2 - text_bbox[0]  # centre horizontally, compensating for the bbox's left offset
+    text_y = pad + s * 0.06
+
+    draw.text((text_x, text_y), text, fill=(255, 255, 255, 230), font=font)
+
+    return img
+
+
+def main():  # Render every iconset size, pack the set into AppIcon.icns, then delete the temporary folder
+    script_dir = os.path.dirname(os.path.abspath(__file__))  # outputs are written next to this script
+    iconset_dir = os.path.join(script_dir, "AppIcon.iconset")
+    os.makedirs(iconset_dir, exist_ok=True)
+
+    sizes = [  # (base size, scale) pairs; the rendered pixel size is base * scale
+        (16, 1), (16, 2),
+        (32, 1), (32, 2),
+        (128, 1), (128, 2),
+        (256, 1), (256, 2),
+        (512, 1), (512, 2),
+    ]
+
+    for base_size, scale in sizes:
+        px = base_size * scale
+        if scale == 1:
+            name = f"icon_{base_size}x{base_size}.png"
+        else:
+            name = f"icon_{base_size}x{base_size}@{scale}x.png"  # e.g. icon_16x16@2x.png
+
+        path = os.path.join(iconset_dir, name)
+        img = draw_icon(px)  # drawn at the target pixel size rather than downscaled from one master
+        img.save(path, "PNG")
+        print(f"  {name} ({px}x{px})")
+
+    # Convert the iconset folder to .icns with iconutil; check=True raises CalledProcessError on failure
+    icns_path = os.path.join(script_dir, "AppIcon.icns")
+    subprocess.run(["iconutil", "-c", "icns", iconset_dir, "-o", icns_path], check=True)
+    print(f"\nCreated {icns_path}")
+
+    shutil.rmtree(iconset_dir)  # remove the intermediate PNG folder (skipped if iconutil raised above)
+
+if __name__ == "__main__":
+    main()
diff --git a/src/main.rs b/src/main.rs
index 8fa3492..13edfd3 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -26,7 +26,8 @@ use core_graphics::event_source::{CGEventSource, CGEventSourceStateID};
 use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
 use cpal::Stream;
 use hound::{WavSpec, WavWriter};
-use objc::runtime::Object;
+use objc::declare::ClassDecl;
+use objc::runtime::{Object, Sel};
 use objc::{class, msg_send, sel, sel_impl};
 
 // ============================================================================
@@ -135,63 +136,210 @@ fn get_paste_keycode() -> u16 {
 // Main application
 // ============================================================================
 
-// Fn key flag in CGEventFlags
+// Modifier key flags in CGEventFlags
 const FN_KEY_FLAG: u64 = 0x800000;
-// Option/Alt key flag
 const OPTION_KEY_FLAG: u64 = 0x80000;
-// Control key flag
 const CONTROL_KEY_FLAG: u64 = 0x40000;
+const SHIFT_KEY_FLAG: u64 = 0x20000;
+const COMMAND_KEY_FLAG: u64 = 0x100000;
 
-struct AppState {
-    audio_buffer: Arc<Mutex<Vec<f32>>>,
+// ============================================================================
+// Configuration
+// ============================================================================
+
+fn default_api_key() -> String {
+    String::new() // empty = no bearer token until the user configures one
+}
+fn default_transcription_url() -> String {
+    "https://api.groq.com/openai/v1/audio/transcriptions".to_string() // Groq's OpenAI-compatible transcription endpoint
+}
+fn default_polish_url() -> String {
+    "https://api.groq.com/openai/v1/chat/completions".to_string() // Groq's OpenAI-compatible chat completions endpoint
+}
+fn default_whisper_model() -> String {
+    "whisper-large-v3".to_string() // model name used for transcription requests
+}
+fn default_polish_model() -> String {
+    "llama-3.3-70b-versatile".to_string() // model name used for sanitizer (polish) requests
+}
+fn default_hotkey() -> String {
+    "fn".to_string() // names recognized by Config::hotkey_flag: option | control | shift | command, anything else = Fn
+}
+fn default_language() -> String {
+    String::new() // empty = auto-detect
+}
+fn default_always_polish() -> bool {
+    true // polish is on unless the user sets always_polish = false
+}
+fn default_polish_prompt() -> String {
+    String::new() // empty = use built-in prompt
+}
+fn default_polish_api_key() -> String {
+    String::new() // empty = use api_key
+}
+
+#[derive(serde::Deserialize, Clone)]
+struct Config { // parsed from config.toml by load_config(); every field has a serde default, so any key may be omitted
+    #[serde(default = "default_api_key")]
     api_key: String,
-    use_fn_key: AtomicBool,
-    sample_rate: std::sync::atomic::AtomicU32,
+    #[serde(default = "default_transcription_url")]
+    transcription_url: String, // speech-to-text endpoint URL
+    #[serde(default = "default_polish_url")]
+    polish_url: String, // chat-completions endpoint URL used by the sanitizer
+    #[serde(default = "default_whisper_model")]
+    whisper_model: String, // model name for transcription
+    #[serde(default = "default_polish_model")]
+    polish_model: String, // model name for the sanitizer
+    #[serde(default = "default_hotkey")]
+    hotkey: String, // hotkey name; mapped to a CGEventFlags bit by hotkey_flag()
+    #[serde(default = "default_language")]
+    language: String, // language hint; empty = auto-detect
+    #[serde(default = "default_always_polish")]
+    always_polish: bool, // whether sanitization is on by default
+    #[serde(default = "default_polish_prompt")]
+    polish_prompt: String, // custom sanitizer system prompt; empty = built-in prompt
+    #[serde(default = "default_polish_api_key")]
+    polish_api_key: String, // separate sanitizer key; empty = fall back to api_key (see polish_key())
 }
 
-// Global status item pointer for updating from callbacks
-static mut STATUS_ITEM: *mut Object = std::ptr::null_mut();
-// Global audio stream (not Send, so can't be in Arc)
-static mut AUDIO_STREAM: Option<Stream> = None;
+impl Config {
+    /// Returns the CGEventFlags bitmask for the configured hotkey (unknown names fall back to Fn).
+    fn hotkey_flag(&self) -> u64 {
+        match self.hotkey.as_str() { // exact, case-sensitive match on the configured string
+            "option" => OPTION_KEY_FLAG,
+            "control" => CONTROL_KEY_FLAG,
+            "shift" => SHIFT_KEY_FLAG,
+            "command" => COMMAND_KEY_FLAG,
+            _ => FN_KEY_FLAG, // "fn" or any unrecognized value
+        }
+    }
 
-/// Get API key from config file or environment variable.
-/// Checks ~/.config/fnkey/api_key first, then GROQ_API_KEY env var.
-fn get_api_key() -> Option<String> {
-    // Try config file first
+    /// Returns the modifier flag used to trigger polish mode.
+    /// Normally Ctrl, but if hotkey is already Ctrl, use Shift instead.
+    fn polish_flag(&self) -> u64 {
+        if self.hotkey == "control" { // Ctrl is the hotkey itself, so it cannot also be the modifier
+            SHIFT_KEY_FLAG
+        } else {
+            CONTROL_KEY_FLAG
+        }
+    }
+
+    /// API key for the polish/sanitizer endpoint.
+    /// Falls back to the main api_key when polish_api_key is not set.
+    fn polish_key(&self) -> &str {
+        if self.polish_api_key.is_empty() { // "not set" means the empty string, which is also the serde default
+            &self.api_key
+        } else {
+            &self.polish_api_key
+        }
+    }
+}
+
+/// Load configuration from TOML file, legacy api_key file, or environment variable.
+/// Always returns a Config — writes a template config.toml only when none exists (never overwrites one).
+fn load_config() -> Config {
     if let Some(home) = env::var_os("HOME") {
-        let config_path = std::path::Path::new(&home)
-            .join(".config")
-            .join("fnkey")
-            .join("api_key");
-        if let Ok(key) = std::fs::read_to_string(&config_path) {
+        let config_dir = std::path::Path::new(&home).join(".config").join("fnkey");
+
+        // Try config.toml first
+        let toml_path = config_dir.join("config.toml");
+        if let Ok(contents) = std::fs::read_to_string(&toml_path) {
+            if let Ok(config) = toml::from_str::<Config>(&contents) {
+                return config;
+            } // parse error: fall through to the legacy sources; the unparsable file itself is left untouched
+        }
+
+        // Try legacy api_key file
+        let key_path = config_dir.join("api_key");
+        if let Ok(key) = std::fs::read_to_string(&key_path) {
             let key = key.trim();
             if !key.is_empty() {
-                return Some(key.to_string());
+                return Config {
+                    api_key: key.to_string(),
+                    transcription_url: default_transcription_url(),
+                    polish_url: default_polish_url(),
+                    whisper_model: default_whisper_model(),
+                    polish_model: default_polish_model(),
+                    hotkey: default_hotkey(),
+                    language: default_language(),
+                    always_polish: default_always_polish(),
+                    polish_prompt: default_polish_prompt(),
+                    polish_api_key: default_polish_api_key(),
+                };
             }
         }
+
+        // Try environment variable
+        if let Ok(key) = env::var("GROQ_API_KEY") {
+            return Config {
+                api_key: key,
+                transcription_url: default_transcription_url(),
+                polish_url: default_polish_url(),
+                whisper_model: default_whisper_model(),
+                polish_model: default_polish_model(),
+                hotkey: default_hotkey(),
+                language: default_language(),
+                always_polish: default_always_polish(),
+                polish_prompt: default_polish_prompt(),
+                polish_api_key: default_polish_api_key(),
+            };
+        }
+
+        // No usable config found — seed a template, but never clobber an existing (e.g. unparsable) config.toml
+        let _ = std::fs::create_dir_all(&config_dir);
+        let default_toml = r#"# FnKey configuration — edit and relaunch
+# api_key = "your-api-key"
+# polish_api_key = ""  # Separate key for sanitizer (empty = use api_key)
+# transcription_url = "https://your-server/v1/audio/transcriptions"
+# polish_url = "https://your-server/v1/chat/completions"
+# whisper_model = "whisper-large-v3"
+# polish_model = "llama-3.3-70b-versatile"
+# hotkey = "fn"
+# language = ""  # ISO-639-1 code: "en", "sk", "de", "fr", etc. Empty = auto-detect
+# always_polish = true  # Always run LLM cleanup on transcriptions (Ctrl modifier skips it)
+# polish_prompt = ""  # Custom system prompt for polish mode (empty = use built-in)
+"#;
+        if !toml_path.exists() { let _ = std::fs::write(&toml_path, default_toml); }
     }
-    // Fall back to environment variable
-    env::var("GROQ_API_KEY").ok()
+
+    // Return defaults — app will launch but transcription won't work until configured
+    Config {
+        api_key: default_api_key(),
+        transcription_url: default_transcription_url(),
+        polish_url: default_polish_url(),
+        whisper_model: default_whisper_model(),
+        polish_model: default_polish_model(),
+        hotkey: default_hotkey(),
+        language: default_language(),
+        always_polish: default_always_polish(),
+        polish_prompt: default_polish_prompt(),
+        polish_api_key: default_polish_api_key(),
+    }
+}
+
+struct AppState { // built once in main() and shared behind an Arc
+    audio_buffer: Arc<Mutex<Vec<f32>>>, // presumably the recorded samples; confirm against the audio callback
+    config: Config, // settings returned by load_config() at startup
+    sample_rate: std::sync::atomic::AtomicU32, // starts at 48000 in main() and is updated later
 }
 
+// Global status item pointer for updating from callbacks
+static mut STATUS_ITEM: *mut Object = std::ptr::null_mut();
+// Global audio stream (not Send, so can't be in Arc)
+static mut AUDIO_STREAM: Option<Stream> = None;
+
 fn main() {
-    let api_key = get_api_key().unwrap_or_else(|| {
-        show_alert(
-            "GROQ_API_KEY not configured",
-            "Please create ~/.config/fnkey/api_key with your Groq API key.\n\nExample:\n  mkdir -p ~/.config/fnkey\n  echo 'gsk_your_key_here' > ~/.config/fnkey/api_key"
-        );
-        std::process::exit(1);
-    });
+    let config = load_config();
 
-    // Check Input Monitoring permission
-    if !check_input_monitoring_permission() {
-        std::process::exit(1);
-    }
+    // Eagerly build keycode map on main thread — Carbon TIS APIs require main thread
+    let _ = get_paste_keycode();
+
+    // Request Input Monitoring permission (non-blocking — app continues either way)
+    check_input_monitoring_permission();
 
     let state = Arc::new(AppState {
         audio_buffer: Arc::new(Mutex::new(Vec::new())),
-        api_key,
-        use_fn_key: AtomicBool::new(true),
+        config,
         sample_rate: std::sync::atomic::AtomicU32::new(48000), // Default, will be updated
     });
 
@@ -209,29 +357,18 @@ fn main() {
     run_event_tap(state);
 }
 
-fn check_input_monitoring_permission() -> bool {
+fn check_input_monitoring_permission() {
     unsafe {
-        // CGPreflightListenEventAccess and CGRequestListenEventAccess
         #[link(name = "CoreGraphics", kind = "framework")]
         extern "C" {
             fn CGPreflightListenEventAccess() -> bool;
             fn CGRequestListenEventAccess() -> bool;
         }
 
-        if CGPreflightListenEventAccess() {
-            return true;
+        if !CGPreflightListenEventAccess() {
+            // Request permission - shows system dialog on first run
+            CGRequestListenEventAccess();
         }
-
-        // Request permission - this shows system dialog
-        if CGRequestListenEventAccess() {
-            return true;
-        }
-
-        show_alert(
-            "Input Monitoring Required",
-            "FnKey needs Input Monitoring permission to detect the Fn key.\n\nPlease grant access in System Settings → Privacy & Security → Input Monitoring, then relaunch FnKey.",
-        );
-        false
     }
 }
 
@@ -249,6 +386,53 @@ fn show_alert(title: &str, message: &str) {
     }
 }
 
+/// Objective-C action for the "Settings..." menu item: makes sure
+/// `$HOME/.config/fnkey/config.toml` exists (writing a commented-out template if it is
+/// missing) and asks NSWorkspace to open it. Does nothing when `HOME` is unset. Directory
+/// and template creation are best-effort; their errors are ignored.
+extern "C" fn open_settings(_this: &Object, _cmd: Sel, _sender: id) {
+    if let Some(home) = env::var_os("HOME") {
+        let config_dir = std::path::Path::new(&home).join(".config").join("fnkey");
+        let config_path = config_dir.join("config.toml");
+        let _ = std::fs::create_dir_all(&config_dir);
+        if !config_path.exists() {
+            let default_toml = r#"# FnKey configuration — edit and relaunch
+# api_key = "your-api-key"
+# polish_api_key = ""  # Separate key for sanitizer (empty = use api_key)
+# transcription_url = "https://your-server/v1/audio/transcriptions"
+# polish_url = "https://your-server/v1/chat/completions"
+# whisper_model = "whisper-large-v3"
+# polish_model = "llama-3.3-70b-versatile"
+# hotkey = "fn"
+# language = ""  # ISO-639-1 code: "en", "sk", "de", "fr", etc. Empty = auto-detect
+# always_polish = true  # Always run LLM cleanup on transcriptions (Ctrl modifier skips it)
+# polish_prompt = ""  # Custom system prompt for polish mode (empty = use built-in)
+"#;
+            let _ = std::fs::write(&config_path, default_toml);
+        }
+        unsafe {
+            let workspace: id = msg_send![class!(NSWorkspace), sharedWorkspace];
+            // Lossy conversion: a panic here would unwind out of an `extern "C"` callback.
+            let path_str = NSString::alloc(nil).init_str(&config_path.to_string_lossy());
+            let url: id = msg_send![class!(NSURL), fileURLWithPath: path_str];
+            let _: bool = msg_send![workspace, openURL: url];
+        }
+    }
+}
+
+/// Registers the `FnKeyMenuDelegate` Objective-C class (an `NSObject` subclass whose
+/// `openSettings:` action is backed by `open_settings`) and returns one instance of it.
+unsafe fn register_menu_delegate() -> id {
+    let mut builder = ClassDecl::new("FnKeyMenuDelegate", class!(NSObject)).unwrap();
+    let handler = open_settings as extern "C" fn(&Object, Sel, id);
+    builder.add_method(sel!(openSettings:), handler);
+    let delegate_class = builder.register();
+    let instance: id = msg_send![delegate_class, new];
+    // Extra retain on top of the reference `new` already returned; this function
+    // never releases the instance.
+    let _: () = msg_send![instance, retain];
+    instance
+}
+
 unsafe fn create_status_item() {
     let status_bar: id = msg_send![class!(NSStatusBar), systemStatusBar];
     let status_item: id = msg_send![status_bar, statusItemWithLength: -1.0_f64]; // NSVariableStatusItemLength
@@ -260,9 +444,24 @@ unsafe fn create_status_item() {
     let button: id = msg_send![status_item, button];
     let _: () = msg_send![button, setTitle: title];
 
+    // Register menu delegate for Settings action
+    let delegate = register_menu_delegate();
+
     // Create menu
     let menu: id = NSMenu::new(nil);
 
+    // Settings item
+    let settings_title = NSString::alloc(nil).init_str("Settings...");
+    let settings_key = NSString::alloc(nil).init_str(",");
+    let settings_item: id = msg_send![class!(NSMenuItem), alloc];
+    let settings_item: id = msg_send![settings_item, initWithTitle: settings_title action: sel!(openSettings:) keyEquivalent: settings_key];
+    let _: () = msg_send![settings_item, setTarget: delegate];
+    let _: () = msg_send![menu, addItem: settings_item];
+
+    // Separator
+    let separator: id = msg_send![class!(NSMenuItem), separatorItem];
+    let _: () = msg_send![menu, addItem: separator];
+
     // Quit item
     let quit_title = NSString::alloc(nil).init_str("Quit FnKey");
     let quit_key = NSString::alloc(nil).init_str("q");
@@ -288,13 +487,14 @@ fn update_status_icon(recording: bool) {
 
 fn run_event_tap(state: Arc<AppState>) {
     let state_for_callback = Arc::clone(&state);
-    let fn_detected = Arc::new(AtomicBool::new(false));
     let was_pressed = Arc::new(AtomicBool::new(false));
-    let ctrl_was_held = Arc::new(AtomicBool::new(false));
+    let polish_latched = Arc::new(AtomicBool::new(false));
 
-    let fn_detected_clone = Arc::clone(&fn_detected);
     let was_pressed_clone = Arc::clone(&was_pressed);
-    let ctrl_latched_clone = Arc::clone(&ctrl_was_held); // Latches true if Ctrl pressed anytime during recording
+    let polish_latched_clone = Arc::clone(&polish_latched);
+
+    let hotkey_flag = state.config.hotkey_flag();
+    let polish_flag = state.config.polish_flag();
 
     let tap = CGEventTap::new(
         CGEventTapLocation::HID,
@@ -304,65 +504,54 @@ fn run_event_tap(state: Arc<AppState>) {
         move |_, _, event| {
             let flags = event.get_flags().bits();
 
-            // Check Fn key first, then Option as fallback
-            let fn_pressed = (flags & FN_KEY_FLAG) != 0;
-            let option_pressed = (flags & OPTION_KEY_FLAG) != 0;
-            let ctrl_pressed = (flags & CONTROL_KEY_FLAG) != 0;
-
-            let use_fn = state_for_callback.use_fn_key.load(Ordering::SeqCst);
-            let key_pressed = if use_fn { fn_pressed } else { option_pressed };
-
-            // Detect if Fn key works (first time detection)
-            if fn_pressed && !fn_detected_clone.load(Ordering::SeqCst) {
-                fn_detected_clone.store(true, Ordering::SeqCst);
-            }
+            let key_pressed = (flags & hotkey_flag) != 0;
+            let polish_held = (flags & polish_flag) != 0;
 
             let prev_pressed = was_pressed_clone.load(Ordering::SeqCst);
 
-            // Handle key state changes
             if key_pressed && !prev_pressed {
-                // Key pressed - start recording, reset Ctrl latch
-                ctrl_latched_clone.store(false, Ordering::SeqCst);
+                // Key pressed - start recording, reset polish latch
+                polish_latched_clone.store(false, Ordering::SeqCst);
                 start_recording(&state_for_callback);
             } else if !key_pressed && prev_pressed {
                 // Key released - stop recording and transcribe
-                let polish = ctrl_latched_clone.load(Ordering::SeqCst);
+                let polish = polish_latched_clone.load(Ordering::SeqCst);
                 stop_recording(&state_for_callback, polish);
             }
 
-            // Latch Ctrl if pressed anytime during recording
-            if key_pressed && ctrl_pressed {
-                ctrl_latched_clone.store(true, Ordering::SeqCst);
+            // Latch polish modifier if held anytime during recording
+            if key_pressed && polish_held {
+                polish_latched_clone.store(true, Ordering::SeqCst);
             }
 
             was_pressed_clone.store(key_pressed, Ordering::SeqCst);
             None
         },
-    )
-    .expect("Failed to create event tap - check Input Monitoring permissions");
+    );
 
-    let source = tap
-        .mach_port
-        .create_runloop_source(0)
-        .expect("Failed to create runloop source");
+    match tap {
+        Ok(tap) => {
+            let source = tap
+                .mach_port
+                .create_runloop_source(0)
+                .expect("Failed to create runloop source");
 
-    let run_loop = CFRunLoop::get_current();
-    run_loop.add_source(&source, unsafe { kCFRunLoopCommonModes });
+            let run_loop = CFRunLoop::get_current();
+            run_loop.add_source(&source, unsafe { kCFRunLoopCommonModes });
 
-    tap.enable();
+            tap.enable();
 
-    // Fallback timer: if no Fn detected in 5 seconds, switch to Option
-    let state_fallback = Arc::clone(&state);
-    let fn_detected_fallback = Arc::clone(&fn_detected);
-    thread::spawn(move || {
-        thread::sleep(Duration::from_secs(5));
-        if !fn_detected_fallback.load(Ordering::SeqCst) && state_fallback.use_fn_key.load(Ordering::SeqCst) {
-            state_fallback.use_fn_key.store(false, Ordering::SeqCst);
+            // tap + source must stay alive while the run loop is running
+            unsafe { NSApp().run(); }
+        }
+        Err(_) => {
+            show_alert(
+                "Input Monitoring Required",
+                "FnKey can't detect hotkey presses.\n\nGo to System Settings → Privacy & Security → Input Monitoring, remove FnKey, re-add it, then relaunch.",
+            );
+            // Still run the app so the menu bar icon (Settings/Quit) is usable
+            unsafe { NSApp().run(); }
         }
-    });
-
-    unsafe {
-        NSApp().run();
     }
 }
 
@@ -451,48 +640,76 @@ fn stop_recording(state: &Arc<AppState>, polish: bool) {
     }
 
     // Transcribe in background
-    let api_key = state.api_key.clone();
+    let config = state.config.clone();
     let sample_rate = state.sample_rate.load(Ordering::SeqCst);
     thread::spawn(move || {
-        transcribe_and_paste(audio_data, sample_rate, &api_key, polish);
+        transcribe_and_paste(audio_data, sample_rate, &config, polish);
     });
 }
 
-fn transcribe_and_paste(audio: Vec<f32>, sample_rate: u32, api_key: &str, polish: bool) {
+fn transcribe_and_paste(audio: Vec<f32>, sample_rate: u32, config: &Config, polish: bool) {
+    let duration_secs = audio.len() as f32 / sample_rate as f32;
+    eprintln!("[fnkey] audio: {:.1}s, {} samples, {}Hz, {:.0}KB raw",
+        duration_secs, audio.len(), sample_rate, audio.len() as f32 * 4.0 / 1024.0);
+
     let wav_data = match encode_wav(&audio, sample_rate) {
         Ok(data) => data,
         Err(_) => return,
     };
+    eprintln!("[fnkey] wav: {:.0}KB", wav_data.len() as f32 / 1024.0);
 
     let client = reqwest::blocking::Client::new();
-    let form = reqwest::blocking::multipart::Form::new()
-        .text("model", "whisper-large-v3")  // Full model for better accuracy (vs turbo)
-        .text("response_format", "text")
-        .part(
-            "file",
-            reqwest::blocking::multipart::Part::bytes(wav_data)
-                .file_name("audio.wav")
-                .mime_str("audio/wav")
-                .unwrap(),
-        );
+    let mut form = reqwest::blocking::multipart::Form::new()
+        .text("model", config.whisper_model.clone())
+        .text("response_format", "text");
+
+    // Send language hint to Whisper if configured (ISO-639-1 code)
+    if !config.language.is_empty() {
+        form = form.text("language", config.language.clone());
+    }
+
+    let form = form.part(
+        "file",
+        reqwest::blocking::multipart::Part::bytes(wav_data)
+            .file_name("audio.wav")
+            .mime_str("audio/wav")
+            .unwrap(),
+    );
 
     let response = client
-        .post("https://api.groq.com/openai/v1/audio/transcriptions")
-        .header("Authorization", format!("Bearer {}", api_key))
+        .post(&config.transcription_url)
+        .header("Authorization", format!("Bearer {}", config.api_key))
         .multipart(form)
         .timeout(Duration::from_secs(30))
         .send();
 
     if let Ok(resp) = response {
         if resp.status().is_success() {
-            if let Ok(text) = resp.text() {
-                let text = text.trim();
+            if let Ok(raw) = resp.text() {
+                eprintln!("[fnkey] whisper raw response ({} bytes): {:.200}", raw.len(), raw);
+                // Handle both plain text and JSON responses
+                // Some servers (e.g. vLLM) return {"text":"..."} even with response_format=text
+                let text = if raw.trim_start().starts_with('{') {
+                    serde_json::from_str::<serde_json::Value>(raw.trim())
+                        .ok()
+                        .and_then(|v| v.get("text")?.as_str().map(String::from))
+                        .unwrap_or_else(|| raw.trim().to_string())
+                } else {
+                    raw.trim().to_string()
+                };
+
                 if !text.is_empty() {
-                    // Apply polish if requested, fallback to raw on error
-                    let final_text = if polish {
-                        polish_text(text, api_key).unwrap_or_else(|| text.to_string())
+                    eprintln!("[fnkey] whisper text: {}", text);
+                    // When always_polish is on: polish by default, Ctrl modifier = raw
+                    // When always_polish is off: raw by default, Ctrl modifier = polish
+                    let should_polish = if config.always_polish { !polish } else { polish };
+                    let final_text = if should_polish {
+                        let polished = polish_text(&text, config).unwrap_or_else(|| text.clone());
+                        eprintln!("[fnkey] polished: {}", polished);
+                        polished
                     } else {
-                        text.to_string()
+                        eprintln!("[fnkey] raw (no polish)");
+                        text
                     };
 
                     if let Ok(mut clipboard) = Clipboard::new() {
@@ -546,27 +763,49 @@ struct ChatMessage {
 
 /// Polish transcribed text using LLM to convert spoken style to written prose.
 /// Returns None on any error (caller should fall back to raw text).
-fn polish_text(text: &str, api_key: &str) -> Option<String> {
+fn polish_text(text: &str, config: &Config) -> Option<String> {
     let client = reqwest::blocking::Client::new();
 
+    let system_prompt = if !config.polish_prompt.is_empty() {
+        config.polish_prompt.clone()
+    } else if config.language.is_empty() || config.language == "en" {
+        "Fix dictation. Remove filler words (um, uh, like, you know, basically). \
+         Remove repeated words. Fix grammar and punctuation. \
+         Keep the same tone and meaning. Output ONLY the corrected text. /no_think".to_string()
+    } else {
+        format!(
+            "Fix dictation in language \"{}\". Remove filler words and hesitations. \
+             Remove repeated words. Fix grammar and punctuation. \
+             Keep the same language, tone and meaning. Do NOT translate. \
+             Output ONLY the corrected text. /no_think",
+            config.language
+        )
+    };
+
+    // Cap output tokens: rough estimate of input tokens (words * 1.3) doubled as headroom,
+    // with a floor of 64 and ceiling of 1024. Prevents hallucination runaway on small models.
+    let estimated_tokens = (text.split_whitespace().count() as f32 * 1.3 * 2.0) as u64;
+    let max_tokens = estimated_tokens.clamp(64, 1024);
+
     let body = serde_json::json!({
-        "model": "llama-3.3-70b-versatile",
+        "model": config.polish_model,
         "messages": [
             {
                 "role": "system",
-                "content": "Clean up this voice message for texting. Remove filler words (um, uh, like, you know). Fix punctuation and sentence structure. Break up run-on sentences. Keep it casual. No trailing period. Output ONLY the cleaned text - no explanations, no quotes."
+                "content": system_prompt
             },
             {
                 "role": "user",
                 "content": text
             }
         ],
-        "temperature": 0.2
+        "temperature": 0.2,
+        "max_tokens": max_tokens
     });
 
     let response = client
-        .post("https://api.groq.com/openai/v1/chat/completions")
-        .header("Authorization", format!("Bearer {}", api_key))
+        .post(&config.polish_url)
+        .header("Authorization", format!("Bearer {}", config.polish_key()))
         .header("Content-Type", "application/json")
         .json(&body)
         .timeout(Duration::from_secs(30))