diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml new file mode 100644 index 00000000..11a4f125 --- /dev/null +++ b/.github/workflows/build-linux.yml @@ -0,0 +1,291 @@ +name: Build Linux + +on: + push: + tags: + - 'v*' + workflow_dispatch: + inputs: + version: + description: 'Version (without v prefix)' + required: true + default: '0.5.0' + +jobs: + # AVX2 build - uses Docker for clean toolchain (no AVX-512 contamination) + build-avx2: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Determine version + id: version + run: | + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + echo "version=${{ github.event.inputs.version }}" >> $GITHUB_OUTPUT + else + echo "version=${GITHUB_REF_NAME#v}" >> $GITHUB_OUTPUT + fi + + - name: Build AVX2 binary in Docker + run: | + VERSION=${{ steps.version.outputs.version }} + docker build -f Dockerfile.build -t voxtype-avx2 --build-arg VERSION=${VERSION} . + mkdir -p releases/${VERSION} + docker run --rm -v $(pwd)/releases/${VERSION}:/output voxtype-avx2 + + - name: Verify binary + run: | + VERSION=${{ steps.version.outputs.version }} + releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-avx2 --version + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: linux-avx2 + path: releases/${{ steps.version.outputs.version }}/voxtype-*-linux-x86_64-avx2 + + # Vulkan build - uses Docker for clean toolchain + build-vulkan: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Determine version + id: version + run: | + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + echo "version=${{ github.event.inputs.version }}" >> $GITHUB_OUTPUT + else + echo "version=${GITHUB_REF_NAME#v}" >> $GITHUB_OUTPUT + fi + + - name: Build Vulkan binary in Docker + run: | + VERSION=${{ steps.version.outputs.version }} + docker build -f Dockerfile.vulkan -t voxtype-vulkan --build-arg VERSION=${VERSION} . 
+ mkdir -p releases/${VERSION} + docker run --rm -v $(pwd)/releases/${VERSION}:/output voxtype-vulkan + + - name: Verify binary + run: | + VERSION=${{ steps.version.outputs.version }} + releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-vulkan --version + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: linux-vulkan + path: releases/${{ steps.version.outputs.version }}/voxtype-*-linux-x86_64-vulkan + + # Parakeet AVX2 build - uses Docker for clean toolchain + build-parakeet-avx2: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Determine version + id: version + run: | + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + echo "version=${{ github.event.inputs.version }}" >> $GITHUB_OUTPUT + else + echo "version=${GITHUB_REF_NAME#v}" >> $GITHUB_OUTPUT + fi + + - name: Build Parakeet AVX2 binary in Docker + run: | + VERSION=${{ steps.version.outputs.version }} + docker build -f Dockerfile.parakeet -t voxtype-parakeet-avx2 --build-arg VERSION=${VERSION} . 
+ mkdir -p releases/${VERSION} + docker run --rm -v $(pwd)/releases/${VERSION}:/output voxtype-parakeet-avx2 + + - name: Verify binary + run: | + VERSION=${{ steps.version.outputs.version }} + releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-parakeet-avx2 --version + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: linux-parakeet-avx2 + path: releases/${{ steps.version.outputs.version }}/voxtype-*-linux-x86_64-parakeet-avx2 + + # AVX-512 build - requires AVX-512 capable runner (best effort) + build-avx512: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Check for AVX-512 support + id: check-avx512 + run: | + if grep -q avx512f /proc/cpuinfo; then + echo "supported=true" >> $GITHUB_OUTPUT + echo "AVX-512 is supported on this runner" + else + echo "supported=false" >> $GITHUB_OUTPUT + echo "AVX-512 is NOT supported on this runner - skipping build" + fi + + - name: Determine version + if: steps.check-avx512.outputs.supported == 'true' + id: version + run: | + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + echo "version=${{ github.event.inputs.version }}" >> $GITHUB_OUTPUT + else + echo "version=${GITHUB_REF_NAME#v}" >> $GITHUB_OUTPUT + fi + + - name: Install Rust + if: steps.check-avx512.outputs.supported == 'true' + uses: dtolnay/rust-toolchain@stable + + - name: Install dependencies + if: steps.check-avx512.outputs.supported == 'true' + run: | + sudo apt-get update + sudo apt-get install -y libasound2-dev libclang-dev cmake \ + libgtk-3-dev libglib2.0-dev libx11-dev libxi-dev libxtst-dev + + - name: Build AVX-512 binary + if: steps.check-avx512.outputs.supported == 'true' + env: + RUSTFLAGS: "-C target-cpu=native" + run: | + cargo build --release + VERSION=${{ steps.version.outputs.version }} + mkdir -p releases/${VERSION} + cp target/release/voxtype releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-avx512 + + - name: Verify binary + if: steps.check-avx512.outputs.supported == 'true' + 
run: | + VERSION=${{ steps.version.outputs.version }} + releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-avx512 --version + + - name: Upload artifact + if: steps.check-avx512.outputs.supported == 'true' + uses: actions/upload-artifact@v4 + with: + name: linux-avx512 + path: releases/${{ steps.version.outputs.version }}/voxtype-*-linux-x86_64-avx512 + + # Parakeet AVX-512 build - requires AVX-512 capable runner (best effort) + build-parakeet-avx512: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Check for AVX-512 support + id: check-avx512 + run: | + if grep -q avx512f /proc/cpuinfo; then + echo "supported=true" >> $GITHUB_OUTPUT + echo "AVX-512 is supported on this runner" + else + echo "supported=false" >> $GITHUB_OUTPUT + echo "AVX-512 is NOT supported on this runner - skipping build" + fi + + - name: Determine version + if: steps.check-avx512.outputs.supported == 'true' + id: version + run: | + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + echo "version=${{ github.event.inputs.version }}" >> $GITHUB_OUTPUT + else + echo "version=${GITHUB_REF_NAME#v}" >> $GITHUB_OUTPUT + fi + + - name: Install Rust + if: steps.check-avx512.outputs.supported == 'true' + uses: dtolnay/rust-toolchain@stable + + - name: Install dependencies + if: steps.check-avx512.outputs.supported == 'true' + run: | + sudo apt-get update + sudo apt-get install -y libasound2-dev libclang-dev cmake \ + libgtk-3-dev libglib2.0-dev libx11-dev libxi-dev libxtst-dev \ + libssl-dev protobuf-compiler libprotobuf-dev + + - name: Build Parakeet AVX-512 binary + if: steps.check-avx512.outputs.supported == 'true' + env: + RUSTFLAGS: "-C target-cpu=native" + run: | + cargo build --release --features parakeet + VERSION=${{ steps.version.outputs.version }} + mkdir -p releases/${VERSION} + cp target/release/voxtype releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-parakeet-avx512 + + - name: Verify binary + if: steps.check-avx512.outputs.supported == 'true' + 
run: | + VERSION=${{ steps.version.outputs.version }} + releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-parakeet-avx512 --version + + - name: Upload artifact + if: steps.check-avx512.outputs.supported == 'true' + uses: actions/upload-artifact@v4 + with: + name: linux-parakeet-avx512 + path: releases/${{ steps.version.outputs.version }}/voxtype-*-linux-x86_64-parakeet-avx512 + + # Collect all artifacts and create release + release: + needs: [build-avx2, build-vulkan, build-parakeet-avx2, build-avx512, build-parakeet-avx512] + if: always() && needs.build-avx2.result == 'success' + runs-on: ubuntu-latest + steps: + - name: Determine version + id: version + run: | + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + echo "version=${{ github.event.inputs.version }}" >> $GITHUB_OUTPUT + else + echo "version=${GITHUB_REF_NAME#v}" >> $GITHUB_OUTPUT + fi + + - name: Download all artifacts + uses: actions/download-artifact@v4 + with: + path: artifacts + + - name: Collect binaries + run: | + VERSION=${{ steps.version.outputs.version }} + mkdir -p releases/${VERSION} + + # Move all binaries to releases directory + find artifacts -name 'voxtype-*' -type f -exec mv {} releases/${VERSION}/ \; + + # List collected binaries + echo "Collected binaries:" + ls -la releases/${VERSION}/ + + - name: Generate checksums + run: | + VERSION=${{ steps.version.outputs.version }} + cd releases/${VERSION} + sha256sum voxtype-* > SHA256SUMS.txt + echo "Checksums:" + cat SHA256SUMS.txt + + - name: Upload combined artifact + uses: actions/upload-artifact@v4 + with: + name: linux-release-all + path: | + releases/${{ steps.version.outputs.version }}/voxtype-* + releases/${{ steps.version.outputs.version }}/SHA256SUMS.txt + + - name: Upload to release + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + uses: softprops/action-gh-release@v1 + with: + files: | + releases/${{ steps.version.outputs.version }}/voxtype-* + releases/${{ steps.version.outputs.version 
}}/SHA256SUMS.txt diff --git a/.github/workflows/build-macos.yml b/.github/workflows/build-macos.yml new file mode 100644 index 00000000..6a595ad3 --- /dev/null +++ b/.github/workflows/build-macos.yml @@ -0,0 +1,172 @@ +name: Build macOS + +on: + push: + tags: + - 'v*' + workflow_dispatch: + inputs: + version: + description: 'Version (without v prefix)' + required: true + default: '0.5.0' + +jobs: + build: + runs-on: macos-14 # Apple Silicon runner for ARM64 build + steps: + - uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + + - name: Install Rust targets + run: | + rustup target add x86_64-apple-darwin + rustup target add aarch64-apple-darwin + + - name: Determine version + id: version + run: | + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + echo "version=${{ github.event.inputs.version }}" >> $GITHUB_OUTPUT + else + echo "version=${GITHUB_REF_NAME#v}" >> $GITHUB_OUTPUT + fi + + - name: Build for x86_64 + run: | + cargo build --release --target x86_64-apple-darwin --features gpu-metal + + - name: Build for aarch64 + run: | + cargo build --release --target aarch64-apple-darwin --features gpu-metal + + - name: Create universal binary + run: | + VERSION=${{ steps.version.outputs.version }} + mkdir -p releases/${VERSION} + lipo -create \ + target/x86_64-apple-darwin/release/voxtype \ + target/aarch64-apple-darwin/release/voxtype \ + -output releases/${VERSION}/voxtype-${VERSION}-macos-universal + chmod +x releases/${VERSION}/voxtype-${VERSION}-macos-universal + + - name: Verify universal binary + run: | + VERSION=${{ steps.version.outputs.version }} + lipo -info releases/${VERSION}/voxtype-${VERSION}-macos-universal + releases/${VERSION}/voxtype-${VERSION}-macos-universal --version + + - name: Import certificate + if: env.APPLE_DEVELOPER_ID_CERT != '' + env: + APPLE_DEVELOPER_ID_CERT: ${{ secrets.APPLE_DEVELOPER_ID_CERT }} + APPLE_DEVELOPER_ID_CERT_PASSWORD: ${{ secrets.APPLE_DEVELOPER_ID_CERT_PASSWORD }} 
+ run: | + # Create temporary keychain + KEYCHAIN_PATH=$RUNNER_TEMP/app-signing.keychain-db + KEYCHAIN_PASSWORD=$(openssl rand -base64 32) + + security create-keychain -p "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH + security set-keychain-settings -lut 21600 $KEYCHAIN_PATH + security unlock-keychain -p "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH + + # Import certificate + echo "$APPLE_DEVELOPER_ID_CERT" | base64 --decode > certificate.p12 + security import certificate.p12 -P "$APPLE_DEVELOPER_ID_CERT_PASSWORD" \ + -A -t cert -f pkcs12 -k $KEYCHAIN_PATH + rm certificate.p12 + + # Add keychain to search list + security list-keychain -d user -s $KEYCHAIN_PATH + + # Allow codesign to access key + security set-key-partition-list -S apple-tool:,apple:,codesign: \ + -s -k "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH + + - name: Sign binary + if: env.APPLE_DEVELOPER_ID_CERT != '' + env: + APPLE_DEVELOPER_ID_CERT: ${{ secrets.APPLE_DEVELOPER_ID_CERT }} + run: | + VERSION=${{ steps.version.outputs.version }} + BINARY=releases/${VERSION}/voxtype-${VERSION}-macos-universal + + # Find the signing identity + IDENTITY=$(security find-identity -v -p codesigning | \ + grep "Developer ID Application" | head -1 | \ + sed 's/.*"\(.*\)".*/\1/') + + codesign --deep --force --verify --verbose \ + --sign "$IDENTITY" \ + --timestamp \ + --options runtime \ + "$BINARY" + + codesign --verify --strict --verbose=2 "$BINARY" + + - name: Notarize binary + if: env.APPLE_ID != '' + env: + APPLE_ID: ${{ secrets.APPLE_ID }} + APPLE_ID_PASSWORD: ${{ secrets.APPLE_ID_PASSWORD }} + APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }} + run: | + VERSION=${{ steps.version.outputs.version }} + BINARY=releases/${VERSION}/voxtype-${VERSION}-macos-universal + + # Create ZIP for notarization + ditto -c -k "$BINARY" "${BINARY}.zip" + + # Submit for notarization + xcrun notarytool submit "${BINARY}.zip" \ + --apple-id "$APPLE_ID" \ + --password "$APPLE_ID_PASSWORD" \ + --team-id "$APPLE_TEAM_ID" \ + --wait + + # Clean up and staple + rm 
"${BINARY}.zip" + xcrun stapler staple "$BINARY" + + - name: Create DMG + run: | + VERSION=${{ steps.version.outputs.version }} + BINARY=releases/${VERSION}/voxtype-${VERSION}-macos-universal + DMG=releases/${VERSION}/voxtype-${VERSION}-macos-universal.dmg + + # Create temp directory with binary + TEMP_DIR=$(mktemp -d) + cp "$BINARY" "$TEMP_DIR/voxtype" + + # Create simple DMG + hdiutil create -volname "Voxtype $VERSION" \ + -srcfolder "$TEMP_DIR" \ + -ov -format UDZO \ + "$DMG" + + rm -rf "$TEMP_DIR" + + - name: Generate checksums + run: | + VERSION=${{ steps.version.outputs.version }} + cd releases/${VERSION} + shasum -a 256 * > SHA256SUMS.txt + cat SHA256SUMS.txt + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: macos-release + path: | + releases/${{ steps.version.outputs.version }}/voxtype-* + releases/${{ steps.version.outputs.version }}/SHA256SUMS.txt + + - name: Upload to release + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + uses: softprops/action-gh-release@v1 + with: + files: | + releases/${{ steps.version.outputs.version }}/voxtype-* + releases/${{ steps.version.outputs.version }}/SHA256SUMS.txt diff --git a/.github/workflows/test-packages.yml b/.github/workflows/test-packages.yml index 854183cd..248ddf32 100644 --- a/.github/workflows/test-packages.yml +++ b/.github/workflows/test-packages.yml @@ -43,7 +43,12 @@ jobs: cmake \ ruby \ ruby-dev \ - build-essential + build-essential \ + libgtk-3-dev \ + libglib2.0-dev \ + libx11-dev \ + libxi-dev \ + libxtst-dev sudo gem install fpm - name: Cache cargo registry diff --git a/.gitignore b/.gitignore index c0747bd2..443904b3 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,9 @@ /target /dist +# Cached prebuilt downloads (e.g. 
Microsoft ONNX Runtime for macOS builds) +/.cache/ + # AUR repos (nested git repos) /packaging/arch/ /packaging/arch-bin/ diff --git a/CLAUDE.md b/CLAUDE.md index 051e6919..7865d588 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -340,17 +340,24 @@ Based on open issues and project direction: - **Eager input processing** ([#70](https://github.com/peteonrails/voxtype/issues/70)) - Start transcription while still recording **Exploratory:** -- **Consolidated release binaries** - Reduce from 7 binaries to 3 (cpu, cuda, rocm) by combining Whisper + Vulkan + Parakeet into each binary. Vulkan and CUDA/ROCm fall back to CPU when no GPU is present, and ONNX Runtime (Parakeet) does runtime CPU dispatch. The trade-off is losing AVX-512 Whisper performance (~10-30%) and larger binaries (~35-40 MB vs 8 MB). Blocked on whisper.cpp/ggml adding runtime SIMD dispatch if AVX-512 performance must be preserved; otherwise, AVX2-only Whisper is safe on all x86-64 CPUs. +- **Consolidated release binaries** - Reduce from 8 binaries to 3 (cpu, cuda, migraphx) by combining Whisper + Vulkan + Parakeet into each binary. Vulkan and CUDA/MIGraphX fall back to CPU when no GPU is present, and ONNX Runtime (Parakeet) does runtime CPU dispatch. The trade-off is losing AVX-512 Whisper performance (~10-30%) and larger binaries (~35-40 MB vs 8 MB). Blocked on whisper.cpp/ggml adding runtime SIMD dispatch if AVX-512 performance must be preserved; otherwise, AVX2-only Whisper is safe on all x86-64 CPUs. - **Nemotron Speech backend** ([#47](https://github.com/peteonrails/voxtype/issues/47)) - Alternative ASR engine - **Foreign exception handling** ([#30](https://github.com/peteonrails/voxtype/issues/30)) - Investigate whisper.cpp crash recovery **Blocked/Waiting:** -- **Parakeet MIGraphX acceleration** - When parakeet-rs 0.3.0 releases on crates.io, update AMD GPU builds to use MIGraphX instead of ROCm. The current ROCm EP has upstream ONNX Runtime compatibility issues. 
Consider renaming `parakeet-rocm` feature to `parakeet-migraphx`. Also check nixpkgs onnxruntime for MIGraphX support options. + + +- **Nixpkgs onnxruntime MIGraphX support** - Verify the nixpkgs `onnxruntime` build (with `rocmSupport = true`) actually exposes the MIGraphX EP. The Nix flake's `parakeet-migraphx` output uses `onnxruntimeRocm` and sets `ORT_MIGRAPHX_MODEL_CACHE_PATH`; if MIGraphX isn't exposed in nixpkgs, ORT will fail to register the EP at runtime. ### Non-Goals - Windows/macOS support (Linux-first, Wayland-native) -- GUI configuration (CLI and config file are the interface) +- GUI configuration (GTK/Qt/web). A TUI (`voxtype configure`) is supported and + surfaced as a desktop-file launcher entry; CLI and config file remain the + primary interfaces for scripting and headless setups. - Continuous dictation mode (push-to-talk is the paradigm) --- @@ -418,9 +425,11 @@ Building on hosts with newer glibc (e.g. 2.43 on CachyOS/Arch) can produce binar ### Build Strategy -**CRITICAL: Every binary must be built in Docker.** Never build release binaries directly on the host, even for AVX-512 or ROCm builds that require specific hardware. Run Docker locally on the machine with the required hardware instead. +A full release requires **8 Linux binaries** (3 Whisper variants and 5 ONNX variants) plus a macOS arm64 DMG. + +**CRITICAL: Every binary must be built in Docker.** Never build release binaries directly on the host, even for AVX-512 or MIGraphX builds that require specific hardware. Run Docker locally on the machine with the required hardware instead. -**Whisper Binaries:** +**Whisper Binaries (3):** | Binary | Dockerfile | Docker Context | Base Image | Max glibc | |--------|-----------|----------------|------------|-----------| @@ -428,14 +437,27 @@ Building on hosts with newer glibc (e.g. 
2.43 on CachyOS/Arch) can produce binar | Vulkan | `Dockerfile.vulkan` | Remote (pre-AVX-512) | Ubuntu 24.04 | 2.39 | | AVX-512 | `Dockerfile.avx512` | Local (AVX-512 host) | Ubuntu 22.04 | 2.35 | -**ONNX Binaries (all ONNX engines: Parakeet, Moonshine, SenseVoice, Paraformer, Dolphin, Omnilingual):** +**ONNX Binaries (all ONNX engines: Parakeet, Moonshine, SenseVoice, Paraformer, Dolphin, Omnilingual, Cohere):** | Binary | Dockerfile | Docker Context | Base Image | Max glibc | |--------|-----------|----------------|------------|-----------| | onnx-avx2 | `Dockerfile.onnx` | Remote (pre-AVX-512) | Ubuntu 24.04 | 2.39 | | onnx-avx512 | `Dockerfile.onnx-avx512` | Local (AVX-512 host) | Ubuntu 24.04 | 2.39 | -| onnx-cuda | `Dockerfile.onnx-cuda` | Remote (NVIDIA GPU) | Ubuntu 24.04 | 2.39 | -| onnx-rocm | `Dockerfile.onnx-rocm` | Local (AMD GPU host) | Ubuntu 24.04 | 2.39 | +| onnx-cuda-12 | `Dockerfile.onnx-cuda-12` | Remote (NVIDIA GPU) | nvidia/cuda:12.6.1-cudnn-devel-ubuntu24.04 | 2.39 | +| onnx-cuda-13 | `Dockerfile.onnx-cuda-13` | Remote (NVIDIA GPU) | nvidia/cuda:13.0.3-cudnn-devel-ubuntu24.04 | 2.39 | +| onnx-migraphx | `Dockerfile.onnx-migraphx` | Local (AMD GPU host) | Ubuntu 24.04 | 2.39 | + +Note: ort 2.0.0-rc.12's CUDA prebuilt is selected at build time (cu12 vs cu13) +based on the ORT_CUDA_VERSION env var or build host's CUDA install. A single +binary is locked to one CUDA major version. v0.7.0 ships both onnx-cuda-12 +and onnx-cuda-13; the AUR PKGBUILD or `voxtype setup gpu --enable` symlinks +voxtype-onnx-cuda to whichever variant matches the host's runtime CUDA. + +Each GPU-using ONNX binary ships with its companion shared libraries +(libonnxruntime_providers_*.so) which the EP dlopens at runtime via +/proc/self/exe. scripts/package.sh installs each variant into its own +subdirectory under /usr/lib/voxtype/ (cuda-12/, cuda-13/, migraphx/) so +the .so files sit alongside the binary. 
Note: ONNX binaries include bundled ONNX Runtime which contains AVX-512 instructions, but ONNX Runtime uses runtime CPU detection and falls back gracefully on older CPUs. @@ -450,15 +472,25 @@ GPU acceleration is enabled via Cargo features: | `gpu-hipblas` | ROCm/HIP | AMD GPUs (alternative to Vulkan) | | `gpu-metal` | Metal | macOS (not applicable for Linux builds) | +**CRITICAL: Always run `cargo clean` before building with different features.** + +When switching between feature sets (e.g., CPU-only to GPU-enabled, or between different GPU backends), stale build artifacts can cause GPU support to silently fail at runtime. The binary will compile, have a different checksum, and appear correct, but GPU acceleration won't work. + +This is especially insidious because: +- The build succeeds without errors +- The binary size and checksum differ from previous builds +- `--version` reports correctly +- But GPU detection fails silently at runtime (e.g., `use gpu = 0` instead of `use gpu = 1`) + ```bash # Build with Vulkan GPU support -cargo build --release --features gpu-vulkan +cargo clean && cargo build --release --features gpu-vulkan # Build with CUDA GPU support -cargo build --release --features gpu-cuda +cargo clean && cargo build --release --features gpu-cuda # Build CPU-only (no GPU feature) -cargo build --release +cargo clean && cargo build --release ``` ### Remote Docker Context @@ -480,9 +512,13 @@ docker context use default ### Full Release Build Process -**CRITICAL: Always use `--no-cache` for release builds to prevent stale binaries.** +**CRITICAL: Always use `--no-cache` for Docker builds and `cargo clean` for local builds.** + +Stale build artifacts cause two categories of failures: + +1. **Docker cache** - Without `--no-cache`, Docker may reuse layers with old version numbers. This caused AUR packages to ship v0.4.1 binaries labeled as v0.4.5. -Docker caches build layers aggressively. 
Without `--no-cache`, you may upload binaries with old version numbers even after updating Cargo.toml. This has caused AUR packages to ship v0.4.1 binaries labeled as v0.4.5. +2. **Cargo incremental compilation** - Without `cargo clean`, switching between feature sets (e.g., CPU-only to `--features gpu-vulkan`) can produce binaries where GPU support silently fails at runtime. The binary compiles, has a different checksum, and reports the correct version, but GPU acceleration doesn't work. This is undetectable without actually testing GPU functionality. ```bash # Set version @@ -494,25 +530,26 @@ docker compose -f docker-compose.build.yml build --no-cache avx2 vulkan onnx-avx docker compose -f docker-compose.build.yml up avx2 vulkan onnx-avx2 # 2. Build ONNX CUDA on remote server (has NVIDIA GPU) -docker compose -f docker-compose.build.yml build --no-cache onnx-cuda -docker compose -f docker-compose.build.yml up onnx-cuda +docker compose -f docker-compose.build.yml build --no-cache onnx-cuda-12 onnx-cuda-13 +docker compose -f docker-compose.build.yml up onnx-cuda-12 onnx-cuda-13 -# 3. Copy binaries from remote Docker volumes to local +# 3. Copy binaries from remote Docker containers to local mkdir -p releases/${VERSION} -docker run --rm -v $(pwd)/releases/${VERSION}:/test ubuntu:24.04 ls /test # verify -# Use tar pipe to copy from remote Docker volume: -docker run --rm -v $(pwd)/releases/${VERSION}:/src ubuntu:24.04 tar -cf - -C /src . | tar -xf - -C releases/${VERSION}/ +docker cp macos-release-avx2-1:/output/. releases/${VERSION}/ +docker cp macos-release-vulkan-1:/output/. releases/${VERSION}/ +docker cp macos-release-onnx-avx2-1:/output/. releases/${VERSION}/ +docker cp macos-release-onnx-cuda-1:/output/. releases/${VERSION}/ -# 4. Build AVX-512 + ROCm binaries locally IN DOCKER (caps glibc at container version) +# 4. 
Build AVX-512 + MIGraphX binaries locally IN DOCKER (caps glibc at container version) docker context use # Whisper AVX-512 + ONNX AVX-512 (requires AVX-512 capable host) docker compose -f docker-compose.build.yml --profile avx512 build --no-cache avx512 onnx-avx512 docker compose -f docker-compose.build.yml --profile avx512 up avx512 onnx-avx512 -# ONNX ROCm (requires AMD GPU host) -docker compose -f docker-compose.build.yml build --no-cache onnx-rocm -docker compose -f docker-compose.build.yml up onnx-rocm +# ONNX MIGraphX (requires AMD GPU host) +docker compose -f docker-compose.build.yml build --no-cache onnx-migraphx +docker compose -f docker-compose.build.yml up onnx-migraphx # 5. VERIFY VERSIONS before uploading (critical!) for bin in releases/${VERSION}/voxtype-*; do @@ -525,10 +562,10 @@ done ### Version Verification Checklist -**Before uploading any release, verify ALL binaries report the correct version:** +**Before uploading any release, verify ALL 8 binaries report the correct version:** ```bash -# Whisper binaries +# Whisper binaries (3) releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-avx2 --version releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-avx512 --version releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-vulkan --version @@ -536,12 +573,35 @@ releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-vulkan --version # ONNX binaries releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-onnx-avx2 --version releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-onnx-avx512 --version -releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-onnx-cuda --version -releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-onnx-rocm --version +releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-onnx-cuda-12 --version +releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-onnx-cuda-13 --version +releases/${VERSION}/voxtype-${VERSION}-linux-x86_64-onnx-migraphx --version ``` If versions don't match, the Docker cache is stale. Rebuild with `--no-cache`. 
+### Functional Verification (GPU Builds) + +**Version checks and checksums are NOT sufficient to verify GPU builds.** A binary can report the correct version, have the expected file size, and still have non-functional GPU support due to stale build artifacts. + +For GPU-enabled binaries (Vulkan, CUDA, MIGraphX), verify GPU is actually detected: + +```bash +# Test Vulkan build - should show "use gpu = 1" and "ggml_vulkan: Found N devices" +./voxtype-${VERSION}-linux-x86_64-vulkan daemon & +sleep 3 +journalctl --user -u voxtype --since "10 seconds ago" | grep -E "(use gpu|ggml_vulkan|Found.*devices)" +# Expected: "use gpu = 1", "ggml_vulkan: Found 1 Vulkan devices" +# Bad: "use gpu = 0" or "no GPU found" + +# For ONNX MIGraphX - should show MIGraphX execution provider +./voxtype-${VERSION}-linux-x86_64-onnx-migraphx daemon & +sleep 3 +journalctl --user -u voxtype --since "10 seconds ago" | grep -iE "(migraphx|execution provider)" +``` + +If GPU detection fails but the binary otherwise works, the build used stale artifacts. Run `cargo clean` and rebuild. + ### Validating Binaries (AVX-512 Detection) Use `objdump` to verify binaries don't contain forbidden instructions: @@ -590,8 +650,9 @@ done | vulkan | Ubuntu 24.04 | 2.39 | | onnx-avx2 | Ubuntu 24.04 | 2.39 | | onnx-avx512 | Ubuntu 24.04 | 2.39 | -| onnx-cuda | Ubuntu 24.04 | 2.39 | -| onnx-rocm | Ubuntu 24.04 | 2.39 | +| onnx-cuda-12 | Ubuntu 24.04 | 2.39 | +| onnx-cuda-13 | Ubuntu 24.04 | 2.39 | +| onnx-migraphx | Ubuntu 24.04 | 2.39 | If any binary exceeds its expected glibc version, it was likely built outside Docker. Rebuild it in the appropriate Docker container. diff --git a/Cargo.lock b/Cargo.lock index e600c6cf..5fddfce8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "accesskit" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5351dcebb14b579ccab05f288596b2ae097005be7ee50a7c3d4ca9d0d5a66f6a" +dependencies = [ + "uuid", +] + [[package]] name = "adler2" version = "2.0.1" @@ -31,6 +40,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "alsa" version = "0.9.1" @@ -118,6 +133,21 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "ash" +version = "0.38.0+1.3.281" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bb44936d800fea8f016d7f2311c6a4f97aebd5dc86f09906139ec848cf3a46f" +dependencies = [ + "libloading 0.8.9", +] + [[package]] name = "async-trait" version = "0.1.89" @@ -126,7 +156,30 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", +] + +[[package]] +name = "atk" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "241b621213072e993be4f6f3a9e4b45f65b7e6faad43001be957184b7bb1824b" +dependencies = [ + "atk-sys", + "glib 0.18.5", + "libc", +] + +[[package]] +name = "atk-sys" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5e48b684b0ca77d2bbadeef17424c2ea3c897d44d566a1617e7e8f30614d086" +dependencies = [ + "glib-sys 0.18.1", + "gobject-sys 0.18.0", + "libc", + "system-deps 6.2.2", ] [[package]] @@ 
-168,11 +221,26 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 2.1.1", "shlex", - "syn", + "syn 2.0.117", ] +[[package]] +name = "bit-set" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34ddef2995421ab6a5c779542c81ee77c115206f4ad9d5a8e05f4ff49716a3dd" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71798fca2c1fe1086445a7258a4bc81e6e49dcd24c8d0dd9a1e57395b603f51" + [[package]] name = "bitflags" version = "1.3.2" @@ -184,6 +252,9 @@ name = "bitflags" version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +dependencies = [ + "serde_core", +] [[package]] name = "bitvec" @@ -197,12 +268,47 @@ dependencies = [ "wyz", ] +[[package]] +name = "block" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" + +[[package]] +name = "block2" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdeb9d870516001442e364c5220d3574d2da8dc765554b4a617230d33fa58ef5" +dependencies = [ + "objc2", +] + [[package]] name = "bumpalo" version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" 
+dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "byteorder" version = "1.5.0" @@ -215,6 +321,85 @@ version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +[[package]] +name = "cairo-rs" +version = "0.18.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ca26ef0159422fb77631dc9d17b102f253b876fe1586b03b803e63a309b4ee2" +dependencies = [ + "bitflags 2.10.0", + "cairo-sys-rs 0.18.2", + "glib 0.18.5", + "libc", + "once_cell", + "thiserror 1.0.69", +] + +[[package]] +name = "cairo-rs" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc8d9aa793480744cd9a0524fef1a2e197d9eaa0f739cde19d16aba530dcb95" +dependencies = [ + "bitflags 2.10.0", + "cairo-sys-rs 0.22.0", + "glib 0.22.7", + "libc", +] + +[[package]] +name = "cairo-sys-rs" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "685c9fa8e590b8b3d678873528d83411db17242a73fccaed827770ea0fedda51" +dependencies = [ + "glib-sys 0.18.1", + "libc", + "system-deps 6.2.2", +] + +[[package]] +name = "cairo-sys-rs" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8b4985713047f5faee02b8db6a6ef32bbb50269ff53c1aee716d1d195b76d54" +dependencies = [ + "glib-sys 0.22.6", + "libc", + "system-deps 7.0.8", +] + +[[package]] +name = "calloop" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dbf9978365bac10f54d1d4b04f7ce4427e51f71d61f2fe15e3fed5166474df7" +dependencies = [ + "bitflags 2.10.0", + "polling", + "rustix 1.1.2", + "slab", + "tracing", +] + +[[package]] +name = "calloop-wayland-source" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138efcf0940a02ebf0cc8d1eff41a1682a46b431630f4c52450d6265876021fa" +dependencies = 
[ + "calloop", + "rustix 1.1.2", + "wayland-backend", + "wayland-client", +] + +[[package]] +name = "cassowary" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53" + [[package]] name = "castaway" version = "0.2.4" @@ -251,6 +436,26 @@ dependencies = [ "nom", ] +[[package]] +name = "cfg-expr" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d067ad48b8650848b989a59a86c6c36a995d02d2bf778d45c3c5d57bc2718f02" +dependencies = [ + "smallvec", + "target-lexicon 0.12.16", +] + +[[package]] +name = "cfg-expr" +version = "0.20.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c6b04e07d8080154ed4ac03546d9a2b303cc2fe1901ba0b35b301516e289368" +dependencies = [ + "smallvec", + "target-lexicon 0.13.3", +] + [[package]] name = "cfg-if" version = "1.0.4" @@ -274,7 +479,7 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -316,10 +521,10 @@ version = "4.5.49" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -347,6 +552,39 @@ dependencies = [ "cc", ] +[[package]] +name = "cocoa" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "667fdc068627a2816b9ff831201dd9864249d6ee8d190b9532357f1fc0f61ea7" +dependencies = [ + "bitflags 1.3.2", + "block", + "core-foundation 0.9.4", + "core-graphics 0.21.0", + "foreign-types 0.3.2", + "libc", + "objc", +] + +[[package]] +name = "codespan-reporting" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681" +dependencies = [ + "unicode-width 
0.2.0", +] + +[[package]] +name = "color" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a18ef4657441fb193b65f34dc39b3781f0dfec23d3bd94d0eeb4e88cde421edb" +dependencies = [ + "bytemuck", +] + [[package]] name = "colorchoice" version = "1.0.4" @@ -363,6 +601,20 @@ dependencies = [ "memchr", ] +[[package]] +name = "compact_str" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b79c4069c6cad78e2e0cdfcbd26275770669fb39fd308a752dc110e83b9af32" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "static_assertions", +] + [[package]] name = "compact_str" version = "0.9.0" @@ -378,22 +630,105 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "core-foundation" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d24c7a13c43e870e37c1556b74555437870a04514f7685f5b354e090567171" +dependencies = [ + "core-foundation-sys 0.7.0", + "libc", +] + [[package]] name = "core-foundation" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" dependencies = [ - "core-foundation-sys", + "core-foundation-sys 0.8.7", + "libc", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys 0.8.7", "libc", ] +[[package]] +name = "core-foundation-sys" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b3a71ab494c0b5b860bdc8407ae08978052417070c2ced38573a9157ad75b8ac" + [[package]] name = "core-foundation-sys" version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core-graphics" +version = "0.19.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3889374e6ea6ab25dba90bb5d96202f61108058361f6dc72e8b03e6f8bbe923" +dependencies = [ + "bitflags 1.3.2", + "core-foundation 0.7.0", + "foreign-types 0.3.2", + "libc", +] + +[[package]] +name = "core-graphics" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52a67c4378cf203eace8fb6567847eb641fd6ff933c1145a115c6ee820ebb978" +dependencies = [ + "bitflags 1.3.2", + "core-foundation 0.9.4", + "foreign-types 0.3.2", + "libc", +] + +[[package]] +name = "core-graphics" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa95a34622365fa5bbf40b20b75dba8dfa8c94c734aea8ac9a5ca38af14316f1" +dependencies = [ + "bitflags 2.10.0", + "core-foundation 0.10.1", + "core-graphics-types", + "foreign-types 0.5.0", + "libc", +] + +[[package]] +name = "core-graphics-types" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d44a101f213f6c4cdc1853d4b78aef6db6bdfa3468798cc1d9912f4735013eb" +dependencies = [ + "bitflags 2.10.0", + "core-foundation 0.10.1", + "libc", +] + [[package]] name = "coreaudio-rs" version = "0.11.3" @@ -401,7 +736,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "321077172d79c662f64f5071a03120748d5bb652f5231570141be24cfcd2bace" dependencies = [ "bitflags 1.3.2", - "core-foundation-sys", + "core-foundation-sys 0.8.7", "coreaudio-sys", ] @@ -421,20 +756,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "873dab07c8f743075e57f524c583985fbaf745602acbe916a01539364369a779" 
dependencies = [ "alsa", - "core-foundation-sys", + "core-foundation-sys 0.8.7", "coreaudio-rs", "dasp_sample", "jni", "js-sys", "libc", "mach2", - "ndk", + "ndk 0.8.0", "ndk-context", "oboe", "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "windows", + "windows 0.54.0", ] [[package]] @@ -480,14 +815,61 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crossterm" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" +dependencies = [ + "bitflags 2.10.0", + "crossterm_winapi", + "mio 1.1.0", + "parking_lot", + "rustix 0.38.44", + "signal-hook", + "signal-hook-mio", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "cursor-icon" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27ae1dd37df86211c42e150270f82743308803d90a6f6e6651cd730d5e1732f" + [[package]] name = "darling" version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.20.11", + "darling_macro 0.20.11", +] + +[[package]] +name = "darling" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" +dependencies = [ + 
"darling_core 0.23.0", + "darling_macro 0.23.0", ] [[package]] @@ -501,7 +883,20 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn", + "syn 2.0.117", +] + +[[package]] +name = "darling_core" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" +dependencies = [ + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.117", ] [[package]] @@ -510,16 +905,27 @@ version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ - "darling_core", + "darling_core 0.20.11", "quote", - "syn", + "syn 2.0.117", ] [[package]] -name = "dary_heap" -version = "0.3.8" +name = "darling_macro" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" +dependencies = [ + "darling_core 0.23.0", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "dary_heap" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04" dependencies = [ "serde", ] @@ -540,6 +946,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", +] + [[package]] name = "derive_builder" version = "0.20.2" @@ -555,10 +970,10 @@ version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" dependencies = [ - "darling", + "darling 0.20.11", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] 
[[package]] @@ -568,257 +983,1154 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn", + "syn 2.0.117", +] + +[[package]] +name = "directories" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a49173b84e034382284f27f1af4dcbbd231ffa358c0fe316541a7337f376a35" +dependencies = [ + "dirs-sys 0.4.1", +] + +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys 0.4.1", +] + +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys 0.5.0", +] + +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.4.6", + "windows-sys 0.48.0", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.5.2", + "windows-sys 0.61.2", +] + +[[package]] +name = "dispatch" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd0c93bb4b0c6d9b77f4435b0ae98c24d17f1c45b2ff844c6151a07256ca923b" + +[[package]] +name = "dispatch2" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38" +dependencies = [ + "bitflags 2.10.0", + "objc2", +] + 
+[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "dlib" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab8ecd87370524b461f8557c119c405552c396ed91fc0a8eec68679eab26f94a" +dependencies = [ + "libloading 0.8.9", +] + +[[package]] +name = "dlopen2" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1297103d2bbaea85724fcee6294c2d50b1081f9ad47d0f6f6f61eda65315a6" +dependencies = [ + "dlopen2_derive", + "libc", + "once_cell", + "winapi", +] + +[[package]] +name = "dlopen2_derive" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fbbb781877580993a8707ec48672673ec7b81eeba04cfd2310bd28c08e47c8f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "document-features" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61" +dependencies = [ + "litrs", +] + +[[package]] +name = "downcast-rs" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" + +[[package]] +name = "dpi" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8b14ccef22fc6f5a8f4d7d768562a182c04ce9a3b3157b91390b52ddfdf1a76" + +[[package]] +name = "ecolor" +version = "0.34.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "137c0ce4ce4152ff7e223a7ce22ee1057cdff61fce0a45c32459c3ccec64868d" +dependencies = [ + "bytemuck", + "emath", +] + +[[package]] +name = "egui" +version = "0.34.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f34aaf627da598dfadd64b0fee6101d22e9c451d1e5348157312720b7f459f0f" +dependencies = [ + "accesskit", + "ahash", + "bitflags 2.10.0", + "emath", + "epaint", + "log", + "nohash-hasher", + "profiling", + "smallvec", + "unicode-segmentation", +] + +[[package]] +name = "egui-wgpu" +version = "0.34.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71033ff78b041c9c363450f4498ff95468ef3ecbcc71a62f67036a6207d98fa4" +dependencies = [ + "ahash", + "bytemuck", + "document-features", + "epaint", + "log", + "profiling", + "thiserror 2.0.17", + "type-map", + "web-time", + "wgpu", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "emath" +version = "0.34.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a05cd8bdf3b598488c627ca97c7fe8909448ffa26278dd3c7e535cdb554d721" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "env_home" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe" + +[[package]] +name = "epaint" +version = "0.34.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04f3017dd67f147a697ee0c8484fb568fd9553e2a0c114be5020dbbc11962841" +dependencies = [ + "ahash", + "bytemuck", + "ecolor", + "emath", + "epaint_default_fonts", + "font-types", + "log", + "nohash-hasher", + "parking_lot", + "profiling", + "self_cell", + "skrifa", + "smallvec", + "vello_cpu", +] + +[[package]] +name = "epaint_default_fonts" +version = "0.34.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e3b85a2bb775a3ab02d077a65cc31575c11b2584581913253cc11ce49f48bba" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "esaxx-rs" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" + +[[package]] +name = "euclid" +version = "0.22.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1a05365e3b1c6d1650318537c7460c6923f1abdd272ad6842baa2b509957a06" +dependencies = [ + "num-traits", +] + +[[package]] +name = "evdev" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab6055a93a963297befb0f4f6e18f314aec9767a4bbe88b151126df2433610a7" +dependencies = [ + "bitvec", + "cfg-if", + "libc", + "nix 0.23.2", + "thiserror 1.0.69", +] + +[[package]] +name = "eyre" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd915d99f24784cdc19fd37ef22b97e3ff0ae756c7e492e9fbfe897d61e2aec" +dependencies = [ + "indenter", + "once_cell", +] + +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = 
"fdeflate" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c" +dependencies = [ + "simd-adler32", +] + +[[package]] +name = "fearless_simd" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fb2907d1f08b2b316b9223ced5b0e89d87028ba8deae9764741dba8ff7f3903" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "field-offset" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38e2275cc4e4fc009b0669731a1e5ab7ebf11f469eaede2bab9309a5b4d6057f" +dependencies = [ + "memoffset 0.9.1", + "rustc_version", +] + +[[package]] +name = "filetime" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.60.2", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" + +[[package]] +name = "flate2" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + +[[package]] +name = "font-types" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b38ad915f6dadd993ced50848a8291a543bd41ca62bc10740d5e64e2ab4cfd7" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared 0.1.1", +] + +[[package]] +name = "foreign-types" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" +dependencies = [ + "foreign-types-macros", + "foreign-types-shared 0.3.1", +] + +[[package]] +name = "foreign-types-macros" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "foreign-types-shared" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + +[[package]] 
+name = "fsevent-sys" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76ee7a02da4d231650c7cea31349b889be2f45ddb3ef3032d2ec8185f6313fd2" +dependencies = [ + "libc", +] + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-macro", + "futures-task", + "pin-project-lite", + "slab", +] + +[[package]] +name = "gdk" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9f245958c627ac99d8e529166f9823fb3b838d1d41fd2b297af3075093c2691" +dependencies = [ + "cairo-rs 0.18.5", + "gdk-pixbuf 0.18.5", + "gdk-sys", + "gio 0.18.4", + "glib 0.18.5", + "libc", + "pango 0.18.3", +] + +[[package]] +name = "gdk-pixbuf" +version = "0.18.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50e1f5f1b0bfb830d6ccc8066d18db35c487b1b2b1e8589b5dfe9f07e8defaec" +dependencies = [ + "gdk-pixbuf-sys 0.18.0", + "gio 0.18.4", + "glib 0.18.5", + "libc", + "once_cell", +] + +[[package]] +name = "gdk-pixbuf" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25f420376dbee041b2db374ce4573892a36222bb3f6c0c43e24f0d67eae9b646" +dependencies = [ + "gdk-pixbuf-sys 0.22.0", + "gio 0.22.6", + "glib 0.22.7", + "libc", +] + +[[package]] +name = "gdk-pixbuf-sys" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9839ea644ed9c97a34d129ad56d38a25e6756f99f3a88e15cd39c20629caf7" +dependencies = [ + "gio-sys 0.18.1", + "glib-sys 0.18.1", + "gobject-sys 0.18.0", + "libc", + "system-deps 6.2.2", +] + +[[package]] +name = "gdk-pixbuf-sys" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48f31b37b1fc4b48b54f6b91b7ef04c18e00b4585d98359dd7b998774bbd91fb" +dependencies = [ + "gio-sys 0.22.0", + "glib-sys 0.22.6", + "gobject-sys 0.22.6", + "libc", + "system-deps 7.0.8", +] + +[[package]] +name = "gdk-sys" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c2d13f38594ac1e66619e188c6d5a1adb98d11b2fcf7894fc416ad76aa2f3f7" +dependencies = [ + "cairo-sys-rs 0.18.2", + 
"gdk-pixbuf-sys 0.18.0", + "gio-sys 0.18.1", + "glib-sys 0.18.1", + "gobject-sys 0.18.0", + "libc", + "pango-sys 0.18.0", + "pkg-config", + "system-deps 6.2.2", +] + +[[package]] +name = "gdk4" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd42fdbbf48612c6e8f47c65fb92d2e8f39c25aecd6af047e83897c1a22d2a4e" +dependencies = [ + "cairo-rs 0.22.0", + "gdk-pixbuf 0.22.0", + "gdk4-sys", + "gio 0.22.6", + "gl", + "glib 0.22.7", + "libc", + "pango 0.22.6", +] + +[[package]] +name = "gdk4-sys" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d974ac4f15e67472c3a9728daf612590b4a5762a4b33f0edd298df0b80d043c" +dependencies = [ + "cairo-sys-rs 0.22.0", + "gdk-pixbuf-sys 0.22.0", + "gio-sys 0.22.0", + "glib-sys 0.22.6", + "gobject-sys 0.22.6", + "libc", + "pango-sys 0.22.0", + "pkg-config", + "system-deps 7.0.8", +] + +[[package]] +name = "gdkwayland-sys" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "140071d506d223f7572b9f09b5e155afbd77428cd5cc7af8f2694c41d98dfe69" +dependencies = [ + "gdk-sys", + "glib-sys 0.18.1", + "gobject-sys 0.18.0", + "libc", + "pkg-config", + "system-deps 6.2.2", +] + +[[package]] +name = "gdkx11-sys" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e7445fe01ac26f11601db260dd8608fe172514eb63b3b5e261ea6b0f4428d" +dependencies = [ + "gdk-sys", + "glib-sys 0.18.1", + "libc", + "system-deps 6.2.2", + "x11", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" 
+dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "getrandom" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + +[[package]] +name = "gio" +version = "0.18.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fc8f532f87b79cbc51a79748f16a6828fb784be93145a322fa14d06d354c73" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "gio-sys 0.18.1", + "glib 0.18.5", + "libc", + "once_cell", + "pin-project-lite", + "smallvec", + "thiserror 1.0.69", +] + +[[package]] +name = "gio" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3848bcba3a35cc0a71df8ba8ecfd799d6bfb862342a53a4a915fb62213aa4e6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "gio-sys 0.22.0", + "glib 0.22.7", + "libc", + "pin-project-lite", + "smallvec", +] + +[[package]] +name = "gio-sys" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37566df850baf5e4cb0dfb78af2e4b9898d817ed9263d1090a2df958c64737d2" +dependencies = [ + "glib-sys 0.18.1", + "gobject-sys 0.18.0", + "libc", + "system-deps 6.2.2", + "winapi", +] + +[[package]] +name = "gio-sys" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64729ba2772c080448f9f966dba8f4456beeb100d8c28a865ef8a0f2ef4987e1" +dependencies = [ + "glib-sys 0.22.6", + "gobject-sys 0.22.6", + "libc", + "system-deps 7.0.8", + "windows-sys 0.61.2", +] + +[[package]] +name = "gl" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a94edab108827d67608095e269cf862e60d920f144a5026d3dbcfd8b877fb404" +dependencies = [ + "gl_generator", +] + +[[package]] 
+name = "gl_generator" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a95dfc23a2b4a9a2f5ab41d194f8bfda3cabec42af4e39f08c339eb2a0c124d" +dependencies = [ + "khronos_api", + "log", + "xml-rs", ] [[package]] -name = "directories" -version = "5.0.1" +name = "glib" +version = "0.18.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a49173b84e034382284f27f1af4dcbbd231ffa358c0fe316541a7337f376a35" +checksum = "233daaf6e83ae6a12a52055f568f9d7cf4671dabb78ff9560ab6da230ce00ee5" dependencies = [ - "dirs-sys", + "bitflags 2.10.0", + "futures-channel", + "futures-core", + "futures-executor", + "futures-task", + "futures-util", + "gio-sys 0.18.1", + "glib-macros 0.18.5", + "glib-sys 0.18.1", + "gobject-sys 0.18.0", + "libc", + "memchr", + "once_cell", + "smallvec", + "thiserror 1.0.69", ] [[package]] -name = "dirs" -version = "5.0.1" +name = "glib" +version = "0.22.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +checksum = "c207e04e51605dcf7b2924c41591b3a10e1438eaac5bcf448fb91f325381104a" dependencies = [ - "dirs-sys", + "bitflags 2.10.0", + "futures-channel", + "futures-core", + "futures-executor", + "futures-task", + "futures-util", + "gio-sys 0.22.0", + "glib-macros 0.22.6", + "glib-sys 0.22.6", + "gobject-sys 0.22.6", + "libc", + "memchr", + "smallvec", ] [[package]] -name = "dirs-sys" -version = "0.4.1" +name = "glib-macros" +version = "0.18.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +checksum = "0bb0228f477c0900c880fd78c8759b95c7636dbd7842707f49e132378aa2acdc" dependencies = [ - "libc", - "option-ext", - "redox_users", - "windows-sys 0.48.0", + "heck 0.4.1", + "proc-macro-crate 2.0.0", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] -name = "displaydoc" 
-version = "0.2.5" +name = "glib-macros" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +checksum = "506d23499707c7142898429757e8d9a3871d965239a2cb66dfa05052be6d6f19" dependencies = [ + "heck 0.5.0", "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] -name = "either" -version = "1.15.0" +name = "glib-sys" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +checksum = "063ce2eb6a8d0ea93d2bf8ba1957e78dbab6be1c2220dd3daca57d5a9d869898" +dependencies = [ + "libc", + "system-deps 6.2.2", +] [[package]] -name = "env_home" -version = "0.1.0" +name = "glib-sys" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe" +checksum = "5f7fbac234ed5bc2a28359b7bde8e1b9cdf1441cc2d7f068e4824672d7db9445" +dependencies = [ + "libc", + "system-deps 7.0.8", +] [[package]] -name = "equivalent" -version = "1.0.2" +name = "glob" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] -name = "errno" -version = "0.3.14" +name = "glow" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +checksum = "29038e1c483364cc6bb3cf78feee1816002e127c331a1eec55a4d202b9e1adb5" dependencies = [ - "libc", - "windows-sys 0.61.2", + "js-sys", + "slotmap", + "wasm-bindgen", + "web-sys", ] [[package]] -name = "esaxx-rs" -version = "0.1.10" +name = "glutin_wgl_sys" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" +checksum = "2c4ee00b289aba7a9e5306d57c2d05499b2e5dc427f84ac708bd2c090212cf3e" +dependencies = [ + "gl_generator", +] [[package]] -name = "evdev" -version = "0.12.2" +name = "gobject-sys" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab6055a93a963297befb0f4f6e18f314aec9767a4bbe88b151126df2433610a7" +checksum = "0850127b514d1c4a4654ead6dedadb18198999985908e6ffe4436f53c785ce44" dependencies = [ - "bitvec", - "cfg-if", + "glib-sys 0.18.1", "libc", - "nix 0.23.2", - "thiserror 1.0.69", + "system-deps 6.2.2", ] [[package]] -name = "eyre" -version = "0.6.12" +name = "gobject-sys" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cd915d99f24784cdc19fd37ef22b97e3ff0ae756c7e492e9fbfe897d61e2aec" +checksum = "22a861859b887a79cf461359c192c97a57d8fb0229dd291232e57aa11f6fa72c" dependencies = [ - "indenter", - "once_cell", + "glib-sys 0.22.6", + "libc", + "system-deps 7.0.8", ] [[package]] -name = "fallible-iterator" -version = "0.3.0" +name = "gpu-allocator" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" +checksum = "51255ea7cfaadb6c5f1528d43e92a82acb2b96c43365989a28b2d44ee38f8795" +dependencies = [ + "ash", + "hashbrown 0.16.1", + "log", + "presser", + "thiserror 2.0.17", +] [[package]] -name = "fallible-streaming-iterator" -version = "0.1.9" +name = "gpu-descriptor" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" +checksum = "b89c83349105e3732062a895becfc71a8f921bb71ecbbdd8ff99263e3b53a0ca" +dependencies = [ + "bitflags 2.10.0", + "gpu-descriptor-types", + "hashbrown 0.15.5", +] [[package]] -name = "fastrand" -version = "2.3.0" +name = "gpu-descriptor-types" +version = "0.2.0" source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +checksum = "fdf242682df893b86f33a73828fb09ca4b2d3bb6cc95249707fc684d27484b91" +dependencies = [ + "bitflags 2.10.0", +] [[package]] -name = "filetime" -version = "0.2.26" +name = "graphene-rs" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +checksum = "c7d1b7881f96869f49808b6adfe906a93a57a34204952253444d68c3208d71f1" dependencies = [ - "cfg-if", + "glib 0.22.7", + "graphene-sys", "libc", - "libredox", - "windows-sys 0.60.2", ] [[package]] -name = "find-msvc-tools" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" - -[[package]] -name = "flate2" -version = "1.1.5" +name = "graphene-sys" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" +checksum = "517f062f3fd6b7fd3e57a3f038a74b3c23ca32f51199ff028aa704609943f79c" dependencies = [ - "crc32fast", - "miniz_oxide", + "glib-sys 0.22.6", + "libc", + "pkg-config", + "system-deps 7.0.8", ] [[package]] -name = "fnv" -version = "1.0.7" +name = "gsk4" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +checksum = "53c912dfcbd28acace5fc99c40bb9f25e1dcb73efb1f2608327f66a99acdcb62" +dependencies = [ + "cairo-rs 0.22.0", + "gdk4", + "glib 0.22.7", + "graphene-rs", + "gsk4-sys", + "libc", + "pango 0.22.6", +] [[package]] -name = "foldhash" -version = "0.1.5" +name = "gsk4-sys" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +checksum 
= "d7d54bbc7a9d8b6ffe4f0c95eede15ccfb365c8bf521275abe6bcfb57b18fb8a" +dependencies = [ + "cairo-sys-rs 0.22.0", + "gdk4-sys", + "glib-sys 0.22.6", + "gobject-sys 0.22.6", + "graphene-sys", + "libc", + "pango-sys 0.22.0", + "system-deps 7.0.8", +] [[package]] -name = "foreign-types" -version = "0.3.2" +name = "gtk" +version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +checksum = "fd56fb197bfc42bd5d2751f4f017d44ff59fbb58140c6b49f9b3b2bdab08506a" dependencies = [ - "foreign-types-shared", + "atk", + "cairo-rs 0.18.5", + "field-offset", + "futures-channel", + "gdk", + "gdk-pixbuf 0.18.5", + "gio 0.18.4", + "glib 0.18.5", + "gtk-sys", + "gtk3-macros", + "libc", + "pango 0.18.3", + "pkg-config", ] [[package]] -name = "foreign-types-shared" -version = "0.1.1" +name = "gtk-sys" +version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" +checksum = "8f29a1c21c59553eb7dd40e918be54dccd60c52b049b75119d5d96ce6b624414" +dependencies = [ + "atk-sys", + "cairo-sys-rs 0.18.2", + "gdk-pixbuf-sys 0.18.0", + "gdk-sys", + "gio-sys 0.18.1", + "glib-sys 0.18.1", + "gobject-sys 0.18.0", + "libc", + "pango-sys 0.18.0", + "system-deps 6.2.2", +] [[package]] -name = "form_urlencoded" -version = "1.2.2" +name = "gtk3-macros" +version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +checksum = "52ff3c5b21f14f0736fed6dcfc0bfb4225ebf5725f3c0209edeec181e4d73e9d" dependencies = [ - "percent-encoding", + "proc-macro-crate 1.3.1", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] -name = "fs_extra" -version = "1.3.0" +name = "gtk4" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" +checksum = "7181b837f04cbe93f79441475f7a00560a92cba7a72e38cc1a68b6f8b78eaae2" +dependencies = [ + "cairo-rs 0.22.0", + "field-offset", + "futures-channel", + "gdk-pixbuf 0.22.0", + "gdk4", + "gio 0.22.6", + "glib 0.22.7", + "graphene-rs", + "gsk4", + "gtk4-macros", + "gtk4-sys", + "libc", + "pango 0.22.6", +] [[package]] -name = "fsevent-sys" -version = "4.1.0" +name = "gtk4-layer-shell" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76ee7a02da4d231650c7cea31349b889be2f45ddb3ef3032d2ec8185f6313fd2" +checksum = "a4069987ff4793699511a251028cc336b438e46565b463f111250148d574752a" dependencies = [ + "bitflags 2.10.0", + "gdk4", + "glib 0.22.7", + "glib-sys 0.22.6", + "gtk4", + "gtk4-layer-shell-sys", "libc", ] [[package]] -name = "funty" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" - -[[package]] -name = "futures-core" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" - -[[package]] -name = "getrandom" -version = "0.2.16" +name = "gtk4-layer-shell-sys" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "8f566a5ec5bcc454e7fcf2ab76930887ced5365afce12c1e5201bb296b95f1b9" dependencies = [ - "cfg-if", + "gdk4-sys", + "glib-sys 0.22.6", + "gtk4-sys", "libc", - "wasi", + "system-deps 7.0.8", ] [[package]] -name = "getrandom" -version = "0.3.4" +name = "gtk4-macros" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +checksum = "3581b242ba62fdff122ebb626ea641582ec326031622bd19d60f85029c804a87" 
dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasip2", + "proc-macro-crate 3.4.0", + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] -name = "getrandom" -version = "0.4.1" +name = "gtk4-sys" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +checksum = "20ba8e695e2640455561274e65e45f0a151619e450746007667f4b23ceae4e1b" dependencies = [ - "cfg-if", + "cairo-sys-rs 0.22.0", + "gdk-pixbuf-sys 0.22.0", + "gdk4-sys", + "gio-sys 0.22.0", + "glib-sys 0.22.6", + "gobject-sys 0.22.6", + "graphene-sys", + "gsk4-sys", "libc", - "r-efi", - "wasip2", - "wasip3", + "pango-sys 0.22.0", + "system-deps 7.0.8", ] [[package]] -name = "glob" -version = "0.3.3" +name = "half" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", + "zerocopy", +] [[package]] name = "hashbrown" @@ -835,7 +2147,9 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "foldhash", + "allocator-api2", + "equivalent", + "foldhash 0.1.5", ] [[package]] @@ -843,6 +2157,11 @@ name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] [[package]] name = "hashlink" @@ -853,6 +2172,12 @@ dependencies = [ "hashbrown 0.14.5", ] +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + 
[[package]] name = "heck" version = "0.5.0" @@ -865,6 +2190,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" +[[package]] +name = "hexf-parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfa686283ad6dd069f105e5ab091b04c62850d3e4cf5d67debad1933f55023df" + [[package]] name = "hmac-sha256" version = "1.1.12" @@ -900,7 +2231,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" dependencies = [ "android_system_properties", - "core-foundation-sys", + "core-foundation-sys 0.8.7", "iana-time-zone-haiku", "js-sys", "log", @@ -1049,6 +2380,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "indoc" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] + [[package]] name = "inotify" version = "0.9.6" @@ -1082,6 +2422,19 @@ dependencies = [ "libc", ] +[[package]] +name = "instability" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eb2d60ef19920a3a9193c3e371f726ec1dafc045dac788d0fb3704272458971" +dependencies = [ + "darling 0.23.0", + "indoc", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -1164,14 +2517,44 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.82" +version = "0.3.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" +checksum = "a1840c94c045fbcf8ba2812c95db44499f7c64910a912551aaaa541decebcacf" dependencies = [ + "cfg-if", + "futures-util", "once_cell", "wasm-bindgen", ] +[[package]] +name = "keyboard-types" +version = "0.7.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b750dcadc39a09dbadd74e118f6dd6598df77fa01df0cfcdc52c28dece74528a" +dependencies = [ + "bitflags 2.10.0", + "serde", + "unicode-segmentation", +] + +[[package]] +name = "khronos-egl" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aae1df220ece3c0ada96b8153459b67eebe9ae9212258bb0134ae60416fdf76" +dependencies = [ + "libc", + "libloading 0.8.9", + "pkg-config", +] + +[[package]] +name = "khronos_api" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2db585e1d738fc771bf08a151420d3ed193d9d895a36df7f6f8a9456b911ddc" + [[package]] name = "kqueue" version = "1.1.1" @@ -1186,10 +2569,21 @@ dependencies = [ name = "kqueue-sys" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed9625ffda8729b85e45cf04090035ac368927b8cebc34898e7c120f52e4838b" +checksum = "ed9625ffda8729b85e45cf04090035ac368927b8cebc34898e7c120f52e4838b" +dependencies = [ + "bitflags 1.3.2", + "libc", +] + +[[package]] +name = "kurbo" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7564e90fe3c0d5771e1f0bc95322b21baaeaa0d9213fa6a0b61c99f8b17b3bfb" dependencies = [ - "bitflags 1.3.2", - "libc", + "arrayvec", + "euclid", + "smallvec", ] [[package]] @@ -1204,12 +2598,46 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" +[[package]] +name = "libappindicator" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03589b9607c868cc7ae54c0b2a22c8dc03dd41692d48f2d7df73615c6a95dc0a" +dependencies = [ + "glib 0.18.5", + "gtk", + "gtk-sys", + "libappindicator-sys", + "log", +] + +[[package]] +name = "libappindicator-sys" +version = "0.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e9ec52138abedcc58dc17a7c6c0c00a2bdb4f3427c7f63fa97fd0d859155caf" +dependencies = [ + "gtk-sys", + "libloading 0.7.4", + "once_cell", +] + [[package]] name = "libc" version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + [[package]] name = "libloading" version = "0.8.9" @@ -1217,7 +2645,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" dependencies = [ "cfg-if", - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -1227,9 +2655,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" dependencies = [ "cfg-if", - "windows-link", + "windows-link 0.2.1", ] +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + [[package]] name = "libredox" version = "0.1.10" @@ -1252,6 +2686,37 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "libxdo" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00333b8756a3d28e78def82067a377de7fa61b24909000aeaa2b446a948d14db" +dependencies = [ + "libxdo-sys", +] + +[[package]] +name = "libxdo-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db23b9e7e2b7831bbd8aac0bbeeeb7b68cbebc162b227e7052e8e55829a09212" +dependencies = [ + "libc", + "x11", +] + +[[package]] +name = "linebender_resource_handle" 
+version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4a5ff6bcca6c4867b1c4fd4ef63e4db7436ef363e0ad7531d1558856bae64f4" + +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -1264,6 +2729,12 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +[[package]] +name = "litrs" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" + [[package]] name = "lock_api" version = "0.4.14" @@ -1275,9 +2746,18 @@ dependencies = [ [[package]] name = "log" -version = "0.4.28" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lru" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.5", +] [[package]] name = "lzma-rust2" @@ -1285,6 +2765,18 @@ version = "0.15.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17f7337d278fec032975dc884152491580dd23750ee957047856735fe0e61ede" +[[package]] +name = "mac-notification-sys" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29a16783dd1a47849b8c8133c9cd3eb2112cfbc6901670af3dba47c8bbfb07d3" +dependencies = [ + "cc", + "objc2", + "objc2-foundation", + "time", +] + [[package]] name = "mach2" version = "0.4.3" @@ -1310,6 +2802,15 @@ version = 
"0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30" +[[package]] +name = "malloc_buf" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" +dependencies = [ + "libc", +] + [[package]] name = "matchers" version = "0.2.0" @@ -1331,9 +2832,18 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "memmap2" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" +dependencies = [ + "libc", +] [[package]] name = "memoffset" @@ -1344,6 +2854,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -1379,6 +2898,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873" dependencies = [ "libc", + "log", "wasi", "windows-sys 0.61.2", ] @@ -1402,7 +2922,54 @@ checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", +] + +[[package]] +name = "muda" +version = "0.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c9fec5a4e89860383d778d10563a605838f8f0b2f9303868937e5ff32e86177" +dependencies = [ + "crossbeam-channel", + 
"dpi", + "gtk", + "keyboard-types", + "libxdo", + "objc2", + "objc2-app-kit", + "objc2-core-foundation", + "objc2-foundation", + "once_cell", + "png", + "thiserror 2.0.17", + "windows-sys 0.60.2", +] + +[[package]] +name = "naga" +version = "29.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2630921705b9b01dcdd0b6864b9562ca3c1951eecd0f0c4f5f04f61e412647" +dependencies = [ + "arrayvec", + "bit-set", + "bitflags 2.10.0", + "cfg-if", + "cfg_aliases", + "codespan-reporting", + "half", + "hashbrown 0.16.1", + "hexf-parse", + "indexmap", + "libm", + "log", + "num-traits", + "once_cell", + "rustc-hash 1.1.0", + "spirv", + "thiserror 2.0.17", + "unicode-ident", ] [[package]] @@ -1461,8 +3028,23 @@ dependencies = [ "bitflags 2.10.0", "jni-sys", "log", - "ndk-sys", + "ndk-sys 0.5.0+25.2.9519653", + "num_enum", + "thiserror 1.0.69", +] + +[[package]] +name = "ndk" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3f42e7bbe13d351b6bead8286a43aac9534b82bd3cc43e47037f012ebfd62d4" +dependencies = [ + "bitflags 2.10.0", + "jni-sys", + "log", + "ndk-sys 0.6.0+11769913", "num_enum", + "raw-window-handle", "thiserror 1.0.69", ] @@ -1481,6 +3063,15 @@ dependencies = [ "jni-sys", ] +[[package]] +name = "ndk-sys" +version = "0.6.0+11769913" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee6cda3051665f1fb8d9e08fc35c96d5a244fb1be711a03b71118828afc9a873" +dependencies = [ + "jni-sys", +] + [[package]] name = "nix" version = "0.23.2" @@ -1491,7 +3082,7 @@ dependencies = [ "cc", "cfg-if", "libc", - "memoffset", + "memoffset 0.6.5", ] [[package]] @@ -1506,6 +3097,12 @@ dependencies = [ "libc", ] +[[package]] +name = "nohash-hasher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" + [[package]] name = "nom" version = "7.1.3" @@ -1553,6 +3150,12 @@ dependencies = [ 
"num-traits", ] +[[package]] +name = "num-conv" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" + [[package]] name = "num-derive" version = "0.4.2" @@ -1561,7 +3164,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -1580,6 +3183,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -1608,10 +3212,104 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" dependencies = [ - "proc-macro-crate", + "proc-macro-crate 3.4.0", "proc-macro2", "quote", - "syn", + "syn 2.0.117", +] + +[[package]] +name = "objc" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" +dependencies = [ + "malloc_buf", +] + +[[package]] +name = "objc2" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a12a8ed07aefc768292f076dc3ac8c48f3781c8f2d5851dd3d98950e8c5a89f" +dependencies = [ + "objc2-encode", +] + +[[package]] +name = "objc2-app-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d49e936b501e5c5bf01fda3a9452ff86dc3ea98ad5f283e1455153142d97518c" +dependencies = [ + "bitflags 2.10.0", + "objc2", + "objc2-core-foundation", + "objc2-foundation", +] + +[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags 2.10.0", + 
"dispatch2", + "objc2", +] + +[[package]] +name = "objc2-core-graphics" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e022c9d066895efa1345f8e33e584b9f958da2fd4cd116792e15e07e4720a807" +dependencies = [ + "bitflags 2.10.0", + "objc2-core-foundation", +] + +[[package]] +name = "objc2-encode" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33" + +[[package]] +name = "objc2-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272" +dependencies = [ + "bitflags 2.10.0", + "block2", + "libc", + "objc2", + "objc2-core-foundation", +] + +[[package]] +name = "objc2-metal" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0125f776a10d00af4152d74616409f0d4a2053a6f57fa5b7d6aa2854ac04794" +dependencies = [ + "bitflags 2.10.0", + "objc2", + "objc2-foundation", +] + +[[package]] +name = "objc2-quartz-core" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96c1358452b371bf9f104e21ec536d37a650eb10f7ee379fff67d2e08d537f1f" +dependencies = [ + "bitflags 2.10.0", + "objc2", + "objc2-core-foundation", + "objc2-foundation", + "objc2-metal", ] [[package]] @@ -1621,7 +3319,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8b61bebd49e5d43f5f8cc7ee2891c16e0f41ec7954d36bcb6c14c5e0de867fb" dependencies = [ "jni", - "ndk", + "ndk 0.8.0", "ndk-context", "num-derive", "num-traits", @@ -1679,7 +3377,7 @@ checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" dependencies = [ "bitflags 2.10.0", "cfg-if", - "foreign-types", + "foreign-types 0.3.2", "libc", "once_cell", "openssl-macros", @@ -1694,7 +3392,7 @@ checksum = 
"a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -1721,11 +3419,20 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ordered-float" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7d950ca161dc355eaf28f82b11345ed76c6e1f6eb1f4f4479e0323b9e2fbd0e" +dependencies = [ + "num-traits", +] + [[package]] name = "ort" -version = "2.0.0-rc.11" +version = "2.0.0-rc.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5df903c0d2c07b56950f1058104ab0c8557159f2741782223704de9be73c3c" +checksum = "d7de3af33d24a745ffb8fab904b13478438d1cd52868e6f17735ef6e1f8bf133" dependencies = [ "libloading 0.9.0", "ndarray 0.17.1", @@ -1737,26 +3444,75 @@ dependencies = [ [[package]] name = "ort-sys" -version = "2.0.0-rc.11" +version = "2.0.0-rc.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06503bb33f294c5f1ba484011e053bfa6ae227074bdb841e9863492dc5960d4b" +checksum = "d7b497d21a8b6fbb4b5a544f8fadb77e801a09ae0add9e411d31c6f89e3c1e90" dependencies = [ "hmac-sha256", "lzma-rust2", "ureq 3.1.4", ] +[[package]] +name = "pango" +version = "0.18.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ca27ec1eb0457ab26f3036ea52229edbdb74dee1edd29063f5b9b010e7ebee4" +dependencies = [ + "gio 0.18.4", + "glib 0.18.5", + "libc", + "once_cell", + "pango-sys 0.18.0", +] + +[[package]] +name = "pango" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "251bdc6e6487b811be0e406a21e301e07e45c0aa8fa39e00c0c8e12a91752438" +dependencies = [ + "gio 0.22.6", + "glib 0.22.7", + "libc", + "pango-sys 0.22.0", +] + +[[package]] +name = "pango-sys" +version = "0.18.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "436737e391a843e5933d6d9aa102cb126d501e815b83601365a948a518555dc5" +dependencies = [ + "glib-sys 0.18.1", + "gobject-sys 0.18.0", + "libc", + "system-deps 6.2.2", +] + +[[package]] +name = "pango-sys" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd111a20ca90fedf03e09c59783c679c00900f1d8491cca5399f5e33609d5d6" +dependencies = [ + "glib-sys 0.22.6", + "gobject-sys 0.22.6", + "libc", + "system-deps 7.0.8", +] + [[package]] name = "parakeet-rs" -version = "0.3.0" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6cbd5310b3d9a1d8ab59369a2e6dd20511f46a81de08f5aaca0ba811059c2c93" +checksum = "e2c54ec33ff53d2078cc6a48dcdd9c50c229905e392743375e29652a2b9a369e" dependencies = [ "eyre", "hound", "ndarray 0.17.1", "ort", - "rustfft", + "realfft", "serde", "serde_json", "tokenizers 0.22.2", @@ -1782,7 +3538,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -1800,6 +3556,19 @@ dependencies = [ "base64ct", ] +[[package]] +name = "peniko" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2b6aadb221872732e87d465213e9be5af2849b0e8cc5300a8ba98fffa2e00a" +dependencies = [ + "bytemuck", + "color", + "kurbo", + "linebender_resource_handle", + "smallvec", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -1818,16 +3587,49 @@ dependencies = [ ] [[package]] -name = "pin-project-lite" -version = "0.2.16" +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + 
+[[package]] +name = "png" +version = "0.17.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82151a2fc869e011c153adc57cf2789ccb8d9906ce52c0b39a6b5697749d7526" +dependencies = [ + "bitflags 1.3.2", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide", +] + +[[package]] +name = "polling" +version = "3.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +checksum = "5d0e4f59085d47d8241c88ead0f274e8a0cb551f3625263c05eb8dd897c34218" +dependencies = [ + "cfg-if", + "concurrent-queue", + "hermit-abi", + "pin-project-lite", + "rustix 1.1.2", + "windows-sys 0.61.2", +] [[package]] -name = "pkg-config" -version = "0.3.32" +name = "pollster" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +checksum = "2f3a9f18d041e6d0e102a0a46750538147e5e8992d3b4873aaafee2520b00ce3" [[package]] name = "portable-atomic" @@ -1853,6 +3655,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -1862,6 +3670,12 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "presser" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8cf8e6a8aa66ce33f63993ffc4ea4271eb5b0530a9002db8455ea6050c77bfa" + [[package]] name = "prettyplease" version = "0.2.37" @@ -1869,7 +3683,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn", + "syn 2.0.117", ] [[package]] @@ -1881,6 +3695,25 @@ dependencies = [ "num-integer", ] +[[package]] +name = "proc-macro-crate" 
+version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" +dependencies = [ + "once_cell", + "toml_edit 0.19.15", +] + +[[package]] +name = "proc-macro-crate" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e8366a6159044a37876a2b9817124296703c586a5c92e2c53751fa06d8d43e8" +dependencies = [ + "toml_edit 0.20.7", +] + [[package]] name = "proc-macro-crate" version = "3.4.0" @@ -1890,6 +3723,30 @@ dependencies = [ "toml_edit 0.23.7", ] +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + [[package]] name = "proc-macro2" version = "1.0.103" @@ -1899,6 +3756,21 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "profiling" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3eb8486b569e12e2c32ad3e204dbaba5e4b5b216e9367044f25f1dba42341773" + +[[package]] +name = "quick-xml" +version = "0.39.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "958f21e8e7ceb5a1aa7fa87fab28e7c75976e0bfe7e23ff069e0a260f894067d" +dependencies = [ + "memchr", +] + [[package]] name = "quote" version = "1.0.42" @@ -1979,6 +3851,45 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "ratatui" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"eabd94c2f37801c20583fc49dd5cd6b0ba68c716787c2dd6ed18571e1e63117b" +dependencies = [ + "bitflags 2.10.0", + "cassowary", + "compact_str 0.8.1", + "crossterm", + "indoc", + "instability", + "itertools 0.13.0", + "lru", + "paste", + "strum", + "unicode-segmentation", + "unicode-truncate", + "unicode-width 0.2.0", +] + +[[package]] +name = "raw-window-handle" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539" + +[[package]] +name = "raw-window-metal" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40d213455a5f1dc59214213c7330e074ddf8114c9a42411eb890c767357ce135" +dependencies = [ + "objc2", + "objc2-core-foundation", + "objc2-foundation", + "objc2-quartz-core", +] + [[package]] name = "rawpointer" version = "0.2.1" @@ -2027,6 +3938,41 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "rdev" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00552ca2dc2f93b84cd7b5581de49549411e4e41d89e1c691bcb93dc4be360c3" +dependencies = [ + "cocoa", + "core-foundation 0.7.0", + "core-foundation-sys 0.7.0", + "core-graphics 0.19.2", + "lazy_static", + "libc", + "winapi", + "x11", +] + +[[package]] +name = "read-fonts" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b634fabf032fab15307ffd272149b622260f55974d9fad689292a5d33df02e5" +dependencies = [ + "bytemuck", + "font-types", +] + +[[package]] +name = "realfft" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f821338fddb99d089116342c46e9f1fbf3828dba077674613e734e01d6ea8677" +dependencies = [ + "rustfft", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -2047,6 +3993,17 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "redox_users" +version = "0.5.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" +dependencies = [ + "getrandom 0.2.16", + "libredox", + "thiserror 2.0.17", +] + [[package]] name = "regex" version = "1.12.2" @@ -2076,6 +4033,12 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "renderdoc-sys" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b30a45b0cd0bcca8037f3d0dc3421eaf95327a17cad11964fb8179b4fc4832" + [[package]] name = "ring" version = "0.17.14" @@ -2121,12 +4084,27 @@ dependencies = [ "smallvec", ] +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc-hash" version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustfft" version = "6.4.1" @@ -2141,6 +4119,19 @@ dependencies = [ "transpose", ] +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags 2.10.0", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.52.0", +] + [[package]] name = "rustix" version = "1.1.2" @@ -2150,7 +4141,7 @@ dependencies = [ "bitflags 2.10.0", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.11.0", "windows-sys 0.61.2", ] @@ -2219,6 
+4210,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.2.0" @@ -2232,8 +4229,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ "bitflags 2.10.0", - "core-foundation", - "core-foundation-sys", + "core-foundation 0.9.4", + "core-foundation-sys 0.8.7", "libc", "security-framework-sys", ] @@ -2244,10 +4241,16 @@ version = "2.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" dependencies = [ - "core-foundation-sys", + "core-foundation-sys 0.8.7", "libc", ] +[[package]] +name = "self_cell" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89" + [[package]] name = "semver" version = "1.0.27" @@ -2281,7 +4284,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2306,6 +4309,15 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_spanned" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6662b5879511e06e8999a8a235d848113e942c9124f211511b16466ee2995f26" +dependencies = [ + "serde_core", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -2321,6 +4333,27 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "signal-hook" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d881a16cf4426aa584979d30bd82cb33429027e42122b169753d6ef1085ed6e2" +dependencies = [ + "libc", + "signal-hook-registry", +] + +[[package]] +name = "signal-hook-mio" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc" +dependencies = [ + "libc", + "mio 1.1.0", + "signal-hook", +] + [[package]] name = "signal-hook-registry" version = "1.4.7" @@ -2336,12 +4369,67 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +[[package]] +name = "skrifa" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fbdfe3d2475fbd7ddd1f3e5cf8288a30eb3e5f95832829570cd88115a7434ac" +dependencies = [ + "bytemuck", + "read-fonts", +] + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "slotmap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdd58c3c93c3d278ca835519292445cb4b0d4dc59ccfdf7ceadaab3f8aeb4038" +dependencies = [ + "version_check", +] + [[package]] name = "smallvec" version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "smithay-client-toolkit" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0512da38f5e2b31201a93524adb8d3136276fa4fe4aafab4e1f727a82b534cc0" +dependencies = [ + "bitflags 2.10.0", + "bytemuck", + "calloop", + "calloop-wayland-source", + "cursor-icon", + "libc", + "log", + "memmap2", + "pkg-config", + "rustix 1.1.2", + "thiserror 2.0.17", + "wayland-backend", + "wayland-client", + "wayland-csd-frame", + 
"wayland-cursor", + "wayland-protocols", + "wayland-protocols-experimental", + "wayland-protocols-misc", + "wayland-protocols-wlr", + "wayland-scanner", + "xkbcommon", + "xkeysym", +] + [[package]] name = "socket2" version = "0.6.1" @@ -2363,6 +4451,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "spirv" +version = "0.4.0+sdk-1.4.341.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9571ea910ebd84c86af4b3ed27f9dbdc6ad06f17c5f96146b2b671e2976744f" +dependencies = [ + "bitflags 2.10.0", +] + [[package]] name = "spm_precompiled" version = "0.1.4" @@ -2399,6 +4496,28 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.117", +] + [[package]] name = "subtle" version = "2.6.1" @@ -2407,9 +4526,19 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.111" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +checksum = 
"e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -2417,14 +4546,90 @@ dependencies = [ ] [[package]] -name = "synstructure" -version = "0.13.2" +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "system-deps" +version = "6.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e535eb8dded36d55ec13eddacd30dec501792ff23a0b1682c38601b8cf2349" +dependencies = [ + "cfg-expr 0.15.8", + "heck 0.5.0", + "pkg-config", + "toml 0.8.23", + "version-compare", +] + +[[package]] +name = "system-deps" +version = "7.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "396a35feb67335377e0251fcbc1092fc85c484bd4e3a7a54319399da127796e7" +dependencies = [ + "cfg-expr 0.20.7", + "heck 0.5.0", + "pkg-config", + "toml 1.1.0+spec-1.1.0", + "version-compare", +] + +[[package]] +name = "tao" +version = "0.32.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63c8b1020610b9138dd7b1e06cf259ae91aa05c30f3bd0d6b42a03997b92dec1" +dependencies = [ + "bitflags 2.10.0", + "core-foundation 0.10.1", + "core-graphics 0.24.0", + "crossbeam-channel", + "dispatch", + "dlopen2", + "dpi", + "gdkwayland-sys", + "gdkx11-sys", + "gtk", + "jni", + "lazy_static", + "libc", + "log", + "ndk 0.9.0", + "ndk-context", + "ndk-sys 0.6.0+11769913", + "objc2", + "objc2-app-kit", + "objc2-foundation", + "once_cell", + "parking_lot", + "raw-window-handle", + "scopeguard", + "tao-macros", + "unicode-segmentation", + "url", + "windows 0.60.0", + "windows-core 0.60.1", + "windows-version", + "x11-dl", +] + +[[package]] +name = "tao-macros" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +checksum = "f4e16beb8b2ac17db28eab8bca40e62dbfbb34c0fcdc6d9826b11b7b5d047dfd" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2433,6 +4638,18 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + +[[package]] +name = "target-lexicon" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df7f62577c25e07834649fc3b39fafdc597c0a3527dc1c60129201ccfcbaa50c" + [[package]] name = "tempfile" version = "3.23.0" @@ -2442,7 +4659,7 @@ dependencies = [ "fastrand", "getrandom 0.3.4", "once_cell", - "rustix", + "rustix 1.1.2", "windows-sys 0.61.2", ] @@ -2472,7 +4689,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2483,7 +4700,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2495,6 +4712,25 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "num-conv", + "powerfmt", + "serde_core", + "time-core", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + [[package]] name = "tinystr" version = "0.8.2" @@ -2544,7 +4780,7 @@ checksum = 
"b238e22d44a15349529690fb07bd645cf58149a1b1e44d6cb5bd1641ff1a6223" dependencies = [ "ahash", "aho-corasick", - "compact_str", + "compact_str 0.9.0", "dary_heap", "derive_builder", "esaxx-rs", @@ -2594,7 +4830,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2604,11 +4840,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" dependencies = [ "serde", - "serde_spanned", + "serde_spanned 0.6.9", "toml_datetime 0.6.11", "toml_edit 0.22.27", ] +[[package]] +name = "toml" +version = "1.1.0+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8195ca05e4eb728f4ba94f3e3291661320af739c4e43779cbdfae82ab239fcc" +dependencies = [ + "indexmap", + "serde_core", + "serde_spanned 1.1.1", + "toml_datetime 1.1.1+spec-1.1.0", + "toml_parser", + "toml_writer", + "winnow 1.0.2", +] + [[package]] name = "toml_datetime" version = "0.6.11" @@ -2627,6 +4878,37 @@ dependencies = [ "serde_core", ] +[[package]] +name = "toml_datetime" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.19.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" +dependencies = [ + "indexmap", + "toml_datetime 0.6.11", + "winnow 0.5.40", +] + +[[package]] +name = "toml_edit" +version = "0.20.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70f427fce4d84c72b5b732388bf4a9f4531b53f74e2887e3ecb2481f68f66d81" +dependencies = [ + "indexmap", + "toml_datetime 0.6.11", + "winnow 0.5.40", +] + [[package]] name = "toml_edit" version = "0.22.27" 
@@ -2635,10 +4917,10 @@ checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ "indexmap", "serde", - "serde_spanned", + "serde_spanned 0.6.9", "toml_datetime 0.6.11", "toml_write", - "winnow", + "winnow 0.7.14", ] [[package]] @@ -2650,16 +4932,16 @@ dependencies = [ "indexmap", "toml_datetime 0.7.3", "toml_parser", - "winnow", + "winnow 0.7.14", ] [[package]] name = "toml_parser" -version = "1.0.4" +version = "1.1.2+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" dependencies = [ - "winnow", + "winnow 1.0.2", ] [[package]] @@ -2668,12 +4950,19 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" +[[package]] +name = "toml_writer" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "756daf9b1013ebe47a8776667b466417e2d4c5679d441c26230efd9ef78692db" + [[package]] name = "tracing" version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -2687,7 +4976,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -2739,6 +5028,36 @@ dependencies = [ "strength_reduce", ] +[[package]] +name = "tray-icon" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e85aa143ceb072062fc4d6356c1b520a51d636e7bc8e77ec94be3608e5e80c" +dependencies = [ + "crossbeam-channel", + "dirs 6.0.0", + "libappindicator", + "muda", + "objc2", + "objc2-app-kit", + 
"objc2-core-foundation", + "objc2-core-graphics", + "objc2-foundation", + "once_cell", + "png", + "thiserror 2.0.17", + "windows-sys 0.60.2", +] + +[[package]] +name = "type-map" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb30dbbd9036155e74adad6812e9898d03ec374946234fbcebd5dfc7b9187b90" +dependencies = [ + "rustc-hash 2.1.1", +] + [[package]] name = "unicode-ident" version = "1.0.22" @@ -2760,6 +5079,29 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "unicode-truncate" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3644627a5af5fa321c95b9b235a72fd24cd29c648c2c379431e6628655627bf" +dependencies = [ + "itertools 0.13.0", + "unicode-segmentation", + "unicode-width 0.1.14", +] + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -2880,6 +5222,38 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "vello_common" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bd1a4c633ce09e7d713df1a6e036644a125e15e0c169cfb5180ddf5836ca04b" +dependencies = [ + "bytemuck", + "fearless_simd", + "hashbrown 0.16.1", + "log", + "peniko", + "skrifa", + "smallvec", +] + +[[package]] +name = "vello_cpu" +version = "0.0.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0162bfe48aabf6a9fdcd401b628c7d9f260c2cbabb343c70a65feba6f7849edc" +dependencies = [ + "bytemuck", + "hashbrown 0.16.1", + "vello_common", +] + +[[package]] +name = "version-compare" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c2856837ef78f57382f06b2b8563a2f512f7185d732608fd9176cb3b8edf0e" + [[package]] name = "version_check" version = "0.9.5" @@ -2888,20 +5262,33 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "voxtype" -version = "0.6.6" +version = "0.7.0-rc1" dependencies = [ "anyhow", "async-trait", + "bytemuck", + "cairo-rs 0.22.0", + "calloop", + "calloop-wayland-source", "chrono", "clap", "clap_mangen", + "core-foundation 0.10.1", + "core-graphics 0.24.0", "cpal", + "crossterm", "directories", - "dirs", + "dirs 5.0.1", + "egui", + "egui-wgpu", "evdev", + "glib 0.22.7", + "gtk4", + "gtk4-layer-shell", "hound", "inotify 0.10.2", "libc", + "mac-notification-sys", "ndarray 0.16.1", "nix 0.29.0", "notify", @@ -2909,21 +5296,35 @@ dependencies = [ "ort", "parakeet-rs", "pidlock", + "pollster", + "ratatui", + "raw-window-handle", + "rdev", "regex", "rodio", "rusqlite", "rustfft", + "semver", "serde", "serde_json", + "smithay-client-toolkit", + "tao", "tempfile", "thiserror 1.0.69", "tokenizers 0.20.4", "tokio", - "toml", + "toml 0.8.23", + "toml_edit 0.22.27", "tracing", "tracing-subscriber", + "tray-icon", "ureq 2.12.1", "uuid", + "wayland-backend", + "wayland-client", + "wayland-protocols", + "wayland-protocols-wlr", + "wgpu", "which", "whisper-rs", ] @@ -2963,132 +5364,407 @@ dependencies = [ ] [[package]] -name = "wasm-bindgen" -version = "0.2.105" +name = "wasm-bindgen" +version = "0.2.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df52b6d9b87e0c74c9edfa1eb2d9bf85e5d63515474513aa50fa181b3c4f5db1" +dependencies = [ + "cfg-if", + "once_cell", + 
"rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af934872acec734c2d80e6617bbb5ff4f12b052dd8e6332b0817bce889516084" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b1041f495fb322e64aca85f5756b2172e35cd459376e67f2a6c9dffcedb103" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dcd0ff20416988a18ac686d4d4d0f6aae9ebf08a389ff5d29012b05af2a1b41" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.117", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49757b3c82ebf16c57d69365a142940b384176c24df52a087fb748e2085359ea" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.10.0", + "hashbrown 0.15.5", + "indexmap", + 
"semver", +] + +[[package]] +name = "wayland-backend" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2857dd20b54e916ec7253b3d6b4d5c4d7d4ca2c33c2e11c6c76a99bd8744755d" +dependencies = [ + "cc", + "downcast-rs", + "rustix 1.1.2", + "scoped-tls", + "smallvec", + "wayland-sys", +] + +[[package]] +name = "wayland-client" +version = "0.31.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645c7c96bb74690c3189b5c9cb4ca1627062bb23693a4fad9d8c3de958260144" +dependencies = [ + "bitflags 2.10.0", + "rustix 1.1.2", + "wayland-backend", + "wayland-scanner", +] + +[[package]] +name = "wayland-csd-frame" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "625c5029dbd43d25e6aa9615e88b829a5cad13b2819c4ae129fdbb7c31ab4c7e" +dependencies = [ + "bitflags 2.10.0", + "cursor-icon", + "wayland-backend", +] + +[[package]] +name = "wayland-cursor" +version = "0.31.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a52d18780be9b1314328a3de5f930b73d2200112e3849ca6cb11822793fb34d" +dependencies = [ + "rustix 1.1.2", + "wayland-client", + "xcursor", +] + +[[package]] +name = "wayland-protocols" +version = "0.32.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "563a85523cade2429938e790815fd7319062103b9f4a2dc806e9b53b95982d8f" +dependencies = [ + "bitflags 2.10.0", + "wayland-backend", + "wayland-client", + "wayland-scanner", +] + +[[package]] +name = "wayland-protocols-experimental" +version = "20250721.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40a1f863128dcaaec790d7b4b396cc9b9a7a079e878e18c47e6c2d2c5a8dcbb1" +dependencies = [ + "bitflags 2.10.0", + "wayland-backend", + "wayland-client", + "wayland-protocols", + "wayland-scanner", +] + +[[package]] +name = "wayland-protocols-misc" +version = "0.3.12" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e9567599ef23e09b8dad6e429e5738d4509dfc46b3b21f32841a304d16b29c8" +dependencies = [ + "bitflags 2.10.0", + "wayland-backend", + "wayland-client", + "wayland-protocols", + "wayland-scanner", +] + +[[package]] +name = "wayland-protocols-wlr" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb04e52f7836d7c7976c78ca0250d61e33873c34156a2a1fc9474828ec268234" +dependencies = [ + "bitflags 2.10.0", + "wayland-backend", + "wayland-client", + "wayland-protocols", + "wayland-scanner", +] + +[[package]] +name = "wayland-scanner" +version = "0.31.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c324a910fd86ebdc364a3e61ec1f11737d3b1d6c273c0239ee8ff4bc0d24b4a" +dependencies = [ + "proc-macro2", + "quick-xml", + "quote", +] + +[[package]] +name = "wayland-sys" +version = "0.31.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8eab23fefc9e41f8e841df4a9c707e8a8c4ed26e944ef69297184de2785e3be" +dependencies = [ + "dlib", + "log", + "once_cell", + "pkg-config", +] + +[[package]] +name = "web-sys" +version = "0.3.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" +checksum = "2eadbac71025cd7b0834f20d1fe8472e8495821b4e9801eb0a60bd1f19827602" dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", - "wasm-bindgen-shared", + "js-sys", + "wasm-bindgen", ] [[package]] -name = "wasm-bindgen-futures" -version = "0.4.55" +name = "web-time" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" dependencies = [ - "cfg-if", "js-sys", - "once_cell", "wasm-bindgen", - "web-sys", ] [[package]] -name = 
"wasm-bindgen-macro" -version = "0.2.105" +name = "webpki-root-certs" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" +checksum = "36a29fc0408b113f68cf32637857ab740edfafdf460c326cd2afaa2d84cc05dc" dependencies = [ - "quote", - "wasm-bindgen-macro-support", + "rustls-pki-types", ] [[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.105" +name = "webpki-roots" +version = "0.26.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" dependencies = [ - "bumpalo", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", + "webpki-roots 1.0.5", ] [[package]] -name = "wasm-bindgen-shared" -version = "0.2.105" +name = "webpki-roots" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" +checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" dependencies = [ - "unicode-ident", + "rustls-pki-types", ] [[package]] -name = "wasm-encoder" -version = "0.244.0" +name = "wgpu" +version = "29.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +checksum = "72c239a9a747bbd379590985bac952c2e53cb19873f7072b3370c6a6a8e06837" dependencies = [ - "leb128fmt", - "wasmparser", + "arrayvec", + "bitflags 2.10.0", + "bytemuck", + "cfg-if", + "cfg_aliases", + "document-features", + "hashbrown 0.16.1", + "js-sys", + "log", + "portable-atomic", + "profiling", + "raw-window-handle", + "smallvec", + "static_assertions", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "wgpu-core", + "wgpu-hal", + "wgpu-types", ] [[package]] -name = "wasm-metadata" -version = 
"0.244.0" +name = "wgpu-core" +version = "29.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +checksum = "1e80ac6cf1895df6342f87d975162108f9d98772a0d74bc404ab7304ac29469e" dependencies = [ - "anyhow", + "arrayvec", + "bit-set", + "bit-vec", + "bitflags 2.10.0", + "bytemuck", + "cfg_aliases", + "document-features", + "hashbrown 0.16.1", "indexmap", - "wasm-encoder", - "wasmparser", + "log", + "naga", + "once_cell", + "parking_lot", + "portable-atomic", + "profiling", + "raw-window-handle", + "rustc-hash 1.1.0", + "smallvec", + "thiserror 2.0.17", + "wgpu-core-deps-emscripten", + "wgpu-core-deps-windows-linux-android", + "wgpu-hal", + "wgpu-naga-bridge", + "wgpu-types", ] [[package]] -name = "wasmparser" -version = "0.244.0" +name = "wgpu-core-deps-emscripten" +version = "29.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +checksum = "ef043bf135cc68b6f667c55ff4e345ce2b5924d75bad36a47921b0287ca4b24a" dependencies = [ - "bitflags 2.10.0", - "hashbrown 0.15.5", - "indexmap", - "semver", + "wgpu-hal", ] [[package]] -name = "web-sys" -version = "0.3.82" +name = "wgpu-core-deps-windows-linux-android" +version = "29.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" +checksum = "725d5c006a8c02967b6d93ef04f6537ec4593313e330cfe86d9d3f946eb90f28" dependencies = [ - "js-sys", - "wasm-bindgen", + "wgpu-hal", ] [[package]] -name = "webpki-root-certs" -version = "1.0.5" +name = "wgpu-hal" +version = "29.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36a29fc0408b113f68cf32637857ab740edfafdf460c326cd2afaa2d84cc05dc" +checksum = "89a47aef47636562f3937285af4c44b4b5b404b46577471411cc5313a921da7e" dependencies = [ - "rustls-pki-types", + 
"android_system_properties", + "arrayvec", + "ash", + "bitflags 2.10.0", + "bytemuck", + "cfg-if", + "cfg_aliases", + "glow", + "glutin_wgl_sys", + "gpu-allocator", + "gpu-descriptor", + "hashbrown 0.16.1", + "js-sys", + "khronos-egl", + "libc", + "libloading 0.8.9", + "log", + "naga", + "ndk-sys 0.6.0+11769913", + "objc2", + "ordered-float", + "parking_lot", + "portable-atomic", + "portable-atomic-util", + "profiling", + "raw-window-handle", + "raw-window-metal", + "renderdoc-sys", + "smallvec", + "thiserror 2.0.17", + "wasm-bindgen", + "wayland-sys", + "web-sys", + "wgpu-naga-bridge", + "wgpu-types", + "windows 0.62.2", ] [[package]] -name = "webpki-roots" -version = "0.26.11" +name = "wgpu-naga-bridge" +version = "29.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +checksum = "7b4684f4410da0cf95a4cb63bb5edaac022461dedb6adf0b64d0d9b5f6890d51" dependencies = [ - "webpki-roots 1.0.5", + "naga", + "wgpu-types", ] [[package]] -name = "webpki-roots" -version = "1.0.5" +name = "wgpu-types" +version = "29.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" +checksum = "ec2675540fb1a5cfa5ef122d3d5f390e2c75711a0b946410f2d6ac3a0f77d1f6" dependencies = [ - "rustls-pki-types", + "bitflags 2.10.0", + "bytemuck", + "js-sys", + "log", + "raw-window-handle", + "web-sys", ] [[package]] @@ -3099,7 +5775,7 @@ checksum = "24d643ce3fd3e5b54854602a080f34fb10ab75e0b813ee32d00ca2b44fa74762" dependencies = [ "either", "env_home", - "rustix", + "rustix 1.1.2", "winsafe", ] @@ -3167,6 +5843,49 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddf874e74c7a99773e62b1c671427abf01a425e77c3d3fb9fb1e4883ea934529" +dependencies = [ + "windows-collections 0.1.1", + 
"windows-core 0.60.1", + "windows-future 0.1.1", + "windows-link 0.1.3", + "windows-numerics 0.1.1", +] + +[[package]] +name = "windows" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580" +dependencies = [ + "windows-collections 0.3.2", + "windows-core 0.62.2", + "windows-future 0.3.2", + "windows-numerics 0.3.1", +] + +[[package]] +name = "windows-collections" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5467f79cc1ba3f52ebb2ed41dbb459b8e7db636cc3429458d9a852e15bc24dec" +dependencies = [ + "windows-core 0.60.1", +] + +[[package]] +name = "windows-collections" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610" +dependencies = [ + "windows-core 0.62.2", +] + [[package]] name = "windows-core" version = "0.54.0" @@ -3177,17 +5896,62 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-core" +version = "0.60.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca21a92a9cae9bf4ccae5cf8368dce0837100ddf6e6d57936749e85f152f6247" +dependencies = [ + "windows-implement 0.59.0", + "windows-interface", + "windows-link 0.1.3", + "windows-result 0.3.4", + "windows-strings 0.3.1", +] + [[package]] name = "windows-core" version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ - "windows-implement", + "windows-implement 0.60.2", "windows-interface", - "windows-link", + "windows-link 0.2.1", "windows-result 0.4.1", - "windows-strings", + "windows-strings 0.5.1", +] + +[[package]] +name = "windows-future" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a787db4595e7eb80239b74ce8babfb1363d8e343ab072f2ffe901400c03349f0" +dependencies = [ + "windows-core 0.60.1", + "windows-link 0.1.3", +] + +[[package]] +name = "windows-future" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb" +dependencies = [ + "windows-core 0.62.2", + "windows-link 0.2.1", + "windows-threading", +] + +[[package]] +name = "windows-implement" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83577b051e2f49a058c308f17f273b570a6a758386fc291b5f6a934dd84e48c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] @@ -3198,7 +5962,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -3209,15 +5973,41 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-numerics" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "005dea54e2f6499f2cee279b8f703b3cf3b5734a2d8d21867c8f44003182eeed" +dependencies = [ + "windows-core 0.60.1", + "windows-link 0.1.3", +] + +[[package]] +name = "windows-numerics" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26" +dependencies = [ + "windows-core 0.62.2", + 
"windows-link 0.2.1", +] + [[package]] name = "windows-result" version = "0.1.2" @@ -3227,13 +6017,31 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link 0.1.3", +] + [[package]] name = "windows-result" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - "windows-link", + "windows-link 0.2.1", +] + +[[package]] +name = "windows-strings" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" +dependencies = [ + "windows-link 0.1.3", ] [[package]] @@ -3242,7 +6050,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -3287,7 +6095,7 @@ version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -3342,7 +6150,7 @@ version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" dependencies = [ - "windows-link", + "windows-link 0.2.1", "windows_aarch64_gnullvm 0.53.1", "windows_aarch64_msvc 0.53.1", "windows_i686_gnu 0.53.1", @@ -3353,6 +6161,24 @@ dependencies = [ "windows_x86_64_msvc 0.53.1", ] +[[package]] +name = "windows-threading" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37" +dependencies = [ + "windows-link 0.2.1", +] + +[[package]] +name = "windows-version" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4060a1da109b9d0326b7262c8e12c84df67cc0dbc9e33cf49e01ccc2eb63631" +dependencies = [ + "windows-link 0.2.1", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.2" @@ -3533,6 +6359,15 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] + [[package]] name = "winnow" version = "0.7.14" @@ -3542,6 +6377,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "winnow" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ee1708bef14716a11bae175f579062d4554d95be2c6829f518df847b7b3fdd0" + [[package]] name = "winsafe" version = "0.0.19" @@ -3570,7 +6411,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" dependencies = [ "anyhow", - "heck", + "heck 0.5.0", "wit-parser", ] @@ -3581,10 +6422,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", - "heck", + "heck 0.5.0", "indexmap", "prettyplease", - "syn", + "syn 2.0.117", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -3600,7 +6441,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn", + "syn 2.0.117", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -3657,6 +6498,59 @@ dependencies = [ "tap", ] +[[package]] +name = "x11" +version = "2.21.0" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "502da5464ccd04011667b11c435cb992822c2c0dbde1770c988480d312a0db2e" +dependencies = [ + "libc", + "pkg-config", +] + +[[package]] +name = "x11-dl" +version = "2.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38735924fedd5314a6e548792904ed8c6de6636285cb9fec04d5b1db85c1516f" +dependencies = [ + "libc", + "once_cell", + "pkg-config", +] + +[[package]] +name = "xcursor" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec9e4a500ca8864c5b47b8b482a73d62e4237670e5b5f1d6b9e3cae50f28f2b" + +[[package]] +name = "xkbcommon" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d66ca9352cbd4eecbbc40871d8a11b4ac8107cfc528a6e14d7c19c69d0e1ac9" +dependencies = [ + "libc", + "memmap2", + "xkeysym", +] + +[[package]] +name = "xkeysym" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9cc00251562a284751c9973bace760d86c0276c471b4be569fe6b068ee97a56" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "xml-rs" +version = "0.8.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae8337f8a065cfc972643663ea4279e04e7256de865aa66fe25cec5fb912d3f" + [[package]] name = "xtask" version = "0.1.0" @@ -3683,7 +6577,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", "synstructure", ] @@ -3704,7 +6598,7 @@ checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -3724,7 +6618,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", "synstructure", ] @@ -3764,7 +6658,7 @@ checksum = 
"eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index d52058bb..d39a3e27 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ members = ["xtask"] [package] name = "voxtype" -version = "0.6.6" +version = "0.7.0-rc1" edition = "2021" authors = ["Peter Jackson", "Jean-Paul van Tillo", "Máté Rémiás", "Rob Zolkos", "Dan Heuckeroth", "Igor Warzocha", "Julian Kaiser", "Kevin Miller", "konnsim", "reisset", "Zubair", "Loki Coyote", "Umesh", "Barrett Ruth", "André Silva", "Chmouel Boudjnah", "Christopher Albert", "Phuoc Thinh Vu", "Alexander Bosu-Kellett", "ayoahha", "Toizi", "kakapt", "Rinor Maloku", "Sami Jawhar", "jan Lemata", "Kai Stark", "graysky"] description = "Push-to-talk voice-to-text for Wayland" @@ -17,12 +17,13 @@ categories = ["multimedia::audio", "accessibility"] tokio = { version = "1", features = ["full", "signal", "sync", "time", "process", "io-util"] } # CLI -clap = { version = "4", features = ["derive"] } +clap = { version = "4", features = ["derive", "env"] } # Configuration serde = { version = "1", features = ["derive"] } toml = "0.8" directories = "5" +dirs = "5" # Logging tracing = "0.1" @@ -38,11 +39,12 @@ regex = "1" # Async traits async-trait = "0.1" -# Input handling (evdev for kernel-level key events) -evdev = "0.12" +# Common dependencies libc = "0.2" -inotify = "0.10" # Watch /dev/input for device hotplug -nix = { version = "0.29", features = ["signal", "process"] } # Unix signals for IPC + +# Menu bar and global hotkeys (cross-platform) +rdev = "0.5.3" +tray-icon = "0.21.3" # Audio capture cpal = "0.15" @@ -54,6 +56,9 @@ ureq = { version = "2", features = ["json"] } # JSON parsing (for CLI backend) serde_json = "1" +# Version comparison for update checking +semver = "1" + # CLI path resolution (for CLI backend) which = "7" @@ -67,10 +72,10 @@ rodio = { version = "0.19", default-features = false, features 
= ["wav"] } whisper-rs = "0.16.0" # Parakeet speech-to-text (optional, ONNX-based) -parakeet-rs = { version = "0.3", optional = true } +parakeet-rs = { version = "0.3.5", optional = true } # ONNX-based ASR engines (Moonshine, SenseVoice, Paraformer, Dolphin, Omnilingual) -ort = { version = "2.0.0-rc.11", optional = true } +ort = { version = "2.0.0-rc.12", optional = true, default-features = false, features = ["std", "ndarray", "api-24"] } ndarray = { version = "0.16", optional = true } tokenizers = { version = "0.20", optional = true, default-features = false, features = ["onig"] } rustfft = { version = "6", optional = true } @@ -84,13 +89,72 @@ notify = "6" # Single instance check pidlock = "0.1" +# TUI for `voxtype configure` +ratatui = "0.29" +crossterm = "0.28" +toml_edit = "0.22" + # Meeting mode (Pro feature) uuid = { version = "1", features = ["v4", "serde"] } chrono = { version = "0.4", features = ["serde"] } rusqlite = { version = "0.32", features = ["bundled"] } +# OSD native frontend (SCTK + wgpu + egui-wgpu). All optional, gated by the +# `osd-native` feature so users not opting in don't pay the build cost. 
+smithay-client-toolkit = { version = "0.20", optional = true } +calloop = { version = "0.14", optional = true } +calloop-wayland-source = { version = "0.4", optional = true } +wayland-client = { version = "0.31", optional = true, features = ["system"] } +wayland-backend = { version = "0.3", optional = true, features = ["client_system"] } +wayland-protocols = { version = "0.32", optional = true, features = ["client", "staging"] } +wayland-protocols-wlr = { version = "0.3", optional = true, features = ["client"] } +wgpu = { version = "29", optional = true, default-features = false, features = ["vulkan", "gles", "wgsl", "std"] } +egui = { version = "0.34", optional = true } +egui-wgpu = { version = "0.34", optional = true, default-features = false } +raw-window-handle = { version = "0.6", optional = true } +pollster = { version = "0.4", optional = true } +bytemuck = { version = "1", optional = true } + +# OSD GTK4 frontend (gated on `osd-gtk4` feature; dyn-links system GTK4). +gtk4 = { version = "0.11", optional = true } +gtk4-layer-shell = { version = "0.8", optional = true } +cairo-rs = { version = "0.22", optional = true } +glib = { version = "0.22", optional = true } + +[target.'cfg(target_os = "macos")'.dependencies] +tao = "0.32" +core-graphics = "0.24" +core-foundation = "0.10" +dirs = "5" +mac-notification-sys = "0.6" # Native macOS notifications + +[target.'cfg(target_os = "linux")'.dependencies] +# Input handling (evdev for kernel-level key events) +evdev = "0.12" +inotify = "0.10" # Watch /dev/input for device hotplug +nix = { version = "0.29", features = ["signal", "process"] } # Unix signals for IPC + [features] default = [] +# OSD frontends. Each binary is gated on its own feature so users can +# build only the rendering stack they need. Real GUI deps land in +# Commits 4a/4b; the lists below grow then. 
+osd-native = [ + "dep:smithay-client-toolkit", + "dep:calloop", + "dep:calloop-wayland-source", + "dep:wayland-client", + "dep:wayland-backend", + "dep:wayland-protocols", + "dep:wayland-protocols-wlr", + "dep:wgpu", + "dep:egui", + "dep:egui-wgpu", + "dep:raw-window-handle", + "dep:pollster", + "dep:bytemuck", +] +osd-gtk4 = ["dep:gtk4", "dep:gtk4-layer-shell", "dep:cairo-rs", "dep:glib"] gpu-vulkan = ["whisper-rs/vulkan"] gpu-cuda = ["whisper-rs/cuda"] gpu-metal = ["whisper-rs/metal"] @@ -101,31 +165,66 @@ ml-diarization = ["dep:ort", "dep:ndarray"] parakeet = ["dep:parakeet-rs"] parakeet-cuda = ["parakeet", "parakeet-rs/cuda"] parakeet-tensorrt = ["parakeet", "parakeet-rs/tensorrt"] -parakeet-rocm = ["parakeet", "parakeet-rs/rocm"] +parakeet-migraphx = ["parakeet", "parakeet-rs/migraphx"] +parakeet-coreml = ["parakeet", "parakeet-rs/coreml"] # Dynamic loading for system ONNX Runtime (used by Nix builds) parakeet-load-dynamic = ["parakeet", "parakeet-rs/load-dynamic"] # Shared ONNX dependencies for engines using fbank/CTC preprocessing onnx-common = ["dep:ort", "dep:ndarray", "dep:rustfft"] -# Moonshine backend (ONNX-based, encoder-decoder ASR) +# Marker features that aggregate per-engine GPU EP flags. Each engine's +# session builder calls a shared helper (src/transcribe/onnx_ep.rs) that +# checks these markers to decide which EPs to register at runtime. +# Enabling any *-cuda feature pulls onnx-cuda-enabled in transitively; +# same for *-migraphx and *-tensorrt. +onnx-cuda-enabled = ["ort/cuda"] +onnx-migraphx-enabled = ["ort/migraphx"] +onnx-tensorrt-enabled = ["ort/tensorrt"] +# Moonshine backend (ONNX-based, encoder-decoder ASR). +# No migraphx feature — MIGraphX 7.2 can't compile Moonshine's +# encoder-decoder `If` op (different output shapes between branches). +# AMD users run Moonshine on CPU. 
moonshine = ["onnx-common", "dep:tokenizers"] -moonshine-cuda = ["moonshine", "ort/cuda"] -moonshine-tensorrt = ["moonshine", "ort/tensorrt"] -# SenseVoice backend (ONNX-based, CTC encoder-only ASR) +moonshine-cuda = ["moonshine", "onnx-cuda-enabled"] +moonshine-tensorrt = ["moonshine", "onnx-tensorrt-enabled"] +# SenseVoice backend (ONNX-based, CTC encoder-only ASR). +# No migraphx feature — MIGraphX rejects the encoder's Where-op +# broadcast pattern (axis mismatch). AMD users run SenseVoice on CPU. sensevoice = ["onnx-common"] -sensevoice-cuda = ["sensevoice", "ort/cuda"] -sensevoice-tensorrt = ["sensevoice", "ort/tensorrt"] -# Paraformer backend (FunASR ONNX-based CTC encoder) +sensevoice-cuda = ["sensevoice", "onnx-cuda-enabled"] +sensevoice-tensorrt = ["sensevoice", "onnx-tensorrt-enabled"] +# Paraformer backend (FunASR ONNX-based CTC encoder). +# No migraphx feature — MIGraphX EP segfaults during model load. +# AMD users run Paraformer on CPU. paraformer = ["onnx-common"] -paraformer-cuda = ["paraformer", "ort/cuda"] -paraformer-tensorrt = ["paraformer", "ort/tensorrt"] -# Dolphin backend (ONNX-based CTC encoder, dictation-optimized) +paraformer-cuda = ["paraformer", "onnx-cuda-enabled"] +paraformer-tensorrt = ["paraformer", "onnx-tensorrt-enabled"] +# Dolphin backend (ONNX-based CTC encoder, dictation-optimized). +# No migraphx feature — MIGraphX rejects the encoder's Slice op shape. +# AMD users run Dolphin on CPU. dolphin = ["onnx-common"] -dolphin-cuda = ["dolphin", "ort/cuda"] -dolphin-tensorrt = ["dolphin", "ort/tensorrt"] -# Omnilingual backend (FunASR ONNX-based, 50+ languages) +dolphin-cuda = ["dolphin", "onnx-cuda-enabled"] +dolphin-tensorrt = ["dolphin", "onnx-tensorrt-enabled"] +# Omnilingual backend (FunASR ONNX-based, 50+ languages). +# No migraphx feature — graph compiles but produces garbled output and +# pays a 65s+ compile penalty per first inference. AMD users run on CPU. 
omnilingual = ["onnx-common"] -omnilingual-cuda = ["omnilingual", "ort/cuda"] -omnilingual-tensorrt = ["omnilingual", "ort/tensorrt"] +omnilingual-cuda = ["omnilingual", "onnx-cuda-enabled"] +omnilingual-tensorrt = ["omnilingual", "onnx-tensorrt-enabled"] +# Cohere Transcribe backend (ONNX-based encoder-decoder, Whisper-style task tokens). +# Top of the Open ASR Leaderboard. Wired into CLI/config/factory; users opt in +# via `cargo build --features cohere` and the model files are downloaded into +# the standard models directory like other ONNX engines. +cohere = ["onnx-common"] +cohere-cuda = ["cohere", "onnx-cuda-enabled"] +cohere-tensorrt = ["cohere", "onnx-tensorrt-enabled"] +# No cohere-migraphx feature: the cstr/cohere-transcribe-onnx-int8 export +# uses MatMulNBits with bits=8, which MIGraphX 7.2 doesn't support (only +# bits=4). The graph compile fails and inference crashes. AMD users run +# Cohere on CPU until we ship int4/FP16 model variants (see issue #?). +# Dynamic loading for system ONNX Runtime. Used by Nix builds and the +# AMD/MIGraphX binary, which pairs with the system `onnxruntime-rocm` +# package (no upstream pyke prebuilt ships the MIGraphX provider .so). +onnx-load-dynamic = ["ort/load-dynamic"] [build-dependencies] clap = { version = "4", features = ["derive"] } @@ -146,3 +245,21 @@ opt-level = 1 # Faster dev builds, whisper still usable [[bin]] name = "voxtype" path = "src/main.rs" + +[[bin]] +name = "voxtype-osd-native" +path = "src/bin/voxtype_osd_native/main.rs" +required-features = ["osd-native"] + +[[bin]] +name = "voxtype-osd-gtk4" +path = "src/bin/voxtype_osd_gtk4.rs" +required-features = ["osd-gtk4"] + +# Tiny launcher that picks between voxtype-osd-gtk4 and voxtype-osd-native +# at runtime based on config/env/CLI. Always built, no GUI deps; falls back +# to whichever frontend binary it finds on PATH if the preferred one isn't +# installed. 
+[[bin]] +name = "voxtype-osd" +path = "src/bin/voxtype_osd.rs" diff --git a/Dockerfile.avx512 b/Dockerfile.avx512 index ea1bdd91..000236b0 100644 --- a/Dockerfile.avx512 +++ b/Dockerfile.avx512 @@ -20,6 +20,12 @@ RUN apt-get update && apt-get install -y \ cmake \ pkg-config \ libasound2-dev \ + libx11-dev \ + libxi-dev \ + libxtst-dev \ + libgtk-3-dev \ + libglib2.0-dev \ + libappindicator3-dev \ git \ binutils \ && rm -rf /var/lib/apt/lists/* diff --git a/Dockerfile.build b/Dockerfile.build index b41c6e94..1b8d9f7d 100644 --- a/Dockerfile.build +++ b/Dockerfile.build @@ -23,6 +23,7 @@ ARG DEBIAN_FRONTEND=noninteractive ENV VERSION=${VERSION} # Install build dependencies (no Vulkan - see header comment) +# X11 and GTK deps required for rdev (hotkeys) and tray-icon crates RUN apt-get update && apt-get install -y \ curl \ build-essential \ @@ -30,6 +31,12 @@ RUN apt-get update && apt-get install -y \ cmake \ pkg-config \ libasound2-dev \ + libx11-dev \ + libxi-dev \ + libxtst-dev \ + libgtk-3-dev \ + libglib2.0-dev \ + libappindicator3-dev \ git \ binutils \ && rm -rf /var/lib/apt/lists/* @@ -73,5 +80,6 @@ RUN echo "=== Verifying AVX2 binary ===" \ # Output stage - copy binary to /output volume CMD mkdir -p /output \ && cp /tmp/voxtype-avx2 /output/voxtype-${VERSION}-linux-x86_64-avx2 \ + && chmod 755 /output/voxtype-${VERSION}-linux-x86_64-avx2 \ && echo "Binary copied to /output:" \ && ls -la /output/voxtype-* diff --git a/Dockerfile.onnx b/Dockerfile.onnx index b13ba66e..afc62cf9 100644 --- a/Dockerfile.onnx +++ b/Dockerfile.onnx @@ -33,6 +33,12 @@ RUN apt-get update && apt-get install -y \ cmake \ pkg-config \ libasound2-dev \ + libx11-dev \ + libxi-dev \ + libxtst-dev \ + libgtk-3-dev \ + libglib2.0-dev \ + libappindicator3-dev \ libssl-dev \ protobuf-compiler \ libprotobuf-dev \ @@ -62,7 +68,7 @@ ENV ORT_STRATEGY=download # Disable LTO for faster builds (can hang on TrueNAS) # Build with all ONNX engines -RUN cargo build --release --features 
parakeet,moonshine,sensevoice,paraformer,dolphin,omnilingual,ml-diarization \ +RUN cargo build --release --features parakeet,moonshine,sensevoice,paraformer,dolphin,omnilingual,cohere,ml-diarization \ --config 'profile.release.lto=false' \ --config 'profile.release.codegen-units=8' \ && cp target/release/voxtype /tmp/voxtype-onnx-avx2 diff --git a/Dockerfile.onnx-avx512 b/Dockerfile.onnx-avx512 index 45f7bfbc..b6cf38b9 100644 --- a/Dockerfile.onnx-avx512 +++ b/Dockerfile.onnx-avx512 @@ -48,7 +48,7 @@ ENV ORT_STRATEGY=download # Disable LTO for faster builds # Build with all ONNX engines -RUN cargo build --release --features parakeet,moonshine,sensevoice,paraformer,dolphin,omnilingual,ml-diarization \ +RUN cargo build --release --features parakeet,moonshine,sensevoice,paraformer,dolphin,omnilingual,cohere,ml-diarization \ --config 'profile.release.lto=false' \ --config 'profile.release.codegen-units=8' \ && cp target/release/voxtype /tmp/voxtype-onnx-avx512 diff --git a/Dockerfile.onnx-cuda b/Dockerfile.onnx-cuda deleted file mode 100644 index dc49c429..00000000 --- a/Dockerfile.onnx-cuda +++ /dev/null @@ -1,69 +0,0 @@ -# Build environment for voxtype with ONNX CUDA support -# -# Builds voxtype with all ONNX engines + CUDA for NVIDIA GPU acceleration. -# Uses NVIDIA CUDA base image with cuDNN for optimal ONNX Runtime performance. -# -# Usage: -# docker build -f Dockerfile.onnx-cuda -t voxtype-onnx-cuda-builder . 
-# docker run --rm -v $(pwd)/releases:/output voxtype-onnx-cuda-builder -# -# The resulting binary requires: -# - NVIDIA GPU with CUDA support -# - CUDA drivers installed on host -# - libcudart.so and libcudnn.so at runtime (or use LD_LIBRARY_PATH) -# -FROM nvidia/cuda:12.6.1-cudnn-devel-ubuntu24.04 - -ARG VERSION=0.6.1 -ARG DEBIAN_FRONTEND=noninteractive - -ENV VERSION=${VERSION} - -# Install build dependencies -RUN apt-get update && apt-get install -y \ - curl \ - build-essential \ - clang \ - cmake \ - pkg-config \ - libasound2-dev \ - libssl-dev \ - protobuf-compiler \ - libprotobuf-dev \ - git \ - binutils \ - && rm -rf /var/lib/apt/lists/* - -# Install Rust -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y -ENV PATH="/root/.cargo/bin:${PATH}" - -WORKDIR /build - -# Copy source code -COPY . . - -# ONNX Runtime CUDA settings -# The ort crate will download CUDA-enabled ONNX Runtime -ENV ORT_STRATEGY=download - -# Disable LTO for faster builds -# Build with all ONNX engines + CUDA support for NVIDIA GPUs -RUN cargo build --release --features parakeet-cuda,moonshine-cuda,sensevoice-cuda,paraformer-cuda,dolphin-cuda,omnilingual-cuda,ml-diarization \ - --config 'profile.release.lto=false' \ - --config 'profile.release.codegen-units=8' \ - && cp target/release/voxtype /tmp/voxtype-onnx-cuda - -# Verify binary -RUN echo "=== Verifying ONNX CUDA binary ===" \ - && /tmp/voxtype-onnx-cuda --version \ - && echo "=== Binary size ===" \ - && ls -lh /tmp/voxtype-onnx-cuda \ - && echo "=== Checking CUDA libraries linked ===" \ - && ldd /tmp/voxtype-onnx-cuda | grep -iE 'cuda|cudnn|onnx' || echo "Note: CUDA libs may be loaded dynamically at runtime" - -# Output stage -CMD mkdir -p /output \ - && cp /tmp/voxtype-onnx-cuda /output/voxtype-${VERSION}-linux-x86_64-onnx-cuda \ - && echo "Binary copied to /output:" \ - && ls -la /output/voxtype-* diff --git a/Dockerfile.onnx-cuda-12 b/Dockerfile.onnx-cuda-12 new file mode 100644 index 00000000..9a015cb7 
--- /dev/null +++ b/Dockerfile.onnx-cuda-12 @@ -0,0 +1,91 @@ +# Build environment for voxtype with ONNX CUDA 12 support +# +# Builds voxtype with all ONNX engines + CUDA 12 for NVIDIA GPU acceleration. +# Pairs with Dockerfile.onnx-cuda-13 for CUDA 13.x systems. The two binaries +# differ only in which ONNX Runtime prebuilt is bundled (cu12 vs cu13). At +# install time, voxtype-onnx-cuda is symlinked to the matching variant. +# +# Usage: +# docker build -f Dockerfile.onnx-cuda-12 -t voxtype-onnx-cuda-12-builder . +# docker run --rm -v $(pwd)/releases:/output voxtype-onnx-cuda-12-builder +# +# The resulting binary requires: +# - NVIDIA GPU with CUDA 12.x runtime +# - libcudart.so.12 and cuDNN 9 at runtime (or via LD_LIBRARY_PATH) +# - The bundled libonnxruntime_providers_cuda.so and *_shared.so +# installed alongside the binary (handled by scripts/package.sh) +# +FROM nvidia/cuda:12.6.1-cudnn-devel-ubuntu24.04 + +ARG VERSION=0.7.0 +ARG DEBIAN_FRONTEND=noninteractive + +ENV VERSION=${VERSION} + +# Install build dependencies +RUN apt-get update && apt-get install -y \ + curl \ + build-essential \ + clang \ + cmake \ + pkg-config \ + libasound2-dev \ + libssl-dev \ + protobuf-compiler \ + libprotobuf-dev \ + git \ + binutils \ + && rm -rf /var/lib/apt/lists/* + +# Install Rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y +ENV PATH="/root/.cargo/bin:${PATH}" + +WORKDIR /build + +# Copy source code +COPY . . + +# ONNX Runtime CUDA 12 settings +# ort-sys reads ORT_CUDA_VERSION at build time to choose the cu12 or cu13 +# prebuilt. The base image's NV_CUDA_CUDART_VERSION would also yield cu12, +# but setting ORT_CUDA_VERSION explicitly is unambiguous. 
+ENV ORT_STRATEGY=download +ENV ORT_CUDA_VERSION=12 + +# Build with all ONNX engines + CUDA support for NVIDIA GPUs +# Disable LTO for faster builds +RUN cargo build --release --features parakeet-cuda,moonshine-cuda,sensevoice-cuda,paraformer-cuda,dolphin-cuda,omnilingual-cuda,cohere-cuda,ml-diarization \ + --config 'profile.release.lto=false' \ + --config 'profile.release.codegen-units=8' \ + && cp target/release/voxtype /tmp/voxtype-onnx-cuda-12 + +# The CUDA execution provider in ort 2.0.0-rc.12 dlopen's its companion +# shared libs at runtime instead of being statically linked. They must be +# installed alongside the binary or the EP fails to register and ort +# silently falls back to CPU. +# +# cargo emits the prebuilt .so files into either /build/target/release/ or +# /build/target/release/examples/ depending on link search behavior; copy +# from whichever location actually has them. +RUN mkdir -p /tmp/onnx-cuda-12-libs \ + && find /build/target/release -maxdepth 2 -name 'libonnxruntime_providers_cuda.so' -exec cp -L {} /tmp/onnx-cuda-12-libs/ \; \ + && find /build/target/release -maxdepth 2 -name 'libonnxruntime_providers_shared.so' -exec cp -L {} /tmp/onnx-cuda-12-libs/ \; \ + && test -f /tmp/onnx-cuda-12-libs/libonnxruntime_providers_cuda.so \ + && test -f /tmp/onnx-cuda-12-libs/libonnxruntime_providers_shared.so + +# Verify binary +RUN echo "=== Verifying ONNX CUDA 12 binary ===" \ + && /tmp/voxtype-onnx-cuda-12 --version \ + && echo "=== Binary size ===" \ + && ls -lh /tmp/voxtype-onnx-cuda-12 \ + && echo "=== Companion .so files ===" \ + && ls -lh /tmp/onnx-cuda-12-libs/ + +# Output stage: copy binary and companion .so files +CMD mkdir -p /output \ + && cp /tmp/voxtype-onnx-cuda-12 /output/voxtype-${VERSION}-linux-x86_64-onnx-cuda-12 \ + && cp /tmp/onnx-cuda-12-libs/libonnxruntime_providers_cuda.so /output/voxtype-${VERSION}-linux-x86_64-onnx-cuda-12.libonnxruntime_providers_cuda.so \ + && cp /tmp/onnx-cuda-12-libs/libonnxruntime_providers_shared.so 
/output/voxtype-${VERSION}-linux-x86_64-onnx-cuda-12.libonnxruntime_providers_shared.so \ + && echo "Output:" \ + && ls -la /output/voxtype-${VERSION}-linux-x86_64-onnx-cuda-12* diff --git a/Dockerfile.onnx-cuda-13 b/Dockerfile.onnx-cuda-13 new file mode 100644 index 00000000..be35b805 --- /dev/null +++ b/Dockerfile.onnx-cuda-13 @@ -0,0 +1,86 @@ +# Build environment for voxtype with ONNX CUDA 13 support +# +# Builds voxtype with all ONNX engines + CUDA 13 for NVIDIA GPU acceleration. +# Pairs with Dockerfile.onnx-cuda-12 for CUDA 12.x systems. At install time, +# voxtype-onnx-cuda is symlinked to whichever variant matches the host's +# CUDA runtime (see voxtype setup gpu --enable). +# +# Usage: +# docker build -f Dockerfile.onnx-cuda-13 -t voxtype-onnx-cuda-13-builder . +# docker run --rm -v $(pwd)/releases:/output voxtype-onnx-cuda-13-builder +# +# The resulting binary requires: +# - NVIDIA GPU with CUDA 13.x runtime +# - NVIDIA driver 580 or newer (CUDA 13 minimum) +# - libcudart.so.13 and cuDNN 9 at runtime (or via LD_LIBRARY_PATH) +# - The bundled libonnxruntime_providers_cuda.so and *_shared.so +# installed alongside the binary (handled by scripts/package.sh) +# +FROM nvidia/cuda:13.0.3-cudnn-devel-ubuntu24.04 + +ARG VERSION=0.7.0 +ARG DEBIAN_FRONTEND=noninteractive + +ENV VERSION=${VERSION} + +# Install build dependencies +RUN apt-get update && apt-get install -y \ + curl \ + build-essential \ + clang \ + cmake \ + pkg-config \ + libasound2-dev \ + libssl-dev \ + protobuf-compiler \ + libprotobuf-dev \ + git \ + binutils \ + && rm -rf /var/lib/apt/lists/* + +# Install Rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y +ENV PATH="/root/.cargo/bin:${PATH}" + +WORKDIR /build + +# Copy source code +COPY . . + +# ONNX Runtime CUDA 13 settings +# ort-sys reads ORT_CUDA_VERSION at build time to choose the cu12 or cu13 +# prebuilt. 
The base image's NV_CUDA_CUDART_VERSION would also yield cu13, +# but setting ORT_CUDA_VERSION explicitly is unambiguous and survives any +# upstream changes to NVIDIA's image env vars. +ENV ORT_STRATEGY=download +ENV ORT_CUDA_VERSION=13 + +# Build with all ONNX engines + CUDA 13 support for NVIDIA GPUs +# Disable LTO for faster builds +RUN cargo build --release --features parakeet-cuda,moonshine-cuda,sensevoice-cuda,paraformer-cuda,dolphin-cuda,omnilingual-cuda,cohere-cuda,ml-diarization \ + --config 'profile.release.lto=false' \ + --config 'profile.release.codegen-units=8' \ + && cp target/release/voxtype /tmp/voxtype-onnx-cuda-13 + +# Bundle the CUDA EP companion shared libs (see Dockerfile.onnx-cuda-12 for rationale) +RUN mkdir -p /tmp/onnx-cuda-13-libs \ + && find /build/target/release -maxdepth 2 -name 'libonnxruntime_providers_cuda.so' -exec cp -L {} /tmp/onnx-cuda-13-libs/ \; \ + && find /build/target/release -maxdepth 2 -name 'libonnxruntime_providers_shared.so' -exec cp -L {} /tmp/onnx-cuda-13-libs/ \; \ + && test -f /tmp/onnx-cuda-13-libs/libonnxruntime_providers_cuda.so \ + && test -f /tmp/onnx-cuda-13-libs/libonnxruntime_providers_shared.so + +# Verify binary +RUN echo "=== Verifying ONNX CUDA 13 binary ===" \ + && /tmp/voxtype-onnx-cuda-13 --version \ + && echo "=== Binary size ===" \ + && ls -lh /tmp/voxtype-onnx-cuda-13 \ + && echo "=== Companion .so files ===" \ + && ls -lh /tmp/onnx-cuda-13-libs/ + +# Output stage: copy binary and companion .so files +CMD mkdir -p /output \ + && cp /tmp/voxtype-onnx-cuda-13 /output/voxtype-${VERSION}-linux-x86_64-onnx-cuda-13 \ + && cp /tmp/onnx-cuda-13-libs/libonnxruntime_providers_cuda.so /output/voxtype-${VERSION}-linux-x86_64-onnx-cuda-13.libonnxruntime_providers_cuda.so \ + && cp /tmp/onnx-cuda-13-libs/libonnxruntime_providers_shared.so /output/voxtype-${VERSION}-linux-x86_64-onnx-cuda-13.libonnxruntime_providers_shared.so \ + && echo "Output:" \ + && ls -la 
/output/voxtype-${VERSION}-linux-x86_64-onnx-cuda-13* diff --git a/Dockerfile.onnx-migraphx b/Dockerfile.onnx-migraphx new file mode 100644 index 00000000..9a6e3727 --- /dev/null +++ b/Dockerfile.onnx-migraphx @@ -0,0 +1,131 @@ +# Build environment for voxtype ONNX MIGraphX binary +# MUST be run on a machine with an AMD GPU +# +# pyke ort 2.0.0-rc.12 ships no ROCm/MIGraphX prebuilt — only cu12, cu13, +# wgpu, nvrtx, and CPU-only ("none"). The migraphx feature flag wires the +# EP into the binary but the runtime .so files (libonnxruntime_providers_*.so) +# come from a from-source ONNX Runtime build with --use_migraphx. +# +# This Dockerfile compiles ONNX Runtime 1.24.2 (matching the version pyke's +# rc.12 wraps) against AMD's ROCm SDK, then builds voxtype against that local +# ORT via ORT_LIB_LOCATION. Total build time: 45-60 minutes. +# +# Usage: +# docker build -f Dockerfile.onnx-migraphx -t voxtype-onnx-migraphx-builder . +# docker run --rm -v $(pwd)/releases:/output voxtype-onnx-migraphx-builder +# +# The resulting binary requires the bundled libonnxruntime_providers_migraphx.so +# and *_shared.so installed alongside the binary (handled by scripts/package.sh), +# plus AMD's ROCm runtime (rocm-libs / rocm-hip-runtime / migraphx) on the host. +# +# ROCm 7.2.2 — older 7.0.x bundles a MIGraphX without `fp4x2` shape type, +# which ORT 1.24.2's MIGraphX provider unconditionally references and +# fails to compile against. +FROM rocm/dev-ubuntu-24.04:7.2.2-complete + +ARG VERSION=0.7.0 +ARG ORT_VERSION=1.24.2 +ARG DEBIAN_FRONTEND=noninteractive + +ENV VERSION=${VERSION} +ENV ROCM_PATH=/opt/rocm +ENV PATH="/opt/rocm/bin:${PATH}" + +# Install build dependencies for both ORT and voxtype. +# migraphx-dev is required so ORT's CMake find_package(migraphx) succeeds; +# it's not pulled in by the rocm/dev-ubuntu-24.04 base image even though +# rocm-hip-sdk and the runtime libs are. 
+RUN apt-get update && apt-get install -y \ + curl \ + build-essential \ + clang \ + cmake \ + ninja-build \ + pkg-config \ + libasound2-dev \ + libssl-dev \ + protobuf-compiler \ + libprotobuf-dev \ + git \ + binutils \ + python3 \ + python3-pip \ + python3-numpy \ + python3-sympy \ + python3-flatbuffers \ + python3-packaging \ + migraphx-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install Rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y +ENV PATH="/root/.cargo/bin:${PATH}" + +WORKDIR /build + +# Clone and build ONNX Runtime from source with MIGraphX EP. +# - --build_shared_lib produces libonnxruntime.so for the EP bridge. +# - FETCHCONTENT_TRY_FIND_PACKAGE_MODE=NEVER forces ORT to use its bundled +# abseil-cpp/re2/protobuf instead of the system versions, which can have +# ABI drift across patch releases (the original v0.6 ROCm/CUDA Dockerfiles +# relied on pyke prebuilts so this wasn't an issue then). +RUN git clone --depth 1 --branch v${ORT_VERSION} --recurse-submodules --shallow-submodules \ + https://github.com/microsoft/onnxruntime.git /build/onnxruntime \ + && cd /build/onnxruntime \ + && ./build.sh \ + --config Release \ + --use_migraphx \ + --migraphx_home /opt/rocm \ + --rocm_home /opt/rocm \ + --build_shared_lib \ + --parallel \ + --skip_tests \ + --skip_submodule_sync \ + --allow_running_as_root \ + --cmake_extra_defines \ + onnxruntime_BUILD_UNIT_TESTS=OFF \ + FETCHCONTENT_TRY_FIND_PACKAGE_MODE=NEVER \ + && ls -la /build/onnxruntime/build/Linux/Release/libonnxruntime_providers_migraphx.so \ + && ls -la /build/onnxruntime/build/Linux/Release/libonnxruntime_providers_shared.so + +ENV ORT_LIB_LOCATION=/build/onnxruntime/build/Linux/Release + +# Copy voxtype source and build against the local ORT +COPY . /build/voxtype +WORKDIR /build/voxtype + +# Disable LTO for faster builds. 
The single voxtype-onnx-migraphx binary +# bundles every ONNX engine, but MIGraphX EP registration only happens +# for Parakeet — every other ONNX engine runs on CPU because MIGraphX 7.2 +# can't compile their graphs (per-engine notes in src/transcribe/*.rs and +# Cargo.toml). Cohere is held back until an int4/FP16 model variant ships. +RUN cargo build --release \ + --features parakeet-migraphx,moonshine,sensevoice,paraformer,dolphin,omnilingual,cohere,ml-diarization \ + --config 'profile.release.lto=false' \ + --config 'profile.release.codegen-units=8' \ + && cp target/release/voxtype /tmp/voxtype-onnx-migraphx + +# Copy companion provider .so files. ORT computes the .so search path from +# argv[0]'s dirname (NOT /proc/self/exe), so package.sh installs the binary +# and these .so files together under /usr/lib/voxtype/migraphx/. +RUN mkdir -p /tmp/onnx-migraphx-libs \ + && cp -L /build/onnxruntime/build/Linux/Release/libonnxruntime_providers_migraphx.so \ + /tmp/onnx-migraphx-libs/ \ + && cp -L /build/onnxruntime/build/Linux/Release/libonnxruntime_providers_shared.so \ + /tmp/onnx-migraphx-libs/ + +# Verify binary +RUN echo "=== Verifying ONNX MIGraphX binary ===" \ + && /tmp/voxtype-onnx-migraphx --version \ + && echo "=== Binary size ===" \ + && ls -lh /tmp/voxtype-onnx-migraphx \ + && echo "=== Companion .so files ===" \ + && ls -lh /tmp/onnx-migraphx-libs/ + +# Output stage: copy binary and companion .so files +CMD mkdir -p /output \ + && cp /tmp/voxtype-onnx-migraphx /output/voxtype-${VERSION}-linux-x86_64-onnx-migraphx \ + && cp /tmp/onnx-migraphx-libs/libonnxruntime_providers_migraphx.so /output/voxtype-${VERSION}-linux-x86_64-onnx-migraphx.libonnxruntime_providers_migraphx.so \ + && cp /tmp/onnx-migraphx-libs/libonnxruntime_providers_shared.so /output/voxtype-${VERSION}-linux-x86_64-onnx-migraphx.libonnxruntime_providers_shared.so \ + && echo "Output:" \ + && ls -la /output/voxtype-${VERSION}-linux-x86_64-onnx-migraphx* diff --git a/Dockerfile.onnx-rocm 
b/Dockerfile.onnx-rocm deleted file mode 100644 index ec000efb..00000000 --- a/Dockerfile.onnx-rocm +++ /dev/null @@ -1,66 +0,0 @@ -# Build environment for voxtype ONNX ROCm binary -# MUST be run on a machine with an AMD GPU -# -# Uses Ubuntu 24.04 for glibc 2.39+ (required by ONNX Runtime prebuilt binaries). -# Building inside Docker caps the glibc requirement at 2.39, preventing the -# binary from picking up symbols from newer host glibc (e.g. 2.43 on Arch). -# -# Usage: -# docker build -f Dockerfile.onnx-rocm -t voxtype-onnx-rocm-builder . -# docker run --rm -v $(pwd)/releases:/output voxtype-onnx-rocm-builder -# -FROM ubuntu:24.04 - -ARG VERSION=0.6.1 -ARG DEBIAN_FRONTEND=noninteractive - -ENV VERSION=${VERSION} - -# Install build dependencies -RUN apt-get update && apt-get install -y \ - curl \ - build-essential \ - clang \ - cmake \ - pkg-config \ - libasound2-dev \ - libssl-dev \ - protobuf-compiler \ - libprotobuf-dev \ - git \ - binutils \ - && rm -rf /var/lib/apt/lists/* - -# Install Rust -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y -ENV PATH="/root/.cargo/bin:${PATH}" - -WORKDIR /build - -# Copy source code -COPY . . 
- -# Build with native optimizations -ENV RUSTFLAGS="-C target-cpu=native" - -# ONNX Runtime will be downloaded during build by the ort crate -ENV ORT_STRATEGY=download - -# Disable LTO for faster builds -# Build with all ONNX engines + ROCm for Parakeet (only engine with ROCm support) -RUN cargo build --release --features parakeet-rocm,moonshine,sensevoice,paraformer,dolphin,omnilingual,ml-diarization \ - --config 'profile.release.lto=false' \ - --config 'profile.release.codegen-units=8' \ - && cp target/release/voxtype /tmp/voxtype-onnx-rocm - -# Verify binary -RUN echo "=== Verifying ONNX ROCm binary ===" \ - && /tmp/voxtype-onnx-rocm --version \ - && echo "=== Binary size ===" \ - && ls -lh /tmp/voxtype-onnx-rocm - -# Output stage -CMD mkdir -p /output \ - && cp /tmp/voxtype-onnx-rocm /output/voxtype-${VERSION}-linux-x86_64-onnx-rocm \ - && echo "Binary copied to /output:" \ - && ls -la /output/voxtype-* diff --git a/Dockerfile.vulkan b/Dockerfile.vulkan index 6de91d50..03401f72 100644 --- a/Dockerfile.vulkan +++ b/Dockerfile.vulkan @@ -24,6 +24,12 @@ RUN apt-get update && apt-get install -y \ cmake \ pkg-config \ libasound2-dev \ + libx11-dev \ + libxi-dev \ + libxtst-dev \ + libgtk-3-dev \ + libglib2.0-dev \ + libappindicator3-dev \ git \ binutils \ libvulkan-dev \ @@ -81,5 +87,6 @@ RUN echo "=== Verifying Vulkan binary ===" \ # Output stage - copy binary to /output volume CMD mkdir -p /output \ && cp /tmp/voxtype-vulkan /output/voxtype-${VERSION}-linux-x86_64-vulkan \ + && chmod 755 /output/voxtype-${VERSION}-linux-x86_64-vulkan \ && echo "Binary copied to /output:" \ && ls -la /output/voxtype-*-vulkan diff --git a/README.md b/README.md index e0674ff0..1e6b11f0 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ Hold a hotkey (default: ScrollLock) while speaking, release to transcribe and ou - **Push-to-talk or Toggle mode** - Hold to record, or press once to start/stop - **Audio feedback** - Optional sound cues when recording starts/stops - 
**Configurable** - Choose your hotkey, model size, output mode, and more +- **Interactive TUI** - `voxtype configure` (also surfaces in Walker / fuzzel / rofi as "Voxtype Configuration") edits every option without touching config.toml by hand - **Waybar integration** - Optional status indicator shows recording state in your bar ## Quick Start @@ -673,6 +674,7 @@ We want to hear from you! Voxtype is a young project and your feedback helps mak - [Zubair](https://github.com/mzubair481) - dotool output driver with keyboard layout support - [ayoahha](https://github.com/ayoahha) - CLI backend for whisper-cli subprocess transcription - [Loki Coyote](https://github.com/lokkju) - eitype output driver for KDE/GNOME support, media keys and numeric keycode hotkey support +- [Christopher Albert](https://github.com/krystophny) - macOS port foundation, CoreAudio capture, CGEvent output, Homebrew packaging - [Umesh](https://github.com/radiorambo) - Documentation website - [Sami Jawhar](https://github.com/sjawhar) - Eager input processing wiring - [KaiStarkk](https://github.com/KaiStarkk) - Post-process trim and fallback_on_empty options diff --git a/STATUS.md b/STATUS.md new file mode 100644 index 00000000..fe9a2df6 --- /dev/null +++ b/STATUS.md @@ -0,0 +1,237 @@ +# mic-osd worktree status + +## Commit 1 — daemon-side audio level emitter and IPC + +Landed: daemon-side scaffolding for the OSD audio-frame channel. + +- New module `src/audio/levels.rs` (497 lines, 7 tests passing). + - `AudioFrame { seq: u32, min: f32, max: f32, peak_dbfs: f32 }` (16 bytes, native byte order). + - `LevelHub` binds a Unix socket and runs an accept loop + a broadcast loop. + - `LevelBucketer` collects samples into 10 ms windows (160 samples at 16 kHz) and + emits one `AudioFrame` per window. No allocation in the hot path. + - `spawn_emitter` plumbs an existing `mpsc::Receiver` of sample chunks (the chunk stream + from `AudioCapture::start()`) through the bucketer into the hub. 
Task ends when + the input channel closes (i.e. when the recording capture is dropped/stopped). + - Fan-out is non-blocking: per-subscriber bounded queue (30 frames). Slow consumers + are dropped, never back-pressured. When no subscribers are connected, frames are + discarded with no work beyond a `try_send` and an empty `Vec::retain`. +- `Daemon` now owns an optional `LevelHub` plus an active emitter `JoinHandle`. + - Hub is bound at daemon startup; bind failure is logged, not fatal. + - `start_recording_capture()` helper centralises the three non-meeting + `audio::create_capture` + `capture.start()` call sites and (when the hub is + present) attaches a per-recording emitter task. Meeting `DualCapture` is left + untouched. + - Emitter is aborted in `start_transcription_task`; cancel paths rely on the + capture's `Drop` closing the channel naturally. + - Socket file is removed on shutdown. + +### IPC choice + +A new Unix socket at `$XDG_RUNTIME_DIR/voxtype/audio.sock`, separate from the +status socket. Reasoning: 100 Hz binary frames don't belong on the human-readable +status stream, and a separate socket lets subscribers connect/disconnect +independently without parsing status events. Per BRIEF.md, this is the recommended +shape. + +### Design questions for Pete + +1. The emitter is on by default once the hub binds; opt-out is "don't run the OSD". + Adding an `[osd] enabled = false` switch is deferred to Commit 6 (config). Idle + cost is essentially zero (no recording = no frames at all). OK to defer? +2. `to_bytes()` uses native byte order. Same-machine IPC, no portability concern, + matches the `repr(C)` layout assertion in tests. OK? +3. Cancel paths abort the emitter implicitly via `capture.stop()` closing the + chunk receiver. I considered adding `stop_level_emitter()` to each cancel site + but the implicit close is correct and simpler. + +## Validation + +- `cargo check --offline --lib --bins --tests` clean (only pre-existing warnings). 
+- `cargo test --offline --lib`: 546 passed, 7 new in `audio::levels::tests`. +- `cargo fmt` applied to changed files. +- Clippy on changed files clean (the workspace has plenty of pre-existing + clippy lints that aren't ours to fix here). + +## Commit 2 — voxtype-osd binary skeleton + +Landed: a second `[[bin]]` at `src/bin/voxtype_osd.rs`. + +- Connects to the daemon socket, decodes `AudioFrame`s, drops them into a + 300-entry ring buffer (3 s at 100 Hz). +- Logs a `tracing::debug!` line every N frames so end-to-end IPC can be + verified before any Wayland code lands. +- Reconnects automatically: when the daemon is down the binary sleeps for + `--reconnect-secs` and tries again. EOF on the socket is handled the same + way (daemon restart, recording ended cleanly, etc.). +- Three unit tests on the ring buffer pass. +- CLI: `--socket`, `--reconnect-secs`, `--log-every`, plus `VOXTYPE_OSD_SOCKET` + env var (added the `env` feature to clap). + +Smoke check is pending until Pete runs the daemon + OSD side by side. The +binary builds clean and the IPC types are shared via `voxtype::audio::levels`, +so a runtime mismatch is impossible. + +## Commit 3 — shared `osd::` module + dual-binary skeleton + +Pete decided to ship two frontends so users can pick their deployment +style: `voxtype-osd-native` (SCTK + wgpu + egui-wgpu, single static +binary) and `voxtype-osd-gtk4` (GTK4 + gtk4-layer-shell, smaller binary, +dyn-links GTK4 for systems that already ship it). This commit lands the +shared logic both binaries consume, and replaces the single +`voxtype-osd` skeleton from Commit 2. + +- New module tree at `src/osd/`: + - `ipc.rs` — `FrameRing` and `run_ipc_loop` factored out of the old + skeleton; takes a per-frame callback so each frontend supplies its + own state. Six unit tests on the ring buffer (oldest-first iter, + partial-fill, clear/reset). 
+ - `visual.rs` — `Color`, `Palette` (with `fallback()`), `MeterZone`, + `PeakHold` + free-function `update_peak_hold` matching BRIEF.md + verbatim, `EnvelopeColumn`, `project_envelope` (handles partial-ring + "fills from right", aggregates min/max when full), and + `peak_meter_fraction`. Ten unit tests cover the math. + - `config.rs` — `OsdConfig` and `OsdPosition`, defaults match BRIEF.md + (`enabled=true`, 600x80, bottom-center, 0.85 opacity, 3s window, + 6 dB/sec decay). Three tests (defaults, kebab-case serde, partial + TOML deserialise). + - `theme.rs` — `omarchy_theme_dir()`, `load_palette()` (returns + `Palette::fallback()` for now), `ThemeWatcher` placeholder. Real + parsing + `notify`-based watcher land in Commit 5. Two tests. +- Two new feature-gated bin entry points: + - `src/bin/voxtype_osd_native.rs` (required-features `osd-native`) + - `src/bin/voxtype_osd_gtk4.rs` (required-features `osd-gtk4`) + - Both connect via `osd::ipc::run_ipc_loop`, push frames into a + shared `Arc`-wrapped `FrameRing`, run a `PeakHold` update per frame, + and emit a `tracing::debug!` line every `--log-every` frames. The + `frontend` field in the log line distinguishes them; everything + else (seq, peak_dbfs, held_dbfs, ring_len, …) is identical so + Pete can verify shared logic by running them side-by-side. +- `Cargo.toml`: removed the `voxtype-osd` `[[bin]]` entry; added + `osd-native` and `osd-gtk4` features (empty for now; GUI deps land + in Commits 4a/4b) and the two `[[bin]]` entries gated on those + features. +- `src/lib.rs` exposes `pub mod osd`. +- Old `src/bin/voxtype_osd.rs` deleted. + +### Validation + +- `cargo check --offline --lib`: clean (1 pre-existing warning). +- `cargo check --offline --features osd-native --bin voxtype-osd-native`: + clean. +- `cargo check --offline --features osd-gtk4 --bin voxtype-osd-gtk4`: + clean. +- `cargo test --offline --features osd-native,osd-gtk4 --lib`: + 566 passed (was 546; +20 new tests in `osd::*`). 
+- `cargo clippy --offline --features osd-native,osd-gtk4 --bin + voxtype-osd-native --bin voxtype-osd-gtk4` clean for files we + touched (preexisting warnings on unmodified files left alone per + worktree brief). +- `cargo fmt -- --check` clean for files we touched. + +### Notes + +- The shared logic is fully runtime-verifiable now: with the daemon + recording, both binaries pump identical frames through the same + ring + peak-hold and log identical numerics. Stdout sanity check is + Pete's call. +- Choice of GUI deps for Commits 4a/4b is deferred. The brief lists + starting points; verify exact crate names + versions when wiring + them in. Both feature flags currently have empty `dep:` lists so + the build works today and grows naturally. + +## Commit 4a — native (SCTK + wgpu + egui-wgpu) rendering + +Landed: real GUI for `voxtype-osd-native`. The binary now opens a +wlr-layer-shell surface on demand and renders the waveform + peak meter +via egui-wgpu, with the architecture described below. + +### Architecture + +The binary splits into a main thread that runs the calloop event loop +(Wayland + render timer) and a dedicated IPC thread that owns a +single-threaded Tokio runtime to drive `osd::ipc::run_ipc_loop`. The IPC +thread pushes decoded `AudioFrame`s into the shared `Arc`-wrapped `FrameRing`, +updates the shared `Arc`-wrapped `PeakHold`, and pings the main thread via +`calloop::ping::Ping` after every frame. Pings coalesce, so 100 Hz of +notifications is fine. + +Lifecycle: + +- Surface is created on the first frame ping (initial connect, or after + the daemon resumes recording). All wgpu/egui state lives in + `RenderSurface`, which is `None` while idle. +- Surface is destroyed after `IDLE_TEARDOWN_SECS` (5 s) without a frame. + This matches the BRIEF: surface destroyed when Idle, not just hidden. +- `LayerShellHandler::configure` accepts the compositor's size, configures + the wgpu swapchain, and triggers an immediate render so the surface + becomes visible. 
Subsequent renders are driven by a 16 ms calloop timer. +- Click-through is set up by attaching an empty `wl_region` as the input + region (`KeyboardInteractivity::None` in addition). + +### Files + +- `src/bin/voxtype_osd_native/main.rs` — CLI parsing, IPC thread spawn, + entry into the Wayland event loop. Replaces the old single-file + `src/bin/voxtype_osd_native.rs`. +- `src/bin/voxtype_osd_native/app.rs` — all SCTK + wgpu + egui glue. + Single file because the borrow relationships between the SCTK state, + the wgpu device/queue, and the egui renderer fight when split. + +### Rendering + +- Waveform: `osd::visual::project_envelope` with 3 s of frames mapped onto + ~95 % of the surface width. Filled `Shape::convex_polygon` (mirrored + min/max columns) in `palette.accent` over `palette.background`. +- Peak meter: 10 vertical segments, color zones from `MeterZone::from_dbfs`, + segment fill via `peak_meter_fraction(peak_dbfs, -60.0)`. Held-peak tick + drawn as a thin foreground bar at the held position; held-peak decays + through `osd::visual::PeakHold` (already updated on the IPC thread). +- Background uses `Palette::fallback()` today (Commit 5 swaps in real + Omarchy parsing without changing this surface). + +### Cargo.toml + +`osd-native` now pulls in: + +- `smithay-client-toolkit 0.20`, `calloop 0.14`, `calloop-wayland-source 0.4` +- `wayland-client 0.31` (with `system` feature) + `wayland-backend 0.3` + (with `client_system` feature) so we can hand wgpu the raw libwayland + pointers +- `wayland-protocols 0.32` and `wayland-protocols-wlr 0.3` +- `wgpu 29` (default-features off; `vulkan + gles + wgsl + std`) +- `egui 0.34` and `egui-wgpu 0.34` +- `raw-window-handle 0.6`, `pollster 0.4`, `bytemuck 1` + +The binary path moved from `src/bin/voxtype_osd_native.rs` to +`src/bin/voxtype_osd_native/main.rs` so we can split modules cleanly. + +### Validation + +- `cargo check --features osd-native --bin voxtype-osd-native` clean. 
+- `cargo build --features osd-native --bin voxtype-osd-native --release` + clean. +- `cargo test --features osd-native,osd-gtk4 --lib` 566 passed. +- `cargo clippy --features osd-native --bin voxtype-osd-native` clean on + the OSD files (preexisting warnings on unmodified files left alone per + brief). +- `rustfmt` clean on touched files (pre-existing diffs in unrelated + files left alone per brief). +- Runtime smoke test (does the surface appear, does it look right, idle + CPU < 0.1 %) is Pete's call; the agent environment can't run a Wayland + client. + +### Notes / things to review + +- The `OsdConfig` consumed here is built from defaults plus a few CLI + overrides (`--width-px`, `--height-px`, `--opacity`). Wiring the full + `[osd]` config block + env-var layering is Commit 6, as planned. +- `IDLE_TEARDOWN_SECS = 5.0` is a literal in `app.rs`; if Pete wants it + user-tunable, lift it onto `OsdConfig` in Commit 6. +- The wgpu swapchain uses `CompositeAlphaMode::PreMultiplied` so the + background alpha (`palette.background.a = 0.85`) actually goes through + the compositor as transparency. + +## Next + +Commit 4b: GTK4 + gtk4-layer-shell rendering for `voxtype-osd-gtk4`. +Commits 5/6: Omarchy theme parsing + watcher; `[osd]` config wiring. 
diff --git a/assets/engines/parakeet.png b/assets/engines/parakeet.png new file mode 100644 index 00000000..9ba10fe8 Binary files /dev/null and b/assets/engines/parakeet.png differ diff --git a/assets/engines/whisper.png b/assets/engines/whisper.png new file mode 100644 index 00000000..85e45b55 Binary files /dev/null and b/assets/engines/whisper.png differ diff --git a/assets/icon.png b/assets/icon.png new file mode 100644 index 00000000..ff2faba9 Binary files /dev/null and b/assets/icon.png differ diff --git a/build.rs b/build.rs index 17a5b64c..6d881b38 100644 --- a/build.rs +++ b/build.rs @@ -66,5 +66,25 @@ fn main() -> Result<(), Error> { man_dir.display() ); + expose_cuda_build_major(); + Ok(()) } + +/// Mirror ort-sys's build-time CUDA version selection so the binary's runtime +/// probe can reject mismatched hosts before ort attempts (and crashes on) +/// EP registration. ort 2.0.0-rc.12 picks cu12 vs cu13 prebuilt at compile time +/// based on the same env var; we read it here and emit a compile-time constant +/// the parakeet code path uses to short-circuit graceful fallback. +fn expose_cuda_build_major() { + println!("cargo:rerun-if-env-changed=ORT_CUDA_VERSION"); + let major = match env::var("ORT_CUDA_VERSION").as_deref() { + Ok("12") => "12", + Ok("13") => "13", + // ort-sys defaults to cu12 when unset (see resolve.rs in ort-sys 2.0.0-rc.12). + // Match that default so a debug build without ORT_CUDA_VERSION set agrees + // with the bundled prebuilt. + _ => "12", + }; + println!("cargo:rustc-env=VOXTYPE_BUILD_CUDA_MAJOR={major}"); +} diff --git a/config/default.toml b/config/default.toml index 0fac466f..8a179fcf 100644 --- a/config/default.toml +++ b/config/default.toml @@ -116,12 +116,6 @@ on_demand_loading = false # remote_api_key = "sk-..." # Or use VOXTYPE_WHISPER_API_KEY env var # remote_timeout_secs = 30 -# --- CLI mode settings (used when mode = "cli") --- -# Uses whisper-cli subprocess instead of whisper-rs FFI bindings. 
-# Fallback for systems where whisper-rs crashes (e.g., glibc 2.42+ on Ubuntu 25.10). -# Requires whisper-cli from whisper.cpp: https://github.com/ggerganov/whisper.cpp -# whisper_cli_path = "/usr/local/bin/whisper-cli" # Optional, searches PATH if not set - # [parakeet] # Parakeet configuration (only used when engine = "parakeet") # Requires: cargo build --features parakeet diff --git a/contrib/hammerspoon/README.md b/contrib/hammerspoon/README.md new file mode 100644 index 00000000..83103eef --- /dev/null +++ b/contrib/hammerspoon/README.md @@ -0,0 +1,73 @@ +# Voxtype Hammerspoon Integration + +Hammerspoon integration for voxtype on macOS. This is an alternative to the built-in hotkey support that doesn't require granting Accessibility permissions to Terminal. + +## Installation + +1. Install Hammerspoon: + ```bash + brew install --cask hammerspoon + ``` + +2. Copy the voxtype module: + ```bash + cp voxtype.lua ~/.hammerspoon/ + ``` + +3. Add to your `~/.hammerspoon/init.lua`: + ```lua + local voxtype = require("voxtype") + voxtype.setup({ hotkey = "rightalt" }) + ``` + +4. Reload Hammerspoon config (Cmd+Shift+R or click menu bar icon → Reload Config) + +## Configuration + +```lua +voxtype.setup({ + -- Key to use for push-to-talk + -- Options: "rightalt", "rightcmd", "f13", "f14", etc. + hotkey = "rightalt", + + -- Mode: "push_to_talk" or "toggle" + -- push_to_talk: Hold key to record, release to transcribe + -- toggle: Press once to start, press again to stop + mode = "push_to_talk", + + -- Path to voxtype binary (optional, auto-detected) + voxtype_path = nil, +}) +``` + +## Adding a Cancel Hotkey + +You can add a separate hotkey to cancel recording: + +```lua +voxtype.add_cancel_hotkey({"cmd", "shift"}, "escape") +``` + +## Checking Status + +```lua +print(voxtype.status()) -- Returns: "idle", "recording", "transcribing", or "stopped" +``` + +## Why Use Hammerspoon? 
+ +- **No Accessibility permissions for Terminal**: The built-in rdev hotkey requires granting Accessibility access to your terminal app +- **More flexible hotkey options**: Hammerspoon supports complex key combinations +- **Integration with other automations**: Combine voxtype with your other Hammerspoon workflows +- **Visual feedback**: Easy to add custom alerts and notifications + +## Troubleshooting + +**Hotkey not working?** +- Make sure Hammerspoon has Accessibility permissions (System Settings → Privacy & Security → Accessibility) +- Check the Hammerspoon console for errors (click menu bar icon → Console) +- Verify voxtype daemon is running: `voxtype status` + +**voxtype not found?** +- Set the path explicitly: `voxtype.setup({ voxtype_path = "/path/to/voxtype" })` +- Or add voxtype to your PATH diff --git a/contrib/hammerspoon/voxtype.lua b/contrib/hammerspoon/voxtype.lua new file mode 100644 index 00000000..14d7062f --- /dev/null +++ b/contrib/hammerspoon/voxtype.lua @@ -0,0 +1,188 @@ +-- Voxtype Hammerspoon Integration +-- +-- This module provides hotkey support for voxtype on macOS using Hammerspoon. +-- It's an alternative to the built-in rdev hotkey capture that doesn't require +-- granting Accessibility permissions to Terminal. +-- +-- Installation: +-- 1. Install Hammerspoon: brew install --cask hammerspoon +-- 2. Copy this file to ~/.hammerspoon/voxtype.lua +-- 3. Add to your ~/.hammerspoon/init.lua: +-- local voxtype = require("voxtype") +-- voxtype.setup({ hotkey = "rightalt" }) -- or your preferred key +-- 4. Reload Hammerspoon config (Cmd+Shift+R or click menu bar icon) +-- +-- Configuration options: +-- hotkey: The key to use for push-to-talk (default: "rightalt") +-- Common choices: "rightalt", "rightcmd", "f13", "f14", etc. 
+-- mode: "push_to_talk" (hold to record) or "toggle" (press to start/stop) +-- voxtype_path: Path to voxtype binary (default: auto-detect) + +local M = {} + +-- Default configuration +M.config = { + hotkey = "rightalt", + mode = "push_to_talk", + voxtype_path = nil, -- Auto-detect +} + +-- State +M.is_recording = false -- true between a launched "record start" and the next stop/cancel +M.hotkey_binding = nil -- hs.hotkey object created by M.setup +M.cancel_binding = nil -- hs.hotkey object created by M.add_cancel_hotkey + +-- Quote a string so /bin/sh treats it as a single word (spaces, quotes, metacharacters) +local function shell_quote(s) + return "'" .. (s:gsub("'", "'\\''")) .. "'" +end + +-- Find voxtype binary: explicit config path, then common install paths, then `which` +-- Returns the path string, or nil when nothing is found +local function find_voxtype() + if M.config.voxtype_path then + return M.config.voxtype_path + end + + -- HOME may be unset (os.getenv returns nil); fall back to "" so the concatenations below cannot error + local home = os.getenv("HOME") or "" + + -- Common installation paths + local paths = { + "/opt/homebrew/bin/voxtype", + "/usr/local/bin/voxtype", + home .. "/.cargo/bin/voxtype", + home .. "/workspace/voxtype/target/release/voxtype", + } + + for _, path in ipairs(paths) do + if hs.fs.attributes(path) then + return path + end + end + + -- Try which + local handle = io.popen("which voxtype 2>/dev/null") + if handle then + -- Trim only leading/trailing whitespace so a path containing spaces survives intact + local result = handle:read("*a"):gsub("^%s+", ""):gsub("%s+$", "") + handle:close() + if result ~= "" then + return result + end + end + + return nil +end + +-- Execute `voxtype record <cmd>` asynchronously via hs.task +-- Returns true when the task was launched, false otherwise +local function voxtype_cmd(cmd) + local path = find_voxtype() + if not path then + hs.alert.show("voxtype not found!") + return false + end + + local task = hs.task.new(path, nil, {"record", cmd}) + return (task and task:start()) and true or false +end + +-- Start recording (no-op while already recording) +local function start_recording() + -- Flip the flag only when the command was actually launched, so a missing binary does not leave the state stuck at "recording" + if not M.is_recording and voxtype_cmd("start") then + M.is_recording = true + -- Optional: show visual feedback + -- hs.alert.show("🎤 Recording...", 0.5) + end +end + +-- Stop recording (no-op while idle) +local function stop_recording() + if M.is_recording then + M.is_recording = false + voxtype_cmd("stop") + end +end + +-- Toggle recording +local function toggle_recording() + if M.is_recording then + stop_recording() + else + start_recording() + end +end + +-- Cancel recording: sends "record cancel" and shows a short alert (no-op while idle) +local function cancel_recording() + if M.is_recording then + M.is_recording = false + voxtype_cmd("cancel") + hs.alert.show("Recording cancelled", 0.5) + end +end + +-- Map a key name (case-insensitive alias) to the Hammerspoon key name; unknown names pass through lowercased +local function map_key(key) + 
local keymap = { + rightalt = "rightalt", + rightoption = "rightalt", + rightopt = "rightalt", + leftalt = "alt", + leftoption = "alt", + leftopt = "alt", + rightcmd = "rightcmd", + rightcommand = "rightcmd", + leftcmd = "cmd", + leftcommand = "cmd", + rightctrl = "rightctrl", + rightcontrol = "rightctrl", + leftctrl = "ctrl", + leftcontrol = "ctrl", + rightshift = "rightshift", + leftshift = "shift", + } + + local lower = key:lower() + return keymap[lower] or lower +end + +-- Setup voxtype hotkey +-- opts: optional table whose entries (hotkey, mode, voxtype_path) override M.config +-- Calling setup again replaces the previously bound hotkey +function M.setup(opts) + opts = opts or {} + + -- Merge config + for k, v in pairs(opts) do + M.config[k] = v + end + + -- Remove existing binding + if M.hotkey_binding then + M.hotkey_binding:delete() + end + + local key = map_key(M.config.hotkey) + + -- NOTE(review): the default key is a bare modifier (rightalt); confirm hs.hotkey fires for modifier-only keys, otherwise an hs.eventtap flagsChanged watcher is needed + if M.config.mode == "toggle" then + -- Toggle mode: single press to start/stop + M.hotkey_binding = hs.hotkey.bind({}, key, toggle_recording) + else + -- Push-to-talk mode: hold to record, release to stop + M.hotkey_binding = hs.hotkey.bind({}, key, start_recording, stop_recording) + end + + print("Voxtype: Hotkey '" .. key .. "' bound in " .. M.config.mode .. " mode") +end + +-- Add cancel hotkey (optional) +-- mods: table of modifier names, key: key name, both passed to hs.hotkey.bind +-- Replaces any cancel hotkey from an earlier call; the binding is kept in M.cancel_binding so it can be removed +function M.add_cancel_hotkey(mods, key) + if M.cancel_binding then + M.cancel_binding:delete() + end + M.cancel_binding = hs.hotkey.bind(mods, key, cancel_recording) + print("Voxtype: Cancel hotkey bound to " .. table.concat(mods, "+") .. "+" .. key) +end + +-- Status check: runs `voxtype status` synchronously +-- Returns its output with all whitespace removed, "voxtype not found", or "unknown" when the process cannot be spawned +function M.status() + local path = find_voxtype() + if not path then + return "voxtype not found" + end + + -- Quote the path: a configured voxtype_path may contain spaces or shell metacharacters + local handle = io.popen(shell_quote(path) .. 
" status 2>/dev/null") + if handle then + local result = handle:read("*a"):gsub("%s+", "") + handle:close() + return result + end + return "unknown" +end + +return M diff --git a/docker-compose.build.yml b/docker-compose.build.yml index 5f9c0455..ae36b05a 100644 --- a/docker-compose.build.yml +++ b/docker-compose.build.yml @@ -76,11 +76,14 @@ services: profiles: - avx512 # Only build if explicitly requested (requires AVX-512 capable host) - # ONNX CUDA build (NVIDIA GPU acceleration with CPU fallback) - onnx-cuda: + # ONNX CUDA 12 build (NVIDIA GPUs on CUDA 12.x runtime) + # ort 2.0.0-rc.12 picks the cu12 prebuilt at build time; shipping a separate + # cu13 binary covers the CUDA 13.x user base. At install time, voxtype-onnx-cuda + # is symlinked to whichever variant matches the host's CUDA runtime. + onnx-cuda-12: build: context: . - dockerfile: Dockerfile.onnx-cuda + dockerfile: Dockerfile.onnx-cuda-12 args: VERSION: ${VERSION:-0.6.2} volumes: @@ -88,11 +91,23 @@ services: environment: - VERSION=${VERSION:-0.6.2} - # ONNX ROCm build (AMD GPU acceleration, requires AMD GPU host) - onnx-rocm: + # ONNX CUDA 13 build (NVIDIA GPUs on CUDA 13.x runtime, requires driver 580+) + onnx-cuda-13: build: context: . - dockerfile: Dockerfile.onnx-rocm + dockerfile: Dockerfile.onnx-cuda-13 + args: + VERSION: ${VERSION:-0.6.2} + volumes: + - ./releases/${VERSION:-0.6.2}:/output + environment: + - VERSION=${VERSION:-0.6.2} + + # ONNX MIGraphX build (AMD GPU acceleration, requires AMD GPU host) + onnx-migraphx: + build: + context: . + dockerfile: Dockerfile.onnx-migraphx args: VERSION: ${VERSION:-0.6.2} volumes: diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 2b5e2ee5..11eb83d9 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -2,6 +2,14 @@ Complete reference for all configuration options in Voxtype. 
+> **Tip**: For interactive editing, run `voxtype configure` — it edits the +> same `config.toml` this document describes, preserves comments and unknown +> fields, and validates each save before swapping the file in. The reference +> below stays useful for scripted setups, advanced fields the TUI doesn't +> surface yet, and understanding each section end-to-end. See the +> [TUI section in the user manual](USER_MANUAL.md#voxtype-configure) for +> keybindings. + ## Configuration File Location Voxtype looks for configuration in the following locations (in order): @@ -27,6 +35,11 @@ Selects which speech-to-text engine to use for transcription. - `whisper` - OpenAI Whisper via whisper.cpp (default, recommended) - `parakeet` - NVIDIA Parakeet via ONNX Runtime (requires ONNX binary) - `moonshine` - Moonshine encoder-decoder transformer via ONNX Runtime (experimental, requires special binary) +- `sensevoice` - Alibaba SenseVoice CTC via ONNX Runtime (CJK + English) +- `paraformer` - FunASR Paraformer CTC via ONNX Runtime (Chinese + English) +- `dolphin` - Dictation-optimized CTC via ONNX Runtime (Chinese + English) +- `omnilingual` - FunASR Omnilingual CTC via ONNX Runtime (50+ languages) +- `cohere` - Cohere Transcribe encoder-decoder via ONNX Runtime (#1 Open ASR Leaderboard, 14 languages, ~3 GB model) **Example:** ```toml @@ -39,11 +52,11 @@ voxtype --engine parakeet daemon ``` **Notes:** -- Parakeet requires an ONNX-enabled binary (`voxtype-*-onnx-*`) -- When using Parakeet, you must also configure the `[parakeet]` section -- When using Moonshine, you must also configure the `[moonshine]` section +- All engines except Whisper require an ONNX-enabled binary (`voxtype-*-onnx-*`) +- Each ONNX engine reads its own `[<engine>]` section (e.g. 
`[parakeet]`, `[cohere]`) - See [PARAKEET.md](PARAKEET.md) for detailed Parakeet setup instructions - See [MOONSHINE.md](MOONSHINE.md) for detailed Moonshine setup instructions +- Cohere Transcribe is the largest model voxtype ships (~3 GB int8); use `voxtype setup model` to download it --- @@ -406,7 +419,7 @@ Selects the transcription backend. > **Privacy Notice**: When using `remote` backend, audio is transmitted over the network. See [User Manual - Remote Whisper Servers](USER_MANUAL.md#remote-whisper-servers) for privacy considerations. -**When to use `cli` backend:** +**When to use `cli` backend (Linux only):** The `cli` backend is a workaround for systems where the whisper-rs FFI bindings crash due to C++ exceptions crossing the FFI boundary. This affects some systems with glibc 2.42+ (e.g., Ubuntu 25.10). If voxtype crashes during transcription, try the `cli` backend. Requires `whisper-cli` from [whisper.cpp](https://github.com/ggerganov/whisper.cpp). @@ -997,6 +1010,7 @@ remote_timeout_secs = 60 # 60 second timeout for long recordings **Type:** String **Default:** Auto-detected from PATH **Required:** No +**Platform:** Linux only Path to the `whisper-cli` binary. Only used when `backend = "cli"`. @@ -1189,6 +1203,114 @@ on_demand_loading = false # Keep model loaded for fast response --- +## [cohere] + +Configuration for the Cohere Transcribe speech-to-text engine. This section is only used when `engine = "cohere"`. + +Cohere Transcribe is an encoder-decoder ASR model from Cohere Labs. It currently sits at #1 on the Open ASR Leaderboard. Whisper-style task tokens give it punctuation, capitalization, and inverse text normalization out of the box. + +### model + +**Type:** String +**Default:** `"cohere-transcribe-int8"` +**Required:** No + +The Cohere model to use. Can be a model name (looked up in `~/.local/share/voxtype/models/<name>/`) or an absolute path to a model directory. 
+ +**Available models:** + +| Model | Quantization | Size | Notes | +|-------|--------------|------|-------| +| `cohere-transcribe-int8` | int8 | ~3.1 GB | Default; runs on CPU or GPU | + +Download via `voxtype setup model` (interactive) — pick the Cohere section and confirm the size warning. + +**Example:** +```toml +[cohere] +model = "cohere-transcribe-int8" +``` + +### language + +**Type:** String +**Default:** `"en"` +**Required:** No + +Two-letter ISO 639-1 language code. Cohere officially supports 14 languages. + +**Supported values:** `ar`, `de`, `en`, `es`, `fr`, `hi`, `it`, `ja`, `ko`, `nl`, `pt`, `ru`, `tr`, `zh`. + +**Example:** +```toml +[cohere] +language = "fr" +``` + +The daemon resolves the language to its decoder prefix at startup. Unsupported codes are rejected with a clear error. + +### threads + +**Type:** Integer (optional) +**Default:** unset (uses `min(num_cpus, 4)`) +**Required:** No + +Number of CPU threads for ONNX Runtime intra-op parallelism. Leave unset on most machines. + +**Example:** +```toml +[cohere] +threads = 8 +``` + +### on_demand_loading + +**Type:** Boolean +**Default:** `false` +**Required:** No + +Same behavior as `[whisper].on_demand_loading`. When `true`, loads the model only when recording starts and unloads after transcription. Useful when working on a laptop where 3 GB of RAM dedicated to the daemon is too costly. 
+ +**Example:** +```toml +[cohere] +on_demand_loading = true +``` + +### Configuration Summary + +| Option | CLI Flag | Environment Variable | Default | Description | +|--------|----------|---------------------|---------|-------------| +| `model` | `--model` | `VOXTYPE_MODEL` | `"cohere-transcribe-int8"` | Cohere model name or path | +| `language` | `--language` | `VOXTYPE_LANGUAGE` | `"en"` | One of the 14 supported language codes | +| `threads` | - | - | auto | ONNX intra-op thread count | +| `on_demand_loading` | - | - | `false` | Load model only when recording starts | + +### Complete Example + +```toml +engine = "cohere" + +[cohere] +model = "cohere-transcribe-int8" +language = "en" +on_demand_loading = false +``` + +### Building from Source + +Source builds need the `cohere` Cargo feature. Optional GPU acceleration via `cohere-cuda` or `cohere-tensorrt`: + +```bash +cargo build --release --features cohere # CPU +cargo build --release --features cohere-cuda # NVIDIA GPU +cargo build --release --features cohere-tensorrt # NVIDIA + TensorRT EP +``` + +The prebuilt `voxtype-*-onnx-*` release binaries already include `cohere`, so users installing via AUR/.deb/.rpm don't need to rebuild. + +--- + ## [output] Controls how transcribed text is delivered. @@ -1480,6 +1602,24 @@ on_recording_stop = true # Notify when transcribing on_transcription = true # Show transcribed text ``` +### urgency + +**Type:** String (`"low"`, `"normal"`, or `"critical"`) +**Default:** `"normal"` +**Required:** No + +Sets the urgency level passed to `notify-send` for all voxtype notifications. + +On GNOME, notifications with `"low"` urgency are delivered to the notification drawer without showing as a popup banner. Use `"normal"` (the default) if you want notifications to pop up on screen. Use `"critical"` if you want notifications that persist until dismissed. + +Unknown values fall back to `"normal"`. 
+ +**Example:** +```toml +[output.notification] +urgency = "normal" # "low" | "normal" | "critical" +``` + ### type_delay_ms **Type:** Integer @@ -1991,6 +2131,60 @@ VOXTYPE_SMART_AUTO_SUBMIT=true voxtype **Note:** `smart_auto_submit` is conditional - it only fires when you say "submit". The existing `auto_submit` option always presses Enter after every transcription. Use `smart_auto_submit` when you want the choice per dictation, and `auto_submit` when you always want Enter pressed. +### filter_filler_words + +**Type:** Boolean +**Default:** `true` +**Required:** No + +When `true` (the default), strips common filler words ("uh", "um", "er", ...) from each transcription before output. Matching is case-insensitive and respects word boundaries, so words like "umbrella" or "summer" are not affected. Surrounding commas, semicolons, and double spaces are cleaned up so the result reads naturally. Set to `false` to disable. + +**Example:** + +```toml +[text] +filter_filler_words = true +``` + +With this enabled: + +- "Well, um, I think" becomes "Well, I think" +- "uh hello world" becomes "hello world" +- "hello world, uh." becomes "hello world." + +**CLI flag:** + +```bash +voxtype --filter-fillers # force on (overrides config) +voxtype --no-filter-fillers # force off (overrides config) +``` + +**Environment variable:** + +```bash +VOXTYPE_FILTER_FILLERS=true voxtype +``` + +The filter runs before `replacements` and the `[post_process]` LLM hook, so any custom replacements still apply on top of filtered text. + +### filler_words + +**Type:** Array of strings +**Default:** `["uh", "um", "er", "ah", "eh", "hmm", "hm", "mm", "mhm"]` +**Required:** No + +Words removed by the filler-word filter. The default list is conservative and includes only single-syllable disfluencies. Override it to add your own (for example "like" or "you know"), or to disable specific entries by replacing the list. 
+ +**Example:** + +```toml +[text] +filter_filler_words = true +filler_words = ["uh", "um", "er", "like", "you know"] +``` + +Multi-word entries like "you know" are matched as a single phrase. Adding aggressive entries (such as "like") may strip legitimate uses of the word; keep the list conservative or disable the filter for technical writing. + --- ## [vad] @@ -2518,6 +2712,8 @@ Any config file setting can be overridden via environment variable. These are ap | `VOXTYPE_PASTE_KEYS` | string | `output.paste_keys` | | `VOXTYPE_DOTOOL_XKB_LAYOUT` | string | `output.dotool_xkb_layout` | | `VOXTYPE_SPOKEN_PUNCTUATION` | bool | `text.spoken_punctuation` | +| `VOXTYPE_SMART_AUTO_SUBMIT` | bool | `text.smart_auto_submit` | +| `VOXTYPE_FILTER_FILLERS` | bool | `text.filter_filler_words` | Boolean values: `true`, `1` to enable; `false`, `0` to disable. diff --git a/docs/INSTALL.md b/docs/INSTALL.md index a2ea9ebc..e8bf3639 100644 --- a/docs/INSTALL.md +++ b/docs/INSTALL.md @@ -311,6 +311,13 @@ cargo install --git https://github.com/peteonrails/voxtype ## Post-Installation Setup +> **Quick path**: most settings can be configured interactively with +> `voxtype configure` (or by searching for "Voxtype Configuration" in +> Walker / fuzzel / rofi / GNOME Activities). The steps below set up the +> system-level pieces the TUI can't change for you (input-group membership, +> typing backend, model download, daemon enable). See +> [USER_MANUAL.md](USER_MANUAL.md#voxtype-configure) for the TUI keybindings. + ### 1. Add user to input group Voxtype uses the Linux evdev subsystem to detect hotkeys, which requires `input` group membership: @@ -346,7 +353,7 @@ sudo pacman -S ydotool # Ubuntu: sudo apt install ydotool -# Enable and start the daemon (Arch) +# Enable and start the daemon (Arch/Ubuntu) systemctl --user enable --now ydotool ``` @@ -366,7 +373,7 @@ For ydotool: ```bash # Install ydotool (see commands above for your distro) # Then enable and start the daemon (required!) 
-systemctl --user enable --now ydotool # Arch +systemctl --user enable --now ydotool # Arch/Ubuntu # For Fedora, see Troubleshooting guide for system service setup ``` diff --git a/docs/INSTALL_MACOS.md b/docs/INSTALL_MACOS.md new file mode 100644 index 00000000..55995ba8 --- /dev/null +++ b/docs/INSTALL_MACOS.md @@ -0,0 +1,189 @@ +# Voxtype macOS Installation Guide + +Voxtype is a push-to-talk voice-to-text tool with fast, local speech recognition using Parakeet or Whisper. + +> **Note:** macOS support is in beta. The binaries are currently unsigned, which requires a few extra steps during installation. Once we have signed and notarized binaries, this process will be simpler. + +## Requirements + +- macOS 13 (Ventura) or later +- Apple Silicon (M1/M2/M3/M4) +- Microphone access +- Input Monitoring permission (for global hotkey) + +## Installation via Homebrew (Recommended) + +```bash +# Add the tap +brew tap peteonrails/voxtype + +# Install +brew install --cask peteonrails/voxtype/voxtype +``` + +The Cask automatically: +- Installs Voxtype.app to /Applications +- Creates CLI symlink (`voxtype` command) +- Sets up auto-start at login +- Starts the daemon + +### First-Time Security Setup + +Because the app is unsigned, macOS will block it on first run. This is a one-time setup: + +1. **Allow the app to run:** + - Open **System Settings** > **Privacy & Security** + - Scroll down to find "Voxtype.app was blocked" + - Click **Open Anyway** + +2. **Grant Input Monitoring permission (required for hotkey):** + - Open **System Settings** > **Privacy & Security** > **Input Monitoring** + - Enable **Voxtype** + +3. 
**Restart the daemon** to pick up permissions: + ```bash + launchctl stop io.voxtype.daemon + launchctl start io.voxtype.daemon + ``` + +### Download a Speech Model + +```bash +# Recommended: Parakeet (fast, accurate) +voxtype setup --download --model parakeet-tdt-0.6b-v3-int8 + +# Or use Whisper +voxtype setup --download --model base.en +``` + +## Usage + +Hold **Right Option** (⌥) to record, release to transcribe. Text is typed into the active application. + +### Quick Commands + +```bash +voxtype status # Check daemon status +voxtype record start # Start recording manually +voxtype record stop # Stop and transcribe +voxtype setup check # Verify setup +voxtype menubar # Show menu bar status icon +``` + +### Menu Bar Icon (Optional) + +For a status icon showing recording state: + +```bash +voxtype menubar +``` + +This shows: +- 🎙️ Ready (idle) +- 🔴 Recording +- ⏳ Transcribing + +## Configuration + +Config file: `~/Library/Application Support/voxtype/config.toml` + +```toml +# Transcription engine +engine = "parakeet" # or "whisper" + +[hotkey] +key = "RIGHTALT" # Right Option key +mode = "push_to_talk" # or "toggle" + +[parakeet] +model = "parakeet-tdt-0.6b-v3-int8" + +[whisper] +model = "base.en" + +[output] +mode = "type" # or "clipboard", "paste" +``` + +See [CONFIGURATION.md](CONFIGURATION.md) for full options. + +## Troubleshooting + +### Hotkey not working + +1. Verify Input Monitoring permission is granted: + - System Settings > Privacy & Security > Input Monitoring + - Voxtype must be enabled + +2. Restart the daemon: + ```bash + launchctl stop io.voxtype.daemon + launchctl start io.voxtype.daemon + ``` + +3. Check daemon logs: + ```bash + tail -f ~/Library/Logs/voxtype/stdout.log + ``` + +### "Voxtype was blocked" / "damaged app" + +This happens because the app is unsigned. Go to System Settings > Privacy & Security and click "Open Anyway". 
+ +### Model not found + +```bash +voxtype setup --download --model parakeet-tdt-0.6b-v3-int8 +``` + +### Daemon not starting + +```bash +# Check status +launchctl list | grep voxtype + +# View logs +tail -f ~/Library/Logs/voxtype/stderr.log + +# Manual start for debugging +voxtype daemon +``` + +### "Another instance is already running" + +```bash +# Clean up stale state +pkill -9 voxtype +rm -rf /tmp/voxtype +launchctl start io.voxtype.daemon +``` + +## Uninstalling + +```bash +brew uninstall --cask voxtype +``` + +This removes: +- Voxtype.app from /Applications +- LaunchAgent (auto-start) +- CLI symlink + +To also remove data: +```bash +rm -rf ~/Library/Application\ Support/voxtype +rm -rf ~/Library/Logs/voxtype +``` + +## Building from Source + +```bash +git clone https://github.com/peteonrails/voxtype.git +cd voxtype +cargo build --release --features parakeet +``` + +## Getting Help + +- GitHub Issues: https://github.com/peteonrails/voxtype/issues +- Documentation: https://voxtype.io diff --git a/docs/MACOS_ARCHITECTURE.md b/docs/MACOS_ARCHITECTURE.md new file mode 100644 index 00000000..da930cf1 --- /dev/null +++ b/docs/MACOS_ARCHITECTURE.md @@ -0,0 +1,178 @@ +# Voxtype macOS Architecture + +This document describes the macOS-specific architecture for Voxtype. 
+ +## Component Overview + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ macOS System │ +├─────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────────┐ ┌──────────────────┐ ┌───────────────┐ │ +│ │ VoxtypeMenubar │ │ VoxtypeSetup │ │ Voxtype.app │ │ +│ │ (.app) │ │ (.app) │ │ (daemon) │ │ +│ │ │ │ │ │ │ │ +│ │ - Menu bar icon │ │ - Settings GUI │ │ - CLI binary │ │ +│ │ - Status display │ │ - Config editor │ │ - Transcriber │ │ +│ │ - Quick settings │ │ - Model manager │ │ - Hotkey │ │ +│ │ - Opens Setup │ │ - Permissions │ │ - Audio │ │ +│ └────────┬─────────┘ └────────┬─────────┘ └───────┬───────┘ │ +│ │ │ │ │ +│ │ Reads config │ Writes config │ │ +│ └──────────┬──────────┴──────────┬──────────┘ │ +│ │ │ │ +│ ▼ ▼ │ +│ ┌─────────────────────────────────────────────────────────────┐│ +│ │ ~/Library/Application Support/voxtype/ ││ +│ │ - config.toml (configuration) ││ +│ │ - models/ (Whisper/Parakeet models) ││ +│ └─────────────────────────────────────────────────────────────┘│ +│ │ +│ ┌─────────────────────────────────────────────────────────────┐│ +│ │ /tmp/voxtype/ ││ +│ │ - state (idle/recording/transcribing) ││ +│ │ - pid (daemon process ID) ││ +│ │ - voxtype.lock (prevents multiple instances) ││ +│ └─────────────────────────────────────────────────────────────┘│ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Applications + +### 1. 
Voxtype.app (Main Binary) + +**Location:** `/Applications/Voxtype.app/Contents/MacOS/voxtype` + +The core Rust binary that provides: +- `voxtype daemon` - Background service for voice transcription +- `voxtype status` - Check daemon state +- `voxtype record start/stop/toggle` - Manual recording control +- `voxtype setup` - Installation and model management + +**Key Files:** +- `src/daemon.rs` - Main event loop +- `src/hotkey_macos.rs` - macOS hotkey detection via rdev +- `src/notification.rs` - macOS notifications via mac-notification-sys +- `src/output/cgevent.rs` - Text output via CGEvent (macOS native) + +### 2. VoxtypeMenubar.app (Menu Bar Widget) + +**Location:** `/Applications/VoxtypeMenubar.app` + +Swift/SwiftUI app that provides: +- Menu bar icon showing daemon status +- Quick access to start/stop recording +- Quick settings (Engine, Output Mode, Hotkey Mode) +- Link to open VoxtypeSetup + +**Key Files:** +- `macos/VoxtypeMenubar/Sources/VoxtypeMenubarApp.swift` - App entry point +- `macos/VoxtypeMenubar/Sources/MenuBarView.swift` - Menu dropdown UI +- `macos/VoxtypeMenubar/Sources/VoxtypeStatusMonitor.swift` - Polls /tmp/voxtype/state +- `macos/VoxtypeMenubar/Sources/VoxtypeCLI.swift` - Runs voxtype CLI commands + +### 3. 
VoxtypeSetup.app (Settings Application) + +**Location:** `/Applications/VoxtypeSetup.app` + +Swift/SwiftUI app that provides: +- Full settings GUI with sidebar navigation +- Model download and management +- Permission status checking +- Daemon control (start/stop/restart) + +**Settings Sections:** +- General - Engine selection, daemon status +- Hotkey - Key selection, mode, cancel key +- Audio - Device, max duration, feedback +- Models - Installed models, download new +- Whisper - Language, translate, GPU isolation +- Remote Whisper - Server URL, API key +- Output - Mode, type delay, auto-submit +- Text Processing - Spoken punctuation, replacements +- Notifications - Event triggers, engine icon +- Permissions - macOS permissions status +- Advanced - Config file, logs, auto-start + +**Key Files:** +- `macos/VoxtypeSetup/Sources/VoxtypeSetupApp.swift` - App entry point +- `macos/VoxtypeSetup/Sources/Settings/*.swift` - Settings views +- `macos/VoxtypeSetup/Sources/Utilities/ConfigManager.swift` - Config read/write +- `macos/VoxtypeSetup/Sources/Utilities/VoxtypeCLI.swift` - CLI integration + +## Configuration + +**Config File:** `~/Library/Application Support/voxtype/config.toml` + +The ConfigManager (in VoxtypeSetup) handles section-aware config updates to prevent corruption. + +## macOS Permissions Required + +1. **Microphone** - For audio capture +2. **Input Monitoring** - For global hotkey detection (rdev library) +3. **Accessibility** - For typing text into applications (CGEvent) + +## LaunchAgent (Auto-Start) + +**Plist Location:** `~/Library/LaunchAgents/io.voxtype.daemon.plist` + +Managed via: +- `voxtype setup launchd` - Install service +- `voxtype setup launchd --uninstall` - Remove service + +## Build Process + +### Building Swift Apps + +```bash +# Build VoxtypeMenubar +cd macos/VoxtypeMenubar +./build-app.sh + +# Build VoxtypeSetup +cd macos/VoxtypeSetup +./build-app.sh +``` + +Build scripts: +1. Run `swift build -c release` +2. 
Create .app bundle structure +3. Generate AppIcon.icns from assets/icon.png +4. Create Info.plist +5. Code sign with entitlements + +### Building Rust Binary + +**All macOS binaries must include Parakeet support:** + +```bash +cargo build --release --features parakeet +cp target/release/voxtype /Applications/Voxtype.app/Contents/MacOS/ +``` + +## Known Issues / TODOs + +1. **Notification Icon** - Daemon notifications use default icon (not app icon) because daemon runs as CLI process, not from app bundle context + +2. **Audio Feedback** - Currently disabled on macOS due to "use_default" file dialog issue with rodio/cpal + +3. **Unsigned Binaries** - Apps are ad-hoc signed, require "Open Anyway" in Security settings + +4. **LaunchAgent Conflicts** - If launchd keeps restarting daemon, use `launchctl unload` before manual testing + +## File Locations Summary + +| Item | Path | +|------|------| +| Main binary | `/Applications/Voxtype.app/Contents/MacOS/voxtype` | +| Menubar app | `/Applications/VoxtypeMenubar.app` | +| Settings app | `/Applications/VoxtypeSetup.app` | +| Config file | `~/Library/Application Support/voxtype/config.toml` | +| Models | `~/Library/Application Support/voxtype/models/` | +| State file | `/tmp/voxtype/state` | +| PID file | `/tmp/voxtype/pid` | +| Lock file | `/tmp/voxtype/voxtype.lock` | +| LaunchAgent | `~/Library/LaunchAgents/io.voxtype.daemon.plist` | +| Logs | `~/Library/Logs/voxtype/` (if enabled) | diff --git a/docs/MACOS_TROUBLESHOOTING.md b/docs/MACOS_TROUBLESHOOTING.md new file mode 100644 index 00000000..5b408690 --- /dev/null +++ b/docs/MACOS_TROUBLESHOOTING.md @@ -0,0 +1,183 @@ +# Voxtype macOS Troubleshooting Checklist + +Use this checklist to debug issues and resume work after context resets. 
+ +## Quick Status Check + +```bash +# Check if daemon is running +ps aux | grep "[v]oxtype daemon" + +# Check daemon status +/Applications/Voxtype.app/Contents/MacOS/voxtype status + +# Check state file +cat /tmp/voxtype/state + +# Check config +cat "$HOME/Library/Application Support/voxtype/config.toml" | head -50 +``` + +## Common Issues + +### 1. "Another voxtype instance is already running" + +**Cause:** Stale lock file or launchd keeps restarting daemon. + +**Fix:** +```bash +# Stop launchd service +launchctl stop io.voxtype.daemon +launchctl unload ~/Library/LaunchAgents/io.voxtype.daemon.plist + +# Kill all instances +pkill -9 voxtype + +# Clean up lock files +rm -rf /tmp/voxtype + +# Start fresh +/Applications/Voxtype.app/Contents/MacOS/voxtype daemon & +``` + +### 2. Hotkey Not Working + +**Possible causes:** +- Wrong key configured +- Input Monitoring permission not granted +- Daemon not running + +**Debug:** +```bash +# Check current hotkey +grep "^key" "$HOME/Library/Application Support/voxtype/config.toml" + +# Run daemon with verbose output +pkill voxtype +rm -rf /tmp/voxtype +/Applications/Voxtype.app/Contents/MacOS/voxtype -vv daemon +``` + +**Fix permissions:** +- System Settings → Privacy & Security → Input Monitoring +- Add `/Applications/Voxtype.app` or the Terminal app + +### 3. "use_default" Dialog Appears + +**Cause:** `mac-notification-sys` crate looking for bundle identifier. + +**Fix:** Use osascript for notifications (already fixed in current code): +```rust +// In src/notification.rs, send_macos_native should use osascript +fn send_macos_native(title: &str, body: &str) { + send_macos_osascript_sync(title, body); +} +``` + +### 4. Config Changes Not Taking Effect + +**Cause:** Daemon needs restart after config changes. + +**Fix:** +```bash +pkill voxtype +rm -rf /tmp/voxtype +/Applications/Voxtype.app/Contents/MacOS/voxtype daemon & +``` + +### 5. 
Settings App Config Updates Corrupting File + +**Cause:** Old ConfigManager did global regex replace instead of section-aware updates. + +**Fix:** ConfigManager now does line-by-line, section-aware updates. If config is corrupted, reset: +```bash +# Backup current config +cp "$HOME/Library/Application Support/voxtype/config.toml" ~/config.toml.bak + +# Regenerate default config +/Applications/Voxtype.app/Contents/MacOS/voxtype setup --quiet +``` + +### 6. Audio Feedback "use_default" Dialog + +**Cause:** rodio/cpal audio output stream initialization on macOS. + +**Fix:** Disable audio feedback in config: +```toml +[audio.feedback] +enabled = false +``` + +### 7. Status Shows "stopped" But Daemon Is Running + +**Cause:** Multiple daemon processes or state file mismatch. + +**Fix:** +```bash +# Clean slate +pkill -9 voxtype +rm -rf /tmp/voxtype +sleep 2 +/Applications/Voxtype.app/Contents/MacOS/voxtype daemon & +sleep 3 +/Applications/Voxtype.app/Contents/MacOS/voxtype status +``` + +## Building and Installing + +### Rebuild Rust Binary +```bash +cd /path/to/voxtype # your local checkout of the repository +cargo build --release --features parakeet +cp target/release/voxtype /Applications/Voxtype.app/Contents/MacOS/ +``` + +### Rebuild Swift Apps +```bash +# Menubar +cd macos/VoxtypeMenubar +./build-app.sh +cp -r .build/VoxtypeMenubar.app /Applications/ + +# Settings +cd macos/VoxtypeSetup +./build-app.sh +cp -r .build/VoxtypeSetup.app /Applications/ +``` + +### Restart Apps After Rebuild +```bash +pkill -x VoxtypeMenubar +pkill -x VoxtypeSetup +pkill -x voxtype +rm -rf /tmp/voxtype +open /Applications/VoxtypeMenubar.app +/Applications/Voxtype.app/Contents/MacOS/voxtype daemon & +``` + +## Current Known Issues (as of session) + +1. **Notification icon** - Daemon uses osascript so notifications show Script Editor icon, not Voxtype icon. Menubar app notifications show correct icon. + +2. **Audio feedback disabled** - Causes "use_default" dialog on macOS. + +3. 
**Hotkey restart required** - Config changes to hotkey require daemon restart. Settings app now has "Restart Now" button. + +## File Locations Quick Reference + +| Item | Path | +|------|------| +| Config | `~/Library/Application Support/voxtype/config.toml` | +| Models | `~/Library/Application Support/voxtype/models/` | +| State | `/tmp/voxtype/state` | +| Lock | `/tmp/voxtype/voxtype.lock` | +| PID | `/tmp/voxtype/pid` | + +## Verification Steps + +After fixing an issue, verify: + +1. `voxtype status` returns `idle` +2. Pressing hotkey (default: Right Option) starts recording (state becomes `recording`) +3. Releasing hotkey transcribes and types text +4. Notification appears after transcription diff --git a/docs/MODEL_SELECTION_GUIDE.md b/docs/MODEL_SELECTION_GUIDE.md index d36d572d..c1f996f7 100644 --- a/docs/MODEL_SELECTION_GUIDE.md +++ b/docs/MODEL_SELECTION_GUIDE.md @@ -188,14 +188,14 @@ model = "parakeet-tdt-0.6b-v3-int8" - Best accuracy for English (~6% WER, top of HuggingFace ASR leaderboard) - Built-in punctuation and capitalization (TDT) - Fast even on CPU thanks to efficient FastConformer architecture -- GPU acceleration via CUDA, ROCm, or TensorRT +- GPU acceleration via CUDA, MIGraphX, or TensorRT **Cons:** - Limited to 25 European languages (no CJK, Arabic, Hindi, etc.) - Requires ONNX binary - Only one model size (0.6B parameters) -**GPU builds:** The ONNX binary variants include GPU support. `onnx-cuda` for NVIDIA, `onnx-rocm` for AMD. +**GPU builds:** The ONNX binary variants include GPU support. `onnx-cuda` for NVIDIA, `onnx-migraphx` for AMD. --- diff --git a/docs/SMOKE_TESTS.md b/docs/SMOKE_TESTS.md index 81f44cb7..aaa3d98b 100644 --- a/docs/SMOKE_TESTS.md +++ b/docs/SMOKE_TESTS.md @@ -1321,17 +1321,102 @@ test -f src/audio/media.rs && echo "media.rs exists" || echo "MISSING" ### Post-Process trim and fallback_on_empty (#270) -Verifies the new post-process config options work. 
+Verifies the post-process trim / fallback_on_empty config options end-to-end. + +#### Unit-level (fast) ```bash -# Unit tests -cargo test no_trim_preserves -- --nocapture -cargo test no_trim_still_strips -- --nocapture -cargo test no_fallback_on_empty -- --nocapture -cargo test fallback_on_empty_default -- --nocapture -# Expected: all 4 tests pass +# Behavior covered by tests in src/output/post_process.rs: +cargo test --lib output::post_process +# Expected: 21 passed (covers all four trim×fallback combinations +# plus whitespace-only output, multiline, unicode, timeout, etc.) +``` -# Structural verification +#### End-to-end · trim = true (default) + +```bash +# 1. Set up a post-process command that emits trailing whitespace. +# Backup the existing config first. +cp ~/.config/voxtype/config.toml ~/.config/voxtype/config.toml.bak + +cat >> ~/.config/voxtype/config.toml <<'EOF' + +[post_process] +command = "sed 's/$/ /'" +trim = true +fallback_on_empty = true +EOF + +systemctl --user restart voxtype + +# 2. Switch output mode to file so the result is observable. +voxtype record start --file=/tmp/voxtype-trim.txt +sleep 2 && say-something-out-loud +voxtype record stop --file=/tmp/voxtype-trim.txt + +# 3. Verify trailing whitespace was trimmed. +xxd /tmp/voxtype-trim.txt | tail -1 +# Expected: line ends with the last spoken word's bytes, no +# trailing 0x20 0x20 0x20 (the spaces sed appended). + +# 4. Restore config. +cp ~/.config/voxtype/config.toml.bak ~/.config/voxtype/config.toml +systemctl --user restart voxtype +``` + +#### End-to-end · fallback_on_empty = true + +```bash +# 1. Configure a post-process command that always returns empty. +cat >> ~/.config/voxtype/config.toml <<'EOF' + +[post_process] +command = "true" # exit 0, emit nothing +trim = true +fallback_on_empty = true +EOF + +systemctl --user restart voxtype + +# 2. Record and stop. 
+voxtype record start --file=/tmp/voxtype-fallback.txt +sleep 2 && say-something-out-loud +voxtype record stop --file=/tmp/voxtype-fallback.txt + +# 3. The transcript should still appear — fallback kept the original +# text instead of the empty post-process output. +cat /tmp/voxtype-fallback.txt +# Expected: non-empty file containing the spoken words. + +# 4. Restore config (from the backup taken in the trim = true test above) so +# the next test does not append a second [post_process] table. +cp ~/.config/voxtype/config.toml.bak ~/.config/voxtype/config.toml +systemctl --user restart voxtype +``` + +#### End-to-end · fallback_on_empty = false + +```bash +# 1. Same command, but flip fallback off. +cat >> ~/.config/voxtype/config.toml <<'EOF' + +[post_process] +command = "true" +trim = true +fallback_on_empty = false +EOF + +systemctl --user restart voxtype + +# 2. Record and stop. +voxtype record start --file=/tmp/voxtype-no-fallback.txt +sleep 2 && say-something-out-loud +voxtype record stop --file=/tmp/voxtype-no-fallback.txt + +# 3. The transcript should be empty — fallback disabled, post-process +# returned nothing, no fallback to original. +test ! -s /tmp/voxtype-no-fallback.txt && echo "PASS: empty output" +# Expected: PASS + +# 4. Restore config (from the backup taken in the trim = true test above). +cp ~/.config/voxtype/config.toml.bak ~/.config/voxtype/config.toml +systemctl --user restart voxtype +``` + +#### Structural verification + +```bash grep -c "trim\|fallback_on_empty" src/output/post_process.rs # Expected: 10+ references ``` diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index 0f9ed3e9..0d7b7d8f 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -318,9 +318,9 @@ curl -L -o ~/.local/share/voxtype/models/ggml-base.en.bin \ https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin ``` -### Voxtype crashes during transcription +### Voxtype crashes during transcription (Linux) -**Cause:** On some systems (particularly with glibc 2.42+ like Ubuntu 25.10), the whisper-rs FFI bindings crash due to C++ exceptions crossing the FFI boundary. 
**Solution:** Use the CLI backend which runs whisper-cli as a subprocess: @@ -685,74 +685,20 @@ mode = "paste" ### "ydotool daemon not running" -**Cause:** ydotool systemd service not started, or configured incorrectly for your distribution. - -**Solution:** The setup varies by distribution: - -#### Arch Linux (user service) - -Arch provides a user-level service that runs in your session: +**Cause:** ydotool systemd service not started. +**Solution:** ```bash -# Enable and start ydotool as a user service +# Enable and start ydotool systemctl --user enable --now ydotool # Verify it's running systemctl --user status ydotool -``` - -#### Fedora (system service) - -Fedora provides a system-level service that requires additional configuration to work with your user: - -```bash -# 1. Enable and start the system service -sudo systemctl enable --now ydotool - -# 2. Edit the service to allow your user to access the socket -sudo systemctl edit ydotool -``` - -Add this content (replace `1000` with your user/group ID from `id -u` and `id -g`): - -```ini -[Service] -ExecStart= -ExecStart=/usr/bin/ydotoold --socket-path="/run/user/1000/.ydotool_socket" --socket-own="1000:1000" -``` - -Then restart: - -```bash -sudo systemctl restart ydotool - -# Verify it's running -systemctl status ydotool -``` - -#### Ubuntu/Debian - -Check which service type is available: - -```bash -# Check for user service -systemctl --user status ydotool - -# If not found, check for system service -systemctl status ydotool -``` - -If only a system service exists, follow the Fedora instructions above. -#### Verify ydotool works - -```bash -# Test that ydotool can type -ydotool type "test" +# Check for errors +journalctl --user -u ydotool ``` -If you see "Failed to connect to socket", the daemon isn't running or the socket permissions are wrong. 
- ### Text not typed / nothing happens **Possible causes:** diff --git a/docs/USER_MANUAL.md b/docs/USER_MANUAL.md index b939da81..0f056130 100644 --- a/docs/USER_MANUAL.md +++ b/docs/USER_MANUAL.md @@ -128,6 +128,77 @@ Display the current configuration. voxtype config ``` +### `voxtype configure` + +Open an interactive terminal UI for editing every voxtype option. The TUI +edits `~/.config/voxtype/config.toml` directly, preserves comments and +unknown fields, and validates the file before swapping it in. Saving a +change that would break the daemon's parser leaves the on-disk file +untouched and reports the parse error. + +```bash +voxtype configure +``` + +The TUI is also surfaced as a `.desktop` entry, so it shows up in Walker, +fuzzel, rofi, KRunner, and GNOME Activities as **"Voxtype Configuration"**. +The launcher script picks the first available terminal emulator (`$TERMINAL`, +then ghostty / alacritty / kitty / foot / wezterm / konsole / xterm) and +sets the window class to `voxtype` so compositors can float it. 
+ +#### Sections + +| Section | What it covers | +|---|---| +| General | Active engine, variant binary, daemon status, hardware-aware variant recommendation | +| Engine | Per-engine tuning for Whisper / Parakeet / Moonshine / SenseVoice / Paraformer / Dolphin / Omnilingual / Cohere | +| Hotkey | PTT key, mode (PTT vs toggle), cancel key, modifier key, evdev-listener toggle | +| Audio | Input device, max recording length, MPRIS-pause, audio feedback theme/volume | +| Output | Mode (type/clipboard/paste/file), driver order, auto-submit, post-process command | +| Text | Spoken-punctuation toggle, smart-auto-submit, custom replacements list editor | +| VAD | Silero VAD enable, backend (auto/energy/whisper), threshold | +| Meeting | Meeting mode enable, speaker diarization, audio source (mic/system/both) | +| Notifications | Desktop notifications for recording start/stop and transcription | +| Waybar | Status integration: icon theme + per-state icon overrides | +| Advanced | GPU isolation, on-demand model loading, flash attention, eager processing, GPU device | + +#### Keyboard + +``` +Global Tab / Esc focus toggle ? help overlay q quit +Sidebar ↑↓ / jk navigate Enter / → / l open section +Section form ↑↓ navigate fields ←→ / hl cycle value + Space toggle Enter / i edit text field + s save r revert +Text editor type insert ←→ / Home / End / Ctrl-A/E + Backspace / Delete Ctrl-W delete word + Ctrl-U clear Enter commit / Esc cancel +``` + +Press `?` from anywhere in the TUI for the same reference as a popup. + +#### Hardware-aware recommendations + +The General section detects your CPU (AVX2 / AVX-512) and GPU (NVIDIA / +AMD), then marks the recommended variant for each engine family with `★`. +The About pane explains the choice (`"AMD GPU detected. The MIGraphX +execution provider is new… ONNX (AVX-512) on CPU is the safe default."`). + +When you switch the engine on the Engine page, the TUI also picks the +matching binary variant if one is needed (e.g. 
moving from Whisper to +Parakeet swaps the symlink at `/usr/bin/voxtype` from a Whisper variant to +an ONNX variant). The actual symlink change runs through `pkexec` so you +get the standard polkit prompt. + +#### Compositor binding awareness + +If you have the evdev listener disabled and rely on compositor bindings, +the Hotkey screen scans `~/.config/hypr/*.conf`, `~/.config/sway/config*`, +and `~/.config/niri/config.kdl` for any `voxtype record` / `voxtype meeting` +bindings, lists them in the About pane, and suggests config-format-specific +snippets for any standard role you haven't bound (cancel, toggle, meeting +start/stop). Suggestions skip key combos already in use by other actions. + ### `voxtype status` Query the daemon's current state (for Waybar/Polybar integration). @@ -642,6 +713,7 @@ Voxtype supports seven speech-to-text engines. Whisper uses whisper.cpp and work | **Paraformer** | Chinese + English dictation | No | Chinese (with English code-switching) | | **Dolphin** | Dictation-optimized, fast CTC | No | Chinese + English | | **Omnilingual** | Broadest language coverage in ONNX engines | No | 50+ languages | +| **Cohere** | #1 Open ASR Leaderboard accuracy | Optional (CUDA via `cohere-cuda`) | Arabic, German, English, Spanish, French, Hindi, Italian, Japanese, Korean, Dutch, Portuguese, Russian, Turkish, Chinese (14) | ### Selecting an Engine @@ -658,6 +730,7 @@ engine = "sensevoice" engine = "paraformer" engine = "dolphin" engine = "omnilingual" +engine = "cohere" ``` **Via CLI flag** (overrides config): @@ -670,9 +743,10 @@ voxtype --engine sensevoice daemon voxtype --engine paraformer daemon voxtype --engine dolphin daemon voxtype --engine omnilingual daemon +voxtype --engine cohere daemon ``` -Valid `--engine` values: `whisper`, `parakeet`, `moonshine`, `sensevoice`, `paraformer`, `dolphin`, `omnilingual`. +Valid `--engine` values: `whisper`, `parakeet`, `moonshine`, `sensevoice`, `paraformer`, `dolphin`, `omnilingual`, `cohere`. 
### Switching to an ONNX Engine @@ -841,6 +915,46 @@ model = "omnilingual-large" # Default model # on_demand_loading = false ``` +### Cohere Transcribe + +Cohere Transcribe is an encoder-decoder ASR model from Cohere Labs running via ONNX Runtime. It currently sits at #1 on the Open ASR Leaderboard. It offers: + +- Best-in-class accuracy on a wide range of audio (5.42 average WER on the leaderboard) +- Support for 14 languages with a single model +- Whisper-style task tokens for punctuation, capitalization, and inverse text normalization +- Optional CUDA acceleration via the `cohere-cuda` feature + +The trade-off: it's the largest model voxtype ships at ~3.1 GB on disk for the int8 quantization. Plan accordingly on laptops with limited storage. + +**Requirements:** +- An ONNX-enabled binary (`voxtype-*-onnx-*`) +- ~3.1 GB free disk space for the model +- The Cohere Transcribe model downloaded (`voxtype setup model`, then pick the Cohere section) + +**Configuration:** + +```toml +engine = "cohere" + +[cohere] +model = "cohere-transcribe-int8" # Default model +language = "en" # One of: ar, de, en, es, fr, hi, it, ja, ko, nl, pt, ru, tr, zh +# threads = 4 +# on_demand_loading = false +``` + +**Supported languages:** + +Arabic (`ar`), German (`de`), English (`en`, default), Spanish (`es`), French (`fr`), Hindi (`hi`), Italian (`it`), Japanese (`ja`), Korean (`ko`), Dutch (`nl`), Portuguese (`pt`), Russian (`ru`), Turkish (`tr`), Mandarin Chinese (`zh`). + +**Installing:** + +```bash +voxtype setup model # Pick the Cohere section, confirm the size warning +``` + +The download fetches five files from the `cstr/cohere-transcribe-onnx-int8` HuggingFace repository (Apache 2.0 licensed, not gated): the encoder/decoder ONNX graphs, their weight sidecars, and `tokens.txt`. + --- ## Multi-Model Support @@ -1303,7 +1417,6 @@ Then record for 10+ seconds. 
You should see log messages like: ``` --- - ## Output Modes ### Type Mode (Default) @@ -1522,6 +1635,45 @@ Or via environment variable for the whole session: VOXTYPE_SMART_AUTO_SUBMIT=true voxtype ``` +**Filter filler words ("uh", "um", ...):** + +Voxtype filters single-syllable filler words by default. To turn it off: + +```toml +[text] +filter_filler_words = false +``` + +When enabled (the default), Voxtype strips common filler words from each transcription before output and cleans up the surrounding punctuation. Word boundaries are respected, so "umbrella" and "summer" are untouched. + +``` +# You say: "Well, um, I think we should ship it" +# Voxtype types: "Well, I think we should ship it" +``` + +The default list contains single-syllable disfluencies: `uh`, `um`, `er`, `ah`, `eh`, `hmm`, `hm`, `mm`, `mhm`. Override it to add your own words: + +```toml +[text] +filter_filler_words = true +filler_words = ["uh", "um", "er", "like", "you know"] +``` + +CLI flag (overrides config for the running daemon): + +```bash +voxtype --filter-fillers # force on +voxtype --no-filter-fillers # force off +``` + +Or via environment variable: + +```bash +VOXTYPE_FILTER_FILLERS=true voxtype +``` + +The filter runs before `replacements` and the `[post_process]` LLM hook, so any custom replacements still apply on top of filtered text. + **Shift+Enter for newlines:** ```toml diff --git a/examples/inspect_cohere_onnx.rs b/examples/inspect_cohere_onnx.rs new file mode 100644 index 00000000..e2e157aa --- /dev/null +++ b/examples/inspect_cohere_onnx.rs @@ -0,0 +1,69 @@ +//! Quick inspector for the cstr/cohere-transcribe-onnx-int8 model files. +//! Run with: +//! cargo run --example inspect_cohere_onnx --features cohere -- \ +//! 
~/.cache/voxtype-models/cohere-transcribe-int8 + +use std::env; +use std::path::PathBuf; + +use ort::session::Session; +use ort::value::ValueType; + +fn type_str(ty: &ValueType) -> String { + match ty { + ValueType::Tensor { + ty, + shape, + dimension_symbols, + } => { + let dims: Vec = shape + .iter() + .zip(dimension_symbols.iter()) + .map(|(d, sym)| { + if *d < 0 { + let s: &str = sym.as_ref(); + if s.is_empty() { + "?".into() + } else { + format!("?:{s}") + } + } else { + d.to_string() + } + }) + .collect(); + format!("Tensor<{ty:?}>[{}]", dims.join(", ")) + } + other => format!("{other:?}"), + } +} + +fn dump(label: &str, path: &PathBuf) -> ort::Result<()> { + println!("\n=== {label} ==="); + println!("file: {}", path.display()); + let session = Session::builder()?.commit_from_file(path)?; + println!("inputs:"); + for (i, input) in session.inputs().iter().enumerate() { + println!(" [{i}] {} : {}", input.name(), type_str(input.dtype())); + } + println!("outputs:"); + for (i, output) in session.outputs().iter().enumerate() { + println!(" [{i}] {} : {}", output.name(), type_str(output.dtype())); + } + Ok(()) +} + +fn main() -> ort::Result<()> { + let dir: PathBuf = env::args() + .nth(1) + .map(PathBuf::from) + .expect("usage: inspect_cohere_onnx "); + // Tolerate one model failing to load so we get whichever is ready. 
+ if let Err(e) = dump("encoder", &dir.join("cohere-encoder.int8.onnx")) { + println!("encoder failed: {e}"); + } + if let Err(e) = dump("decoder", &dir.join("cohere-decoder.int8.onnx")) { + println!("decoder failed: {e}"); + } + Ok(()) +} diff --git a/flake.lock b/flake.lock index 7b5fb122..f1943347 100644 --- a/flake.lock +++ b/flake.lock @@ -20,11 +20,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1767767207, - "narHash": "sha256-Mj3d3PfwltLmukFal5i3fFt27L6NiKXdBezC1EBuZs4=", + "lastModified": 1774386573, + "narHash": "sha256-4hAV26quOxdC6iyG7kYaZcM3VOskcPUrdCQd/nx8obc=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "5912c1772a44e31bf1c63c0390b90501e5026886", + "rev": "46db2e09e1d3f113a13c0d7b81e2f221c63b8ce9", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index 0cc0dd86..14b897a4 100644 --- a/flake.nix +++ b/flake.nix @@ -67,10 +67,10 @@ "omnilingual-cuda" ]; - # Only Parakeet has ROCm support; other engines run on CPU - onnxRocmFeatures = [ + # Only Parakeet has AMD GPU support (via MIGraphX); other engines run on CPU + onnxMigraphxFeatures = [ "parakeet-load-dynamic" - "parakeet-rocm" + "parakeet-migraphx" "moonshine" "sensevoice" "paraformer" @@ -93,7 +93,7 @@ # Wrap an ONNX package with runtime dependencies and ORT_DYLIB_PATH # ONNX engines need ONNX Runtime at runtime for inference libExt = if pkgs.stdenv.isDarwin then "dylib" else "so"; - wrapOnnx = { onnxruntime ? pkgs.onnxruntime, pkg }: pkgs.symlinkJoin { + wrapOnnx = { onnxruntime ? pkgs.onnxruntime, pkg, extraWrapperArgs ? 
"" }: pkgs.symlinkJoin { name = "${pkg.pname or "voxtype"}-wrapped-${pkg.version}"; paths = [ pkg ]; buildInputs = [ pkgs.makeWrapper ]; @@ -101,15 +101,26 @@ wrapProgram $out/bin/voxtype \ --prefix PATH : ${pkgs.lib.makeBinPath runtimeDeps} \ --set ORT_DYLIB_PATH "${onnxruntime}/lib/libonnxruntime.${libExt}" \ - --prefix LD_LIBRARY_PATH : "${onnxruntime}/lib" + --prefix LD_LIBRARY_PATH : "${onnxruntime}/lib" \ + ${extraWrapperArgs} ''; inherit (pkg) meta; }; + # Extra wrapper args for MIGraphX (ROCm) to set cache directory + migraphxWrapperArgs = '' + --run ' + : "''${ORT_MIGRAPHX_MODEL_CACHE_PATH:=''${XDG_CACHE_HOME:-$HOME/.cache}/voxtype/migraphx}" + export ORT_MIGRAPHX_MODEL_CACHE_PATH + mkdir -p "$ORT_MIGRAPHX_MODEL_CACHE_PATH" + ' + ''; + # ONNX Runtime variants for different GPU backends onnxruntimeCuda = pkgsUnfree.onnxruntime.override { cudaSupport = true; }; onnxruntimeRocm = pkgs.onnxruntime.override { rocmSupport = true; }; + # Base derivation for voxtype (unwrapped) mkVoxtypeUnwrapped = { pname ? "voxtype", features ? [], extraNativeBuildInputs ? [], extraBuildInputs ? 
[] }: pkgs.rustPlatform.buildRustPackage { @@ -253,12 +264,12 @@ ORT_LIB_LOCATION = "${onnxruntimeCuda}/lib"; }); - # Build the ONNX + ROCm variant for AMD GPUs - # Only Parakeet gets ROCm acceleration; other engines run on CPU - onnxRocmUnwrapped = let + # Build the ONNX + MIGraphX variant for AMD GPUs + # Only Parakeet gets AMD GPU acceleration; other engines run on CPU + onnxMigraphxUnwrapped = let pkg = mkVoxtypeUnwrapped { - pname = "voxtype-onnx-rocm"; - features = onnxRocmFeatures; + pname = "voxtype-onnx-migraphx"; + features = onnxMigraphxFeatures; extraNativeBuildInputs = with pkgs; [ rocmPackages.clr ]; @@ -272,6 +283,7 @@ ORT_LIB_LOCATION = "${onnxruntimeRocm}/lib"; }); + in { packages = { # Wrapped packages (ready to use, runtime deps in PATH) @@ -284,12 +296,16 @@ # Paraformer, Dolphin, Omnilingual) onnx = wrapOnnx { pkg = onnxUnwrapped; }; onnx-cuda = wrapOnnx { onnxruntime = onnxruntimeCuda; pkg = onnxCudaUnwrapped; }; - onnx-rocm = wrapOnnx { onnxruntime = onnxruntimeRocm; pkg = onnxRocmUnwrapped; }; + onnx-migraphx = wrapOnnx { onnxruntime = onnxruntimeRocm; pkg = onnxMigraphxUnwrapped; extraWrapperArgs = migraphxWrapperArgs; }; - # Backwards-compatible aliases (parakeet → onnx) + # Backwards-compatible aliases (parakeet → onnx, rocm → migraphx) parakeet = wrapOnnx { pkg = onnxUnwrapped; }; parakeet-cuda = wrapOnnx { onnxruntime = onnxruntimeCuda; pkg = onnxCudaUnwrapped; }; - parakeet-rocm = wrapOnnx { onnxruntime = onnxruntimeRocm; pkg = onnxRocmUnwrapped; }; + parakeet-migraphx = wrapOnnx { onnxruntime = onnxruntimeRocm; pkg = onnxMigraphxUnwrapped; extraWrapperArgs = migraphxWrapperArgs; }; + # Legacy: rocm → migraphx (drop in v0.8.0) + onnx-rocm = wrapOnnx { onnxruntime = onnxruntimeRocm; pkg = onnxMigraphxUnwrapped; extraWrapperArgs = migraphxWrapperArgs; }; + parakeet-rocm = wrapOnnx { onnxruntime = onnxruntimeRocm; pkg = onnxMigraphxUnwrapped; extraWrapperArgs = migraphxWrapperArgs; }; + # Unwrapped packages (for custom wrapping scenarios) 
voxtype-unwrapped = mkVoxtypeUnwrapped {}; @@ -297,12 +313,15 @@ voxtype-rocm-unwrapped = rocmUnwrapped; voxtype-onnx-unwrapped = onnxUnwrapped; voxtype-onnx-cuda-unwrapped = onnxCudaUnwrapped; - voxtype-onnx-rocm-unwrapped = onnxRocmUnwrapped; + voxtype-onnx-migraphx-unwrapped = onnxMigraphxUnwrapped; # Backwards-compatible aliases voxtype-parakeet-unwrapped = onnxUnwrapped; voxtype-parakeet-cuda-unwrapped = onnxCudaUnwrapped; - voxtype-parakeet-rocm-unwrapped = onnxRocmUnwrapped; + voxtype-parakeet-migraphx-unwrapped = onnxMigraphxUnwrapped; + # Legacy + voxtype-onnx-rocm-unwrapped = onnxMigraphxUnwrapped; + voxtype-parakeet-rocm-unwrapped = onnxMigraphxUnwrapped; }; # Development shell with all dependencies diff --git a/macos/README.md b/macos/README.md new file mode 100644 index 00000000..2be5fee2 --- /dev/null +++ b/macos/README.md @@ -0,0 +1,45 @@ +# Voxtype macOS + +This directory contains macOS-specific code, separate from the cross-platform Rust core. + +## VoxtypeSetup + +A native SwiftUI app that provides: + +1. **Setup Wizard** - First-run experience that guides users through: + - Granting permissions (Microphone, Accessibility, Input Monitoring) + - Downloading a speech model + - Installing the LaunchAgent for auto-start + +2. **Preferences** - Settings panel for changing: + - Speech engine (Parakeet vs Whisper) + - Model selection + - Auto-start toggle + - Daemon control + +## Building + +```bash +cd macos/VoxtypeSetup +swift build -c release + +# Or open in Xcode +open Package.swift +``` + +## Architecture + +The SwiftUI app is a thin GUI layer. All actual functionality is delegated to the +`voxtype` Rust binary via CLI calls: + +- `VoxtypeCLI.swift` - Wrapper that calls voxtype commands +- `PermissionChecker.swift` - Native macOS permission checking + +This keeps business logic in Rust while providing a native Mac experience. + +## Distribution + +The setup app can be: +1. Bundled inside Voxtype.app as a helper +2. 
Distributed separately as VoxtypeSetup.app +3. Invoked via `voxtype setup macos --gui` (if integrated) diff --git a/macos/RELEASE_PLAN.md b/macos/RELEASE_PLAN.md new file mode 100644 index 00000000..65be01d3 --- /dev/null +++ b/macos/RELEASE_PLAN.md @@ -0,0 +1,135 @@ +# macOS Release Plan + +Status: In Progress +Branch: feature/macos-release +Target: Merge to main, Homebrew distribution, then signed distribution + +--- + +## Phase 1: Rebase and Linux Validation + +### 1.1 Rebase onto main (v0.5.3) +- [ ] `git fetch origin` +- [ ] `git rebase origin/main` +- [ ] Resolve any conflicts +- [ ] Verify build after rebase + +### 1.2 Validate Linux compilation +- [ ] Run `cargo check` (quick syntax/type check) +- [ ] Run `cargo build --release` on Linux (via Docker or remote) +- [ ] Run `cargo test` to verify no regressions +- [ ] Confirm macOS-specific code is properly gated with `#[cfg(target_os = "macos")]` + +--- + +## Phase 2: macOS Build and Homebrew + +### 2.1 Build macOS binary +- [ ] `cargo build --release` on macOS +- [ ] Verify binary works: `./target/release/voxtype --version` +- [ ] Test basic functionality (record, transcribe, output) + +### 2.2 Build SwiftUI Setup App +- [ ] `cd macos/VoxtypeSetup && ./build-app.sh` +- [ ] Test setup wizard flow +- [ ] Test preferences panel + +### 2.3 Create Homebrew formula +- [ ] Create formula in homebrew-voxtype tap +- [ ] Test `brew install --build-from-source` +- [ ] Test `brew install` from bottle (if available) + +--- + +## Phase 3: Signed Distribution + +### 3.1 Apple Developer Setup +- [ ] Ensure Apple Developer account is active +- [ ] Create/verify Developer ID Application certificate +- [ ] Create/verify Developer ID Installer certificate (for pkg) +- [ ] Set up notarization credentials (app-specific password or API key) + +### 3.2 Code Signing +- [ ] Sign voxtype binary with Developer ID +- [ ] Sign VoxtypeSetup.app with Developer ID +- [ ] Verify signatures: `codesign -dv --verbose=4` + +### 3.3 Notarization 
+- [ ] Submit for notarization: `xcrun notarytool submit` +- [ ] Wait for approval +- [ ] Staple ticket: `xcrun stapler staple` + +### 3.4 Distribution Package (choose one or both) + +#### Option A: DMG Installer +- [ ] Create DMG with app bundle and symlink to /Applications +- [ ] Sign DMG +- [ ] Notarize DMG +- [ ] Test fresh install on clean Mac + +#### Option B: Mac App Store (more restrictive) +- [ ] Create App Store Connect record +- [ ] Add required entitlements +- [ ] Sandbox compliance (may require significant changes) +- [ ] Submit for review + +**Recommendation:** Start with DMG. App Store sandboxing may conflict with: +- Accessibility permission requirements +- Input monitoring +- LaunchAgent installation +- Calling external binaries + +--- + +## Current State + +### Completed +- [x] Basic macOS daemon functionality +- [x] LaunchAgent for auto-start +- [x] Hotkey detection via rdev +- [x] Audio capture via cpal +- [x] Text output via Accessibility API +- [x] Notifications +- [x] SwiftUI Setup App scaffolded (needs testing) + +### In Progress +- [ ] Rebase onto v0.5.3 + +### Blocked +- [ ] Signed distribution (needs Phase 1-2 complete) + +--- + +## Commands Reference + +```bash +# Rebase +git fetch origin && git rebase origin/main + +# Linux check (Docker) +docker run --rm -v $(pwd):/src -w /src rust:latest cargo check + +# macOS build +cargo build --release + +# SwiftUI app build +cd macos/VoxtypeSetup && ./build-app.sh + +# Sign binary +codesign --force --options runtime --sign "Developer ID Application: YOUR NAME" target/release/voxtype + +# Notarize +xcrun notarytool submit app.zip --apple-id EMAIL --team-id TEAM --password APP_PASSWORD --wait + +# Staple +xcrun stapler staple Voxtype.app +``` + +--- + +## Notes + +- SwiftUI app requires macOS 13+ +- Homebrew formula should handle both Intel and Apple Silicon +- DMG is simpler for initial release; App Store can come later +- Keep CLI setup as fallback for power users / Homebrew installs diff --git 
a/macos/VoxtypeMenubar/.gitignore b/macos/VoxtypeMenubar/.gitignore new file mode 100644 index 00000000..2d9f16e2 --- /dev/null +++ b/macos/VoxtypeMenubar/.gitignore @@ -0,0 +1,2 @@ +.build/ +.swiftpm/ diff --git a/macos/VoxtypeMenubar/Package.swift b/macos/VoxtypeMenubar/Package.swift new file mode 100644 index 00000000..d8d938a8 --- /dev/null +++ b/macos/VoxtypeMenubar/Package.swift @@ -0,0 +1,18 @@ +// swift-tools-version: 5.9 +import PackageDescription + +let package = Package( + name: "VoxtypeMenubar", + platforms: [ + .macOS(.v13) + ], + products: [ + .executable(name: "VoxtypeMenubar", targets: ["VoxtypeMenubar"]) + ], + targets: [ + .executableTarget( + name: "VoxtypeMenubar", + path: "Sources" + ) + ] +) diff --git a/macos/VoxtypeMenubar/Sources/MenuBarView.swift b/macos/VoxtypeMenubar/Sources/MenuBarView.swift new file mode 100644 index 00000000..7db1ee6f --- /dev/null +++ b/macos/VoxtypeMenubar/Sources/MenuBarView.swift @@ -0,0 +1,203 @@ +import SwiftUI + +struct MenuBarView: View { + @EnvironmentObject var statusMonitor: VoxtypeStatusMonitor + + var body: some View { + VStack(alignment: .leading, spacing: 0) { + // Status header - single line with icon, name, and status + Label { + Text("Voxtype · \(statusMonitor.statusText)") + } icon: { + Image(systemName: statusMonitor.iconName) + .foregroundColor(statusColor) + } + .font(.headline) + .padding(.horizontal, 12) + .padding(.vertical, 8) + + Divider() + + // Recording controls + Button(action: toggleRecording) { + Label("Toggle Recording", systemImage: "record.circle") + } + .keyboardShortcut("r", modifiers: []) + .disabled(!statusMonitor.daemonRunning) + + Button(action: cancelRecording) { + Label("Cancel Recording", systemImage: "xmark.circle") + } + .disabled(statusMonitor.state != .recording) + + Divider() + + // Quick settings menus (at top level) + Menu("Engine") { + Button("Parakeet (Fast)") { + setEngine("parakeet") + } + Button("Whisper") { + setEngine("whisper") + } + } + + Menu("Output Mode") 
{ + Button("Type Text") { + setOutputMode("type") + } + Button("Clipboard") { + setOutputMode("clipboard") + } + Button("Clipboard + Paste") { + setOutputMode("paste") + } + } + + Menu("Hotkey Mode") { + Button("Push-to-Talk (hold)") { + setHotkeyMode("push_to_talk") + } + Button("Toggle (press)") { + setHotkeyMode("toggle") + } + } + + Divider() + + Button(action: openSettings) { + Label("Settings", systemImage: "gearshape") + } + + Button(action: restartDaemon) { + Label("Restart Daemon", systemImage: "arrow.clockwise") + } + + Button(action: viewLogs) { + Label("View Logs", systemImage: "doc.text") + } + + Divider() + + Button(action: quitApp) { + Label("Quit Voxtype Menu Bar", systemImage: "power") + } + .keyboardShortcut("q", modifiers: .command) + } + } + + private var statusColor: Color { + switch statusMonitor.state { + case .idle: + return .green + case .recording: + return .red + case .transcribing: + return .orange + case .stopped: + return .gray + } + } + + // MARK: - Actions + + private func toggleRecording() { + VoxtypeCLI.run(["record", "toggle"]) + } + + private func cancelRecording() { + VoxtypeCLI.run(["record", "cancel"]) + } + + private func setEngine(_ engine: String) { + // Update config file + updateConfig(key: "engine", value: "\"\(engine)\"", section: nil) + showNotification(title: "Voxtype", message: "Engine set to \(engine). Restart daemon to apply.") + } + + private func setOutputMode(_ mode: String) { + updateConfig(key: "mode", value: "\"\(mode)\"", section: "[output]") + } + + private func setHotkeyMode(_ mode: String) { + updateConfig(key: "mode", value: "\"\(mode)\"", section: "[hotkey]") + showNotification(title: "Voxtype", message: "Hotkey mode changed. 
Restart daemon to apply.") + } + + private func openSettings() { + // Try multiple locations for VoxtypeSetup + let possiblePaths = [ + // Inside main app bundle + "/Applications/Voxtype.app/Contents/MacOS/VoxtypeSetup", + // Standalone app in Applications + "/Applications/VoxtypeSetup.app", + // Next to this menubar app + Bundle.main.bundlePath.replacingOccurrences(of: "VoxtypeMenubar.app", with: "VoxtypeSetup.app"), + ] + + for path in possiblePaths { + if path.hasSuffix(".app") { + // It's an app bundle + if FileManager.default.fileExists(atPath: path) { + NSWorkspace.shared.open(URL(fileURLWithPath: path)) + return + } + } else { + // It's a binary + if FileManager.default.fileExists(atPath: path) { + do { + try Process.run(URL(fileURLWithPath: path), arguments: []) + return + } catch { + continue + } + } + } + } + + // Fallback: show notification that settings app not found + showNotification(title: "Voxtype", message: "Settings app not found. Edit config at ~/Library/Application Support/voxtype/config.toml") + } + + private func restartDaemon() { + VoxtypeCLI.run(["daemon", "restart"], wait: false) + showNotification(title: "Voxtype", message: "Restarting daemon...") + } + + private func viewLogs() { + let logsPath = NSHomeDirectory() + "/Library/Logs/voxtype" + NSWorkspace.shared.open(URL(fileURLWithPath: logsPath)) + } + + private func quitApp() { + NSApplication.shared.terminate(nil) + } + + // MARK: - Helpers + + private func updateConfig(key: String, value: String, section: String?) { + let configPath = NSHomeDirectory() + "/Library/Application Support/voxtype/config.toml" + + guard var content = try? String(contentsOfFile: configPath, encoding: .utf8) else { + return + } + + let pattern = "\(key)\\s*=\\s*\"[^\"]*\"" + let replacement = "\(key) = \(value)" + + if let regex = try? 
NSRegularExpression(pattern: pattern, options: []) { + // Confine the rewrite to the requested table. Keys such as `mode` exist under both [output] and [hotkey], so replacing across the whole file would overwrite every table's value at once. + let scopeStart: String.Index + if let section = section { + // Unknown table: leave the file untouched rather than editing some other table's key. + guard let header = content.range(of: section) else { return } + scopeStart = header.upperBound + } else { + // No section means a top-level key; in TOML those precede the first table header. + scopeStart = content.startIndex + } + // The scope ends at the next line that opens a table ("[" at column 0), or at end of file. + let scopeEnd = content.range(of: "\n[", range: scopeStart..<content.endIndex)?.lowerBound ?? content.endIndex + let range = NSRange(scopeStart..<scopeEnd, in: content) + content = regex.stringByReplacingMatches(in: content, options: [], range: range, withTemplate: replacement) + } + + try? content.write(toFile: configPath, atomically: true, encoding: .utf8) + } + + private func showNotification(title: String, message: String) { + let script = "display notification \"\(message)\" with title \"\(title)\"" + if let appleScript = NSAppleScript(source: script) { + var error: NSDictionary? + appleScript.executeAndReturnError(&error) + } + } +} diff --git a/macos/VoxtypeMenubar/Sources/VoxtypeCLI.swift b/macos/VoxtypeMenubar/Sources/VoxtypeCLI.swift new file mode 100644 index 00000000..10f5b31c --- /dev/null +++ b/macos/VoxtypeMenubar/Sources/VoxtypeCLI.swift @@ -0,0 +1,77 @@ +import Foundation + +/// Helper to run voxtype CLI commands +enum VoxtypeCLI { + /// Path to voxtype binary + static var binaryPath: String { + // First try the app bundle location (works for both VoxtypeMenubar.app and VoxtypeSetup.app) + let bundlePath = Bundle.main.bundlePath + let parentDir = (bundlePath as NSString).deletingLastPathComponent + let siblingBinaryPath = (parentDir as NSString).appendingPathComponent("Voxtype.app/Contents/MacOS/voxtype") + + if FileManager.default.fileExists(atPath: siblingBinaryPath) { + return siblingBinaryPath + } + + // Try /Applications + let applicationsPath = "/Applications/Voxtype.app/Contents/MacOS/voxtype" + if FileManager.default.fileExists(atPath: applicationsPath) { + return applicationsPath + } + + // Try homebrew symlink + let homebrewPath = "/opt/homebrew/bin/voxtype" + if FileManager.default.fileExists(atPath: homebrewPath) { + return homebrewPath + } + + // Try ~/.local/bin + let localBinPath = NSHomeDirectory() + "/.local/bin/voxtype" + if FileManager.default.fileExists(atPath: localBinPath) { + return localBinPath + } + + // Fallback to PATH + return "voxtype" + } + + /// Run a voxtype command + 
@discardableResult + static func run(_ arguments: [String], wait: Bool = true) -> (output: String, success: Bool) { + let task = Process() + let binary = binaryPath + if binary.contains("/") { + task.executableURL = URL(fileURLWithPath: binary) + task.arguments = arguments + } else { + // Process does not search PATH for a bare command name, so let env(1) resolve the "voxtype" fallback. + task.executableURL = URL(fileURLWithPath: "/usr/bin/env") + task.arguments = [binary] + arguments + } + + guard wait else { + // Fire-and-forget: nothing reads the output, so discard it instead of attaching a pipe that is never drained. + task.standardOutput = FileHandle.nullDevice + task.standardError = FileHandle.nullDevice + do { + try task.run() + return ("", true) + } catch { + return ("Error: \(error.localizedDescription)", false) + } + } + + let pipe = Pipe() + task.standardOutput = pipe + task.standardError = pipe + + do { + try task.run() + // Drain the pipe before waiting; a child that fills the pipe buffer would otherwise block and never exit. + let data = pipe.fileHandleForReading.readDataToEndOfFile() + task.waitUntilExit() + let output = String(data: data, encoding: .utf8) ?? "" + return (output, task.terminationStatus == 0) + } catch { + return ("Error: \(error.localizedDescription)", false) + } + } + + /// Get daemon status + static func getStatus() -> String { + let result = run(["status"]) + return result.output.trimmingCharacters(in: .whitespacesAndNewlines) + } + + /// Check if daemon is running + static func isDaemonRunning() -> Bool { + let result = run(["status"]) + let status = result.output.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + return status == "idle" || status == "recording" || status == "transcribing" + } +} diff --git a/macos/VoxtypeMenubar/Sources/VoxtypeMenubarApp.swift b/macos/VoxtypeMenubar/Sources/VoxtypeMenubarApp.swift new file mode 100644 index 00000000..b5ef4cea --- /dev/null +++ b/macos/VoxtypeMenubar/Sources/VoxtypeMenubarApp.swift @@ -0,0 +1,17 @@ +import SwiftUI + +@main +struct VoxtypeMenubarApp: App { + @StateObject private var statusMonitor = VoxtypeStatusMonitor() + + var body: some Scene { + MenuBarExtra { + MenuBarView() + .environmentObject(statusMonitor) + } label: { + Image(systemName: statusMonitor.iconName) + .symbolRenderingMode(.hierarchical) + } + .menuBarExtraStyle(.menu) + } +} diff --git a/macos/VoxtypeMenubar/Sources/VoxtypeStatusMonitor.swift b/macos/VoxtypeMenubar/Sources/VoxtypeStatusMonitor.swift new file mode 100644 index 00000000..0fa22d77 --- /dev/null +++ b/macos/VoxtypeMenubar/Sources/VoxtypeStatusMonitor.swift @@ -0,0 +1,122 @@ +import Foundation +import Combine + +/// 
Monitors voxtype daemon state by watching the state file +class VoxtypeStatusMonitor: ObservableObject { + @Published var state: VoxtypeState = .stopped + @Published var daemonRunning: Bool = false + + private var timer: Timer? + private let stateFilePath = "/tmp/voxtype/state" + + var iconName: String { + switch state { + case .idle: + return "mic.fill" + case .recording: + return "mic.badge.plus" + case .transcribing: + return "ellipsis.circle.fill" + case .stopped: + return "mic.slash.fill" + } + } + + var statusText: String { + switch state { + case .idle: + return "Ready" + case .recording: + return "Recording..." + case .transcribing: + return "Transcribing..." + case .stopped: + return "Daemon not running" + } + } + + init() { + startMonitoring() + } + + deinit { + stopMonitoring() + } + + func startMonitoring() { + // Check immediately + updateState() + + // Then poll every 500ms + timer = Timer.scheduledTimer(withTimeInterval: 0.5, repeats: true) { [weak self] _ in + self?.updateState() + } + } + + func stopMonitoring() { + timer?.invalidate() + timer = nil + } + + private func updateState() { + // Check if daemon is running + daemonRunning = isDaemonRunning() + + if !daemonRunning { + state = .stopped + return + } + + // Read state file + guard let content = try? 
String(contentsOfFile: stateFilePath, encoding: .utf8) else { + state = .stopped + return + } + + let trimmed = content.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + switch trimmed { + case "idle": + state = .idle + case "recording": + state = .recording + case "transcribing": + state = .transcribing + default: + state = .stopped + } + } + + private func isDaemonRunning() -> Bool { + // First check if launchd service is running + let launchctlTask = Process() + launchctlTask.launchPath = "/bin/launchctl" + launchctlTask.arguments = ["list", "io.voxtype.daemon"] + launchctlTask.standardOutput = FileHandle.nullDevice + launchctlTask.standardError = FileHandle.nullDevice + + do { + try launchctlTask.run() + launchctlTask.waitUntilExit() + if launchctlTask.terminationStatus == 0 { + return true + } + } catch {} + + // Fall back to checking if daemon process is running via PID file + let pidPath = "/tmp/voxtype/pid" + guard let pidString = try? String(contentsOfFile: pidPath, encoding: .utf8), + let pid = Int32(pidString.trimmingCharacters(in: .whitespacesAndNewlines)) else { + return false + } + + // Check if process with this PID exists + return kill(pid, 0) == 0 + } +} + +enum VoxtypeState { + case idle + case recording + case transcribing + case stopped +} diff --git a/macos/VoxtypeMenubar/build-app.sh b/macos/VoxtypeMenubar/build-app.sh new file mode 100755 index 00000000..d5a55f4c --- /dev/null +++ b/macos/VoxtypeMenubar/build-app.sh @@ -0,0 +1,92 @@ +#!/bin/bash +# Build VoxtypeMenubar.app bundle + +set -e + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" +cd "$SCRIPT_DIR" + +# Build release +swift build -c release + +# Create app bundle structure +APP_NAME="VoxtypeMenubar" +APP_BUNDLE="$SCRIPT_DIR/.build/${APP_NAME}.app" +CONTENTS="$APP_BUNDLE/Contents" +MACOS="$CONTENTS/MacOS" +RESOURCES="$CONTENTS/Resources" + +rm -rf "$APP_BUNDLE" +mkdir -p "$MACOS" "$RESOURCES" + +# Copy binary +cp ".build/release/$APP_NAME" "$MACOS/" + +# Create icns from source icon +ICON_SOURCE="$REPO_ROOT/assets/icon.png" +if [ -f "$ICON_SOURCE" ]; then + ICONSET_DIR="$SCRIPT_DIR/.build/AppIcon.iconset" + rm -rf "$ICONSET_DIR" + mkdir -p "$ICONSET_DIR" + + # Generate all required sizes for macOS app icons + sips -z 16 16 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_16x16.png" 2>/dev/null + sips -z 32 32 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_16x16@2x.png" 2>/dev/null + sips -z 32 32 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_32x32.png" 2>/dev/null + sips -z 64 64 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_32x32@2x.png" 2>/dev/null + sips -z 128 128 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_128x128.png" 2>/dev/null + sips -z 256 256 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_128x128@2x.png" 2>/dev/null + sips -z 256 256 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_256x256.png" 2>/dev/null + sips -z 512 512 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_256x256@2x.png" 2>/dev/null + sips -z 512 512 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_512x512.png" 2>/dev/null + sips -z 1024 1024 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_512x512@2x.png" 2>/dev/null + + # Convert iconset to icns + iconutil -c icns "$ICONSET_DIR" -o "$RESOURCES/AppIcon.icns" + rm -rf "$ICONSET_DIR" + echo "Created app icon from $ICON_SOURCE" +fi + +# Create Info.plist +cat > "$CONTENTS/Info.plist" << 'EOF' + + + + + CFBundleExecutable + VoxtypeMenubar + CFBundleIdentifier + io.voxtype.menubar + CFBundleName + Voxtype Menu Bar + CFBundleDisplayName + Voxtype Menu Bar + CFBundleIconFile + AppIcon + CFBundlePackageType + APPL + CFBundleShortVersionString + 1.0.0 + CFBundleVersion + 1 + 
LSMinimumSystemVersion + 13.0 + LSUIElement + + NSHighResolutionCapable + + + +EOF + +# Sign the app +codesign --force --deep --sign - "$APP_BUNDLE" + +echo "Built: $APP_BUNDLE" +echo "" +echo "To install to app bundle:" +echo " cp -r $APP_BUNDLE /Applications/Voxtype.app/Contents/MacOS/" +echo "" +echo "To run:" +echo " open $APP_BUNDLE" diff --git a/macos/VoxtypeSetup/.gitignore b/macos/VoxtypeSetup/.gitignore new file mode 100644 index 00000000..54346f50 --- /dev/null +++ b/macos/VoxtypeSetup/.gitignore @@ -0,0 +1,4 @@ +.build/ +.swiftpm/ +*.xcodeproj/xcuserdata/ +DerivedData/ diff --git a/macos/VoxtypeSetup/Package.swift b/macos/VoxtypeSetup/Package.swift new file mode 100644 index 00000000..9bdbe728 --- /dev/null +++ b/macos/VoxtypeSetup/Package.swift @@ -0,0 +1,18 @@ +// swift-tools-version: 5.9 +import PackageDescription + +let package = Package( + name: "VoxtypeSetup", + platforms: [ + .macOS(.v13) + ], + products: [ + .executable(name: "VoxtypeSetup", targets: ["VoxtypeSetup"]) + ], + targets: [ + .executableTarget( + name: "VoxtypeSetup", + path: "Sources" + ) + ] +) diff --git a/macos/VoxtypeSetup/Sources/Settings/AdvancedSettingsView.swift b/macos/VoxtypeSetup/Sources/Settings/AdvancedSettingsView.swift new file mode 100644 index 00000000..1e882829 --- /dev/null +++ b/macos/VoxtypeSetup/Sources/Settings/AdvancedSettingsView.swift @@ -0,0 +1,219 @@ +import SwiftUI + +struct AdvancedSettingsView: View { + @State private var autoStartEnabled: Bool = false + @State private var daemonRunning: Bool = false + @State private var daemonStatus: String = "Unknown" + + var body: some View { + Form { + Section { + HStack { + VStack(alignment: .leading) { + Text("Configuration File") + Text("~/Library/Application Support/voxtype/config.toml") + .font(.caption) + .foregroundColor(.secondary) + } + + Spacer() + + Button("Open in Editor") { + openConfigFile() + } + } + + HStack { + VStack(alignment: .leading) { + Text("Log Files") + Text("~/Library/Logs/voxtype/") + 
.font(.caption) + .foregroundColor(.secondary) + } + + Spacer() + + Button("Open Folder") { + openLogsFolder() + } + } + + HStack { + VStack(alignment: .leading) { + Text("Models Folder") + Text("~/Library/Application Support/voxtype/models/") + .font(.caption) + .foregroundColor(.secondary) + } + + Spacer() + + Button("Open Folder") { + openModelsFolder() + } + } + } header: { + Text("Files & Folders") + } + + Section { + Toggle("Start Voxtype at login", isOn: $autoStartEnabled) + .onChange(of: autoStartEnabled) { newValue in + toggleAutoStart(enabled: newValue) + } + + Text("Runs the Voxtype daemon automatically when you log in.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Auto-Start") + } + + Section { + HStack { + Circle() + .fill(daemonRunning ? Color.green : Color.red) + .frame(width: 10, height: 10) + Text("Status: \(daemonStatus)") + + Spacer() + + Button("Refresh") { + checkDaemonStatus() + } + } + + if daemonRunning { + Button(action: restartDaemon) { + Label("Restart Daemon", systemImage: "arrow.clockwise") + } + } else { + Button(action: startDaemon) { + Label("Start Daemon", systemImage: "play.fill") + } + } + + Button(action: stopDaemon) { + Label("Stop Daemon", systemImage: "stop.fill") + } + .foregroundColor(.red) + .disabled(!daemonRunning) + + Button(action: runSetupCheck) { + Label("Run Setup Check", systemImage: "checkmark.circle") + } + } header: { + Text("Daemon") + } + + Section { + HStack { + Text("Version") + Spacer() + Text(getVersion()) + .foregroundColor(.secondary) + } + + Link(destination: URL(string: "https://github.com/peteonrails/voxtype")!) { + Label("View on GitHub", systemImage: "link") + } + + Link(destination: URL(string: "https://voxtype.io")!) 
{ + Label("Documentation", systemImage: "book") + } + } header: { + Text("About") + } + } + .formStyle(.grouped) + .onAppear { + checkAutoStartStatus() + checkDaemonStatus() + } + } + + private func checkDaemonStatus() { + let result = VoxtypeCLI.run(["status"]) + let status = result.output.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + + if status == "idle" || status == "recording" || status == "transcribing" { + daemonRunning = true + daemonStatus = status.capitalized + } else if status.contains("not running") || status.isEmpty || !result.success { + daemonRunning = false + daemonStatus = "Not Running" + } else { + daemonRunning = false + daemonStatus = status.capitalized + } + } + + private func startDaemon() { + VoxtypeCLI.run(["daemon"], wait: false) + DispatchQueue.main.asyncAfter(deadline: .now() + 2) { + checkDaemonStatus() + } + } + + private func openConfigFile() { + let path = NSHomeDirectory() + "/Library/Application Support/voxtype/config.toml" + NSWorkspace.shared.open(URL(fileURLWithPath: path)) + } + + private func openLogsFolder() { + let path = NSHomeDirectory() + "/Library/Logs/voxtype" + NSWorkspace.shared.open(URL(fileURLWithPath: path)) + } + + private func openModelsFolder() { + let path = NSHomeDirectory() + "/Library/Application Support/voxtype/models" + NSWorkspace.shared.open(URL(fileURLWithPath: path)) + } + + private func checkAutoStartStatus() { + let plistPath = NSHomeDirectory() + "/Library/LaunchAgents/io.voxtype.daemon.plist" + autoStartEnabled = FileManager.default.fileExists(atPath: plistPath) + } + + private func toggleAutoStart(enabled: Bool) { + if enabled { + VoxtypeCLI.run(["setup", "launchd"]) + } else { + VoxtypeCLI.run(["setup", "launchd", "--uninstall"]) + } + } + + private func restartDaemon() { + let task = Process() + task.launchPath = "/bin/launchctl" + task.arguments = ["kickstart", "-k", "gui/\(getuid())/io.voxtype.daemon"] + try? 
task.run() + } + + private func stopDaemon() { + let task = Process() + task.launchPath = "/bin/launchctl" + task.arguments = ["stop", "io.voxtype.daemon"] + try? task.run() + } + + /// Open Terminal and run `voxtype setup check` there so the user can read the output. + private func runSetupCheck() { + // Single-quote the binary path for the shell so a path containing spaces or quotes stays one word. + let shellSafePath = VoxtypeCLI.binaryPath.replacingOccurrences(of: "'", with: "'\\''") + let command = "'\(shellSafePath)' setup check" + // Then escape backslashes and double quotes so the command cannot end the AppleScript string literal early. + let escapedCommand = command.replacingOccurrences(of: "\\", with: "\\\\").replacingOccurrences(of: "\"", with: "\\\"") + let script = """ + tell application "Terminal" + do script "\(escapedCommand)" + activate + end tell + """ + if let appleScript = NSAppleScript(source: script) { + var error: NSDictionary? + appleScript.executeAndReturnError(&error) + } + } + + private func getVersion() -> String { + let result = VoxtypeCLI.run(["--version"]) + return result.output.trimmingCharacters(in: .whitespacesAndNewlines) + } +} diff --git a/macos/VoxtypeSetup/Sources/Settings/AudioSettingsView.swift b/macos/VoxtypeSetup/Sources/Settings/AudioSettingsView.swift new file mode 100644 index 00000000..aa628319 --- /dev/null +++ b/macos/VoxtypeSetup/Sources/Settings/AudioSettingsView.swift @@ -0,0 +1,136 @@ +import SwiftUI +import AVFoundation + +struct AudioSettingsView: View { + @State private var audioDevice: String = "default" + @State private var maxDurationSecs: Int = 60 + @State private var feedbackEnabled: Bool = false + @State private var feedbackVolume: Double = 0.7 + @State private var availableDevices: [AudioDeviceInfo] = [] + + var body: some View { + Form { + Section { + Picker("Input Device", selection: $audioDevice) { + Text("System Default").tag("default") + ForEach(availableDevices, id: \.id) { device in + Text(device.name).tag(device.id) + } + } + .onChange(of: audioDevice) { newValue in + ConfigManager.shared.updateConfig(key: "device", value: "\"\(newValue)\"", section: "[audio]") + } + + Button("Refresh Devices") { + loadAudioDevices() + } + + Text("Select the microphone to use for recording.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Audio Input") + } + + Section { + Stepper("Maximum Recording: \(maxDurationSecs) seconds", value: $maxDurationSecs, 
in: 10...300, step: 10) + .onChange(of: maxDurationSecs) { newValue in + ConfigManager.shared.updateConfig(key: "max_duration_secs", value: "\(newValue)", section: "[audio]") + } + + Text("Safety limit to prevent accidentally long recordings.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Recording Duration") + } + + Section { + Toggle("Enable Audio Feedback", isOn: $feedbackEnabled) + .onChange(of: feedbackEnabled) { newValue in + ConfigManager.shared.updateConfig(key: "enabled", value: newValue ? "true" : "false", section: "[audio.feedback]") + } + + if feedbackEnabled { + HStack { + Text("Volume") + Slider(value: $feedbackVolume, in: 0...1, step: 0.1) + .onChange(of: feedbackVolume) { newValue in + ConfigManager.shared.updateConfig(key: "volume", value: String(format: "%.1f", newValue), section: "[audio.feedback]") + } + Text(String(format: "%.0f%%", feedbackVolume * 100)) + .frame(width: 50) + } + } + + Text("Play audio cues when recording starts and stops.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Audio Feedback") + } + + Section { + Button("Test Microphone") { + testMicrophone() + } + + Text("Opens System Preferences to test your microphone.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Testing") + } + } + .formStyle(.grouped) + .onAppear { + loadSettings() + loadAudioDevices() + } + } + + private func loadSettings() { + let config = ConfigManager.shared.readConfig() + + if let device = config["audio.device"]?.replacingOccurrences(of: "\"", with: "") { + audioDevice = device + } + + if let duration = config["audio.max_duration_secs"], let d = Int(duration) { + maxDurationSecs = d + } + + if let feedback = config["audio.feedback.enabled"] { + feedbackEnabled = feedback == "true" + } + + if let volume = config["audio.feedback.volume"], let v = Double(volume) { + feedbackVolume = v + } + } + + private func loadAudioDevices() { + availableDevices = [] + + // Get audio input 
devices using AVFoundation + // Use the older API for macOS 13 compatibility + let devices = AVCaptureDevice.devices(for: .audio) + + for device in devices { + availableDevices.append(AudioDeviceInfo( + id: device.uniqueID, + name: device.localizedName + )) + } + } + + private func testMicrophone() { + let url = URL(string: "x-apple.systempreferences:com.apple.preference.sound?input")! + NSWorkspace.shared.open(url) + } +} + +struct AudioDeviceInfo: Identifiable { + let id: String + let name: String +} diff --git a/macos/VoxtypeSetup/Sources/Settings/GeneralSettingsView.swift b/macos/VoxtypeSetup/Sources/Settings/GeneralSettingsView.swift new file mode 100644 index 00000000..fcf2d3ce --- /dev/null +++ b/macos/VoxtypeSetup/Sources/Settings/GeneralSettingsView.swift @@ -0,0 +1,183 @@ +import SwiftUI +import AppKit + +struct GeneralSettingsView: View { + @State private var selectedEngine: String = "parakeet" + @State private var hotkeyMode: String = "push_to_talk" + @State private var hotkey: String = "RIGHTALT" + @State private var daemonRunning: Bool = false + @State private var menubarRunning: Bool = false + @State private var needsRestart: Bool = false + + var body: some View { + Form { + if needsRestart { + Section { + HStack { + Image(systemName: "exclamationmark.triangle.fill") + .foregroundColor(.orange) + Text("Engine changed. 
Restart daemon to apply.") + Spacer() + Button("Restart Now") { + restartDaemon() + needsRestart = false + } + .buttonStyle(.borderedProminent) + } + } + } + + Section { + Picker("Transcription Engine", selection: $selectedEngine) { + Text("Parakeet (Fast)").tag("parakeet") + Text("Whisper").tag("whisper") + } + .onChange(of: selectedEngine) { newValue in + ConfigManager.shared.updateConfig(key: "engine", value: "\"\(newValue)\"") + needsRestart = true + } + + Text("Parakeet is faster and recommended for most users.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Engine") + } + + Section { + Picker("Hotkey", selection: $hotkey) { + Text("Right Option (⌥)").tag("RIGHTALT") + Text("Right Command (⌘)").tag("RIGHTMETA") + Text("Right Control (⌃)").tag("RIGHTCTRL") + Text("F13").tag("F13") + Text("F14").tag("F14") + Text("F15").tag("F15") + } + .onChange(of: hotkey) { newValue in + ConfigManager.shared.updateConfig(key: "key", value: "\"\(newValue)\"", section: "[hotkey]") + needsRestart = true + } + + Picker("Mode", selection: $hotkeyMode) { + Text("Push-to-Talk (hold to record)").tag("push_to_talk") + Text("Toggle (press to start/stop)").tag("toggle") + } + .onChange(of: hotkeyMode) { newValue in + ConfigManager.shared.updateConfig(key: "mode", value: "\"\(newValue)\"", section: "[hotkey]") + needsRestart = true + } + } header: { + Text("Hotkey") + } + + Section { + HStack { + Circle() + .fill(daemonRunning ? Color.green : Color.red) + .frame(width: 10, height: 10) + Text(daemonRunning ? 
"Daemon is running" : "Daemon is not running") + + Spacer() + + if daemonRunning { + Button("Restart") { + restartDaemon() + } + } else { + Button("Start") { + startDaemon() + } + } + } + } header: { + Text("Daemon Status") + } + + Section { + Toggle("Show in Menu Bar", isOn: $menubarRunning) + .onChange(of: menubarRunning) { newValue in + if newValue { + launchMenubar() + } else { + quitMenubar() + } + } + + Text("Display a status icon in the menu bar for quick access.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Menu Bar") + } + } + .formStyle(.grouped) + .onAppear { + loadSettings() + checkDaemonStatus() + checkMenubarStatus() + } + } + + private func loadSettings() { + if let engine = ConfigManager.shared.getString("engine") { + selectedEngine = engine + } + + if let key = ConfigManager.shared.getString("hotkey.key") { + hotkey = key + } + + if let mode = ConfigManager.shared.getString("hotkey.mode") { + hotkeyMode = mode + } + } + + private func checkDaemonStatus() { + let result = VoxtypeCLI.run(["status"]) + let status = result.output.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + daemonRunning = (status == "idle" || status == "recording" || status == "transcribing") + } + + private func startDaemon() { + _ = VoxtypeCLI.run(["daemon"], wait: false) + DispatchQueue.main.asyncAfter(deadline: .now() + 2) { + checkDaemonStatus() + } + } + + private func restartDaemon() { + VoxtypeCLI.restartDaemon { + DispatchQueue.main.asyncAfter(deadline: .now() + 2) { + self.checkDaemonStatus() + } + } + } + + /// Set `menubarRunning` from whether `pgrep -x VoxtypeMenubar` exits with status 0. + private func checkMenubarStatus() { + let task = Process() + task.launchPath = "/usr/bin/pgrep" + task.arguments = ["-x", "VoxtypeMenubar"] + task.standardOutput = FileHandle.nullDevice + task.standardError = FileHandle.nullDevice + 
do { + try task.run() + } catch { + // pgrep could not be launched; waiting on or querying a Process that never started raises an exception, so leave the toggle untouched. + return + } + task.waitUntilExit() + menubarRunning = (task.terminationStatus == 0) + } + + private func launchMenubar() { + let menubarPath = "/Applications/Voxtype.app/Contents/MacOS/VoxtypeMenubar.app" + if FileManager.default.fileExists(atPath: menubarPath) { + NSWorkspace.shared.open(URL(fileURLWithPath: menubarPath)) + } + } + + /// Run `pkill -x VoxtypeMenubar` and wait for pkill to finish. + private func quitMenubar() { + let task = Process() + task.launchPath = "/usr/bin/pkill" + task.arguments = ["-x", "VoxtypeMenubar"] + task.standardOutput = FileHandle.nullDevice + task.standardError = FileHandle.nullDevice + do { + try task.run() + // Only wait once the launch has succeeded; waitUntilExit() raises on a Process that never started. + task.waitUntilExit() + } catch { + // pkill could not be launched; there is nothing to wait for. + } + } +} diff --git a/macos/VoxtypeSetup/Sources/Settings/HotkeySettingsView.swift b/macos/VoxtypeSetup/Sources/Settings/HotkeySettingsView.swift new file mode 100644 index 00000000..e34fbada --- /dev/null +++ b/macos/VoxtypeSetup/Sources/Settings/HotkeySettingsView.swift @@ -0,0 +1,185 @@ +import SwiftUI + +struct HotkeySettingsView: View { + @State private var hotkeyEnabled: Bool = true + @State private var hotkey: String = "RIGHTALT" + @State private var hotkeyMode: String = "push_to_talk" + @State private var cancelKey: String = "" + @State private var modelModifier: String = "" + @State private var modifiers: [String] = [] + @State private var needsRestart: Bool = false + + private let availableKeys = [ + ("Right Option (⌥)", "RIGHTALT"), + ("Right Command (⌘)", "RIGHTMETA"), + ("Right Control (⌃)", "RIGHTCTRL"), + ("Left Option (⌥)", "LEFTALT"), + ("Left Command (⌘)", "LEFTMETA"), + ("Left Control (⌃)", "LEFTCTRL"), + ("F13", "F13"), + ("F14", "F14"), + ("F15", "F15"), + ("F16", "F16"), + ("F17", "F17"), + ("F18", "F18"), + ("F19", "F19"), + ("Scroll Lock", "SCROLLLOCK"), + ("Pause", "PAUSE"), + ] + + private let availableModifiers = [ + ("None", ""), + ("Left Shift", "LEFTSHIFT"), + ("Right Shift", "RIGHTSHIFT"), + ("Left Control", "LEFTCTRL"), + ("Right Control", "RIGHTCTRL"), + ("Left Option", "LEFTALT"), + ("Right Option", "RIGHTALT"), + ] + + var body: some View { + 
Form { + if needsRestart { + Section { + HStack { + Image(systemName: "exclamationmark.triangle.fill") + .foregroundColor(.orange) + Text("Restart daemon to apply hotkey changes") + Spacer() + Button("Restart Now") { + restartDaemon() + } + .buttonStyle(.borderedProminent) + } + } + } + + Section { + Toggle("Enable built-in hotkey detection", isOn: $hotkeyEnabled) + .onChange(of: hotkeyEnabled) { newValue in + updateConfig(key: "enabled", value: newValue ? "true" : "false", section: "[hotkey]") + needsRestart = true + } + + Text("Disable if using compositor keybindings (Hyprland, Sway) instead.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Hotkey Detection") + } + + Section { + Picker("Hotkey", selection: $hotkey) { + ForEach(availableKeys, id: \.1) { name, value in + Text(name).tag(value) + } + } + .onChange(of: hotkey) { newValue in + updateConfig(key: "key", value: "\"\(newValue)\"", section: "[hotkey]") + needsRestart = true + } + + Picker("Mode", selection: $hotkeyMode) { + Text("Push-to-Talk (hold to record)").tag("push_to_talk") + Text("Toggle (press to start/stop)").tag("toggle") + } + .onChange(of: hotkeyMode) { newValue in + updateConfig(key: "mode", value: "\"\(newValue)\"", section: "[hotkey]") + needsRestart = true + } + + Text(hotkeyMode == "push_to_talk" + ? "Hold the hotkey to record, release to transcribe." 
+ : "Press once to start recording, press again to stop.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Primary Hotkey") + } + + Section { + Picker("Cancel Key", selection: $cancelKey) { + Text("None").tag("") + Text("Escape").tag("ESC") + Text("Backspace").tag("BACKSPACE") + Text("F12").tag("F12") + } + .onChange(of: cancelKey) { newValue in + if newValue.isEmpty { + // Remove the key from config + updateConfig(key: "cancel_key", value: "# disabled", section: "[hotkey]") + } else { + updateConfig(key: "cancel_key", value: "\"\(newValue)\"", section: "[hotkey]") + } + needsRestart = true + } + + Text("Press this key to cancel the current recording or transcription.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Cancel Key") + } + + Section { + Picker("Model Modifier", selection: $modelModifier) { + ForEach(availableModifiers, id: \.1) { name, value in + Text(name).tag(value) + } + } + .onChange(of: modelModifier) { newValue in + if newValue.isEmpty { + updateConfig(key: "model_modifier", value: "# disabled", section: "[hotkey]") + } else { + updateConfig(key: "model_modifier", value: "\"\(newValue)\"", section: "[hotkey]") + } + needsRestart = true + } + + Text("Hold this modifier with the hotkey to use a secondary model (e.g., larger model for difficult audio).") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Secondary Model Modifier") + } + } + .formStyle(.grouped) + .onAppear { + loadSettings() + } + } + + private func loadSettings() { + let config = ConfigManager.shared.readConfig() + + if let enabled = config["hotkey.enabled"] { + hotkeyEnabled = enabled == "true" + } + + if let key = config["hotkey.key"]?.replacingOccurrences(of: "\"", with: "") { + hotkey = key + } + + if let mode = config["hotkey.mode"]?.replacingOccurrences(of: "\"", with: "") { + hotkeyMode = mode + } + + if let cancel = config["hotkey.cancel_key"]?.replacingOccurrences(of: "\"", with: "") { + cancelKey = cancel + 
} + + if let modifier = config["hotkey.model_modifier"]?.replacingOccurrences(of: "\"", with: "") { + modelModifier = modifier + } + } + + private func updateConfig(key: String, value: String, section: String? = nil) { + ConfigManager.shared.updateConfig(key: key, value: value, section: section) + } + + private func restartDaemon() { + VoxtypeCLI.restartDaemon { + self.needsRestart = false + } + } +} diff --git a/macos/VoxtypeSetup/Sources/Settings/ModelsSettingsView.swift b/macos/VoxtypeSetup/Sources/Settings/ModelsSettingsView.swift new file mode 100644 index 00000000..1ef37bac --- /dev/null +++ b/macos/VoxtypeSetup/Sources/Settings/ModelsSettingsView.swift @@ -0,0 +1,244 @@ +import SwiftUI + +struct ModelsSettingsView: View { + @State private var installedModels: Set = [] + @State private var selectedModel: String = "" + @State private var downloadingModel: String? = nil + @State private var needsRestart: Bool = false + + private let allModels: [ModelCategory] = [ + ModelCategory(name: "Parakeet", description: "Fast, English-only, recommended", models: [ + ModelDefinition(id: "parakeet-tdt-0.6b-v3-int8", name: "Parakeet INT8", size: "~640 MB", description: "Quantized, fastest"), + ModelDefinition(id: "parakeet-tdt-0.6b-v3", name: "Parakeet Full", size: "~1.2 GB", description: "Full precision"), + ]), + ModelCategory(name: "Whisper English", description: "OpenAI Whisper, optimized for English", models: [ + ModelDefinition(id: "base.en", name: "Base English", size: "~142 MB", description: "Fast, good accuracy"), + ModelDefinition(id: "small.en", name: "Small English", size: "~466 MB", description: "Better accuracy"), + ModelDefinition(id: "medium.en", name: "Medium English", size: "~1.5 GB", description: "High accuracy"), + ]), + ModelCategory(name: "Whisper Multilingual", description: "Supports 99 languages", models: [ + ModelDefinition(id: "base", name: "Base", size: "~142 MB", description: "Fast, 99 languages"), + ModelDefinition(id: "small", name: "Small", 
size: "~466 MB", description: "Better accuracy"), + ModelDefinition(id: "medium", name: "Medium", size: "~1.5 GB", description: "High accuracy"), + ModelDefinition(id: "large-v3", name: "Large V3", size: "~3.1 GB", description: "Best quality"), + ModelDefinition(id: "large-v3-turbo", name: "Large V3 Turbo", size: "~1.6 GB", description: "Fast, near-large quality"), + ]), + ] + + var body: some View { + Form { + if needsRestart { + Section { + HStack { + Image(systemName: "exclamationmark.triangle.fill") + .foregroundColor(.orange) + Text("Model changed. Restart daemon to apply.") + Spacer() + Button("Restart Now") { + restartDaemon() + needsRestart = false + } + .buttonStyle(.borderedProminent) + } + } + } + + ForEach(allModels, id: \.name) { category in + Section { + ForEach(category.models, id: \.id) { model in + ModelRowView( + model: model, + isInstalled: installedModels.contains(model.id), + isSelected: selectedModel == model.id, + isDownloading: downloadingModel == model.id, + onSelect: { selectModel(model.id) }, + onDownload: { downloadModel(model.id) } + ) + } + } header: { + VStack(alignment: .leading, spacing: 2) { + Text(category.name) + Text(category.description) + .font(.caption) + .foregroundColor(.secondary) + .fontWeight(.regular) + } + } + } + } + .formStyle(.grouped) + .onAppear { + loadInstalledModels() + } + } + + private func loadInstalledModels() { + let modelsDir = NSHomeDirectory() + "/Library/Application Support/voxtype/models" + + guard let contents = try? 
FileManager.default.contentsOfDirectory(atPath: modelsDir) else { + return + } + + var installed: Set = [] + + for item in contents { + let path = modelsDir + "/" + item + + var isDir: ObjCBool = false + FileManager.default.fileExists(atPath: path, isDirectory: &isDir) + + if isDir.boolValue && item.contains("parakeet") { + installed.insert(item) + } else if item.hasPrefix("ggml-") && item.hasSuffix(".bin") { + let modelName = item + .replacingOccurrences(of: "ggml-", with: "") + .replacingOccurrences(of: ".bin", with: "") + installed.insert(modelName) + } + } + + installedModels = installed + + // Get currently selected model from config + if let engine = ConfigManager.shared.getString("engine"), engine == "parakeet" { + if let model = ConfigManager.shared.getString("parakeet.model") { + selectedModel = model + } + } else { + if let model = ConfigManager.shared.getString("whisper.model") { + selectedModel = model + } + } + } + + private func selectModel(_ name: String) { + let isParakeet = name.contains("parakeet") + + if isParakeet { + ConfigManager.shared.updateConfig(key: "engine", value: "\"parakeet\"") + ConfigManager.shared.updateConfig(key: "model", value: "\"\(name)\"", section: "[parakeet]") + } else { + ConfigManager.shared.updateConfig(key: "engine", value: "\"whisper\"") + ConfigManager.shared.updateConfig(key: "model", value: "\"\(name)\"", section: "[whisper]") + } + + selectedModel = name + needsRestart = true + } + + private func downloadModel(_ name: String) { + downloadingModel = name + + DispatchQueue.global().async { + let result = VoxtypeCLI.run(["setup", "--download", "--model", name]) + + DispatchQueue.main.async { + downloadingModel = nil + loadInstalledModels() + + if result.success { + selectModel(name) + } + } + } + } + + private func restartDaemon() { + VoxtypeCLI.restartDaemon() + } +} + +struct ModelRowView: View { + let model: ModelDefinition + let isInstalled: Bool + let isSelected: Bool + let isDownloading: Bool + let onSelect: () 
-> Void + let onDownload: () -> Void + + var body: some View { + HStack(spacing: 12) { + // Status icon + statusIcon + .frame(width: 20) + + // Model info + VStack(alignment: .leading, spacing: 2) { + HStack { + Text(model.name) + .fontWeight(isSelected ? .semibold : .regular) + if isSelected { + Text("Active") + .font(.caption) + .foregroundColor(.white) + .padding(.horizontal, 6) + .padding(.vertical, 2) + .background(Color.green) + .cornerRadius(4) + } + } + + if isDownloading { + HStack(spacing: 8) { + ProgressView() + .scaleEffect(0.7) + Text("Downloading...") + .font(.caption) + .foregroundColor(.secondary) + } + } else { + Text("\(model.size) - \(model.description)") + .font(.caption) + .foregroundColor(.secondary) + } + } + + Spacer() + + // Action button + if isDownloading { + // No button while downloading + } else if isInstalled { + if !isSelected { + Button("Select") { + onSelect() + } + .buttonStyle(.bordered) + } + } else { + Button("Download") { + onDownload() + } + .buttonStyle(.borderedProminent) + } + } + .padding(.vertical, 4) + } + + @ViewBuilder + private var statusIcon: some View { + if isSelected { + Image(systemName: "checkmark.circle.fill") + .foregroundColor(.green) + } else if isInstalled { + Image(systemName: "checkmark.circle") + .foregroundColor(.secondary) + } else { + Image(systemName: "arrow.down.circle") + .foregroundColor(.blue) + } + } +} + +struct ModelCategory { + let name: String + let description: String + let models: [ModelDefinition] +} + +struct ModelDefinition { + let id: String + let name: String + let size: String + let description: String +} diff --git a/macos/VoxtypeSetup/Sources/Settings/NotificationSettingsView.swift b/macos/VoxtypeSetup/Sources/Settings/NotificationSettingsView.swift new file mode 100644 index 00000000..3bb12e59 --- /dev/null +++ b/macos/VoxtypeSetup/Sources/Settings/NotificationSettingsView.swift @@ -0,0 +1,113 @@ +import SwiftUI + +struct NotificationSettingsView: View { + @State private var 
onRecordingStart: Bool = false + @State private var onRecordingStop: Bool = false + @State private var onTranscription: Bool = true + @State private var showEngineIcon: Bool = false + + var body: some View { + Form { + Section { + Toggle("Notify when recording starts", isOn: $onRecordingStart) + .onChange(of: onRecordingStart) { newValue in + ConfigManager.shared.updateConfig(key: "on_recording_start", value: newValue ? "true" : "false", section: "[output.notification]") + } + + Toggle("Notify when recording stops", isOn: $onRecordingStop) + .onChange(of: onRecordingStop) { newValue in + ConfigManager.shared.updateConfig(key: "on_recording_stop", value: newValue ? "true" : "false", section: "[output.notification]") + } + + Toggle("Show transcribed text", isOn: $onTranscription) + .onChange(of: onTranscription) { newValue in + ConfigManager.shared.updateConfig(key: "on_transcription", value: newValue ? "true" : "false", section: "[output.notification]") + } + + Text("Choose which events trigger desktop notifications.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Notification Events") + } + + Section { + Toggle("Show engine icon in notification", isOn: $showEngineIcon) + .onChange(of: showEngineIcon) { newValue in + ConfigManager.shared.updateConfig(key: "show_engine_icon", value: newValue ? 
"true" : "false", section: "[output.notification]") + } + + HStack(spacing: 20) { + VStack { + Text("🦜") + .font(.largeTitle) + Text("Parakeet") + .font(.caption) + } + VStack { + Text("🗣️") + .font(.largeTitle) + Text("Whisper") + .font(.caption) + } + } + .padding(.vertical, 8) + + Text("When enabled, notifications will include an icon indicating which transcription engine was used.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Engine Icon") + } + + Section { + VStack(alignment: .leading, spacing: 8) { + Text("macOS Notification Settings") + .fontWeight(.medium) + + Text("To customize notification style, banners, and sounds:") + .font(.caption) + .foregroundColor(.secondary) + + Button("Open System Notification Settings") { + openNotificationSettings() + } + } + } header: { + Text("System Settings") + } + } + .formStyle(.grouped) + .onAppear { + loadSettings() + } + } + + private func loadSettings() { + let config = ConfigManager.shared.readConfig() + + if let start = config["output.notification.on_recording_start"] { + onRecordingStart = start == "true" + } + + if let stop = config["output.notification.on_recording_stop"] { + onRecordingStop = stop == "true" + } + + if let trans = config["output.notification.on_transcription"] { + onTranscription = trans == "true" + } else { + // Default is true + onTranscription = true + } + + if let icon = config["output.notification.show_engine_icon"] { + showEngineIcon = icon == "true" + } + } + + private func openNotificationSettings() { + let url = URL(string: "x-apple.systempreferences:com.apple.preference.notifications")! 
+ NSWorkspace.shared.open(url) + } +} diff --git a/macos/VoxtypeSetup/Sources/Settings/OutputSettingsView.swift b/macos/VoxtypeSetup/Sources/Settings/OutputSettingsView.swift new file mode 100644 index 00000000..f5294622 --- /dev/null +++ b/macos/VoxtypeSetup/Sources/Settings/OutputSettingsView.swift @@ -0,0 +1,99 @@ +import SwiftUI + +struct OutputSettingsView: View { + @State private var outputMode: String = "type" + @State private var fallbackToClipboard: Bool = true + @State private var typeDelayMs: Int = 0 + @State private var autoSubmit: Bool = false + + var body: some View { + Form { + Section { + Picker("Output Mode", selection: $outputMode) { + Text("Type Text").tag("type") + Text("Copy to Clipboard").tag("clipboard") + Text("Clipboard + Paste").tag("paste") + } + .onChange(of: outputMode) { newValue in + updateConfig(key: "mode", value: "\"\(newValue)\"", section: "[output]") + } + + Text(outputModeDescription) + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Output Mode") + } + + Section { + Toggle("Fall back to clipboard if typing fails", isOn: $fallbackToClipboard) + .onChange(of: fallbackToClipboard) { newValue in + updateConfig(key: "fallback_to_clipboard", value: newValue ? "true" : "false", section: "[output]") + } + + Stepper("Type delay: \(typeDelayMs) ms", value: $typeDelayMs, in: 0...100, step: 5) + .onChange(of: typeDelayMs) { newValue in + updateConfig(key: "type_delay_ms", value: "\(newValue)", section: "[output]") + } + + Text("Increase delay if characters are being dropped.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Typing Options") + } + + Section { + Toggle("Auto-submit after transcription", isOn: $autoSubmit) + .onChange(of: autoSubmit) { newValue in + updateConfig(key: "auto_submit", value: newValue ? 
"true" : "false", section: "[output]") + } + + Text("Press Enter automatically after typing transcribed text.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Behavior") + } + } + .formStyle(.grouped) + .onAppear { + loadSettings() + } + } + + private var outputModeDescription: String { + switch outputMode { + case "type": + return "Text is typed directly into the active application." + case "clipboard": + return "Text is copied to clipboard. Paste manually with ⌘V." + case "paste": + return "Text is copied to clipboard and pasted automatically." + default: + return "" + } + } + + private func loadSettings() { + if let mode = ConfigManager.shared.getString("output.mode") { + outputMode = mode + } + + if let fallback = ConfigManager.shared.getBool("output.fallback_to_clipboard") { + fallbackToClipboard = fallback + } + + if let delay = ConfigManager.shared.getInt("output.type_delay_ms") { + typeDelayMs = delay + } + + if let submit = ConfigManager.shared.getBool("output.auto_submit") { + autoSubmit = submit + } + } + + private func updateConfig(key: String, value: String, section: String? 
= nil) { + ConfigManager.shared.updateConfig(key: key, value: value, section: section) + } +} diff --git a/macos/VoxtypeSetup/Sources/Settings/PermissionsSettingsView.swift b/macos/VoxtypeSetup/Sources/Settings/PermissionsSettingsView.swift new file mode 100644 index 00000000..69eeb7ac --- /dev/null +++ b/macos/VoxtypeSetup/Sources/Settings/PermissionsSettingsView.swift @@ -0,0 +1,119 @@ +import SwiftUI + +struct PermissionsSettingsView: View { + @State private var microphoneGranted: Bool = false + @State private var inputMonitoringGranted: Bool = false + @State private var accessibilityGranted: Bool = false + + var body: some View { + Form { + Section { + PermissionRow( + title: "Microphone", + description: "Required to capture your voice for transcription", + icon: "mic.fill", + isGranted: microphoneGranted + ) { + openSystemPreferences("Privacy_Microphone") + } + + PermissionRow( + title: "Input Monitoring", + description: "Required for global hotkey detection", + icon: "keyboard", + isGranted: inputMonitoringGranted + ) { + openSystemPreferences("Privacy_ListenEvent") + } + + PermissionRow( + title: "Accessibility", + description: "Required to type transcribed text into applications", + icon: "hand.raised.fill", + isGranted: accessibilityGranted + ) { + openSystemPreferences("Privacy_Accessibility") + } + } header: { + Text("Required Permissions") + } footer: { + Text("Click \"Open Settings\" to grant each permission. 
You may need to add Voxtype manually.") + } + + Section { + Button(action: checkPermissions) { + Label("Refresh Permission Status", systemImage: "arrow.clockwise") + } + } + } + .formStyle(.grouped) + .onAppear { + checkPermissions() + } + } + + /// Refreshes the three permission flags shown in this form. + /// Microphone status is read from AVFoundation; input monitoring and accessibility are inferred from whether the daemon answers `voxtype status`. + private func checkPermissions() { + // Check microphone permission + microphoneGranted = AVCaptureDevice.authorizationStatus(for: .audio) == .authorized + + // Input monitoring and accessibility are harder to check programmatically + // We use a heuristic: if the daemon is running and responding, permissions are likely granted + // Assign the result in both directions so a refresh after the daemon stops clears a stale checkmark + let daemonResponding = VoxtypeCLI.isDaemonRunning() + inputMonitoringGranted = daemonResponding + accessibilityGranted = daemonResponding + } + + private func openSystemPreferences(_ pane: String) { + let url = URL(string: "x-apple.systempreferences:com.apple.preference.security?\(pane)")! 
+ NSWorkspace.shared.open(url) + } +} + +struct PermissionRow: View { + let title: String + let description: String + let icon: String + let isGranted: Bool + let openSettings: () -> Void + + var body: some View { + HStack { + Image(systemName: icon) + .frame(width: 24) + .foregroundColor(.accentColor) + + VStack(alignment: .leading) { + Text(title) + .fontWeight(.medium) + Text(description) + .font(.caption) + .foregroundColor(.secondary) + } + + Spacer() + + if isGranted { + Image(systemName: "checkmark.circle.fill") + .foregroundColor(.green) + } else { + Button("Open Settings") { + openSettings() + } + .buttonStyle(.bordered) + } + } + .padding(.vertical, 4) + } +} + +import AVFoundation diff --git a/macos/VoxtypeSetup/Sources/Settings/RemoteWhisperSettingsView.swift b/macos/VoxtypeSetup/Sources/Settings/RemoteWhisperSettingsView.swift new file mode 100644 index 00000000..3298bc55 --- /dev/null +++ b/macos/VoxtypeSetup/Sources/Settings/RemoteWhisperSettingsView.swift @@ -0,0 +1,139 @@ +import SwiftUI + +struct RemoteWhisperSettingsView: View { + @State private var endpoint: String = "" + @State private var apiKey: String = "" + @State private var remoteModel: String = "whisper-1" + @State private var timeoutSecs: Int = 30 + + var body: some View { + Form { + Section { + TextField("Server URL", text: $endpoint) + .textFieldStyle(.roundedBorder) + .onSubmit { + saveEndpoint() + } + + Text("Examples:\n• whisper.cpp server: http://192.168.1.100:8080\n• OpenAI API: https://api.openai.com") + .font(.caption) + .foregroundColor(.secondary) + + Button("Save Endpoint") { + saveEndpoint() + } + } header: { + Text("Remote Endpoint") + } + + Section { + SecureField("API Key", text: $apiKey) + .textFieldStyle(.roundedBorder) + .onSubmit { + saveApiKey() + } + + Text("Required for OpenAI API. 
Can also be set via VOXTYPE_WHISPER_API_KEY environment variable.") + .font(.caption) + .foregroundColor(.secondary) + + Button("Save API Key") { + saveApiKey() + } + } header: { + Text("Authentication") + } + + Section { + TextField("Model Name", text: $remoteModel) + .textFieldStyle(.roundedBorder) + .onSubmit { + saveRemoteModel() + } + + Text("Model name to send to the remote server. Default: \"whisper-1\" for OpenAI.") + .font(.caption) + .foregroundColor(.secondary) + + Button("Save Model") { + saveRemoteModel() + } + } header: { + Text("Remote Model") + } + + Section { + Stepper("Timeout: \(timeoutSecs) seconds", value: $timeoutSecs, in: 10...120, step: 10) + .onChange(of: timeoutSecs) { newValue in + ConfigManager.shared.updateConfig(key: "remote_timeout_secs", value: "\(newValue)", section: "[whisper]") + } + + Text("Maximum time to wait for remote server response.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Timeout") + } + + Section { + VStack(alignment: .leading, spacing: 8) { + Text("To use remote Whisper:") + .fontWeight(.medium) + + Text("1. Set Whisper mode to \"Remote\" in Whisper Settings") + Text("2. Enter your server URL above") + Text("3. Add API key if required") + Text("4. 
Restart the daemon") + } + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Setup Instructions") + } + } + .formStyle(.grouped) + .onAppear { + loadSettings() + } + } + + /// Populates the form fields from the `[whisper]` section of the config file. + /// String values have their double quotes removed; the timeout is used only if it parses as an integer. + private func loadSettings() { + let config = ConfigManager.shared.readConfig() + + if let ep = config["whisper.remote_endpoint"]?.replacingOccurrences(of: "\"", with: "") { + endpoint = ep + } + + if let key = config["whisper.remote_api_key"]?.replacingOccurrences(of: "\"", with: "") { + apiKey = key + } + + if let model = config["whisper.remote_model"]?.replacingOccurrences(of: "\"", with: "") { + remoteModel = model + } + + if let timeout = config["whisper.remote_timeout_secs"], let t = Int(timeout) { + timeoutSecs = t + } + } + + /// Saves the endpoint to `[whisper] remote_endpoint` as a quoted string. + /// An empty field is stored as `""` (same as WhisperSettingsView); a bare `# not set` after `=` is not a TOML value and would be loaded back into the text field as literal text. + private func saveEndpoint() { + ConfigManager.shared.updateConfig(key: "remote_endpoint", value: "\"\(endpoint)\"", section: "[whisper]") + } + + /// Saves the API key to `[whisper] remote_api_key` as a quoted string; an empty field is stored as `""`. + private func saveApiKey() { + ConfigManager.shared.updateConfig(key: "remote_api_key", value: "\"\(apiKey)\"", section: "[whisper]") + } + + /// Saves the remote model name to `[whisper] remote_model` as a quoted string. + private func saveRemoteModel() { + ConfigManager.shared.updateConfig(key: "remote_model", value: "\"\(remoteModel)\"", section: "[whisper]") + } +} diff --git a/macos/VoxtypeSetup/Sources/Settings/TextProcessingSettingsView.swift b/macos/VoxtypeSetup/Sources/Settings/TextProcessingSettingsView.swift new file mode 100644 index 00000000..cf6afab0 --- /dev/null +++ b/macos/VoxtypeSetup/Sources/Settings/TextProcessingSettingsView.swift @@ -0,0 +1,187 @@ +import SwiftUI + +struct TextProcessingSettingsView: View { + @State private var spokenPunctuation: Bool = false + @State private var replacements: [(key: String, value: String)] = [] + @State private var newKey: String = "" + @State private var 
newValue: String = "" + + var body: some View { + Form { + Section { + Toggle("Enable Spoken Punctuation", isOn: $spokenPunctuation) + .onChange(of: spokenPunctuation) { newValue in + ConfigManager.shared.updateConfig(key: "spoken_punctuation", value: newValue ? "true" : "false", section: "[text]") + } + + VStack(alignment: .leading, spacing: 4) { + Text("Convert spoken words to punctuation marks:") + .font(.caption) + .foregroundColor(.secondary) + Text("• \"period\" → \".\"") + Text("• \"comma\" → \",\"") + Text("• \"question mark\" → \"?\"") + Text("• \"exclamation point\" → \"!\"") + Text("• \"new line\" → newline") + } + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Spoken Punctuation") + } + + Section { + if replacements.isEmpty { + Text("No word replacements configured") + .foregroundColor(.secondary) + } else { + ForEach(Array(replacements.enumerated()), id: \.offset) { index, replacement in + HStack { + Text("\"\(replacement.key)\"") + Image(systemName: "arrow.right") + .foregroundColor(.secondary) + Text("\"\(replacement.value)\"") + Spacer() + Button(role: .destructive) { + removeReplacement(at: index) + } label: { + Image(systemName: "trash") + } + .buttonStyle(.borderless) + } + } + } + + Divider() + + HStack { + TextField("From", text: $newKey) + .textFieldStyle(.roundedBorder) + Image(systemName: "arrow.right") + .foregroundColor(.secondary) + TextField("To", text: $newValue) + .textFieldStyle(.roundedBorder) + Button("Add") { + addReplacement() + } + .disabled(newKey.isEmpty || newValue.isEmpty) + } + + Text("Example: \"vox type\" → \"voxtype\"") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Word Replacements") + } footer: { + Text("Replacements are case-insensitive and applied after transcription.") + } + } + .formStyle(.grouped) + .onAppear { + loadSettings() + } + } + + private func loadSettings() { + let config = ConfigManager.shared.readConfig() + + if let sp = config["text.spoken_punctuation"] { 
+ spokenPunctuation = sp == "true" + } + + // Load replacements - they're stored as a TOML table + // For now, we'll parse them from the raw config file + loadReplacements() + } + + private func loadReplacements() { + let configPath = NSHomeDirectory() + "/Library/Application Support/voxtype/config.toml" + guard let content = try? String(contentsOfFile: configPath, encoding: .utf8) else { + return + } + + var inReplacementsSection = false + var loaded: [(key: String, value: String)] = [] + + for line in content.components(separatedBy: .newlines) { + let trimmed = line.trimmingCharacters(in: .whitespaces) + + if trimmed == "[text.replacements]" { + inReplacementsSection = true + continue + } + + if trimmed.hasPrefix("[") && trimmed.hasSuffix("]") { + inReplacementsSection = false + continue + } + + if inReplacementsSection && trimmed.contains("=") && !trimmed.hasPrefix("#") { + let parts = trimmed.components(separatedBy: "=") + if parts.count >= 2 { + let key = parts[0].trimmingCharacters(in: .whitespaces).replacingOccurrences(of: "\"", with: "") + let value = parts.dropFirst().joined(separator: "=").trimmingCharacters(in: .whitespaces).replacingOccurrences(of: "\"", with: "") + loaded.append((key: key, value: value)) + } + } + } + + replacements = loaded + } + + private func addReplacement() { + guard !newKey.isEmpty && !newValue.isEmpty else { return } + + replacements.append((key: newKey, value: newValue)) + saveReplacements() + + newKey = "" + newValue = "" + } + + private func removeReplacement(at index: Int) { + replacements.remove(at: index) + saveReplacements() + } + + private func saveReplacements() { + let configPath = NSHomeDirectory() + "/Library/Application Support/voxtype/config.toml" + guard var content = try? 
String(contentsOfFile: configPath, encoding: .utf8) else { + return + } + + // Remove existing [text.replacements] section + var lines = content.components(separatedBy: .newlines) + var newLines: [String] = [] + var inReplacementsSection = false + + for line in lines { + let trimmed = line.trimmingCharacters(in: .whitespaces) + + if trimmed == "[text.replacements]" { + inReplacementsSection = true + continue + } + + if inReplacementsSection && trimmed.hasPrefix("[") && trimmed.hasSuffix("]") { + inReplacementsSection = false + } + + if !inReplacementsSection { + newLines.append(line) + } + } + + // Add new [text.replacements] section + if !replacements.isEmpty { + newLines.append("") + newLines.append("[text.replacements]") + for r in replacements { + newLines.append("\"\(r.key)\" = \"\(r.value)\"") + } + } + + content = newLines.joined(separator: "\n") + try? content.write(toFile: configPath, atomically: true, encoding: .utf8) + } +} diff --git a/macos/VoxtypeSetup/Sources/Settings/WhisperSettingsView.swift b/macos/VoxtypeSetup/Sources/Settings/WhisperSettingsView.swift new file mode 100644 index 00000000..5da97f38 --- /dev/null +++ b/macos/VoxtypeSetup/Sources/Settings/WhisperSettingsView.swift @@ -0,0 +1,237 @@ +import SwiftUI + +struct WhisperSettingsView: View { + @State private var backend: String = "local" + @State private var language: String = "en" + @State private var translate: Bool = false + @State private var gpuIsolation: Bool = false + @State private var onDemandLoading: Bool = false + @State private var initialPrompt: String = "" + + // Remote settings + @State private var endpoint: String = "" + @State private var apiKey: String = "" + @State private var remoteModel: String = "whisper-1" + @State private var timeoutSecs: Int = 30 + + private let languages = [ + ("English", "en"), + ("Auto-detect", "auto"), + ("Spanish", "es"), + ("French", "fr"), + ("German", "de"), + ("Italian", "it"), + ("Portuguese", "pt"), + ("Dutch", "nl"), + ("Polish", "pl"), 
+ ("Russian", "ru"), + ("Japanese", "ja"), + ("Chinese", "zh"), + ("Korean", "ko"), + ] + + var body: some View { + Form { + Section { + Picker("Backend", selection: $backend) { + Text("Local (whisper.cpp)").tag("local") + Text("Remote Server").tag("remote") + } + .onChange(of: backend) { newValue in + ConfigManager.shared.updateConfig(key: "backend", value: "\"\(newValue)\"", section: "[whisper]") + } + + Text(backend == "local" + ? "Run transcription locally using whisper.cpp." + : "Send audio to a remote Whisper server or OpenAI API.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Whisper Backend") + } + + // Remote-only settings + if backend == "remote" { + Group { + Section { + TextField("Server URL", text: $endpoint) + .textFieldStyle(.roundedBorder) + .onSubmit { saveEndpoint() } + + Text("Examples: http://192.168.1.100:8080 or https://api.openai.com") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Remote Endpoint") + } + + Section { + SecureField("API Key", text: $apiKey) + .textFieldStyle(.roundedBorder) + .onSubmit { saveApiKey() } + + Text("Required for OpenAI API. Can also use VOXTYPE_WHISPER_API_KEY env var.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Authentication") + } + + Section { + TextField("Model Name", text: $remoteModel) + .textFieldStyle(.roundedBorder) + .onSubmit { saveRemoteModel() } + + Stepper("Timeout: \(timeoutSecs)s", value: $timeoutSecs, in: 10...120, step: 10) + .onChange(of: timeoutSecs) { newValue in + ConfigManager.shared.updateConfig(key: "remote_timeout_secs", value: "\(newValue)", section: "[whisper]") + } + } header: { + Text("Remote Options") + } + } + .transition(.opacity.combined(with: .move(edge: .top))) + } + + // Local-only settings + if backend == "local" { + Section { + Toggle("GPU Isolation", isOn: $gpuIsolation) + .onChange(of: gpuIsolation) { newValue in + ConfigManager.shared.updateConfig(key: "gpu_isolation", value: newValue ? 
"true" : "false", section: "[whisper]") + } + + Text("Run in subprocess that exits after use, releasing GPU memory.") + .font(.caption) + .foregroundColor(.secondary) + + Toggle("On-Demand Loading", isOn: $onDemandLoading) + .onChange(of: onDemandLoading) { newValue in + ConfigManager.shared.updateConfig(key: "on_demand_loading", value: newValue ? "true" : "false", section: "[whisper]") + } + + Text("Load model only when recording. Saves memory but adds latency.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Performance") + } + .transition(.opacity.combined(with: .move(edge: .top))) + } + + // Shared settings (both local and remote) + Section { + Picker("Language", selection: $language) { + ForEach(languages, id: \.1) { name, code in + Text(name).tag(code) + } + } + .onChange(of: language) { newValue in + ConfigManager.shared.updateConfig(key: "language", value: "\"\(newValue)\"", section: "[whisper]") + } + + Toggle("Translate to English", isOn: $translate) + .onChange(of: translate) { newValue in + ConfigManager.shared.updateConfig(key: "translate", value: newValue ? "true" : "false", section: "[whisper]") + } + + Text("Translate non-English speech to English.") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Language") + } + + Section { + TextField("Initial Prompt", text: $initialPrompt, axis: .vertical) + .lineLimit(2...4) + .onSubmit { saveInitialPrompt() } + + Text("Hint at terminology or formatting. 
Example: \"Technical discussion about Rust.\"") + .font(.caption) + .foregroundColor(.secondary) + } header: { + Text("Initial Prompt") + } + } + .formStyle(.grouped) + .animation(.easeInOut(duration: 0.25), value: backend) + .onAppear { + loadSettings() + } + } + + private func loadSettings() { + let config = ConfigManager.shared.readConfig() + + if let b = config["whisper.backend"]?.replacingOccurrences(of: "\"", with: "") { + backend = b + } + + if let lang = config["whisper.language"]?.replacingOccurrences(of: "\"", with: "") { + language = lang + } + + if let trans = config["whisper.translate"] { + translate = trans == "true" + } + + if let gpu = config["whisper.gpu_isolation"] { + gpuIsolation = gpu == "true" + } + + if let onDemand = config["whisper.on_demand_loading"] { + onDemandLoading = onDemand == "true" + } + + if let prompt = config["whisper.initial_prompt"]?.replacingOccurrences(of: "\"", with: "") { + initialPrompt = prompt + } + + // Remote settings + if let ep = config["whisper.remote_endpoint"]?.replacingOccurrences(of: "\"", with: "") { + endpoint = ep + } + + if let key = config["whisper.remote_api_key"]?.replacingOccurrences(of: "\"", with: "") { + apiKey = key + } + + if let model = config["whisper.remote_model"]?.replacingOccurrences(of: "\"", with: "") { + remoteModel = model + } + + if let timeout = config["whisper.remote_timeout_secs"], let t = Int(timeout) { + timeoutSecs = t + } + } + + private func saveInitialPrompt() { + if initialPrompt.isEmpty { + ConfigManager.shared.updateConfig(key: "initial_prompt", value: "\"\"", section: "[whisper]") + } else { + let escaped = initialPrompt.replacingOccurrences(of: "\"", with: "\\\"") + ConfigManager.shared.updateConfig(key: "initial_prompt", value: "\"\(escaped)\"", section: "[whisper]") + } + } + + private func saveEndpoint() { + if endpoint.isEmpty { + ConfigManager.shared.updateConfig(key: "remote_endpoint", value: "\"\"", section: "[whisper]") + } else { + 
ConfigManager.shared.updateConfig(key: "remote_endpoint", value: "\"\(endpoint)\"", section: "[whisper]") + } + } + + private func saveApiKey() { + if apiKey.isEmpty { + ConfigManager.shared.updateConfig(key: "remote_api_key", value: "\"\"", section: "[whisper]") + } else { + ConfigManager.shared.updateConfig(key: "remote_api_key", value: "\"\(apiKey)\"", section: "[whisper]") + } + } + + private func saveRemoteModel() { + ConfigManager.shared.updateConfig(key: "remote_model", value: "\"\(remoteModel)\"", section: "[whisper]") + } +} diff --git a/macos/VoxtypeSetup/Sources/Utilities/ConfigManager.swift b/macos/VoxtypeSetup/Sources/Utilities/ConfigManager.swift new file mode 100644 index 00000000..e88e68ca --- /dev/null +++ b/macos/VoxtypeSetup/Sources/Utilities/ConfigManager.swift @@ -0,0 +1,147 @@ +import Foundation + +/// Centralized config file management +class ConfigManager { + static let shared = ConfigManager() + + private let configPath: String + + private init() { + configPath = NSHomeDirectory() + "/Library/Application Support/voxtype/config.toml" + } + + /// Read config file and return key-value pairs + /// Keys are in the format "section.key" (e.g., "hotkey.key", "whisper.model") + func readConfig() -> [String: String] { + guard let content = try? String(contentsOfFile: configPath, encoding: .utf8) else { + return [:] + } + + var result: [String: String] = [:] + var currentSection = "" + + for line in content.components(separatedBy: .newlines) { + let trimmed = line.trimmingCharacters(in: .whitespaces) + + if trimmed.hasPrefix("[") && trimmed.hasSuffix("]") { + currentSection = String(trimmed.dropFirst().dropLast()) + } else if trimmed.contains("=") && !trimmed.hasPrefix("#") { + let parts = trimmed.components(separatedBy: "=") + if parts.count >= 2 { + let key = parts[0].trimmingCharacters(in: .whitespaces) + let value = parts.dropFirst().joined(separator: "=").trimmingCharacters(in: .whitespaces) + let fullKey = currentSection.isEmpty ? 
key : "\(currentSection).\(key)" + result[fullKey] = value + } + } + } + + return result + } + + /// Update a config value within a specific section + /// - Parameters: + /// - key: The key name (without section prefix) + /// - value: The new value (including quotes if string) + /// - section: Optional section like "[hotkey]" - if provided, only updates the key within that section + func updateConfig(key: String, value: String, section: String? = nil) { + guard let content = try? String(contentsOfFile: configPath, encoding: .utf8) else { + return + } + + var lines = content.components(separatedBy: .newlines) + let targetSection = section?.trimmingCharacters(in: CharacterSet(charactersIn: "[]")) ?? "" + var currentSection = "" + var foundAndReplaced = false + + for i in 0.. String { + var lines = content.components(separatedBy: .newlines) + var sectionIndex: Int? = nil + + // Find the section + for (index, line) in lines.enumerated() { + let trimmed = line.trimmingCharacters(in: .whitespaces) + if trimmed == section { + sectionIndex = index + break + } + } + + if let sectionIndex = sectionIndex { + // Find the end of this section (next section or end of file) + var insertIndex = sectionIndex + 1 + for i in (sectionIndex + 1).. String? { + readConfig()[key]?.replacingOccurrences(of: "\"", with: "") + } + + /// Get a boolean value from config + func getBool(_ key: String) -> Bool? { + guard let value = readConfig()[key] else { return nil } + return value == "true" + } + + /// Get an integer value from config + func getInt(_ key: String) -> Int? 
{ + guard let value = readConfig()[key] else { return nil } + return Int(value) + } +} diff --git a/macos/VoxtypeSetup/Sources/Utilities/PermissionChecker.swift b/macos/VoxtypeSetup/Sources/Utilities/PermissionChecker.swift new file mode 100644 index 00000000..9930c254 --- /dev/null +++ b/macos/VoxtypeSetup/Sources/Utilities/PermissionChecker.swift @@ -0,0 +1,117 @@ +import Foundation +import AVFoundation +import AppKit + +/// Checks and requests macOS permissions required by Voxtype +class PermissionChecker: ObservableObject { + static let shared = PermissionChecker() + + @Published var hasMicrophoneAccess: Bool = false + @Published var hasAccessibilityAccess: Bool = false + @Published var hasInputMonitoringAccess: Bool = false + + private init() { + refresh() + } + + /// Refresh all permission states + func refresh() { + checkMicrophoneAccess() + checkAccessibilityAccess() + checkInputMonitoringAccess() + } + + // MARK: - Microphone + + private func checkMicrophoneAccess() { + // Check confirmation from user (permission is for Voxtype.app, not this app) + hasMicrophoneAccess = UserDefaults.standard.bool(forKey: "microphoneConfirmed") + } + + func openMicrophoneSettings() { + // Use osascript to open Microphone privacy settings directly + let script = """ + tell application "System Settings" + activate + reveal anchor "Privacy_Microphone" of pane id "com.apple.settings.PrivacySecurity.extension" + end tell + """ + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/usr/bin/osascript") + process.arguments = ["-e", script] + try? 
process.run() + } + + func confirmMicrophoneAccess() { + UserDefaults.standard.set(true, forKey: "microphoneConfirmed") + hasMicrophoneAccess = true + } + + // MARK: - Accessibility + + private func checkAccessibilityAccess() { + // Reads the user's manual confirmation flag from UserDefaults; no system trust API is queried here + // Note: Main Voxtype.app permission must be confirmed manually + hasAccessibilityAccess = UserDefaults.standard.bool(forKey: "accessibilityConfirmed") + } + + /// Does not prompt; it only opens the Accessibility settings pane. + func requestAccessibilityAccess() { + // Open System Settings to Accessibility + openAccessibilitySettings() + } + + /// Records the user's confirmation in UserDefaults and updates the published flag. + func confirmAccessibilityAccess() { + UserDefaults.standard.set(true, forKey: "accessibilityConfirmed") + hasAccessibilityAccess = true + } + + /// Launches /usr/bin/osascript with an AppleScript that reveals the Accessibility anchor; a launch failure is ignored (`try?`). + func openAccessibilitySettings() { + // Use osascript to open Accessibility directly + let script = """ + tell application "System Settings" + activate + reveal anchor "Privacy_Accessibility" of pane id "com.apple.settings.PrivacySecurity.extension" + end tell + """ + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/usr/bin/osascript") + process.arguments = ["-e", script] + try? process.run() + } + + // MARK: - Input Monitoring + + private func checkInputMonitoringAccess() { + // Check confirmation from user (stored under "inputMonitoringConfirmed" in UserDefaults) + hasInputMonitoringAccess = UserDefaults.standard.bool(forKey: "inputMonitoringConfirmed") + } + + /// Launches /usr/bin/osascript with an AppleScript that reveals the Input Monitoring anchor. + func openInputMonitoringSettings() { + // Use osascript to open Input Monitoring directly + let script = """ + tell application "System Settings" + activate + reveal anchor "Privacy_ListenEvent" of pane id "com.apple.settings.PrivacySecurity.extension" + end tell + """ + + let process = Process() + process.executableURL = URL(fileURLWithPath: "/usr/bin/osascript") + process.arguments = ["-e", script] + try? 
/// Helper to run voxtype CLI commands
enum VoxtypeCLI {
    /// Path to the voxtype binary.
    ///
    /// Probes, in order: a `Voxtype.app` next to the running app bundle,
    /// `/Applications`, the Homebrew symlink and `~/.local/bin`. When none of
    /// them exists the bare name "voxtype" is returned; `run` resolves a bare
    /// name through PATH.
    static var binaryPath: String {
        // Sibling bundle location (works for both VoxtypeMenubar.app and VoxtypeSetup.app)
        let bundlePath = Bundle.main.bundlePath
        let parentDir = (bundlePath as NSString).deletingLastPathComponent
        let siblingBinaryPath = (parentDir as NSString)
            .appendingPathComponent("Voxtype.app/Contents/MacOS/voxtype")

        let candidates = [
            siblingBinaryPath,
            "/Applications/Voxtype.app/Contents/MacOS/voxtype",
            "/opt/homebrew/bin/voxtype",
            NSHomeDirectory() + "/.local/bin/voxtype",
        ]

        // Fallback to PATH
        return candidates.first(where: { FileManager.default.fileExists(atPath: $0) }) ?? "voxtype"
    }

    /// Run a voxtype command.
    ///
    /// - Parameters:
    ///   - arguments: Arguments passed to the voxtype binary.
    ///   - wait: When `true`, blocks until the command exits and captures its
    ///     combined stdout/stderr. When `false`, the command is started and
    ///     left running with its output discarded.
    /// - Returns: The captured output (empty when `wait` is `false`) and
    ///   whether the command succeeded. For `wait == false`, success only
    ///   means the process could be launched.
    @discardableResult
    static func run(_ arguments: [String], wait: Bool = true) -> (output: String, success: Bool) {
        let task = Process()
        let binary = binaryPath

        if binary.contains("/") {
            task.executableURL = URL(fileURLWithPath: binary)
            task.arguments = arguments
        } else {
            // Process does not search PATH for a bare command name, so let
            // env(1) do the lookup.
            task.executableURL = URL(fileURLWithPath: "/usr/bin/env")
            task.arguments = [binary] + arguments
        }

        do {
            guard wait else {
                // Nobody reads the output of a detached process. A pipe would
                // either fill up and block the child or be closed underneath
                // it, so send the output to the null device instead.
                task.standardOutput = FileHandle.nullDevice
                task.standardError = FileHandle.nullDevice
                try task.run()
                return ("", true)
            }

            let pipe = Pipe()
            task.standardOutput = pipe
            task.standardError = pipe
            try task.run()

            // Drain the pipe BEFORE waiting for exit: a child that writes more
            // than the pipe buffer holds would otherwise block forever while
            // we block in waitUntilExit().
            let data = pipe.fileHandleForReading.readDataToEndOfFile()
            task.waitUntilExit()

            let output = String(data: data, encoding: .utf8) ?? ""
            return (output, task.terminationStatus == 0)
        } catch {
            return ("Error: \(error.localizedDescription)", false)
        }
    }

    /// Get daemon status (trimmed output of `voxtype status`).
    static func getStatus() -> String {
        let result = run(["status"])
        return result.output.trimmingCharacters(in: .whitespacesAndNewlines)
    }

    /// Check if daemon is running, i.e. it reports one of its known states.
    static func isDaemonRunning() -> Bool {
        let runningStates: Set<String> = ["idle", "recording", "transcribing"]
        return runningStates.contains(getStatus().lowercased())
    }

    /// Restart the daemon (stop, clean up, start fresh).
    ///
    /// The stop/cleanup phase runs on a background queue because it sleeps;
    /// the daemon is started and `completion` is invoked on the main queue.
    static func restartDaemon(completion: (() -> Void)? = nil) {
        DispatchQueue.global().async {
            // Kill daemon with SIGKILL to ensure it stops
            runQuietly("/usr/bin/pkill", ["-9", "voxtype"])

            // Wait for process to fully terminate
            Thread.sleep(forTimeInterval: 0.5)

            // Clean up lock and state files
            runQuietly("/bin/rm", ["-rf", "/tmp/voxtype"])

            // Wait a moment for filesystem to sync
            Thread.sleep(forTimeInterval: 0.5)

            // Start daemon
            DispatchQueue.main.async {
                _ = run(["daemon"], wait: false)
                completion?()
            }
        }
    }

    /// Runs a helper tool with its output discarded and waits for it to exit.
    /// Best-effort: a tool that cannot be launched is silently skipped, and
    /// we only wait when the launch actually succeeded.
    private static func runQuietly(_ path: String, _ arguments: [String]) {
        let task = Process()
        task.executableURL = URL(fileURLWithPath: path)
        task.arguments = arguments
        task.standardOutput = FileHandle.nullDevice
        task.standardError = FileHandle.nullDevice

        do {
            try task.run()
        } catch {
            return
        }
        task.waitUntilExit()
    }
}
return "Hotkey" + case .audio: return "Audio" + case .models: return "Models" + case .whisper: return "Whisper" + case .output: return "Output" + case .textProcessing: return "Text Processing" + case .notifications: return "Notifications" + case .permissions: return "Permissions" + case .advanced: return "Advanced" + } + } + + var icon: String { + switch self { + case .general: return "gearshape" + case .hotkey: return "keyboard" + case .audio: return "mic" + case .models: return "cpu" + case .whisper: return "waveform" + case .output: return "text.cursor" + case .textProcessing: return "text.quote" + case .notifications: return "bell" + case .permissions: return "lock.shield" + case .advanced: return "wrench.and.screwdriver" + } + } + + @ViewBuilder + var view: some View { + switch self { + case .general: GeneralSettingsView() + case .hotkey: HotkeySettingsView() + case .audio: AudioSettingsView() + case .models: ModelsSettingsView() + case .whisper: WhisperSettingsView() + case .output: OutputSettingsView() + case .textProcessing: TextProcessingSettingsView() + case .notifications: NotificationSettingsView() + case .permissions: PermissionsSettingsView() + case .advanced: AdvancedSettingsView() + } + } +} diff --git a/macos/VoxtypeSetup/VoxtypeSetup.entitlements b/macos/VoxtypeSetup/VoxtypeSetup.entitlements new file mode 100644 index 00000000..cffd6379 --- /dev/null +++ b/macos/VoxtypeSetup/VoxtypeSetup.entitlements @@ -0,0 +1,8 @@ + + + + + com.apple.security.automation.apple-events + + + diff --git a/macos/VoxtypeSetup/build-app.sh b/macos/VoxtypeSetup/build-app.sh new file mode 100755 index 00000000..288a9c82 --- /dev/null +++ b/macos/VoxtypeSetup/build-app.sh @@ -0,0 +1,95 @@ +#!/bin/bash +# Build VoxtypeSetup.app bundle + +set -e + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" +cd "$SCRIPT_DIR" + +# Build release +swift build -c release + +# Create app bundle structure +APP_NAME="VoxtypeSetup" +APP_BUNDLE="$SCRIPT_DIR/.build/${APP_NAME}.app" +CONTENTS="$APP_BUNDLE/Contents" +MACOS="$CONTENTS/MacOS" +RESOURCES="$CONTENTS/Resources" + +rm -rf "$APP_BUNDLE" +mkdir -p "$MACOS" "$RESOURCES" + +# Copy binary +cp ".build/release/$APP_NAME" "$MACOS/" + +# Create icns from source icon +ICON_SOURCE="$REPO_ROOT/assets/icon.png" +if [ -f "$ICON_SOURCE" ]; then + ICONSET_DIR="$SCRIPT_DIR/.build/AppIcon.iconset" + rm -rf "$ICONSET_DIR" + mkdir -p "$ICONSET_DIR" + + # Generate all required sizes for macOS app icons + sips -z 16 16 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_16x16.png" 2>/dev/null + sips -z 32 32 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_16x16@2x.png" 2>/dev/null + sips -z 32 32 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_32x32.png" 2>/dev/null + sips -z 64 64 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_32x32@2x.png" 2>/dev/null + sips -z 128 128 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_128x128.png" 2>/dev/null + sips -z 256 256 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_128x128@2x.png" 2>/dev/null + sips -z 256 256 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_256x256.png" 2>/dev/null + sips -z 512 512 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_256x256@2x.png" 2>/dev/null + sips -z 512 512 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_512x512.png" 2>/dev/null + sips -z 1024 1024 "$ICON_SOURCE" --out "$ICONSET_DIR/icon_512x512@2x.png" 2>/dev/null + + # Convert iconset to icns + iconutil -c icns "$ICONSET_DIR" -o "$RESOURCES/AppIcon.icns" + rm -rf "$ICONSET_DIR" + echo "Created app icon from $ICON_SOURCE" +fi + +# Create Info.plist +cat > "$CONTENTS/Info.plist" << 'EOF' + + + + + CFBundleExecutable + VoxtypeSetup + CFBundleIdentifier + io.voxtype.setup + CFBundleName + Voxtype Setup + CFBundleDisplayName + Voxtype Setup + CFBundleIconFile + AppIcon + CFBundlePackageType + APPL + CFBundleShortVersionString + 1.0.0 + CFBundleVersion + 1 + LSMinimumSystemVersion + 
13.0 + NSHighResolutionCapable + + NSMicrophoneUsageDescription + Voxtype needs microphone access for voice-to-text transcription. + NSAppleEventsUsageDescription + Voxtype needs to control other applications to type transcribed text. + + +EOF + +# Sign the app with entitlements +ENTITLEMENTS="$SCRIPT_DIR/VoxtypeSetup.entitlements" +codesign --force --deep --sign - --entitlements "$ENTITLEMENTS" "$APP_BUNDLE" + +echo "Built: $APP_BUNDLE" +echo "" +echo "To install:" +echo " cp -r $APP_BUNDLE /Applications/" +echo "" +echo "To run:" +echo " open $APP_BUNDLE" diff --git a/packaging/appimage/voxtype-onnx-wrapper.sh b/packaging/appimage/voxtype-onnx-wrapper.sh index d59aacaf..1ee6c3dd 100755 --- a/packaging/appimage/voxtype-onnx-wrapper.sh +++ b/packaging/appimage/voxtype-onnx-wrapper.sh @@ -67,10 +67,13 @@ if [ -x "$VOXTYPE_LIB/voxtype-onnx-avx2" ]; then exec "$VOXTYPE_LIB/voxtype-onnx-avx2" "$@" fi -# Single ONNX binary (CUDA or ROCm AppImage) +# Single ONNX binary (CUDA or MIGraphX AppImage) if [ -x "$VOXTYPE_LIB/voxtype-onnx-cuda" ]; then exec "$VOXTYPE_LIB/voxtype-onnx-cuda" "$@" fi +if [ -x "$VOXTYPE_LIB/voxtype-onnx-migraphx" ]; then + exec "$VOXTYPE_LIB/voxtype-onnx-migraphx" "$@" +fi echo "Error: No voxtype binary found in $VOXTYPE_LIB" >&2 exit 1 diff --git a/packaging/debian/rules b/packaging/debian/rules index ffe027e5..dd6ab09d 100755 --- a/packaging/debian/rules +++ b/packaging/debian/rules @@ -33,6 +33,11 @@ override_dh_auto_install: debian/voxtype/usr/share/zsh/vendor-completions/_voxtype install -D -m 644 packaging/completions/voxtype.fish \ debian/voxtype/usr/share/fish/vendor_completions.d/voxtype.fish + # Install configuration TUI launcher (.desktop entry + terminal-picker script) + install -D -m 644 packaging/voxtype-configure.desktop \ + debian/voxtype/usr/share/applications/voxtype-configure.desktop + install -D -m 755 packaging/scripts/voxtype-configure-launcher \ + debian/voxtype/usr/bin/voxtype-configure-launcher override_dh_auto_test: # Only 
test with AVX2 build to avoid SIGILL in build environments diff --git a/packaging/debian/voxtype.service b/packaging/debian/voxtype.service index 136e82ee..edcf9ea1 100644 --- a/packaging/debian/voxtype.service +++ b/packaging/debian/voxtype.service @@ -14,12 +14,5 @@ RestartSec=5 # Note: User must be in 'input' group for evdev access # Before enabling this service, run: voxtype setup --download -# GPU Selection (for systems with multiple GPUs): -# Create ~/.config/systemd/user/voxtype.service.d/gpu.conf with: -# [Service] -# Environment="VOXTYPE_VULKAN_DEVICE=nvidia" -# Valid values: nvidia, amd, intel -# Run: voxtype setup gpu to see detected GPUs - [Install] WantedBy=graphical-session.target diff --git a/packaging/homebrew/Casks/voxtype.rb b/packaging/homebrew/Casks/voxtype.rb new file mode 100644 index 00000000..c80e77ff --- /dev/null +++ b/packaging/homebrew/Casks/voxtype.rb @@ -0,0 +1,127 @@ +cask "voxtype" do + version "0.6.0-rc1" + sha256 "791963b523e84c3569cae2e64fae02bb782e9ce1bf0f244b8f45a8149ad80dd8" + + url "file:///Users/pete/workspace/voxtype/releases/0.6.0-rc1/Voxtype-0.6.0-rc1-macos-arm64.dmg" + name "Voxtype" + desc "Push-to-talk voice-to-text for macOS" + homepage "https://voxtype.io" + + livecheck do + url :url + strategy :github_latest + end + + depends_on macos: ">= :ventura" + depends_on formula: "terminal-notifier" + + app "Voxtype.app" + + postflight do + # Remove quarantine attribute (app is unsigned) + system_command "/usr/bin/xattr", args: ["-cr", "/Applications/Voxtype.app"] + + # Clean up any stale state from previous installs + system_command "/bin/rm", args: ["-rf", "/tmp/voxtype"] + + # Create config directory + system_command "/bin/mkdir", args: ["-p", "#{ENV["HOME"]}/Library/Application Support/voxtype"] + + # Create logs directory + system_command "/bin/mkdir", args: ["-p", "#{ENV["HOME"]}/Library/Logs/voxtype"] + + # Bundle terminal-notifier for notifications with custom icon + system_command "/bin/cp", args: [ + "-R", + 
"#{HOMEBREW_PREFIX}/opt/terminal-notifier/terminal-notifier.app", + "/Applications/Voxtype.app/Contents/Resources/" + ] + + # Create symlink for CLI access + system_command "/bin/ln", args: ["-sf", "/Applications/Voxtype.app/Contents/MacOS/voxtype", "#{HOMEBREW_PREFIX}/bin/voxtype"] + + # Install LaunchAgent for auto-start + launch_agents_dir = "#{ENV["HOME"]}/Library/LaunchAgents" + system_command "/bin/mkdir", args: ["-p", launch_agents_dir] + + plist_path = "#{launch_agents_dir}/io.voxtype.daemon.plist" + plist_content = <<~PLIST + + + + + Label + io.voxtype.daemon + ProgramArguments + + /Applications/Voxtype.app/Contents/MacOS/voxtype + daemon + + RunAtLoad + + KeepAlive + + StandardOutPath + #{ENV["HOME"]}/Library/Logs/voxtype/stdout.log + StandardErrorPath + #{ENV["HOME"]}/Library/Logs/voxtype/stderr.log + EnvironmentVariables + + PATH + /usr/local/bin:/usr/bin:/bin:/opt/homebrew/bin + + ProcessType + Interactive + Nice + -10 + + + PLIST + + File.write(plist_path, plist_content) + + # Run initial setup to create config and download model + # This is non-interactive and downloads the smallest fast model + system_command "/Applications/Voxtype.app/Contents/MacOS/voxtype", + args: ["setup", "--download", "--model", "parakeet-tdt-0.6b-v3-int8"], + print_stdout: true + + # Load the LaunchAgent to start the daemon + # It will work once user grants permissions + system_command "/bin/launchctl", args: ["load", plist_path] + + # Launch Settings app to Permissions pane so user can grant access + system_command "/usr/bin/open", args: ["/Applications/Voxtype.app/Contents/MacOS/VoxtypeSetup.app"] + end + + uninstall_postflight do + # Unload and remove LaunchAgent + plist_path = "#{ENV["HOME"]}/Library/LaunchAgents/io.voxtype.daemon.plist" + system_command "/bin/launchctl", args: ["unload", plist_path] if File.exist?(plist_path) + system_command "/bin/rm", args: ["-f", plist_path] + + # Remove CLI symlink + system_command "/bin/rm", args: ["-f", 
# Homebrew cask for the prebuilt Voxtype.app (universal DMG from GitHub releases).
cask "voxtype" do
  version "0.7.0"
  # Placeholder: must be replaced with the real DMG checksum at release time.
  sha256 "PLACEHOLDER_SHA256"

  url "https://github.com/peteonrails/voxtype/releases/download/v#{version}/Voxtype-#{version}-macos-universal.dmg"
  name "Voxtype"
  desc "Push-to-talk voice-to-text"
  homepage "https://voxtype.io"

  depends_on macos: ">= :ventura"

  app "Voxtype.app"

  # Ensure CLI is accessible from PATH. The `binary` artifact links into
  # Homebrew's own bin directory (which differs between Intel and Apple
  # Silicon installs) and removes the link again on uninstall, unlike a
  # hand-made symlink to a hard-coded /usr/local/bin.
  binary "#{appdir}/Voxtype.app/Contents/MacOS/voxtype-bin", target: "voxtype"

  uninstall quit:       "io.voxtype.daemon",
            login_item: "Voxtype"

  zap trash: [
    "~/.config/voxtype",
    "~/Library/Logs/voxtype",
  ]

  caveats <<~EOS
    Open Voxtype to get started:
      open /Applications/Voxtype.app

    Voxtype will automatically:
      - Download a speech model on first launch
      - Prompt for Microphone and Accessibility permissions

    Default hotkey: fn (Globe key)
    More info: voxtype --help
  EOS
end
# Homebrew formula that builds voxtype from source. On macOS it additionally
# wraps the binary in a minimal Voxtype.app bundle inside the keg, because the
# daemon is run from that bundle (see the service block) for macOS permissions.
class Voxtype < Formula
  desc "Push-to-talk voice-to-text for macOS and Linux"
  homepage "https://voxtype.io"
  url "https://github.com/peteonrails/voxtype/archive/refs/tags/v0.6.0-rc.1.tar.gz"
  # Placeholder: must be replaced with the real tarball checksum at release time.
  sha256 "PLACEHOLDER_SHA256"
  license "MIT"
  head "https://github.com/peteonrails/voxtype.git", branch: "feature/macos-release"

  depends_on "cmake" => :build
  depends_on "rust" => :build
  depends_on "pkg-config" => :build

  # macOS dependencies
  on_macos do
    depends_on "portaudio"
  end

  # Linux dependencies
  on_linux do
    depends_on "alsa-lib"
    depends_on "libxkbcommon"
  end

  def install
    # Build release binary with parakeet support on macOS
    if OS.mac?
      system "cargo", "install", *std_cargo_args, "--features", "parakeet"
    else
      system "cargo", "install", *std_cargo_args
    end
  end

  def post_install
    # Create the voxtype directory under Homebrew's var prefix
    (var/"voxtype").mkpath

    # Everything below builds the app bundle, which only exists on macOS.
    return unless OS.mac?

    # Create app bundle in Homebrew prefix (writable by Homebrew)
    app_path = prefix/"Voxtype.app"
    contents_path = app_path/"Contents"
    macos_path = contents_path/"MacOS"
    resources_path = contents_path/"Resources"

    # Create directory structure
    macos_path.mkpath
    resources_path.mkpath

    # Copy binary to app bundle (named voxtype-bin to match CFBundleExecutable)
    cp bin/"voxtype", macos_path/"voxtype-bin"

    # Create Info.plist
    # NOTE(review): LSUIElement and NSHighResolutionCapable are assumed to be
    # <true/> — confirm against the intended bundle configuration.
    info_plist = <<~PLIST
      <?xml version="1.0" encoding="UTF-8"?>
      <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
      <plist version="1.0">
      <dict>
          <key>CFBundleExecutable</key>
          <string>voxtype-bin</string>
          <key>CFBundleIdentifier</key>
          <string>io.voxtype.daemon</string>
          <key>CFBundleName</key>
          <string>Voxtype</string>
          <key>CFBundleDisplayName</key>
          <string>Voxtype</string>
          <key>CFBundlePackageType</key>
          <string>APPL</string>
          <key>CFBundleShortVersionString</key>
          <string>#{version}</string>
          <key>CFBundleVersion</key>
          <string>#{version}</string>
          <key>LSMinimumSystemVersion</key>
          <string>13.0</string>
          <key>LSUIElement</key>
          <true/>
          <key>NSHighResolutionCapable</key>
          <true/>
          <key>NSMicrophoneUsageDescription</key>
          <string>Voxtype needs microphone access for speech-to-text transcription.</string>
          <key>NSAppleEventsUsageDescription</key>
          <string>Voxtype needs accessibility access to type transcribed text.</string>
          <key>NSInputMonitoringUsageDescription</key>
          <string>Voxtype monitors keyboard input to detect your push-to-talk hotkey.</string>
      </dict>
      </plist>
    PLIST

    (contents_path/"Info.plist").write(info_plist)

    # Sign the app bundle
    system "codesign", "--force", "--deep", "--sign", "-", app_path

    # Create symlink in ~/Applications for easy access. This is best-effort:
    # any failure is reported as a warning and never aborts the install.
    user_app_link = Pathname.new(Dir.home)/"Applications"/"Voxtype.app"
    begin
      user_app_link.dirname.mkpath

      if user_app_link.symlink?
        # Remove a stale link left by a previous install
        user_app_link.unlink
      elsif user_app_link.exist?
        # A real app lives here (e.g. installed from the DMG); never delete it.
        opoo "#{user_app_link} already exists and is not a symlink; leaving it untouched"
        return
      end

      FileUtils.ln_sf(app_path, user_app_link)
      ohai "Created #{user_app_link} -> #{app_path}"
    rescue => e
      opoo "Could not create symlink in ~/Applications: #{e.message}"
    end
  end

  def caveats
    # The app bundle, permission prompts and the fn hotkey exist only on macOS.
    if OS.mac?
      <<~EOS
        Voxtype.app has been installed and linked to ~/Applications.

        To get started, open Voxtype.app:
          open ~/Applications/Voxtype.app

        Voxtype will automatically:
          - Download a speech model on first launch
          - Prompt for Microphone and Accessibility permissions

        Default hotkey: fn (Globe key)
        More info: voxtype --help
      EOS
    else
      <<~EOS
        More info: voxtype --help
      EOS
    end
  end

  service do
    # Use app bundle path for proper macOS permissions; Linux has no bundle,
    # so run the installed binary directly there.
    run macos: [opt_prefix/"Voxtype.app/Contents/MacOS/voxtype-bin", "daemon"],
        linux: [opt_bin/"voxtype", "daemon"]
    keep_alive true
    log_path var/"log/voxtype.log"
    error_log_path var/"log/voxtype.log"
  end

  test do
    assert_match version.to_s, shell_output("#{bin}/voxtype --version")
  end
end
grep -q "conf.d/voxtype.conf" "$HYPR_CONF"; then echo "source = ~/.config/hypr/conf.d/voxtype.conf" >> "$HYPR_CONF" fi +# Drop window rules for the configuration TUI so `voxtype configure` (and the +# Walker/dmenu launcher entry) opens as a centered floating window instead of +# tiling like a regular terminal. +mkdir -p ~/.config/hypr/conf.d +cat > ~/.config/hypr/conf.d/voxtype-tui.conf << 'EOF' +# Voxtype configuration TUI window rules +# voxtype-configure-launcher sets the window class to "voxtype" regardless of +# which terminal emulator runs the TUI. +windowrulev2 = float, class:^(voxtype)$ +windowrulev2 = size 1100 750, class:^(voxtype)$ +windowrulev2 = center, class:^(voxtype)$ +windowrulev2 = bordersize 2, class:^(voxtype)$ +EOF +if [ -f "$HYPR_CONF" ] && ! grep -q "conf.d/voxtype-tui.conf" "$HYPR_CONF"; then + echo "source = ~/.config/hypr/conf.d/voxtype-tui.conf" >> "$HYPR_CONF" +fi + # Enable systemd user service echo "" echo "Enabling voxtype service..." @@ -102,3 +119,7 @@ echo "" echo "Keybindings are in: ~/.config/hypr/conf.d/voxtype.conf" echo "Voxtype config in: ~/.config/voxtype/config.toml" echo "" +echo "To tune voxtype settings interactively:" +echo " • Open Walker / fuzzel and search for 'Voxtype Configuration'" +echo " • Or run: voxtype configure" +echo "" diff --git a/packaging/omarchy/voxtype-tui.hypr b/packaging/omarchy/voxtype-tui.hypr new file mode 100644 index 00000000..564d9612 --- /dev/null +++ b/packaging/omarchy/voxtype-tui.hypr @@ -0,0 +1,14 @@ +# Hyprland window rules for the voxtype configuration TUI. +# +# Source this from your hyprland.conf (or the file Omarchy uses for window +# rules) to make `voxtype configure` open as a centered floating window +# rather than tiling like a regular terminal. +# +# source = ~/.config/hypr/voxtype-tui.hypr + +# The voxtype-configure-launcher script sets the window class to "voxtype" +# regardless of which terminal emulator runs the TUI. 
#!/bin/sh
# voxtype-configure-launcher
#
# Launch `voxtype configure` in a terminal window, discovering whichever
# terminal emulator the user has installed. Window class is set to
# `voxtype` so compositor rules (Hyprland windowrulev2,
# Sway for_window, …) can float and size the window.
#
# Picked up by /usr/share/applications/voxtype-configure.desktop, which is
# in turn surfaced in Walker, fuzzel, rofi, dmenu, KRunner, GNOME Activities.

set -eu

# Order of preference. $TERMINAL wins if it's set and resolvable.
candidates="${TERMINAL:-} ghostty alacritty kitty foot wezterm konsole gnome-terminal xterm"

term=""
# $candidates is deliberately unquoted: word splitting yields one candidate
# per word and drops the empty slot left by an unset $TERMINAL.
for c in $candidates; do
    if command -v "$c" >/dev/null 2>&1; then
        term="$c"
        break
    fi
done

if [ -z "$term" ]; then
    # Desktop notification is best-effort; the stderr message always appears.
    notify-send -a Voxtype "Voxtype Configuration" \
        "No terminal emulator found. Install ghostty, alacritty, kitty, foot, wezterm, or xterm." \
        2>/dev/null || true
    echo "voxtype-configure-launcher: no terminal emulator found" >&2
    exit 1
fi

# Each terminal flags the window class differently. Set class=voxtype
# so the compositor rule we ship can match it.
#
# Match on the basename: $TERMINAL may be an absolute path such as
# /usr/bin/kitty, which must still select the kitty branch instead of
# falling through to the generic case and losing the window class.
case "${term##*/}" in
    ghostty)
        exec "$term" --class=voxtype --command="voxtype configure"
        ;;
    alacritty)
        exec "$term" --class voxtype -e voxtype configure
        ;;
    kitty)
        exec "$term" --class voxtype voxtype configure
        ;;
    wezterm)
        exec "$term" start --class voxtype -- voxtype configure
        ;;
    foot)
        exec "$term" --app-id voxtype voxtype configure
        ;;
    xterm)
        exec "$term" -class voxtype -e voxtype configure
        ;;
    konsole)
        # No window class is set for konsole here.
        exec "$term" -e voxtype configure
        ;;
    gnome-terminal)
        # No window class is set for gnome-terminal here.
        exec "$term" -- voxtype configure
        ;;
    *)
        # Unknown but available terminal — fall through with -e.
        exec "$term" -e voxtype configure
        ;;
esac
# Build Swift apps
#
# build_swift_app NAME
#   Runs macos/NAME/build-app.sh with its output captured to a temporary log.
#   The log is shown only when the build fails, so a broken Swift build no
#   longer aborts this script (set -e) without any diagnostics. The build runs
#   in a subshell, so the caller's working directory is left unchanged.
build_swift_app() {
    local name="$1"
    local app_src_dir="${PROJECT_DIR}/macos/${name}"
    local log
    log="$(mktemp "${TMPDIR:-/tmp}/voxtype-${name}-build.XXXXXX")"

    echo -e "${YELLOW}Building ${name}...${NC}"
    if ! (cd "$app_src_dir" && ./build-app.sh) > "$log" 2>&1; then
        echo -e "${RED}Error: ${name} build failed. Build output:${NC}" >&2
        cat "$log" >&2
        rm -f "$log"
        exit 1
    fi
    rm -f "$log"
}

build_swift_app VoxtypeMenubar
MENUBAR_APP="${PROJECT_DIR}/macos/VoxtypeMenubar/.build/VoxtypeMenubar.app"

build_swift_app VoxtypeSetup
SETUP_APP="${PROJECT_DIR}/macos/VoxtypeSetup/.build/VoxtypeSetup.app"
-d "$SETUP_APP" ]]; then + echo -e "${RED}Error: VoxtypeSetup.app not found${NC}" + exit 1 +fi + +# Create app bundle structure +echo -e "${YELLOW}Creating Voxtype.app bundle...${NC}" +rm -rf "$APP_DIR" +mkdir -p "$APP_DIR/Contents/MacOS" +mkdir -p "$APP_DIR/Contents/Resources" + +# Copy the main voxtype binary (named voxtype-bin to match CFBundleExecutable) +cp "$BINARY" "$APP_DIR/Contents/MacOS/voxtype-bin" +chmod +x "$APP_DIR/Contents/MacOS/voxtype-bin" + +# If we have an ONNX Runtime dylib alongside the binary, bundle it into +# Contents/Frameworks/ and patch the binary so it can find it at runtime. +# The dylib's install_name is `@rpath/libonnxruntime..dylib`, so we +# add `@executable_path/../Frameworks` to the binary's rpath list. +if [[ -n "$ORT_DYLIB" ]]; then + echo -e "${YELLOW}Bundling $(basename "$ORT_DYLIB") into Frameworks/...${NC}" + mkdir -p "$APP_DIR/Contents/Frameworks" + cp "$ORT_DYLIB" "$APP_DIR/Contents/Frameworks/" + + # Drop any existing rpath entry first so re-runs are idempotent. The + # delete fails harmlessly if the rpath wasn't already there. + install_name_tool -delete_rpath "@executable_path/../Frameworks" \ + "$APP_DIR/Contents/MacOS/voxtype-bin" 2>/dev/null || true + install_name_tool -add_rpath "@executable_path/../Frameworks" \ + "$APP_DIR/Contents/MacOS/voxtype-bin" + + # install_name_tool invalidates the existing linker signature; re-sign + # adhoc so Gatekeeper sees a valid (if untrusted) signature. The + # outer .app gets signed with Developer ID by sign-macos.sh later, if + # available. 
+ codesign --force --sign - "$APP_DIR/Contents/MacOS/voxtype-bin" +fi + +# Copy VoxtypeMenubar.app +cp -R "$MENUBAR_APP" "$APP_DIR/Contents/MacOS/" + +# Copy VoxtypeSetup.app +cp -R "$SETUP_APP" "$APP_DIR/Contents/MacOS/" + +# Copy engine icons for notifications +if [[ -f "${PROJECT_DIR}/assets/engines/parakeet.png" ]]; then + cp "${PROJECT_DIR}/assets/engines/parakeet.png" "$APP_DIR/Contents/Resources/" +fi +if [[ -f "${PROJECT_DIR}/assets/engines/whisper.png" ]]; then + cp "${PROJECT_DIR}/assets/engines/whisper.png" "$APP_DIR/Contents/Resources/" +fi + +# Copy app icon if it exists +if [[ -f "${PROJECT_DIR}/assets/icon.icns" ]]; then + cp "${PROJECT_DIR}/assets/icon.icns" "$APP_DIR/Contents/Resources/AppIcon.icns" +fi + +# Create Info.plist +cat > "$APP_DIR/Contents/Info.plist" << EOF + + + + + CFBundleExecutable + voxtype-bin + CFBundleIdentifier + io.voxtype.daemon + CFBundleName + Voxtype + CFBundleDisplayName + Voxtype + CFBundleVersion + ${VERSION} + CFBundleShortVersionString + ${VERSION} + CFBundlePackageType + APPL + LSMinimumSystemVersion + 13.0 + NSHighResolutionCapable + + LSUIElement + + NSMicrophoneUsageDescription + Voxtype needs microphone access to record your voice for transcription. + NSAppleEventsUsageDescription + Voxtype uses AppleScript to type transcribed text into applications. + NSInputMonitoringUsageDescription + Voxtype monitors keyboard input to detect your push-to-talk hotkey. 
+ + +EOF + +echo -e "${GREEN}App bundle created:${NC}" +echo " $APP_DIR" +du -sh "$APP_DIR" +echo + +# Create DMG with Applications symlink for drag-to-install +echo -e "${YELLOW}Creating DMG...${NC}" +rm -f "$DMG_PATH" + +# Create a staging directory with the app and an Applications symlink +DMG_STAGING="${RELEASES_DIR}/dmg-staging" +rm -rf "$DMG_STAGING" +mkdir -p "$DMG_STAGING" +cp -R "$APP_DIR" "$DMG_STAGING/" +ln -s /Applications "$DMG_STAGING/Applications" + +hdiutil create -volname "Voxtype ${VERSION}" \ + -srcfolder "$DMG_STAGING" \ + -ov -format UDZO \ + "$DMG_PATH" + +rm -rf "$DMG_STAGING" + +# Get DMG size +SIZE=$(du -h "$DMG_PATH" | cut -f1) + +echo +echo -e "${GREEN}DMG created successfully!${NC}" +echo " DMG: $DMG_PATH" +echo " Size: $SIZE" + +# Generate checksum +echo +echo "SHA256 checksum:" +shasum -a 256 "$DMG_PATH" + +# Update the checksum file +CHECKSUM=$(shasum -a 256 "$DMG_PATH" | cut -d' ' -f1) +echo "${CHECKSUM} $(basename "$DMG_PATH")" > "${RELEASES_DIR}/macos-SHA256SUMS.txt" + +echo +echo "Next steps:" +echo " 1. Test the DMG: open '$DMG_PATH'" +echo " 2. Update Homebrew cask with new SHA256: $CHECKSUM" +echo " 3. Upload to GitHub release" diff --git a/scripts/build-macos.sh b/scripts/build-macos.sh new file mode 100755 index 00000000..9fe27a26 --- /dev/null +++ b/scripts/build-macos.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# +# Build voxtype for macOS (arm64) with all transcription engines. +# +# This script: +# 1. Downloads Microsoft's official ONNX Runtime prebuilt (cached) +# 2. Builds with --features "gpu-metal,parakeet-coreml,moonshine,sensevoice, +# paraformer,dolphin,omnilingual,cohere" +# 3. Copies the binary and libonnxruntime.dylib into releases/${VERSION}/ +# +# The DMG packaging step (build-macos-dmg.sh) bundles the dylib into +# Voxtype.app/Contents/Frameworks/ and patches the binary's rpath. 
#
# Pyke's CDN (cdn.pyke.io) is unreliable from some build environments, so
# we use Microsoft's GitHub release directly with ORT_STRATEGY=system. The
# ONNX Runtime version is pinned to whatever ort 2.0.0-rc.12 targets.
#
# Usage:
#   ./scripts/build-macos.sh 0.7.0-rc1
#
# Outputs:
#   releases/${VERSION}/voxtype-${VERSION}-macos-arm64
#   releases/${VERSION}/libonnxruntime.${ORT_VERSION}.dylib

set -euo pipefail

VERSION="${1:-}"

if [[ -z "$VERSION" ]]; then
    echo "Usage: $0 VERSION"
    echo "Example: $0 0.7.0-rc1"
    exit 1
fi

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# ONNX Runtime version that ort 2.0.0-rc.12 binds against.
# Bump this when upgrading the ort dep in Cargo.toml.
ORT_VERSION="1.24.2"

# Engines to build with. Whisper is always on; the rest are opt-in features.
# parakeet-coreml gives Parakeet CoreML acceleration on Apple Silicon.
ENGINE_FEATURES="gpu-metal,parakeet-coreml,moonshine,sensevoice,paraformer,dolphin,omnilingual,cohere"

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
CACHE_DIR="${PROJECT_DIR}/.cache/onnxruntime"
RELEASES_DIR="${PROJECT_DIR}/releases/${VERSION}"

echo -e "${GREEN}Building voxtype ${VERSION} for macOS (arm64)...${NC}"
echo

if [[ "$(uname)" != "Darwin" ]]; then
    echo -e "${RED}Error: This script must be run on macOS${NC}"
    exit 1
fi

if [[ "$(uname -m)" != "arm64" ]]; then
    echo -e "${RED}Error: This script requires Apple Silicon (arm64)${NC}"
    echo "x86_64 macOS support is not yet wired up; build on an arm64 host."
    exit 1
fi

mkdir -p "$RELEASES_DIR" "$CACHE_DIR"

# ---- Fetch Microsoft ONNX Runtime prebuilt ---------------------------------

ORT_TARBALL="onnxruntime-osx-arm64-${ORT_VERSION}.tgz"
ORT_DIR="${CACHE_DIR}/onnxruntime-osx-arm64-${ORT_VERSION}"
ORT_URL="https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/${ORT_TARBALL}"
ORT_LIB_DIR="${ORT_DIR}/lib"
ORT_DYLIB_NAME="libonnxruntime.${ORT_VERSION}.dylib"

# The cache is considered valid only if the dylib we actually link against is
# present. Testing for the directory alone would treat a half-extracted
# tarball (interrupted run, full disk) as a good cache forever.
if [[ ! -f "${ORT_LIB_DIR}/${ORT_DYLIB_NAME}" ]]; then
    echo -e "${YELLOW}Downloading ONNX Runtime ${ORT_VERSION} from Microsoft...${NC}"

    # Download and extract into a scratch directory inside CACHE_DIR (same
    # filesystem), then rename into place so $ORT_DIR only ever appears
    # fully populated. The trap removes the scratch dir if anything fails.
    ORT_TMP="$(mktemp -d "${CACHE_DIR}/download.XXXXXX")"
    trap 'rm -rf "$ORT_TMP"' EXIT

    curl -fL --max-time 300 -o "${ORT_TMP}/${ORT_TARBALL}" "$ORT_URL"

    echo "Extracting..."
    tar -xzf "${ORT_TMP}/${ORT_TARBALL}" -C "$ORT_TMP"

    # Discard any stale/partial directory left by an earlier failed run.
    rm -rf "$ORT_DIR"
    mv "${ORT_TMP}/$(basename "$ORT_DIR")" "$ORT_DIR"

    rm -rf "$ORT_TMP"
    trap - EXIT
else
    echo "Using cached ONNX Runtime: ${ORT_DIR}"
fi

if [[ ! -f "${ORT_LIB_DIR}/${ORT_DYLIB_NAME}" ]]; then
    echo -e "${RED}Error: expected ${ORT_DYLIB_NAME} not found in ${ORT_LIB_DIR}${NC}"
    # Diagnostic listing only; must not mask the real error under `set -e`.
    ls -la "$ORT_LIB_DIR" || true
    exit 1
fi

# ---- Build voxtype with all engine features --------------------------------

echo
echo -e "${YELLOW}Building voxtype with all engines (${ENGINE_FEATURES})...${NC}"
# Best effort: the target may already be installed, or rustup may be absent
# (e.g. a distro toolchain). cargo reports a clear error if it is missing.
rustup target add aarch64-apple-darwin >/dev/null 2>&1 || true

# Subshell so the `cd` does not leak into the rest of the script.
(
    cd "$PROJECT_DIR"
    ORT_STRATEGY=system \
    ORT_LIB_LOCATION="$ORT_LIB_DIR" \
    ORT_PREFER_DYNAMIC_LINK=1 \
    cargo build --release \
        --target aarch64-apple-darwin \
        --features "$ENGINE_FEATURES"
)

BUILT_BINARY="${PROJECT_DIR}/target/aarch64-apple-darwin/release/voxtype"

if [[ ! -f "$BUILT_BINARY" ]]; then
    echo -e "${RED}Error: build did not produce ${BUILT_BINARY}${NC}"
    exit 1
fi

# ---- Stage outputs into releases/ -----------------------------------------

OUTPUT_BINARY="${RELEASES_DIR}/voxtype-${VERSION}-macos-arm64"
OUTPUT_DYLIB="${RELEASES_DIR}/${ORT_DYLIB_NAME}"

cp "$BUILT_BINARY" "$OUTPUT_BINARY"
chmod +x "$OUTPUT_BINARY"
cp "${ORT_LIB_DIR}/${ORT_DYLIB_NAME}" "$OUTPUT_DYLIB"

# Smoke-test: verify the binary executes when given access to the dylib.
echo
echo "Verifying binary..."
+DYLD_LIBRARY_PATH="$ORT_LIB_DIR" "$OUTPUT_BINARY" --version + +BINARY_SIZE=$(du -h "$OUTPUT_BINARY" | cut -f1) +DYLIB_SIZE=$(du -h "$OUTPUT_DYLIB" | cut -f1) + +echo +echo -e "${GREEN}Build complete!${NC}" +echo " Binary: $OUTPUT_BINARY ($BINARY_SIZE)" +echo " Dylib: $OUTPUT_DYLIB ($DYLIB_SIZE)" +echo " Engines: whisper, parakeet (CoreML), moonshine, sensevoice," +echo " paraformer, dolphin, omnilingual, cohere" +echo +echo "Next steps:" +echo " 1. Build DMG: ./scripts/build-macos-dmg.sh ${VERSION}" +echo " 2. Sign binary: ./scripts/sign-macos.sh ${OUTPUT_BINARY} (optional, needs Dev ID)" +echo " 3. Notarize: ./scripts/notarize-macos.sh " diff --git a/scripts/notarize-macos.sh b/scripts/notarize-macos.sh new file mode 100755 index 00000000..e1b1b1f8 --- /dev/null +++ b/scripts/notarize-macos.sh @@ -0,0 +1,94 @@ +#!/bin/bash +# +# Notarize a macOS binary with Apple +# +# Requires: +# - Binary signed with Developer ID certificate +# - App-specific password for notarization +# +# Environment variables (required): +# APPLE_ID - Apple Developer account email +# APPLE_ID_PASSWORD - App-specific password (NOT your account password) +# APPLE_TEAM_ID - Team ID from Apple Developer account +# +# Usage: +# ./scripts/notarize-macos.sh releases/0.5.0/voxtype-0.5.0-macos-universal + +set -euo pipefail + +BINARY="${1:-}" + +if [[ -z "$BINARY" || ! 
-f "$BINARY" ]]; then
    echo "Usage: $0 BINARY_PATH"
    echo "Example: $0 releases/0.5.0/voxtype-0.5.0-macos-universal"
    exit 1
fi

# Check required environment variables
if [[ -z "${APPLE_ID:-}" ]]; then
    echo "Error: APPLE_ID environment variable not set"
    echo "Set to your Apple Developer account email"
    exit 1
fi

if [[ -z "${APPLE_ID_PASSWORD:-}" ]]; then
    echo "Error: APPLE_ID_PASSWORD environment variable not set"
    echo "Create an app-specific password at https://appleid.apple.com/account/manage"
    exit 1
fi

if [[ -z "${APPLE_TEAM_ID:-}" ]]; then
    echo "Error: APPLE_TEAM_ID environment variable not set"
    echo "Find at https://developer.apple.com/account/#/membership"
    exit 1
fi

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

echo -e "${GREEN}Notarizing macOS binary...${NC}"
echo "Binary: $BINARY"
echo

# Classify the artifact. The notary service takes .dmg/.pkg/.zip uploads
# as-is; anything else (a bare Mach-O executable) has to be wrapped in a ZIP.
# Only disk images and installer packages can carry a stapled ticket.
case "$BINARY" in
    *.dmg) ARTIFACT_KIND="dmg" ;;
    *.pkg) ARTIFACT_KIND="pkg" ;;
    *.zip) ARTIFACT_KIND="zip" ;;
    *)     ARTIFACT_KIND="standalone" ;;
esac

# Temporary ZIP (if we create one) is removed on every exit path, including
# a failed or interrupted submission.
ZIP_PATH=""
cleanup() {
    if [[ -n "$ZIP_PATH" ]]; then
        rm -f "$ZIP_PATH"
    fi
}
trap cleanup EXIT

if [[ "$ARTIFACT_KIND" == "standalone" ]]; then
    # Create ZIP for notarization (Apple requires a container format)
    ZIP_PATH="${BINARY}.zip"
    echo -e "${YELLOW}Creating ZIP for submission...${NC}"
    ditto -c -k "$BINARY" "$ZIP_PATH"
    echo "Created: $ZIP_PATH"
    echo
    SUBMIT_PATH="$ZIP_PATH"
else
    SUBMIT_PATH="$BINARY"
fi

# Submit for notarization
echo -e "${YELLOW}Submitting to Apple for notarization...${NC}"
echo "This may take several minutes..."
echo

# NOTE(review): --password is visible in the process list while notarytool
# runs; consider `notarytool store-credentials` + --keychain-profile on
# shared build hosts.
xcrun notarytool submit "$SUBMIT_PATH" \
    --apple-id "$APPLE_ID" \
    --password "$APPLE_ID_PASSWORD" \
    --team-id "$APPLE_TEAM_ID" \
    --wait

# Staple the notarization ticket where the format supports it, then verify.
echo
case "$ARTIFACT_KIND" in
    dmg)
        echo -e "${YELLOW}Stapling notarization ticket...${NC}"
        xcrun stapler staple "$BINARY"
        echo
        echo -e "${YELLOW}Verifying notarization...${NC}"
        spctl -a -t open --context context:primary-signature -v "$BINARY"
        ;;
    pkg)
        echo -e "${YELLOW}Stapling notarization ticket...${NC}"
        xcrun stapler staple "$BINARY"
        echo
        echo -e "${YELLOW}Verifying notarization...${NC}"
        spctl -a -t install -v "$BINARY"
        ;;
    zip)
        # A ZIP cannot be stapled or assessed; its contents are covered by
        # the ticket Apple now serves online.
        echo -e "${YELLOW}Skipping stapling: tickets cannot be stapled to ZIP archives.${NC}"
        ;;
    standalone)
        # Tickets are issued for standalone executables but cannot be
        # stapled to them; Gatekeeper looks the ticket up online instead.
        echo -e "${YELLOW}Skipping stapling: tickets cannot be stapled to standalone binaries.${NC}"
        echo
        echo -e "${YELLOW}Verifying notarization...${NC}"
        # spctl's default assessment targets app bundles and may reject a
        # command-line executable even when notarization succeeded, so a
        # failure here is reported but is not fatal.
        if ! spctl -a -v "$BINARY"; then
            echo -e "${YELLOW}Warning: spctl did not accept the standalone binary; check the notarytool output above.${NC}"
        fi
        ;;
esac

echo
echo -e "${GREEN}Notarization complete!${NC}"
echo
echo "The binary is now notarized and can be distributed."
echo "Users will not see Gatekeeper warnings when running it."
echo
echo "Next steps:"
echo " 1. 
Create DMG: ./scripts/build-macos-dmg.sh VERSION" diff --git a/scripts/package.sh b/scripts/package.sh index 1ea737c5..ec997915 100755 --- a/scripts/package.sh +++ b/scripts/package.sh @@ -395,13 +395,45 @@ if [[ "$TARGET_ARCH" == "x86_64" ]]; then cp "${RELEASE_DIR}/voxtype-${VERSION}-linux-x86_64-onnx-avx512" "$STAGING/usr/lib/voxtype/voxtype-onnx-avx512" chmod 755 "$STAGING/usr/lib/voxtype/voxtype-onnx-avx512" fi - if [[ -f "${RELEASE_DIR}/voxtype-${VERSION}-linux-x86_64-onnx-cuda" ]]; then - cp "${RELEASE_DIR}/voxtype-${VERSION}-linux-x86_64-onnx-cuda" "$STAGING/usr/lib/voxtype/voxtype-onnx-cuda" - chmod 755 "$STAGING/usr/lib/voxtype/voxtype-onnx-cuda" - fi - if [[ -f "${RELEASE_DIR}/voxtype-${VERSION}-linux-x86_64-onnx-rocm" ]]; then - cp "${RELEASE_DIR}/voxtype-${VERSION}-linux-x86_64-onnx-rocm" "$STAGING/usr/lib/voxtype/voxtype-onnx-rocm" - chmod 755 "$STAGING/usr/lib/voxtype/voxtype-onnx-rocm" + # GPU-accelerated ONNX binaries each live in their own subdirectory + # alongside the companion shared libs they dlopen at runtime. + # ort 2.0.0-rc.12's CUDA/MIGraphX EPs dlopen libonnxruntime_providers_*.so + # and libonnxruntime_providers_shared.so based on the binary's own + # /proc/self/exe location; if they aren't co-located, EP registration + # fails and ort silently falls back to CPU. + # + # User-facing names at /usr/lib/voxtype/voxtype-onnx-* are symlinks into + # these subdirs. /proc/self/exe resolves the real path, so the .so files + # are found correctly even when invoked through a symlink. + install_onnx_gpu_variant() { + local variant="$1" # cuda-12, cuda-13, migraphx + local ep_lib="$2" # cuda or migraphx + local src="${RELEASE_DIR}/voxtype-${VERSION}-linux-x86_64-onnx-${variant}" + if [[ ! 
-f "$src" ]]; then + return 0 + fi + local subdir="$STAGING/usr/lib/voxtype/${variant}" + mkdir -p "$subdir" + cp "$src" "$subdir/voxtype-onnx-${variant}" + chmod 755 "$subdir/voxtype-onnx-${variant}" + cp "${src}.libonnxruntime_providers_${ep_lib}.so" \ + "$subdir/libonnxruntime_providers_${ep_lib}.so" + cp "${src}.libonnxruntime_providers_shared.so" \ + "$subdir/libonnxruntime_providers_shared.so" + # Convenience symlink at the top level so existing tooling (the + # voxtype-wrapper.sh, voxtype setup gpu, ParakeetBackend detection) + # finds the binary by its short name. + ln -sf "${variant}/voxtype-onnx-${variant}" \ + "$STAGING/usr/lib/voxtype/voxtype-onnx-${variant}" + } + install_onnx_gpu_variant cuda-12 cuda + install_onnx_gpu_variant cuda-13 cuda + install_onnx_gpu_variant migraphx migraphx + # Legacy compat symlink for users with scripts referencing the old + # voxtype-onnx-rocm name. The AMD GPU EP changed from ROCm to MIGraphX + # in v0.7.0; ship one release with both names. Drop in v0.8.0. + if [[ -L "$STAGING/usr/lib/voxtype/voxtype-onnx-migraphx" ]]; then + ln -sf migraphx/voxtype-onnx-migraphx "$STAGING/usr/lib/voxtype/voxtype-onnx-rocm" fi # Install wrapper script as /usr/bin/voxtype diff --git a/scripts/sign-macos.sh b/scripts/sign-macos.sh new file mode 100755 index 00000000..4a69a568 --- /dev/null +++ b/scripts/sign-macos.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# +# Sign a macOS binary for distribution +# +# Requires: +# - Apple Developer ID Application certificate +# - Certificate installed in keychain +# +# Environment variables: +# CODESIGN_IDENTITY - Developer ID (default: auto-detect from keychain) +# +# Usage: +# ./scripts/sign-macos.sh releases/0.5.0/voxtype-0.5.0-macos-universal + +set -euo pipefail + +BINARY="${1:-}" + +if [[ -z "$BINARY" || ! 
-f "$BINARY" ]]; then + echo "Usage: $0 BINARY_PATH" + echo "Example: $0 releases/0.5.0/voxtype-0.5.0-macos-universal" + exit 1 +fi + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +echo -e "${GREEN}Signing macOS binary...${NC}" +echo "Binary: $BINARY" +echo + +# Find signing identity +if [[ -n "${CODESIGN_IDENTITY:-}" ]]; then + IDENTITY="$CODESIGN_IDENTITY" +else + # Try to find a Developer ID certificate + IDENTITY=$(security find-identity -v -p codesigning | grep "Developer ID Application" | head -1 | sed 's/.*"\(.*\)".*/\1/' || true) + + if [[ -z "$IDENTITY" ]]; then + echo -e "${RED}Error: No Developer ID Application certificate found in keychain${NC}" + echo + echo "To sign binaries for distribution outside the Mac App Store, you need:" + echo " 1. Apple Developer Program membership" + echo " 2. Developer ID Application certificate" + echo + echo "Install certificate: Xcode > Preferences > Accounts > Manage Certificates" + echo "Or set CODESIGN_IDENTITY environment variable" + exit 1 + fi +fi + +echo "Using identity: $IDENTITY" +echo + +# Sign the binary +echo -e "${YELLOW}Signing...${NC}" +codesign --deep --force --verify --verbose \ + --sign "$IDENTITY" \ + --timestamp \ + --options runtime \ + "$BINARY" + +echo +echo -e "${YELLOW}Verifying signature...${NC}" +codesign -dv --verbose=4 "$BINARY" 2>&1 | head -20 + +echo +echo -e "${GREEN}Signature verification:${NC}" +codesign --verify --strict --verbose=2 "$BINARY" + +echo +echo -e "${GREEN}Binary signed successfully!${NC}" +echo +echo "Next steps:" +echo " 1. 
Notarize: ./scripts/notarize-macos.sh $BINARY" diff --git a/src/audio/dual_capture.rs b/src/audio/dual_capture.rs index 09b4383b..58ef94b1 100644 --- a/src/audio/dual_capture.rs +++ b/src/audio/dual_capture.rs @@ -59,7 +59,8 @@ impl ParecLoopback { fn start(&mut self) -> Result<(), AudioError> { let mut child = std::process::Command::new("parec") .args([ - "--device", &self.source, + "--device", + &self.source, "--format=float32le", "--channels=1", "--rate=16000", @@ -70,7 +71,9 @@ impl ParecLoopback { .spawn() .map_err(|e| AudioError::Connection(format!("Failed to start parec: {}", e)))?; - let mut stdout = child.stdout.take() + let mut stdout = child + .stdout + .take() .ok_or_else(|| AudioError::Connection("Failed to capture parec stdout".to_string()))?; self.child = Some(child); @@ -160,18 +163,16 @@ impl DualCapture { let loopback = match loopback_device { Some("disabled") | Some("") | None => None, - Some("auto") => { - match Self::find_monitor_source() { - Some(source) => { - tracing::info!("Auto-detected loopback source: {}", source); - Some(ParecLoopback::new(source)) - } - None => { - tracing::warn!("No monitor source found, using mic only"); - None - } + Some("auto") => match Self::find_monitor_source() { + Some(source) => { + tracing::info!("Auto-detected loopback source: {}", source); + Some(ParecLoopback::new(source)) } - } + None => { + tracing::warn!("No monitor source found, using mic only"); + None + } + }, Some(device) => { tracing::info!("Using configured loopback source: {}", device); Some(ParecLoopback::new(device.to_string())) diff --git a/src/audio/enhance.rs b/src/audio/enhance.rs index 2ca8242d..c7212432 100644 --- a/src/audio/enhance.rs +++ b/src/audio/enhance.rs @@ -80,20 +80,15 @@ impl GtcrnEnhancer { mix_data[i * 2 + 1] = bin.im; } - let mix_tensor = - Tensor::::from_array(([1usize, FREQ_BINS, 1, 2], mix_data)).map_err(|e| { - format!("Failed to create mix tensor: {}", e) - })?; + let mix_tensor = Tensor::::from_array(([1usize, 
FREQ_BINS, 1, 2], mix_data)) + .map_err(|e| format!("Failed to create mix tensor: {}", e))?; - let conv_tensor = Tensor::::from_array(( - [2usize, 1, 16, 16, 33], - conv_cache.clone(), - )) - .map_err(|e| format!("Failed to create conv_cache tensor: {}", e))?; + let conv_tensor = + Tensor::::from_array(([2usize, 1, 16, 16, 33], conv_cache.clone())) + .map_err(|e| format!("Failed to create conv_cache tensor: {}", e))?; - let tra_tensor = - Tensor::::from_array(([2usize, 3, 1, 1, 16], tra_cache.clone())) - .map_err(|e| format!("Failed to create tra_cache tensor: {}", e))?; + let tra_tensor = Tensor::::from_array(([2usize, 3, 1, 1, 16], tra_cache.clone())) + .map_err(|e| format!("Failed to create tra_cache tensor: {}", e))?; let inter_tensor = Tensor::::from_array(([2usize, 1, 33, 16], inter_cache.clone())) @@ -101,10 +96,7 @@ impl GtcrnEnhancer { let inputs: Vec<(std::borrow::Cow, ort::session::SessionInputValue)> = vec![ (std::borrow::Cow::Borrowed("mix"), mix_tensor.into()), - ( - std::borrow::Cow::Borrowed("conv_cache"), - conv_tensor.into(), - ), + (std::borrow::Cow::Borrowed("conv_cache"), conv_tensor.into()), (std::borrow::Cow::Borrowed("tra_cache"), tra_tensor.into()), ( std::borrow::Cow::Borrowed("inter_cache"), diff --git a/src/audio/levels.rs b/src/audio/levels.rs new file mode 100644 index 00000000..84f1ce02 --- /dev/null +++ b/src/audio/levels.rs @@ -0,0 +1,496 @@ +//! Audio level emitter for the on-screen visualizer +//! +//! During recording, the daemon buckets the live audio sample stream into +//! 10 ms windows (100 Hz) and emits a small binary frame for each window +//! over a Unix socket at `$XDG_RUNTIME_DIR/voxtype/audio.sock`. +//! +//! Per-frame payload (16 bytes, native byte order): +//! +//! ```text +//! struct AudioFrame { seq: u32, min: f32, max: f32, peak_dbfs: f32 } +//! ``` +//! +//! This is a lossy, best-effort broadcast: subscribers that fall behind get +//! disconnected. The daemon never blocks on slow consumers. +//! +//! 
The emitter is *additive*: it taps the existing +//! `mpsc::Receiver>` returned by `AudioCapture::start()` (which the +//! daemon used to discard). When recording stops, the input channel closes +//! and the emitter task exits, which in turn causes the bucketing loop to +//! end. The hub keeps running across recordings and accepts subscribers in +//! between, but only emits frames while a recording session has provided a +//! sample stream. +//! +//! ## Performance +//! +//! - No allocations in the hot path (per-sample). The input chunks are +//! already allocated by cpal_capture; bucketing reuses a small fixed +//! `[f32; 2]` accumulator. +//! - Subscriber writes use non-blocking `try_send` on a bounded queue per +//! client; clients that can't keep up are dropped, not buffered. +//! - Idle: zero work. The hub only spins up a forwarder task per +//! recording session and tears it down when the session ends. + +use crate::config::Config; +use std::io; +use std::path::PathBuf; +use std::sync::Arc; +use tokio::io::AsyncWriteExt; +use tokio::net::{UnixListener, UnixStream}; +use tokio::sync::{mpsc, Mutex}; + +/// Native sample rate of audio fed to the emitter (matches the daemon's +/// resampled mono stream). +pub const SAMPLE_RATE: u32 = 16_000; + +/// Frame emit rate. 100 Hz = one frame every 10 ms = 160 samples at 16 kHz. +pub const FRAME_HZ: u32 = 100; + +/// Samples per emitted frame. +pub const SAMPLES_PER_FRAME: usize = (SAMPLE_RATE / FRAME_HZ) as usize; + +/// Wire size of an `AudioFrame` in bytes. +pub const FRAME_BYTES: usize = 16; + +/// One audio level frame. +/// +/// `repr(C)` so the layout is stable for the wire format. We serialise +/// fields explicitly via `to_bytes()` rather than reinterpret-casting, +/// to avoid relying on padding rules. +#[repr(C)] +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct AudioFrame { + /// Monotonic frame counter (wraps at u32::MAX). + pub seq: u32, + /// Minimum sample in the 10 ms window (range -1.0..=1.0). 
+ pub min: f32, + /// Maximum sample in the 10 ms window (range -1.0..=1.0). + pub max: f32, + /// Peak amplitude in dBFS. -inf for silence; clamped to -120.0 below. + pub peak_dbfs: f32, +} + +impl AudioFrame { + /// Serialise to 16 bytes in native byte order. + /// + /// We use native order because the OSD runs on the same machine as the + /// daemon. There's no portability concern. + #[inline] + pub fn to_bytes(self) -> [u8; FRAME_BYTES] { + let mut buf = [0u8; FRAME_BYTES]; + buf[0..4].copy_from_slice(&self.seq.to_ne_bytes()); + buf[4..8].copy_from_slice(&self.min.to_ne_bytes()); + buf[8..12].copy_from_slice(&self.max.to_ne_bytes()); + buf[12..16].copy_from_slice(&self.peak_dbfs.to_ne_bytes()); + buf + } + + /// Parse a frame from 16 bytes in native byte order. + pub fn from_bytes(bytes: &[u8; FRAME_BYTES]) -> Self { + let seq = u32::from_ne_bytes(bytes[0..4].try_into().unwrap()); + let min = f32::from_ne_bytes(bytes[4..8].try_into().unwrap()); + let max = f32::from_ne_bytes(bytes[8..12].try_into().unwrap()); + let peak_dbfs = f32::from_ne_bytes(bytes[12..16].try_into().unwrap()); + Self { + seq, + min, + max, + peak_dbfs, + } + } +} + +/// Default path for the audio-frames socket. +pub fn default_socket_path() -> PathBuf { + Config::runtime_dir().join("audio.sock") +} + +/// Per-subscriber bounded queue. 30 frames = 300 ms at 100 Hz; if a client +/// can't keep up over that window, drop it. +const SUBSCRIBER_QUEUE_DEPTH: usize = 30; + +/// Hub for distributing audio frames to subscribers. +/// +/// The hub owns the Unix listener and a list of currently-connected +/// subscribers. Recording sessions feed frames into the hub via +/// [`LevelHub::frame_sink`]; the hub fans them out non-blockingly. +#[derive(Clone)] +pub struct LevelHub { + inner: Arc, +} + +struct HubInner { + /// Bounded mpsc channel: recording-session producers send frames here, + /// the broadcaster task drains it and fans out to clients. 
+ broadcast_tx: mpsc::Sender, + /// Running count of attached subscribers, for telemetry/logging. + subscriber_count: Mutex, + socket_path: PathBuf, +} + +impl LevelHub { + /// Bind a Unix socket and start the broadcaster task. + /// + /// Returns the hub plus the socket path that was bound. If a stale + /// socket file exists (left by a prior daemon crash), it is removed + /// before binding. + pub async fn start(socket_path: PathBuf) -> io::Result { + if let Some(parent) = socket_path.parent() { + std::fs::create_dir_all(parent)?; + } + // Remove any stale socket from a prior run. + if socket_path.exists() { + let _ = std::fs::remove_file(&socket_path); + } + + let listener = UnixListener::bind(&socket_path)?; + tracing::info!("Audio level socket listening at {:?}", socket_path); + + // Frame fan-in: any number of recording-session producers can send. + // 200 frames = 2 seconds of buffered headroom at 100 Hz, plenty. + let (broadcast_tx, broadcast_rx) = mpsc::channel::(200); + + let inner = Arc::new(HubInner { + broadcast_tx, + subscriber_count: Mutex::new(0), + socket_path: socket_path.clone(), + }); + + // The list of active subscriber senders is owned by the + // broadcaster task, not the hub, so we don't need a lock around + // it on the hot path. + let (sub_tx, sub_rx) = mpsc::unbounded_channel::(); + + // Accept loop: per-connection senders are forwarded to the + // broadcaster task via `sub_tx`. + let inner_for_accept = inner.clone(); + tokio::spawn(async move { + run_accept_loop(listener, sub_tx, inner_for_accept).await; + }); + + // Broadcast loop: drains incoming frames, fans out to all + // connected subscribers, drops any whose queue is full. + tokio::spawn(async move { + run_broadcast_loop(broadcast_rx, sub_rx).await; + }); + + Ok(Self { inner }) + } + + /// Returns a sender that recording sessions can use to publish frames. 
+ /// + /// Sending is bounded; if the broadcaster falls behind we drop frames + /// rather than back-pressure the audio thread. + pub fn frame_sink(&self) -> FrameSink { + FrameSink { + tx: self.inner.broadcast_tx.clone(), + } + } + + /// Path of the bound Unix socket. + pub fn socket_path(&self) -> &std::path::Path { + &self.inner.socket_path + } + + /// Best-effort cleanup of the socket file. Called on shutdown. + pub fn cleanup(&self) { + let _ = std::fs::remove_file(&self.inner.socket_path); + } +} + +/// Sender handle handed out by [`LevelHub::frame_sink`]. +#[derive(Clone)] +pub struct FrameSink { + tx: mpsc::Sender, +} + +impl FrameSink { + /// Try to publish a frame. Drops the frame if the broadcaster is + /// backed up. Never blocks and never allocates. + #[inline] + pub fn publish(&self, frame: AudioFrame) { + let _ = self.tx.try_send(frame); + } +} + +/// One subscriber's per-connection mailbox. +struct SubscriberSlot { + /// Sender feeding the per-client writer task. + tx: mpsc::Sender, +} + +async fn run_accept_loop( + listener: UnixListener, + sub_tx: mpsc::UnboundedSender, + inner: Arc, +) { + loop { + match listener.accept().await { + Ok((stream, _addr)) => { + let (tx, rx) = mpsc::channel::(SUBSCRIBER_QUEUE_DEPTH); + let slot = SubscriberSlot { tx }; + if sub_tx.send(slot).is_err() { + // Broadcaster has shut down; close the new connection. + drop(stream); + break; + } + + let inner_for_writer = inner.clone(); + tokio::spawn(async move { + { + let mut count = inner_for_writer.subscriber_count.lock().await; + *count += 1; + tracing::debug!("Audio subscriber connected (count={})", *count); + } + run_subscriber_writer(stream, rx).await; + { + let mut count = inner_for_writer.subscriber_count.lock().await; + *count = count.saturating_sub(1); + tracing::debug!("Audio subscriber disconnected (count={})", *count); + } + }); + } + Err(e) => { + tracing::warn!("Audio socket accept error: {}", e); + // Brief pause to avoid tight error loops. 
+ tokio::time::sleep(std::time::Duration::from_millis(100)).await; + } + } + } +} + +async fn run_subscriber_writer(mut stream: UnixStream, mut rx: mpsc::Receiver) { + while let Some(frame) = rx.recv().await { + let bytes = frame.to_bytes(); + if let Err(e) = stream.write_all(&bytes).await { + tracing::trace!("Audio subscriber write error: {}", e); + break; + } + } + let _ = stream.shutdown().await; +} + +async fn run_broadcast_loop( + mut frame_rx: mpsc::Receiver, + mut new_subs: mpsc::UnboundedReceiver, +) { + let mut subscribers: Vec> = Vec::new(); + + loop { + tokio::select! { + // New subscriber connected; add to fan-out list. + slot = new_subs.recv() => { + match slot { + Some(slot) => subscribers.push(slot.tx), + None => { + // Listener gone; nothing more to do. + break; + } + } + } + // New frame from a recording session; fan out. + frame = frame_rx.recv() => { + match frame { + Some(frame) => { + // Drain any pending new-subscriber notifications first + // so a fast reconnect after recording doesn't wait a + // whole frame to start receiving. + while let Ok(slot) = new_subs.try_recv() { + subscribers.push(slot.tx); + } + if subscribers.is_empty() { + continue; + } + // Fan out, dropping any subscriber whose queue is full. + subscribers.retain(|tx| tx.try_send(frame).is_ok()); + } + None => { + // Hub shutdown. + break; + } + } + } + } + } +} + +/// Bucketing helper: groups f32 samples into fixed-size 10 ms windows and +/// emits an [`AudioFrame`] per completed window. +/// +/// Holds a small running accumulator across calls; a final partial bucket +/// at end-of-stream is discarded (10 ms of "lost" tail audio is well below +/// perceptual threshold). 
pub struct LevelBucketer {
    /// Number of samples that make up one emitted frame.
    samples_per_frame: usize,
    /// Samples folded into the current (incomplete) window so far.
    accumulated: usize,
    /// Running minimum of the current window; `INFINITY` when empty.
    min: f32,
    /// Running maximum of the current window; `NEG_INFINITY` when empty.
    max: f32,
    /// Largest absolute sample value seen in the current window.
    peak_abs: f32,
    /// Sequence number assigned to the next emitted frame.
    seq: u32,
}

impl LevelBucketer {
    /// Create a bucketer with an empty window, starting at sequence 0.
    pub fn new() -> Self {
        Self {
            samples_per_frame: SAMPLES_PER_FRAME,
            accumulated: 0,
            min: f32::INFINITY,
            max: f32::NEG_INFINITY,
            peak_abs: 0.0,
            seq: 0,
        }
    }

    /// Push samples into the bucketer. For each completed 10 ms window an
    /// [`AudioFrame`] is appended to `out`. No allocation when `out` has
    /// sufficient capacity reserved by the caller.
    ///
    /// State for an incomplete trailing window is kept and continued by the
    /// next call.
    pub fn push(&mut self, samples: &[f32], out: &mut Vec<AudioFrame>) {
        for &s in samples {
            // `f32::min`/`f32::max` return the non-NaN operand, so a NaN
            // sample leaves the accumulators untouched (same as the
            // comparison-based form) while still counting toward the window.
            self.min = self.min.min(s);
            self.max = self.max.max(s);
            self.peak_abs = self.peak_abs.max(s.abs());
            self.accumulated += 1;

            if self.accumulated >= self.samples_per_frame {
                // Treat anything at or below 1e-6 (-120 dBFS) as silence so
                // log10 never sees zero.
                let peak_dbfs = if self.peak_abs <= 1e-6 {
                    -120.0
                } else {
                    20.0 * self.peak_abs.log10()
                };
                out.push(AudioFrame {
                    seq: self.seq,
                    // Non-finite accumulators (window of only NaN/inf
                    // samples) are reported as 0.0.
                    min: if self.min.is_finite() { self.min } else { 0.0 },
                    max: if self.max.is_finite() { self.max } else { 0.0 },
                    peak_dbfs,
                });

                // Reset for the next window.
                self.seq = self.seq.wrapping_add(1);
                self.accumulated = 0;
                self.min = f32::INFINITY;
                self.max = f32::NEG_INFINITY;
                self.peak_abs = 0.0;
            }
        }
    }
}

impl Default for LevelBucketer {
    fn default() -> Self {
        Self::new()
    }
}

/// Spawn a forwarder task that reads from an `mpsc::Receiver<Vec<f32>>`
/// (the chunk stream from `AudioCapture::start()`), buckets the samples
/// into 100 Hz frames, and publishes them to the supplied `FrameSink`.
///
/// The task ends when the input receiver is closed. This is the correct
/// signal for "recording stopped".
pub fn spawn_emitter(
    mut chunk_rx: mpsc::Receiver<Vec<f32>>,
    sink: FrameSink,
) -> tokio::task::JoinHandle<()> {
    tokio::spawn(async move {
        let mut bucketer = LevelBucketer::new();
        // Reusable scratch buffer for emitted frames per chunk.
        // 8 frames covers chunks of up to 80 ms; the Vec grows if a larger
        // chunk ever arrives.
        let mut out: Vec<AudioFrame> = Vec::with_capacity(8);

        while let Some(chunk) = chunk_rx.recv().await {
            out.clear();
            bucketer.push(&chunk, &mut out);
            // `drain` empties the buffer but keeps its capacity for reuse.
            for frame in out.drain(..) {
                sink.publish(frame);
            }
        }
        tracing::trace!("Audio level emitter task ended");
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn frame_roundtrip() {
        let f = AudioFrame {
            seq: 42,
            min: -0.5,
            max: 0.75,
            peak_dbfs: -3.0,
        };
        let bytes = f.to_bytes();
        let parsed = AudioFrame::from_bytes(&bytes);
        assert_eq!(parsed, f);
    }

    #[test]
    fn frame_size_is_16_bytes() {
        assert_eq!(FRAME_BYTES, 16);
        assert_eq!(std::mem::size_of::<AudioFrame>(), 16);
    }

    #[test]
    fn bucketer_emits_at_100hz() {
        let mut b = LevelBucketer::new();
        let mut out = Vec::new();
        // 1600 samples = exactly 10 frames at 16 kHz / 100 Hz.
        let samples = vec![0.5_f32; 1600];
        b.push(&samples, &mut out);
        assert_eq!(out.len(), 10);
        assert_eq!(out[0].seq, 0);
        assert_eq!(out[9].seq, 9);
        assert!((out[0].max - 0.5).abs() < 1e-6);
        assert!((out[0].min - 0.5).abs() < 1e-6);
    }

    #[test]
    fn bucketer_partial_window_holds_state() {
        let mut b = LevelBucketer::new();
        let mut out = Vec::new();
        b.push(&vec![0.1_f32; 100], &mut out);
        assert!(out.is_empty(), "incomplete bucket should not emit");
        b.push(&vec![0.2_f32; 60], &mut out);
        assert_eq!(out.len(), 1, "completing the bucket should emit one frame");
    }

    #[test]
    fn bucketer_silence_yields_minus_120_dbfs() {
        let mut b = LevelBucketer::new();
        let mut out = Vec::new();
        b.push(&vec![0.0_f32; SAMPLES_PER_FRAME], &mut out);
        assert_eq!(out.len(), 1);
        assert_eq!(out[0].peak_dbfs, -120.0);
    }

    #[test]
    fn bucketer_full_scale_yields_zero_dbfs() {
        let mut b = LevelBucketer::new();
        let mut out = Vec::new();
        let mut samples = vec![0.0_f32; SAMPLES_PER_FRAME];
        samples[42] = 1.0;
        b.push(&samples, &mut out);
        assert_eq!(out.len(), 1);
assert!(out[0].peak_dbfs.abs() < 1e-3); + } + + #[test] + fn bucketer_min_max_track_polarity() { + let mut b = LevelBucketer::new(); + let mut out = Vec::new(); + let mut samples = vec![0.0_f32; SAMPLES_PER_FRAME]; + samples[0] = -0.8; + samples[1] = 0.4; + b.push(&samples, &mut out); + assert_eq!(out.len(), 1); + assert!((out[0].min - -0.8).abs() < 1e-6); + assert!((out[0].max - 0.4).abs() < 1e-6); + } +} diff --git a/src/audio/mod.rs b/src/audio/mod.rs index 97a0ca62..420342c5 100644 --- a/src/audio/mod.rs +++ b/src/audio/mod.rs @@ -8,6 +8,7 @@ pub mod dual_capture; #[cfg(feature = "onnx-common")] pub mod enhance; pub mod feedback; +pub mod levels; pub mod media; pub use dual_capture::{AudioSourceType, DualCapture, DualSamples, SourcedSample}; diff --git a/src/bin/voxtype_osd.rs b/src/bin/voxtype_osd.rs new file mode 100644 index 00000000..4833468e --- /dev/null +++ b/src/bin/voxtype_osd.rs @@ -0,0 +1,302 @@ +//! `voxtype-osd` — a tiny launcher that picks between the `voxtype-osd-gtk4` +//! and `voxtype-osd-native` frontends and execs the chosen one. +//! +//! The user's preference comes from (in priority order): +//! +//! 1. `--frontend gtk4|native` on the command line +//! 2. `VOXTYPE_OSD_FRONTEND=gtk4|native` env var +//! 3. `[osd] frontend = "gtk4|native"` in `~/.config/voxtype/config.toml` +//! 4. Default: `gtk4` +//! +//! That preference is then reconciled with what's actually installed: +//! +//! - Both binaries available → use the preferred one +//! - Only one available → use it (warn if it's not the preferred one) +//! - Neither available → exit with a clear error pointing to the build +//! feature flags +//! +//! Source builders who only enabled one of `osd-gtk4`/`osd-native` thus +//! get a working `voxtype-osd` regardless of config — the launcher +//! adapts to what was actually built. +//! +//! All other CLI args + env vars pass through unchanged to the chosen +//! frontend (including `--config`, which both frontends consume on their +//! 
own to read the rest of the `[osd]` section). + +use std::env; +use std::os::unix::process::CommandExt; +use std::path::{Path, PathBuf}; +use std::process::{Command, ExitCode}; + +use voxtype::config::Config as VoxtypeConfig; +use voxtype::osd::config::{OsdConfig, OsdFrontend}; + +const NATIVE_BIN: &str = "voxtype-osd-native"; +const GTK4_BIN: &str = "voxtype-osd-gtk4"; + +fn main() -> ExitCode { + tracing_subscriber::fmt() + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")), + ) + .init(); + + let raw_args: Vec = env::args().skip(1).collect(); + if raw_args.iter().any(|a| a == "--help" || a == "-h") { + print_help(); + return ExitCode::SUCCESS; + } + if raw_args.iter().any(|a| a == "--version" || a == "-V") { + println!("voxtype-osd {}", env!("CARGO_PKG_VERSION")); + return ExitCode::SUCCESS; + } + + // Strip `--frontend X` and `--frontend=X` from the args so the chosen + // child doesn't choke on a flag it doesn't know. `--config` and + // everything else passes through. + let (cli_frontend, config_path, rest) = parse_frontend_and_config(&raw_args); + + // Resolve preference: CLI > env > config file > default. + let preferred = cli_frontend + .or_else(|| { + env::var("VOXTYPE_OSD_FRONTEND") + .ok() + .and_then(|s| OsdFrontend::parse_str(&s)) + }) + .unwrap_or_else(|| load_frontend_from_config(config_path.as_deref())); + + let chosen = match resolve_installed(preferred) { + Some(c) => c, + None => { + eprintln!( + "voxtype-osd: neither '{NATIVE_BIN}' nor '{GTK4_BIN}' was found on PATH \ + or next to this binary.\n\ + \n\ + If you built from source, enable at least one OSD feature:\n\ + cargo build --release --features osd-gtk4 # GTK4 frontend\n\ + cargo build --release --features osd-native # SCTK + wgpu + egui\n\ + \n\ + If you installed a package, the OSD binaries may be a separate\n\ + optional dependency." 
+ ); + return ExitCode::from(2); + } + }; + + if chosen.frontend != preferred { + tracing::warn!( + "preferred frontend '{}' not installed; using '{}' instead", + preferred.binary_name(), + chosen.frontend.binary_name(), + ); + } + + // Hand off. exec replaces this process so the child inherits stdin, + // stdout, stderr, signals, and process group cleanly. There's no return + // path on success. + let err = Command::new(&chosen.path).args(&rest).exec(); + eprintln!( + "voxtype-osd: failed to exec '{}': {err}", + chosen.path.display() + ); + ExitCode::from(1) +} + +fn print_help() { + println!( + "voxtype-osd {} — launcher for the on-screen mic visualizer\n\ + \n\ + USAGE:\n \ + voxtype-osd [--frontend gtk4|native] [FRONTEND ARGS...]\n\ + \n\ + OPTIONS:\n \ + --frontend Which frontend to launch. Falls back to\n\ + whatever is installed if the preferred\n\ + frontend isn't found on PATH.\n \ + -h, --help Show this message.\n \ + -V, --version Show version.\n\ + \n\ + All other arguments are passed through to the chosen frontend\n\ + (--config, --width-px, --waveform-gain, etc.). See the frontend's\n\ + own --help for the full list.\n\ + \n\ + CONFIG:\n \ + [osd]\n \ + frontend = \"gtk4\" # or \"native\"\n\ + \n\ + ENV:\n \ + VOXTYPE_OSD_FRONTEND Same as --frontend.\n \ + VOXTYPE_CONFIG Path to the voxtype config file.\n", + env!("CARGO_PKG_VERSION"), + ); +} + +/// Strip `--frontend X`/`--frontend=X` out of `args`, returning the chosen +/// frontend (if any) and the remaining args to pass through to the child. +/// Also sniff `--config X`/`--config=X` so we know which file to read for +/// the `[osd]` section without consuming it from the pass-through args +/// (the child needs to see `--config` too). 
+fn parse_frontend_and_config(args: &[String]) -> (Option, Option, Vec) { + let mut frontend: Option = None; + let mut config: Option = None; + let mut rest: Vec = Vec::with_capacity(args.len()); + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--frontend" { + if let Some(v) = args.get(i + 1) { + frontend = OsdFrontend::parse_str(v); + i += 2; + continue; + } + // `--frontend` with no value: pass through and let the child + // (which doesn't know it) error out properly. + rest.push(a.clone()); + i += 1; + } else if let Some(v) = a.strip_prefix("--frontend=") { + frontend = OsdFrontend::parse_str(v); + i += 1; + } else if a == "--config" { + rest.push(a.clone()); + if let Some(v) = args.get(i + 1) { + config = Some(PathBuf::from(v)); + rest.push(v.clone()); + i += 2; + } else { + i += 1; + } + } else if let Some(v) = a.strip_prefix("--config=") { + config = Some(PathBuf::from(v)); + rest.push(a.clone()); + i += 1; + } else { + rest.push(a.clone()); + i += 1; + } + } + (frontend, config, rest) +} + +/// Load the `[osd] frontend` value from the voxtype config file, falling +/// back to the default when the file is missing, unreadable, or doesn't +/// contain a usable value. +fn load_frontend_from_config(explicit: Option<&Path>) -> OsdFrontend { + let path = explicit + .map(Path::to_path_buf) + .or_else(VoxtypeConfig::default_path); + let Some(path) = path else { + return OsdFrontend::default(); + }; + let Ok(content) = std::fs::read_to_string(&path) else { + return OsdFrontend::default(); + }; + + #[derive(serde::Deserialize, Default)] + struct PartialConfig { + #[serde(default)] + osd: Option, + } + + match toml::from_str::(&content) { + Ok(p) => p.osd.map(|o| o.frontend).unwrap_or_default(), + Err(_) => OsdFrontend::default(), + } +} + +struct ResolvedFrontend { + frontend: OsdFrontend, + path: PathBuf, +} + +/// Find the binary for `preferred`; if missing, fall back to the other +/// frontend. 
Returns `None` only if neither binary is installed. +fn resolve_installed(preferred: OsdFrontend) -> Option { + if let Some(path) = find_binary(preferred.binary_name()) { + return Some(ResolvedFrontend { + frontend: preferred, + path, + }); + } + let other = match preferred { + OsdFrontend::Gtk4 => OsdFrontend::Native, + OsdFrontend::Native => OsdFrontend::Gtk4, + }; + find_binary(other.binary_name()).map(|path| ResolvedFrontend { + frontend: other, + path, + }) +} + +/// Locate a binary by name. First checks alongside `voxtype-osd` itself +/// (so `target/release/voxtype-osd` finds `target/release/voxtype-osd-gtk4` +/// during development) and then walks `$PATH`. +fn find_binary(name: &str) -> Option { + if let Ok(self_exe) = env::current_exe() { + if let Some(parent) = self_exe.parent() { + let candidate = parent.join(name); + if candidate.is_file() { + return Some(candidate); + } + } + } + which::which(name).ok() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_strips_frontend_flag_space_form() { + let args = vec![ + "--frontend".into(), + "native".into(), + "--width-px".into(), + "400".into(), + ]; + let (f, _, rest) = parse_frontend_and_config(&args); + assert_eq!(f, Some(OsdFrontend::Native)); + assert_eq!(rest, vec!["--width-px".to_string(), "400".to_string()]); + } + + #[test] + fn parse_strips_frontend_flag_equals_form() { + let args = vec!["--frontend=gtk4".into(), "--width-px".into(), "400".into()]; + let (f, _, rest) = parse_frontend_and_config(&args); + assert_eq!(f, Some(OsdFrontend::Gtk4)); + assert_eq!(rest, vec!["--width-px".to_string(), "400".to_string()]); + } + + #[test] + fn parse_passes_config_through() { + let args = vec![ + "--config".into(), + "/tmp/foo.toml".into(), + "--width-px".into(), + "400".into(), + ]; + let (_, cfg, rest) = parse_frontend_and_config(&args); + assert_eq!(cfg.as_deref(), Some(Path::new("/tmp/foo.toml"))); + // --config + value still in rest so the child reads it too. 
+ assert_eq!( + rest, + vec![ + "--config".to_string(), + "/tmp/foo.toml".to_string(), + "--width-px".to_string(), + "400".to_string(), + ] + ); + } + + #[test] + fn parse_unknown_frontend_value_drops_it() { + // Bad value is a parse error: returns None for frontend, but doesn't + // pass `--frontend nonsense` through to the child either. + let args = vec!["--frontend".into(), "nonsense".into()]; + let (f, _, rest) = parse_frontend_and_config(&args); + assert_eq!(f, None); + assert!(rest.is_empty()); + } +} diff --git a/src/bin/voxtype_osd_gtk4.rs b/src/bin/voxtype_osd_gtk4.rs new file mode 100644 index 00000000..24de0df5 --- /dev/null +++ b/src/bin/voxtype_osd_gtk4.rs @@ -0,0 +1,632 @@ +//! `voxtype-osd-gtk4` — GTK4 + gtk4-layer-shell on-screen mic visualizer +//! for the Voxtype daemon. +//! +//! Renders a click-through, layer-shell-anchored window containing the +//! scrolling waveform plus a segmented peak meter. Audio frames arrive on +//! the daemon's audio Unix socket via [`voxtype::osd::ipc::run_ipc_loop`], +//! decoded into [`AudioFrame`]s by a tokio runtime on a worker thread, and +//! pushed into a shared [`FrameRing`] + [`PeakHold`]. The GTK side polls a +//! ~60 Hz `glib::timeout_add_local` callback that redraws the +//! `DrawingArea` whenever new frames have arrived. +//! +//! When the IPC socket is silent for `idle_timeout_secs` (Idle proxy) the +//! window is hidden so the binary does no rendering work and consumes +//! effectively zero CPU. It reappears when frames resume. +//! +//! Run with `RUST_LOG=debug` for verbose logs. 
+ +use std::cell::Cell; +use std::path::PathBuf; +use std::sync::{Arc, Mutex}; +use std::time::{Duration, Instant}; + +use cairo::{Context, RectangleInt, Region}; +use clap::Parser; +use gtk4::glib; +use gtk4::prelude::*; +use gtk4::{Application, ApplicationWindow, DrawingArea}; +use gtk4_layer_shell::{Edge, KeyboardMode, Layer, LayerShell}; + +use voxtype::audio::levels::{AudioFrame, FRAME_HZ}; +use voxtype::config::Config as VoxtypeConfig; +use voxtype::osd::config::{OsdConfig, OsdPosition}; +use voxtype::osd::ipc::{resolve_socket_path, run_ipc_loop, FrameRing, DEFAULT_RING_DEPTH}; +use voxtype::osd::theme::ThemeWatcher; +use voxtype::osd::visual::{peak_meter_fraction, project_envelope, MeterZone, Palette, PeakHold}; + +/// Load the `[osd]` section from the voxtype config file, falling back to +/// `OsdConfig::default()` on any error (file missing, unreadable, parse +/// failure, or `[osd]` section absent). +/// +/// We deliberately ignore parse errors instead of returning them: the OSD +/// is a side car, and a malformed config shouldn't prevent it from running +/// with sensible defaults — the user will see the daemon complain about +/// the same file separately. +fn load_osd_config_from_file(explicit: Option<&std::path::Path>) -> OsdConfig { + let path = explicit + .map(std::path::Path::to_path_buf) + .or_else(VoxtypeConfig::default_path); + let Some(path) = path else { + return OsdConfig::default(); + }; + let content = match std::fs::read_to_string(&path) { + Ok(s) => s, + Err(_) => return OsdConfig::default(), + }; + + #[derive(serde::Deserialize, Default)] + struct PartialConfig { + #[serde(default)] + osd: Option, + } + + match toml::from_str::(&content) { + Ok(p) => p.osd.unwrap_or_default(), + Err(_) => OsdConfig::default(), + } +} + +/// Application id for the GTK4 frontend. +const APP_ID: &str = "io.voxtype.OsdGtk4"; + +/// Render tick period (~60 Hz). 
The redraw is gated on whether new frames +/// have arrived since the last paint, so this is a cheap upper bound. +const RENDER_TICK_MS: u32 = 16; + +/// How long we wait without frames before treating the daemon as idle and +/// hiding the surface. Matches the BRIEF's "Idle: surface destroyed" rule. +const IDLE_TIMEOUT_SECS: f32 = 0.15; + +/// Number of segments in the vertical peak meter. +const METER_SEGMENTS: usize = 10; + +/// dBFS floor for the peak meter (maps to "empty bar"). +const METER_FLOOR_DBFS: f32 = -60.0; + +#[derive(Parser, Debug, Clone)] +#[command( + name = "voxtype-osd-gtk4", + version, + about = "Voxtype on-screen mic visualizer (GTK4 + gtk4-layer-shell)" +)] +struct Args { + /// Path to the voxtype config file. Defaults to + /// `~/.config/voxtype/config.toml`. Only the `[osd]` section is read. + #[arg(long, env = "VOXTYPE_CONFIG")] + config: Option, + + /// Path to the audio-frame Unix socket. Defaults to + /// `$XDG_RUNTIME_DIR/voxtype/audio.sock`. + #[arg(long, env = "VOXTYPE_OSD_SOCKET")] + socket: Option, + + /// Seconds to wait between reconnect attempts when the daemon is down. + #[arg(long, default_value = "1.0", env = "VOXTYPE_OSD_RECONNECT_SECS")] + reconnect_secs: f32, + + /// Print one debug line per N frames received (0 = quiet). + #[arg(long, default_value = "0", env = "VOXTYPE_OSD_LOG_EVERY")] + log_every: u32, + + /// Held-peak decay rate in dB/sec. + #[arg(long, default_value = "6.0", env = "VOXTYPE_OSD_PEAK_DECAY")] + peak_decay_db_per_sec: f32, + + /// Surface width in physical pixels. + #[arg(long, env = "VOXTYPE_OSD_WIDTH")] + width_px: Option, + + /// Surface height in physical pixels. + #[arg(long, env = "VOXTYPE_OSD_HEIGHT")] + height_px: Option, + + /// Margin from the screen edge in physical pixels. + #[arg(long, env = "VOXTYPE_OSD_MARGIN")] + margin_px: Option, + + /// Visual gain applied to audio samples before drawing the waveform. + /// Higher = waveform fills more of the vertical for quiet inputs. 
+ /// Reduce for hot mics (e.g. 4.0); raise for quiet sources (e.g. 14.0). + #[arg(long, env = "VOXTYPE_OSD_GAIN")] + waveform_gain: Option, +} + +/// State shared between the IPC worker and the GTK redraw timer. +struct SharedState { + ring: Mutex, + peak: Mutex, + last_seq: Mutex, + last_frame_at: Mutex, +} + +impl SharedState { + fn new(decay_db_per_sec: f32) -> Self { + Self { + ring: Mutex::new(FrameRing::new(DEFAULT_RING_DEPTH)), + peak: Mutex::new(PeakHold::new(decay_db_per_sec)), + last_seq: Mutex::new(0), + last_frame_at: Mutex::new(Instant::now() - Duration::from_secs(3600)), + } + } +} + +fn main() -> anyhow::Result<()> { + tracing_subscriber::fmt() + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")), + ) + .init(); + + let args = Args::parse(); + let socket_path = resolve_socket_path(args.socket.clone()); + + // Layer config: defaults < config file [osd] < CLI/env overrides. + let mut osd_cfg = load_osd_config_from_file(args.config.as_deref()); + if let Some(w) = args.width_px { + osd_cfg.width_px = w; + } + if let Some(h) = args.height_px { + osd_cfg.height_px = h; + } + if let Some(m) = args.margin_px { + osd_cfg.margin_px = m; + } + if let Some(g) = args.waveform_gain { + osd_cfg.waveform_gain = g; + } + // peak_decay_db_per_sec has a clap default value, so this always + // overrides whatever the file said. That's intentional: if the user + // passes the flag, honor it; if they don't, the clap default kicks in. + osd_cfg.peak_decay_db_per_sec = args.peak_decay_db_per_sec; + + tracing::info!( + "voxtype-osd-gtk4 starting; socket={:?} size={}x{} margin={} pos={:?}", + socket_path, + osd_cfg.width_px, + osd_cfg.height_px, + osd_cfg.margin_px, + osd_cfg.position, + ); + + let theme = ThemeWatcher::new(); + let palette = theme.palette(); + + let state = Arc::new(SharedState::new(osd_cfg.peak_decay_db_per_sec)); + + // Spawn the tokio IPC worker on a side thread. 
+ spawn_ipc_worker( + state.clone(), + socket_path, + args.reconnect_secs, + args.log_every, + ); + + // GTK application owns the main thread. + let app = Application::builder().application_id(APP_ID).build(); + + let cfg = osd_cfg.clone(); + let state_for_activate = state.clone(); + app.connect_activate(move |app| { + build_window(app, &cfg, palette, state_for_activate.clone()); + }); + + // GTK's run() consumes argv; we've already parsed via clap, so feed + // it an empty vector to keep it from re-parsing. + let exit = app.run_with_args::<&str>(&[]); + let code: u8 = exit.into(); + if code != 0 { + anyhow::bail!("GTK application exited with status {}", code); + } + Ok(()) +} + +/// Spawn the tokio runtime + IPC loop on a dedicated thread. +fn spawn_ipc_worker( + state: Arc, + socket_path: PathBuf, + reconnect_secs: f32, + log_every: u32, +) { + std::thread::Builder::new() + .name("voxtype-osd-ipc".into()) + .spawn(move || { + let rt = match tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + { + Ok(rt) => rt, + Err(e) => { + tracing::error!("Failed to build tokio runtime: {e}"); + return; + } + }; + + let dt_per_frame = 1.0 / FRAME_HZ as f32; + let mut total: u64 = 0; + let mut last_log = Instant::now(); + + let on_frame = move |frame: AudioFrame| { + if let Ok(mut r) = state.ring.lock() { + r.push(frame); + } + if let Ok(mut p) = state.peak.lock() { + p.update(frame.peak_dbfs, dt_per_frame); + } + if let Ok(mut s) = state.last_seq.lock() { + *s = s.wrapping_add(1); + } + if let Ok(mut t) = state.last_frame_at.lock() { + *t = Instant::now(); + } + + total += 1; + if log_every > 0 && total.is_multiple_of(u64::from(log_every)) { + let elapsed = last_log.elapsed().as_secs_f32(); + let rate = if elapsed > 0.0 { + log_every as f32 / elapsed + } else { + 0.0 + }; + tracing::debug!( + target: "osd::frames", + frontend = "gtk4", + seq = frame.seq, + peak_dbfs = frame.peak_dbfs, + min = frame.min, + max = frame.max, + rate_hz = rate, + "frame batch" 
+ ); + last_log = Instant::now(); + } + }; + + rt.block_on(run_ipc_loop(socket_path, reconnect_secs, on_frame)); + }) + .expect("spawn ipc worker thread"); +} + +/// Build the GTK window, attach layer-shell config, mount the DrawingArea, +/// and start the redraw tick. +fn build_window(app: &Application, cfg: &OsdConfig, palette: Palette, state: Arc) { + let window = ApplicationWindow::builder() + .application(app) + .default_width(cfg.width_px as i32) + .default_height(cfg.height_px as i32) + .resizable(false) + .decorated(false) + .build(); + + // Layer-shell setup: top layer, no keyboard, anchored per config. + window.init_layer_shell(); + window.set_layer(Layer::Overlay); + window.set_keyboard_mode(KeyboardMode::None); + window.set_namespace(Some("voxtype-osd")); + + // Anchor edges from the configured position. Margin is applied to + // every anchored edge for the simple cases. + let (anchor_top, anchor_bottom, anchor_left, anchor_right) = match cfg.position { + OsdPosition::BottomCenter => (false, true, false, false), + OsdPosition::TopCenter => (true, false, false, false), + OsdPosition::BottomLeft => (false, true, true, false), + OsdPosition::BottomRight => (false, true, false, true), + OsdPosition::TopLeft => (true, false, true, false), + OsdPosition::TopRight => (true, false, false, true), + }; + window.set_anchor(Edge::Top, anchor_top); + window.set_anchor(Edge::Bottom, anchor_bottom); + window.set_anchor(Edge::Left, anchor_left); + window.set_anchor(Edge::Right, anchor_right); + + let m = cfg.margin_px as i32; + if anchor_top { + window.set_margin(Edge::Top, m); + } + if anchor_bottom { + window.set_margin(Edge::Bottom, m); + } + if anchor_left { + window.set_margin(Edge::Left, m); + } + if anchor_right { + window.set_margin(Edge::Right, m); + } + + // Don't reserve space on the output: the OSD floats over windows. + window.set_exclusive_zone(0); + + // The drawing area fills the window. 
+ let drawing_area = DrawingArea::new(); + drawing_area.set_content_width(cfg.width_px as i32); + drawing_area.set_content_height(cfg.height_px as i32); + let state_for_draw = state.clone(); + let gain = cfg.waveform_gain as f64; + drawing_area.set_draw_func(move |_area, cr, w, h| { + draw(cr, w, h, &palette, &state_for_draw, gain); + }); + window.set_child(Some(&drawing_area)); + + // Click-through: install an empty input region on the window's surface + // once it's realized. Until then `realize` hasn't allocated a surface. + { + let window_ref = window.clone(); + window.connect_realize(move |_| { + apply_click_through(&window_ref); + }); + } + + // Redraw timer. We only call queue_draw() when the IPC has produced a + // newer seq than the last paint, so this is cheap when idle. + let redraw_state = state.clone(); + let redraw_area = drawing_area.clone(); + let redraw_window = window.clone(); + let last_drawn_seq = Cell::new(0u64); + let visible = Cell::new(false); + + glib::timeout_add_local(Duration::from_millis(RENDER_TICK_MS as u64), move || { + let cur_seq = redraw_state.last_seq.lock().map(|s| *s).unwrap_or(0); + let last_at = redraw_state + .last_frame_at + .lock() + .map(|t| *t) + .unwrap_or_else(|_| Instant::now() - Duration::from_secs(3600)); + let idle = last_at.elapsed().as_secs_f32() > IDLE_TIMEOUT_SECS; + + if idle { + if visible.get() { + tracing::info!("hiding (idle for {:.2}s)", last_at.elapsed().as_secs_f32()); + redraw_window.set_visible(false); + visible.set(false); + } + return glib::ControlFlow::Continue; + } + + if !visible.get() { + tracing::info!( + "showing (frame seq={}, last_at={:.3}s ago)", + cur_seq, + last_at.elapsed().as_secs_f32() + ); + redraw_window.set_visible(true); + visible.set(true); + } + + // Decay the held peak even when no new frame arrived this tick. 
+ if let Ok(mut p) = redraw_state.peak.lock() { + let dt = (RENDER_TICK_MS as f32) / 1000.0; + // We pass the most recent peak from the ring as the "current" + // value so a stale update doesn't snap the held value back up. + let cur_peak = redraw_state + .ring + .lock() + .ok() + .and_then(|r| r.latest()) + .map(|f| f.peak_dbfs) + .unwrap_or(-120.0); + // Only decay; the IPC callback already snapped up on each + // received frame. Calling update here with a non-louder peak + // keeps the linear decay running at render rate. + if cur_peak <= p.held_dbfs { + p.update(cur_peak, dt); + } + } + + if cur_seq != last_drawn_seq.get() { + redraw_area.queue_draw(); + last_drawn_seq.set(cur_seq); + } + glib::ControlFlow::Continue + }); + + // Map the layer-shell surface once. The redraw timer will hide it + // immediately on its first tick (no frames yet → idle), and toggle + // visibility from there. Mapping once at startup keeps Hyprland's + // layer-shell state machine happy across hide/show cycles. + window.present(); +} + +/// Set an empty input region on the GdkSurface so clicks pass through. +fn apply_click_through(window: &ApplicationWindow) { + let Some(surface) = window.surface() else { + tracing::warn!("Window has no surface yet; click-through not applied"); + return; + }; + let empty = Region::create_rectangle(&RectangleInt::new(0, 0, 0, 0)); + surface.set_input_region(Some(&empty)); +} + +/// Render the waveform + peak meter into the given Cairo context. +fn draw( + cr: &Context, + width: i32, + height: i32, + palette: &Palette, + state: &Arc, + gain: f64, +) { + let w = width as f64; + let h = height as f64; + if w <= 0.0 || h <= 0.0 { + return; + } + + // Clear background. 
+ cr.set_source_rgba( + palette.background.r as f64, + palette.background.g as f64, + palette.background.b as f64, + palette.background.a as f64, + ); + cr.set_operator(cairo::Operator::Source); + cr.paint().ok(); + cr.set_operator(cairo::Operator::Over); + + // Layout: waveform area on the left (~92% width), gap (1%), then peak + // meter on the right (~7% width). + let meter_width = (w * 0.07).max(8.0); + let gap = (w * 0.01).max(2.0); + let wave_width = (w - meter_width - gap).max(0.0); + + draw_waveform(cr, 0.0, 0.0, wave_width, h, palette, state, gain); + draw_peak_meter(cr, wave_width + gap, 0.0, meter_width, h, palette, state); +} + +fn draw_waveform( + cr: &Context, + x: f64, + y: f64, + w: f64, + h: f64, + palette: &Palette, + state: &Arc, + gain: f64, +) { + if w < 1.0 { + return; + } + let n_columns = w.floor() as usize; + if n_columns == 0 { + return; + } + + // Collect frames as a Vec snapshot under the lock, then drop. + let frames: Vec = match state.ring.lock() { + Ok(r) => r.iter().collect(), + Err(_) => return, + }; + let cols = project_envelope(&frames, n_columns); + + let mid = y + h * 0.5; + let half = h * 0.5; + + // Mirrored envelope filled polygon. We trace the top edge left-to-right + // following `max`, then the bottom edge right-to-left following `min`. + cr.set_source_rgba( + palette.accent.r as f64, + palette.accent.g as f64, + palette.accent.b as f64, + palette.accent.a as f64, + ); + + cr.new_path(); + // Top edge. + for (i, col) in cols.iter().enumerate() { + let px = x + i as f64 + 0.5; + let py = mid - sample_to_pixels(col.max, half, gain); + if i == 0 { + cr.move_to(px, py); + } else { + cr.line_to(px, py); + } + } + // Bottom edge, right-to-left. + for (i, col) in cols.iter().enumerate().rev() { + let px = x + i as f64 + 0.5; + let py = mid - sample_to_pixels(col.min, half, gain); + cr.line_to(px, py); + } + cr.close_path(); + cr.fill().ok(); + + // Subtle centerline. 
+ cr.set_source_rgba( + palette.foreground.r as f64, + palette.foreground.g as f64, + palette.foreground.b as f64, + 0.15, + ); + cr.set_line_width(1.0); + cr.move_to(x, mid); + cr.line_to(x + w, mid); + cr.stroke().ok(); +} + +fn sample_to_pixels(sample: f32, half_height: f64, gain: f64) -> f64 { + // Apply visual gain, then clamp to -1.0..=1.0, then scale to half_height. + let s = (sample as f64 * gain).clamp(-1.0, 1.0); + s * half_height +} + +fn draw_peak_meter( + cr: &Context, + x: f64, + y: f64, + w: f64, + h: f64, + palette: &Palette, + state: &Arc, +) { + if w < 1.0 || h < 1.0 { + return; + } + + let (latest_peak, held_peak) = { + let latest = state + .ring + .lock() + .ok() + .and_then(|r| r.latest()) + .map(|f| f.peak_dbfs) + .unwrap_or(f32::NEG_INFINITY); + let held = state + .peak + .lock() + .map(|p| p.held_dbfs) + .unwrap_or(f32::NEG_INFINITY); + (latest, held) + }; + + let fill_frac = peak_meter_fraction(latest_peak, METER_FLOOR_DBFS) as f64; + + let segments = METER_SEGMENTS; + let gap = 1.5_f64; + let total_gap = gap * (segments as f64 - 1.0); + let seg_h = ((h - total_gap) / segments as f64).max(1.0); + + for i in 0..segments { + // Segment 0 is the bottom of the bar. + let frac_top = (i as f64 + 1.0) / segments as f64; + let lit = fill_frac >= (i as f64 + 0.5) / segments as f64; + // dBFS at the *top* of this segment for color zone classification. + let seg_top_db = METER_FLOOR_DBFS + (frac_top as f32) * (-METER_FLOOR_DBFS); + let zone = MeterZone::from_dbfs(seg_top_db); + let zone_color = zone.color(palette); + + let py = y + h - (i as f64 + 1.0) * seg_h - i as f64 * gap; + + if lit { + cr.set_source_rgba( + zone_color.r as f64, + zone_color.g as f64, + zone_color.b as f64, + zone_color.a as f64, + ); + } else { + cr.set_source_rgba( + zone_color.r as f64, + zone_color.g as f64, + zone_color.b as f64, + 0.18, + ); + } + cr.rectangle(x, py, w, seg_h); + cr.fill().ok(); + } + + // Held-peak tick (1.5 px line at the held position). 
+ if held_peak.is_finite() && held_peak > METER_FLOOR_DBFS { + let held_frac = peak_meter_fraction(held_peak, METER_FLOOR_DBFS) as f64; + let py = y + h - held_frac * h; + cr.set_source_rgba( + palette.foreground.r as f64, + palette.foreground.g as f64, + palette.foreground.b as f64, + 0.95, + ); + cr.set_line_width(1.5); + cr.move_to(x, py); + cr.line_to(x + w, py); + cr.stroke().ok(); + } +} diff --git a/src/bin/voxtype_osd_native/app.rs b/src/bin/voxtype_osd_native/app.rs new file mode 100644 index 00000000..67ea1e7e --- /dev/null +++ b/src/bin/voxtype_osd_native/app.rs @@ -0,0 +1,789 @@ +//! Wayland + wgpu + egui-wgpu glue for `voxtype-osd-native`. +//! +//! The whole rendering stack is collapsed into one file because the borrow +//! relationships between SCTK state, the wgpu device/queue, the surface +//! configuration, and the egui-wgpu renderer are awkward to split without +//! introducing references with non-trivial lifetimes. Each piece is small, +//! and keeping them together makes the lifecycle (`create_surface_if_needed` +//! / `tear_down_surface`) easy to follow. 
+ +use std::ptr::NonNull; +use std::sync::{Arc, Mutex}; +use std::time::{Duration, Instant}; + +use anyhow::{anyhow, Context as _}; +use raw_window_handle::{ + RawDisplayHandle, RawWindowHandle, WaylandDisplayHandle, WaylandWindowHandle, +}; +use smithay_client_toolkit::{ + compositor::{CompositorHandler, CompositorState, Region}, + delegate_compositor, delegate_layer, delegate_output, delegate_registry, + output::{OutputHandler, OutputState}, + reexports::{ + calloop::{self, EventLoop}, + calloop_wayland_source::WaylandSource, + client::{ + globals::registry_queue_init, + protocol::{wl_output, wl_surface::WlSurface}, + Connection, Proxy, QueueHandle, + }, + }, + registry::{ProvidesRegistryState, RegistryState}, + registry_handlers, + shell::{ + wlr_layer::{ + Anchor, KeyboardInteractivity, Layer, LayerShell, LayerShellHandler, LayerSurface, + LayerSurfaceConfigure, + }, + WaylandSurface, + }, +}; + +use voxtype::audio::levels::AudioFrame; +use voxtype::osd::config::{OsdConfig, OsdPosition}; +use voxtype::osd::ipc::FrameRing; +use voxtype::osd::visual::{ + peak_meter_fraction, project_envelope, EnvelopeColumn, MeterZone, Palette, PeakHold, +}; + +/// State shared between the IPC thread and the render thread. +#[derive(Clone)] +pub struct SharedState { + pub ring: Arc>, + pub peak_hold: Arc>, + /// Wall-clock timestamp of the most recent frame. Used to drive idle + /// teardown when no frames have arrived for a while. + pub last_frame_at: Arc>>, + pub palette: Palette, + pub config: OsdConfig, +} + +/// How long to keep the surface alive after the last frame arrived, before +/// destroying it. The daemon stops emitting between recordings; this value +/// controls how quickly the OSD disappears after that. +/// Idle threshold for tearing down the layer-shell + wgpu surface. Set +/// short enough that the OSD disappears immediately when the user releases +/// the hotkey, but long enough that the destroy+recreate cost on the next +/// recording isn't visible. 
0.5s is the sweet spot: humans perceive sub- +/// second as "instant," and 0.5s is well above the recording boundary +/// gaps the daemon naturally produces. +const IDLE_TEARDOWN_SECS: f32 = 0.5; +/// Target render rate. 60 Hz is enough for a smooth scrolling waveform; we +/// can't render faster than the underlying frame rate (100 Hz IPC) gains us. +const REDRAW_INTERVAL_MS: u64 = 16; + +/// Outer state owned by the calloop event loop. Implements the SCTK delegate +/// traits via `delegate_*` macros. +pub struct App { + registry_state: RegistryState, + output_state: OutputState, + compositor_state: CompositorState, + layer_shell: LayerShell, + + qh: QueueHandle, + conn: Connection, + + shared: SharedState, + surface: Option, +} + +/// All state tied to the live layer-shell surface. Dropped (via +/// `Option::take`) when we tear down for idle. +struct RenderSurface { + layer: LayerSurface, + wl_surface: WlSurface, + + /// Last accepted size from the compositor's configure. We use this to + /// configure the wgpu surface. + width: u32, + height: u32, + /// Whether we've received the first configure (and thus may render). + configured: bool, + + // wgpu plumbing. + _instance: wgpu::Instance, + surface: wgpu::Surface<'static>, + device: wgpu::Device, + queue: wgpu::Queue, + surface_format: wgpu::TextureFormat, + + // egui plumbing. + egui_ctx: egui::Context, + egui_renderer: egui_wgpu::Renderer, +} + +/// Run the event loop. Returns when the user closes the surface or the +/// loop exits via signal. 
+pub fn run( + shared: SharedState, + frame_ping_source: calloop::ping::PingSource, +) -> anyhow::Result<()> { + let conn = + Connection::connect_to_env().context("connect to Wayland; is WAYLAND_DISPLAY set?")?; + let (globals, event_queue) = registry_queue_init::(&conn).context("init registry")?; + let qh = event_queue.handle(); + + let mut event_loop: EventLoop<'static, App> = + EventLoop::try_new().context("create calloop event loop")?; + let loop_handle = event_loop.handle(); + + let compositor_state = + CompositorState::bind(&globals, &qh).context("compositor protocol unavailable")?; + let layer_shell = + LayerShell::bind(&globals, &qh).context("wlr-layer-shell protocol unavailable")?; + let output_state = OutputState::new(&globals, &qh); + let registry_state = RegistryState::new(&globals); + + WaylandSource::new(conn.clone(), event_queue) + .insert(loop_handle.clone()) + .map_err(|e| anyhow!("insert WaylandSource: {}", e))?; + + let mut app = App { + registry_state, + output_state, + compositor_state, + layer_shell, + qh: qh.clone(), + conn: conn.clone(), + shared, + surface: None, + }; + + // Wake on each incoming audio frame: create the surface if needed, + // request a redraw. + loop_handle + .insert_source(frame_ping_source, move |_, _, app: &mut App| { + app.on_frame_ping(); + }) + .map_err(|e| anyhow!("insert ping source: {}", e))?; + + // Periodic redraw timer + idle teardown. Re-arms each fire. 
+ let timer = calloop::timer::Timer::from_duration(Duration::from_millis(REDRAW_INTERVAL_MS)); + loop_handle + .insert_source(timer, |_deadline, _, app: &mut App| { + app.tick(); + calloop::timer::TimeoutAction::ToDuration(Duration::from_millis(REDRAW_INTERVAL_MS)) + }) + .map_err(|e| anyhow!("insert redraw timer: {}", e))?; + + tracing::info!("entering event loop"); + loop { + if let Err(e) = event_loop.dispatch(Some(Duration::from_secs(1)), &mut app) { + tracing::error!("event loop dispatch failed: {}", e); + break; + } + } + + drop(app); + drop(conn); + Ok(()) +} + +impl App { + fn on_frame_ping(&mut self) { + if self.surface.is_none() { + if let Err(e) = self.create_surface() { + tracing::warn!("Failed to create OSD surface: {:#}", e); + } + } + } + + fn tick(&mut self) { + let last_frame = self.shared.last_frame_at.lock().ok().and_then(|g| *g); + let idle = match last_frame { + Some(t) => t.elapsed().as_secs_f32() >= IDLE_TEARDOWN_SECS, + None => true, + }; + + if idle && self.surface.is_some() { + tracing::info!("Idle for {}s, tearing down surface", IDLE_TEARDOWN_SECS); + self.tear_down_surface(); + return; + } + + if self.surface.is_some() && !idle { + if let Err(e) = self.render_frame() { + tracing::warn!("render failed: {:#}", e); + } + } + } + + fn create_surface(&mut self) -> anyhow::Result<()> { + tracing::info!("Creating OSD layer surface"); + + let wl_surface = self.compositor_state.create_surface(&self.qh); + let layer = self.layer_shell.create_layer_surface( + &self.qh, + wl_surface.clone(), + Layer::Overlay, + Some("voxtype-osd"), + None, + ); + + let cfg = &self.shared.config; + let (anchor, margin_top, margin_bottom, margin_left, margin_right) = + position_to_anchor_and_margins(cfg.position, cfg.margin_px as i32); + layer.set_anchor(anchor); + layer.set_margin(margin_top, margin_right, margin_bottom, margin_left); + layer.set_size(cfg.width_px, cfg.height_px); + layer.set_keyboard_interactivity(KeyboardInteractivity::None); + 
layer.set_exclusive_zone(0); + + // Empty input region — clicks pass through. SCTK's Region helper + // owns the wl_region and destroys it on drop. The wl_region must + // outlive the commit that activates it; we let it drop after. + let region = Region::new(&self.compositor_state) + .map_err(|e| anyhow!("create input region: {}", e))?; + wl_surface.set_input_region(Some(region.wl_region())); + + layer.commit(); + drop(region); + + // wgpu instance + surface. + let instance = wgpu::Instance::new(wgpu::InstanceDescriptor { + backends: wgpu::Backends::VULKAN | wgpu::Backends::GL, + flags: wgpu::InstanceFlags::default(), + memory_budget_thresholds: wgpu::MemoryBudgetThresholds::default(), + backend_options: wgpu::BackendOptions::default(), + display: None, + }); + + // Raw handles. With wayland-client's `system` feature + wayland-backend + // `client_system`, ObjectId/Backend expose libwayland pointers. + let display_ptr = NonNull::new(self.conn.backend().display_ptr() as *mut std::ffi::c_void) + .ok_or_else(|| anyhow!("null wl_display ptr"))?; + let surface_ptr = NonNull::new(wl_surface.id().as_ptr() as *mut std::ffi::c_void) + .ok_or_else(|| anyhow!("null wl_surface ptr"))?; + + let raw_display = RawDisplayHandle::Wayland(WaylandDisplayHandle::new(display_ptr)); + let raw_window = RawWindowHandle::Wayland(WaylandWindowHandle::new(surface_ptr)); + + // SAFETY: the `wl_display` and `wl_surface` outlive the wgpu surface + // because `RenderSurface` keeps them alive (Connection is held in + // `App`; wl_surface is held in RenderSurface). 
+ let surface = unsafe { + instance.create_surface_unsafe(wgpu::SurfaceTargetUnsafe::RawHandle { + raw_display_handle: Some(raw_display), + raw_window_handle: raw_window, + }) + } + .context("create wgpu surface")?; + + let adapter = pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions { + power_preference: wgpu::PowerPreference::LowPower, + compatible_surface: Some(&surface), + force_fallback_adapter: false, + })) + .context("request wgpu adapter")?; + + let (device, queue) = pollster::block_on(adapter.request_device(&wgpu::DeviceDescriptor { + label: Some("voxtype-osd-device"), + required_features: wgpu::Features::empty(), + required_limits: wgpu::Limits::downlevel_defaults(), + experimental_features: wgpu::ExperimentalFeatures::default(), + memory_hints: wgpu::MemoryHints::Performance, + trace: wgpu::Trace::Off, + })) + .context("request wgpu device")?; + + let surface_caps = surface.get_capabilities(&adapter); + let surface_format = surface_caps + .formats + .iter() + .copied() + .find(|f| matches!(f, wgpu::TextureFormat::Bgra8UnormSrgb)) + .or_else(|| surface_caps.formats.first().copied()) + .ok_or_else(|| anyhow!("no surface formats available"))?; + + let egui_ctx = egui::Context::default(); + let egui_renderer = egui_wgpu::Renderer::new( + &device, + surface_format, + egui_wgpu::RendererOptions { + msaa_samples: 1, + depth_stencil_format: None, + dithering: false, + predictable_texture_filtering: false, + }, + ); + + self.surface = Some(RenderSurface { + layer, + wl_surface, + width: cfg.width_px, + height: cfg.height_px, + configured: false, + _instance: instance, + surface, + device, + queue, + surface_format, + egui_ctx, + egui_renderer, + }); + + Ok(()) + } + + fn tear_down_surface(&mut self) { + if let Some(rs) = self.surface.take() { + let RenderSurface { + layer, + wl_surface, + surface, + device, + queue, + egui_renderer, + _instance, + .. + } = rs; + // Drop wgpu state first, then the wl_surface. 
LayerSurface drops + // the role on drop; we then explicitly destroy the wl_surface. + drop(egui_renderer); + drop(queue); + drop(device); + drop(surface); + drop(_instance); + drop(layer); + wl_surface.destroy(); + } + } + + fn render_frame(&mut self) -> anyhow::Result<()> { + let rs = match self.surface.as_mut() { + Some(s) if s.configured => s, + _ => return Ok(()), + }; + + let cst = rs.surface.get_current_texture(); + let surface_texture: wgpu::SurfaceTexture = match cst { + wgpu::CurrentSurfaceTexture::Success(t) + | wgpu::CurrentSurfaceTexture::Suboptimal(t) => t, + wgpu::CurrentSurfaceTexture::Outdated | wgpu::CurrentSurfaceTexture::Lost => { + reconfigure_surface(rs); + return Ok(()); + } + other => { + tracing::debug!("acquire frame skipped: {:?}", other); + return Ok(()); + } + }; + + let view = surface_texture + .texture + .create_view(&wgpu::TextureViewDescriptor::default()); + + let raw_input = egui::RawInput { + screen_rect: Some(egui::Rect::from_min_size( + egui::Pos2::ZERO, + egui::vec2(rs.width as f32, rs.height as f32), + )), + ..Default::default() + }; + + let palette = self.shared.palette; + let cfg = &self.shared.config; + let waveform_window_secs = cfg.waveform_window_secs; + let meter_w = ((rs.width as f32) * 0.05).max(8.0); + let waveform_w = (rs.width as f32) - meter_w - 4.0; + let n_columns = waveform_w.max(32.0) as usize; + + let envelope_cols = { + let ring = self.shared.ring.lock().expect("ring poisoned"); + let frames_in_window = + (waveform_window_secs * voxtype::audio::levels::FRAME_HZ as f32) as usize; + let mut buf: Vec = ring.iter().collect(); + if buf.len() > frames_in_window { + let skip = buf.len() - frames_in_window; + buf = buf.split_off(skip); + } + project_envelope(&buf, n_columns) + }; + + let (peak_dbfs, held_dbfs) = { + let ring = self.shared.ring.lock().expect("ring poisoned"); + let p = ring.latest().map(|f| f.peak_dbfs).unwrap_or(-120.0); + let h = self + .shared + .peak_hold + .lock() + .map(|x| x.held_dbfs) + 
.unwrap_or(-120.0); + (p, h) + }; + + let width_px = rs.width; + let height_px = rs.height; + let gain = self.shared.config.waveform_gain; + let full_output = rs.egui_ctx.run_ui(raw_input, |ui| { + draw_ui( + ui, + width_px, + height_px, + &palette, + &envelope_cols, + peak_dbfs, + held_dbfs, + gain, + ); + }); + + let primitives = rs + .egui_ctx + .tessellate(full_output.shapes, full_output.pixels_per_point); + + let screen_descriptor = egui_wgpu::ScreenDescriptor { + size_in_pixels: [rs.width, rs.height], + pixels_per_point: full_output.pixels_per_point, + }; + + for (id, image_delta) in &full_output.textures_delta.set { + rs.egui_renderer + .update_texture(&rs.device, &rs.queue, *id, image_delta); + } + + let mut encoder = rs + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("voxtype-osd-encoder"), + }); + + rs.egui_renderer.update_buffers( + &rs.device, + &rs.queue, + &mut encoder, + &primitives, + &screen_descriptor, + ); + + { + let bg = palette.background; + let mut rpass = encoder + .begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("voxtype-osd-pass"), + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &view, + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Clear(wgpu::Color { + r: bg.r as f64, + g: bg.g as f64, + b: bg.b as f64, + a: bg.a as f64, + }), + store: wgpu::StoreOp::Store, + }, + depth_slice: None, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + multiview_mask: None, + }) + .forget_lifetime(); + + rs.egui_renderer + .render(&mut rpass, &primitives, &screen_descriptor); + } + + for id in &full_output.textures_delta.free { + rs.egui_renderer.free_texture(id); + } + + rs.queue.submit(Some(encoder.finish())); + rs.wl_surface.frame(&self.qh, rs.wl_surface.clone()); + surface_texture.present(); + Ok(()) + } +} + +fn reconfigure_surface(rs: &mut RenderSurface) { + let surface_config = wgpu::SurfaceConfiguration { + 
usage: wgpu::TextureUsages::RENDER_ATTACHMENT, + format: rs.surface_format, + width: rs.width.max(1), + height: rs.height.max(1), + present_mode: wgpu::PresentMode::Fifo, + alpha_mode: wgpu::CompositeAlphaMode::PreMultiplied, + view_formats: vec![], + desired_maximum_frame_latency: 2, + }; + rs.surface.configure(&rs.device, &surface_config); +} + +fn position_to_anchor_and_margins(pos: OsdPosition, margin: i32) -> (Anchor, i32, i32, i32, i32) { + // Tuple order is (anchor, top, bottom, left, right); the caller reorders these when it calls `set_margin(top, right, bottom, left)`. + match pos { + OsdPosition::BottomCenter => (Anchor::BOTTOM, 0, margin, 0, 0), + OsdPosition::TopCenter => (Anchor::TOP, margin, 0, 0, 0), + OsdPosition::BottomLeft => (Anchor::BOTTOM | Anchor::LEFT, 0, margin, margin, 0), + OsdPosition::BottomRight => (Anchor::BOTTOM | Anchor::RIGHT, 0, margin, 0, margin), + OsdPosition::TopLeft => (Anchor::TOP | Anchor::LEFT, margin, 0, margin, 0), + OsdPosition::TopRight => (Anchor::TOP | Anchor::RIGHT, margin, 0, 0, margin), + } +} + +/// Render the egui UI: scrolling waveform on the left, segmented vertical +/// peak meter on the right. 
+fn draw_ui( + ui: &mut egui::Ui, + width: u32, + height: u32, + palette: &Palette, + envelope: &[EnvelopeColumn], + peak_dbfs: f32, + held_dbfs: f32, + gain: f32, +) { + use egui::{Pos2, Rect}; + let painter = ui.painter().clone(); + let w = width as f32; + let h = height as f32; + let meter_w = (w * 0.05).max(8.0); + let waveform_w = w - meter_w - 4.0; + let waveform_rect = Rect::from_min_size(Pos2::ZERO, egui::vec2(waveform_w, h)); + let meter_rect = Rect::from_min_size(Pos2::new(w - meter_w, 0.0), egui::vec2(meter_w, h)); + + draw_waveform(&painter, waveform_rect, palette, envelope, gain); + draw_meter(&painter, meter_rect, palette, peak_dbfs, held_dbfs); +} + +fn draw_waveform( + painter: &egui::Painter, + rect: egui::Rect, + palette: &Palette, + envelope: &[EnvelopeColumn], + gain: f32, +) { + use egui::{pos2, Shape}; + if envelope.is_empty() { + return; + } + let n = envelope.len(); + let col_w = rect.width() / n as f32; + let mid_y = rect.center().y; + let half_h = rect.height() * 0.45; + + let mut top_pts = Vec::with_capacity(n); + let mut bot_pts = Vec::with_capacity(n); + for (i, col) in envelope.iter().enumerate() { + let x = rect.left() + (i as f32 + 0.5) * col_w; + // Apply visual gain, then clamp to -1.0..=1.0, then map to pixel y. + // y grows downward so we subtract from mid_y for the top edge. + let top = mid_y - (col.max * gain).clamp(-1.0, 1.0) * half_h; + let bot = mid_y - (col.min * gain).clamp(-1.0, 1.0) * half_h; + top_pts.push(pos2(x, top)); + bot_pts.push(pos2(x, bot)); + } + + // Build a closed polygon: top points left-to-right, bottom right-to-left. NOTE(review): a waveform outline is generally concave, yet it is passed to `Shape::convex_polygon` below; confirm egui fills it without artifacts. + let mut polygon = top_pts; + for p in bot_pts.iter().rev() { + polygon.push(*p); + } + + let fill = color_to_egui(palette.accent); + painter.add(Shape::convex_polygon(polygon, fill, egui::Stroke::NONE)); + + // Centerline tick for visual reference at low levels. 
+ let line_color = color_to_egui(palette.foreground.with_alpha(0.25)); + painter.line_segment( + [pos2(rect.left(), mid_y), pos2(rect.right(), mid_y)], + egui::Stroke::new(1.0, line_color), + ); +} + +fn draw_meter( + painter: &egui::Painter, + rect: egui::Rect, + palette: &Palette, + peak_dbfs: f32, + held_dbfs: f32, +) { + use egui::{pos2, Rect}; + const SEGMENTS: usize = 10; + const FLOOR_DBFS: f32 = -60.0; + + let segment_h = rect.height() / SEGMENTS as f32; + let segment_gap = (segment_h * 0.15).clamp(1.0, 3.0); + let inner_w = rect.width() - 4.0; + let lit_fraction = peak_meter_fraction(peak_dbfs, FLOOR_DBFS); + let lit_segments = (lit_fraction * SEGMENTS as f32).round() as usize; + + for i in 0..SEGMENTS { + // Segment 0 is the BOTTOM of the bar (low dB == bottom). + let y_top = rect.bottom() - (i as f32 + 1.0) * segment_h + segment_gap * 0.5; + let y_bot = rect.bottom() - i as f32 * segment_h - segment_gap * 0.5; + let seg_rect = Rect::from_min_max( + pos2(rect.left() + 2.0, y_top), + pos2(rect.left() + 2.0 + inner_w, y_bot), + ); + + let segment_peak_dbfs = FLOOR_DBFS * (1.0 - i as f32 / SEGMENTS as f32); + let zone = MeterZone::from_dbfs(segment_peak_dbfs); + let lit = i < lit_segments; + let base = zone.color(palette); + let color = if lit { + color_to_egui(base) + } else { + color_to_egui(base.with_alpha(0.18)) + }; + painter.rect_filled(seg_rect, 1.0, color); + } + + // Held-peak tick, drawn as a thin foreground bar. 
+ let held_fraction = peak_meter_fraction(held_dbfs, FLOOR_DBFS); + if held_fraction > 0.0 { + let y = rect.bottom() - held_fraction * rect.height(); + let tick_rect = Rect::from_min_max( + pos2(rect.left() + 2.0, y - 1.0), + pos2(rect.left() + 2.0 + inner_w, y + 1.0), + ); + painter.rect_filled(tick_rect, 0.0, color_to_egui(palette.foreground)); + } +} + +fn color_to_egui(c: voxtype::osd::visual::Color) -> egui::Color32 { + egui::Color32::from_rgba_unmultiplied( + (c.r.clamp(0.0, 1.0) * 255.0) as u8, + (c.g.clamp(0.0, 1.0) * 255.0) as u8, + (c.b.clamp(0.0, 1.0) * 255.0) as u8, + (c.a.clamp(0.0, 1.0) * 255.0) as u8, + ) +} + +// --------------------------------------------------------------------------- +// SCTK delegate trait impls +// --------------------------------------------------------------------------- + +impl CompositorHandler for App { + fn scale_factor_changed( + &mut self, + _conn: &Connection, + _qh: &QueueHandle, + _surface: &WlSurface, + _new_factor: i32, + ) { + } + + fn transform_changed( + &mut self, + _conn: &Connection, + _qh: &QueueHandle, + _surface: &WlSurface, + _new_transform: wl_output::Transform, + ) { + } + + fn frame( + &mut self, + _conn: &Connection, + _qh: &QueueHandle, + _surface: &WlSurface, + _time: u32, + ) { + } + + fn surface_enter( + &mut self, + _conn: &Connection, + _qh: &QueueHandle, + _surface: &WlSurface, + _output: &wl_output::WlOutput, + ) { + } + + fn surface_leave( + &mut self, + _conn: &Connection, + _qh: &QueueHandle, + _surface: &WlSurface, + _output: &wl_output::WlOutput, + ) { + } +} + +impl OutputHandler for App { + fn output_state(&mut self) -> &mut OutputState { + &mut self.output_state + } + + fn new_output( + &mut self, + _conn: &Connection, + _qh: &QueueHandle, + _output: wl_output::WlOutput, + ) { + } + + fn update_output( + &mut self, + _conn: &Connection, + _qh: &QueueHandle, + _output: wl_output::WlOutput, + ) { + } + + fn output_destroyed( + &mut self, + _conn: &Connection, + _qh: &QueueHandle, + 
_output: wl_output::WlOutput, + ) { + } +} + +impl LayerShellHandler for App { + fn closed(&mut self, _conn: &Connection, _qh: &QueueHandle, layer: &LayerSurface) { + if let Some(rs) = self.surface.as_ref() { + if rs.layer.wl_surface().id() == layer.wl_surface().id() { + tracing::info!("Compositor closed the layer surface"); + self.tear_down_surface(); + } + } + } + + fn configure( + &mut self, + _conn: &Connection, + _qh: &QueueHandle, + layer: &LayerSurface, + configure: LayerSurfaceConfigure, + _serial: u32, + ) { + let rs = match self.surface.as_mut() { + Some(s) => s, + None => return, + }; + if rs.layer.wl_surface().id() != layer.wl_surface().id() { + return; + } + let (mut w, mut h) = configure.new_size; + if w == 0 { + w = self.shared.config.width_px; + } + if h == 0 { + h = self.shared.config.height_px; + } + rs.width = w; + rs.height = h; + rs.configured = true; + reconfigure_surface(rs); + if let Err(e) = self.render_frame() { + tracing::warn!("initial render after configure failed: {:#}", e); + } + } +} + +delegate_compositor!(App); +delegate_output!(App); +delegate_layer!(App); + +impl ProvidesRegistryState for App { + fn registry(&mut self) -> &mut RegistryState { + &mut self.registry_state + } + registry_handlers![OutputState]; +} + +delegate_registry!(App); diff --git a/src/bin/voxtype_osd_native/main.rs b/src/bin/voxtype_osd_native/main.rs new file mode 100644 index 00000000..bdac1011 --- /dev/null +++ b/src/bin/voxtype_osd_native/main.rs @@ -0,0 +1,256 @@ +//! `voxtype-osd-native` — native (SCTK + wgpu + egui-wgpu) on-screen +//! mic visualizer for the Voxtype daemon. +//! +//! Architecture: +//! +//! - The IPC reader runs on a dedicated thread with a single-threaded Tokio +//! runtime; it pushes decoded `AudioFrame`s into an `Arc<Mutex<FrameRing>>`, +//! updates an `Arc<Mutex<PeakHold>>`, and notifies the main thread via a +//! `calloop::ping::Ping` so the renderer can wake the surface up. +//! - The main thread runs the Wayland event loop (calloop + SCTK), creates +//! 
the wlr-layer-shell surface on demand when frames start arriving, and +//! destroys it after a fixed 0.5 s idle timeout. A calloop timer ticks at +//! ~60 Hz for the life of the process; it redraws only while a surface exists. +//! - When no daemon is running, the IPC thread sleeps in its reconnect loop +//! and the main thread wakes only for that timer tick. With no surface +//! alive the tick does no rendering work. +//! +//! The actual GUI smoke test (does it look right) is a human concern; the +//! bar this binary clears is "starts cleanly when the daemon is absent" plus +//! "exits cleanly on SIGTERM". + +mod app; + +use std::path::PathBuf; +use std::sync::{Arc, Mutex}; +use std::thread; +use std::time::Instant; + +use anyhow::Context as _; +use clap::Parser; + +use voxtype::audio::levels::{AudioFrame, FRAME_HZ}; +use voxtype::osd::config::OsdConfig; +use voxtype::osd::ipc::{resolve_socket_path, run_ipc_loop, FrameRing, DEFAULT_RING_DEPTH}; +use voxtype::osd::theme::ThemeWatcher; +use voxtype::osd::visual::PeakHold; + +use crate::app::SharedState; + +#[derive(Parser, Debug)] +#[command( + name = "voxtype-osd-native", + version, + about = "Voxtype on-screen mic visualizer (native: SCTK + wgpu + egui-wgpu)" +)] +struct Args { + /// Path to the voxtype config file. Defaults to + /// `~/.config/voxtype/config.toml`. Only the `[osd]` section is read. + #[arg(long, env = "VOXTYPE_CONFIG")] + config: Option, + + /// Path to the audio-frame Unix socket. Defaults to + /// `$XDG_RUNTIME_DIR/voxtype/audio.sock`. + #[arg(long, env = "VOXTYPE_OSD_SOCKET")] + socket: Option, + + /// Seconds to wait between reconnect attempts when the daemon is down. + #[arg(long, default_value = "1.0", env = "VOXTYPE_OSD_RECONNECT_SECS")] + reconnect_secs: f32, + + /// Print one debug line per N frames received (0 = quiet). + #[arg(long, default_value = "0", env = "VOXTYPE_OSD_LOG_EVERY")] + log_every: u32, + + /// Surface width in pixels (overrides config default). 
+ #[arg(long, env = "VOXTYPE_OSD_WIDTH")] + width_px: Option, + + /// Surface height in pixels (overrides config default). + #[arg(long, env = "VOXTYPE_OSD_HEIGHT")] + height_px: Option, + + /// Margin from the screen edge in pixels (overrides config default). + #[arg(long, env = "VOXTYPE_OSD_MARGIN")] + margin_px: Option, + + /// Background opacity 0.0..=1.0 (overrides config default). + #[arg(long, env = "VOXTYPE_OSD_OPACITY")] + opacity: Option, + + /// Visual gain applied to audio samples before drawing the waveform. + /// Higher = waveform fills more of the vertical for quiet inputs. + /// Reduce for hot mics (e.g. 4.0); raise for quiet sources (e.g. 14.0). + #[arg(long, env = "VOXTYPE_OSD_GAIN")] + waveform_gain: Option, +} + +/// Load the `[osd]` section from the voxtype config file, falling back to +/// `OsdConfig::default()` on any error (file missing, unreadable, parse +/// failure, or `[osd]` section absent). +fn load_osd_config_from_file(explicit: Option<&std::path::Path>) -> OsdConfig { + let path = explicit + .map(std::path::Path::to_path_buf) + .or_else(voxtype::config::Config::default_path); + let Some(path) = path else { + return OsdConfig::default(); + }; + let content = match std::fs::read_to_string(&path) { + Ok(s) => s, + Err(_) => return OsdConfig::default(), + }; + + #[derive(serde::Deserialize, Default)] + struct PartialConfig { + #[serde(default)] + osd: Option, + } + + match toml::from_str::(&content) { + Ok(p) => p.osd.unwrap_or_default(), + Err(_) => OsdConfig::default(), + } +} + +fn main() -> anyhow::Result<()> { + tracing_subscriber::fmt() + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")), + ) + .init(); + + let args = Args::parse(); + let socket_path = resolve_socket_path(args.socket.clone()); + + // Layer config: defaults < config file [osd] < CLI/env overrides. 
+ let mut osd_config = load_osd_config_from_file(args.config.as_deref()); + if let Some(w) = args.width_px { + osd_config.width_px = w; + } + if let Some(h) = args.height_px { + osd_config.height_px = h; + } + if let Some(m) = args.margin_px { + osd_config.margin_px = m; + } + if let Some(o) = args.opacity { + osd_config.opacity = o.clamp(0.0, 1.0); + } + if let Some(g) = args.waveform_gain { + osd_config.waveform_gain = g; + } + + if !osd_config.enabled { + tracing::info!("OSD disabled in config; exiting"); + return Ok(()); + } + + tracing::info!( + "voxtype-osd-native starting; socket={:?}, size={}x{}", + socket_path, + osd_config.width_px, + osd_config.height_px + ); + + let theme = ThemeWatcher::new(); + let palette = theme.palette(); + + let shared = SharedState { + ring: Arc::new(Mutex::new(FrameRing::new(DEFAULT_RING_DEPTH))), + peak_hold: Arc::new(Mutex::new(PeakHold::new(osd_config.peak_decay_db_per_sec))), + last_frame_at: Arc::new(Mutex::new(None)), + palette, + config: osd_config, + }; + + // Set up the wakeup channel so the IPC thread can ping the main loop on + // every frame. Calloop's ping is a single fd; the renderer wakes up and + // creates the surface if needed. The idle timestamp (`last_frame_at`) is written by the IPC thread, not by the ping handler. + let (frame_ping, frame_ping_source) = + calloop::ping::make_ping().context("create calloop ping")?; + + // Spawn the IPC thread. + let ipc_shared = shared.clone(); + let log_every = args.log_every; + let reconnect_secs = args.reconnect_secs; + let frame_ping_for_ipc = frame_ping.clone(); + let _ipc_thread = thread::Builder::new() + .name("voxtype-osd-ipc".into()) + .spawn(move || { + ipc_thread_main( + ipc_shared, + socket_path, + reconnect_secs, + log_every, + frame_ping_for_ipc, + ); + }) + .context("spawn IPC thread")?; + + // Run the Wayland + render event loop on the main thread. + app::run(shared, frame_ping_source) +} + +/// Entry point of the IPC thread. 
Owns a single-threaded Tokio runtime, +/// runs the reconnect-and-read loop, and pings the main thread on every +/// frame. +fn ipc_thread_main( + shared: SharedState, + socket_path: PathBuf, + reconnect_secs: f32, + log_every: u32, + frame_ping: calloop::ping::Ping, +) { + let rt = match tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + { + Ok(rt) => rt, + Err(e) => { + tracing::error!("Failed to build IPC runtime: {}", e); + return; + } + }; + + let mut total: u64 = 0; + let mut last_log = Instant::now(); + let dt_per_frame = 1.0 / FRAME_HZ as f32; + + let on_frame = move |frame: AudioFrame| { + if let Ok(mut r) = shared.ring.lock() { + r.push(frame); + } + if let Ok(mut p) = shared.peak_hold.lock() { + p.update(frame.peak_dbfs, dt_per_frame); + } + if let Ok(mut t) = shared.last_frame_at.lock() { + *t = Some(Instant::now()); + } + // Wake the renderer. Pings coalesce; calling 100x/sec is fine. + frame_ping.ping(); + + total += 1; + if log_every > 0 && total.is_multiple_of(u64::from(log_every)) { + let elapsed = last_log.elapsed().as_secs_f32(); + let rate = if elapsed > 0.0 { + log_every as f32 / elapsed + } else { + 0.0 + }; + tracing::debug!( + target: "osd::frames", + frontend = "native", + seq = frame.seq, + peak_dbfs = frame.peak_dbfs, + min = frame.min, + max = frame.max, + rate_hz = rate, + "frame batch" + ); + last_log = Instant::now(); + } + }; + + rt.block_on(run_ipc_loop(socket_path, reconnect_secs, on_frame)); +} diff --git a/src/cli.rs b/src/cli.rs index 88763b0a..b764da8d 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -35,7 +35,7 @@ QUICK START: EXAMPLES: voxtype setup model Interactive model selection (Whisper, Parakeet, or Moonshine) voxtype setup waybar Show Waybar integration config - voxtype setup gpu Manage GPU acceleration (Vulkan/CUDA/ROCm) + voxtype setup gpu Manage GPU acceleration (Vulkan/CUDA/MIGraphX) voxtype setup onnx Switch between Whisper and ONNX engines voxtype status --follow --format json Waybar 
integration @@ -55,17 +55,24 @@ pub struct Cli { pub quiet: bool, // -- Transcription (engine-agnostic) -- - /// Override transcription model - #[arg(long, value_name = "MODEL", help_heading = "Transcription", + #[arg( + long, + value_name = "MODEL", + help_heading = "Transcription", long_help = "Override model for transcription.\n\ Whisper: tiny, base, small, medium, large-v3, large-v3-turbo (and .en variants).\n\ - Parakeet: parakeet-tdt-0.6b-v3, parakeet-tdt-0.6b-v3-int8")] + Parakeet: parakeet-tdt-0.6b-v3, parakeet-tdt-0.6b-v3-int8" + )] pub model: Option, /// Override transcription engine - #[arg(long, value_name = "ENGINE", help_heading = "Transcription", - long_help = "Override transcription engine: whisper, parakeet, moonshine, sensevoice, paraformer, dolphin, omnilingual")] + #[arg( + long, + value_name = "ENGINE", + help_heading = "Transcription", + long_help = "Override transcription engine: whisper, parakeet, moonshine, sensevoice, paraformer, dolphin, omnilingual, cohere" + )] pub engine: Option, /// Language for transcription (e.g., en, fr, auto, or comma-separated: en,fr,de) @@ -85,7 +92,12 @@ pub struct Cli { pub gpu_isolation: bool, /// GPU device index for multi-GPU systems (e.g., 1 for discrete GPU) - #[arg(long, value_name = "INDEX", help_heading = "Transcription", hide_short_help = true)] + #[arg( + long, + value_name = "INDEX", + help_heading = "Transcription", + hide_short_help = true + )] pub gpu_device: Option, /// Load model on-demand when recording starts instead of keeping it loaded @@ -93,7 +105,12 @@ pub struct Cli { pub on_demand_loading: bool, /// Secondary model for difficult audio (used with --model-modifier) - #[arg(long, value_name = "MODEL", help_heading = "Transcription", hide_short_help = true)] + #[arg( + long, + value_name = "MODEL", + help_heading = "Transcription", + hide_short_help = true + )] pub secondary_model: Option, /// Enable eager input processing (transcribe chunks while recording continues) @@ -101,15 +118,19 @@ 
pub struct Cli { pub eager_processing: bool, // -- Whisper-specific -- - /// Disable context window optimization for short recordings #[arg(long, help_heading = "Whisper", hide_short_help = true)] pub no_whisper_context_optimization: bool, /// Initial prompt to provide context for transcription - #[arg(long, value_name = "PROMPT", help_heading = "Whisper", hide_short_help = true, + #[arg( + long, + value_name = "PROMPT", + help_heading = "Whisper", + hide_short_help = true, long_help = "Initial prompt to provide context for transcription.\n\ - Hints at terminology, proper nouns, or formatting conventions.")] + Hints at terminology, proper nouns, or formatting conventions." + )] pub initial_prompt: Option, /// Enable flash attention for reduced GPU memory usage and faster inference @@ -117,23 +138,42 @@ pub struct Cli { pub flash_attention: bool, /// Whisper execution mode: local, remote, or cli - #[arg(long, value_name = "MODE", help_heading = "Whisper", hide_short_help = true)] + #[arg( + long, + value_name = "MODE", + help_heading = "Whisper", + hide_short_help = true + )] pub whisper_mode: Option, /// Remote server endpoint URL (for remote whisper mode) - #[arg(long, value_name = "URL", help_heading = "Whisper", hide_short_help = true)] + #[arg( + long, + value_name = "URL", + help_heading = "Whisper", + hide_short_help = true + )] pub remote_endpoint: Option, /// Model name to send to remote server - #[arg(long, value_name = "MODEL", help_heading = "Whisper", hide_short_help = true)] + #[arg( + long, + value_name = "MODEL", + help_heading = "Whisper", + hide_short_help = true + )] pub remote_model: Option, /// API key for remote server (or use VOXTYPE_WHISPER_API_KEY env var) - #[arg(long, value_name = "KEY", help_heading = "Whisper", hide_short_help = true)] + #[arg( + long, + value_name = "KEY", + help_heading = "Whisper", + hide_short_help = true + )] pub remote_api_key: Option, // -- Hotkey -- - /// Override hotkey (e.g., SCROLLLOCK, PAUSE, F13, MEDIA, 
WEV_234, EVTEST_226) #[arg(long, value_name = "KEY", help_heading = "Hotkey")] pub hotkey: Option, @@ -155,13 +195,17 @@ pub struct Cli { pub model_modifier: Option, // -- Audio -- - /// Audio input device name (or "default" for system default) #[arg(long, value_name = "DEVICE", help_heading = "Audio")] pub audio_device: Option, /// Maximum recording duration in seconds (safety limit) - #[arg(long, value_name = "SECS", help_heading = "Audio", hide_short_help = true)] + #[arg( + long, + value_name = "SECS", + help_heading = "Audio", + hide_short_help = true + )] pub max_duration: Option, /// Enable audio feedback sounds (beeps when recording starts/stops) @@ -177,7 +221,6 @@ pub struct Cli { pub pause_media: bool, // -- Output (delivery, timing, file output, hooks) -- - /// Force clipboard mode (don't try to type) #[arg(long, help_heading = "Output")] pub clipboard: bool, @@ -187,20 +230,32 @@ pub struct Cli { pub paste: bool, /// Restore clipboard after paste mode - #[arg(long, help_heading = "Output", + #[arg( + long, + help_heading = "Output", long_help = "Restore clipboard content after paste mode completes.\n\ - Saves clipboard before transcription and restores it after paste.")] + Saves clipboard before transcription and restores it after paste." 
+ )] pub restore_clipboard: bool, /// Delay in milliseconds after paste before restoring clipboard (default: 200) - #[arg(long, value_name = "MS", help_heading = "Output", hide_short_help = true)] + #[arg( + long, + value_name = "MS", + help_heading = "Output", + hide_short_help = true + )] pub restore_clipboard_delay_ms: Option, /// Output driver order (comma-separated) - #[arg(long, value_name = "DRIVERS", help_heading = "Output", + #[arg( + long, + value_name = "DRIVERS", + help_heading = "Output", long_help = "Output driver order for type mode (comma-separated).\n\ Available: wtype, dotool, ydotool, clipboard.\n\ - Example: --driver=ydotool,wtype,clipboard")] + Example: --driver=ydotool,wtype,clipboard" + )] pub driver: Option, /// Auto-submit (press Enter) after outputting transcribed text @@ -208,7 +263,12 @@ pub struct Cli { pub auto_submit: bool, /// Disable auto-submit (overrides config auto_submit = true) - #[arg(long, conflicts_with = "auto_submit", help_heading = "Output", hide_short_help = true)] + #[arg( + long, + conflicts_with = "auto_submit", + help_heading = "Output", + hide_short_help = true + )] pub no_auto_submit: bool, /// Fall back to clipboard if typing fails @@ -216,11 +276,21 @@ pub struct Cli { pub fallback_to_clipboard: bool, /// Disable clipboard fallback - #[arg(long, conflicts_with = "fallback_to_clipboard", help_heading = "Output", hide_short_help = true)] + #[arg( + long, + conflicts_with = "fallback_to_clipboard", + help_heading = "Output", + hide_short_help = true + )] pub no_fallback_to_clipboard: bool, /// Keystroke for paste mode (e.g., ctrl+v, shift+insert, ctrl+shift+v) - #[arg(long, value_name = "KEYS", help_heading = "Output", hide_short_help = true)] + #[arg( + long, + value_name = "KEYS", + help_heading = "Output", + hide_short_help = true + )] pub paste_keys: Option, /// File path for file output mode @@ -228,11 +298,21 @@ pub struct Cli { pub file_path: Option, /// File write mode: overwrite or append - #[arg(long, 
value_name = "MODE", help_heading = "Output", hide_short_help = true)] + #[arg( + long, + value_name = "MODE", + help_heading = "Output", + hide_short_help = true + )] pub file_mode: Option, /// Delay before typing starts (ms), helps prevent first character drop - #[arg(long, value_name = "MS", help_heading = "Output", hide_short_help = true)] + #[arg( + long, + value_name = "MS", + help_heading = "Output", + hide_short_help = true + )] pub pre_type_delay: Option, /// DEPRECATED: Use --pre-type-delay instead @@ -240,37 +320,70 @@ pub struct Cli { pub wtype_delay: Option, /// Prefix wtype output with a Shift key press/release - #[arg(long, help_heading = "Output", hide_short_help = true, + #[arg( + long, + help_heading = "Output", + hide_short_help = true, long_help = "Prefix wtype output with a Shift key press/release.\n\ - Workaround for apps (e.g., Discord) that drop the first CJK character.")] + Workaround for apps (e.g., Discord) that drop the first CJK character." + )] pub wtype_shift_prefix: bool, /// Delay between typed characters in milliseconds (0 = fastest) - #[arg(long, value_name = "MS", help_heading = "Output", hide_short_help = true)] + #[arg( + long, + value_name = "MS", + help_heading = "Output", + hide_short_help = true + )] pub type_delay: Option, /// Keyboard layout for dotool (e.g., de, fr) - #[arg(long, value_name = "LAYOUT", help_heading = "Output", hide_short_help = true)] + #[arg( + long, + value_name = "LAYOUT", + help_heading = "Output", + hide_short_help = true + )] pub dotool_xkb_layout: Option, /// Keyboard layout variant for dotool (e.g., nodeadkeys) - #[arg(long, value_name = "VARIANT", help_heading = "Output", hide_short_help = true)] + #[arg( + long, + value_name = "VARIANT", + help_heading = "Output", + hide_short_help = true + )] pub dotool_xkb_variant: Option, /// Command to run before typing output (e.g., compositor submap switch) - #[arg(long, value_name = "CMD", help_heading = "Output", hide_short_help = true)] + #[arg( + 
long, + value_name = "CMD", + help_heading = "Output", + hide_short_help = true + )] pub pre_output_command: Option, /// Command to run after typing output (e.g., reset compositor submap) - #[arg(long, value_name = "CMD", help_heading = "Output", hide_short_help = true)] + #[arg( + long, + value_name = "CMD", + help_heading = "Output", + hide_short_help = true + )] pub post_output_command: Option, /// Command to run when recording starts (e.g., switch to compositor submap) - #[arg(long, value_name = "CMD", help_heading = "Output", hide_short_help = true)] + #[arg( + long, + value_name = "CMD", + help_heading = "Output", + hide_short_help = true + )] pub pre_recording_command: Option, // -- Text Processing -- - /// Enable spoken punctuation conversion (e.g., say "period" to get ".") #[arg(long, help_heading = "Text Processing")] pub spoken_punctuation: bool, @@ -280,7 +393,12 @@ pub struct Cli { pub shift_enter_newlines: bool, /// Disable Shift+Enter newlines (overrides config) - #[arg(long, conflicts_with = "shift_enter_newlines", help_heading = "Text Processing", hide_short_help = true)] + #[arg( + long, + conflicts_with = "shift_enter_newlines", + help_heading = "Text Processing", + hide_short_help = true + )] pub no_shift_enter_newlines: bool, /// Enable smart auto-submit (say "submit" to press Enter) @@ -288,28 +406,60 @@ pub struct Cli { pub smart_auto_submit: bool, /// Disable smart auto-submit (overrides config) - #[arg(long, conflicts_with = "smart_auto_submit", help_heading = "Text Processing", hide_short_help = true)] + #[arg( + long, + conflicts_with = "smart_auto_submit", + help_heading = "Text Processing", + hide_short_help = true + )] pub no_smart_auto_submit: bool, + /// Filter common filler words ("uh", "um", "er", ...) 
from transcribed text + #[arg(long, help_heading = "Text Processing")] + pub filter_fillers: bool, + + /// Disable filler-word filtering (overrides config) + #[arg( + long, + conflicts_with = "filter_fillers", + help_heading = "Text Processing", + hide_short_help = true + )] + pub no_filter_fillers: bool, + /// Text to append after each transcription (e.g., " " for trailing space) - #[arg(long, value_name = "TEXT", help_heading = "Text Processing", hide_short_help = true, + #[arg( + long, + value_name = "TEXT", + help_heading = "Text Processing", + hide_short_help = true, long_help = "Text to append after each transcription (e.g., \" \" for a trailing space).\n\ - Appended before auto_submit. Useful for separating sentences when dictating incrementally.")] + Appended before auto_submit. Useful for separating sentences when dictating incrementally." + )] pub append_text: Option, // -- VAD -- - /// Enable Voice Activity Detection (filter silence before transcription) #[arg(long, help_heading = "VAD")] pub vad: bool, /// VAD speech detection threshold (0.0-1.0, default: 0.5). 
/// Lower = more sensitive, Higher = less sensitive - #[arg(long, value_name = "THRESHOLD", help_heading = "VAD", hide_short_help = true)] + #[arg( + long, + value_name = "THRESHOLD", + help_heading = "VAD", + hide_short_help = true + )] pub vad_threshold: Option, /// VAD backend: auto, energy, whisper - #[arg(long, value_name = "BACKEND", help_heading = "VAD", hide_short_help = true)] + #[arg( + long, + value_name = "BACKEND", + help_heading = "VAD", + hide_short_help = true + )] pub vad_backend: Option, /// Minimum speech duration in milliseconds for VAD @@ -325,12 +475,21 @@ pub enum Commands { /// Run as daemon (default if no command specified) Daemon, + /// Run menu bar helper (macOS) + #[cfg(target_os = "macos")] + Menubar, + + /// Launch daemon + menubar (used by Voxtype.app bundle) + #[cfg(target_os = "macos")] + #[command(hide = true)] + AppLaunch, + /// Transcribe an audio file (WAV, 16kHz, mono) Transcribe { /// Path to audio file file: std::path::PathBuf, - /// Override transcription engine: whisper, parakeet, moonshine, sensevoice, paraformer, dolphin, omnilingual + /// Override transcription engine: whisper, parakeet, moonshine, sensevoice, paraformer, dolphin, omnilingual, cohere #[arg(long, value_name = "ENGINE")] engine: Option, }, @@ -383,6 +542,19 @@ pub enum Commands { /// Show current configuration Config, + /// Inspect runtime/install information + Info { + #[command(subcommand)] + action: InfoAction, + }, + + /// Open the interactive configuration TUI + Configure { + /// Render as if installed from a package (for testing source builds). 
+ #[arg(long, hide = true)] + force_package_mode: bool, + }, + /// Show daemon status (for Waybar/polybar integration) Status { /// Continuously output status changes as JSON (for Waybar exec) @@ -416,6 +588,9 @@ pub enum Commands { #[command(subcommand)] action: MeetingAction, }, + + /// Check for updates + CheckUpdate, } /// Output mode override for record commands @@ -795,12 +970,26 @@ impl RecordAction { } } +#[derive(Subcommand)] +pub enum InfoAction { + /// Show installed binary variants and which one is active + Variants { + /// Emit machine-readable JSON instead of human-readable text + #[arg(long)] + json: bool, + }, +} + #[derive(Subcommand)] pub enum SetupAction { /// Check system configuration and dependencies Check, - /// Install voxtype as a systemd user service + /// Interactive macOS setup wizard + #[cfg(target_os = "macos")] + Macos, + + /// Install voxtype as a systemd user service (Linux) Systemd { /// Uninstall the service instead of installing #[arg(long)] @@ -811,6 +1000,55 @@ pub enum SetupAction { status: bool, }, + /// Install voxtype as a LaunchAgent (macOS) + /// Note: launchd services don't receive microphone permissions. + /// Use 'app-bundle' instead for full functionality. + #[cfg(target_os = "macos")] + Launchd { + /// Uninstall the service instead of installing + #[arg(long)] + uninstall: bool, + + /// Show service status + #[arg(long)] + status: bool, + }, + + /// Install Voxtype.app bundle with Login Items (macOS, recommended) + /// Creates /Applications/Voxtype.app and adds to Login Items. + /// This method properly receives Accessibility, Input Monitoring, + /// and Microphone permissions (unlike launchd). 
+ #[cfg(target_os = "macos")] + AppBundle { + /// Uninstall the app bundle + #[arg(long)] + uninstall: bool, + + /// Show installation status + #[arg(long)] + status: bool, + }, + + /// Set up Hammerspoon hotkey integration (macOS) + #[cfg(target_os = "macos")] + Hammerspoon { + /// Install Hammerspoon config (copy to ~/.hammerspoon/) + #[arg(long)] + install: bool, + + /// Show the Hammerspoon configuration snippet + #[arg(long)] + show: bool, + + /// Hotkey to configure (default: rightalt) + #[arg(long, default_value = "rightalt")] + hotkey: String, + + /// Use toggle mode instead of push-to-talk + #[arg(long)] + toggle: bool, + }, + /// Show Waybar configuration snippets Waybar { /// Output only the JSON config (for scripting) @@ -860,7 +1098,7 @@ pub enum SetupAction { restart: bool, }, - /// Manage GPU acceleration (Vulkan for Whisper, CUDA/ROCm for Parakeet) + /// Manage GPU acceleration (Vulkan for Whisper, CUDA/MIGraphX for Parakeet) Gpu { /// Enable GPU acceleration (auto-detects best backend) #[arg(long)] @@ -875,6 +1113,14 @@ pub enum SetupAction { status: bool, }, + /// Switch the active binary variant (used by `voxtype configure` via pkexec) + #[command(hide = true)] + Variant { + /// Variant binary name (e.g., voxtype-avx512, voxtype-onnx-cuda) + #[arg(long, value_name = "NAME")] + to: String, + }, + /// Switch between Whisper and ONNX transcription engines Onnx { /// Enable ONNX engine (switch to ONNX binary) diff --git a/src/config.rs b/src/config.rs index 25d4bd0c..d3fe609f 100644 --- a/src/config.rs +++ b/src/config.rs @@ -239,6 +239,11 @@ on_recording_stop = false # Show notification with transcribed text after transcription completes on_transcription = true +# Notification urgency level: "low", "normal", or "critical". +# On GNOME, "low" notifications are delivered to the drawer without a popup banner. +# Use "normal" (default) to ensure notifications appear as banners. 
+# urgency = "normal" + # [text] # Text processing options (word replacements, spoken punctuation) # @@ -251,6 +256,12 @@ on_transcription = true # Smart auto-submit: say "submit" at the end of dictation to press Enter. # The word "submit" is stripped from the output text and Enter is pressed. # smart_auto_submit = false +# +# Remove filler words like "uh" and "um" from transcribed text. +# Enabled by default. Set filter_filler_words = false to disable, or override +# the word list via filler_words. +# filter_filler_words = true +# filler_words = ["uh", "um", "er", "ah", "eh", "hmm", "hm", "mm", "mhm"] # [vad] # Voice Activity Detection - filters silence-only recordings @@ -297,6 +308,30 @@ on_transcription = true # output_mode = "clipboard" "#; +/// Return the default config content with platform-appropriate hotkey +pub fn default_config_content() -> String { + #[cfg(target_os = "macos")] + { + DEFAULT_CONFIG + .replace( + "key = \"SCROLLLOCK\"", + "key = \"FN\"", + ) + .replace( + "# Common choices: SCROLLLOCK, PAUSE, RIGHTALT, F13-F24", + "# Common choices: FN, RIGHTALT, F13-F24", + ) + .replace( + "# Use `evtest` to find key names for your keyboard", + "# FN (Globe key) is recommended on macOS", + ) + } + #[cfg(not(target_os = "macos"))] + { + DEFAULT_CONFIG.to_string() + } +} + /// Hotkey activation mode #[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Default)] #[serde(rename_all = "snake_case")] @@ -346,6 +381,10 @@ pub struct Config { #[serde(default)] pub omnilingual: Option, + /// Cohere Transcribe configuration (optional, only used when engine = "cohere") + #[serde(default)] + pub cohere: Option, + /// Text processing configuration (replacements, spoken punctuation) #[serde(default)] pub text: TextConfig, @@ -457,7 +496,14 @@ pub struct AudioFeedbackConfig { } fn default_hotkey_key() -> String { - "SCROLLLOCK".to_string() + #[cfg(target_os = "macos")] + { + "FN".to_string() + } + #[cfg(not(target_os = "macos"))] + { + 
"SCROLLLOCK".to_string() + } } fn default_sound_theme() -> String { @@ -1035,6 +1081,46 @@ impl Default for MoonshineConfig { } } +/// Cohere Transcribe speech-to-text configuration (ONNX-based, encoder-decoder). +/// Requires: cargo build --features cohere +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct CohereConfig { + /// Model name or directory containing the Cohere ONNX files. + /// Expects: cohere-encoder.int8.onnx (+ .data), + /// cohere-decoder.int8.onnx (+ .data), + /// tokens.txt + /// Short name: "cohere-transcribe-int8" (default) + pub model: String, + + /// Language for transcription. Two-letter ISO 639-1 codes + /// (e.g. "en", "fr", "de"). Cohere supports 14 languages. + #[serde(default = "default_cohere_language")] + pub language: String, + + /// Number of CPU threads for ONNX Runtime inference + #[serde(default)] + pub threads: Option, + + /// Load model on-demand when recording starts (true) or keep loaded (false) + #[serde(default = "default_on_demand_loading")] + pub on_demand_loading: bool, +} + +fn default_cohere_language() -> String { + "en".to_string() +} + +impl Default for CohereConfig { + fn default() -> Self { + Self { + model: "cohere-transcribe-int8".to_string(), + language: default_cohere_language(), + threads: None, + on_demand_loading: false, + } + } +} + /// SenseVoice speech-to-text configuration (ONNX-based, CTC encoder-only ASR) /// Requires: cargo build --features sensevoice #[derive(Debug, Clone, Deserialize, Serialize)] @@ -1157,11 +1243,10 @@ impl Default for OmnilingualConfig { } /// Transcription engine selection (which ASR technology to use) -#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Default)] +#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq)] #[serde(rename_all = "lowercase")] pub enum TranscriptionEngine { - /// Use Whisper (whisper.cpp via whisper-rs) - default - #[default] + /// Use Whisper (whisper.cpp via whisper-rs) Whisper, /// Use Parakeet (NVIDIA's 
FastConformer via ONNX Runtime) /// Requires: cargo build --features parakeet @@ -1181,6 +1266,10 @@ pub enum TranscriptionEngine { /// Use Omnilingual (FunASR 50+ language CTC encoder via ONNX Runtime) /// Requires: cargo build --features omnilingual Omnilingual, + /// Use Cohere Transcribe (encoder-decoder via ONNX Runtime, Whisper-style + /// task tokens). Top of the Open ASR Leaderboard. + /// Requires: cargo build --features cohere + Cohere, } /// VAD backend selection @@ -1256,8 +1345,14 @@ impl Default for VadConfig { } } +impl Default for TranscriptionEngine { + fn default() -> Self { + TranscriptionEngine::Whisper + } +} + /// Text processing configuration -#[derive(Debug, Clone, Default, Deserialize, Serialize)] +#[derive(Debug, Clone, Deserialize, Serialize)] pub struct TextConfig { /// Enable spoken punctuation conversion (e.g., "period" → ".") #[serde(default)] @@ -1272,6 +1367,47 @@ pub struct TextConfig { /// The word "submit" is stripped from the output and Enter is pressed. #[serde(default)] pub smart_auto_submit: bool, + + /// Remove common filler words ("uh", "um", etc.) from transcribed text. + /// Enabled (true) in `TextConfig::default()`. NOTE(review): `#[serde(default)]` on a bool yields false when this key is omitted from a deserialized table - confirm that mismatch is intended. The list is + /// configurable via `filler_words`. + #[serde(default)] + pub filter_filler_words: bool, + + /// Words removed when `filter_filler_words` is true. Matched + /// case-insensitively on word boundaries; surrounding punctuation and + /// whitespace are cleaned up after removal. + #[serde(default = "default_filler_words")] + pub filler_words: Vec, +} + +impl Default for TextConfig { + fn default() -> Self { + Self { + spoken_punctuation: false, + replacements: HashMap::new(), + smart_auto_submit: false, + filter_filler_words: true, + filler_words: default_filler_words(), + } + } +} + +/// Default filler-word list. Conservative: single-syllable disfluencies only. 
+/// Multi-word phrases like "you know" or "sort of" are too aggressive for a +/// default and can be added via the `filler_words` config. +fn default_filler_words() -> Vec { + vec![ + "uh".to_string(), + "um".to_string(), + "er".to_string(), + "ah".to_string(), + "eh".to_string(), + "hmm".to_string(), + "hm".to_string(), + "mm".to_string(), + "mhm".to_string(), + ] } /// Meeting transcription configuration @@ -1504,6 +1640,15 @@ pub struct NotificationConfig { /// Show engine icon in notification title (🦜 for Parakeet, 🗣️ for Whisper) #[serde(default)] pub show_engine_icon: bool, + + /// Notification urgency level: "low", "normal", or "critical". + /// On GNOME, "low" notifications go straight to the drawer without a popup banner. + #[serde(default = "default_notification_urgency")] + pub urgency: String, +} + +fn default_notification_urgency() -> String { + "normal".to_string() } impl Default for NotificationConfig { @@ -1513,6 +1658,7 @@ impl Default for NotificationConfig { on_recording_stop: false, on_transcription: true, show_engine_icon: false, + urgency: default_notification_urgency(), } } } @@ -1801,7 +1947,7 @@ impl Default for Config { fn default() -> Self { Self { hotkey: HotkeyConfig { - key: "SCROLLLOCK".to_string(), + key: default_hotkey_key(), modifiers: vec![], mode: ActivationMode::default(), enabled: true, @@ -1873,6 +2019,7 @@ impl Default for Config { paraformer: None, dolphin: None, omnilingual: None, + cohere: None, text: TextConfig::default(), vad: VadConfig::default(), status: StatusConfig::default(), @@ -1884,12 +2031,40 @@ impl Default for Config { } impl Config { - /// Get the default config file path + /// System-wide config path used as a fallback when no user config exists. 
+ pub const SYSTEM_PATH: &'static str = "/etc/voxtype/config.toml"; + + /// Get the default user config file path (XDG) pub fn default_path() -> Option { directories::ProjectDirs::from("", "", "voxtype") .map(|dirs| dirs.config_dir().join("config.toml")) } + /// Get the system-wide config file path. + pub fn system_path() -> PathBuf { + PathBuf::from(Self::SYSTEM_PATH) + } + + /// Resolve which config file should actually be loaded, in priority order: + /// 1. User config (`~/.config/voxtype/config.toml`) + /// 2. System-wide config (`/etc/voxtype/config.toml`) + /// + /// Returns `None` if neither exists, in which case the caller should fall + /// back to built-in defaults. This does not consider the `--config` CLI + /// flag; callers handle that explicitly. + pub fn resolve_existing_path() -> Option { + if let Some(user) = Self::default_path() { + if user.exists() { + return Some(user); + } + } + let system = Self::system_path(); + if system.exists() { + return Some(system); + } + None + } + /// Get the runtime directory for ephemeral files (state, sockets) pub fn runtime_dir() -> PathBuf { // Use XDG_RUNTIME_DIR if available, otherwise fall back to /tmp @@ -1981,6 +2156,11 @@ impl Config { .as_ref() .map(|o| o.on_demand_loading) .unwrap_or(false), + TranscriptionEngine::Cohere => self + .cohere + .as_ref() + .map(|c| c.on_demand_loading) + .unwrap_or(false), } } @@ -2018,6 +2198,11 @@ impl Config { .as_ref() .map(|o| o.model.as_str()) .unwrap_or("omnilingual (not configured)"), + TranscriptionEngine::Cohere => self + .cohere + .as_ref() + .map(|c| c.model.as_str()) + .unwrap_or("cohere (not configured)"), } } @@ -2044,8 +2229,12 @@ pub fn load_config(path: Option<&Path>) -> Result { // Start with defaults let mut config = Config::default(); - // Determine config file path - let config_path = path.map(PathBuf::from).or_else(Config::default_path); + // Determine config file path. 
If --config wasn't passed, walk the + // documented lookup chain: user config -> /etc/voxtype/config.toml. + let config_path = match path { + Some(p) => Some(PathBuf::from(p)), + None => Config::resolve_existing_path(), + }; // Load from file if it exists if let Some(ref path) = config_path { @@ -2059,6 +2248,10 @@ pub fn load_config(path: Option<&Path>) -> Result { } else { tracing::debug!("Config file not found at {:?}, using defaults", path); } + } else { + tracing::debug!( + "No config file found at user or system path, using built-in defaults" + ); } // Override from environment variables @@ -2086,6 +2279,7 @@ pub fn load_config(path: Option<&Path>) -> Result { "paraformer" => config.engine = TranscriptionEngine::Paraformer, "dolphin" => config.engine = TranscriptionEngine::Dolphin, "omnilingual" => config.engine = TranscriptionEngine::Omnilingual, + "cohere" => config.engine = TranscriptionEngine::Cohere, _ => tracing::warn!("Unknown VOXTYPE_ENGINE value: {}", engine), } } @@ -2196,6 +2390,9 @@ pub fn load_config(path: Option<&Path>) -> Result { if let Ok(val) = std::env::var("VOXTYPE_SMART_AUTO_SUBMIT") { config.text.smart_auto_submit = parse_bool_env(&val); } + if let Ok(val) = std::env::var("VOXTYPE_FILTER_FILLERS") { + config.text.filter_filler_words = parse_bool_env(&val); + } Ok(config) } @@ -2225,7 +2422,7 @@ mod tests { #[test] fn test_default_config() { let config = Config::default(); - assert_eq!(config.hotkey.key, "SCROLLLOCK"); + assert_eq!(config.hotkey.key, default_hotkey_key()); assert_eq!(config.hotkey.mode, ActivationMode::PushToTalk); assert_eq!(config.audio.sample_rate, 16000); assert!(!config.audio.feedback.enabled); @@ -2293,7 +2490,7 @@ mod tests { let config: Config = toml::from_str(toml_str).unwrap(); assert!(!config.hotkey.enabled); - assert_eq!(config.hotkey.key, "SCROLLLOCK"); // defaults to SCROLLLOCK + assert_eq!(config.hotkey.key, default_hotkey_key()); // platform default } #[test] @@ -3776,4 +3973,42 @@ mod tests { let config: 
Config = toml::from_str(toml_str).unwrap(); assert!(config.hotkey.profile_modifiers.is_empty()); } + + #[test] + fn test_system_path_constant() { + assert_eq!(Config::system_path(), PathBuf::from("/etc/voxtype/config.toml")); + assert_eq!(Config::SYSTEM_PATH, "/etc/voxtype/config.toml"); + } + + #[test] + fn test_load_config_explicit_path() { + // Explicit --config should always be used regardless of fallback. + let dir = tempfile::tempdir().unwrap(); + let config_path = dir.path().join("config.toml"); + std::fs::write( + &config_path, + r#" + [hotkey] + key = "F12" + + [audio] + device = "default" + sample_rate = 16000 + max_duration_secs = 30 + + [whisper] + model = "tiny.en" + language = "en" + + [output] + mode = "clipboard" + "#, + ) + .unwrap(); + + let config = load_config(Some(&config_path)).unwrap(); + assert_eq!(config.hotkey.key, "F12"); + assert_eq!(config.whisper.model, "tiny.en"); + assert_eq!(config.output.mode, OutputMode::Clipboard); + } } diff --git a/src/cpu.rs b/src/cpu.rs index a67ecd3f..2536c2e7 100644 --- a/src/cpu.rs +++ b/src/cpu.rs @@ -12,11 +12,12 @@ use std::sync::atomic::{AtomicBool, Ordering}; static SIGILL_HANDLER_INSTALLED: AtomicBool = AtomicBool::new(false); -/// Constructor function that runs before main() via .init_array +/// Constructor function that runs before main() via platform-specific init section /// This ensures the SIGILL handler is installed before any library /// initialization code that might use unsupported instructions. 
#[used] -#[link_section = ".init_array"] +#[cfg_attr(target_os = "linux", link_section = ".init_array")] +#[cfg_attr(target_os = "macos", link_section = "__DATA,__mod_init_func")] static INIT_SIGILL_HANDLER: extern "C" fn() = { extern "C" fn init() { install_sigill_handler(); @@ -36,7 +37,10 @@ pub fn install_sigill_handler() { } unsafe { - libc::signal(libc::SIGILL, sigill_handler as *const () as libc::sighandler_t); + libc::signal( + libc::SIGILL, + sigill_handler as *const () as libc::sighandler_t, + ); } } diff --git a/src/daemon.rs b/src/daemon.rs index 6e50c7a8..8d6bd124 100644 --- a/src/daemon.rs +++ b/src/daemon.rs @@ -8,17 +8,24 @@ use crate::audio::{self, AudioCapture}; use crate::config::{ActivationMode, Config, FileMode, OutputMode}; use crate::eager::{self, EagerConfig}; use crate::error::Result; +#[cfg(target_os = "linux")] use crate::hotkey::{self, HotkeyEvent}; +#[cfg(target_os = "macos")] +use crate::hotkey_macos::{self as hotkey, HotkeyEvent}; use crate::meeting::{self, MeetingDaemon, MeetingEvent, StorageConfig}; use crate::model_manager::ModelManager; +use crate::notification; use crate::output; use crate::output::post_process::PostProcessor; use crate::state::{ChunkResult, State}; use crate::text::TextProcessor; use crate::transcribe::Transcriber; +#[cfg(target_os = "linux")] +use nix::sys::signal::{kill, Signal}; +#[cfg(target_os = "linux")] +use nix::unistd::Pid; use pidlock::Pidlock; use std::path::PathBuf; -use std::process::Stdio; use std::sync::Arc; use std::time::{Duration, Instant}; use tokio::process::Command; @@ -30,19 +37,42 @@ async fn send_notification( body: &str, show_engine_icon: bool, engine: crate::config::TranscriptionEngine, + urgency: &str, ) { + // On Linux, add emoji to title. On macOS, use content image instead. 
+ #[cfg(target_os = "linux")] let title = if show_engine_icon { format!("{} {}", crate::output::engine_icon(engine), title) } else { title.to_string() }; + #[cfg(not(target_os = "linux"))] + let title = title.to_string(); - let _ = Command::new("notify-send") - .args(["--app-name=Voxtype", "--expire-time=2000", &title, body]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - .await; + #[cfg(target_os = "linux")] + { + let urgency_arg = format!("--urgency={}", crate::output::sanitize_urgency(urgency)); + let _ = Command::new("notify-send") + .args([ + "--app-name=Voxtype", + &urgency_arg, + "--expire-time=2000", + &title, + body, + ]) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .status() + .await; + } + + #[cfg(target_os = "macos")] + { + // terminal-notifier has no urgency concept; ignore the arg on macOS. + let _ = urgency; + let engine_for_icon = if show_engine_icon { Some(engine) } else { None }; + notification::send_with_engine(&title, body, engine_for_icon).await; + } } /// Write state to file for external integrations (e.g., Waybar) @@ -93,6 +123,36 @@ fn write_pid_file() -> Option { Some(pid_path) } +/// Check if a PID is still running (Linux version using nix); returns true only when the null-signal probe succeeds +#[cfg(target_os = "linux")] +fn is_pid_running(pid: i32) -> bool { + // Null-signal probe (signal 0): checks that the process exists without delivering anything to it. Never send a real signal such as SIGCONT here - the PID comes from a possibly stale lockfile and may belong to an unrelated process. + kill(Pid::from_raw(pid), None::<Signal>).is_ok() +} + +/// Check if a PID is still running (macOS version using libc) +#[cfg(target_os = "macos")] +fn is_pid_running(pid: i32) -> bool { + // kill with signal 0 checks if process exists without sending a signal + unsafe { libc::kill(pid, 0) == 0 } +} + +/// Check if lockfile is stale (PID no longer running) and remove it if so +#[cfg(unix)] +fn cleanup_stale_lockfile(lock_path: &std::path::Path) -> bool { + if let Ok(contents) = std::fs::read_to_string(lock_path) { + if let Ok(pid) = contents.trim().parse::() { + if pid > 0 && !is_pid_running(pid) 
{ + tracing::info!("Removing stale lockfile (PID {} is no longer running)", pid); + if std::fs::remove_file(lock_path).is_ok() { + return true; + } + } + } + } + false +} + /// Remove PID file on shutdown fn cleanup_pid_file(path: &PathBuf) { if path.exists() { @@ -488,6 +548,10 @@ pub struct Daemon { post_processor: Option, /// Last post-processed text and when it was produced, for context in subsequent dictations last_dictation: Option<(String, Instant)>, + /// Audio level broadcaster for the OSD (None when disabled or bind failed) + level_hub: Option, + /// Active per-recording level emitter task; aborted when recording stops + level_emitter_task: Option>, // Model manager for multi-model support model_manager: Option, // Background task for loading model on-demand @@ -496,6 +560,10 @@ pub struct Daemon { std::result::Result, crate::error::TranscribeError>, >, >, + // Background task that spawns and prepares the gpu_isolation subprocess + // worker. Awaited before transcription so audio capture can start + // immediately while the worker loads its model in parallel. + whisper_prepare_task: Option>, // Background task for transcription (allows cancel during transcription) transcription_task: Option>, // Background tasks for eager chunk transcriptions (chunk_index, task) @@ -604,8 +672,11 @@ impl Daemon { text_processor, post_processor, last_dictation: None, + level_hub: None, + level_emitter_task: None, model_manager: None, model_load_task: None, + whisper_prepare_task: None, transcription_task: None, eager_chunk_tasks: Vec::new(), vad, @@ -650,6 +721,52 @@ impl Daemon { } } + /// Start a push-to-talk audio capture and (if enabled) a level emitter. + /// + /// Returns the capture handle on success. The chunk receiver from the + /// capture is plumbed into the level hub so the OSD sees audio frames + /// at 100 Hz during recording. The emitter task is tracked so it can + /// be cleanly aborted when recording stops. 
+ async fn start_recording_capture(&mut self) -> std::result::Result, ()> { + match audio::create_capture(&self.config.audio) { + Ok(mut capture) => match capture.start().await { + Ok(chunk_rx) => { + if let Some(hub) = &self.level_hub { + // Cancel any prior emitter (defensive; should be idle). + if let Some(handle) = self.level_emitter_task.take() { + handle.abort(); + } + let handle = audio::levels::spawn_emitter(chunk_rx, hub.frame_sink()); + self.level_emitter_task = Some(handle); + } + // If level_hub is None we still return Ok; the chunk_rx + // is dropped here, matching previous behaviour. + Ok(capture) + } + Err(e) => { + tracing::error!("Failed to start audio: {}", e); + self.play_feedback(SoundEvent::Error); + Err(()) + } + }, + Err(e) => { + tracing::error!("Failed to create audio capture: {}", e); + self.play_feedback(SoundEvent::Error); + Err(()) + } + } + } + + /// Stop the level emitter task (if running). The capture's chunk + /// receiver will close when the capture itself is dropped, which would + /// also end the emitter naturally — this just tightens the loop on + /// state transitions. 
+ fn stop_level_emitter(&mut self) { + if let Some(handle) = self.level_emitter_task.take() { + handle.abort(); + } + } + /// Get the transcriber for the current recording session /// /// For on-demand loading: waits for the background model load task to complete @@ -693,7 +810,8 @@ impl Daemon { | crate::config::TranscriptionEngine::SenseVoice | crate::config::TranscriptionEngine::Paraformer | crate::config::TranscriptionEngine::Dolphin - | crate::config::TranscriptionEngine::Omnilingual => { + | crate::config::TranscriptionEngine::Omnilingual + | crate::config::TranscriptionEngine::Cohere => { if let Some(ref t) = transcriber_preloaded { Ok(t.clone()) } else { @@ -703,6 +821,15 @@ impl Daemon { } } crate::config::TranscriptionEngine::Whisper => { + // Wait for the gpu_isolation worker to finish preparing + // (model load) before we hand the transcriber to the + // recording stop path. Otherwise transcribe() would race + // with the in-flight prepare and spawn a second worker. + if let Some(task) = self.whisper_prepare_task.take() { + if let Err(e) = task.await { + tracing::warn!("Whisper prepare task failed: {}", e); + } + } if let Some(ref mut mm) = self.model_manager { match mm.get_prepared_transcriber(model_override) { Ok(t) => Ok(t), @@ -852,6 +979,7 @@ impl Daemon { &format!("ID: {}", meeting_id), false, self.config.engine, + &self.config.output.notification.urgency, ) .await; } @@ -892,6 +1020,7 @@ impl Daemon { &format!("ID: {}", meeting_id), false, self.config.engine, + &self.config.output.notification.urgency, ) .await; } @@ -923,6 +1052,7 @@ impl Daemon { "Recording paused", false, self.config.engine, + &self.config.output.notification.urgency, ) .await; } @@ -944,6 +1074,7 @@ impl Daemon { "Recording resumed", false, self.config.engine, + &self.config.output.notification.urgency, ) .await; } @@ -1206,10 +1337,14 @@ impl Daemon { "Transcribing...", self.config.output.notification.show_engine_icon, self.config.engine, + 
&self.config.output.notification.urgency, ) .await; } + // Tear down the OSD audio-frame emitter for this session. + self.stop_level_emitter(); + // Stop recording and get samples if let Some(mut capture) = audio_capture.take() { match capture.stop().await { @@ -1362,12 +1497,25 @@ impl Daemon { } else { // Profile exists but has no post_process_command, use default if let Some(ref post_processor) = self.post_processor { - tracing::info!("Post-processing, has_context: {}", recent_context.is_some()); - tracing::debug!("Post-processing input: {:?}, context: {:?}", processed_text, recent_context); + tracing::info!( + "Post-processing, has_context: {}", + recent_context.is_some() + ); + tracing::debug!( + "Post-processing input: {:?}, context: {:?}", + processed_text, + recent_context + ); let result = post_processor - .process_with_context(&processed_text, recent_context.as_deref()) + .process_with_context( + &processed_text, + recent_context.as_deref(), + ) .await; - tracing::info!("Post-processed: changed: {}", result != processed_text); + tracing::info!( + "Post-processed: changed: {}", + result != processed_text + ); tracing::debug!("Post-processed result: {:?}", result); result } else { @@ -1375,8 +1523,15 @@ impl Daemon { } } } else if let Some(ref post_processor) = self.post_processor { - tracing::info!("Post-processing, has_context: {}", recent_context.is_some()); - tracing::debug!("Post-processing input: {:?}, context: {:?}", processed_text, recent_context); + tracing::info!( + "Post-processing, has_context: {}", + recent_context.is_some() + ); + tracing::debug!( + "Post-processing input: {:?}, context: {:?}", + processed_text, + recent_context + ); let result = post_processor .process_with_context(&processed_text, recent_context.as_deref()) .await; @@ -1388,8 +1543,7 @@ impl Daemon { }; // Track last dictation for context in subsequent post-processing - self.last_dictation = - Some((final_text.clone(), Instant::now())); + self.last_dictation = 
Some((final_text.clone(), Instant::now())); if smart_submit { tracing::debug!( @@ -1520,6 +1674,7 @@ impl Daemon { &final_text, self.config.output.notification.show_engine_icon, self.config.engine, + &self.config.output.notification.urgency, ) .await; } @@ -1579,6 +1734,24 @@ impl Daemon { crate::error::VoxtypeError::Config(format!("Failed to create directories: {}", e)) })?; + // Start the audio-level broadcaster for the OSD. Failure to bind + // the socket is not fatal: the daemon still runs without an OSD + // feed, and downstream code treats `level_hub == None` as "no OSD". + let level_socket = audio::levels::default_socket_path(); + match audio::levels::LevelHub::start(level_socket.clone()).await { + Ok(hub) => { + tracing::info!("OSD audio level socket: {:?}", hub.socket_path()); + self.level_hub = Some(hub); + } + Err(e) => { + tracing::warn!( + "Could not start OSD audio level socket at {:?}: {}", + level_socket, + e + ); + } + } + // Check if another instance is already running (single-instance safeguard) let lock_path = Config::runtime_dir().join("voxtype.lock"); let lock_path_str = lock_path.to_string_lossy().to_string(); @@ -1588,14 +1761,40 @@ impl Daemon { Ok(_) => { tracing::debug!("Acquired PID lock at {:?}", lock_path); } - Err(e) => { - tracing::error!( - "Failed to acquire lock: another voxtype instance is already running" - ); - return Err(crate::error::VoxtypeError::Config(format!( - "Another voxtype instance is already running (lock error: {:?})", - e - ))); + Err(_) => { + // Check if the lock is stale (previous daemon crashed) + #[cfg(unix)] + if cleanup_stale_lockfile(&lock_path) { + // Try again after removing stale lock + pidlock = Pidlock::new(&lock_path_str); + if let Err(e) = pidlock.acquire() { + tracing::error!("Failed to acquire lock after stale cleanup: {:?}", e); + return Err(crate::error::VoxtypeError::Config(format!( + "Another voxtype instance is already running (lock error: {:?})", + e + )) + .into()); + } + 
tracing::debug!("Acquired PID lock at {:?} (after stale cleanup)", lock_path); + } else { + tracing::error!( + "Failed to acquire lock: another voxtype instance is already running" + ); + return Err(crate::error::VoxtypeError::Config( + "Another voxtype instance is already running".to_string(), + ) + .into()); + } + #[cfg(not(unix))] + { + tracing::error!( + "Failed to acquire lock: another voxtype instance is already running" + ); + return Err(crate::error::VoxtypeError::Config( + "Another voxtype instance is already running".to_string(), + ) + .into()); + } } } @@ -1606,11 +1805,10 @@ impl Daemon { tracing::info!("State file: {:?}", path); } - // Initialize hotkey listener (if enabled) - let mut hotkey_listener = if self.config.hotkey.enabled { - tracing::info!("Hotkey: {}", self.config.hotkey.key); - - // Warn about profile modifiers that reference undefined profiles + // Warn about profile modifiers that reference undefined profiles. Runs + // before either platform's hotkey listener is created so the warning + // surfaces regardless of evdev/rdev backend. + if self.config.hotkey.enabled { for (key_name, profile_name) in &self.config.hotkey.profile_modifiers { if self.config.get_profile(profile_name).is_none() { tracing::warn!( @@ -1622,18 +1820,55 @@ impl Daemon { ); } } + } + + // Initialize hotkey listener (Linux: evdev, macOS: rdev) + #[cfg(target_os = "linux")] + let mut hotkey_listener: Option> = + if self.config.hotkey.enabled { + tracing::info!("Hotkey: {}", self.config.hotkey.key); + let secondary_model = self.config.whisper.secondary_model.clone(); + Some(hotkey::create_listener( + &self.config.hotkey, + secondary_model, + )?) 
+ } else { + tracing::info!( + "Built-in hotkey disabled, use 'voxtype record' commands or compositor keybindings" + ); + None + }; + #[cfg(target_os = "macos")] + let mut hotkey_listener: Option> = if self + .config + .hotkey + .enabled + { + tracing::info!("Hotkey: {}", self.config.hotkey.key); let secondary_model = self.config.whisper.secondary_model.clone(); - Some(hotkey::create_listener( - &self.config.hotkey, - secondary_model, - )?) + match hotkey::create_listener(&self.config.hotkey, secondary_model) { + Ok(listener) => Some(listener), + Err(e) => { + tracing::warn!("Failed to create hotkey listener: {}. Use 'voxtype record' commands instead.", e); + None + } + } } else { tracing::info!( "Built-in hotkey disabled, use 'voxtype record' commands or compositor keybindings" ); None }; + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + let hotkey_listener: Option<()> = { + if self.config.hotkey.enabled { + tracing::warn!( + "Built-in hotkey not supported on this platform, use 'voxtype record' commands" + ); + } + None + }; // Log default output chain (chain is created dynamically per-transcription to support overrides) let default_chain = output::create_output_chain(&self.config.output); @@ -1667,7 +1902,8 @@ impl Daemon { | crate::config::TranscriptionEngine::SenseVoice | crate::config::TranscriptionEngine::Paraformer | crate::config::TranscriptionEngine::Dolphin - | crate::config::TranscriptionEngine::Omnilingual => { + | crate::config::TranscriptionEngine::Omnilingual + | crate::config::TranscriptionEngine::Cohere => { // Parakeet/Moonshine uses its own model loading transcriber_preloaded = Some(Arc::from(crate::transcribe::create_transcriber( &self.config, @@ -1690,11 +1926,20 @@ impl Daemon { self.model_manager = Some(model_manager); // Start hotkey listener (if enabled) + #[cfg(any(target_os = "linux", target_os = "macos"))] let mut hotkey_rx = if let Some(ref mut listener) = hotkey_listener { - Some(listener.start().await?) 
+ match listener.start() { + Ok(rx) => Some(rx), + Err(e) => { + tracing::warn!("Failed to start hotkey listener: {}. Use 'voxtype record' commands instead.", e); + None + } + } } else { None }; + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + let mut hotkey_rx: Option> = None; // Current state let mut state = State::Idle; @@ -1749,7 +1994,7 @@ impl Daemon { // Send notification if enabled if self.config.output.notification.on_recording_start { - send_notification("Push to Talk Active", "Recording...", self.config.output.notification.show_engine_icon, self.config.engine).await; + send_notification("Push to Talk Active", "Recording...", self.config.output.notification.show_engine_icon, self.config.engine, &self.config.output.notification.urgency).await; } // Prepare model for transcription @@ -1770,7 +2015,8 @@ impl Daemon { | crate::config::TranscriptionEngine::SenseVoice | crate::config::TranscriptionEngine::Paraformer | crate::config::TranscriptionEngine::Dolphin - | crate::config::TranscriptionEngine::Omnilingual => { + | crate::config::TranscriptionEngine::Omnilingual + | crate::config::TranscriptionEngine::Cohere => { let config = self.config.clone(); self.model_load_task = Some(tokio::task::spawn_blocking(move || { crate::transcribe::create_transcriber(&config).map(Arc::from) @@ -1783,8 +2029,13 @@ impl Daemon { match self.config.engine { crate::config::TranscriptionEngine::Whisper => { if let Some(ref mut mm) = self.model_manager { - if let Err(e) = mm.prepare_model(model_override.as_deref()) { - tracing::warn!("Failed to prepare model: {}", e); + match mm.prepare_model(model_override.as_deref()) { + Ok(handle) => { + self.whisper_prepare_task = handle; + } + Err(e) => { + tracing::warn!("Failed to prepare model: {}", e); + } } } } @@ -1793,7 +2044,8 @@ impl Daemon { | crate::config::TranscriptionEngine::SenseVoice | crate::config::TranscriptionEngine::Paraformer | crate::config::TranscriptionEngine::Dolphin - | 
crate::config::TranscriptionEngine::Omnilingual => { + | crate::config::TranscriptionEngine::Omnilingual + | crate::config::TranscriptionEngine::Cohere => { if let Some(ref t) = transcriber_preloaded { let transcriber = t.clone(); tokio::task::spawn_blocking(move || { @@ -1806,13 +2058,8 @@ impl Daemon { // Create and start audio capture tracing::debug!("Creating audio capture with device: {}", self.config.audio.device); - match audio::create_capture(&self.config.audio) { - Ok(mut capture) => { - tracing::debug!("Audio capture created, starting..."); - if let Err(e) = capture.start().await { - tracing::error!("Failed to start audio: {}", e); - continue; - } + match self.start_recording_capture().await { + Ok(capture) => { tracing::debug!("Audio capture started successfully"); audio_capture = Some(capture); @@ -1844,10 +2091,9 @@ impl Daemon { } } } - Err(e) => { - tracing::error!("Failed to create audio capture: {}", e); + Err(()) => { + // Helper already logged and played the error sound. 
cleanup_profile_override(); - self.play_feedback(SoundEvent::Error); } } } @@ -1886,7 +2132,7 @@ impl Daemon { self.play_feedback(SoundEvent::RecordingStop); if self.config.output.notification.on_recording_stop { - send_notification("Recording Stopped", "Transcribing...", self.config.output.notification.show_engine_icon, self.config.engine).await; + send_notification("Recording Stopped", "Transcribing...", self.config.output.notification.show_engine_icon, self.config.engine, &self.config.output.notification.urgency).await; } // Stop audio capture and get remaining samples @@ -1940,7 +2186,7 @@ impl Daemon { tracing::info!("Recording started (toggle mode)"); if self.config.output.notification.on_recording_start { - send_notification("Recording Started", "Press hotkey again to stop", self.config.output.notification.show_engine_icon, self.config.engine).await; + send_notification("Recording Started", "Press hotkey again to stop", self.config.output.notification.show_engine_icon, self.config.engine, &self.config.output.notification.urgency).await; } // Prepare model for transcription @@ -1961,7 +2207,8 @@ impl Daemon { | crate::config::TranscriptionEngine::SenseVoice | crate::config::TranscriptionEngine::Paraformer | crate::config::TranscriptionEngine::Dolphin - | crate::config::TranscriptionEngine::Omnilingual => { + | crate::config::TranscriptionEngine::Omnilingual + | crate::config::TranscriptionEngine::Cohere => { let config = self.config.clone(); self.model_load_task = Some(tokio::task::spawn_blocking(move || { crate::transcribe::create_transcriber(&config).map(Arc::from) @@ -1974,8 +2221,13 @@ impl Daemon { match self.config.engine { crate::config::TranscriptionEngine::Whisper => { if let Some(ref mut mm) = self.model_manager { - if let Err(e) = mm.prepare_model(model_override.as_deref()) { - tracing::warn!("Failed to prepare model: {}", e); + match mm.prepare_model(model_override.as_deref()) { + Ok(handle) => { + self.whisper_prepare_task = handle; + } + Err(e) 
=> { + tracing::warn!("Failed to prepare model: {}", e); + } } } } @@ -1984,7 +2236,8 @@ impl Daemon { | crate::config::TranscriptionEngine::SenseVoice | crate::config::TranscriptionEngine::Paraformer | crate::config::TranscriptionEngine::Dolphin - | crate::config::TranscriptionEngine::Omnilingual => { + | crate::config::TranscriptionEngine::Omnilingual + | crate::config::TranscriptionEngine::Cohere => { if let Some(ref t) = transcriber_preloaded { let transcriber = t.clone(); tokio::task::spawn_blocking(move || { @@ -1995,13 +2248,8 @@ impl Daemon { } } - match audio::create_capture(&self.config.audio) { - Ok(mut capture) => { - if let Err(e) = capture.start().await { - tracing::error!("Failed to start audio: {}", e); - self.play_feedback(SoundEvent::Error); - continue; - } + match self.start_recording_capture().await { + Ok(capture) => { audio_capture = Some(capture); // Use EagerRecording state if eager_processing is enabled @@ -2032,10 +2280,9 @@ impl Daemon { } } } - Err(e) => { - tracing::error!("Failed to create audio capture: {}", e); + Err(()) => { + // Helper already logged and played the error sound. cleanup_profile_override(); - self.play_feedback(SoundEvent::Error); } } } else if let State::Recording { model_override: current_model_override, .. 
} = &state { @@ -2070,7 +2317,7 @@ impl Daemon { self.play_feedback(SoundEvent::RecordingStop); if self.config.output.notification.on_recording_stop { - send_notification("Recording Stopped", "Transcribing...", self.config.output.notification.show_engine_icon, self.config.engine).await; + send_notification("Recording Stopped", "Transcribing...", self.config.output.notification.show_engine_icon, self.config.engine, &self.config.output.notification.urgency).await; } // Stop audio capture and get remaining samples @@ -2150,7 +2397,7 @@ impl Daemon { } if self.config.output.notification.on_recording_stop { - send_notification("Cancelled", "Recording discarded", self.config.output.notification.show_engine_icon, self.config.engine).await; + send_notification("Cancelled", "Recording discarded", self.config.output.notification.show_engine_icon, self.config.engine, &self.config.output.notification.urgency).await; } } else if matches!(state, State::Transcribing { .. }) { tracing::info!("Transcription cancelled via hotkey"); @@ -2176,7 +2423,7 @@ impl Daemon { } if self.config.output.notification.on_recording_stop { - send_notification("Cancelled", "Transcription aborted", self.config.output.notification.show_engine_icon, self.config.engine).await; + send_notification("Cancelled", "Transcription aborted", self.config.output.notification.show_engine_icon, self.config.engine, &self.config.output.notification.urgency).await; } } else { tracing::trace!("Cancel ignored - not recording or transcribing"); @@ -2237,7 +2484,7 @@ impl Daemon { } if self.config.output.notification.on_recording_stop { - send_notification("Cancelled", "Recording discarded", self.config.output.notification.show_engine_icon, self.config.engine).await; + send_notification("Cancelled", "Recording discarded", self.config.output.notification.show_engine_icon, self.config.engine, &self.config.output.notification.urgency).await; } continue; @@ -2376,7 +2623,7 @@ impl Daemon { tracing::info!("Recording started 
(external trigger), model_override = {:?}", model_override); if self.config.output.notification.on_recording_start { - send_notification("Recording Started", "External trigger", self.config.output.notification.show_engine_icon, self.config.engine).await; + send_notification("Recording Started", "External trigger", self.config.output.notification.show_engine_icon, self.config.engine, &self.config.output.notification.urgency).await; } // Prepare model for transcription @@ -2397,7 +2644,8 @@ impl Daemon { | crate::config::TranscriptionEngine::SenseVoice | crate::config::TranscriptionEngine::Paraformer | crate::config::TranscriptionEngine::Dolphin - | crate::config::TranscriptionEngine::Omnilingual => { + | crate::config::TranscriptionEngine::Omnilingual + | crate::config::TranscriptionEngine::Cohere => { let config = self.config.clone(); self.model_load_task = Some(tokio::task::spawn_blocking(move || { crate::transcribe::create_transcriber(&config).map(Arc::from) @@ -2409,8 +2657,13 @@ impl Daemon { match self.config.engine { crate::config::TranscriptionEngine::Whisper => { if let Some(ref mut mm) = self.model_manager { - if let Err(e) = mm.prepare_model(model_override.as_deref()) { - tracing::warn!("Failed to prepare model: {}", e); + match mm.prepare_model(model_override.as_deref()) { + Ok(handle) => { + self.whisper_prepare_task = handle; + } + Err(e) => { + tracing::warn!("Failed to prepare model: {}", e); + } } } } @@ -2419,7 +2672,8 @@ impl Daemon { | crate::config::TranscriptionEngine::SenseVoice | crate::config::TranscriptionEngine::Paraformer | crate::config::TranscriptionEngine::Dolphin - | crate::config::TranscriptionEngine::Omnilingual => { + | crate::config::TranscriptionEngine::Omnilingual + | crate::config::TranscriptionEngine::Cohere => { if let Some(ref t) = transcriber_preloaded { let transcriber = t.clone(); tokio::task::spawn_blocking(move || { @@ -2430,45 +2684,40 @@ impl Daemon { } } - match audio::create_capture(&self.config.audio) { - Ok(mut 
capture) => { - if let Err(e) = capture.start().await { - tracing::error!("Failed to start audio: {}", e); + match self.start_recording_capture().await { + Ok(capture) => { + audio_capture = Some(capture); + + // Use EagerRecording state if eager_processing is enabled + if self.config.whisper.eager_processing { + tracing::info!("Using eager input processing"); + state = State::EagerRecording { + started_at: std::time::Instant::now(), + model_override, + accumulated_audio: Vec::new(), + chunks_sent: 0, + chunk_results: Vec::new(), + tasks_in_flight: 0, + }; } else { - audio_capture = Some(capture); - - // Use EagerRecording state if eager_processing is enabled - if self.config.whisper.eager_processing { - tracing::info!("Using eager input processing"); - state = State::EagerRecording { - started_at: std::time::Instant::now(), - model_override, - accumulated_audio: Vec::new(), - chunks_sent: 0, - chunk_results: Vec::new(), - tasks_in_flight: 0, - }; - } else { - state = State::Recording { - started_at: std::time::Instant::now(), - model_override, - }; - } - self.update_state("recording"); - self.play_feedback(SoundEvent::RecordingStart); - self.pause_media_players().await; - - // Run pre-recording hook (e.g., enter compositor submap for cancel) - if let Some(cmd) = &self.config.output.pre_recording_command { - if let Err(e) = output::run_hook(cmd, "pre_recording").await { - tracing::warn!("{}", e); - } + state = State::Recording { + started_at: std::time::Instant::now(), + model_override, + }; + } + self.update_state("recording"); + self.play_feedback(SoundEvent::RecordingStart); + self.pause_media_players().await; + + // Run pre-recording hook (e.g., enter compositor submap for cancel) + if let Some(cmd) = &self.config.output.pre_recording_command { + if let Err(e) = output::run_hook(cmd, "pre_recording").await { + tracing::warn!("{}", e); } } } - Err(e) => { - tracing::error!("Failed to create audio capture: {}", e); - self.play_feedback(SoundEvent::Error); + 
Err(()) => { + // Helper already logged and played the error sound. } } } @@ -2508,7 +2757,7 @@ impl Daemon { self.play_feedback(SoundEvent::RecordingStop); if self.config.output.notification.on_recording_stop { - send_notification("Recording Stopped", "Transcribing...", self.config.output.notification.show_engine_icon, self.config.engine).await; + send_notification("Recording Stopped", "Transcribing...", self.config.output.notification.show_engine_icon, self.config.engine, &self.config.output.notification.urgency).await; } // Stop audio capture and get remaining samples @@ -2582,7 +2831,7 @@ impl Daemon { } if self.config.output.notification.on_recording_stop { - send_notification("Cancelled", "Transcription aborted", self.config.output.notification.show_engine_icon, self.config.engine).await; + send_notification("Cancelled", "Transcription aborted", self.config.output.notification.show_engine_icon, self.config.engine, &self.config.output.notification.urgency).await; } } } @@ -2819,10 +3068,13 @@ impl Daemon { } } - // Cleanup + // Cleanup hotkey listener + #[cfg(any(target_os = "linux", target_os = "macos"))] if let Some(mut listener) = hotkey_listener { - listener.stop().await?; + let _ = listener.stop(); // Best effort cleanup } + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + let _ = hotkey_listener; // Silence unused variable warning // Abort any pending transcription task if let Some(task) = self.transcription_task.take() { @@ -2858,6 +3110,12 @@ impl Daemon { cleanup_pid_file(path); } + // Remove the OSD audio level socket so a stale path doesn't + // confuse the next daemon start. 
+ if let Some(ref hub) = self.level_hub { + hub.cleanup(); + } + tracing::info!("Daemon stopped"); Ok(()) @@ -3048,6 +3306,7 @@ mod tests { }); } + #[test] fn test_pidlock_acquisition_succeeds() { with_test_runtime_dir(|dir| { let lock_path = dir.join("voxtype.lock"); @@ -3104,4 +3363,37 @@ mod tests { ); }); } + + #[test] + fn test_stale_lockfile_cleanup() { + with_test_runtime_dir(|dir| { + let lock_path = dir.join("voxtype.lock"); + + // Write a stale lockfile with a PID that doesn't exist + // PID 99999999 is very unlikely to exist + std::fs::write(&lock_path, "99999999").expect("Failed to write stale lockfile"); + assert!(lock_path.exists(), "Stale lockfile should exist"); + + // cleanup_stale_lockfile should detect and remove it + let cleaned = cleanup_stale_lockfile(&lock_path); + assert!(cleaned, "Stale lockfile should be cleaned up"); + assert!(!lock_path.exists(), "Stale lockfile should be removed"); + }); + } + + #[test] + fn test_stale_lockfile_not_cleaned_if_pid_running() { + with_test_runtime_dir(|dir| { + let lock_path = dir.join("voxtype.lock"); + + // Write a lockfile with our own PID (which is running) + let our_pid = std::process::id(); + std::fs::write(&lock_path, our_pid.to_string()).expect("Failed to write lockfile"); + + // cleanup_stale_lockfile should NOT remove it (PID is running) + let cleaned = cleanup_stale_lockfile(&lock_path); + assert!(!cleaned, "Lockfile with running PID should not be cleaned"); + assert!(lock_path.exists(), "Lockfile should still exist"); + }); + } } diff --git a/src/error.rs b/src/error.rs index e2d7e2e9..88d79b49 100644 --- a/src/error.rs +++ b/src/error.rs @@ -94,6 +94,9 @@ pub enum TranscribeError { #[error("Remote server error: {0}")] RemoteError(String), + + #[error("{0}")] + LicenseRequired(String), } /// Errors related to Voice Activity Detection @@ -173,6 +176,7 @@ pub enum MeetingError { /// Result type alias using VoxtypeError pub type Result = std::result::Result; +#[cfg(target_os = "linux")] impl From 
for HotkeyError { fn from(e: evdev::Error) -> Self { HotkeyError::Evdev(e.to_string()) diff --git a/src/hotkey/evdev_listener.rs b/src/hotkey/evdev_listener.rs index f235057f..813065b9 100644 --- a/src/hotkey/evdev_listener.rs +++ b/src/hotkey/evdev_listener.rs @@ -110,9 +110,8 @@ impl EvdevListener { } } -#[async_trait::async_trait] impl HotkeyListener for EvdevListener { - async fn start(&mut self) -> Result, HotkeyError> { + fn start(&mut self) -> Result, HotkeyError> { let (tx, rx) = mpsc::channel(32); let (stop_tx, stop_rx) = oneshot::channel(); self.stop_signal = Some(stop_tx); @@ -143,7 +142,7 @@ impl HotkeyListener for EvdevListener { Ok(rx) } - async fn stop(&mut self) -> Result<(), HotkeyError> { + fn stop(&mut self) -> Result<(), HotkeyError> { if let Some(stop) = self.stop_signal.take() { let _ = stop.send(()); } diff --git a/src/hotkey/mod.rs b/src/hotkey/mod.rs index 71e40427..a1705d00 100644 --- a/src/hotkey/mod.rs +++ b/src/hotkey/mod.rs @@ -29,14 +29,13 @@ pub enum HotkeyEvent { } /// Trait for hotkey detection implementations -#[async_trait::async_trait] -pub trait HotkeyListener: Send + Sync { +pub trait HotkeyListener: Send { /// Start listening for hotkey events /// Returns a channel receiver for events - async fn start(&mut self) -> Result, HotkeyError>; + fn start(&mut self) -> Result, HotkeyError>; /// Stop listening and clean up - async fn stop(&mut self) -> Result<(), HotkeyError>; + fn stop(&mut self) -> Result<(), HotkeyError>; } /// Factory function to create the appropriate hotkey listener diff --git a/src/hotkey_macos.rs b/src/hotkey_macos.rs new file mode 100644 index 00000000..8cac746e --- /dev/null +++ b/src/hotkey_macos.rs @@ -0,0 +1,333 @@ +//! macOS global hotkey support using rdev +//! +//! Provides global keyboard event capture on macOS using the rdev crate. +//! Requires Accessibility permission to be granted to the terminal/app. +//! +//! Fallback: If rdev doesn't work (permissions not granted), users can use +//! 
Hammerspoon or Karabiner-Elements to trigger `voxtype record toggle`. + +use crate::config::HotkeyConfig; +use crate::error::{HotkeyError, Result}; +use rdev::{listen, Event, EventType, Key}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::sync::Mutex; +use std::time::{Duration, Instant}; +use tokio::sync::mpsc; + +/// Hotkey events that can be sent from the listener +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum HotkeyEvent { + /// The hotkey was pressed. model_override / profile_override are not yet + /// surfaced from the macOS rdev backend — always None today, but the field + /// matches the Linux variant so the daemon match arms stay platform-agnostic. + Pressed { + model_override: Option, + profile_override: Option, + }, + Released, + Cancel, +} + +/// Hotkey listener trait for macOS +pub trait HotkeyListener: Send { + /// Start listening for hotkey events + fn start(&mut self) -> Result>; + + /// Stop listening + fn stop(&mut self) -> Result<()>; +} + +/// rdev-based hotkey listener for macOS +pub struct RdevHotkeyListener { + target_key: Key, + cancel_key: Option, + running: Arc, + thread_handle: Option>, +} + +impl RdevHotkeyListener { + /// Create a new rdev hotkey listener + pub fn new(config: &HotkeyConfig) -> Result { + let target_key = parse_key_name(&config.key) + .ok_or_else(|| HotkeyError::UnknownKey(config.key.clone()))?; + + let cancel_key = config.cancel_key.as_ref().and_then(|k| parse_key_name(k)); + + Ok(Self { + target_key, + cancel_key, + running: Arc::new(AtomicBool::new(false)), + thread_handle: None, + }) + } +} + +impl HotkeyListener for RdevHotkeyListener { + fn start(&mut self) -> Result> { + // Check/request Accessibility permission before starting the listener. + // This triggers the macOS system dialog if permission hasn't been granted. + if !check_accessibility_permission() { + tracing::warn!( + "Accessibility permission not granted. \ + macOS should have shown a permission dialog. 
\ + Grant access in: System Settings > Privacy & Security > Accessibility" + ); + } + + let (tx, rx) = mpsc::channel(32); + let target_key = self.target_key; + let cancel_key = self.cancel_key; + let running = self.running.clone(); + running.store(true, Ordering::SeqCst); + + // If Accessibility permission isn't granted, rdev::listen() creates a dead + // event tap that never fires. The only fix is to restart the process after + // permission is granted. Spawn a watcher that re-execs when permission appears. + if !is_accessibility_granted() { + let running_watcher = running.clone(); + std::thread::spawn(move || { + loop { + if !running_watcher.load(Ordering::SeqCst) { + return; + } + std::thread::sleep(Duration::from_secs(2)); + if is_accessibility_granted() { + tracing::info!( + "Accessibility permission granted, restarting daemon to activate hotkey..." + ); + // Remove lock file so the new process can acquire it + let lock_path = crate::config::Config::runtime_dir().join("voxtype.lock"); + let _ = std::fs::remove_file(&lock_path); + // Spawn a new daemon and exit. The dead CGEvent tap in this + // process can't be revived; a fresh process is needed. 
+ let exe = std::env::current_exe().expect("current_exe"); + let args: Vec = std::env::args().skip(1).collect(); + match std::process::Command::new(&exe).args(&args).spawn() { + Ok(_) => std::process::exit(0), + Err(e) => { + tracing::error!("Failed to restart: {}", e); + return; + } + } + } + } + }); + } + + let thread_handle = std::thread::spawn(move || { + let tx_clone = tx.clone(); + let running_clone = running.clone(); + + // Debounce: track last event time to prevent duplicate events + let last_press = Arc::new(Mutex::new(Instant::now() - Duration::from_secs(10))); + let last_release = Arc::new(Mutex::new(Instant::now() - Duration::from_secs(10))); + let debounce_ms = 100; // Minimum ms between same event type + + let last_press_clone = last_press.clone(); + let last_release_clone = last_release.clone(); + + let callback = move |event: Event| { + if !running_clone.load(Ordering::SeqCst) { + return; + } + + match event.event_type { + EventType::KeyPress(key) => { + if key == target_key { + let mut last = last_press_clone.lock().unwrap(); + if last.elapsed() > Duration::from_millis(debounce_ms) { + *last = Instant::now(); + let _ = tx_clone.blocking_send(HotkeyEvent::Pressed { + model_override: None, + profile_override: None, + }); + } + } else if Some(key) == cancel_key { + let _ = tx_clone.blocking_send(HotkeyEvent::Cancel); + } + } + EventType::KeyRelease(key) => { + if key == target_key { + let mut last = last_release_clone.lock().unwrap(); + if last.elapsed() > Duration::from_millis(debounce_ms) { + *last = Instant::now(); + let _ = tx_clone.blocking_send(HotkeyEvent::Released); + } + } + } + _ => {} + } + }; + + // This blocks until an error occurs or the process is terminated + if let Err(e) = listen(callback) { + tracing::error!("rdev listen error: {:?}", e); + tracing::warn!( + "Global hotkey capture failed. Grant Accessibility permission in \ + System Settings > Privacy & Security > Accessibility, \ + or use Hammerspoon for hotkey support." 
+ ); + } + }); + + self.thread_handle = Some(thread_handle); + Ok(rx) + } + + fn stop(&mut self) -> Result<()> { + self.running.store(false, Ordering::SeqCst); + // Note: rdev's listen() has no way to be stopped from another thread. + // Clearing `running` only makes the callback ignore events; the thread lives until the process exits. + Ok(()) + } +} + +/// Parse a key name string to rdev Key +fn parse_key_name(name: &str) -> Option { + match name.to_uppercase().as_str() { + // Function keys + "F1" => Some(Key::F1), + "F2" => Some(Key::F2), + "F3" => Some(Key::F3), + "F4" => Some(Key::F4), + "F5" => Some(Key::F5), + "F6" => Some(Key::F6), + "F7" => Some(Key::F7), + "F8" => Some(Key::F8), + "F9" => Some(Key::F9), + "F10" => Some(Key::F10), + "F11" => Some(Key::F11), + "F12" => Some(Key::F12), + + // Modifier keys + "LEFTALT" | "LEFTOPT" | "LEFTOPTION" | "ALT" | "OPTION" => Some(Key::Alt), + "RIGHTALT" | "RIGHTOPT" | "RIGHTOPTION" => Some(Key::AltGr), + "LEFTCTRL" | "LEFTCONTROL" | "CTRL" | "CONTROL" => Some(Key::ControlLeft), + "RIGHTCTRL" | "RIGHTCONTROL" => Some(Key::ControlRight), + "LEFTSHIFT" | "SHIFT" => Some(Key::ShiftLeft), + "RIGHTSHIFT" => Some(Key::ShiftRight), + "LEFTMETA" | "LEFTCMD" | "LEFTCOMMAND" | "CMD" | "COMMAND" | "META" => Some(Key::MetaLeft), + "RIGHTMETA" | "RIGHTCMD" | "RIGHTCOMMAND" => Some(Key::MetaRight), + + // Special keys + "ESCAPE" | "ESC" => Some(Key::Escape), + "SPACE" => Some(Key::Space), + "TAB" => Some(Key::Tab), + "CAPSLOCK" => Some(Key::CapsLock), + "BACKSPACE" => Some(Key::Backspace), + "ENTER" | "RETURN" => Some(Key::Return), + + // Navigation + "UP" | "UPARROW" => Some(Key::UpArrow), + "DOWN" | "DOWNARROW" => Some(Key::DownArrow), + "LEFT" | "LEFTARROW" => Some(Key::LeftArrow), + "RIGHT" | "RIGHTARROW" => Some(Key::RightArrow), + "HOME" => Some(Key::Home), + "END" => Some(Key::End), + "PAGEUP" => Some(Key::PageUp), + "PAGEDOWN" => Some(Key::PageDown), + + // Other + "DELETE" => Some(Key::Delete), + "INSERT" => Some(Key::Insert), + "PAUSE" => 
Some(Key::Pause), + "SCROLLLOCK" => Some(Key::ScrollLock), + "PRINTSCREEN" => Some(Key::PrintScreen), + "FN" | "FUNCTION" | "GLOBE" => Some(Key::Function), + + // Letters (for completeness, though unusual for hotkeys) + "A" => Some(Key::KeyA), + "B" => Some(Key::KeyB), + "C" => Some(Key::KeyC), + "D" => Some(Key::KeyD), + "E" => Some(Key::KeyE), + "F" => Some(Key::KeyF), + "G" => Some(Key::KeyG), + "H" => Some(Key::KeyH), + "I" => Some(Key::KeyI), + "J" => Some(Key::KeyJ), + "K" => Some(Key::KeyK), + "L" => Some(Key::KeyL), + "M" => Some(Key::KeyM), + "N" => Some(Key::KeyN), + "O" => Some(Key::KeyO), + "P" => Some(Key::KeyP), + "Q" => Some(Key::KeyQ), + "R" => Some(Key::KeyR), + "S" => Some(Key::KeyS), + "T" => Some(Key::KeyT), + "U" => Some(Key::KeyU), + "V" => Some(Key::KeyV), + "W" => Some(Key::KeyW), + "X" => Some(Key::KeyX), + "Y" => Some(Key::KeyY), + "Z" => Some(Key::KeyZ), + + _ => None, + } +} + +/// Create a hotkey listener for macOS +pub fn create_listener( + config: &HotkeyConfig, + _secondary_model: Option, +) -> Result> { + Ok(Box::new(RdevHotkeyListener::new(config)?)) +} + +/// Check if Accessibility permission is granted by trying to create an event tap. +/// Unlike AXIsProcessTrusted(), this is not cached and reflects the current state. +fn is_accessibility_granted() -> bool { + use core_graphics::event::{ + CGEventTap, CGEventTapLocation, CGEventTapOptions, CGEventTapPlacement, CGEventType, + }; + + let tap = CGEventTap::new( + CGEventTapLocation::Session, + CGEventTapPlacement::HeadInsertEventTap, + CGEventTapOptions::ListenOnly, + vec![CGEventType::KeyDown], + |_, _, _| None, + ); + tap.is_ok() +} + +/// Check if Accessibility permission is granted, prompting the user if not. +/// +/// Calls AXIsProcessTrustedWithOptions with kAXTrustedCheckOptionPrompt=true, +/// which makes macOS show the "App wants to control this computer" dialog +/// if permission hasn't been granted yet. 
+pub fn check_accessibility_permission() -> bool { + #[link(name = "ApplicationServices", kind = "framework")] + extern "C" { + fn AXIsProcessTrustedWithOptions(options: core_foundation::base::CFTypeRef) -> bool; + } + + use core_foundation::base::TCFType; + use core_foundation::boolean::CFBoolean; + use core_foundation::dictionary::CFDictionary; + use core_foundation::string::CFString; + + let key = CFString::new("AXTrustedCheckOptionPrompt"); + let value = CFBoolean::true_value(); + let options = CFDictionary::from_CFType_pairs(&[(key.as_CFType(), value.as_CFType())]); + + unsafe { AXIsProcessTrustedWithOptions(options.as_concrete_TypeRef() as _) } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_key_name() { + assert_eq!(parse_key_name("F1"), Some(Key::F1)); + assert_eq!(parse_key_name("f1"), Some(Key::F1)); + assert_eq!(parse_key_name("RIGHTALT"), Some(Key::AltGr)); + assert_eq!(parse_key_name("rightoption"), Some(Key::AltGr)); + assert_eq!(parse_key_name("CMD"), Some(Key::MetaLeft)); + assert_eq!(parse_key_name("SCROLLLOCK"), Some(Key::ScrollLock)); + assert_eq!(parse_key_name("UNKNOWN"), None); + } +} diff --git a/src/lib.rs b/src/lib.rs index 301291a4..b8189f45 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -75,18 +75,27 @@ pub mod cpu; pub mod daemon; pub mod eager; pub mod error; +#[cfg(target_os = "linux")] pub mod hotkey; +#[cfg(target_os = "macos")] +pub mod hotkey_macos; pub mod meeting; +#[cfg(target_os = "macos")] +pub mod menubar; pub mod model_manager; +pub mod notification; +pub mod osd; pub mod output; pub mod setup; pub mod state; pub mod text; pub mod transcribe; +pub mod tui; pub mod vad; pub use cli::{ - Cli, Commands, CompositorType, MeetingAction, OutputModeOverride, RecordAction, SetupAction, + Cli, Commands, CompositorType, InfoAction, MeetingAction, OutputModeOverride, RecordAction, + SetupAction, }; pub use config::Config; pub use daemon::Daemon; diff --git a/src/main.rs b/src/main.rs index 6279e7e6..85306dc1 
100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,10 +6,11 @@ use clap::Parser; use std::path::PathBuf; -use std::process::Command; use tracing_subscriber::EnvFilter; +#[cfg(target_os = "macos")] +use voxtype::menubar; use voxtype::{ - config, cpu, daemon, meeting, setup, transcribe, vad, Cli, Commands, MeetingAction, + config, cpu, daemon, meeting, setup, transcribe, vad, Cli, Commands, InfoAction, MeetingAction, RecordAction, SetupAction, }; @@ -88,8 +89,13 @@ async fn main() -> anyhow::Result<()> { .init(); } - // Load configuration - let config_path = cli.config.clone().or_else(config::Config::default_path); + // Load configuration. config_path tracks the file we actually loaded (or + // would load), so subprocess transcribers can reuse the same source. + let config_path = cli + .config + .clone() + .or_else(config::Config::resolve_existing_path) + .or_else(config::Config::default_path); let mut config = config::load_config(cli.config.as_deref())?; // Apply CLI overrides @@ -116,14 +122,11 @@ async fn main() -> anyhow::Result<()> { model, default_model ); - let _ = Command::new("notify-send") - .args([ - "--app-name=Voxtype", - "--expire-time=5000", - "Voxtype: Invalid Model", - &format!("Unknown model '{}', using '{}'", model, default_model), - ]) - .spawn(); + // Send desktop notification + voxtype::notification::send_sync( + "Voxtype: Invalid Model", + &format!("Unknown model '{}', using '{}'", model, default_model), + ); } } if let Some(engine) = cli.engine { @@ -135,9 +138,10 @@ async fn main() -> anyhow::Result<()> { "paraformer" => config.engine = config::TranscriptionEngine::Paraformer, "dolphin" => config.engine = config::TranscriptionEngine::Dolphin, "omnilingual" => config.engine = config::TranscriptionEngine::Omnilingual, + "cohere" => config.engine = config::TranscriptionEngine::Cohere, _ => { eprintln!( - "Error: Invalid engine '{}'. 
Valid options: whisper, parakeet, moonshine, sensevoice, paraformer, dolphin, omnilingual", + "Error: Invalid engine '{}'. Valid options: whisper, parakeet, moonshine, sensevoice, paraformer, dolphin, omnilingual, cohere", engine ); std::process::exit(1); @@ -292,6 +296,12 @@ async fn main() -> anyhow::Result<()> { if cli.spoken_punctuation { config.text.spoken_punctuation = true; } + if cli.filter_fillers { + config.text.filter_filler_words = true; + } + if cli.no_filter_fillers { + config.text.filter_filler_words = false; + } if let Some(keys) = cli.paste_keys { config.output.paste_keys = Some(keys); } @@ -352,12 +362,74 @@ async fn main() -> anyhow::Result<()> { config.vad.min_speech_duration_ms = min_speech; } + // On macOS, detect if launched as app bundle executable (no subcommand, binary inside .app) + #[cfg(target_os = "macos")] + let default_command = if cli.command.is_none() { + std::env::current_exe() + .ok() + .and_then(|p| p.to_str().map(|s| s.contains(".app/Contents/MacOS/"))) + .unwrap_or(false) + .then_some(Commands::AppLaunch) + .unwrap_or(Commands::Daemon) + } else { + Commands::Daemon // unused, cli.command is Some + }; + #[cfg(not(target_os = "macos"))] + let default_command = Commands::Daemon; + // Run the appropriate command - match cli.command.unwrap_or(Commands::Daemon) { + match cli.command.unwrap_or(default_command) { Commands::Daemon => { let mut daemon = daemon::Daemon::new(config, config_path); daemon.run().await?; } + #[cfg(target_os = "macos")] + Commands::Menubar => { + let state_file = config + .resolve_state_file() + .ok_or_else(|| anyhow::anyhow!("state_file not configured"))?; + menubar::run(state_file); + // Note: menubar::run() never returns (runs macOS event loop) + } + #[cfg(target_os = "macos")] + Commands::AppLaunch => { + // Launched by Voxtype.app: start daemon in background, run menubar in foreground. 
+ // The binary must be the CFBundleExecutable (not exec'd from a wrapper script) + // so macOS Control Center can register the status bar scene correctly. + let logs_dir = dirs::home_dir() + .map(|h| h.join("Library/Logs/voxtype")) + .unwrap_or_else(|| std::path::PathBuf::from("/tmp/voxtype")); + let _ = std::fs::create_dir_all(&logs_dir); + + // First-launch auto-setup: create config and download model if needed + first_launch_setup(&config).await; + + // Kill any existing instances + let _ = std::process::Command::new("pkill") + .args(["-9", "-f", "voxtype-bin daemon"]) + .status(); + let _ = std::process::Command::new("pkill") + .args(["-9", "-f", "voxtype-bin menubar"]) + .status(); + let _ = std::fs::remove_file("/tmp/voxtype/voxtype.lock"); + let _ = std::fs::remove_file("/tmp/voxtype/menubar.lock"); + + // Start daemon as a child process with logging + let exe = std::env::current_exe()?; + let stdout = std::fs::File::create(logs_dir.join("stdout.log"))?; + let stderr = std::fs::File::create(logs_dir.join("stderr.log"))?; + let _daemon = std::process::Command::new(&exe) + .arg("daemon") + .stdout(stdout) + .stderr(stderr) + .spawn()?; + + // Run menubar in this process (keeps the app alive with menu bar icon) + let state_file = config + .resolve_state_file() + .ok_or_else(|| anyhow::anyhow!("state_file not configured"))?; + menubar::run(state_file); + } Commands::Transcribe { file, engine } => { if let Some(engine_name) = engine { @@ -369,8 +441,9 @@ async fn main() -> anyhow::Result<()> { "paraformer" => config.engine = config::TranscriptionEngine::Paraformer, "dolphin" => config.engine = config::TranscriptionEngine::Dolphin, "omnilingual" => config.engine = config::TranscriptionEngine::Omnilingual, + "cohere" => config.engine = config::TranscriptionEngine::Cohere, _ => { - eprintln!("Error: Invalid engine '{}'. Valid options: whisper, parakeet, moonshine, sensevoice, paraformer, dolphin, omnilingual", engine_name); + eprintln!("Error: Invalid engine '{}'. 
Valid options: whisper, parakeet, moonshine, sensevoice, paraformer, dolphin, omnilingual, cohere", engine_name); std::process::exit(1); } } @@ -426,6 +499,39 @@ async fn main() -> anyhow::Result<()> { setup::systemd::install().await?; } } + #[cfg(target_os = "macos")] + Some(SetupAction::Launchd { uninstall, status }) => { + if status { + setup::launchd::status().await?; + } else if uninstall { + setup::launchd::uninstall().await?; + } else { + setup::launchd::install().await?; + } + } + #[cfg(target_os = "macos")] + Some(SetupAction::AppBundle { uninstall, status }) => { + if status { + setup::app_bundle::status().await?; + } else if uninstall { + setup::app_bundle::uninstall().await?; + } else { + setup::app_bundle::install().await?; + } + } + #[cfg(target_os = "macos")] + Some(SetupAction::Hammerspoon { + install, + show, + hotkey, + toggle, + }) => { + setup::hammerspoon::run(install, show, &hotkey, toggle).await?; + } + #[cfg(target_os = "macos")] + Some(SetupAction::Macos) => { + setup::macos::run().await?; + } Some(SetupAction::Waybar { json, css, @@ -487,6 +593,20 @@ async fn main() -> anyhow::Result<()> { setup::gpu::show_status(); } } + Some(SetupAction::Variant { to }) => { + let variant = setup::binary::Variant::from_binary_name(&to) + .ok_or_else(|| anyhow::anyhow!( + "Unknown variant '{}'. 
Expected one of: {}", + to, + setup::binary::Variant::ALL + .iter() + .map(|v| v.binary_name()) + .collect::>() + .join(", ") + ))?; + setup::binary::switch_to(variant)?; + println!("Switched /usr/bin/voxtype to {}.", variant.binary_name()); + } Some(SetupAction::Onnx { enable, disable, @@ -534,6 +654,14 @@ async fn main() -> anyhow::Result<()> { show_config(&config).await?; } + Commands::Info { action } => { + run_info_command(action)?; + } + + Commands::Configure { force_package_mode } => { + voxtype::tui::run(force_package_mode)?; + } + Commands::Status { follow, format, @@ -550,6 +678,193 @@ async fn main() -> anyhow::Result<()> { Commands::Meeting { action } => { run_meeting_command(&config, action).await?; } + + Commands::CheckUpdate => { + check_for_updates().await?; + } + } + + Ok(()) +} + +/// First-launch auto-setup for macOS app bundle launches. +/// +/// Detects if this is the first launch by checking for a config file and downloaded model. +/// If either is missing, creates default config and downloads the recommended model +/// so the user can start recording immediately after granting permissions. 
+#[cfg(target_os = "macos")] +async fn first_launch_setup(_config: &config::Config) { + + // Check if config file exists + let config_exists = config::Config::default_path() + .map(|p| p.exists()) + .unwrap_or(false); + + // Check if any model is already downloaded + let models_dir = config::Config::models_dir(); + let has_model = models_dir.exists() + && std::fs::read_dir(&models_dir) + .map(|entries| { + entries.filter_map(|e| e.ok()).any(|e| { + let name = e.file_name().to_string_lossy().to_string(); + // Whisper models (ggml-*.bin) or Parakeet/ONNX model dirs + (name.starts_with("ggml-") && name.ends_with(".bin")) + || (e.path().is_dir() && e.path().join("encoder-model.onnx").exists()) + }) + }) + .unwrap_or(false); + + if config_exists && has_model { + return; // Not first launch + } + + // Create default config if missing + if !config_exists { + if let Some(config_path) = config::Config::default_path() { + if let Some(parent) = config_path.parent() { + let _ = std::fs::create_dir_all(parent); + } + let content = config::default_config_content(); + if std::fs::write(&config_path, &content).is_ok() { + tracing::info!("Created default config: {:?}", config_path); + } + } + } + + // Download default model if none present + if !has_model { + // Detect system language to choose the right model + let is_english = tokio::process::Command::new("defaults") + .args(["read", "NSGlobalDomain", "AppleLanguages"]) + .output() + .await + .map(|o| { + let s = String::from_utf8_lossy(&o.stdout); + s.lines() + .find(|l| l.trim().starts_with('"')) + .map(|l| l.trim().trim_matches(|c| c == '"' || c == ',').starts_with("en")) + .unwrap_or(true) + }) + .unwrap_or(true); + + // Show notification that model is downloading + let _ = std::process::Command::new("osascript") + .args([ + "-e", + "display notification \"Downloading speech model (this may take a minute)...\" with title \"Voxtype\"", + ]) + .status(); + + #[cfg(feature = "parakeet")] + let download_result = if is_english { + 
tracing::info!("First launch: downloading Parakeet model"); + setup::model::download_parakeet_model("parakeet-tdt-0.6b-v3-int8") + .and_then(|_| setup::model::set_parakeet_config("parakeet-tdt-0.6b-v3-int8")) + } else { + tracing::info!("First launch: downloading Whisper base model"); + setup::model::download_model("base") + .and_then(|_| setup::model::set_model_config("base")) + }; + + #[cfg(not(feature = "parakeet"))] + let download_result = { + let model = if is_english { "base.en" } else { "base" }; + tracing::info!("First launch: downloading Whisper {} model", model); + setup::model::download_model(model) + .and_then(|_| setup::model::set_model_config(model)) + }; + + match download_result { + Ok(_) => { + let _ = std::process::Command::new("osascript") + .args([ + "-e", + "display notification \"Ready! Press fn to start recording.\" with title \"Voxtype\"", + ]) + .status(); + } + Err(e) => { + tracing::error!("Failed to download model: {}", e); + let msg = format!( + "display notification \"Model download failed: {}. 
Run 'voxtype setup model' to try again.\" with title \"Voxtype\"", + e.to_string().replace('"', "'") + ); + let _ = std::process::Command::new("osascript") + .args(["-e", &msg]) + .status(); + } + } + } +} + +/// Check for updates by comparing version with GitHub releases +async fn check_for_updates() -> anyhow::Result<()> { + let current = env!("CARGO_PKG_VERSION"); + println!("Voxtype Update Check\n"); + println!("====================\n"); + println!("Current version: {}", current); + println!("Checking for updates...\n"); + + // Fetch latest release from GitHub API (blocking call wrapped in spawn_blocking) + let result = tokio::task::spawn_blocking(|| { + ureq::get("https://api.github.com/repos/peteonrails/voxtype/releases/latest") + .set("User-Agent", "voxtype-update-checker") + .call() + }) + .await?; + + match result { + Ok(resp) => { + let release: serde_json::Value = resp.into_json()?; + if let Some(tag) = release["tag_name"].as_str() { + let latest = tag.trim_start_matches('v'); + + // Compare versions using semver + let current_ver = semver::Version::parse(current) + .unwrap_or_else(|_| semver::Version::new(0, 0, 0)); + let latest_ver = semver::Version::parse(latest) + .unwrap_or_else(|_| semver::Version::new(0, 0, 0)); + + if latest_ver > current_ver { + println!( + "\x1b[33m⚠ Update available: {} → {}\x1b[0m\n", + current, latest + ); + println!( + "Download: https://github.com/peteonrails/voxtype/releases/tag/{}", + tag + ); + println!("Website: https://voxtype.io/download"); + + // Show release notes excerpt if available + if let Some(body) = release["body"].as_str() { + let summary: String = body.lines().take(5).collect::>().join("\n"); + if !summary.is_empty() { + println!("\nRelease notes:"); + println!("{}", summary); + if body.lines().count() > 5 { + println!("..."); + } + } + } + } else { + println!( + "\x1b[32m✓ You're on the latest version ({}).\x1b[0m", + current + ); + } + } else { + println!("Could not parse latest version from GitHub."); + 
} + } + Err(ureq::Error::Status(code, _)) => { + eprintln!("GitHub API returned status: {}", code); + eprintln!("Try again later or check manually: https://github.com/peteonrails/voxtype/releases"); + } + Err(e) => { + eprintln!("Failed to check for updates: {}", e); + eprintln!("Check manually: https://github.com/peteonrails/voxtype/releases"); + } } Ok(()) @@ -557,9 +872,6 @@ async fn main() -> anyhow::Result<()> { /// Check if the daemon is running, exit with error if not fn check_daemon_running() -> anyhow::Result<()> { - use nix::sys::signal::kill; - use nix::unistd::Pid; - let pid_file = config::Config::runtime_dir().join("pid"); if !pid_file.exists() { @@ -576,8 +888,8 @@ fn check_daemon_running() -> anyhow::Result<()> { .parse() .map_err(|e| anyhow::anyhow!("Invalid PID in file: {}", e))?; - // Check if the process is actually running - if kill(Pid::from_raw(pid), None).is_err() { + // Check if the process is actually running (signal 0 = check existence) + if unsafe { libc::kill(pid, 0) } != 0 { // Process doesn't exist, clean up stale PID file let _ = std::fs::remove_file(&pid_file); eprintln!("Error: Voxtype daemon is not running (stale PID file removed)."); @@ -594,12 +906,10 @@ fn send_record_command( action: RecordAction, top_level_model: Option<&str>, ) -> anyhow::Result<()> { - use nix::sys::signal::{kill, Signal}; - use nix::unistd::Pid; use voxtype::OutputModeOverride; - // Read PID from the pid file - let pid_file = config::Config::runtime_dir().join("pid"); + // Read PID from the lock file (daemon writes PID to voxtype.lock) + let pid_file = config::Config::runtime_dir().join("voxtype.lock"); if !pid_file.exists() { eprintln!("Error: Voxtype daemon is not running."); @@ -615,8 +925,8 @@ fn send_record_command( .parse() .map_err(|e| anyhow::anyhow!("Invalid PID in file: {}", e))?; - // Check if the process is actually running - if kill(Pid::from_raw(pid), None).is_err() { + // Check if the process is actually running (signal 0 = check existence) + 
if unsafe { libc::kill(pid, 0) } != 0 { // Process doesn't exist, clean up stale PID file let _ = std::fs::remove_file(&pid_file); eprintln!("Error: Voxtype daemon is not running (stale PID file removed)."); @@ -714,9 +1024,9 @@ fn send_record_command( } // For toggle, we need to read current state to decide which signal to send - let signal = match &action { - RecordAction::Start { .. } => Signal::SIGUSR1, - RecordAction::Stop { .. } => Signal::SIGUSR2, + let signal: libc::c_int = match &action { + RecordAction::Start { .. } => libc::SIGUSR1, + RecordAction::Stop { .. } => libc::SIGUSR2, RecordAction::Toggle { .. } => { // Read current state to determine action let state_file = match config.resolve_state_file() { @@ -738,16 +1048,21 @@ fn send_record_command( std::fs::read_to_string(&state_file).unwrap_or_else(|_| "idle".to_string()); if current_state.trim() == "recording" { - Signal::SIGUSR2 // Stop + libc::SIGUSR2 // Stop } else { - Signal::SIGUSR1 // Start + libc::SIGUSR1 // Start } } RecordAction::Cancel => unreachable!(), // Handled above }; - kill(Pid::from_raw(pid), signal) - .map_err(|e| anyhow::anyhow!("Failed to send signal to daemon: {}", e))?; + let result = unsafe { libc::kill(pid, signal) }; + if result != 0 { + return Err(anyhow::anyhow!( + "Failed to send signal to daemon: {}", + std::io::Error::last_os_error() + )); + } Ok(()) } @@ -905,13 +1220,14 @@ fn is_daemon_running() -> bool { Err(_) => return false, // No PID file = not running }; - let pid: u32 = match pid_str.trim().parse() { + let pid: i32 = match pid_str.trim().parse() { Ok(p) => p, Err(_) => return false, // Invalid PID = not running }; - // Check if process exists by testing /proc/{pid} - std::path::Path::new(&format!("/proc/{}", pid)).exists() + // Check if process exists using kill(pid, 0) - works on both Linux and macOS + // Signal 0 doesn't send a signal, just checks if process exists and we have permission + unsafe { libc::kill(pid, 0) == 0 } } /// Run the status command - show 
current daemon state @@ -1095,6 +1411,100 @@ fn format_state_json( } } +/// Dispatch `voxtype info `. +fn run_info_command(action: InfoAction) -> anyhow::Result<()> { + match action { + InfoAction::Variants { json } => { + let inv = setup::binary::inventory(); + if json { + println!("{}", serde_json::to_string_pretty(&inv)?); + } else { + print_variants_text(&inv); + } + } + } + Ok(()) +} + +fn print_variants_text(inv: &setup::binary::Inventory) { + use setup::binary::InstallKind; + + println!("Voxtype install"); + println!(" Binary: {}", inv.binary_path.display()); + println!( + " Install kind: {}", + match inv.install_kind { + InstallKind::Package => "package", + InstallKind::Source => "source", + } + ); + if let Some(dir) = &inv.package_lib_dir { + println!(" Lib dir: {}", dir.display()); + } + if !inv.compiled_features.is_empty() { + println!(" Features: {}", inv.compiled_features.join(", ")); + } + + println!(); + println!("Hardware"); + println!( + " CPU: AVX2={}, AVX-512={}", + inv.cpu.avx2, inv.cpu.avx512 + ); + println!( + " GPU: NVIDIA={}, AMD={}", + inv.gpus.nvidia, inv.gpus.amd + ); + + println!(); + println!("Recommended for this hardware"); + println!( + " Whisper: ★ {} — {}", + inv.recommendation.whisper.display(), + inv.recommendation.whisper_reason + ); + println!( + " ONNX: ★ {} — {}", + inv.recommendation.onnx.display(), + inv.recommendation.onnx_reason + ); + + println!(); + if matches!(inv.install_kind, InstallKind::Source) { + println!("Source build: variant switching not applicable."); + println!("To enable a different engine, rebuild with the appropriate Cargo features."); + return; + } + + println!("Variants"); + if let Some(active) = inv.active_variant { + println!( + " Active: {} ({})", + active.display(), + active.binary_name() + ); + } else { + println!(" Active: unknown (symlink missing or unrecognized)"); + } + + println!(); + println!(" Available:"); + for status in &inv.variants { + let mark = if status.active { + "● active" + } 
else if !status.installed { + " not installed" + } else if !status.runs_on_this_cpu { + " installed (won't run on this CPU)" + } else if !status.gpu_available { + " installed (no compatible GPU detected)" + } else { + " installed" + }; + println!(" {:<22} {}", status.variant.display(), mark); + } +} + /// Show current configuration async fn show_config(config: &config::Config) -> anyhow::Result<()> { println!("Current Configuration\n"); @@ -1292,6 +1702,7 @@ async fn show_config(config: &config::Config) -> anyhow::Result<()> { " on_transcription = {}", config.output.notification.on_transcription ); + println!(" urgency = {:?}", config.output.notification.urgency); println!("\n[status]"); println!(" icon_theme = {:?}", config.status.icon_theme); @@ -1314,10 +1725,14 @@ async fn show_config(config: &config::Config) -> anyhow::Result<()> { setup::print_output_chain_status(&output_status); println!("\n---"); - println!( - "Config file: {:?}", - config::Config::default_path().unwrap_or_else(|| PathBuf::from("(not found)")) - ); + match config::Config::resolve_existing_path() { + Some(path) => println!("Config file: {:?} (loaded)", path), + None => println!( + "Config file: {:?} (not found, using defaults; system fallback {:?} also missing)", + config::Config::default_path().unwrap_or_else(|| PathBuf::from("(unknown)")), + config::Config::system_path() + ), + } println!("Models dir: {:?}", config::Config::models_dir()); Ok(()) diff --git a/src/meeting/chunk.rs b/src/meeting/chunk.rs index c624b6e4..b17c6fab 100644 --- a/src/meeting/chunk.rs +++ b/src/meeting/chunk.rs @@ -475,6 +475,7 @@ mod tests { // Speech covers the entire buffer let (start, end) = segments[0]; assert_eq!(start, 0); + assert_eq!(end, speech.len()); } #[test] diff --git a/src/meeting/data.rs b/src/meeting/data.rs index 7ee005e5..b5b068e8 100644 --- a/src/meeting/data.rs +++ b/src/meeting/data.rs @@ -57,7 +57,6 @@ pub enum AudioSource { Unknown, } - impl std::fmt::Display for AudioSource { fn fmt(&self, 
f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -361,7 +360,6 @@ pub enum MeetingStatus { Cancelled, } - /// Metadata for a meeting #[derive(Debug, Clone, Serialize, Deserialize)] pub struct MeetingMetadata { diff --git a/src/meeting/diarization/ml.rs b/src/meeting/diarization/ml.rs index 244d4306..5b7b3027 100644 --- a/src/meeting/diarization/ml.rs +++ b/src/meeting/diarization/ml.rs @@ -107,18 +107,22 @@ impl MlDiarizerState { // Return existing speaker tracing::debug!( "Speaker match: {} (similarity: {:.3})", - self.speaker_embeddings[idx].speaker_id, sim + self.speaker_embeddings[idx].speaker_id, + sim ); self.speaker_embeddings[idx].speaker_id.clone() } else if self.next_speaker_id < max_speakers { // Log best similarity for debugging - let best_sim = self.speaker_embeddings.iter() + let best_sim = self + .speaker_embeddings + .iter() .map(|e| new_embedding.cosine_similarity(e)) .fold(f32::NEG_INFINITY, f32::max); if !self.speaker_embeddings.is_empty() { tracing::debug!( "New speaker (best similarity: {:.3}, threshold: {:.3})", - best_sim, similarity_threshold + best_sim, + similarity_threshold ); } // Create new speaker @@ -210,7 +214,7 @@ impl MlDiarizer { } match Session::builder() { - Ok(builder) => match builder.commit_from_file(&path) { + Ok(mut builder) => match builder.commit_from_file(&path) { Ok(session) => { self.session = Some(Mutex::new(session)); tracing::info!("Loaded speaker embedding model: {:?}", path); @@ -322,10 +326,7 @@ impl Diarizer for MlDiarizer { // Segment timestamps are meeting-relative, but samples are chunk-relative. // Subtract the chunk's base offset to get correct sample indices. 
- let chunk_offset_ms = transcript_segments - .first() - .map(|s| s.start_ms) - .unwrap_or(0); + let chunk_offset_ms = transcript_segments.first().map(|s| s.start_ms).unwrap_or(0); let mut results = Vec::new(); diff --git a/src/meeting/export/txt.rs b/src/meeting/export/txt.rs index 8dde8f33..0513e75c 100644 --- a/src/meeting/export/txt.rs +++ b/src/meeting/export/txt.rs @@ -108,7 +108,7 @@ fn wrap_text(text: &str, width: usize) -> String { #[cfg(test)] mod tests { use super::*; - use crate::meeting::data::{MeetingMetadata, Transcript, TranscriptSegment}; + use crate::meeting::data::TranscriptSegment; fn create_test_meeting() -> MeetingData { let mut meeting = MeetingData::new(Some("Test Meeting".to_string())); diff --git a/src/meeting/mod.rs b/src/meeting/mod.rs index 368532b6..ed2d219c 100644 --- a/src/meeting/mod.rs +++ b/src/meeting/mod.rs @@ -332,8 +332,7 @@ impl MeetingDaemon { // Update context for next chunk (per source), using the last non-empty // segment to avoid losing useful context when a chunk ends with silence if let Some(last_seg) = result.segments.iter().rfind(|s| !s.text.is_empty()) { - self.last_chunk_text - .insert(source, last_seg.text.clone()); + self.last_chunk_text.insert(source, last_seg.text.clone()); } } diff --git a/src/meeting/state.rs b/src/meeting/state.rs index ee38b623..49572ed1 100644 --- a/src/meeting/state.rs +++ b/src/meeting/state.rs @@ -36,8 +36,7 @@ impl ChunkState { } /// Meeting transcription state -#[derive(Debug, Clone)] -#[derive(Default)] +#[derive(Debug, Clone, Default)] pub enum MeetingState { /// No meeting in progress #[default] @@ -74,7 +73,6 @@ pub enum MeetingState { }, } - impl MeetingState { /// Create a new idle state pub fn new() -> Self { diff --git a/src/meeting/storage.rs b/src/meeting/storage.rs index 3c0e1ca0..3dee20eb 100644 --- a/src/meeting/storage.rs +++ b/src/meeting/storage.rs @@ -899,7 +899,7 @@ mod tests { #[test] fn test_create_meeting_creates_directory() { - let (storage, temp) = 
create_test_storage(); + let (storage, _temp) = create_test_storage(); let metadata = MeetingMetadata::new(Some("Dir Test".to_string())); let path = storage.create_meeting(&metadata).unwrap(); assert!(path.exists()); diff --git a/src/menubar.rs b/src/menubar.rs new file mode 100644 index 00000000..62e12c1a --- /dev/null +++ b/src/menubar.rs @@ -0,0 +1,717 @@ +//! macOS menu bar integration +//! +//! Provides a system tray icon showing voxtype status with a context menu +//! for controlling recording and configuring settings. + +use crate::config::{ActivationMode, Config, OutputMode, TranscriptionEngine}; +use notify::{EventKind, RecommendedWatcher, RecursiveMode, Watcher}; +use pidlock::Pidlock; +use std::path::PathBuf; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use tao::event_loop::{ControlFlow, EventLoopBuilder}; +use tray_icon::{ + menu::{CheckMenuItem, Menu, MenuEvent, MenuItem, PredefinedMenuItem, Submenu}, + TrayIconBuilder, +}; + +/// Current voxtype state +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum VoxtypeState { + Idle, + Recording, + Transcribing, + Stopped, +} + +impl VoxtypeState { + fn from_str(s: &str) -> Self { + match s.trim().to_lowercase().as_str() { + "idle" => VoxtypeState::Idle, + "recording" => VoxtypeState::Recording, + "transcribing" => VoxtypeState::Transcribing, + _ => VoxtypeState::Stopped, + } + } + + fn icon(&self) -> &'static str { + match self { + VoxtypeState::Idle => "🎙", + VoxtypeState::Recording => "🔴", + VoxtypeState::Transcribing => "⏳", + VoxtypeState::Stopped => "⬛", + } + } + + fn status_text(&self) -> &'static str { + match self { + VoxtypeState::Idle => "Status: Ready", + VoxtypeState::Recording => "Status: Recording...", + VoxtypeState::Transcribing => "Status: Transcribing...", + VoxtypeState::Stopped => "Status: Daemon not running", + } + } +} + +/// Menu item IDs +mod menu_ids { + // Recording controls + pub const TOGGLE: &str = "toggle"; + pub 
const CANCEL: &str = "cancel"; + + // Engine selection + pub const ENGINE_PARAKEET: &str = "engine_parakeet"; + pub const ENGINE_WHISPER: &str = "engine_whisper"; + + // Hotkey mode + pub const HOTKEY_PTT: &str = "hotkey_ptt"; + pub const HOTKEY_TOGGLE: &str = "hotkey_toggle"; + + // Output mode + pub const OUTPUT_TYPE: &str = "output_type"; + pub const OUTPUT_CLIPBOARD: &str = "output_clipboard"; + pub const OUTPUT_PASTE: &str = "output_paste"; + + // Model prefixes (actual ID will be model_) + pub const MODEL_PREFIX: &str = "model_"; + + // Utilities + pub const DOWNLOAD_MODEL: &str = "download_model"; + pub const OPEN_CONFIG: &str = "open_config"; + pub const VIEW_LOGS: &str = "view_logs"; + pub const RESTART_DAEMON: &str = "restart_daemon"; + + // Auto-start + pub const AUTOSTART_ENABLE: &str = "autostart_enable"; + + // Quit + pub const QUIT: &str = "quit"; +} + +/// Read state from file +fn read_state_from_file(path: &PathBuf) -> VoxtypeState { + std::fs::read_to_string(path) + .map(|s| VoxtypeState::from_str(&s)) + .unwrap_or(VoxtypeState::Stopped) +} + +/// Get the voxtype binary path +fn get_voxtype_path() -> PathBuf { + std::env::current_exe() + .ok() + .and_then(|p| p.parent().map(|d| d.join("voxtype"))) + .filter(|p| p.exists()) + .unwrap_or_else(|| PathBuf::from("voxtype")) +} + +/// Execute voxtype command +fn voxtype_cmd(args: &[&str]) { + let voxtype_path = get_voxtype_path(); + let _ = std::process::Command::new(voxtype_path).args(args).spawn(); +} + +/// Execute voxtype command and wait for completion +fn voxtype_cmd_wait(args: &[&str]) -> bool { + let voxtype_path = get_voxtype_path(); + std::process::Command::new(voxtype_path) + .args(args) + .status() + .map(|s| s.success()) + .unwrap_or(false) +} + +/// Open a file or URL with the default application +fn open_path(path: &str) { + let _ = std::process::Command::new("open").arg(path).spawn(); +} + +/// Check if LaunchAgent is installed +fn is_autostart_enabled() -> bool { + let home = 
dirs::home_dir().unwrap_or_default(); + let plist = home.join("Library/LaunchAgents/io.voxtype.daemon.plist"); + plist.exists() +} + +/// Get list of downloaded models (both Whisper and Parakeet) +fn get_downloaded_models() -> Vec<(String, bool)> { + let mut models = Vec::new(); + let models_dir = Config::models_dir(); + + if let Ok(entries) = std::fs::read_dir(&models_dir) { + for entry in entries.flatten() { + let name = entry.file_name().to_string_lossy().to_string(); + let path = entry.path(); + + // Check for Whisper models (ggml-*.bin files) + if name.starts_with("ggml-") && name.ends_with(".bin") { + // Extract model name from filename (e.g., "ggml-base.en.bin" -> "base.en") + let model_name = name + .strip_prefix("ggml-") + .and_then(|s| s.strip_suffix(".bin")) + .unwrap_or(&name) + .to_string(); + models.push((model_name, false)); // false = Whisper + } + + // Check for Parakeet models (directories with encoder-model.onnx) + if path.is_dir() && name.contains("parakeet") { + if path.join("encoder-model.onnx").exists() { + models.push((name, true)); // true = Parakeet + } + } + } + } + + models.sort(); + models +} + +/// Update config file with new engine +fn set_engine(engine: TranscriptionEngine) -> bool { + let config_path = match Config::default_path() { + Some(p) => p, + None => return false, + }; + + let content = std::fs::read_to_string(&config_path).unwrap_or_default(); + + let engine_str = match engine { + TranscriptionEngine::Parakeet => "parakeet", + TranscriptionEngine::Whisper => "whisper", + TranscriptionEngine::Moonshine => "moonshine", + TranscriptionEngine::SenseVoice => "sensevoice", + TranscriptionEngine::Paraformer => "paraformer", + TranscriptionEngine::Dolphin => "dolphin", + TranscriptionEngine::Omnilingual => "omnilingual", + TranscriptionEngine::Cohere => "cohere", + }; + + // Check if engine line exists + let new_content = if content.contains("engine =") { + // Replace existing engine line + let re = 
regex::Regex::new(r#"engine\s*=\s*"[^"]*""#).unwrap(); + re.replace(&content, format!(r#"engine = "{}""#, engine_str)) + .to_string() + } else { + // Add engine line at the beginning + format!("engine = \"{}\"\n{}", engine_str, content) + }; + + std::fs::write(&config_path, new_content).is_ok() +} + +/// Update config file with new output mode +fn set_output_mode(mode: OutputMode) -> bool { + let config_path = match Config::default_path() { + Some(p) => p, + None => return false, + }; + + let content = std::fs::read_to_string(&config_path).unwrap_or_default(); + + let mode_str = match mode { + OutputMode::Type => "type", + OutputMode::Clipboard => "clipboard", + OutputMode::Paste => "paste", + OutputMode::File => "file", + }; + + // Check if [output] section exists with mode + let new_content = if content.contains("[output]") { + // Check if mode line exists under [output] + if let Some(output_start) = content.find("[output]") { + let after_output = &content[output_start..]; + if after_output.contains("mode =") { + // Replace existing mode line + let re = regex::Regex::new(r#"(\[output\][^\[]*?)mode\s*=\s*"[^"]*""#).unwrap(); + re.replace(&content, format!(r#"$1mode = "{}""#, mode_str)) + .to_string() + } else { + // Add mode line after [output] + content.replace("[output]", &format!("[output]\nmode = \"{}\"", mode_str)) + } + } else { + content.clone() + } + } else { + // Add [output] section + format!("{}\n[output]\nmode = \"{}\"\n", content, mode_str) + }; + + std::fs::write(&config_path, new_content).is_ok() +} + +/// Update config file with new hotkey mode +fn set_hotkey_mode(mode: ActivationMode) -> bool { + let config_path = match Config::default_path() { + Some(p) => p, + None => return false, + }; + + let content = std::fs::read_to_string(&config_path).unwrap_or_default(); + let mode_str = match mode { + ActivationMode::PushToTalk => "push_to_talk", + ActivationMode::Toggle => "toggle", + }; + + // Check if [hotkey] section exists + let new_content = if 
content.contains("[hotkey]") { + if content.contains("mode =") { + // Replace existing mode line + let re = regex::Regex::new(r#"mode\s*=\s*"[^"]*""#).unwrap(); + re.replace(&content, format!(r#"mode = "{}""#, mode_str)) + .to_string() + } else { + // Add mode line after [hotkey] + content.replace("[hotkey]", &format!("[hotkey]\nmode = \"{}\"", mode_str)) + } + } else { + // Add [hotkey] section + format!("{}\n[hotkey]\nmode = \"{}\"\n", content, mode_str) + }; + + std::fs::write(&config_path, new_content).is_ok() +} + +/// Update config to use a specific model +fn set_model(model_name: &str, is_parakeet: bool) -> bool { + if is_parakeet { + voxtype_cmd_wait(&["setup", "parakeet", "--set", model_name]) + } else { + voxtype_cmd_wait(&["setup", "model", "--set", model_name]) + } +} + +/// Restart the daemon +fn restart_daemon() { + // Try launchctl first + let _ = std::process::Command::new("launchctl") + .args(["kickstart", "-k", "gui/$(id -u)/io.voxtype.daemon"]) + .status(); + + // Fallback: kill and restart + let _ = std::process::Command::new("pkill") + .args(["-f", "voxtype daemon"]) + .status(); + + std::thread::sleep(Duration::from_millis(500)); + + voxtype_cmd(&["daemon"]); +} + +/// Show notification +fn notify(title: &str, message: &str) { + let _ = std::process::Command::new("osascript") + .args([ + "-e", + &format!( + "display notification \"{}\" with title \"{}\"", + message, title + ), + ]) + .spawn(); +} + +/// Build the settings submenus +/// Returns (menu, status_item) so status can be updated later +fn build_menu(config: &Config) -> (Menu, MenuItem) { + let menu = Menu::new(); + + // Recording controls + let toggle_item = MenuItem::with_id(menu_ids::TOGGLE, "Toggle Recording", true, None); + let cancel_item = MenuItem::with_id(menu_ids::CANCEL, "Cancel Recording", true, None); + + menu.append(&toggle_item).unwrap(); + menu.append(&cancel_item).unwrap(); + menu.append(&PredefinedMenuItem::separator()).unwrap(); + + // Engine submenu + let 
engine_menu = Submenu::new("Engine", true); + let is_parakeet = config.engine == TranscriptionEngine::Parakeet; + + #[cfg(feature = "parakeet")] + { + let parakeet_item = CheckMenuItem::with_id( + menu_ids::ENGINE_PARAKEET, + "🦜 Parakeet (Fast)", + true, + is_parakeet, + None, + ); + engine_menu.append(¶keet_item).unwrap(); + } + + let whisper_item = CheckMenuItem::with_id( + menu_ids::ENGINE_WHISPER, + "🗣️ Whisper", + true, + !is_parakeet, + None, + ); + engine_menu.append(&whisper_item).unwrap(); + menu.append(&engine_menu).unwrap(); + + // Model submenu + let model_menu = Submenu::new("Model", true); + let downloaded_models = get_downloaded_models(); + let current_model = if is_parakeet { + config + .parakeet + .as_ref() + .map(|p| p.model.clone()) + .unwrap_or_default() + } else { + config.whisper.model.clone() + }; + + if downloaded_models.is_empty() { + let no_models = MenuItem::new("No models downloaded", false, None); + model_menu.append(&no_models).unwrap(); + } else { + for (model_name, model_is_parakeet) in &downloaded_models { + // Show models for the current engine + if *model_is_parakeet == is_parakeet { + let is_current = model_name == ¤t_model; + let display_name = if *model_is_parakeet { + format!("🦜 {}", model_name) + } else { + model_name.clone() + }; + let item = CheckMenuItem::with_id( + format!("{}{}", menu_ids::MODEL_PREFIX, model_name), + display_name, + true, + is_current, + None, + ); + model_menu.append(&item).unwrap(); + } + } + } + + model_menu.append(&PredefinedMenuItem::separator()).unwrap(); + let download_item = + MenuItem::with_id(menu_ids::DOWNLOAD_MODEL, "Download Model...", true, None); + model_menu.append(&download_item).unwrap(); + menu.append(&model_menu).unwrap(); + + // Output mode submenu + let output_menu = Submenu::new("Output Mode", true); + let output_type = CheckMenuItem::with_id( + menu_ids::OUTPUT_TYPE, + "Type Text", + true, + config.output.mode == OutputMode::Type, + None, + ); + let output_clipboard = 
CheckMenuItem::with_id( + menu_ids::OUTPUT_CLIPBOARD, + "Copy to Clipboard", + true, + config.output.mode == OutputMode::Clipboard, + None, + ); + let output_paste = CheckMenuItem::with_id( + menu_ids::OUTPUT_PASTE, + "Clipboard + Paste", + true, + config.output.mode == OutputMode::Paste, + None, + ); + output_menu.append(&output_type).unwrap(); + output_menu.append(&output_clipboard).unwrap(); + output_menu.append(&output_paste).unwrap(); + menu.append(&output_menu).unwrap(); + + // Hotkey mode submenu + let hotkey_menu = Submenu::new("Hotkey Mode", true); + let is_toggle = config.hotkey.mode == ActivationMode::Toggle; + let ptt_item = CheckMenuItem::with_id( + menu_ids::HOTKEY_PTT, + "Push-to-Talk (hold)", + true, + !is_toggle, + None, + ); + let toggle_item = CheckMenuItem::with_id( + menu_ids::HOTKEY_TOGGLE, + "Toggle (press to start/stop)", + true, + is_toggle, + None, + ); + hotkey_menu.append(&ptt_item).unwrap(); + hotkey_menu.append(&toggle_item).unwrap(); + menu.append(&hotkey_menu).unwrap(); + + // Auto-start submenu + let autostart_menu = Submenu::new("Auto-start", true); + let autostart_enabled = is_autostart_enabled(); + let enable_item = CheckMenuItem::with_id( + menu_ids::AUTOSTART_ENABLE, + "Start at Login", + true, + autostart_enabled, + None, + ); + autostart_menu.append(&enable_item).unwrap(); + menu.append(&autostart_menu).unwrap(); + + menu.append(&PredefinedMenuItem::separator()).unwrap(); + + // Status (disabled, just for display) + let status_item = MenuItem::new("Status: Checking...", false, None); + menu.append(&status_item).unwrap(); + + menu.append(&PredefinedMenuItem::separator()).unwrap(); + + // Utilities + let config_item = MenuItem::with_id(menu_ids::OPEN_CONFIG, "Edit Config File...", true, None); + let logs_item = MenuItem::with_id(menu_ids::VIEW_LOGS, "View Logs", true, None); + let restart_item = MenuItem::with_id(menu_ids::RESTART_DAEMON, "Restart Daemon", true, None); + + menu.append(&config_item).unwrap(); + 
menu.append(&logs_item).unwrap(); + menu.append(&restart_item).unwrap(); + + menu.append(&PredefinedMenuItem::separator()).unwrap(); + + // Quit + let quit_item = MenuItem::with_id(menu_ids::QUIT, "Quit Menu Bar", true, None); + menu.append(&quit_item).unwrap(); + + (menu, status_item) +} + +/// Run the menu bar application +/// This should be called from the main thread +/// Note: This function never returns (runs the macOS event loop) +pub fn run(state_file: PathBuf) -> ! { + println!("Starting Voxtype menu bar..."); + println!("State file: {}", state_file.display()); + + // Single instance check + let lock_path = Config::runtime_dir().join("menubar.lock"); + let lock_path_str = lock_path.to_string_lossy().to_string(); + let mut pidlock = Pidlock::new(&lock_path_str); + + match pidlock.acquire() { + Ok(_) => { + println!("Acquired menu bar lock"); + } + Err(_) => { + eprintln!("Error: Another voxtype menubar instance is already running."); + std::process::exit(1); + } + } + + // Check if state file exists (daemon should be running) + if !state_file.exists() { + println!("\nWarning: State file not found. Is the voxtype daemon running?"); + println!("Start it with: voxtype daemon\n"); + } + + // Load config + let config = crate::config::load_config(None).unwrap_or_default(); + + // Build menu (returns menu and status item for updates) + let (menu, status_item) = build_menu(&config); + + // Get initial state + let initial_state = read_state_from_file(&state_file); + + // Update status item with initial state + let _ = status_item.set_text(initial_state.status_text()); + + // Create tray icon + let tray = TrayIconBuilder::new() + .with_tooltip("Voxtype") + .with_title(initial_state.icon()) + .with_menu(Box::new(menu)) + .build() + .expect("Failed to create tray icon"); + + println!("Menu bar is running. 
Look for the icon in your menu bar."); + println!("Press Ctrl+C to stop.\n"); + + // Track state + let mut last_state = initial_state; + let running = Arc::new(AtomicBool::new(true)); + + // Watch state file for changes via kqueue (instant notification, no polling) + let state_changed = Arc::new(AtomicBool::new(false)); + let state_changed_writer = state_changed.clone(); + let watch_path = state_file.clone(); + let _watcher = { + let mut watcher: RecommendedWatcher = + notify::recommended_watcher(move |res: notify::Result| { + if let Ok(event) = res { + if matches!(event.kind, EventKind::Modify(_) | EventKind::Create(_)) { + state_changed_writer.store(true, Ordering::SeqCst); + } + } + }) + .expect("Failed to create file watcher"); + // Watch the parent directory since the state file may be recreated + if let Some(parent) = watch_path.parent() { + watcher + .watch(parent, RecursiveMode::NonRecursive) + .unwrap_or_else(|e| eprintln!("Warning: could not watch state dir: {}", e)); + } + watcher // keep alive + }; + + // Set up menu event receiver + let menu_channel = MenuEvent::receiver(); + + // Create event loop + let event_loop = EventLoopBuilder::new().build(); + + event_loop.run(move |_event, _, control_flow| { + // Wake every 100ms to check menu events; state updates arrive via kqueue flag + *control_flow = ControlFlow::WaitUntil(Instant::now() + Duration::from_millis(100)); + + // Check for menu events (non-blocking) + if let Ok(event) = menu_channel.try_recv() { + let id = event.id().0.as_str(); + + match id { + // Recording controls + menu_ids::TOGGLE => { + voxtype_cmd(&["record", "toggle"]); + } + menu_ids::CANCEL => { + voxtype_cmd(&["record", "cancel"]); + } + + // Engine selection + menu_ids::ENGINE_PARAKEET => { + if set_engine(TranscriptionEngine::Parakeet) { + notify( + "Voxtype", + "Switched to Parakeet engine. 
Restart daemon to apply.", + ); + } + } + menu_ids::ENGINE_WHISPER => { + if set_engine(TranscriptionEngine::Whisper) { + notify( + "Voxtype", + "Switched to Whisper engine. Restart daemon to apply.", + ); + } + } + + // Hotkey mode + menu_ids::HOTKEY_PTT => { + if set_hotkey_mode(ActivationMode::PushToTalk) { + notify( + "Voxtype", + "Switched to push-to-talk mode. Restart daemon to apply.", + ); + } + } + menu_ids::HOTKEY_TOGGLE => { + if set_hotkey_mode(ActivationMode::Toggle) { + notify( + "Voxtype", + "Switched to toggle mode. Restart daemon to apply.", + ); + } + } + + // Output mode + menu_ids::OUTPUT_TYPE => { + if set_output_mode(OutputMode::Type) { + notify("Voxtype", "Output mode: Type text"); + } + } + menu_ids::OUTPUT_CLIPBOARD => { + if set_output_mode(OutputMode::Clipboard) { + notify("Voxtype", "Output mode: Copy to clipboard"); + } + } + menu_ids::OUTPUT_PASTE => { + if set_output_mode(OutputMode::Paste) { + notify("Voxtype", "Output mode: Clipboard + Paste"); + } + } + + // Auto-start + menu_ids::AUTOSTART_ENABLE => { + if is_autostart_enabled() { + // Disable + if voxtype_cmd_wait(&["setup", "launchd", "--uninstall"]) { + notify("Voxtype", "Auto-start disabled"); + } + } else { + // Enable + if voxtype_cmd_wait(&["setup", "launchd"]) { + notify("Voxtype", "Auto-start enabled"); + } + } + } + + // Utilities + menu_ids::DOWNLOAD_MODEL => { + // Open terminal with model download command + let voxtype_path = get_voxtype_path(); + let script = format!( + "tell application \"Terminal\" to do script \"{}\" & \" setup model\"", + voxtype_path.display() + ); + let _ = std::process::Command::new("osascript") + .args(["-e", &script]) + .spawn(); + } + menu_ids::OPEN_CONFIG => { + if let Some(config_path) = Config::default_path() { + open_path(config_path.to_str().unwrap_or("")); + } + } + menu_ids::VIEW_LOGS => { + let home = dirs::home_dir().unwrap_or_default(); + let log_path = home.join("Library/Logs/voxtype"); + 
open_path(log_path.to_str().unwrap_or("")); + } + menu_ids::RESTART_DAEMON => { + notify("Voxtype", "Restarting daemon..."); + restart_daemon(); + } + + // Quit + menu_ids::QUIT => { + running.store(false, Ordering::SeqCst); + *control_flow = ControlFlow::Exit; + } + + // Model selection (dynamic IDs) + _ if id.starts_with(menu_ids::MODEL_PREFIX) => { + let model_name = id.strip_prefix(menu_ids::MODEL_PREFIX).unwrap_or(""); + let is_parakeet = model_name.contains("parakeet"); + if set_model(model_name, is_parakeet) { + notify("Voxtype", &format!("Switched to model: {}", model_name)); + } + } + + _ => {} + } + } + + // Update state when file watcher signals a change + if state_changed.swap(false, Ordering::SeqCst) { + let new_state = read_state_from_file(&state_file); + + if new_state != last_state { + let _ = tray.set_title(Some(new_state.icon())); + let _ = status_item.set_text(new_state.status_text()); + last_state = new_state; + } + } + + if !running.load(Ordering::SeqCst) { + *control_flow = ControlFlow::Exit; + } + }); +} diff --git a/src/model_manager.rs b/src/model_manager.rs index 4446e8bf..63d3e8c2 100644 --- a/src/model_manager.rs +++ b/src/model_manager.rs @@ -245,8 +245,16 @@ impl ModelManager { /// Prepare a model for transcription (called when recording starts) /// /// For subprocess mode, this spawns the worker early so it can load - /// the model while the user is speaking. - pub fn prepare_model(&mut self, model: Option<&str>) -> Result<(), TranscribeError> { + /// the model while the user is speaking. The actual worker spawn and + /// model load happen on a blocking thread, so the async event loop + /// (and audio capture) is not blocked. Returns a JoinHandle that the + /// caller must await before invoking transcription, so that we don't + /// race against the in-flight prepare and end up spawning a second + /// worker. 
+ pub fn prepare_model( + &mut self, + model: Option<&str>, + ) -> Result>, TranscribeError> { let model_name = model .map(|s| s.to_string()) .unwrap_or_else(|| self.config.model.clone()); @@ -257,26 +265,30 @@ impl ModelManager { "Cannot prepare unavailable model '{}', will use default", model_name ); - return Ok(()); + return Ok(None); } // For GPU isolation, spawn subprocess early if self.config.gpu_isolation && self.config.effective_mode() == WhisperMode::Local { - // Create and prepare subprocess transcriber let transcriber = self.create_subprocess_transcriber(&model_name)?; - transcriber.prepare(); - // Store it temporarily for the upcoming transcription + // Store the Arc immediately so get_prepared_transcriber can retrieve it. + // The worker spawn happens on a blocking thread; the prepared_worker + // mutex inside SubprocessTranscriber is populated when ready. self.loaded_models.insert( format!("_prepared_{}", model_name), LoadedModel { - transcriber, + transcriber: transcriber.clone(), last_used: Instant::now(), is_primary: false, }, ); + let handle = tokio::task::spawn_blocking(move || { + transcriber.prepare(); + }); + return Ok(Some(handle)); } - Ok(()) + Ok(None) } /// Get a prepared transcriber (if available) or create one diff --git a/src/notification.rs b/src/notification.rs new file mode 100644 index 00000000..175ea64f --- /dev/null +++ b/src/notification.rs @@ -0,0 +1,180 @@ +//! Platform-specific desktop notifications +//! +//! Provides a unified interface for sending desktop notifications on +//! different platforms: +//! - Linux: Uses notify-send (libnotify) +//! - macOS: Uses terminal-notifier with engine-specific icons + +use std::process::Stdio; + +#[cfg(target_os = "linux")] +use tokio::process::Command; + +use crate::config::TranscriptionEngine; + +/// Send a desktop notification with the given title and body. +/// +/// This function is async and non-blocking. 
Notification failures are +/// logged but don't propagate errors (notifications are best-effort). +pub async fn send(title: &str, body: &str) { + send_with_engine(title, body, None).await; +} + +/// Send a desktop notification with optional engine icon. +/// +/// On macOS, when an engine is provided, the engine-specific icon is shown +/// as a content image in the notification. +pub async fn send_with_engine(title: &str, body: &str, engine: Option) { + #[cfg(target_os = "linux")] + { + let _ = engine; // Linux doesn't use engine icons in notifications + send_linux(title, body).await; + } + + #[cfg(target_os = "macos")] + send_macos_native(title, body, engine); + + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + { + tracing::debug!("Notifications not supported on this platform"); + let _ = (title, body, engine); // Suppress unused warnings + } +} + +/// Send a notification on Linux using notify-send +#[cfg(target_os = "linux")] +async fn send_linux(title: &str, body: &str) { + let result = Command::new("notify-send") + .args(["--app-name=Voxtype", "--expire-time=2000", title, body]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .await; + + if let Err(e) = result { + tracing::debug!("Failed to send notification: {}", e); + } +} + +/// Send a macOS notification using terminal-notifier +/// Falls back to osascript if terminal-notifier is not installed +#[cfg(target_os = "macos")] +fn send_macos_native(title: &str, body: &str, engine: Option) { + // Try bundled terminal-notifier first, then system PATH, then osascript + let bundled_path = + "/Applications/Voxtype.app/Contents/Resources/terminal-notifier.app/Contents/MacOS/terminal-notifier"; + + let notifier_paths = [bundled_path, "terminal-notifier"]; + + // Engine-specific content images + let content_image = engine.and_then(|e| match e { + TranscriptionEngine::Parakeet => { + Some("/Applications/Voxtype.app/Contents/Resources/parakeet.png") + } + TranscriptionEngine::Whisper => { + 
Some("/Applications/Voxtype.app/Contents/Resources/whisper.png") + } + TranscriptionEngine::Moonshine + | TranscriptionEngine::SenseVoice + | TranscriptionEngine::Paraformer + | TranscriptionEngine::Dolphin + | TranscriptionEngine::Omnilingual + | TranscriptionEngine::Cohere => None, + }); + + for notifier in notifier_paths { + let mut cmd = std::process::Command::new(notifier); + cmd.args([ + "-title", + title, + "-message", + body, + "-sender", + "io.voxtype.menubar", + ]); + + if let Some(image_path) = content_image { + // Only add content image if the file exists + if std::path::Path::new(image_path).exists() { + cmd.args(["-contentImage", image_path]); + } + } + + let result = cmd.stdout(Stdio::null()).stderr(Stdio::null()).status(); + + match result { + Ok(status) if status.success() => { + tracing::debug!("Sent notification via {}", notifier); + return; + } + _ => continue, + } + } + + // Fallback to osascript + tracing::debug!("terminal-notifier not available, using osascript"); + send_macos_osascript_sync(title, body); +} + +/// Fallback notification via osascript (if native fails) +#[cfg(target_os = "macos")] +fn send_macos_osascript_sync(title: &str, body: &str) { + let escaped_title = title.replace('"', "\\\""); + let escaped_body = body.replace('"', "\\\""); + + let script = format!( + r#"display notification "{}" with title "{}""#, + escaped_body, escaped_title + ); + + let _ = std::process::Command::new("osascript") + .args(["-e", &script]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn(); +} + +/// Send a notification synchronously (blocking). +/// +/// Used in non-async contexts like early startup warnings. +pub fn send_sync(title: &str, body: &str) { + send_sync_with_engine(title, body, None); +} + +/// Send a notification synchronously with optional engine icon. 
+pub fn send_sync_with_engine(title: &str, body: &str, engine: Option) { + #[cfg(target_os = "linux")] + { + let _ = engine; + send_linux_sync(title, body); + } + + #[cfg(target_os = "macos")] + send_macos_native(title, body, engine); + + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + { + let _ = (title, body, engine); // Suppress unused warnings + } +} + +/// Send a notification on Linux using notify-send (synchronous) +#[cfg(target_os = "linux")] +fn send_linux_sync(title: &str, body: &str) { + let _ = std::process::Command::new("notify-send") + .args(["--app-name=Voxtype", "--expire-time=5000", title, body]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn(); +} + +#[cfg(test)] +mod tests { + #[test] + fn test_quote_escaping() { + // Test that quotes are properly escaped for AppleScript + let title = r#"Test "title""#; + let escaped = title.replace('"', "\\\""); + assert_eq!(escaped, r#"Test \"title\""#); + } +} diff --git a/src/osd/config.rs b/src/osd/config.rs new file mode 100644 index 00000000..d5d2eee4 --- /dev/null +++ b/src/osd/config.rs @@ -0,0 +1,169 @@ +//! `[osd]` configuration block. +//! +//! Parsed from the user's config file alongside the rest of the daemon +//! config; can be overridden via CLI flags or `VOXTYPE_OSD_*` env vars on +//! either OSD binary. The full config layering is wired up in Commit 6. + +use serde::{Deserialize, Serialize}; + +/// Position anchor for the OSD surface on the focused output. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum OsdPosition { + #[default] + BottomCenter, + TopCenter, + BottomLeft, + BottomRight, + TopLeft, + TopRight, +} + +/// Selects which OSD frontend the `voxtype-osd` wrapper launches. +/// +/// The wrapper treats this as a *preference*: if the chosen frontend's +/// binary isn't on PATH (e.g. 
the user built voxtype with only one of +/// `osd-gtk4`/`osd-native`), the wrapper falls back to whichever it can +/// find and logs a warning. Default is `Gtk4` because GTK4 ships with +/// most Hyprland setups already (Omarchy pulls it in via swayosd, walker, +/// etc.) so there's no extra runtime cost for the typical user. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum OsdFrontend { + #[default] + Gtk4, + Native, +} + +impl OsdFrontend { + /// Name of the binary that implements this frontend, suitable for a + /// PATH lookup or `Command::new`. + pub fn binary_name(self) -> &'static str { + match self { + OsdFrontend::Gtk4 => "voxtype-osd-gtk4", + OsdFrontend::Native => "voxtype-osd-native", + } + } + + pub fn parse_str(s: &str) -> Option { + match s.trim().to_ascii_lowercase().as_str() { + "gtk4" | "gtk" => Some(OsdFrontend::Gtk4), + "native" | "wgpu" | "egui" => Some(OsdFrontend::Native), + _ => None, + } + } +} + +/// All user-facing OSD options. Defaults match BRIEF.md. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(default)] +pub struct OsdConfig { + /// Run the OSD at all. When `false`, both binaries exit immediately. + pub enabled: bool, + /// Surface width in physical pixels. + pub width_px: u32, + /// Surface height in physical pixels. + pub height_px: u32, + /// Anchor on the focused output. + pub position: OsdPosition, + /// Margin from the screen edge in physical pixels. + pub margin_px: u32, + /// Background opacity, 0.0..=1.0. + pub opacity: f32, + /// Visible waveform window in seconds (3.0 per BRIEF). + pub waveform_window_secs: f32, + /// Held-peak decay rate in dB/sec (6.0 per BRIEF). + pub peak_decay_db_per_sec: f32, + /// Visual gain applied to audio samples before drawing the waveform. + /// Mic-level voice typically peaks at ~0.1..=0.3 of full-scale; gain + /// scales that up so the envelope fills the available height. 
10.0 is + /// the default; reduce for hot mics, increase for quiet sources. + pub waveform_gain: f32, + /// Which OSD frontend the `voxtype-osd` wrapper launches. Defaults to + /// `Gtk4` since GTK4 ships with most Hyprland setups already. + pub frontend: OsdFrontend, +} + +impl Default for OsdConfig { + fn default() -> Self { + Self { + enabled: true, + width_px: 400, + height_px: 48, + position: OsdPosition::BottomCenter, + margin_px: 24, + opacity: 0.95, + waveform_window_secs: 3.0, + peak_decay_db_per_sec: 6.0, + waveform_gain: 10.0, + frontend: OsdFrontend::default(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn defaults_match_brief() { + let c = OsdConfig::default(); + assert!(c.enabled); + assert_eq!(c.width_px, 400); + assert_eq!(c.height_px, 48); + assert_eq!(c.position, OsdPosition::BottomCenter); + assert_eq!(c.margin_px, 24); + assert!((c.opacity - 0.95).abs() < 1e-6); + assert!((c.waveform_window_secs - 3.0).abs() < 1e-6); + assert!((c.peak_decay_db_per_sec - 6.0).abs() < 1e-6); + assert!((c.waveform_gain - 10.0).abs() < 1e-6); + } + + #[test] + fn position_serde_kebab_case() { + let v: OsdPosition = serde_json::from_str("\"bottom-center\"").unwrap(); + assert_eq!(v, OsdPosition::BottomCenter); + let v: OsdPosition = serde_json::from_str("\"top-right\"").unwrap(); + assert_eq!(v, OsdPosition::TopRight); + } + + #[test] + fn frontend_default_is_gtk4() { + assert_eq!(OsdFrontend::default(), OsdFrontend::Gtk4); + assert_eq!(OsdConfig::default().frontend, OsdFrontend::Gtk4); + } + + #[test] + fn frontend_binary_names() { + assert_eq!(OsdFrontend::Gtk4.binary_name(), "voxtype-osd-gtk4"); + assert_eq!(OsdFrontend::Native.binary_name(), "voxtype-osd-native"); + } + + #[test] + fn frontend_parse_str_accepts_aliases() { + assert_eq!(OsdFrontend::parse_str("gtk4"), Some(OsdFrontend::Gtk4)); + assert_eq!(OsdFrontend::parse_str("GTK"), Some(OsdFrontend::Gtk4)); + assert_eq!(OsdFrontend::parse_str("native"), Some(OsdFrontend::Native)); + 
assert_eq!(OsdFrontend::parse_str("wgpu"), Some(OsdFrontend::Native)); + assert_eq!(OsdFrontend::parse_str("egui"), Some(OsdFrontend::Native)); + assert_eq!(OsdFrontend::parse_str("nope"), None); + } + + #[test] + fn frontend_serde_kebab_case() { + let v: OsdFrontend = serde_json::from_str("\"gtk4\"").unwrap(); + assert_eq!(v, OsdFrontend::Gtk4); + let v: OsdFrontend = serde_json::from_str("\"native\"").unwrap(); + assert_eq!(v, OsdFrontend::Native); + } + + #[test] + fn config_partial_toml_uses_defaults() { + let toml_src = "width_px = 800\n"; + let c: OsdConfig = toml::from_str(toml_src).unwrap(); + assert_eq!(c.width_px, 800); + // All other fields default + assert_eq!(c.height_px, 48); + assert!(c.enabled); + } +} diff --git a/src/osd/ipc.rs b/src/osd/ipc.rs new file mode 100644 index 00000000..9479651c --- /dev/null +++ b/src/osd/ipc.rs @@ -0,0 +1,250 @@ +//! Daemon IPC for the on-screen visualizer. +//! +//! The daemon emits 16-byte [`AudioFrame`]s at 100 Hz over a Unix socket +//! (default `$XDG_RUNTIME_DIR/voxtype/audio.sock`). This module encapsulates: +//! +//! - The connect / read / reconnect loop, abstracted over a per-frame +//! callback so each frontend can plug in its own state. +//! - A fixed-capacity ring buffer of decoded frames, used by the renderer +//! to draw the scrolling waveform. +//! +//! The design goal is that the two frontends (`voxtype-osd-native` and +//! `voxtype-osd-gtk4`) can share an identical IPC surface and only differ +//! in their rendering stack. + +use std::io::ErrorKind; +use std::path::{Path, PathBuf}; +use std::time::Duration; + +use tokio::io::AsyncReadExt; +use tokio::net::UnixStream; +use tokio::time::sleep; + +use crate::audio::levels::{default_socket_path, AudioFrame, FRAME_BYTES}; + +/// Default ring buffer depth: 3 seconds at 100 Hz. +pub const DEFAULT_RING_DEPTH: usize = 300; + +/// Fixed-capacity ring buffer of audio frames. +/// +/// New frames overwrite the oldest. 
The renderer iterates in oldest-first
+/// order via [`FrameRing::iter`] to draw the scrolling waveform.
+pub struct FrameRing {
+    /// Backing storage; a slot is `None` until a frame has been written to it.
+    buf: Vec<Option<AudioFrame>>,
+    /// Index of the slot the next `push` writes to.
+    head: usize,
+    /// Number of occupied slots; never exceeds `buf.len()`.
+    len: usize,
+}
+
+impl FrameRing {
+    /// Create an empty ring that holds at most `capacity` frames.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `capacity` is zero.
+    pub fn new(capacity: usize) -> Self {
+        assert!(capacity > 0, "FrameRing capacity must be > 0");
+        Self {
+            buf: vec![None; capacity],
+            head: 0,
+            len: 0,
+        }
+    }
+
+    /// Store `frame` at the write position, replacing whatever was there,
+    /// then advance the write position (wrapping at capacity).
+    pub fn push(&mut self, frame: AudioFrame) {
+        let cap = self.buf.len();
+        self.buf[self.head] = Some(frame);
+        self.head = (self.head + 1) % cap;
+        if self.len < cap {
+            self.len += 1;
+        }
+    }
+
+    /// Most recently pushed frame, or `None` when the ring is empty.
+    pub fn latest(&self) -> Option<AudioFrame> {
+        if self.len == 0 {
+            return None;
+        }
+        let cap = self.buf.len();
+        // `head` points one past the newest slot; add `cap` before
+        // subtracting so the index cannot underflow when `head` is 0.
+        let idx = (self.head + cap - 1) % cap;
+        self.buf[idx]
+    }
+
+    /// Number of frames currently buffered.
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// `true` when no frame is buffered.
+    pub fn is_empty(&self) -> bool {
+        self.len == 0
+    }
+
+    /// Maximum number of frames the ring can hold.
+    pub fn capacity(&self) -> usize {
+        self.buf.len()
+    }
+
+    /// Iterate over the buffered frames in oldest-first order.
+    pub fn iter(&self) -> impl Iterator<Item = AudioFrame> + '_ {
+        let cap = self.buf.len();
+        let start = if self.len < cap {
+            // Not yet wrapped: the oldest frame is still in slot 0.
+            0
+        } else {
+            self.head // oldest is the position about to be overwritten
+        };
+        (0..self.len).filter_map(move |i| self.buf[(start + i) % cap])
+    }
+
+    /// Drop all buffered frames.
+    pub fn clear(&mut self) {
+        self.buf.fill(None);
+        self.head = 0;
+        self.len = 0;
+    }
+}
+
+/// Resolve the socket path: explicit override, else the daemon's default.
+pub fn resolve_socket_path(override_path: Option<PathBuf>) -> PathBuf {
+    override_path.unwrap_or_else(default_socket_path)
+}
+
+/// Outcome of one connection attempt.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub enum ConnectionOutcome {
+    /// The daemon closed the socket (recording ended, daemon shut down).
+    Eof,
+    /// We failed to connect (daemon not running yet).
+    ConnectFailed,
+    /// We were reading frames and hit a non-EOF error.
+    ReadError,
+}
+
+/// Run one connect/read cycle, calling `on_frame` for each decoded frame.
+/// +/// Returns when the connection ends. The caller is expected to sleep and +/// retry per [`run_ipc_loop`], which composes this with a reconnect delay. +pub async fn run_one_connection( + socket_path: &Path, + mut on_frame: F, +) -> ConnectionOutcome +where + F: FnMut(AudioFrame), +{ + let mut stream = match UnixStream::connect(socket_path).await { + Ok(s) => s, + Err(e) => { + tracing::debug!("Cannot connect to {:?}: {}", socket_path, e); + return ConnectionOutcome::ConnectFailed; + } + }; + tracing::info!("Connected to daemon at {:?}", socket_path); + + let mut buf = [0u8; FRAME_BYTES]; + loop { + match stream.read_exact(&mut buf).await { + Ok(_) => { + let frame = AudioFrame::from_bytes(&buf); + on_frame(frame); + } + Err(e) if e.kind() == ErrorKind::UnexpectedEof => { + tracing::info!("Daemon closed the socket (EOF)"); + return ConnectionOutcome::Eof; + } + Err(e) => { + tracing::warn!("Read error on audio socket: {}", e); + return ConnectionOutcome::ReadError; + } + } + } +} + +/// Run the connect / read / reconnect loop forever. +/// +/// `reconnect_secs` controls the gap between retry attempts when the +/// daemon is unavailable or the socket closes. +/// +/// This function never returns under normal operation; it is intended to +/// be spawned on a Tokio runtime by each frontend. +pub async fn run_ipc_loop( + socket_path: PathBuf, + reconnect_secs: f32, + mut on_frame: F, +) -> ! 
+where + F: FnMut(AudioFrame) + Send, +{ + let delay = Duration::from_secs_f32(reconnect_secs.max(0.05)); + loop { + let _ = run_one_connection(&socket_path, &mut on_frame).await; + sleep(delay).await; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn frame(seq: u32) -> AudioFrame { + AudioFrame { + seq, + min: -0.1, + max: 0.1, + peak_dbfs: -20.0, + } + } + + #[test] + fn ring_keeps_latest_within_capacity() { + let mut r = FrameRing::new(4); + for i in 0..10 { + r.push(frame(i)); + } + assert_eq!(r.len(), 4); + let latest = r.latest().unwrap(); + assert_eq!(latest.seq, 9); + } + + #[test] + fn ring_latest_none_when_empty() { + let r = FrameRing::new(8); + assert!(r.latest().is_none()); + assert_eq!(r.len(), 0); + assert!(r.is_empty()); + } + + #[test] + fn ring_grows_until_capacity() { + let mut r = FrameRing::new(8); + for i in 0..3 { + r.push(frame(i)); + } + assert_eq!(r.len(), 3); + assert_eq!(r.latest().unwrap().seq, 2); + } + + #[test] + fn ring_iter_oldest_first_when_full() { + let mut r = FrameRing::new(4); + for i in 0..6 { + r.push(frame(i)); + } + let seqs: Vec = r.iter().map(|f| f.seq).collect(); + // After 6 pushes into a 4-deep ring: contents are 2,3,4,5 oldest-first. + assert_eq!(seqs, vec![2, 3, 4, 5]); + } + + #[test] + fn ring_iter_oldest_first_partial() { + let mut r = FrameRing::new(4); + r.push(frame(7)); + r.push(frame(8)); + let seqs: Vec = r.iter().map(|f| f.seq).collect(); + assert_eq!(seqs, vec![7, 8]); + } + + #[test] + fn ring_clear_resets_state() { + let mut r = FrameRing::new(4); + for i in 0..3 { + r.push(frame(i)); + } + r.clear(); + assert_eq!(r.len(), 0); + assert!(r.latest().is_none()); + r.push(frame(99)); + assert_eq!(r.latest().unwrap().seq, 99); + } +} diff --git a/src/osd/mod.rs b/src/osd/mod.rs new file mode 100644 index 00000000..28cc7460 --- /dev/null +++ b/src/osd/mod.rs @@ -0,0 +1,19 @@ +//! Shared logic for the on-screen visualizer binaries. +//! +//! 
Both `voxtype-osd-native` (SCTK + wgpu + egui-wgpu) and `voxtype-osd-gtk4` +//! (GTK4 + gtk4-layer-shell) consume the same daemon IPC, run the same +//! peak-hold + waveform envelope math, parse the same Omarchy theme, and +//! honor the same `[osd]` configuration. That logic lives here so the two +//! frontends only differ in their rendering surface. +//! +//! ## Module layout +//! +//! - [`ipc`] — Unix-socket connection, frame decode, ring buffer, reconnect. +//! - [`visual`] — peak-hold decay, waveform envelope helpers, palette types. +//! - [`config`] — `[osd]` config block (`OsdConfig`). +//! - [`theme`] — Omarchy theme parsing + change watcher. + +pub mod config; +pub mod ipc; +pub mod theme; +pub mod visual; diff --git a/src/osd/theme.rs b/src/osd/theme.rs new file mode 100644 index 00000000..4fd46dcc --- /dev/null +++ b/src/osd/theme.rs @@ -0,0 +1,228 @@ +//! Omarchy theme integration. +//! +//! On startup, both OSD frontends read the active Omarchy theme and map it +//! to a [`Palette`] used by the renderer. The active theme lives at +//! `~/.config/omarchy/current/theme/colors.toml`, which is a TOML file with +//! a flat structure: `background`, `foreground`, `accent`, plus the ANSI +//! palette `color0`..=`color15`. +//! +//! Mapping: +//! +//! - `accent` → waveform fill +//! - `background` → window background (alpha kept from fallback) +//! - `foreground` → held-peak tick +//! - `color2` (ANSI green) → meter low zone +//! - `color3` (ANSI yellow) → meter mid zone +//! - `color1` (ANSI red) → meter high zone +//! +//! Themes whose ANSI red/green/yellow are off-spec (e.g. the "aether" theme +//! maps red to a tan) inherit the theme designer's choice — that's the +//! point of theming. + +use std::fs; +use std::path::PathBuf; + +use serde::Deserialize; + +use crate::osd::visual::{Color, Palette}; + +/// Canonical Omarchy "current theme" directory. 
+pub fn omarchy_theme_dir() -> Option<PathBuf> {
+    // `None` when `HOME` is not set in the environment.
+    let home = std::env::var_os("HOME")?;
+    let mut p = PathBuf::from(home);
+    p.push(".config/omarchy/current/theme");
+    Some(p)
+}
+
+/// The keys of `colors.toml` that the OSD consumes. Every key is optional so
+/// a partial theme still deserializes; values are expected to be hex color
+/// strings and are validated later by [`parse_hex`].
+#[derive(Deserialize, Default)]
+struct OmarchyColors {
+    background: Option<String>,
+    foreground: Option<String>,
+    accent: Option<String>,
+    color1: Option<String>,
+    color2: Option<String>,
+    color3: Option<String>,
+}
+
+/// Parse a `#RRGGBB` hex color into a [`Color`] with full alpha.
+///
+/// Surrounding whitespace and any leading `#` characters are ignored.
+/// Returns `None` unless exactly six ASCII hex digits remain.
+fn parse_hex(s: &str) -> Option<Color> {
+    let s = s.trim().trim_start_matches('#');
+    // Validate before slicing: a byte-range slice panics when it cuts through
+    // a multi-byte character, and `from_str_radix` would otherwise accept a
+    // leading `+` sign inside a channel (e.g. "+F").
+    if s.len() != 6 || !s.bytes().all(|b| b.is_ascii_hexdigit()) {
+        return None;
+    }
+    let r = u8::from_str_radix(&s[0..2], 16).ok()? as f32 / 255.0;
+    let g = u8::from_str_radix(&s[2..4], 16).ok()? as f32 / 255.0;
+    let b = u8::from_str_radix(&s[4..6], 16).ok()? as f32 / 255.0;
+    Some(Color::rgb(r, g, b))
+}
+
+/// Load the palette from the active Omarchy theme.
+///
+/// Falls back to [`Palette::fallback`] when the theme directory is missing,
+/// the colors file is unreadable, or the TOML doesn't parse. Per-field
+/// fallbacks apply too: a theme that only defines `accent` keeps the
+/// fallback values for everything else.
+pub fn load_palette() -> Palette { + let Some(dir) = omarchy_theme_dir() else { + return Palette::fallback(); + }; + let path = dir.join("colors.toml"); + let content = match fs::read_to_string(&path) { + Ok(s) => s, + Err(_) => return Palette::fallback(), + }; + let parsed: OmarchyColors = match toml::from_str(&content) { + Ok(v) => v, + Err(_) => return Palette::fallback(), + }; + + palette_from(parsed) +} + +fn palette_from(c: OmarchyColors) -> Palette { + let fb = Palette::fallback(); + let bg_alpha = fb.background.a; + Palette { + background: c + .background + .as_deref() + .and_then(parse_hex) + .map(|c| c.with_alpha(bg_alpha)) + .unwrap_or(fb.background), + accent: c + .accent + .as_deref() + .and_then(parse_hex) + .unwrap_or(fb.accent), + meter_low: c + .color2 + .as_deref() + .and_then(parse_hex) + .unwrap_or(fb.meter_low), + meter_mid: c + .color3 + .as_deref() + .and_then(parse_hex) + .unwrap_or(fb.meter_mid), + meter_high: c + .color1 + .as_deref() + .and_then(parse_hex) + .unwrap_or(fb.meter_high), + foreground: c + .foreground + .as_deref() + .and_then(parse_hex) + .unwrap_or(fb.foreground), + } +} + +/// Theme watcher: snapshots the palette at construction. +/// +/// Real `notify`-based reload-on-change would let us re-render with the new +/// theme when the user switches Omarchy themes. Out of scope for this +/// commit; users can re-launch the OSD after switching. +pub struct ThemeWatcher { + palette: Palette, +} + +impl ThemeWatcher { + pub fn new() -> Self { + Self { + palette: load_palette(), + } + } + + /// Current palette. Cheap to call every frame. 
+ pub fn palette(&self) -> Palette { + self.palette + } +} + +impl Default for ThemeWatcher { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn theme_dir_resolves_under_home() { + std::env::set_var("HOME", "/tmp/fakehome"); + let p = omarchy_theme_dir().unwrap(); + assert!(p.ends_with(".config/omarchy/current/theme")); + } + + #[test] + fn missing_theme_dir_yields_fallback() { + std::env::set_var("HOME", "/tmp/this-dir-should-not-exist-voxtype-test"); + assert_eq!(load_palette(), Palette::fallback()); + } + + #[test] + fn parse_hex_basic() { + let c = parse_hex("#6E89C2").unwrap(); + assert!((c.r - 0x6E as f32 / 255.0).abs() < 1e-6); + assert!((c.g - 0x89 as f32 / 255.0).abs() < 1e-6); + assert!((c.b - 0xC2 as f32 / 255.0).abs() < 1e-6); + assert_eq!(c.a, 1.0); + } + + #[test] + fn parse_hex_no_hash_prefix() { + let c = parse_hex("121515").unwrap(); + assert!((c.r - 0x12 as f32 / 255.0).abs() < 1e-6); + } + + #[test] + fn parse_hex_rejects_short_or_invalid() { + assert!(parse_hex("#FFF").is_none()); + assert!(parse_hex("#ZZZZZZ").is_none()); + assert!(parse_hex("").is_none()); + } + + #[test] + fn palette_from_aether_sample() { + // Real values from ~/.config/omarchy/themes/aether/colors.toml + let toml_src = r##" + accent = "#6E89C2" + background = "#121515" + foreground = "#FCFBF8" + color1 = "#A48364" + color2 = "#F8E7AE" + color3 = "#FEE88B" + "##; + let c: OmarchyColors = toml::from_str(toml_src).unwrap(); + let p = palette_from(c); + assert_eq!(p.accent, parse_hex("#6E89C2").unwrap()); + // Background keeps the fallback alpha (translucent OSD). 
+ let fb_alpha = Palette::fallback().background.a; + assert!((p.background.a - fb_alpha).abs() < 1e-6); + assert_eq!(p.meter_high, parse_hex("#A48364").unwrap()); + assert_eq!(p.meter_low, parse_hex("#F8E7AE").unwrap()); + assert_eq!(p.meter_mid, parse_hex("#FEE88B").unwrap()); + } + + #[test] + fn palette_from_partial_inherits_fallback() { + // Only accent defined; everything else stays as fallback. + let toml_src = r##"accent = "#6E89C2""##; + let c: OmarchyColors = toml::from_str(toml_src).unwrap(); + let p = palette_from(c); + let fb = Palette::fallback(); + assert_eq!(p.accent, parse_hex("#6E89C2").unwrap()); + assert_eq!(p.background, fb.background); + assert_eq!(p.meter_low, fb.meter_low); + } + + #[test] + fn watcher_uses_loaded_palette() { + // We can't predict the user's theme here, but at minimum the watcher + // should hold whatever load_palette() returned at construction. + let w = ThemeWatcher::new(); + assert_eq!(w.palette(), load_palette()); + } +} diff --git a/src/osd/visual.rs b/src/osd/visual.rs new file mode 100644 index 00000000..5bc70271 --- /dev/null +++ b/src/osd/visual.rs @@ -0,0 +1,346 @@ +//! Pure visual logic shared by both OSD frontends. +//! +//! Nothing in this module touches Wayland, GTK, wgpu, or Cairo. It exists +//! so the rendering math is identical across frontends and unit-testable +//! without a graphics context. + +use crate::audio::levels::AudioFrame; + +/// RGBA color, components in 0.0..=1.0. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct Color { + pub r: f32, + pub g: f32, + pub b: f32, + pub a: f32, +} + +impl Color { + pub const fn rgba(r: f32, g: f32, b: f32, a: f32) -> Self { + Self { r, g, b, a } + } + + pub const fn rgb(r: f32, g: f32, b: f32) -> Self { + Self { r, g, b, a: 1.0 } + } + + pub fn with_alpha(mut self, a: f32) -> Self { + self.a = a; + self + } +} + +/// Color palette resolved from the active Omarchy theme (or the fallback). 
+#[derive(Debug, Clone, Copy, PartialEq)] +pub struct Palette { + /// Window background color (typically dark). + pub background: Color, + /// Waveform fill color (theme accent). + pub accent: Color, + /// Peak meter "safe" zone (-inf..-12 dBFS). + pub meter_low: Color, + /// Peak meter "warning" zone (-12..-3 dBFS). + pub meter_mid: Color, + /// Peak meter "danger" zone (-3..0 dBFS). + pub meter_high: Color, + /// Foreground / text color (used for held-peak tick, segment dividers). + pub foreground: Color, +} + +impl Palette { + /// Fallback palette used until an Omarchy theme is parsed. Designed to + /// look passable on a dark background. + pub const fn fallback() -> Self { + Self { + background: Color::rgba(0.10, 0.10, 0.12, 0.85), + accent: Color::rgb(0.40, 0.78, 1.00), + meter_low: Color::rgb(0.30, 0.85, 0.45), + meter_mid: Color::rgb(0.95, 0.80, 0.30), + meter_high: Color::rgb(0.95, 0.35, 0.30), + foreground: Color::rgb(0.92, 0.92, 0.95), + } + } +} + +impl Default for Palette { + fn default() -> Self { + Self::fallback() + } +} + +/// Peak meter zone, used to color the lit segment of the bar. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum MeterZone { + Low, + Mid, + High, +} + +impl MeterZone { + /// Classify a peak level (dBFS) into a meter zone. + /// + /// Boundaries match the BRIEF: green to -12, yellow -12..-3, red -3..0. + pub fn from_dbfs(peak_dbfs: f32) -> Self { + if peak_dbfs >= -3.0 { + MeterZone::High + } else if peak_dbfs >= -12.0 { + MeterZone::Mid + } else { + MeterZone::Low + } + } + + pub fn color(self, palette: &Palette) -> Color { + match self { + MeterZone::Low => palette.meter_low, + MeterZone::Mid => palette.meter_mid, + MeterZone::High => palette.meter_high, + } + } +} + +/// Held-peak state for the peak meter's decaying tick. +/// +/// Per BRIEF: held-peak rises instantly to the current peak and decays at +/// `peak_decay_db_per_sec` dB/sec while the live peak sits below it. 
+#[derive(Debug, Clone, Copy)] +pub struct PeakHold { + /// Current held peak in dBFS. -inf-equivalent is represented as -120.0. + pub held_dbfs: f32, + /// Decay rate in dB per second. + pub decay_db_per_sec: f32, +} + +impl PeakHold { + pub fn new(decay_db_per_sec: f32) -> Self { + Self { + held_dbfs: -120.0, + decay_db_per_sec, + } + } + + /// Update the hold given the current peak and the time delta since the + /// last update (seconds). + pub fn update(&mut self, current_peak_dbfs: f32, dt_secs: f32) { + update_peak_hold( + current_peak_dbfs, + &mut self.held_dbfs, + self.decay_db_per_sec, + dt_secs, + ); + } +} + +/// Free-function peak-hold update; matches the formula in BRIEF.md verbatim. +/// +/// `held` snaps up to `current_peak` instantly when louder, otherwise +/// decays linearly at `decay_db_per_sec`. The held value floors at -120.0 +/// so a quiet signal doesn't underflow toward -infinity. +pub fn update_peak_hold( + current_peak: f32, + held: &mut f32, + decay_db_per_sec: f32, + dt_secs: f32, +) { + if current_peak > *held { + *held = current_peak; + } else { + *held -= decay_db_per_sec * dt_secs; + if *held < -120.0 { + *held = -120.0; + } + } +} + +/// One column of the waveform envelope: min/max amplitude in -1.0..=1.0. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct EnvelopeColumn { + pub min: f32, + pub max: f32, +} + +impl EnvelopeColumn { + pub const SILENT: Self = Self { min: 0.0, max: 0.0 }; +} + +/// Project the most recent `frames.len()` audio frames onto `n_columns` +/// pixel columns by aggregating min/max over the frames that map to each +/// column. Columns are oldest-on-left, newest-on-right. +/// +/// Every column is mapped proportionally to the available frame range, +/// which means the waveform always fills the entire display width. 
When +/// the ring contains fewer frames than columns, frames stretch to cover +/// the extra columns (one frame may map to several adjacent columns) — +/// preferable to leaving a permanent dead zone on the left edge that +/// never receives data. +pub fn project_envelope(frames: &[AudioFrame], n_columns: usize) -> Vec { + let mut out = vec![EnvelopeColumn::SILENT; n_columns]; + if frames.is_empty() || n_columns == 0 { + return out; + } + + let n_frames = frames.len(); + for col in 0..n_columns { + // Bucket-map column index to a half-open frame range. When + // n_frames >= n_columns, each bucket covers >=1 frame and we + // aggregate min/max over the bucket. When n_frames < n_columns, + // start..end ends up empty for some buckets (start == end); + // we sample-and-hold the previous column's value so the + // waveform stretches across the full width instead of leaving + // gaps. + let start = (col * n_frames) / n_columns; + let end = ((col + 1) * n_frames) / n_columns; + let mut min = 0.0_f32; + let mut max = 0.0_f32; + let mut any = false; + for f in &frames[start..end] { + if !any { + min = f.min; + max = f.max; + any = true; + } else { + if f.min < min { + min = f.min; + } + if f.max > max { + max = f.max; + } + } + } + out[col] = if any { + EnvelopeColumn { min, max } + } else { + // Empty bucket: sample-and-hold the nearest frame so the + // visualization stretches rather than going silent. + let idx = ((col * n_frames) / n_columns).min(n_frames - 1); + EnvelopeColumn { + min: frames[idx].min, + max: frames[idx].max, + } + }; + } + out +} + +/// Map a dBFS peak to a normalized 0.0..=1.0 fill level for the meter. +/// +/// `floor_dbfs` is the dBFS value that maps to 0.0 (typically -60 dBFS for +/// a usable visual range). 0 dBFS maps to 1.0. 
+pub fn peak_meter_fraction(peak_dbfs: f32, floor_dbfs: f32) -> f32 { + if !peak_dbfs.is_finite() || peak_dbfs <= floor_dbfs { + return 0.0; + } + let clipped = peak_dbfs.min(0.0); + let span = -floor_dbfs; + if span <= 0.0 { + return 0.0; + } + ((clipped - floor_dbfs) / span).clamp(0.0, 1.0) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn frame(seq: u32, min: f32, max: f32, peak_dbfs: f32) -> AudioFrame { + AudioFrame { + seq, + min, + max, + peak_dbfs, + } + } + + #[test] + fn meter_zone_boundaries() { + assert_eq!(MeterZone::from_dbfs(-30.0), MeterZone::Low); + assert_eq!(MeterZone::from_dbfs(-12.0), MeterZone::Mid); + assert_eq!(MeterZone::from_dbfs(-6.0), MeterZone::Mid); + assert_eq!(MeterZone::from_dbfs(-3.0), MeterZone::High); + assert_eq!(MeterZone::from_dbfs(0.0), MeterZone::High); + } + + #[test] + fn peak_hold_snaps_up_instantly() { + let mut hold = PeakHold::new(6.0); + hold.update(-10.0, 0.01); + assert!((hold.held_dbfs - -10.0).abs() < 1e-6); + hold.update(-3.0, 0.01); + assert!((hold.held_dbfs - -3.0).abs() < 1e-6); + } + + #[test] + fn peak_hold_decays_linearly() { + let mut hold = PeakHold::new(6.0); + hold.update(-3.0, 0.0); + assert!((hold.held_dbfs - -3.0).abs() < 1e-6); + // 1 second at 6 dB/sec = -9 dBFS + hold.update(-30.0, 1.0); + assert!((hold.held_dbfs - -9.0).abs() < 1e-3); + } + + #[test] + fn peak_hold_floor_at_minus_120() { + let mut held = -10.0; + update_peak_hold(-100.0, &mut held, 6.0, 1000.0); // huge dt + assert_eq!(held, -120.0); + } + + #[test] + fn peak_meter_fraction_basic() { + assert_eq!(peak_meter_fraction(-60.0, -60.0), 0.0); + assert_eq!(peak_meter_fraction(0.0, -60.0), 1.0); + let half = peak_meter_fraction(-30.0, -60.0); + assert!((half - 0.5).abs() < 1e-3); + } + + #[test] + fn peak_meter_fraction_clamps_silence() { + assert_eq!(peak_meter_fraction(-120.0, -60.0), 0.0); + assert_eq!(peak_meter_fraction(f32::NEG_INFINITY, -60.0), 0.0); + } + + #[test] + fn envelope_partial_stretches_to_fill() { + // 2 frames 
into 5 columns: every column must be populated (no + // silent left edge). Frames stretch via sample-and-hold. + let frames = vec![ + frame(0, -0.1, 0.1, -20.0), + frame(1, -0.2, 0.2, -14.0), + ]; + let cols = project_envelope(&frames, 5); + assert_eq!(cols.len(), 5); + for (i, c) in cols.iter().enumerate() { + assert_ne!(*c, EnvelopeColumn::SILENT, "column {i} was silent"); + } + // The newest frame should appear in the last column. + assert_eq!(cols[4], EnvelopeColumn { min: -0.2, max: 0.2 }); + // The oldest frame should appear in the first column. + assert_eq!(cols[0], EnvelopeColumn { min: -0.1, max: 0.1 }); + } + + #[test] + fn envelope_aggregates_when_full() { + // 10 frames into 5 columns: each column covers 2 frames. + let frames: Vec = (0..10) + .map(|i| frame(i, -(i as f32) * 0.1, (i as f32) * 0.1, -20.0)) + .collect(); + let cols = project_envelope(&frames, 5); + assert_eq!(cols.len(), 5); + // First column: frames 0..=1 -> min = -0.1, max = 0.1 + assert!((cols[0].min - -0.1).abs() < 1e-6); + assert!((cols[0].max - 0.1).abs() < 1e-6); + // Last column: frames 8..=9 -> min = -0.9, max = 0.9 + assert!((cols[4].min - -0.9).abs() < 1e-6); + assert!((cols[4].max - 0.9).abs() < 1e-6); + } + + #[test] + fn envelope_empty_input_yields_silence() { + let cols = project_envelope(&[], 4); + assert_eq!(cols.len(), 4); + for c in cols { + assert_eq!(c, EnvelopeColumn::SILENT); + } + } +} diff --git a/src/output/cgevent.rs b/src/output/cgevent.rs new file mode 100644 index 00000000..b2de5bfd --- /dev/null +++ b/src/output/cgevent.rs @@ -0,0 +1,271 @@ +//! macOS text output via CGEvent API +//! +//! Uses Core Graphics events to simulate keyboard input on macOS. +//! This is the native, preferred method for text injection on macOS. +//! +//! Requires Accessibility permissions: +//! System Settings > Privacy & Security > Accessibility +//! +//! Advantages over osascript: +//! - Native API, no subprocess spawning +//! 
- Direct Unicode support via CGEventKeyboardSetUnicodeString +//! - Lower latency and better reliability +//! - Proper keycode mapping with modifier support + +use super::TextOutput; +use crate::error::OutputError; +use core_foundation::base::TCFType; +use core_graphics::event::{CGEvent, CGEventFlags, CGEventTapLocation, CGKeyCode}; +use core_graphics::event_source::{CGEventSource, CGEventSourceStateID}; +use std::time::Duration; + +/// CGEvent-based text output for macOS +pub struct CGEventOutput { + /// Delay between keypresses in milliseconds + type_delay_ms: u32, + /// Delay before typing starts in milliseconds + pre_type_delay_ms: u32, + /// Whether to show a desktop notification + notify: bool, + /// Whether to send Enter key after output + auto_submit: bool, +} + +impl CGEventOutput { + /// Create a new CGEvent output + pub fn new( + type_delay_ms: u32, + pre_type_delay_ms: u32, + notify: bool, + auto_submit: bool, + ) -> Self { + Self { + type_delay_ms, + pre_type_delay_ms, + notify, + auto_submit, + } + } + + /// Check if Accessibility permissions are granted + fn check_accessibility_permission() -> bool { + #[link(name = "ApplicationServices", kind = "framework")] + extern "C" { + fn AXIsProcessTrusted() -> bool; + } + unsafe { AXIsProcessTrusted() } + } + + /// Request Accessibility permissions (shows system dialog) + #[allow(dead_code)] + fn request_accessibility_permission() { + #[link(name = "ApplicationServices", kind = "framework")] + extern "C" { + fn AXIsProcessTrustedWithOptions(options: core_foundation::base::CFTypeRef) -> bool; + } + + use core_foundation::boolean::CFBoolean; + use core_foundation::dictionary::CFDictionary; + use core_foundation::string::CFString; + + let key = CFString::new("AXTrustedCheckOptionPrompt"); + let value = CFBoolean::true_value(); + let options = CFDictionary::from_CFType_pairs(&[(key.as_CFType(), value.as_CFType())]); + + unsafe { + AXIsProcessTrustedWithOptions(options.as_concrete_TypeRef() as _); + } + } + + /// 
Send a desktop notification using osascript + async fn send_notification(&self, text: &str) { + use std::process::Stdio; + use tokio::process::Command; + + let preview: String = text.chars().take(80).collect(); + let preview = if text.chars().count() > 80 { + format!("{}...", preview) + } else { + preview + }; + + let escaped = preview.replace('\\', "\\\\").replace('"', "\\\""); + let script = format!( + "display notification \"{}\" with title \"Voxtype\" subtitle \"Transcribed\"", + escaped + ); + + let _ = Command::new("osascript") + .args(["-e", &script]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .await; + } + + /// Type text using CGEvent (blocking, for use in spawn_blocking) + fn type_text_blocking( + text: &str, + type_delay_ms: u32, + auto_submit: bool, + ) -> Result<(), OutputError> { + let source = CGEventSource::new(CGEventSourceStateID::HIDSystemState) + .map_err(|_| OutputError::InjectionFailed("Failed to create CGEventSource".into()))?; + + let delay = Duration::from_millis(type_delay_ms as u64); + + // Type text using Unicode string injection for reliability + // This works with any keyboard layout and supports all characters + for chunk in text.chars().collect::>().chunks(20) { + Self::type_unicode_string(&source, chunk)?; + + if type_delay_ms > 0 && !chunk.is_empty() { + std::thread::sleep(delay); + } + } + + if auto_submit { + std::thread::sleep(Duration::from_millis(50)); + Self::press_key(&source, KEYCODE_RETURN, CGEventFlags::empty())?; + } + + Ok(()) + } + + /// Type a string using Unicode injection (handles any character) + fn type_unicode_string(source: &CGEventSource, chars: &[char]) -> Result<(), OutputError> { + if chars.is_empty() { + return Ok(()); + } + + // Convert to UTF-16 for CGEvent + let mut utf16_buf: Vec = Vec::with_capacity(chars.len() * 2); + for ch in chars { + let mut buf = [0u16; 2]; + let encoded = ch.encode_utf16(&mut buf); + utf16_buf.extend_from_slice(encoded); + } + + // Create key down event with 
Unicode string + let event = CGEvent::new_keyboard_event(source.clone(), 0, true) + .map_err(|_| OutputError::InjectionFailed("Failed to create keyboard event".into()))?; + + event.set_string_from_utf16_unchecked(&utf16_buf); + event.post(CGEventTapLocation::HID); + + // Key up event + let event_up = CGEvent::new_keyboard_event(source.clone(), 0, false) + .map_err(|_| OutputError::InjectionFailed("Failed to create key up event".into()))?; + event_up.post(CGEventTapLocation::HID); + + Ok(()) + } + + /// Press a single key with optional modifiers + /// + /// Always explicitly sets flags to prevent Caps Lock or stuck modifiers + /// from interfering with text injection. + fn press_key( + source: &CGEventSource, + keycode: CGKeyCode, + flags: CGEventFlags, + ) -> Result<(), OutputError> { + let key_down = CGEvent::new_keyboard_event(source.clone(), keycode, true) + .map_err(|_| OutputError::InjectionFailed("Failed to create key down event".into()))?; + + // Always set flags explicitly - use CGEventFlagNull when no modifiers needed + // This prevents Caps Lock or stuck modifier keys from causing random capitalization + key_down.set_flags(flags); + key_down.post(CGEventTapLocation::HID); + + let key_up = CGEvent::new_keyboard_event(source.clone(), keycode, false) + .map_err(|_| OutputError::InjectionFailed("Failed to create key up event".into()))?; + key_up.set_flags(flags); + key_up.post(CGEventTapLocation::HID); + + Ok(()) + } +} + +// macOS virtual key codes (from Carbon HIToolbox Events.h) +const KEYCODE_RETURN: CGKeyCode = 0x24; + +#[async_trait::async_trait] +impl TextOutput for CGEventOutput { + async fn output(&self, text: &str) -> Result<(), OutputError> { + if text.is_empty() { + return Ok(()); + } + + // Check permissions first + if !Self::check_accessibility_permission() { + return Err(OutputError::InjectionFailed( + "Accessibility permission required.\n\ + Grant access in: System Settings > Privacy & Security > Accessibility\n\ + Then restart voxtype." 
+ .into(), + )); + } + + // Pre-typing delay + if self.pre_type_delay_ms > 0 { + tracing::debug!( + "cgevent: waiting {}ms before typing", + self.pre_type_delay_ms + ); + tokio::time::sleep(Duration::from_millis(self.pre_type_delay_ms as u64)).await; + } + + tracing::debug!("cgevent: typing {} chars", text.chars().count()); + + // CGEventSource is not Send, so do all CGEvent work in spawn_blocking + let text_owned = text.to_string(); + let type_delay_ms = self.type_delay_ms; + let auto_submit = self.auto_submit; + + tokio::task::spawn_blocking(move || { + Self::type_text_blocking(&text_owned, type_delay_ms, auto_submit) + }) + .await + .map_err(|e| OutputError::InjectionFailed(format!("Task join error: {}", e)))??; + + tracing::info!("Text typed via CGEvent ({} chars)", text.chars().count()); + + if self.notify { + self.send_notification(text).await; + } + + Ok(()) + } + + async fn is_available(&self) -> bool { + // CGEvent is available on macOS, return true to allow helpful error message + // if permissions are denied + true + } + + fn name(&self) -> &'static str { + "cgevent (macOS native)" + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new() { + let output = CGEventOutput::new(10, 100, true, false); + assert_eq!(output.type_delay_ms, 10); + assert_eq!(output.pre_type_delay_ms, 100); + assert!(output.notify); + assert!(!output.auto_submit); + } + + #[test] + fn test_new_with_auto_submit() { + let output = CGEventOutput::new(0, 0, false, true); + assert!(!output.notify); + assert!(output.auto_submit); + } +} diff --git a/src/output/mod.rs b/src/output/mod.rs index b519655c..1a946007 100644 --- a/src/output/mod.rs +++ b/src/output/mod.rs @@ -3,6 +3,8 @@ //! Provides text output via keyboard simulation or clipboard. //! //! Fallback chain for `mode = "type"`: +//! +//! Linux: //! 1. wtype - Wayland-native via virtual-keyboard protocol, best Unicode/CJK support, no daemon needed //! 2. 
eitype - Wayland via libei/EI protocol, works on GNOME/KDE (no virtual-keyboard support) //! 3. dotool - Works on X11/Wayland/TTY, supports keyboard layouts, no daemon needed @@ -10,12 +12,23 @@ //! 5. clipboard (wl-copy) - Wayland clipboard fallback //! 6. xclip - X11 clipboard fallback //! +//! macOS: +//! 1. cgevent - Native CGEvent API for keyboard simulation (best performance) +//! 2. osascript - AppleScript fallback +//! 3. pbcopy - Native macOS clipboard +//! //! Paste mode (clipboard + Ctrl+V) helps with system with non US keyboard layouts. +#[cfg(target_os = "macos")] +pub mod cgevent; pub mod clipboard; pub mod dotool; pub mod eitype; +#[cfg(target_os = "macos")] +pub mod osascript; pub mod paste; +#[cfg(target_os = "macos")] +pub mod pbcopy; pub mod post_process; pub mod wtype; pub mod xclip; @@ -157,6 +170,17 @@ pub fn engine_icon(engine: crate::config::TranscriptionEngine) -> &'static str { crate::config::TranscriptionEngine::Paraformer => "\u{1F4AC}", // 💬 crate::config::TranscriptionEngine::Dolphin => "\u{1F42C}", // 🐬 crate::config::TranscriptionEngine::Omnilingual => "\u{1F30D}", // 🌍 + crate::config::TranscriptionEngine::Cohere => "\u{1F4DD}", // 📝 + } +} + +/// Validate notification urgency, falling back to "normal" for unknown values. +/// +/// notify-send only accepts "low", "normal", or "critical". 
+pub fn sanitize_urgency(urgency: &str) -> &str { + match urgency { + "low" | "normal" | "critical" => urgency, + _ => "normal", } } @@ -165,6 +189,7 @@ pub async fn send_transcription_notification( text: &str, show_engine_icon: bool, engine: crate::config::TranscriptionEngine, + urgency: &str, ) { // Truncate preview for notification (use chars() to handle multi-byte UTF-8) let preview = if text.chars().count() > 80 { @@ -179,10 +204,11 @@ pub async fn send_transcription_notification( "Transcribed".to_string() }; + let urgency_arg = format!("--urgency={}", sanitize_urgency(urgency)); let _ = Command::new("notify-send") .args([ "--app-name=Voxtype", - "--urgency=low", + &urgency_arg, "--expire-time=3000", &title, &preview, @@ -207,6 +233,7 @@ pub trait TextOutput: Send + Sync { } /// Default driver order for type mode +#[cfg(not(target_os = "macos"))] const DEFAULT_DRIVER_ORDER: &[OutputDriver] = &[ OutputDriver::Wtype, OutputDriver::Eitype, @@ -217,6 +244,7 @@ const DEFAULT_DRIVER_ORDER: &[OutputDriver] = &[ ]; /// Create a TextOutput implementation for a specific driver +#[cfg(not(target_os = "macos"))] fn create_driver_output( driver: OutputDriver, config: &OutputConfig, @@ -270,59 +298,89 @@ pub fn create_output_chain_with_override( driver_override: Option<&[OutputDriver]>, ) -> Vec<Box<dyn TextOutput>> { let mut chain: Vec<Box<dyn TextOutput>> = Vec::new(); + #[cfg(target_os = "macos")] + let _ = driver_override; // Get effective pre_type_delay_ms (handles deprecated wtype_delay_ms) let pre_type_delay_ms = config.effective_pre_type_delay_ms(); match config.mode { crate::config::OutputMode::Type => { - // Determine driver order: CLI override > config > default - let driver_order: &[OutputDriver] = driver_override - .or(config.driver_order.as_deref()) - .unwrap_or(DEFAULT_DRIVER_ORDER); - - if let Some(custom_order) = driver_override.or(config.driver_order.as_deref()) { - tracing::info!( - "Using custom driver order: {}", - custom_order - .iter() - .map(|d| d.to_string()) - .collect::<Vec<_>>() - .join(" -> 
") - ); + #[cfg(target_os = "macos")] + { + // macOS: Primary - CGEvent (native API, best performance) + // driver_order not yet supported on macOS + let show_notification = config.notification.on_transcription; + chain.push(Box::new(cgevent::CGEventOutput::new( + config.type_delay_ms, + pre_type_delay_ms, + show_notification, + config.auto_submit, + ))); + + // Fallback 1: osascript (AppleScript, works without CGEvent permissions) + chain.push(Box::new(osascript::OsascriptOutput::new( + false, // notification already handled by primary + config.auto_submit, + pre_type_delay_ms, + ))); + + // Fallback 2: pbcopy for clipboard + if config.fallback_to_clipboard { + chain.push(Box::new(pbcopy::PbcopyOutput::new(false))); + } } - // Build chain based on driver order - for (i, driver) in driver_order.iter().enumerate() { - // Skip clipboard if it's in the middle and fallback_to_clipboard is false - // (clipboard should only be added if explicitly in the order OR fallback is enabled and it's last) - let is_last = i == driver_order.len() - 1; - if *driver == OutputDriver::Clipboard && !is_last && !config.fallback_to_clipboard { - continue; + #[cfg(not(target_os = "macos"))] + { + // Determine driver order: CLI override > config > default + let driver_order: &[OutputDriver] = driver_override + .or(config.driver_order.as_deref()) + .unwrap_or(DEFAULT_DRIVER_ORDER); + + if let Some(custom_order) = driver_override.or(config.driver_order.as_deref()) { + tracing::info!( + "Using custom driver order: {}", + custom_order + .iter() + .map(|d| d.to_string()) + .collect::>() + .join(" -> ") + ); + } + + for driver in driver_order.iter() { + chain.push(create_driver_output(*driver, config, pre_type_delay_ms)); } - chain.push(create_driver_output(*driver, config, pre_type_delay_ms)); + // If fallback_to_clipboard is true but clipboard wasn't in the custom order, add it + if config.fallback_to_clipboard + && config.driver_order.is_some() + && 
!driver_order.contains(&OutputDriver::Clipboard) + { + chain.push(Box::new(clipboard::ClipboardOutput::new( + config.append_text.clone(), + ))); + } } + } + crate::config::OutputMode::Clipboard => { + #[cfg(target_os = "macos")] + chain.push(Box::new(pbcopy::PbcopyOutput::new( + config.notification.on_transcription, + ))); - // If fallback_to_clipboard is true but clipboard wasn't in the custom order, add it - if config.fallback_to_clipboard - && config.driver_order.is_some() - && !driver_order.contains(&OutputDriver::Clipboard) + #[cfg(not(target_os = "macos"))] { + // Clipboard with X11 fallback: wl-copy first, then xclip chain.push(Box::new(clipboard::ClipboardOutput::new( config.append_text.clone(), ))); + chain.push(Box::new(xclip::XclipOutput::new( + config.append_text.clone(), + ))); } } - crate::config::OutputMode::Clipboard => { - // Clipboard with X11 fallback: wl-copy first, then xclip - chain.push(Box::new(clipboard::ClipboardOutput::new( - config.append_text.clone(), - ))); - chain.push(Box::new(xclip::XclipOutput::new( - config.append_text.clone(), - ))); - } crate::config::OutputMode::Paste => { // Only paste mode (no fallback as requested) chain.push(Box::new(paste::PasteOutput::new( @@ -510,4 +568,19 @@ mod tests { assert_eq!(result, Some(sock_path)); } + + #[test] + fn test_sanitize_urgency_valid() { + assert_eq!(sanitize_urgency("low"), "low"); + assert_eq!(sanitize_urgency("normal"), "normal"); + assert_eq!(sanitize_urgency("critical"), "critical"); + } + + #[test] + fn test_sanitize_urgency_invalid_falls_back_to_normal() { + assert_eq!(sanitize_urgency(""), "normal"); + assert_eq!(sanitize_urgency("LOW"), "normal"); + assert_eq!(sanitize_urgency("urgent"), "normal"); + assert_eq!(sanitize_urgency("--rm -rf /"), "normal"); + } } diff --git a/src/output/osascript.rs b/src/output/osascript.rs new file mode 100644 index 00000000..52b7e529 --- /dev/null +++ b/src/output/osascript.rs @@ -0,0 +1,186 @@ +//! 
macOS text output via osascript/AppleScript +//! +//! Uses System Events to simulate keyboard input on macOS. +//! Requires Accessibility permissions for the terminal/app running voxtype. +//! +//! This is a fallback typing method on macOS, tried after the native CGEvent driver. + +use super::TextOutput; +use crate::error::OutputError; +use std::process::Stdio; +use tokio::process::Command; + +/// macOS text output using osascript +pub struct OsascriptOutput { + /// Whether to show a desktop notification + notify: bool, + /// Whether to send Enter key after text + auto_submit: bool, + /// Delay before typing starts (ms) + pre_type_delay_ms: u32, +} + +impl OsascriptOutput { + /// Create a new osascript output + pub fn new(notify: bool, auto_submit: bool, pre_type_delay_ms: u32) -> Self { + Self { + notify, + auto_submit, + pre_type_delay_ms, + } + } + + /// Send a desktop notification using osascript + async fn send_notification(&self, text: &str) { + // Truncate preview for notification + let preview = if text.chars().count() > 80 { + format!("{}...", text.chars().take(80).collect::<String>()) + } else { + text.to_string() + }; + + // Escape for AppleScript string + let escaped_preview = preview.replace('\\', "\\\\").replace('"', "\\\""); + + let script = format!( + r#"display notification "{}" with title "Voxtype""#, + escaped_preview + ); + + let _ = Command::new("osascript") + .args(["-e", &script]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .await; + } + + /// Escape text for AppleScript string literal + fn escape_for_applescript(text: &str) -> String { + text.replace('\\', "\\\\").replace('"', "\\\"") + } +} + +/// Wait for all modifier keys (Option, Command, Control, Shift) to be released +/// This prevents typing garbage characters when hotkey uses a modifier +async fn wait_for_modifiers_release() { + // Simple fixed delay - the AppleScript check was causing issues + // 150ms is enough for the Option key to fully release + 
tokio::time::sleep(std::time::Duration::from_millis(150)).await; +} + +#[async_trait::async_trait] +impl TextOutput for OsascriptOutput { + async fn output(&self, text: &str) -> Result<(), OutputError> { + if text.is_empty() { + return Ok(()); + } + + // Wait for modifier keys to be released (prevents Option-key garbage) + wait_for_modifiers_release().await; + + // Additional pre-type delay if configured + if self.pre_type_delay_ms > 0 { + tokio::time::sleep(std::time::Duration::from_millis( + self.pre_type_delay_ms as u64, + )) + .await; + } + + // Escape text for AppleScript + let escaped_text = Self::escape_for_applescript(text); + + // Build AppleScript to type text + // Using "keystroke" which types the text character by character + let mut script = format!( + r#"tell application "System Events" to keystroke "{}""#, + escaped_text + ); + + // Add Enter key if auto_submit is enabled + if self.auto_submit { + script.push_str( + r#" +tell application "System Events" to key code 36"#, + ); // 36 = Return key + } + + let output = Command::new("osascript") + .args(["-e", &script]) + .stdout(Stdio::null()) + .stderr(Stdio::piped()) + .output() + .await + .map_err(|e| { + if e.kind() == std::io::ErrorKind::NotFound { + OutputError::InjectionFailed("osascript not found".to_string()) + } else { + OutputError::InjectionFailed(e.to_string()) + } + })?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + // Check for common permission error + if stderr.contains("not allowed") || stderr.contains("accessibility") { + return Err(OutputError::InjectionFailed( + "Accessibility permission required. 
Grant access in System Settings > Privacy & Security > Accessibility".to_string() + )); + } + return Err(OutputError::InjectionFailed(format!( + "osascript failed: {}", + stderr + ))); + } + + // Send notification if enabled + if self.notify { + self.send_notification(text).await; + } + + tracing::info!("Text typed via osascript ({} chars)", text.len()); + Ok(()) + } + + async fn is_available(&self) -> bool { + // osascript is always available on macOS + cfg!(target_os = "macos") + && Command::new("which") + .arg("osascript") + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .await + .map(|s| s.success()) + .unwrap_or(false) + } + + fn name(&self) -> &'static str { + "osascript (macOS)" + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new() { + let output = OsascriptOutput::new(true, false, 0); + assert!(output.notify); + assert!(!output.auto_submit); + assert_eq!(output.pre_type_delay_ms, 0); + } + + #[test] + fn test_escape_for_applescript() { + assert_eq!( + OsascriptOutput::escape_for_applescript(r#"hello "world""#), + r#"hello \"world\""# + ); + assert_eq!( + OsascriptOutput::escape_for_applescript(r#"path\to\file"#), + r#"path\\to\\file"# + ); + } +} diff --git a/src/output/pbcopy.rs b/src/output/pbcopy.rs new file mode 100644 index 00000000..d82761b9 --- /dev/null +++ b/src/output/pbcopy.rs @@ -0,0 +1,133 @@ +//! macOS clipboard output via pbcopy +//! +//! Uses the native macOS pbcopy command for clipboard access. +//! This is the clipboard fallback on macOS. 
+ +use super::TextOutput; +use crate::error::OutputError; +use std::process::Stdio; +use tokio::io::AsyncWriteExt; +use tokio::process::Command; + +/// macOS clipboard output using pbcopy +pub struct PbcopyOutput { + /// Whether to show a desktop notification + notify: bool, +} + +impl PbcopyOutput { + /// Create a new pbcopy output + pub fn new(notify: bool) -> Self { + Self { notify } + } + + /// Send a desktop notification using osascript + async fn send_notification(&self, text: &str) { + // Truncate preview for notification + let preview = if text.chars().count() > 80 { + format!("{}...", text.chars().take(80).collect::<String>()) + } else { + text.to_string() + }; + + // Escape for AppleScript string + let escaped_preview = preview.replace('\\', "\\\\").replace('"', "\\\""); + + let script = format!( + r#"display notification "{}" with title "Copied to clipboard""#, + escaped_preview + ); + + let _ = Command::new("osascript") + .args(["-e", &script]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .await; + } +} + +#[async_trait::async_trait] +impl TextOutput for PbcopyOutput { + async fn output(&self, text: &str) -> Result<(), OutputError> { + if text.is_empty() { + return Ok(()); + } + + // Spawn pbcopy with stdin pipe + let mut child = Command::new("pbcopy") + .stdin(Stdio::piped()) + .stdout(Stdio::null()) + .stderr(Stdio::piped()) + .spawn() + .map_err(|e| { + if e.kind() == std::io::ErrorKind::NotFound { + OutputError::InjectionFailed("pbcopy not found".to_string()) + } else { + OutputError::InjectionFailed(e.to_string()) + } + })?; + + // Write text to stdin + if let Some(mut stdin) = child.stdin.take() { + stdin + .write_all(text.as_bytes()) + .await + .map_err(|e| OutputError::InjectionFailed(e.to_string()))?; + + // Close stdin to signal EOF + drop(stdin); + } + + // Wait for completion + let status = child + .wait() + .await + .map_err(|e| OutputError::InjectionFailed(e.to_string()))?; + + if !status.success() { + return 
Err(OutputError::InjectionFailed( + "pbcopy exited with error".to_string(), + )); + } + + // Send notification if enabled + if self.notify { + self.send_notification(text).await; + } + + tracing::info!("Text copied to clipboard via pbcopy ({} chars)", text.len()); + Ok(()) + } + + async fn is_available(&self) -> bool { + // pbcopy is always available on macOS + cfg!(target_os = "macos") + && Command::new("which") + .arg("pbcopy") + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .await + .map(|s| s.success()) + .unwrap_or(false) + } + + fn name(&self) -> &'static str { + "clipboard (pbcopy)" + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new() { + let output = PbcopyOutput::new(true); + assert!(output.notify); + + let output = PbcopyOutput::new(false); + assert!(!output.notify); + } +} diff --git a/src/output/post_process.rs b/src/output/post_process.rs index bf89de1f..af970c05 100644 --- a/src/output/post_process.rs +++ b/src/output/post_process.rs @@ -233,8 +233,9 @@ mod tests { #[tokio::test] async fn test_empty_output_fallback() { - // echo -n outputs nothing, which should trigger fallback - let config = make_config("echo -n ''", 5000); + // printf '' outputs nothing, which should trigger fallback + // (echo -n is not portable across platforms) + let config = make_config("printf ''", 5000); let processor = PostProcessor::new(&config); let result = processor.process("original text").await; assert_eq!(result, "original text"); // Falls back to original @@ -267,7 +268,8 @@ mod tests { #[tokio::test] async fn test_whitespace_trimming() { // Output has trailing newline which should be trimmed - let config = make_config("echo 'hello'", 5000); + // Use printf with \n to be portable across platforms + let config = make_config("printf 'hello\\n'", 5000); let processor = PostProcessor::new(&config); let result = processor.process("ignored").await; assert_eq!(result, "hello"); @@ -314,7 +316,7 @@ mod tests { async fn 
test_no_fallback_on_empty_returns_empty() { // When fallback_on_empty = false, empty output is returned as-is let config = PostProcessConfig { - command: "echo -n ''".to_string(), + command: "printf ''".to_string(), timeout_ms: 5000, trim: true, fallback_on_empty: false, @@ -327,12 +329,56 @@ mod tests { #[tokio::test] async fn test_fallback_on_empty_default_returns_original() { // Default behavior: empty output falls back to original text - let config = make_config("echo -n ''", 5000); + let config = make_config("printf ''", 5000); let processor = PostProcessor::new(&config); let result = processor.process("original text").await; assert_eq!(result, "original text"); } + #[tokio::test] + async fn test_no_trim_no_fallback_combination() { + // Both options off: whatever the command emits is returned verbatim, + // empty included. + let config = PostProcessConfig { + command: "printf ''".to_string(), + timeout_ms: 5000, + trim: false, + fallback_on_empty: false, + }; + let processor = PostProcessor::new(&config); + let result = processor.process("original text").await; + assert_eq!(result, ""); + } + + #[tokio::test] + async fn test_trim_then_empty_triggers_fallback() { + // Whitespace-only output should be considered empty after trimming, + // and trigger the fallback when fallback_on_empty is on. + let config = PostProcessConfig { + command: "printf ' \\n '".to_string(), + timeout_ms: 5000, + trim: true, + fallback_on_empty: true, + }; + let processor = PostProcessor::new(&config); + let result = processor.process("original text").await; + assert_eq!(result, "original text"); + } + + #[tokio::test] + async fn test_trim_then_empty_no_fallback_returns_empty() { + // Same scenario but fallback off: empty string surfaces. 
+ let config = PostProcessConfig { + command: "printf ' \\n '".to_string(), + timeout_ms: 5000, + trim: true, + fallback_on_empty: false, + }; + let processor = PostProcessor::new(&config); + let result = processor.process("original text").await; + assert_eq!(result, ""); + } + #[tokio::test] async fn test_context_passed_via_env_var() { // Command prints VOXTYPE_CONTEXT env var, stdin is current text @@ -347,7 +393,10 @@ mod tests { #[tokio::test] async fn test_no_context_env_var_when_none() { // VOXTYPE_CONTEXT should not be set when context is None - let config = make_config("echo \"context:${VOXTYPE_CONTEXT:-unset} stdin:$(cat)\"", 5000); + let config = make_config( + "echo \"context:${VOXTYPE_CONTEXT:-unset} stdin:$(cat)\"", + 5000, + ); let processor = PostProcessor::new(&config); let result = processor.process_with_context("current text", None).await; assert_eq!(result, "context:unset stdin:current text"); diff --git a/src/setup/app_bundle.rs b/src/setup/app_bundle.rs new file mode 100644 index 00000000..75151489 --- /dev/null +++ b/src/setup/app_bundle.rs @@ -0,0 +1,353 @@ +//! macOS App Bundle creation and Login Items setup +//! +//! Creates a proper macOS app bundle for voxtype and manages Login Items. +//! This is preferred over launchd for the daemon because: +//! - App bundles can be granted Accessibility, Input Monitoring, and Microphone permissions +//! 
- Login Items inherit these permissions correctly (launchd services don't get mic access) + +use std::fs; +use std::os::unix::fs::PermissionsExt; +use std::path::PathBuf; +use std::process::Command; + +use super::{get_voxtype_path, print_failure, print_info, print_success, print_warning}; + +const APP_NAME: &str = "Voxtype.app"; +pub const BUNDLE_ID: &str = "io.voxtype.daemon"; + +/// Get the path to the app bundle +pub fn app_bundle_path() -> PathBuf { + PathBuf::from("/Applications").join(APP_NAME) +} + +/// Get the path to the binary inside the app bundle +pub fn app_binary_path() -> PathBuf { + app_bundle_path() + .join("Contents") + .join("MacOS") + .join("voxtype-bin") +} + +/// Get the path to the logs directory +fn logs_dir() -> Option { + dirs::home_dir().map(|home| home.join("Library/Logs/voxtype")) +} + +/// Generate Info.plist content +fn generate_info_plist(version: &str) -> String { + format!( + r#" + + + + CFBundleExecutable + voxtype-bin + CFBundleIdentifier + {bundle_id} + CFBundleName + Voxtype + CFBundleDisplayName + Voxtype + CFBundleVersion + {version} + CFBundleShortVersionString + {version} + CFBundlePackageType + APPL + LSMinimumSystemVersion + 11.0 + LSUIElement + + NSMicrophoneUsageDescription + Voxtype needs microphone access for speech-to-text transcription. + NSAppleEventsUsageDescription + Voxtype needs accessibility access to type transcribed text. + NSInputMonitoringUsageDescription + Voxtype monitors keyboard input to detect your push-to-talk hotkey. 
+ + +"#, + bundle_id = BUNDLE_ID, + version = version, + ) +} + +/// Create the app bundle +pub fn create_app_bundle() -> anyhow::Result<()> { + let app_path = app_bundle_path(); + let contents_path = app_path.join("Contents"); + let macos_path = contents_path.join("MacOS"); + + // Create directory structure + fs::create_dir_all(&macos_path)?; + + // Get version from current binary + let version = env!("CARGO_PKG_VERSION"); + + // Write Info.plist + fs::write( + contents_path.join("Info.plist"), + generate_info_plist(version), + )?; + + // Copy the current voxtype binary (handle self-copy case) + let source_binary = get_voxtype_path(); + let dest_binary = macos_path.join("voxtype-bin"); + let source_canon = + fs::canonicalize(&source_binary).unwrap_or_else(|_| PathBuf::from(&source_binary)); + let dest_canon = fs::canonicalize(&dest_binary).unwrap_or_else(|_| dest_binary.clone()); + + let binary_replaced = source_canon != dest_canon; + if binary_replaced { + // Copy via temp file for atomicity (prevents corruption if interrupted) + let temp_binary = macos_path.join("voxtype-bin.tmp"); + fs::copy(&source_binary, &temp_binary)?; + fs::rename(&temp_binary, &dest_binary)?; + } + + // Make binary executable + let mut perms = fs::metadata(&dest_binary)?.permissions(); + perms.set_mode(0o755); + fs::set_permissions(&dest_binary, perms)?; + + // Remove legacy wrapper script if present (replaced by direct binary launch) + let wrapper_path = macos_path.join("voxtype"); + let _ = fs::remove_file(&wrapper_path); + + // Sign the Mach-O binary individually first so it gets proper code page hashes, + // then sign the whole bundle (--deep alone doesn't always hash inner binaries correctly) + let _ = Command::new("codesign") + .args(["--force", "--sign", "-", dest_binary.to_str().unwrap()]) + .output(); + + let _ = Command::new("codesign") + .args([ + "--force", + "--deep", + "--sign", + "-", + app_path.to_str().unwrap(), + ]) + .output(); + + // Reset TCC entries only when the 
binary changed, so macOS re-prompts for the + // new code signature. Skip on self-copy to preserve existing permissions. + if binary_replaced { + let _ = Command::new("tccutil") + .args(["reset", "Accessibility", BUNDLE_ID]) + .output(); + let _ = Command::new("tccutil") + .args(["reset", "ListenEvent", BUNDLE_ID]) + .output(); + } + + Ok(()) +} + +/// Add app to Login Items +pub fn add_to_login_items() -> anyhow::Result<bool> { + let app_path = app_bundle_path(); + let script = format!( + r#"tell application "System Events" + if not (exists login item "Voxtype") then + make login item at end with properties {{path:"{}", hidden:true}} + end if +end tell"#, + app_path.display() + ); + + let output = Command::new("osascript").args(["-e", &script]).output()?; + + Ok(output.status.success()) +} + +/// Remove app from Login Items +pub fn remove_from_login_items() -> anyhow::Result<bool> { + let script = r#"tell application "System Events" + if exists login item "Voxtype" then + delete login item "Voxtype" + end if +end tell"#; + + let output = Command::new("osascript").args(["-e", script]).output()?; + + Ok(output.status.success()) +} + +/// Check if app is in Login Items +pub fn is_in_login_items() -> bool { + let script = r#"tell application "System Events" + return exists login item "Voxtype" +end tell"#; + + Command::new("osascript") + .args(["-e", script]) + .output() + .map(|o| String::from_utf8_lossy(&o.stdout).trim() == "true") + .unwrap_or(false) +} + +/// Remove the app bundle +pub fn remove_app_bundle() -> anyhow::Result<()> { + let app_path = app_bundle_path(); + if app_path.exists() { + fs::remove_dir_all(&app_path)?; + } + Ok(()) +} + +/// Open System Settings to the relevant privacy pane +pub fn open_privacy_settings(pane: &str) -> anyhow::Result<()> { + let url = match pane { + "accessibility" => { + "x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility" + } + "input" => 
"x-apple.systempreferences:com.apple.preference.security?Privacy_ListenEvent", + "microphone" => { + "x-apple.systempreferences:com.apple.preference.security?Privacy_Microphone" + } + "login" => "x-apple.systempreferences:com.apple.LoginItems-Settings.extension", + _ => return Err(anyhow::anyhow!("Unknown pane: {}", pane)), + }; + + Command::new("open").arg(url).spawn()?; + Ok(()) +} + +/// Install the app bundle and set up Login Items +pub async fn install() -> anyhow::Result<()> { + println!("Installing Voxtype.app...\n"); + + // Create logs directory + if let Some(logs) = logs_dir() { + fs::create_dir_all(&logs)?; + print_success(&format!("Logs directory: {:?}", logs)); + } + + // Create app bundle + create_app_bundle()?; + print_success(&format!("Created: {:?}", app_bundle_path())); + + // Add to Login Items + if add_to_login_items()? { + print_success("Added to Login Items"); + } else { + print_warning("Could not add to Login Items automatically"); + print_info("Add manually: System Settings > General > Login Items"); + } + + // Launch the app + let launched = Command::new("open") + .arg(app_bundle_path().as_os_str()) + .status() + .map(|s| s.success()) + .unwrap_or(false); + + if launched { + print_success("Launched Voxtype.app"); + } else { + print_warning("Could not launch automatically"); + print_info("Start manually: open /Applications/Voxtype.app"); + } + + println!("\n---"); + println!("\x1b[32m✓ Installation complete!\x1b[0m"); + println!(); + println!("\x1b[1mIMPORTANT: Grant permissions to Voxtype.app:\x1b[0m"); + println!(); + println!(" 1. System Settings > Privacy & Security > \x1b[1mAccessibility\x1b[0m"); + println!(" Add and enable Voxtype"); + println!(); + println!(" 2. System Settings > Privacy & Security > \x1b[1mInput Monitoring\x1b[0m"); + println!(" Add and enable Voxtype"); + println!(); + println!(" 3. 
System Settings > Privacy & Security > \x1b[1mMicrophone\x1b[0m"); + println!(" Voxtype should appear after first use - enable it"); + println!(); + println!("Voxtype will start automatically on login."); + + Ok(()) +} + +/// Uninstall the app bundle and remove from Login Items +pub async fn uninstall() -> anyhow::Result<()> { + println!("Uninstalling Voxtype.app...\n"); + + // Stop any running instance + let _ = Command::new("pkill") + .args(["-9", "-f", "Voxtype.app"]) + .status(); + + // Remove from Login Items + if remove_from_login_items()? { + print_success("Removed from Login Items"); + } + + // Remove app bundle + if app_bundle_path().exists() { + remove_app_bundle()?; + print_success("Removed Voxtype.app"); + } else { + print_info("Voxtype.app was not installed"); + } + + println!("\n---"); + println!("\x1b[32m✓ Uninstallation complete!\x1b[0m"); + + Ok(()) +} + +/// Show installation status +pub async fn status() -> anyhow::Result<()> { + println!("Voxtype.app Status\n"); + println!("==================\n"); + + // Check app bundle + if app_bundle_path().exists() { + print_success(&format!("App installed: {:?}", app_bundle_path())); + } else { + print_failure("Voxtype.app not installed"); + print_info("Install with: voxtype setup app-bundle"); + return Ok(()); + } + + // Check Login Items + if is_in_login_items() { + print_success("In Login Items (will start on login)"); + } else { + print_warning("Not in Login Items"); + print_info("Add with: voxtype setup app-bundle"); + } + + // Check if running + let output = Command::new("pgrep").args(["-f", "Voxtype.app"]).output(); + + match output { + Ok(out) if out.status.success() => { + let pid = String::from_utf8_lossy(&out.stdout); + print_success(&format!("Running (PID: {})", pid.trim())); + } + _ => { + print_info("Not currently running"); + print_info("Start with: open /Applications/Voxtype.app"); + } + } + + // Show log locations + if let Some(logs) = logs_dir() { + println!("\nLogs:"); + let stdout_log = 
logs.join("stdout.log"); + let stderr_log = logs.join("stderr.log"); + + if stdout_log.exists() { + let size = fs::metadata(&stdout_log).map(|m| m.len()).unwrap_or(0); + println!(" stdout: {:?} ({} bytes)", stdout_log, size); + } + if stderr_log.exists() { + let size = fs::metadata(&stderr_log).map(|m| m.len()).unwrap_or(0); + println!(" stderr: {:?} ({} bytes)", stderr_log, size); + } + } + + Ok(()) +} diff --git a/src/setup/binary.rs b/src/setup/binary.rs new file mode 100644 index 00000000..56551b81 --- /dev/null +++ b/src/setup/binary.rs @@ -0,0 +1,778 @@ +//! Engine-agnostic voxtype binary inventory and switching. +//! +//! Voxtype ships seven prebuilt variants in `/usr/lib/voxtype/` (Whisper: +//! avx2/avx512/vulkan; ONNX: avx2/avx512/cuda/migraphx). `/usr/bin/voxtype` is a +//! symlink into that directory, and switching engines means updating that +//! symlink. +//! +//! Source builds typically live at `/usr/local/bin/voxtype` or `~/.cargo/bin/` +//! and are a single binary with whatever features were enabled at compile +//! time. They are reported as `InstallKind::Source` and switching is not +//! applicable. + +use serde::Serialize; +use std::fs; +use std::io::Write; +use std::os::unix::fs::{PermissionsExt, symlink}; +use std::path::{Path, PathBuf}; +use std::process::Command; + +pub const LIB_DIR: &str = "/usr/lib/voxtype"; +pub const SYSTEM_BIN: &str = "/usr/bin/voxtype"; + +/// Install `/usr/bin/voxtype` so it dispatches to `binary_path`. CPU-only +/// variants get a plain symlink; GPU/ONNX variants whose binary lives in +/// a /usr/lib/voxtype// subdirectory next to companion ONNX +/// Runtime provider .so files get a thin shell wrapper that `exec`s the +/// canonical real binary path. +/// +/// Why the wrapper: ORT's CUDA/MIGraphX EPs resolve their provider .so +/// files from `argv[0]`'s dirname (not /proc/self/exe). 
A plain symlink +/// at /usr/bin/voxtype leaves argv[0] = "/usr/bin/voxtype", so ORT +/// searches /usr/bin/ for libonnxruntime_providers_*.so, doesn't find +/// them, and silently falls back to CPU. `exec`ing the real binary path +/// replaces argv[0] with that path, so ORT searches the right subdir. +/// +/// `binary_path` may be the top-level convenience symlink (e.g. +/// /usr/lib/voxtype/voxtype-onnx-migraphx) or the canonical real path; +/// this function canonicalizes before deciding wrapper vs symlink. +pub fn install_active_binary(active_bin: &str, binary_path: &Path) -> anyhow::Result<()> { + let canonical = fs::canonicalize(binary_path).unwrap_or_else(|_| binary_path.to_path_buf()); + + let needs_wrapper = canonical + .parent() + .and_then(|p| p.file_name()) + .and_then(|s| s.to_str()) + .map(|name| name.starts_with("cuda-") || name == "migraphx") + .unwrap_or(false); + + if fs::symlink_metadata(active_bin).is_ok() { + fs::remove_file(active_bin).map_err(|e| { + anyhow::anyhow!( + "Failed to remove existing {} (need sudo?): {}\n\ + Try: sudo voxtype setup onnx --enable", + active_bin, + e + ) + })?; + } + + if needs_wrapper { + // MIGraphX needs a writeable model-cache directory or its runtime + // fails to save compiled graphs and inference errors out (silent + // CPU fallback isn't available — the EP fails the call). Default + // to $XDG_CACHE_HOME/voxtype/migraphx, honoring any user override. 
+ let is_migraphx = canonical + .parent() + .and_then(|p| p.file_name()) + .and_then(|s| s.to_str()) + == Some("migraphx"); + let migraphx_env = if is_migraphx { + "\ + : \"${ORT_MIGRAPHX_MODEL_CACHE_PATH:=${XDG_CACHE_HOME:-$HOME/.cache}/voxtype/migraphx}\"\n\ + mkdir -p \"$ORT_MIGRAPHX_MODEL_CACHE_PATH\"\n\ + export ORT_MIGRAPHX_MODEL_CACHE_PATH\n" + } else { + "" + }; + let wrapper = format!( + "#!/bin/sh\n\ + # voxtype dispatch wrapper.\n\ + # Execs the GPU/ONNX binary by canonical path so ORT's argv[0]\n\ + # based provider .so lookup resolves to the right subdirectory.\n\ + # Managed by `voxtype setup onnx --enable` and the AUR package's\n\ + # post_install / post_upgrade hooks; do not edit by hand.\n\ + {}\ + exec {} \"$@\"\n", + migraphx_env, + canonical.display() + ); + let mut f = fs::File::create(active_bin).map_err(|e| { + anyhow::anyhow!( + "Failed to create {} (need sudo?): {}\n\ + Try: sudo voxtype setup onnx --enable", + active_bin, + e + ) + })?; + f.write_all(wrapper.as_bytes())?; + f.sync_all()?; + let mut perms = fs::metadata(active_bin)?.permissions(); + perms.set_mode(0o755); + fs::set_permissions(active_bin, perms)?; + } else { + symlink(binary_path, active_bin).map_err(|e| { + anyhow::anyhow!( + "Failed to create symlink (need sudo?): {}\n\ + Try: sudo voxtype setup onnx --enable", + e + ) + })?; + } + + let _ = Command::new("restorecon").arg(active_bin).status(); + Ok(()) +} + +/// Read /usr/bin/voxtype and return the canonical real binary it dispatches +/// to, regardless of whether it's a symlink or a wrapper script. Used by +/// the AUR pre-upgrade flow (and equivalent) to preserve the user's chosen +/// backend across package upgrades. 
+pub fn resolve_active_binary(active_bin: &str) -> Option { + let meta = fs::symlink_metadata(active_bin).ok()?; + if meta.file_type().is_symlink() { + fs::canonicalize(active_bin).ok() + } else if meta.file_type().is_file() { + let content = fs::read_to_string(active_bin).ok()?; + for line in content.lines() { + if let Some(rest) = line.trim().strip_prefix("exec ") { + return rest.split_whitespace().next().map(PathBuf::from); + } + } + None + } else { + None + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum EngineFamily { + Whisper, + Onnx, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum Acceleration { + Avx2, + Avx512, + Vulkan, + Cuda, + /// AMD GPU acceleration via the MIGraphX execution provider in ONNX + /// Runtime. Replaced ROCm in 0.7.0; old `voxtype-onnx-rocm` binary names + /// still resolve to this variant via [`Variant::from_binary_name`] for + /// the symlink-compat window. + Migraphx, + /// Source-built generic binary (no specific tier). + Native, +} + +/// Every binary name voxtype recognizes in `/usr/lib/voxtype/`. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "kebab-case")] +pub enum Variant { + WhisperAvx2, + WhisperAvx512, + WhisperVulkan, + WhisperNative, + OnnxAvx2, + OnnxAvx512, + /// CUDA 12.x (NVIDIA, ort built against libcudart.so.12). + OnnxCuda12, + /// CUDA 13.x (NVIDIA, ort built against libcudart.so.13, requires driver 580+). + OnnxCuda13, + /// Unversioned CUDA binary, present in source-built or pre-0.7.0 installs. 
+ OnnxCuda, + OnnxMigraphx, + OnnxNative, +} + +impl Variant { + pub const ALL: &'static [Variant] = &[ + Variant::WhisperAvx2, + Variant::WhisperAvx512, + Variant::WhisperVulkan, + Variant::WhisperNative, + Variant::OnnxAvx2, + Variant::OnnxAvx512, + // OnnxCuda first so the TUI's (Onnx, Cuda) matrix cell maps to the + // generic CUDA variant; cu12 and cu13 are specific binaries that + // live in the inventory list rather than a unique matrix cell. + Variant::OnnxCuda, + Variant::OnnxCuda12, + Variant::OnnxCuda13, + Variant::OnnxMigraphx, + Variant::OnnxNative, + ]; + + pub const fn binary_name(self) -> &'static str { + match self { + Variant::WhisperAvx2 => "voxtype-avx2", + Variant::WhisperAvx512 => "voxtype-avx512", + Variant::WhisperVulkan => "voxtype-vulkan", + Variant::WhisperNative => "voxtype-native", + Variant::OnnxAvx2 => "voxtype-onnx-avx2", + Variant::OnnxAvx512 => "voxtype-onnx-avx512", + Variant::OnnxCuda12 => "voxtype-onnx-cuda-12", + Variant::OnnxCuda13 => "voxtype-onnx-cuda-13", + Variant::OnnxCuda => "voxtype-onnx-cuda", + Variant::OnnxMigraphx => "voxtype-onnx-migraphx", + Variant::OnnxNative => "voxtype-onnx", + } + } + + pub const fn family(self) -> EngineFamily { + match self { + Variant::WhisperAvx2 + | Variant::WhisperAvx512 + | Variant::WhisperVulkan + | Variant::WhisperNative => EngineFamily::Whisper, + Variant::OnnxAvx2 + | Variant::OnnxAvx512 + | Variant::OnnxCuda12 + | Variant::OnnxCuda13 + | Variant::OnnxCuda + | Variant::OnnxMigraphx + | Variant::OnnxNative => EngineFamily::Onnx, + } + } + + pub const fn acceleration(self) -> Acceleration { + match self { + Variant::WhisperAvx2 | Variant::OnnxAvx2 => Acceleration::Avx2, + Variant::WhisperAvx512 | Variant::OnnxAvx512 => Acceleration::Avx512, + Variant::WhisperVulkan => Acceleration::Vulkan, + Variant::OnnxCuda12 | Variant::OnnxCuda13 | Variant::OnnxCuda => Acceleration::Cuda, + Variant::OnnxMigraphx => Acceleration::Migraphx, + Variant::WhisperNative | Variant::OnnxNative => 
Acceleration::Native, + } + } + + pub const fn display(self) -> &'static str { + match self { + Variant::WhisperAvx2 => "Whisper (AVX2)", + Variant::WhisperAvx512 => "Whisper (AVX-512)", + Variant::WhisperVulkan => "Whisper (Vulkan)", + Variant::WhisperNative => "Whisper (native)", + Variant::OnnxAvx2 => "ONNX (AVX2)", + Variant::OnnxAvx512 => "ONNX (AVX-512)", + Variant::OnnxCuda12 => "ONNX (CUDA 12)", + Variant::OnnxCuda13 => "ONNX (CUDA 13)", + Variant::OnnxCuda => "ONNX (CUDA)", + Variant::OnnxMigraphx => "ONNX (MIGraphX)", + Variant::OnnxNative => "ONNX (native)", + } + } + + /// Reverse lookup. Accepts current names plus legacy `voxtype-parakeet*` + /// names from before the ONNX rename. + pub fn from_binary_name(name: &str) -> Option { + match name { + "voxtype-avx2" => Some(Variant::WhisperAvx2), + "voxtype-avx512" => Some(Variant::WhisperAvx512), + "voxtype-vulkan" => Some(Variant::WhisperVulkan), + "voxtype-native" => Some(Variant::WhisperNative), + "voxtype-onnx-avx2" | "voxtype-parakeet-avx2" => Some(Variant::OnnxAvx2), + "voxtype-onnx-avx512" | "voxtype-parakeet-avx512" => Some(Variant::OnnxAvx512), + "voxtype-onnx-cuda-12" => Some(Variant::OnnxCuda12), + "voxtype-onnx-cuda-13" => Some(Variant::OnnxCuda13), + "voxtype-onnx-cuda" | "voxtype-parakeet-cuda" => Some(Variant::OnnxCuda), + // Canonical name for the MIGraphX-based ONNX binary (0.7.0+). + "voxtype-onnx-migraphx" => Some(Variant::OnnxMigraphx), + // Legacy ROCm names continue to resolve during the symlink-compat + // window so `voxtype-bin` users with the old AUR symlink still see + // the variant correctly identified. + "voxtype-onnx-rocm" | "voxtype-parakeet-rocm" => Some(Variant::OnnxMigraphx), + "voxtype-onnx" | "voxtype-parakeet" => Some(Variant::OnnxNative), + _ => None, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum InstallKind { + /// `/usr/bin/voxtype` resolves into `/usr/lib/voxtype/`. 
Switching is + /// supported by rewriting that symlink. + Package, + /// The running binary lives outside `/usr/lib/voxtype/`. Single binary, + /// switching not applicable. + Source, +} + +#[derive(Debug, Clone, Serialize)] +pub struct Cpu { + pub avx2: bool, + pub avx512: bool, +} + +#[derive(Debug, Clone, Serialize)] +pub struct Gpus { + pub nvidia: bool, + pub amd: bool, +} + +#[derive(Debug, Clone, Serialize)] +pub struct VariantStatus { + pub variant: Variant, + pub binary_name: String, + pub installed: bool, + pub runs_on_this_cpu: bool, + /// True if the variant has no GPU requirement, or its required GPU vendor + /// is detected. + pub gpu_available: bool, + pub active: bool, +} + +#[derive(Debug, Clone, Serialize)] +pub struct Inventory { + pub install_kind: InstallKind, + pub binary_path: PathBuf, + pub package_lib_dir: Option, + pub active_variant: Option, + /// Empty for `InstallKind::Source`. + pub variants: Vec, + pub cpu: Cpu, + pub gpus: Gpus, + pub compiled_features: Vec<&'static str>, + pub recommendation: Recommendation, +} + +/// Hardware-driven recommendations: best variant per engine family for the +/// detected CPU/GPU. +#[derive(Debug, Clone, Serialize)] +pub struct Recommendation { + pub whisper: Variant, + pub whisper_reason: &'static str, + pub onnx: Variant, + pub onnx_reason: &'static str, + /// Single overall pick when the user has no engine preference. Defaults to + /// the Whisper recommendation since voxtype's default engine is Whisper. + pub primary: Variant, +} + +pub fn recommend(cpu: &Cpu, gpus: &Gpus) -> Recommendation { + let whisper = recommend_whisper(cpu, gpus); + let onnx = recommend_onnx(cpu, gpus); + Recommendation { + whisper: whisper.0, + whisper_reason: whisper.1, + onnx: onnx.0, + onnx_reason: onnx.1, + primary: whisper.0, + } +} + +fn recommend_whisper(cpu: &Cpu, gpus: &Gpus) -> (Variant, &'static str) { + if gpus.nvidia || gpus.amd { + // Vulkan covers all GPU vendors and is the most reliable Whisper GPU path. 
+ return ( + Variant::WhisperVulkan, + "GPU detected; Vulkan covers NVIDIA, AMD, and Intel in one binary.", + ); + } + if cpu.avx512 { + return ( + Variant::WhisperAvx512, + "AVX-512 CPU, no GPU; this is the fastest CPU-only Whisper build.", + ); + } + ( + Variant::WhisperAvx2, + "AVX2-only CPU, no GPU; the safe default for Whisper.", + ) +} + +fn recommend_onnx(cpu: &Cpu, gpus: &Gpus) -> (Variant, &'static str) { + // CUDA/MIGraphX bundles ship with AVX-512 ONNX Runtime, so the CPU has to + // support it before we can recommend a GPU variant. + if gpus.nvidia && cpu.avx512 { + return ( + Variant::OnnxCuda, + "NVIDIA GPU + AVX-512 CPU; CUDA execution provider is the fastest Parakeet path.", + ); + } + if gpus.amd && cpu.avx512 { + return ( + Variant::OnnxAvx512, + "AMD GPU detected. The MIGraphX execution provider is new and may not register on \ + every driver version; ONNX (AVX-512) on CPU is the safe default. Try ONNX (MIGraphX) \ + once you've verified it works on your card.", + ); + } + if cpu.avx512 { + return ( + Variant::OnnxAvx512, + "AVX-512 CPU, no compatible GPU; this is the fastest CPU-only ONNX build.", + ); + } + ( + Variant::OnnxAvx2, + "AVX2-only CPU; ONNX (AVX2) keeps Parakeet/Moonshine/etc. 
available without GPU.", + ) +} + +pub fn detect_cpu() -> Cpu { + Cpu { + #[cfg(target_arch = "x86_64")] + avx2: std::arch::is_x86_feature_detected!("avx2"), + #[cfg(target_arch = "x86_64")] + avx512: std::arch::is_x86_feature_detected!("avx512f"), + #[cfg(not(target_arch = "x86_64"))] + avx2: false, + #[cfg(not(target_arch = "x86_64"))] + avx512: false, + } +} + +pub fn detect_gpus() -> Gpus { + Gpus { + nvidia: detect_nvidia_gpu(), + amd: detect_amd_gpu(), + } +} + +fn detect_nvidia_gpu() -> bool { + if let Ok(output) = Command::new("nvidia-smi") + .arg("--query-gpu=name") + .arg("--format=csv,noheader") + .output() + { + if output.status.success() && !output.stdout.is_empty() { + return true; + } + } + Path::new("/dev/nvidia0").exists() +} + +fn detect_amd_gpu() -> bool { + if let Ok(output) = Command::new("lspci").output() { + if output.status.success() { + let s = String::from_utf8_lossy(&output.stdout).to_lowercase(); + if s.contains("amd") || s.contains("radeon") { + return true; + } + } + } + if let Ok(entries) = fs::read_dir("/dev/dri") { + for entry in entries.flatten() { + if let Some(name) = entry.file_name().to_str() { + if let Some(num) = name.strip_prefix("renderD") { + let card_num = num.parse::().unwrap_or(128) - 128; + let vendor_path = format!("/sys/class/drm/card{}/device/vendor", card_num); + if let Ok(vendor) = fs::read_to_string(&vendor_path) { + if vendor.trim() == "0x1002" { + return true; + } + } + } + } + } + } + false +} + +/// Path of the currently running binary, with all symlinks resolved. 
+pub fn current_binary_path() -> PathBuf { + fs::read_link("/proc/self/exe").unwrap_or_else(|_| PathBuf::from(SYSTEM_BIN)) +} + +pub fn detect_install_kind(binary_path: &Path) -> InstallKind { + let canonical = fs::canonicalize(binary_path).unwrap_or_else(|_| binary_path.to_path_buf()); + if canonical.starts_with(LIB_DIR) { + InstallKind::Package + } else { + InstallKind::Source + } +} + +/// Read the `/usr/bin/voxtype` symlink to learn which packaged variant is +/// active. Returns `None` for source installs, missing symlinks, or unknown +/// targets. +pub fn active_variant() -> Option { + // Handle both shapes /usr/bin/voxtype can take: a symlink (CPU + // variants) or a wrapper script (GPU/ONNX variants whose binary + // lives in a /usr/lib/voxtype// subdir alongside companion + // .so files). resolve_active_binary returns the canonical real + // binary path in both cases; we look up the variant from its + // filename. Falls back to the legacy fs::read_link path for + // robustness on edge cases. + let target = resolve_active_binary(SYSTEM_BIN) + .or_else(|| fs::read_link(SYSTEM_BIN).ok())?; + let name = target.file_name()?.to_str()?; + Variant::from_binary_name(name) +} + +pub fn enumerate_installed() -> Vec { + Variant::ALL + .iter() + .filter(|v| Path::new(LIB_DIR).join(v.binary_name()).exists()) + .copied() + .collect() +} + +fn variant_runs_on_cpu(v: Variant, cpu: &Cpu) -> bool { + match v.acceleration() { + Acceleration::Avx512 => cpu.avx512, + // ONNX GPU binaries bundle an ONNX Runtime built with AVX-512. + // Runtime CPU dispatch in ORT mostly handles fallback, but the + // binary itself can still trip SIGILL on init. Treat AVX-512 as + // a hard requirement for CUDA/MIGraphX variants. 
+ Acceleration::Cuda | Acceleration::Migraphx => cpu.avx512, + Acceleration::Avx2 | Acceleration::Vulkan | Acceleration::Native => cpu.avx2, + } +} + +fn variant_gpu_available(v: Variant, g: &Gpus) -> bool { + match v.acceleration() { + Acceleration::Cuda => g.nvidia, + Acceleration::Migraphx => g.amd, + _ => true, + } +} + +pub fn compiled_features() -> Vec<&'static str> { + let mut f = Vec::new(); + if cfg!(feature = "parakeet") { + f.push("parakeet"); + } + if cfg!(feature = "gpu-vulkan") { + f.push("gpu-vulkan"); + } + if cfg!(feature = "gpu-cuda") { + f.push("gpu-cuda"); + } + if cfg!(feature = "gpu-hipblas") { + f.push("gpu-hipblas"); + } + if cfg!(feature = "gpu-metal") { + f.push("gpu-metal"); + } + f +} + +pub fn inventory() -> Inventory { + let cpu = detect_cpu(); + let gpus = detect_gpus(); + let binary_path = current_binary_path(); + let install_kind = detect_install_kind(&binary_path); + let active = active_variant(); + + let variants = if install_kind == InstallKind::Package { + Variant::ALL + .iter() + .map(|&v| VariantStatus { + variant: v, + binary_name: v.binary_name().to_string(), + installed: Path::new(LIB_DIR).join(v.binary_name()).exists(), + runs_on_this_cpu: variant_runs_on_cpu(v, &cpu), + gpu_available: variant_gpu_available(v, &gpus), + active: active == Some(v), + }) + .collect() + } else { + Vec::new() + }; + + let package_lib_dir = if Path::new(LIB_DIR).is_dir() { + Some(PathBuf::from(LIB_DIR)) + } else { + None + }; + + let recommendation = recommend(&cpu, &gpus); + + Inventory { + install_kind, + binary_path, + package_lib_dir, + active_variant: active, + variants, + cpu, + gpus, + compiled_features: compiled_features(), + recommendation, + } +} + +/// Rewrite `/usr/bin/voxtype` to point at the requested variant's binary. +/// Requires write access to `/usr/bin/`; callers should run with sudo. 
+pub fn switch_to(variant: Variant) -> anyhow::Result<()> { + let binary_path = Path::new(LIB_DIR).join(variant.binary_name()); + + if !binary_path.exists() { + anyhow::bail!( + "Binary not found: {}\n\ + Install the appropriate voxtype package variant.", + binary_path.display() + ); + } + + install_active_binary(SYSTEM_BIN, &binary_path)?; + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn binary_names_are_unique() { + let mut names: Vec<&str> = Variant::ALL.iter().map(|v| v.binary_name()).collect(); + names.sort(); + let original_len = names.len(); + names.dedup(); + assert_eq!(names.len(), original_len, "duplicate binary names"); + } + + #[test] + fn round_trip_binary_names() { + for v in Variant::ALL { + assert_eq!(Variant::from_binary_name(v.binary_name()), Some(*v)); + } + } + + #[test] + fn legacy_parakeet_names_resolve() { + assert_eq!( + Variant::from_binary_name("voxtype-parakeet-avx2"), + Some(Variant::OnnxAvx2) + ); + assert_eq!( + Variant::from_binary_name("voxtype-parakeet-cuda"), + Some(Variant::OnnxCuda) + ); + assert_eq!( + Variant::from_binary_name("voxtype-parakeet"), + Some(Variant::OnnxNative) + ); + } + + #[test] + fn unknown_binary_name_is_none() { + assert_eq!(Variant::from_binary_name("voxtype-totally-fake"), None); + assert_eq!(Variant::from_binary_name(""), None); + } + + #[test] + fn family_partition() { + let whisper = Variant::ALL + .iter() + .filter(|v| v.family() == EngineFamily::Whisper) + .count(); + let onnx = Variant::ALL + .iter() + .filter(|v| v.family() == EngineFamily::Onnx) + .count(); + assert_eq!(whisper, 4); + assert_eq!(onnx, 7); + assert_eq!(whisper + onnx, Variant::ALL.len()); + } + + #[test] + fn cpu_gating() { + let no_avx512 = Cpu { + avx2: true, + avx512: false, + }; + assert!(variant_runs_on_cpu(Variant::WhisperAvx2, &no_avx512)); + assert!(!variant_runs_on_cpu(Variant::WhisperAvx512, &no_avx512)); + assert!(!variant_runs_on_cpu(Variant::OnnxCuda, &no_avx512)); + 
assert!(variant_runs_on_cpu(Variant::WhisperVulkan, &no_avx512)); + + let full = Cpu { + avx2: true, + avx512: true, + }; + assert!(variant_runs_on_cpu(Variant::WhisperAvx512, &full)); + assert!(variant_runs_on_cpu(Variant::OnnxCuda, &full)); + + let nothing = Cpu { + avx2: false, + avx512: false, + }; + assert!(!variant_runs_on_cpu(Variant::WhisperAvx2, ¬hing)); + assert!(!variant_runs_on_cpu(Variant::WhisperNative, ¬hing)); + } + + #[test] + fn gpu_gating() { + let nvidia_only = Gpus { + nvidia: true, + amd: false, + }; + assert!(variant_gpu_available(Variant::OnnxCuda, &nvidia_only)); + assert!(!variant_gpu_available(Variant::OnnxMigraphx, &nvidia_only)); + assert!(variant_gpu_available(Variant::WhisperVulkan, &nvidia_only)); + + let none = Gpus { + nvidia: false, + amd: false, + }; + assert!(!variant_gpu_available(Variant::OnnxCuda, &none)); + assert!(!variant_gpu_available(Variant::OnnxMigraphx, &none)); + assert!(variant_gpu_available(Variant::WhisperAvx2, &none)); + } + + #[test] + fn detect_install_kind_classifies_package_vs_source() { + assert_eq!( + detect_install_kind(Path::new("/usr/lib/voxtype/voxtype-avx2")), + InstallKind::Package + ); + assert_eq!( + detect_install_kind(Path::new("/usr/local/bin/voxtype")), + InstallKind::Source + ); + assert_eq!( + detect_install_kind(Path::new("/home/user/.cargo/bin/voxtype")), + InstallKind::Source + ); + } + + #[test] + fn recommendations_match_hardware() { + // No GPU, AVX2 only → Whisper AVX2 + ONNX AVX2. + let r = recommend( + &Cpu { avx2: true, avx512: false }, + &Gpus { nvidia: false, amd: false }, + ); + assert_eq!(r.whisper, Variant::WhisperAvx2); + assert_eq!(r.onnx, Variant::OnnxAvx2); + assert_eq!(r.primary, Variant::WhisperAvx2); + + // No GPU, AVX-512 → Whisper AVX-512 + ONNX AVX-512. 
+ let r = recommend( + &Cpu { avx2: true, avx512: true }, + &Gpus { nvidia: false, amd: false }, + ); + assert_eq!(r.whisper, Variant::WhisperAvx512); + assert_eq!(r.onnx, Variant::OnnxAvx512); + + // NVIDIA + AVX-512 → Whisper Vulkan + ONNX CUDA. + let r = recommend( + &Cpu { avx2: true, avx512: true }, + &Gpus { nvidia: true, amd: false }, + ); + assert_eq!(r.whisper, Variant::WhisperVulkan); + assert_eq!(r.onnx, Variant::OnnxCuda); + + // NVIDIA but no AVX-512 → CUDA bundle won't load, fall back to ONNX AVX2. + let r = recommend( + &Cpu { avx2: true, avx512: false }, + &Gpus { nvidia: true, amd: false }, + ); + assert_eq!(r.whisper, Variant::WhisperVulkan); + assert_eq!(r.onnx, Variant::OnnxAvx2); + + // AMD + AVX-512 → Vulkan for Whisper, AVX-512 (not MIGraphX) for ONNX. + let r = recommend( + &Cpu { avx2: true, avx512: true }, + &Gpus { nvidia: false, amd: true }, + ); + assert_eq!(r.whisper, Variant::WhisperVulkan); + assert_eq!(r.onnx, Variant::OnnxAvx512); + } + + #[test] + fn inventory_runs_without_panicking() { + let inv = inventory(); + assert!(matches!( + inv.install_kind, + InstallKind::Package | InstallKind::Source + )); + let _ = inv.recommendation; + } +} diff --git a/src/setup/gpu.rs b/src/setup/gpu.rs index 719d2493..da3f1a9e 100644 --- a/src/setup/gpu.rs +++ b/src/setup/gpu.rs @@ -17,6 +17,7 @@ //! //! This sets VK_LOADER_DRIVERS_SELECT internally to filter Vulkan ICDs. 
+use super::binary::install_active_binary; use std::fs; use std::os::unix::fs::symlink; use std::path::Path; @@ -28,6 +29,7 @@ const VOXTYPE_BIN_LOCAL: &str = "/usr/local/bin/voxtype"; const VOXTYPE_CPU_BACKUP: &str = "/usr/lib/voxtype/voxtype-cpu"; const VOXTYPE_NATIVE: &str = "/usr/lib/voxtype/voxtype-native"; + /// Get the active voxtype binary path (prefers /usr/bin, falls back to /usr/local/bin) fn get_active_binary_path() -> &'static str { // If /usr/bin/voxtype exists and points somewhere, use it @@ -351,30 +353,7 @@ fn switch_backend_tiered(backend: Backend) -> anyhow::Result<()> { ); } - // Remove existing symlink - if Path::new(active_bin).exists() || fs::symlink_metadata(active_bin).is_ok() { - fs::remove_file(active_bin).map_err(|e| { - anyhow::anyhow!( - "Failed to remove existing symlink (need sudo?): {}\n\ - Try: sudo voxtype setup gpu --enable", - e - ) - })?; - } - - // Create new symlink - symlink(&binary_path, active_bin).map_err(|e| { - anyhow::anyhow!( - "Failed to create symlink (need sudo?): {}\n\ - Try: sudo voxtype setup gpu --enable", - e - ) - })?; - - // Restore SELinux context if available - let _ = Command::new("restorecon").arg(active_bin).status(); - - Ok(()) + install_active_binary(active_bin, &binary_path) } /// Enable GPU in simple mode (switch symlink from native to vulkan) @@ -522,8 +501,11 @@ pub fn show_status() { let display_name = match target.as_str() { "voxtype-onnx-avx2" | "voxtype-parakeet-avx2" => "ONNX CPU (AVX2)", "voxtype-onnx-avx512" | "voxtype-parakeet-avx512" => "ONNX CPU (AVX-512)", - "voxtype-onnx-cuda" | "voxtype-parakeet-cuda" => "ONNX GPU (CUDA)", - "voxtype-onnx-rocm" | "voxtype-parakeet-rocm" => "ONNX GPU (ROCm)", + "voxtype-onnx-cuda-12" => "ONNX GPU (CUDA 12)", + "voxtype-onnx-cuda-13" => "ONNX GPU (CUDA 13)", + "voxtype-onnx-cuda" | "voxtype-parakeet-cuda" => "ONNX GPU (CUDA, unversioned)", + "voxtype-onnx-migraphx" => "ONNX GPU (MIGraphX)", + "voxtype-onnx-rocm" | "voxtype-parakeet-rocm" => "ONNX GPU 
(MIGraphX, legacy name)", _ => "ONNX (unknown variant)", }; println!("Active backend: {}", display_name); @@ -575,8 +557,9 @@ pub fn show_status() { let onnx_backends = [ ("voxtype-onnx-avx2", "voxtype-parakeet-avx2", "ONNX CPU (AVX2)"), ("voxtype-onnx-avx512", "voxtype-parakeet-avx512", "ONNX CPU (AVX-512)"), - ("voxtype-onnx-cuda", "voxtype-parakeet-cuda", "ONNX GPU (CUDA)"), - ("voxtype-onnx-rocm", "voxtype-parakeet-rocm", "ONNX GPU (ROCm)"), + ("voxtype-onnx-cuda-12", "voxtype-onnx-cuda", "ONNX GPU (CUDA 12)"), + ("voxtype-onnx-cuda-13", "voxtype-onnx-cuda-13", "ONNX GPU (CUDA 13)"), + ("voxtype-onnx-migraphx", "voxtype-onnx-rocm", "ONNX GPU (MIGraphX)"), ]; // Get current symlink target @@ -685,7 +668,7 @@ pub fn show_status() { .and_then(|p| p.file_name().map(|n| n.to_string_lossy().to_string())); let is_gpu_active = current_target .as_ref() - .map(|t| t.contains("cuda") || t.contains("rocm")) + .map(|t| t.contains("cuda") || t.contains("migraphx") || t.contains("rocm")) .unwrap_or(false); if !is_gpu_active && detect_best_parakeet_gpu_backend().is_some() { @@ -708,7 +691,7 @@ pub fn show_status() { fn detect_best_parakeet_gpu_backend() -> Option<(&'static str, &'static str)> { let gpus = detect_gpus(); - // The CUDA and ROCm binaries bundle ONNX Runtime which contains AVX-512 + // The CUDA and MIGraphX binaries bundle ONNX Runtime which contains AVX-512 // instructions. On CPUs without AVX-512 (e.g., Zen 3), these binaries will // crash with SIGILL. Only select GPU backends if the CPU supports AVX-512. 
let has_avx512 = fs::read_to_string("/proc/cpuinfo") @@ -720,51 +703,70 @@ fn detect_best_parakeet_gpu_backend() -> Option<(&'static str, &'static str)> { } // Helper to find installed binary, preferring new name over legacy - let find_binary = - |new_name: &'static str, legacy_name: &'static str| -> Option<&'static str> { - if Path::new(VOXTYPE_LIB_DIR).join(new_name).exists() { - Some(new_name) - } else if Path::new(VOXTYPE_LIB_DIR).join(legacy_name).exists() { - Some(legacy_name) - } else { - None - } - }; + let find_binary = |new_name: &'static str, legacy_name: &'static str| -> Option<&'static str> { + if Path::new(VOXTYPE_LIB_DIR).join(new_name).exists() { + Some(new_name) + } else if Path::new(VOXTYPE_LIB_DIR).join(legacy_name).exists() { + Some(legacy_name) + } else { + None + } + }; - // Check for AMD GPU and ROCm binary + // Check for AMD GPU and MIGraphX binary (legacy "rocm" name accepted via symlink) let has_amd = gpus.iter().any(|g| g.vendor == GpuVendor::Amd); - if let Some(binary) = find_binary("voxtype-onnx-rocm", "voxtype-parakeet-rocm") { + if let Some(binary) = find_binary("voxtype-onnx-migraphx", "voxtype-onnx-rocm") { if has_amd { - return Some((binary, "ROCm")); + return Some((binary, "MIGraphX")); } } - // Check for NVIDIA GPU and CUDA binary + // Check for NVIDIA GPU and CUDA binary. v0.7.0 splits cuda into -12 and + // -13 variants; pick the one matching the host's CUDA runtime so ort's + // bundled libonnxruntime_providers_cuda.so (built against a fixed CUDA + // ABI) doesn't fail to register at runtime. Mismatched pairings would + // silently fall back to CPU. 
let has_nvidia = gpus.iter().any(|g| g.vendor == GpuVendor::Nvidia); - if let Some(binary) = find_binary("voxtype-onnx-cuda", "voxtype-parakeet-cuda") { - if has_nvidia { - return Some((binary, "CUDA")); + if has_nvidia { + let host_cuda_major = crate::setup::parakeet::detect_cuda_runtime_major(); + let cuda_pref: &[&str] = match host_cuda_major { + Some(13) => &["voxtype-onnx-cuda-13", "voxtype-onnx-cuda"], + Some(12) => &["voxtype-onnx-cuda-12", "voxtype-onnx-cuda"], + // No detection — try cu13 first (rolling-distro default), then cu12 + _ => &["voxtype-onnx-cuda-13", "voxtype-onnx-cuda-12", "voxtype-onnx-cuda"], + }; + for name in cuda_pref { + if Path::new(VOXTYPE_LIB_DIR).join(name).exists() { + let label = match host_cuda_major { + Some(13) => "CUDA 13", + Some(12) => "CUDA 12", + _ => "CUDA", + }; + return Some((*name, label)); + } } } // Fall back to whichever is installed (user may have external GPU) - if let Some(binary) = find_binary("voxtype-onnx-rocm", "voxtype-parakeet-rocm") { - return Some((binary, "ROCm")); + if let Some(binary) = find_binary("voxtype-onnx-migraphx", "voxtype-onnx-rocm") { + return Some((binary, "MIGraphX")); } - if let Some(binary) = find_binary("voxtype-onnx-cuda", "voxtype-parakeet-cuda") { - return Some((binary, "CUDA")); + for name in ["voxtype-onnx-cuda-13", "voxtype-onnx-cuda-12", "voxtype-onnx-cuda"] { + if Path::new(VOXTYPE_LIB_DIR).join(name).exists() { + return Some((name, "CUDA")); + } } None } -/// Enable GPU backend (engine-aware: Vulkan for Whisper, CUDA/ROCm for Parakeet) +/// Enable GPU backend (engine-aware: Vulkan for Whisper, CUDA/MIGraphX for Parakeet) pub fn enable() -> anyhow::Result<()> { // Check which engine is active by looking at the current symlink let is_parakeet = is_parakeet_binary_active(); if is_parakeet { - // Parakeet mode: switch to best available GPU backend (CUDA or ROCm) + // Parakeet mode: switch to best available GPU backend (CUDA or MIGraphX) let (backend_binary, backend_name) = 
detect_best_parakeet_gpu_backend().ok_or_else(|| { let gpus = detect_gpus(); let has_amd = gpus.iter().any(|g| g.vendor == GpuVendor::Amd); @@ -774,18 +776,19 @@ pub fn enable() -> anyhow::Result<()> { .unwrap_or(false); let hint = if (has_amd || has_nvidia) && !has_avx512 { - "You have a GPU, but the ONNX GPU binaries (CUDA/ROCm) require a CPU with \ + "You have a GPU, but the ONNX GPU binaries (CUDA/MIGraphX) require a CPU with \ AVX-512 support. Your CPU only supports AVX2.\n\n\ Use ONNX on CPU instead:\n \ sudo ln -sf /usr/lib/voxtype/voxtype-onnx-avx2 /usr/bin/voxtype\n\n\ Or use the Whisper engine with Vulkan GPU acceleration:\n \ voxtype setup onnx --disable && sudo voxtype setup gpu --enable" } else if has_amd { - "You have an AMD GPU. Install voxtype-onnx-rocm for GPU acceleration." + "You have an AMD GPU. Install voxtype-onnx-migraphx for GPU acceleration." } else if has_nvidia { - "You have an NVIDIA GPU. Install voxtype-onnx-cuda for GPU acceleration." + "You have an NVIDIA GPU. Install voxtype-onnx-cuda-12 (for CUDA 12.x) or \ + voxtype-onnx-cuda-13 (for CUDA 13.x) for GPU acceleration." } else { - "No supported GPU detected. ONNX GPU acceleration requires NVIDIA (CUDA) or AMD (ROCm)." + "No supported GPU detected. ONNX GPU acceleration requires NVIDIA (CUDA) or AMD (MIGraphX)." 
}; anyhow::anyhow!( @@ -924,23 +927,20 @@ fn detect_best_cpu_backend() -> Backend { /// Detect the best ONNX CPU backend for this system fn detect_best_parakeet_cpu_backend() -> Option<&'static str> { // Helper to find installed binary, preferring new name over legacy - let find_binary = - |new_name: &'static str, legacy_name: &'static str| -> Option<&'static str> { - if Path::new(VOXTYPE_LIB_DIR).join(new_name).exists() { - Some(new_name) - } else if Path::new(VOXTYPE_LIB_DIR).join(legacy_name).exists() { - Some(legacy_name) - } else { - None - } - }; + let find_binary = |new_name: &'static str, legacy_name: &'static str| -> Option<&'static str> { + if Path::new(VOXTYPE_LIB_DIR).join(new_name).exists() { + Some(new_name) + } else if Path::new(VOXTYPE_LIB_DIR).join(legacy_name).exists() { + Some(legacy_name) + } else { + None + } + }; // Check for AVX-512 support if let Ok(cpuinfo) = fs::read_to_string("/proc/cpuinfo") { if cpuinfo.contains("avx512f") { - if let Some(binary) = - find_binary("voxtype-onnx-avx512", "voxtype-parakeet-avx512") - { + if let Some(binary) = find_binary("voxtype-onnx-avx512", "voxtype-parakeet-avx512") { return Some(binary); } } @@ -963,28 +963,5 @@ fn switch_backend_tiered_parakeet(binary_name: &str) -> anyhow::Result<()> { ); } - // Remove existing symlink - if Path::new(active_bin).exists() || fs::symlink_metadata(active_bin).is_ok() { - fs::remove_file(active_bin).map_err(|e| { - anyhow::anyhow!( - "Failed to remove existing symlink (need sudo?): {}\n\ - Try: sudo voxtype setup gpu --enable", - e - ) - })?; - } - - // Create new symlink - symlink(&binary_path, active_bin).map_err(|e| { - anyhow::anyhow!( - "Failed to create symlink (need sudo?): {}\n\ - Try: sudo voxtype setup gpu --enable", - e - ) - })?; - - // Restore SELinux context if available - let _ = Command::new("restorecon").arg(active_bin).status(); - - Ok(()) + install_active_binary(active_bin, &binary_path) } diff --git a/src/setup/hammerspoon.rs 
b/src/setup/hammerspoon.rs new file mode 100644 index 00000000..1c814a90 --- /dev/null +++ b/src/setup/hammerspoon.rs @@ -0,0 +1,129 @@ +//! Hammerspoon integration setup for macOS +//! +//! Helps users configure Hammerspoon for hotkey support as an alternative +//! to the built-in rdev-based hotkey capture. + +use std::path::PathBuf; + +/// Generate the Hammerspoon init.lua snippet +fn generate_config(hotkey: &str, toggle: bool) -> String { + let mode = if toggle { "toggle" } else { "push_to_talk" }; + format!( + r#"-- Voxtype Hammerspoon Integration +-- Add this to your ~/.hammerspoon/init.lua + +local voxtype = require("voxtype") +voxtype.setup({{ + hotkey = "{}", + mode = "{}", +}}) + +-- Optional: Add a cancel hotkey +-- voxtype.add_cancel_hotkey({{"cmd", "shift"}}, "escape") +"#, + hotkey, mode + ) +} + +/// Get the path to the Hammerspoon config directory +fn hammerspoon_dir() -> Option { + dirs::home_dir().map(|h| h.join(".hammerspoon")) +} + +/// Check if Hammerspoon is installed +async fn is_hammerspoon_installed() -> bool { + tokio::process::Command::new("which") + .arg("hs") + .output() + .await + .map(|o| o.status.success()) + .unwrap_or(false) + || std::path::Path::new("/Applications/Hammerspoon.app").exists() +} + +/// Install the voxtype.lua module to ~/.hammerspoon/ +async fn install_module() -> anyhow::Result<()> { + let hs_dir = + hammerspoon_dir().ok_or_else(|| anyhow::anyhow!("Could not find home directory"))?; + + // Create .hammerspoon directory if needed + if !hs_dir.exists() { + std::fs::create_dir_all(&hs_dir)?; + println!("Created {}", hs_dir.display()); + } + + // Write the voxtype.lua module + let module_path = hs_dir.join("voxtype.lua"); + let module_content = include_str!("../../contrib/hammerspoon/voxtype.lua"); + std::fs::write(&module_path, module_content)?; + println!("Installed {}", module_path.display()); + + Ok(()) +} + +/// Run the Hammerspoon setup command +pub async fn run(install: bool, show: bool, hotkey: &str, toggle: 
bool) -> anyhow::Result<()> { + println!("Hammerspoon Integration for Voxtype"); + println!("====================================\n"); + + // Show config if requested (even without Hammerspoon installed) + if show { + println!("Add the following to your ~/.hammerspoon/init.lua:\n"); + println!("{}", generate_config(hotkey, toggle)); + return Ok(()); + } + + // Check if Hammerspoon is installed for other actions + if !is_hammerspoon_installed().await { + println!("Hammerspoon is not installed.\n"); + println!("Install it with:"); + println!(" brew install --cask hammerspoon\n"); + println!("Then run this command again.\n"); + println!("Or use --show to see the config snippet anyway."); + return Ok(()); + } + + if install { + // Install the module + install_module().await?; + println!(); + println!("Now add the following to your ~/.hammerspoon/init.lua:"); + println!(); + println!("{}", generate_config(hotkey, toggle)); + println!(); + println!("Then reload Hammerspoon config:"); + println!(" - Click Hammerspoon menu bar icon -> Reload Config"); + println!(" - Or press Cmd+Shift+R while Hammerspoon console is focused"); + } else if show { + // Just show the config + println!("Add the following to your ~/.hammerspoon/init.lua:\n"); + println!("{}", generate_config(hotkey, toggle)); + } else { + // Default: show instructions + println!("Hammerspoon provides hotkey support without granting Accessibility"); + println!("permissions to Terminal.\n"); + + println!("Setup options:\n"); + println!(" voxtype setup hammerspoon --install"); + println!(" Install voxtype.lua module and show config snippet\n"); + println!(" voxtype setup hammerspoon --show"); + println!(" Show the init.lua configuration snippet\n"); + println!(" voxtype setup hammerspoon --install --hotkey rightcmd"); + println!(" Install with a different hotkey\n"); + println!(" voxtype setup hammerspoon --install --toggle"); + println!(" Use toggle mode (press to start/stop) instead of push-to-talk\n"); + + 
println!("Available hotkeys:"); + println!(" rightalt, leftalt, rightcmd, leftcmd, rightctrl, leftctrl"); + println!(" rightshift, leftshift, f1-f20, escape, space, tab, etc.\n"); + + println!( + "Current Hammerspoon directory: {}", + hammerspoon_dir() + .map(|p| p.display().to_string()) + .unwrap_or_else(|| "not found".to_string()) + ); + } + + Ok(()) +} diff --git a/src/setup/launchd.rs b/src/setup/launchd.rs new file mode 100644 index 00000000..4e2663e0 --- /dev/null +++ b/src/setup/launchd.rs @@ -0,0 +1,254 @@ +//! macOS LaunchAgent service installation +//! +//! Provides commands to install, uninstall, and check the status of +//! voxtype as a launchd user service on macOS. + +use super::{get_voxtype_path, print_failure, print_info, print_success, print_warning}; +use std::fs; +use std::path::PathBuf; + +const PLIST_FILENAME: &str = "io.voxtype.daemon.plist"; + +/// Get the path to the LaunchAgents directory +fn launch_agents_dir() -> Option { + dirs::home_dir().map(|home| home.join("Library/LaunchAgents")) +} + +/// Get the path to the plist file +fn plist_path() -> Option { + launch_agents_dir().map(|dir| dir.join(PLIST_FILENAME)) +} + +/// Get the path to the logs directory +fn logs_dir() -> Option { + dirs::home_dir().map(|home| home.join("Library/Logs/voxtype")) +} + +/// Generate the launchd plist content +fn generate_plist() -> String { + let voxtype_path = get_voxtype_path(); + let logs_dir = logs_dir().unwrap_or_else(|| PathBuf::from("/tmp")); + + format!( + r#" + + + + Label + io.voxtype.daemon + + ProgramArguments + + {voxtype_path} + daemon + + + RunAtLoad + + + KeepAlive + + + StandardOutPath + {stdout} + + StandardErrorPath + {stderr} + + EnvironmentVariables + + PATH + /usr/local/bin:/usr/bin:/bin:/opt/homebrew/bin + + + ProcessType + Interactive + + Nice + -10 + + +"#, + voxtype_path = voxtype_path, + stdout = logs_dir.join("stdout.log").display(), + stderr = logs_dir.join("stderr.log").display(), + ) +} + +/// Install the LaunchAgent +pub 
async fn install() -> anyhow::Result<()> { + println!("Installing Voxtype LaunchAgent...\n"); + + // Check if we're on macOS + if !cfg!(target_os = "macos") { + print_failure("This command is only available on macOS"); + print_info("On Linux, use: voxtype setup systemd"); + anyhow::bail!("Not on macOS"); + } + + // Warn about limitations on macOS + #[cfg(target_os = "macos")] + { + print_warning("LaunchAgent services do not receive Microphone permissions on macOS."); + print_warning("Transcription will fail (Whisper outputs silence as 'Thank you')."); + println!(); + print_info("Recommended: use 'voxtype setup app-bundle' instead."); + print_info("The app bundle approach uses Login Items and properly receives"); + print_info("Accessibility, Input Monitoring, and Microphone permissions."); + println!(); + } + + // Ensure LaunchAgents directory exists + let launch_dir = launch_agents_dir() + .ok_or_else(|| anyhow::anyhow!("Could not determine LaunchAgents directory"))?; + fs::create_dir_all(&launch_dir)?; + + // Ensure logs directory exists + if let Some(logs) = logs_dir() { + fs::create_dir_all(&logs)?; + print_success(&format!("Logs directory: {:?}", logs)); + } + + // Generate and write the plist + let plist = plist_path().ok_or_else(|| anyhow::anyhow!("Could not determine plist path"))?; + let content = generate_plist(); + fs::write(&plist, &content)?; + print_success(&format!("Created: {:?}", plist)); + + // Load the service + let status = std::process::Command::new("launchctl") + .args(["load", plist.to_str().unwrap_or("")]) + .status(); + + match status { + Ok(s) if s.success() => { + print_success("LaunchAgent loaded"); + } + _ => { + print_warning("Could not load LaunchAgent automatically"); + print_info("Try: launchctl load ~/Library/LaunchAgents/io.voxtype.daemon.plist"); + } + } + + println!("\n---"); + println!("\x1b[32m✓ Installation complete!\x1b[0m"); + println!(); + println!("Voxtype will now start automatically on login."); + println!(); + 
println!("Useful commands:"); + println!(" launchctl start io.voxtype.daemon - Start now"); + println!(" launchctl stop io.voxtype.daemon - Stop"); + println!(" launchctl unload ~/Library/LaunchAgents/io.voxtype.daemon.plist - Disable"); + println!(); + println!("Logs:"); + if let Some(logs) = logs_dir() { + println!(" tail -f {:?}/stdout.log", logs); + println!(" tail -f {:?}/stderr.log", logs); + } + + Ok(()) +} + +/// Uninstall the LaunchAgent +pub async fn uninstall() -> anyhow::Result<()> { + println!("Uninstalling Voxtype LaunchAgent...\n"); + + let plist = plist_path().ok_or_else(|| anyhow::anyhow!("Could not determine plist path"))?; + + if !plist.exists() { + print_info("LaunchAgent not installed"); + return Ok(()); + } + + // Unload the service first + let _ = std::process::Command::new("launchctl") + .args(["unload", plist.to_str().unwrap_or("")]) + .status(); + + // Remove the plist file + fs::remove_file(&plist)?; + print_success("LaunchAgent removed"); + + println!("\n---"); + println!("\x1b[32m✓ Uninstallation complete!\x1b[0m"); + + Ok(()) +} + +/// Show LaunchAgent status +pub async fn status() -> anyhow::Result<()> { + println!("Voxtype LaunchAgent Status\n"); + println!("==========================\n"); + + let plist = plist_path().ok_or_else(|| anyhow::anyhow!("Could not determine plist path"))?; + + // Check if plist exists + if plist.exists() { + print_success(&format!("Plist installed: {:?}", plist)); + } else { + print_failure("LaunchAgent not installed"); + print_info("Install with: voxtype setup launchd"); + return Ok(()); + } + + // Check if service is running + let output = std::process::Command::new("launchctl") + .args(["list", "io.voxtype.daemon"]) + .output(); + + match output { + Ok(out) if out.status.success() => { + let stdout = String::from_utf8_lossy(&out.stdout); + if stdout.contains("io.voxtype.daemon") { + print_success("Service is running"); + + // Parse PID if available + for line in stdout.lines() { + if let Some(pid) = 
line.split_whitespace().next() { + if pid != "-" { + println!(" PID: {}", pid); + } + } + } + } else { + print_warning("Service is loaded but not running"); + } + } + _ => { + print_warning("Service is not loaded"); + print_info("Start with: launchctl load ~/Library/LaunchAgents/io.voxtype.daemon.plist"); + } + } + + // Show log locations + if let Some(logs) = logs_dir() { + println!("\nLogs:"); + let stdout_log = logs.join("stdout.log"); + let stderr_log = logs.join("stderr.log"); + + if stdout_log.exists() { + let size = fs::metadata(&stdout_log).map(|m| m.len()).unwrap_or(0); + println!(" stdout: {:?} ({} bytes)", stdout_log, size); + } + if stderr_log.exists() { + let size = fs::metadata(&stderr_log).map(|m| m.len()).unwrap_or(0); + println!(" stderr: {:?} ({} bytes)", stderr_log, size); + } + } + + Ok(()) +} + +/// Regenerate the LaunchAgent plist file (e.g., after binary path change) +/// Returns true if the file was updated +pub fn regenerate_plist() -> anyhow::Result { + let plist = match plist_path() { + Some(p) if p.exists() => p, + _ => return Ok(false), + }; + + let content = generate_plist(); + fs::write(&plist, &content)?; + + Ok(true) +} diff --git a/src/setup/macos.rs b/src/setup/macos.rs new file mode 100644 index 00000000..1051fec3 --- /dev/null +++ b/src/setup/macos.rs @@ -0,0 +1,684 @@ +//! macOS interactive setup wizard +//! +//! Provides a guided setup experience for macOS users, covering: +//! - App bundle creation and code signing (via app_bundle module) +//! - Microphone permission (required for audio capture) +//! - Accessibility permission (required for text injection) +//! - Notification permission (optional) +//! - Hotkey configuration (native rdev or Hammerspoon) +//! - Login Items auto-start (via app_bundle module) +//! 
- Model download + +use super::{print_failure, print_info, print_success, print_warning}; +use std::io::{self, Write}; + +/// Check if the app bundle exists and is properly set up +fn check_app_bundle() -> bool { + let app_path = super::app_bundle::app_bundle_path(); + let binary_path = app_path.join("Contents/MacOS/voxtype"); + let info_plist = app_path.join("Contents/Info.plist"); + + app_path.exists() && binary_path.exists() && info_plist.exists() +} + +/// Reset TCC permissions for Voxtype (forces re-prompt) +async fn reset_permissions() -> bool { + let bundle_id = super::app_bundle::BUNDLE_ID; + + let mic_reset = tokio::process::Command::new("tccutil") + .args(["reset", "Microphone", bundle_id]) + .output() + .await + .map(|o| o.status.success()) + .unwrap_or(false); + + let acc_reset = tokio::process::Command::new("tccutil") + .args(["reset", "Accessibility", bundle_id]) + .output() + .await + .map(|o| o.status.success()) + .unwrap_or(false); + + mic_reset || acc_reset +} + +/// Check if Accessibility permission is granted using AXIsProcessTrusted equivalent +async fn check_accessibility_permission() -> bool { + let output = tokio::process::Command::new("osascript") + .args([ + "-e", + "tell application \"System Events\" to return name of first process", + ]) + .output() + .await; + + match output { + Ok(o) => o.status.success(), + Err(_) => false, + } +} + +/// Open System Settings to a specific privacy pane +async fn open_privacy_settings(pane: &str) -> bool { + let url = format!( + "x-apple.systempreferences:com.apple.preference.security?Privacy_{}", + pane + ); + + tokio::process::Command::new("open") + .arg(&url) + .output() + .await + .map(|o| o.status.success()) + .unwrap_or(false) +} + +/// Check if Hammerspoon is installed +async fn check_hammerspoon() -> bool { + std::path::Path::new("/Applications/Hammerspoon.app").exists() + || tokio::process::Command::new("which") + .arg("hs") + .output() + .await + .map(|o| o.status.success()) + 
.unwrap_or(false) +} + +/// Check if terminal-notifier is installed +async fn check_terminal_notifier() -> bool { + tokio::process::Command::new("which") + .arg("terminal-notifier") + .output() + .await + .map(|o| o.status.success()) + .unwrap_or(false) +} + +/// Check if system language is English +async fn is_system_language_english() -> bool { + let output = tokio::process::Command::new("defaults") + .args(["read", "NSGlobalDomain", "AppleLanguages"]) + .output() + .await; + + match output { + Ok(o) if o.status.success() => { + let languages = String::from_utf8_lossy(&o.stdout); + languages + .lines() + .find(|line| line.trim().starts_with('"')) + .map(|line| { + let trimmed = line.trim().trim_matches(|c| c == '"' || c == ','); + trimmed.starts_with("en") + }) + .unwrap_or(true) + } + _ => true, + } +} + +/// Get user input with a default value +fn prompt(message: &str, default: &str) -> String { + print!("{} [{}]: ", message, default); + io::stdout().flush().unwrap(); + + let mut input = String::new(); + io::stdin().read_line(&mut input).unwrap(); + let input = input.trim(); + + if input.is_empty() { + default.to_string() + } else { + input.to_string() + } +} + +/// Get yes/no input +fn prompt_yn(message: &str, default: bool) -> bool { + let default_str = if default { "Y/n" } else { "y/N" }; + print!("{} [{}]: ", message, default_str); + io::stdout().flush().unwrap(); + + let mut input = String::new(); + io::stdin().read_line(&mut input).unwrap(); + let input = input.trim().to_lowercase(); + + match input.as_str() { + "y" | "yes" => true, + "n" | "no" => false, + "" => default, + _ => default, + } +} + +/// Wait for user to press Enter +fn wait_for_enter(message: &str) { + print!("{}", message); + io::stdout().flush().unwrap(); + let mut input = String::new(); + let _ = io::stdin().read_line(&mut input); +} + +/// Print a section header +fn section(title: &str) { + println!("\n\x1b[1m{}\x1b[0m", title); + println!("{}", "─".repeat(title.len())); +} + +/// Check 
if a notification icon is installed +fn check_notification_icon() -> bool { + let candidates = [ + dirs::data_dir().map(|d| d.join("voxtype/icon.png")), + dirs::config_dir().map(|d| d.join("voxtype/icon.png")), + ]; + + candidates.into_iter().flatten().any(|p| p.exists()) +} + +/// Install a default notification icon +fn install_default_icon_file() -> anyhow::Result<()> { + let data_dir = dirs::data_dir() + .ok_or_else(|| anyhow::anyhow!("Could not find Application Support directory"))? + .join("voxtype"); + + std::fs::create_dir_all(&data_dir)?; + + let icon_path = data_dir.join("icon.png"); + let icon_data = include_bytes!("../../assets/icon.png"); + std::fs::write(&icon_path, icon_data)?; + + println!(" Installed icon: {}", icon_path.display()); + Ok(()) +} + +/// Get the app bundle binary path +pub fn get_app_bundle_path() -> String { + super::app_bundle::app_binary_path() + .to_string_lossy() + .to_string() +} + +/// Run the macOS setup wizard +pub async fn run() -> anyhow::Result<()> { + println!("\x1b[1mVoxtype macOS Setup Wizard\x1b[0m"); + println!("==========================\n"); + println!("This wizard will set up Voxtype as a native macOS app with proper permissions.\n"); + + // Step 1: Create App Bundle + section("Step 1: App Bundle"); + + let app_exists = check_app_bundle(); + if app_exists { + print_success("Voxtype.app already exists"); + let recreate = prompt_yn("Recreate app bundle? 
(recommended after updates)", true); + if recreate { + println!(" Creating app bundle..."); + match super::app_bundle::create_app_bundle() { + Ok(_) => print_success("App bundle created and signed"), + Err(e) => { + print_failure(&format!("Failed to create app bundle: {}", e)); + println!(" You may need to run with sudo or manually create the bundle"); + return Err(e); + } + } + } + } else { + println!("Voxtype needs to be installed as an app bundle for proper macOS integration.\n"); + println!("This will:"); + println!(" - Create /Applications/Voxtype.app"); + println!(" - Enable proper permission prompts"); + println!(" - Allow adding to Login Items\n"); + + let create = prompt_yn("Create app bundle?", true); + if create { + println!(" Creating app bundle..."); + match super::app_bundle::create_app_bundle() { + Ok(_) => print_success("App bundle created and signed"), + Err(e) => { + print_failure(&format!("Failed to create app bundle: {}", e)); + println!(" You may need to run with sudo or manually create the bundle"); + return Err(e); + } + } + } else { + print_warning("Skipping app bundle creation"); + println!(" Note: Without the app bundle, permissions may not work correctly"); + } + } + + // Step 2: Microphone Permission + section("Step 2: Microphone Permission"); + + println!("Voxtype needs microphone access to capture your voice.\n"); + println!("We'll open System Settings and launch Voxtype to trigger the permission prompt.\n"); + + let setup_mic = prompt_yn("Set up microphone permission now?", true); + if setup_mic { + let _ = reset_permissions().await; + + print_info("Opening System Settings > Privacy & Security > Microphone..."); + open_privacy_settings("Microphone").await; + + tokio::time::sleep(tokio::time::Duration::from_secs(1)).await; + + // Launch the app bundle to trigger the permission prompt + print_info("Launching Voxtype.app to trigger permission prompt..."); + let app_path = super::app_bundle::app_bundle_path(); + let _ = 
tokio::process::Command::new("open") + .arg(app_path.as_os_str()) + .output() + .await; + + tokio::time::sleep(tokio::time::Duration::from_secs(2)).await; + + println!(); + println!(" \x1b[1mAction required:\x1b[0m"); + println!(" 1. If a permission dialog appears, click 'OK' to allow microphone access"); + println!(" 2. If no dialog appears, find 'Voxtype' in the list and toggle it ON"); + println!( + " 3. If Voxtype isn't in the list, press the hotkey once to trigger the prompt" + ); + println!(); + + wait_for_enter("Press Enter when microphone permission is granted..."); + + // Kill the test daemon + let _ = tokio::process::Command::new("pkill") + .args(["-9", "-f", "Voxtype.app"]) + .output() + .await; + + print_success("Microphone permission configured"); + } + + // Step 3: Input Monitoring Permission (for hotkey capture) + section("Step 3: Input Monitoring Permission"); + + println!("Voxtype needs Input Monitoring permission to capture global hotkeys.\n"); + + let setup_input = prompt_yn("Set up Input Monitoring permission now?", true); + if setup_input { + print_info("Opening System Settings > Privacy & Security > Input Monitoring..."); + open_privacy_settings("ListenEvent").await; + + tokio::time::sleep(tokio::time::Duration::from_secs(1)).await; + + println!(); + println!(" \x1b[1mAction required:\x1b[0m"); + println!(" 1. Click the '+' button"); + println!(" 2. Navigate to /Applications"); + println!(" 3. Select 'Voxtype.app'"); + println!(" 4. 
Ensure the toggle is ON"); + println!(); + + wait_for_enter("Press Enter when Input Monitoring permission is granted..."); + print_success("Input Monitoring permission configured"); + } + + // Step 4: Accessibility Permission (for text injection) + section("Step 4: Accessibility Permission"); + + println!("Voxtype needs Accessibility permission to type transcribed text.\n"); + + let has_accessibility = check_accessibility_permission().await; + + if has_accessibility { + print_success("Accessibility permission already granted"); + } else { + let setup_acc = prompt_yn("Set up Accessibility permission now?", true); + if setup_acc { + print_info("Opening System Settings > Privacy & Security > Accessibility..."); + open_privacy_settings("Accessibility").await; + + tokio::time::sleep(tokio::time::Duration::from_secs(1)).await; + + println!(); + println!(" \x1b[1mAction required:\x1b[0m"); + println!(" 1. Click the '+' button"); + println!(" 2. Navigate to /Applications"); + println!(" 3. Select 'Voxtype.app'"); + println!(" 4. 
Ensure the toggle is ON"); + println!(); + + wait_for_enter("Press Enter when Accessibility permission is granted..."); + + let has_acc_now = check_accessibility_permission().await; + if has_acc_now { + print_success("Accessibility permission granted"); + } else { + print_warning("Accessibility permission may not be fully configured"); + println!(" Text typing will fall back to clipboard if needed"); + } + } + } + + // Step 5: Notification Permission (Optional) + section("Step 5: Notifications (Optional)"); + + let has_notifier = check_terminal_notifier().await; + + println!("Voxtype can show notifications when transcription completes.\n"); + + if has_notifier { + print_success("terminal-notifier installed (enhanced notifications)"); + } else { + print_info("terminal-notifier not installed"); + println!(" Install with: brew install terminal-notifier"); + } + + let setup_notifications = prompt_yn("Configure notification permission?", false); + if setup_notifications { + print_info("Opening System Settings > Notifications..."); + let _ = tokio::process::Command::new("open") + .arg("x-apple.systempreferences:com.apple.preference.notifications") + .output() + .await; + + println!(); + println!(" Find 'Voxtype' in the list and configure notification settings."); + println!(); + + wait_for_enter("Press Enter when done..."); + } + + // Step 6: Hotkey Configuration + section("Step 6: Hotkey Configuration"); + + let has_hammerspoon = check_hammerspoon().await; + + println!("Voxtype supports two methods for global hotkey capture:\n"); + println!(" 1. Native (rdev) - Built-in, requires Accessibility permission"); + println!(" 2. 
Hammerspoon - External app, more reliable on some systems\n"); + + if has_hammerspoon { + print_success("Hammerspoon is installed"); + } else { + print_info("Hammerspoon is not installed (optional)"); + println!(" Install with: brew install --cask hammerspoon"); + } + + let use_hammerspoon = if has_hammerspoon { + println!(); + prompt_yn("Use Hammerspoon for hotkey support?", false) + } else { + false + }; + + let hotkey = prompt("\nHotkey to use", "fn"); + let toggle_mode = prompt_yn( + "Use toggle mode? (press to start/stop instead of hold)", + false, + ); + + if use_hammerspoon { + println!(); + println!("Setting up Hammerspoon integration..."); + + if let Err(e) = super::hammerspoon::run(true, false, &hotkey, toggle_mode).await { + print_warning(&format!("Could not set up Hammerspoon: {}", e)); + } + } else { + print_success(&format!("Configured native hotkey: {}", hotkey)); + print_info(&format!( + "Mode: {}", + if toggle_mode { + "toggle" + } else { + "push-to-talk" + } + )); + } + + // Step 7: Auto-start (Login Items) + section("Step 7: Auto-start Configuration"); + + println!("Voxtype can start automatically when you log in via Login Items.\n"); + + let setup_autostart = prompt_yn("Add to Login Items?", true); + + if setup_autostart { + match super::app_bundle::add_to_login_items() { + Ok(true) => print_success("Added to Login Items"), + Ok(false) => { + print_warning("Could not add to Login Items automatically"); + print_info("Add manually: System Settings > General > Login Items"); + } + Err(e) => print_warning(&format!("Could not add to Login Items: {}", e)), + } + } + + // Step 8: Notification icon + section("Step 8: Notification Icon (Optional)"); + + if has_notifier { + println!("terminal-notifier supports custom notification icons.\n"); + + let icon_installed = check_notification_icon(); + if icon_installed { + print_success("Custom notification icon is installed"); + } else { + print_info("No custom notification icon found"); + + let 
install_default_icon = prompt_yn("Install a default microphone icon?", true); + if install_default_icon { + if let Err(e) = install_default_icon_file() { + print_warning(&format!("Could not install icon: {}", e)); + } else { + print_success("Default icon installed"); + } + } + } + } else { + print_info("Install terminal-notifier to enable custom notification icons"); + } + + // Step 9: Model download + section("Step 9: Speech Recognition Model"); + + let config = crate::config::load_config(None).unwrap_or_default(); + let models_dir = crate::Config::models_dir(); + + #[cfg(feature = "parakeet")] + let has_parakeet = true; + #[cfg(not(feature = "parakeet"))] + let has_parakeet = false; + + let is_english = is_system_language_english().await; + + let (use_parakeet, model) = if has_parakeet { + println!("Voxtype supports two speech recognition engines:\n"); + + if is_english { + println!(" 1. Parakeet (Recommended) - NVIDIA's FastConformer via CoreML"); + println!(" - ~8x faster than Whisper on Apple Silicon"); + println!(" - Optimized for macOS Neural Engine"); + println!(" - English only"); + println!(); + println!(" 2. Whisper - OpenAI's Whisper via whisper.cpp"); + println!(" - Broader language support"); + println!(" - More model size options"); + } else { + println!(" 1. Whisper (Recommended) - OpenAI's Whisper via whisper.cpp"); + println!(" - Supports your system language"); + println!(" - Multiple model sizes available"); + println!(); + println!(" 2. Parakeet - NVIDIA's FastConformer via CoreML"); + println!(" - ~8x faster on Apple Silicon"); + println!(" - English only"); + print_warning("Your system language is not English. 
Parakeet only supports English."); + } + println!(); + + let use_parakeet = prompt_yn("Use Parakeet?", is_english); + + if use_parakeet { + println!(); + println!("Available Parakeet models:"); + println!(" parakeet-tdt-0.6b-v3 - Full precision (~1.2 GB)"); + println!(" parakeet-tdt-0.6b-v3-int8 - Quantized, faster (~670 MB)"); + println!(); + + let current = config + .parakeet + .as_ref() + .map(|p| p.model.as_str()) + .unwrap_or("parakeet-tdt-0.6b-v3-int8"); + let model = prompt("Model to use", current); + (true, model) + } else { + println!(); + println!("Available Whisper models (from fastest to most accurate):"); + if is_english { + println!(" tiny.en - Fastest, English only (~75 MB)"); + println!(" base.en - Fast, English only (~145 MB)"); + println!(" small.en - Balanced, English only (~500 MB)"); + println!(" medium.en - Accurate, English only (~1.5 GB)"); + println!(" large-v3-turbo - Most accurate, all languages (~1.6 GB)"); + } else { + println!(" tiny - Fastest, multilingual (~75 MB)"); + println!(" base - Fast, multilingual (~145 MB)"); + println!(" small - Balanced, multilingual (~500 MB)"); + println!(" medium - Accurate, multilingual (~1.5 GB)"); + println!(" large-v3-turbo - Most accurate, all languages (~1.6 GB)"); + } + println!(); + + let default_model = if is_english { + config.whisper.model.as_str() + } else { + "large-v3-turbo" + }; + let model = prompt("Model to use", default_model); + (false, model) + } + } else { + println!("Voxtype uses Whisper for speech recognition.\n"); + println!("Available models (from fastest to most accurate):"); + if is_english { + println!(" tiny.en - Fastest, English only (~75 MB)"); + println!(" base.en - Fast, English only (~145 MB)"); + println!(" small.en - Balanced, English only (~500 MB)"); + println!(" medium.en - Accurate, English only (~1.5 GB)"); + println!(" large-v3-turbo - Most accurate, all languages (~1.6 GB)"); + } else { + println!(" tiny - Fastest, multilingual (~75 MB)"); + println!(" base - 
Fast, multilingual (~145 MB)"); + println!(" small - Balanced, multilingual (~500 MB)"); + println!(" medium - Accurate, multilingual (~1.5 GB)"); + println!(" large-v3-turbo - Most accurate, all languages (~1.6 GB)"); + } + println!(); + + let default_model = if is_english { + config.whisper.model.as_str() + } else { + "large-v3-turbo" + }; + let model = prompt("Model to use", default_model); + (false, model) + }; + + // Download and configure the selected model + if use_parakeet { + let model_path = models_dir.join(&model); + let model_valid = + model_path.exists() && super::model::validate_parakeet_model(&model_path).is_ok(); + + if model_valid { + print_success(&format!("Model '{}' is already downloaded", model)); + } else { + let download = prompt_yn(&format!("Download model '{}'?", model), true); + if download { + println!(); + println!("Downloading model... (this may take a while)"); + if let Err(e) = super::model::download_parakeet_model(&model) { + print_failure(&format!("Download failed: {}", e)); + } else { + print_success("Model downloaded successfully"); + } + } + } + + if let Err(e) = super::model::set_parakeet_config(&model) { + print_warning(&format!("Could not update config: {}", e)); + } else { + print_success("Config updated to use Parakeet engine"); + } + } else { + let model_filename = crate::transcribe::whisper::get_model_filename(&model); + let model_path = models_dir.join(&model_filename); + + if model_path.exists() { + print_success(&format!("Model '{}' is already downloaded", model)); + } else { + let download = prompt_yn(&format!("Download model '{}'?", model), true); + if download { + println!(); + println!("Downloading model... 
(this may take a while)"); + if let Err(e) = super::model::download_model(&model) { + print_failure(&format!("Download failed: {}", e)); + } else { + print_success("Model downloaded successfully"); + + if let Err(e) = super::model::set_model_config(&model) { + print_warning(&format!("Could not update config: {}", e)); + } + } + } + } + } + + let engine_name = if use_parakeet { "Parakeet" } else { "Whisper" }; + + // Summary + section("Setup Complete!"); + + println!("Your Voxtype installation is ready. Here's a summary:\n"); + + println!(" App bundle: /Applications/Voxtype.app"); + if use_hammerspoon { + println!(" Hotkey method: Hammerspoon"); + } else { + println!(" Hotkey method: Native (rdev)"); + } + println!( + " Hotkey: {} ({})", + hotkey, + if toggle_mode { + "toggle" + } else { + "push-to-talk" + } + ); + println!(" Engine: {}", engine_name); + println!(" Model: {}", model); + println!( + " Auto-start: {}", + if setup_autostart { + "Login Items" + } else { + "disabled" + } + ); + + println!("\n\x1b[1mStarting Voxtype...\x1b[0m\n"); + + // Start via open (preserves app bundle identity for permissions) + let app_path = super::app_bundle::app_bundle_path(); + let _ = tokio::process::Command::new("open") + .arg(app_path.as_os_str()) + .output() + .await; + print_success("Voxtype.app started"); + + println!(); + println!("Press {} to start recording!", hotkey); + println!(); + println!("Useful commands:"); + println!(" voxtype status - Check daemon status"); + println!(" voxtype status --follow - Watch status in real-time"); + println!(" voxtype setup app-bundle --status - Check app bundle status"); + println!(" voxtype record toggle - Toggle recording from CLI"); + + Ok(()) +} diff --git a/src/setup/mod.rs b/src/setup/mod.rs index d1ec44d8..3a4e36b9 100644 --- a/src/setup/mod.rs +++ b/src/setup/mod.rs @@ -9,9 +9,17 @@ //! - Parakeet backend management //! 
- Compositor integration (modifier key fix) +#[cfg(target_os = "macos")] +pub mod app_bundle; +pub mod binary; pub mod compositor; pub mod dms; pub mod gpu; +#[cfg(target_os = "macos")] +pub mod hammerspoon; +pub mod launchd; +#[cfg(target_os = "macos")] +pub mod macos; pub mod model; pub mod parakeet; pub mod systemd; @@ -27,6 +35,7 @@ use tokio::process::Command; pub enum DisplayServer { Wayland, X11, + MacOS, Unknown, } @@ -35,6 +44,7 @@ impl std::fmt::Display for DisplayServer { match self { DisplayServer::Wayland => write!(f, "Wayland"), DisplayServer::X11 => write!(f, "X11"), + DisplayServer::MacOS => write!(f, "macOS"), DisplayServer::Unknown => write!(f, "Unknown"), } } @@ -60,6 +70,9 @@ pub struct OutputChainStatus { pub ydotool_daemon: bool, pub wl_copy: OutputToolStatus, pub xclip: OutputToolStatus, + // macOS-specific + pub osascript: OutputToolStatus, + pub pbcopy: OutputToolStatus, pub primary_method: Option, } @@ -125,15 +138,24 @@ pub fn print_warning(msg: &str) { /// Detect the current display server pub fn detect_display_server() -> DisplayServer { - // Check for Wayland first - if std::env::var("WAYLAND_DISPLAY").is_ok() { - return DisplayServer::Wayland; + // Check for macOS first + #[cfg(target_os = "macos")] + { + return DisplayServer::MacOS; } - // Check for X11 - if std::env::var("DISPLAY").is_ok() { - return DisplayServer::X11; + + #[cfg(not(target_os = "macos"))] + { + // Check for Wayland first + if std::env::var("WAYLAND_DISPLAY").is_ok() { + return DisplayServer::Wayland; + } + // Check for X11 + if std::env::var("DISPLAY").is_ok() { + return DisplayServer::X11; + } + DisplayServer::Unknown } - DisplayServer::Unknown } /// Get the path to a command if it exists @@ -231,13 +253,39 @@ pub async fn detect_output_chain() -> OutputChainStatus { None }; + // Check osascript (macOS) + let osascript_path = get_command_path("osascript").await; + let osascript_installed = osascript_path.is_some(); + let osascript_available = osascript_installed 
&& display_server == DisplayServer::MacOS; + let osascript_note = if osascript_installed && !osascript_available { + Some("macOS only".to_string()) + } else if osascript_available { + Some("requires Accessibility permission".to_string()) + } else { + None + }; + + // Check pbcopy (macOS) + let pbcopy_path = get_command_path("pbcopy").await; + let pbcopy_installed = pbcopy_path.is_some(); + let pbcopy_available = pbcopy_installed && display_server == DisplayServer::MacOS; + let pbcopy_note = if pbcopy_installed && !pbcopy_available { + Some("macOS only".to_string()) + } else { + None + }; + // Determine primary method - let primary_method = if wtype_available { + let primary_method = if osascript_available { + Some("osascript".to_string()) + } else if wtype_available { Some("wtype".to_string()) } else if eitype_available { Some("eitype".to_string()) } else if ydotool_available { Some("ydotool".to_string()) + } else if pbcopy_available { + Some("pbcopy".to_string()) } else if wl_copy_available || xclip_available { Some("clipboard".to_string()) } else { @@ -282,6 +330,20 @@ pub async fn detect_output_chain() -> OutputChainStatus { path: xclip_path, note: xclip_note, }, + osascript: OutputToolStatus { + name: "osascript", + installed: osascript_installed, + available: osascript_available, + path: osascript_path, + note: osascript_note, + }, + pbcopy: OutputToolStatus { + name: "pbcopy", + installed: pbcopy_installed, + available: pbcopy_available, + path: pbcopy_path, + note: pbcopy_note, + }, primary_method, } } @@ -300,61 +362,72 @@ pub fn print_output_chain_status(status: &OutputChainStatus) { let display = std::env::var("DISPLAY").unwrap_or_default(); format!("X11 (DISPLAY={})", display) } + DisplayServer::MacOS => "macOS (Quartz)".to_string(), DisplayServer::Unknown => "Unknown (no WAYLAND_DISPLAY or DISPLAY set)".to_string(), }; println!(" Display server: {}", ds_info); - // wtype - print_tool_status( - &status.wtype, - status.display_server == 
DisplayServer::Wayland, - ); - - // eitype - print_tool_status( - &status.eitype, - status.display_server == DisplayServer::Wayland, - ); - - // ydotool - if status.ydotool.installed { - let daemon_status = if status.ydotool_daemon { - "\x1b[32mdaemon running\x1b[0m" - } else { - "\x1b[31mdaemon not running\x1b[0m" - }; - if let Some(ref path) = status.ydotool.path { - if status.ydotool.available { - println!( - " ydotool: \x1b[32m✓\x1b[0m installed ({}), {}", - path, daemon_status - ); + // Show platform-specific tools + if status.display_server == DisplayServer::MacOS { + // macOS tools + print_tool_status(&status.osascript, true); + print_tool_status(&status.pbcopy, true); + } else { + // Linux tools + // wtype + print_tool_status( + &status.wtype, + status.display_server == DisplayServer::Wayland, + ); + + // eitype + print_tool_status( + &status.eitype, + status.display_server == DisplayServer::Wayland, + ); + + // ydotool + if status.ydotool.installed { + let daemon_status = if status.ydotool_daemon { + "\x1b[32mdaemon running\x1b[0m" } else { - println!( - " ydotool: \x1b[33m⚠\x1b[0m installed ({}), {}", - path, daemon_status - ); + "\x1b[31mdaemon not running\x1b[0m" + }; + if let Some(ref path) = status.ydotool.path { + if status.ydotool.available { + println!( + " ydotool: \x1b[32m✓\x1b[0m installed ({}), {}", + path, daemon_status + ); + } else { + println!( + " ydotool: \x1b[33m⚠\x1b[0m installed ({}), {}", + path, daemon_status + ); + } } + } else { + println!(" ydotool: \x1b[31m✗\x1b[0m not installed"); } - } else { - println!(" ydotool: \x1b[31m✗\x1b[0m not installed"); - } - // wl-copy - print_tool_status( - &status.wl_copy, - status.display_server == DisplayServer::Wayland, - ); + // wl-copy + print_tool_status( + &status.wl_copy, + status.display_server == DisplayServer::Wayland, + ); - // xclip (only show on X11 or if installed) - if status.display_server == DisplayServer::X11 || status.xclip.installed { - print_tool_status(&status.xclip, 
status.display_server == DisplayServer::X11); + // xclip (only show on X11 or if installed) + if status.display_server == DisplayServer::X11 || status.xclip.installed { + print_tool_status(&status.xclip, status.display_server == DisplayServer::X11); + } } // Summary println!(); if let Some(ref method) = status.primary_method { let method_desc = match method.as_str() { + "osascript" => "osascript (AppleScript/System Events)", + "pbcopy" => "pbcopy (clipboard, requires manual paste)", "wtype" => "wtype (CJK supported)", "eitype" => "eitype (libei, GNOME/KDE native)", "ydotool" => "ydotool (CJK not supported)", @@ -364,9 +437,11 @@ pub fn print_output_chain_status(status: &OutputChainStatus) { println!(" \x1b[32m→\x1b[0m Text will be typed via {}", method_desc); } else { println!(" \x1b[31m→\x1b[0m No text output method available!"); - println!( - " Install wtype (Wayland), eitype (GNOME/KDE), or ydotool (X11) for typing support" - ); + if status.display_server == DisplayServer::MacOS { + println!(" osascript should be available on macOS"); + } else { + println!(" Install wtype (Wayland), eitype (GNOME/KDE), or ydotool (X11) for typing support"); + } } } @@ -455,7 +530,7 @@ pub async fn run_setup( if !quiet { println!("\nCreating default config file..."); } - std::fs::write(&config_path, crate::config::DEFAULT_CONFIG)?; + std::fs::write(&config_path, crate::config::default_config_content())?; if !quiet { print_success(&format!("Created: {:?}", config_path)); } @@ -474,7 +549,7 @@ pub async fn run_setup( .map(model::is_sensevoice_model) .unwrap_or(false); - // Use model_override if provided, otherwise use config default (for Whisper) + // Validate model_override if provided (variable unused after this, each branch re-defines) let _model_name: &str = match model_override { Some(name) => { // Validate the model name (check Whisper, Parakeet, and SenseVoice) @@ -674,17 +749,33 @@ pub async fn run_setup( if !quiet && !no_post_install { println!(); println!("Next steps:"); 
- println!(" 1. Set up a compositor keybinding to trigger recording:"); - println!( - " Example for Hyprland: bind = , XF86AudioRecord, exec, voxtype record-toggle\n" - ); - println!(" 2. Start the daemon: voxtype daemon\n"); - println!("Optional:"); - println!(" voxtype setup check - Verify system configuration"); - println!(" voxtype setup model - Download/switch whisper models"); - println!(" voxtype setup systemd - Install as systemd service"); - println!(" voxtype setup waybar - Get Waybar integration config"); - println!(" voxtype setup compositor - Fix modifier key issues (Hyprland/Sway/River)"); + + #[cfg(target_os = "macos")] + { + println!(" 1. Install as app bundle (recommended):"); + println!(" voxtype setup app-bundle\n"); + println!(" 2. Or run the interactive setup wizard:"); + println!(" voxtype setup macos\n"); + println!("Optional:"); + println!(" voxtype setup check - Verify system configuration"); + println!(" voxtype setup model - Download/switch whisper models"); + println!(" voxtype setup app-bundle --status - Check installation status"); + } + + #[cfg(not(target_os = "macos"))] + { + println!(" 1. Set up a compositor keybinding to trigger recording:"); + println!( + " Example for Hyprland: bind = , XF86AudioRecord, exec, voxtype record-toggle\n" + ); + println!(" 2. 
Start the daemon: voxtype daemon\n"); + println!("Optional:"); + println!(" voxtype setup check - Verify system configuration"); + println!(" voxtype setup model - Download/switch whisper models"); + println!(" voxtype setup systemd - Install as systemd service"); + println!(" voxtype setup waybar - Get Waybar integration config"); + println!(" voxtype setup compositor - Fix modifier key issues (Hyprland/Sway/River)"); + } } Ok(()) @@ -773,24 +864,29 @@ pub async fn run_checks(config: &Config) -> anyhow::Result<()> { } } - // Check whisper model - println!("\nWhisper Model:"); - let model_name = &config.whisper.model; - let model_filename = crate::transcribe::whisper::get_model_filename(model_name); - let model_path = models_dir.join(&model_filename); + // Check whisper model (only if using Whisper engine) + if config.engine == crate::config::TranscriptionEngine::Whisper { + println!("\nWhisper Model:"); + let model_name = &config.whisper.model; + let model_filename = crate::transcribe::whisper::get_model_filename(model_name); + let model_path = models_dir.join(&model_filename); - if model_path.exists() { - let size = std::fs::metadata(&model_path) - .map(|m| m.len() as f64 / 1024.0 / 1024.0) - .unwrap_or(0.0); - print_success(&format!( - "Model '{}' installed ({:.0} MB)", - model_name, size - )); + if model_path.exists() { + let size = std::fs::metadata(&model_path) + .map(|m| m.len() as f64 / 1024.0 / 1024.0) + .unwrap_or(0.0); + print_success(&format!( + "Model '{}' installed ({:.0} MB)", + model_name, size + )); + } else { + print_failure(&format!("Model '{}' not found", model_name)); + println!(" Run: voxtype setup --download"); + all_ok = false; + } } else { - print_failure(&format!("Model '{}' not found", model_name)); - println!(" Run: voxtype setup --download"); - all_ok = false; + println!("\nWhisper Model:"); + print_info("Using Parakeet engine (Whisper model not required)"); } // Check Parakeet models @@ -804,10 +900,11 @@ pub async fn 
run_checks(config: &Config) -> anyhow::Result<()> { if path.is_dir() { let name = entry.file_name().to_string_lossy().to_string(); if name.contains("parakeet") { - // Check if it has the required ONNX files - let encoder_path = path.join("encoder-model.onnx"); - let has_encoder = encoder_path.exists(); + // Check if it has the required ONNX files (including quantized variants) + let has_encoder = path.join("encoder-model.onnx").exists() + || path.join("encoder-model.int8.onnx").exists(); let has_decoder = path.join("decoder_joint-model.onnx").exists() + || path.join("decoder_joint-model.int8.onnx").exists() || path.join("model.onnx").exists(); if has_encoder || has_decoder { // Get total size of model files diff --git a/src/setup/model.rs b/src/setup/model.rs index 0eb87612..42e38f10 100644 --- a/src/setup/model.rs +++ b/src/setup/model.rs @@ -7,6 +7,16 @@ use std::io::{self, Write}; use std::path::Path; use std::process::Command; +/// Section-header tag rendered next to the engines whose ONNX graphs the +/// MIGraphX 7.2 EP can't compile (Moonshine/SenseVoice/Paraformer/Dolphin/ +/// Omnilingual). Only shown on the AMD-targeted binary so users picking a +/// model see at a glance which engines stay on CPU even with their GPU +/// installed. NVIDIA/CPU binaries don't print this. 
+#[cfg(feature = "onnx-migraphx-enabled")] +const AMD_CPU_ONLY_TAG: &str = " \x1b[33m[CPU on AMD GPU]\x1b[0m"; +#[cfg(not(feature = "onnx-migraphx-enabled"))] +const AMD_CPU_ONLY_TAG: &str = ""; + /// Model information for display struct ModelInfo { name: &'static str, @@ -345,10 +355,7 @@ const SENSEVOICE_MODELS: &[SenseVoiceModelInfo] = &[ size_mb: 938, description: "Full precision (larger, slightly better accuracy)", languages: "zh/en/ja/ko/yue", - files: &[ - ("model.onnx", "model.onnx"), - ("tokens.txt", "tokens.txt"), - ], + files: &[("model.onnx", "model.onnx"), ("tokens.txt", "tokens.txt")], huggingface_repo: "csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17", }, ]; @@ -409,20 +416,18 @@ struct DolphinModelInfo { huggingface_repo: &'static str, } -const DOLPHIN_MODELS: &[DolphinModelInfo] = &[ - DolphinModelInfo { - name: "base", - dir_name: "dolphin-base", - size_mb: 198, - description: "Dictation-optimized (recommended)", - languages: "en/zh", - files: &[ - ("model.int8.onnx", "model.int8.onnx"), - ("tokens.txt", "tokens.txt"), - ], - huggingface_repo: "csukuangfj/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02", - }, -]; +const DOLPHIN_MODELS: &[DolphinModelInfo] = &[DolphinModelInfo { + name: "base", + dir_name: "dolphin-base", + size_mb: 198, + description: "Dictation-optimized (recommended)", + languages: "en/zh", + files: &[ + ("model.int8.onnx", "model.int8.onnx"), + ("tokens.txt", "tokens.txt"), + ], + huggingface_repo: "csukuangfj/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02", +}]; // ============================================================================= // Omnilingual Model Definitions @@ -438,20 +443,58 @@ struct OmnilingualModelInfo { huggingface_repo: &'static str, } -const OMNILINGUAL_MODELS: &[OmnilingualModelInfo] = &[ - OmnilingualModelInfo { - name: "300m", - dir_name: "omnilingual-300m", - size_mb: 3900, - description: "1600+ languages, 300M params", - languages: "1600+ langs", - files: &[ - 
("model.onnx", "model.onnx"), - ("tokens.txt", "tokens.txt"), - ], - huggingface_repo: "csukuangfj/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-2025-11-12", - }, -]; +const OMNILINGUAL_MODELS: &[OmnilingualModelInfo] = &[OmnilingualModelInfo { + name: "300m", + dir_name: "omnilingual-300m", + size_mb: 3900, + description: "1600+ languages, 300M params", + languages: "1600+ langs", + files: &[("model.onnx", "model.onnx"), ("tokens.txt", "tokens.txt")], + huggingface_repo: "csukuangfj/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-2025-11-12", +}]; + +// ============================================================================= +// Cohere Transcribe Model Definitions +// ============================================================================= +// Encoder-decoder ASR via ONNX Runtime, Whisper-style task tokens. Currently +// #1 on the Open ASR Leaderboard. The original CohereLabs weights are gated +// on HuggingFace; we use the community ONNX export which is Apache 2.0 and +// does not require an HF token. Each model is 5 files (encoder + decoder +// .onnx structural files, their .data weight sidecars, and tokens.txt). + +struct CohereModelInfo { + name: &'static str, + dir_name: &'static str, + size_mb: u32, + description: &'static str, + languages: &'static str, + files: &'static [(&'static str, &'static str)], + huggingface_repo: &'static str, +} + +const COHERE_MODELS: &[CohereModelInfo] = &[CohereModelInfo { + name: "int8", + dir_name: "cohere-transcribe-int8", + // 6 MB encoder structure + 2.8 GB encoder weights + 0.5 MB decoder + // structure + 220 MB decoder weights + 270 KB tokens. Round up. 
+ size_mb: 3100, + description: "Encoder-decoder ASR, #1 Open ASR Leaderboard", + languages: "ar,de,en,es,fr,hi,it,ja,ko,nl,pt,ru,tr,zh", + files: &[ + ("cohere-encoder.int8.onnx", "cohere-encoder.int8.onnx"), + ( + "cohere-encoder.int8.onnx.data", + "cohere-encoder.int8.onnx.data", + ), + ("cohere-decoder.int8.onnx", "cohere-decoder.int8.onnx"), + ( + "cohere-decoder.int8.onnx.data", + "cohere-decoder.int8.onnx.data", + ), + ("tokens.txt", "tokens.txt"), + ], + huggingface_repo: "cstr/cohere-transcribe-onnx-int8", +}]; // ============================================================================= // Whisper Model Functions @@ -473,6 +516,7 @@ pub async fn interactive_select() -> anyhow::Result<()> { println!("=======================\n"); let models_dir = Config::models_dir(); + println!("Models directory: {:?}\n", models_dir); // Load current config to determine active model @@ -484,6 +528,7 @@ pub async fn interactive_select() -> anyhow::Result<()> { let is_paraformer_engine = matches!(config.engine, TranscriptionEngine::Paraformer); let is_dolphin_engine = matches!(config.engine, TranscriptionEngine::Dolphin); let is_omnilingual_engine = matches!(config.engine, TranscriptionEngine::Omnilingual); + let is_cohere_engine = matches!(config.engine, TranscriptionEngine::Cohere); let current_whisper_model = &config.whisper.model; let current_parakeet_model = config.parakeet.as_ref().map(|p| p.model.as_str()); let current_moonshine_model = config.moonshine.as_ref().map(|m| m.model.as_str()); @@ -491,12 +536,14 @@ pub async fn interactive_select() -> anyhow::Result<()> { let current_paraformer_model = config.paraformer.as_ref().map(|p| p.model.as_str()); let current_dolphin_model = config.dolphin.as_ref().map(|d| d.model.as_str()); let current_omnilingual_model = config.omnilingual.as_ref().map(|o| o.model.as_str()); + let current_cohere_model = config.cohere.as_ref().map(|c| c.model.as_str()); let parakeet_available = cfg!(feature = "parakeet"); let 
moonshine_available = cfg!(feature = "moonshine"); let sensevoice_available = cfg!(feature = "sensevoice"); let paraformer_available = cfg!(feature = "paraformer"); let dolphin_available = cfg!(feature = "dolphin"); let omnilingual_available = cfg!(feature = "omnilingual"); + let cohere_available = cfg!(feature = "cohere"); let whisper_count = MODELS.len(); let parakeet_count = PARAKEET_MODELS.len(); let moonshine_count = MOONSHINE_MODELS.len(); @@ -504,6 +551,7 @@ pub async fn interactive_select() -> anyhow::Result<()> { let paraformer_count = PARAFORMER_MODELS.len(); let dolphin_count = DOLPHIN_MODELS.len(); let omnilingual_count = OMNILINGUAL_MODELS.len(); + let cohere_count = COHERE_MODELS.len(); let available_count = |available: bool, count: usize| if available { count } else { 0 }; let total_count = whisper_count @@ -512,7 +560,8 @@ pub async fn interactive_select() -> anyhow::Result<()> { + available_count(sensevoice_available, sensevoice_count) + available_count(paraformer_available, paraformer_count) + available_count(dolphin_available, dolphin_count) - + available_count(omnilingual_available, omnilingual_count); + + available_count(omnilingual_available, omnilingual_count) + + available_count(cohere_available, cohere_count); // --- Whisper Section --- println!("--- Whisper (OpenAI, 99+ languages) ---\n"); @@ -583,7 +632,10 @@ pub async fn interactive_select() -> anyhow::Result<()> { } else { 0 }; - println!("\n--- Moonshine (Moonshine AI, encoder-decoder ASR) ---\n"); + println!( + "\n--- Moonshine (Moonshine AI, encoder-decoder ASR){} ---\n", + AMD_CPU_ONLY_TAG + ); if moonshine_available { for (i, model) in MOONSHINE_MODELS.iter().enumerate() { @@ -628,7 +680,10 @@ pub async fn interactive_select() -> anyhow::Result<()> { } else { 0 }; - println!("\n--- SenseVoice (Alibaba FunAudioLLM, CJK + English) ---\n"); + println!( + "\n--- SenseVoice (Alibaba FunAudioLLM, CJK + English){} ---\n", + AMD_CPU_ONLY_TAG + ); if sensevoice_available { for (i, model) in 
SENSEVOICE_MODELS.iter().enumerate() { @@ -660,9 +715,12 @@ pub async fn interactive_select() -> anyhow::Result<()> { } // --- Paraformer Section --- - let paraformer_offset = sensevoice_offset - + available_count(sensevoice_available, sensevoice_count); - println!("\n--- Paraformer (FunASR, Chinese + English) ---\n"); + let paraformer_offset = + sensevoice_offset + available_count(sensevoice_available, sensevoice_count); + println!( + "\n--- Paraformer (FunASR, Chinese + English){} ---\n", + AMD_CPU_ONLY_TAG + ); if paraformer_available { for (i, model) in PARAFORMER_MODELS.iter().enumerate() { @@ -694,9 +752,12 @@ pub async fn interactive_select() -> anyhow::Result<()> { } // --- Dolphin Section --- - let dolphin_offset = paraformer_offset - + available_count(paraformer_available, paraformer_count); - println!("\n--- Dolphin (dictation-optimized CTC) ---\n"); + let dolphin_offset = + paraformer_offset + available_count(paraformer_available, paraformer_count); + println!( + "\n--- Dolphin (dictation-optimized CTC){} ---\n", + AMD_CPU_ONLY_TAG + ); if dolphin_available { for (i, model) in DOLPHIN_MODELS.iter().enumerate() { @@ -728,9 +789,11 @@ pub async fn interactive_select() -> anyhow::Result<()> { } // --- Omnilingual Section --- - let omnilingual_offset = dolphin_offset - + available_count(dolphin_available, dolphin_count); - println!("\n--- Omnilingual (FunASR, 50+ languages) ---\n"); + let omnilingual_offset = dolphin_offset + available_count(dolphin_available, dolphin_count); + println!( + "\n--- Omnilingual (FunASR, 50+ languages){} ---\n", + AMD_CPU_ONLY_TAG + ); if omnilingual_available { for (i, model) in OMNILINGUAL_MODELS.iter().enumerate() { @@ -761,6 +824,43 @@ pub async fn interactive_select() -> anyhow::Result<()> { println!(" \x1b[90m(not available - rebuild with --features omnilingual)\x1b[0m"); } + // --- Cohere Section --- + let cohere_offset = + omnilingual_offset + available_count(omnilingual_available, omnilingual_count); + println!( + 
"\n--- Cohere Transcribe (Cohere Labs, #1 Open ASR Leaderboard){} ---\n", + AMD_CPU_ONLY_TAG + ); + + if cohere_available { + for (i, model) in COHERE_MODELS.iter().enumerate() { + let model_path = models_dir.join(model.dir_name); + let installed = model_path.exists() && validate_cohere_model(&model_path).is_ok(); + + let is_current = is_cohere_engine && current_cohere_model == Some(model.dir_name); + let star = if is_current { "*" } else { " " }; + + let status = if installed { + "\x1b[32m[installed]\x1b[0m" + } else { + "" + }; + + println!( + " {}[{:>2}] {:<28} ({:>4} MB) {} - {} {}", + star, + cohere_offset + i + 1, + model.dir_name, + model.size_mb, + model.languages, + model.description, + status + ); + } + } else { + println!(" \x1b[90m(not available - rebuild with --features cohere)\x1b[0m"); + } + println!("\n [ 0] Cancel\n"); // Get user selection @@ -791,13 +891,43 @@ pub async fn interactive_select() -> anyhow::Result<()> { handle_sensevoice_selection(sensevoice_index).await } else if paraformer_available && selection <= paraformer_offset + paraformer_count { let idx = selection - paraformer_offset; - handle_onnx_engine_selection("paraformer", PARAFORMER_MODELS.iter().map(|m| (m.name, m.dir_name, m.size_mb, m.files, m.huggingface_repo)).collect(), idx, validate_onnx_ctc_model).await + handle_onnx_engine_selection( + "paraformer", + PARAFORMER_MODELS + .iter() + .map(|m| (m.name, m.dir_name, m.size_mb, m.files, m.huggingface_repo)) + .collect(), + idx, + validate_onnx_ctc_model, + ) + .await } else if dolphin_available && selection <= dolphin_offset + dolphin_count { let idx = selection - dolphin_offset; - handle_onnx_engine_selection("dolphin", DOLPHIN_MODELS.iter().map(|m| (m.name, m.dir_name, m.size_mb, m.files, m.huggingface_repo)).collect(), idx, validate_onnx_ctc_model).await + handle_onnx_engine_selection( + "dolphin", + DOLPHIN_MODELS + .iter() + .map(|m| (m.name, m.dir_name, m.size_mb, m.files, m.huggingface_repo)) + .collect(), + idx, + 
validate_onnx_ctc_model, + ) + .await } else if omnilingual_available && selection <= omnilingual_offset + omnilingual_count { let idx = selection - omnilingual_offset; - handle_onnx_engine_selection("omnilingual", OMNILINGUAL_MODELS.iter().map(|m| (m.name, m.dir_name, m.size_mb, m.files, m.huggingface_repo)).collect(), idx, validate_onnx_ctc_model).await + handle_onnx_engine_selection( + "omnilingual", + OMNILINGUAL_MODELS + .iter() + .map(|m| (m.name, m.dir_name, m.size_mb, m.files, m.huggingface_repo)) + .collect(), + idx, + validate_onnx_ctc_model, + ) + .await + } else if cohere_available && selection <= cohere_offset + cohere_count { + let idx = selection - cohere_offset; + handle_cohere_selection(idx).await } else { println!("\nInvalid selection."); Ok(()) @@ -1069,7 +1199,8 @@ const GTCRN_MODEL_URL: &str = "https://github.com/k2-fsa/sherpa-onnx/releases/do const GTCRN_MODEL_FILENAME: &str = "gtcrn_simple.onnx"; /// ECAPA-TDNN speaker embedding model URL and filename -const ECAPA_MODEL_URL: &str = "https://huggingface.co/pranjal-pravesh/ecapa_tdnn_onnx/resolve/main/ecapa_tdnn.onnx"; +const ECAPA_MODEL_URL: &str = + "https://huggingface.co/pranjal-pravesh/ecapa_tdnn_onnx/resolve/main/ecapa_tdnn.onnx"; const ECAPA_MODEL_FILENAME: &str = "ecapa_tdnn.onnx"; /// Ensure the GTCRN speech enhancement model is downloaded. @@ -1711,6 +1842,275 @@ fn download_moonshine_model_by_info(model: &MoonshineModelInfo) -> anyhow::Resul Ok(()) } +// ============================================================================= +// Cohere Transcribe Functions +// ============================================================================= + +/// Validate that a Cohere model directory has the required five files. 
+///
+/// Only the presence of each file is checked; file contents are not inspected.
+///
+/// # Errors
+/// Fails if `path` does not exist, or if any required file is absent (the
+/// error message lists every missing file name).
+pub fn validate_cohere_model(path: &Path) -> anyhow::Result<()> {
+    if !path.exists() {
+        anyhow::bail!("Model directory does not exist: {:?}", path);
+    }
+    let required = [
+        "cohere-encoder.int8.onnx",
+        "cohere-encoder.int8.onnx.data",
+        "cohere-decoder.int8.onnx",
+        "cohere-decoder.int8.onnx.data",
+        "tokens.txt",
+    ];
+    let missing: Vec<&str> = required
+        .iter()
+        .copied()
+        .filter(|f| !path.join(f).exists())
+        .collect();
+    if missing.is_empty() {
+        Ok(())
+    } else {
+        anyhow::bail!("Incomplete Cohere model, missing: {}", missing.join(", "))
+    }
+}
+
+/// Download a Cohere model by name (public API for run_setup).
+///
+/// `model_name` is matched against the `name` field of `COHERE_MODELS`
+/// (not `dir_name`).
+///
+/// # Errors
+/// Fails if no entry has that name, or if the download itself fails.
+pub fn download_cohere_model(model_name: &str) -> anyhow::Result<()> {
+    let model = COHERE_MODELS
+        .iter()
+        .find(|m| m.name == model_name)
+        .ok_or_else(|| anyhow::anyhow!("Unknown Cohere model: {}", model_name))?;
+    download_cohere_model_by_info(model)
+}
+
+/// Download a Cohere model using its info struct.
+///
+/// Creates `<models_dir>/<dir_name>` and fetches every entry of `model.files`
+/// with `curl`. Files already present under their final name are skipped.
+/// After all files are fetched the directory is validated.
+///
+/// # Errors
+/// Fails if the directory cannot be created, `curl` cannot be spawned, `curl`
+/// exits non-zero for any file, the finished file cannot be moved into place,
+/// or the final validation finds a file missing.
+fn download_cohere_model_by_info(model: &CohereModelInfo) -> anyhow::Result<()> {
+    let models_dir = Config::models_dir();
+    let model_path = models_dir.join(model.dir_name);
+    std::fs::create_dir_all(&model_path)?;
+
+    // Cohere is a multi-GB download. Even with a fast connection it's a
+    // visible commitment, and on slow links it can mean 30+ minutes. Print
+    // the size up front so users don't wonder why their disk is filling.
+    println!(
+        "\nDownloading {} ({} MB across {} files)...",
+        model.dir_name,
+        model.size_mb,
+        model.files.len()
+    );
+    println!(
+        "This is the largest model voxtype ships. Ensure you have at least \
+         {} MB of free space in {}.\n",
+        // Add 10% headroom for filesystem overhead.
+        model.size_mb + (model.size_mb / 10),
+        model_path.display(),
+    );
+
+    for (repo_path, local_filename) in model.files {
+        let file_path = model_path.join(local_filename);
+
+        if file_path.exists() {
+            println!(" {} already exists, skipping", local_filename);
+            continue;
+        }
+
+        let url = format!(
+            "https://huggingface.co/{}/resolve/main/{}",
+            model.huggingface_repo, repo_path
+        );
+
+        println!("Downloading {}...", local_filename);
+
+        // Download into a sibling ".part" file and rename only on success.
+        // The "already exists, skipping" check above and the validator both
+        // test mere existence, so an interrupted transfer written straight to
+        // the final name would be mistaken for a complete file on the next run.
+        let partial_path = model_path.join(format!("{}.part", local_filename));
+
+        let status = Command::new("curl")
+            // --fail: exit non-zero on HTTP errors (404/401/5xx) instead of
+            // saving the server's error page as the model file.
+            .args(["-L", "--fail", "--progress-bar", "-o"])
+            // Pass the path as an OsStr so a non-UTF-8 models directory is
+            // used as-is rather than being replaced by a fallback name.
+            .arg(&partial_path)
+            .arg(&url)
+            .status();
+
+        match status {
+            Ok(exit_status) if exit_status.success() => {
+                std::fs::rename(&partial_path, &file_path)?;
+            }
+            Ok(exit_status) => {
+                print_failure(&format!(
+                    "Download failed: curl exited with code {}",
+                    exit_status.code().unwrap_or(-1)
+                ));
+                // Best-effort cleanup; the original error is what matters.
+                let _ = std::fs::remove_file(&partial_path);
+                anyhow::bail!("Download failed for {}", local_filename)
+            }
+            Err(e) => {
+                print_failure(&format!("Failed to run curl: {}", e));
+                print_info("Please ensure curl is installed (e.g., 'sudo pacman -S curl')");
+                anyhow::bail!("curl not available: {}", e)
+            }
+        }
+    }
+
+    validate_cohere_model(&model_path)?;
+    print_success(&format!(
+        "Model '{}' downloaded to {:?}",
+        model.dir_name, model_path
+    ));
+
+    Ok(())
+}
+
+/// Handle Cohere model selection (download + config update).
+///
+/// `selection` is the 1-based position within `COHERE_MODELS`; `0` or an
+/// out-of-range value prints "Cancelled." and returns `Ok(())`.
+///
+/// If the model is already installed the user may set it as the default,
+/// re-download it, or cancel. Any download is preceded by a size
+/// confirmation prompt; answering `n`/`no` cancels without touching disk.
+///
+/// # Errors
+/// Propagates stdin/stdout I/O errors, failures removing stale files before
+/// a re-download, download failures, and config update failures.
+async fn handle_cohere_selection(selection: usize) -> anyhow::Result<()> {
+    let models_dir = Config::models_dir();
+
+    if selection == 0 || selection > COHERE_MODELS.len() {
+        println!("\nCancelled.");
+        return Ok(());
+    }
+
+    let model = &COHERE_MODELS[selection - 1];
+    let model_path = models_dir.join(model.dir_name);
+
+    // Set when the user explicitly asks to replace an installed model.
+    let mut force_redownload = false;
+
+    if model_path.exists() && validate_cohere_model(&model_path).is_ok() {
+        println!("\nModel '{}' is already installed.\n", model.dir_name);
+        println!(" [1] Set as default model (update config)");
+        println!(" [2] Re-download");
+        println!(" [0] Cancel\n");
+
+        print!("Select option [1]: ");
+        io::stdout().flush()?;
+
+        let mut choice = String::new();
+        io::stdin().read_line(&mut choice)?;
+        let choice = choice.trim();
+
+        match choice {
+            // Empty input takes the advertised default, option 1.
+            "" | "1" => {
+                update_config_cohere(model.dir_name)?;
+                restart_daemon_if_running().await;
+                return Ok(());
+            }
+            "2" => force_redownload = true,
+            _ => {
+                println!("Cancelled.");
+                return Ok(());
+            }
+        }
+    }
+
+    // Size-confirm before kicking off a multi-GB download.
+    println!();
+    print_warning(&format!(
+        "Cohere is a {} MB download — the largest model voxtype offers.",
+        model.size_mb,
+    ));
+    print_info("It runs entirely on-device with no cloud calls. Apache 2.0 licensed.");
+    println!();
+    print!("Continue? [Y/n]: ");
+    io::stdout().flush()?;
+
+    let mut confirm = String::new();
+    io::stdin().read_line(&mut confirm)?;
+    let confirm = confirm.trim().to_lowercase();
+    if confirm == "n" || confirm == "no" {
+        println!("Cancelled.");
+        return Ok(());
+    }
+
+    if force_redownload {
+        // The downloader skips any file that already exists, so without this
+        // "Re-download" would fetch nothing. Deletion happens only after the
+        // confirmation above so that cancelling leaves the install intact.
+        for (_, local_filename) in model.files {
+            match std::fs::remove_file(model_path.join(local_filename)) {
+                Ok(()) => {}
+                // Already gone: nothing to replace for this file.
+                Err(e) if e.kind() == io::ErrorKind::NotFound => {}
+                Err(e) => return Err(e.into()),
+            }
+        }
+    }
+
+    download_cohere_model_by_info(model)?;
+    update_config_cohere(model.dir_name)?;
+    restart_daemon_if_running().await;
+    Ok(())
+}
+
+/// Update config to use Cohere engine with a specific model (status messages).
+fn update_config_cohere(model_name: &str) -> anyhow::Result<()> { + if let Some(config_path) = Config::default_path() { + if config_path.exists() { + let content = std::fs::read_to_string(&config_path)?; + let updated = update_cohere_in_config(&content, model_name); + std::fs::write(&config_path, updated)?; + print_success(&format!( + "Config updated: engine = \"cohere\", model = \"{}\"", + model_name + )); + Ok(()) + } else { + print_info("No config file found. Run 'voxtype setup' first."); + Ok(()) + } + } else { + anyhow::bail!("Could not determine config path") + } +} + +/// Update the config to use Cohere engine with a specific model. Mirrors +/// `update_moonshine_in_config` exactly — the only difference is the engine +/// name and section name. If the section doesn't exist, append a stub at EOF. +fn update_cohere_in_config(config: &str, model_name: &str) -> String { + let mut result = String::new(); + let mut has_engine_line = false; + let mut has_cohere_section = false; + let mut in_cohere_section = false; + let mut cohere_model_updated = false; + + for line in config.lines() { + let trimmed = line.trim(); + + if trimmed.starts_with('[') { + if in_cohere_section && !cohere_model_updated { + result.push_str(&format!("model = \"{}\"\n", model_name)); + cohere_model_updated = true; + } + in_cohere_section = trimmed == "[cohere]"; + if in_cohere_section { + has_cohere_section = true; + } + } + + if trimmed.starts_with("engine") && !trimmed.starts_with('[') { + result.push_str("engine = \"cohere\"\n"); + has_engine_line = true; + } else if in_cohere_section && trimmed.starts_with("model") { + result.push_str(&format!("model = \"{}\"\n", model_name)); + cohere_model_updated = true; + } else { + result.push_str(line); + result.push('\n'); + } + } + + if in_cohere_section && !cohere_model_updated { + result.push_str(&format!("model = \"{}\"\n", model_name)); + } + + if !has_engine_line { + let mut new_result = String::new(); + let mut engine_added = false; + for 
line in result.lines() { + let trimmed = line.trim(); + if !engine_added + && !trimmed.is_empty() + && !trimmed.starts_with('#') + && !trimmed.starts_with("engine") + { + new_result.push_str("engine = \"cohere\"\n\n"); + engine_added = true; + } + new_result.push_str(line); + new_result.push('\n'); + } + if !engine_added { + new_result.push_str("engine = \"cohere\"\n"); + } + result = new_result; + } + + if !has_cohere_section { + if !result.ends_with('\n') { + result.push('\n'); + } + result.push_str(&format!("\n[cohere]\nmodel = \"{}\"\n", model_name)); + } + + result +} + /// Update config to use Moonshine engine and a specific model (with status messages) fn update_config_moonshine(model_name: &str) -> anyhow::Result<()> { if let Some(config_path) = Config::default_path() { @@ -1937,8 +2337,7 @@ pub fn validate_sensevoice_model(path: &Path) -> anyhow::Result<()> { anyhow::bail!("Model directory does not exist: {:?}", path); } - let has_model = - path.join("model.int8.onnx").exists() || path.join("model.onnx").exists(); + let has_model = path.join("model.int8.onnx").exists() || path.join("model.onnx").exists(); let has_tokens = path.join("tokens.txt").exists(); if has_model && has_tokens { @@ -2254,7 +2653,10 @@ async fn handle_onnx_engine_selection( // Validate validate_fn(&model_path)?; - print_success(&format!("Model '{}' downloaded to {:?}", dir_name, model_path)); + print_success(&format!( + "Model '{}' downloaded to {:?}", + dir_name, model_path + )); // Update config and restart daemon update_config_engine(engine_name, name)?; @@ -2285,10 +2687,7 @@ fn download_onnx_model( continue; } - let url = format!( - "https://huggingface.co/{}/resolve/main/{}", - repo, repo_path - ); + let url = format!("https://huggingface.co/{}/resolve/main/{}", repo, repo_path); println!("Downloading {}...", local_filename); @@ -2403,7 +2802,10 @@ fn update_engine_in_config(config: &str, engine_name: &str, model_name: &str) -> } if !has_section { - 
result.push_str(&format!("\n[{}]\nmodel = \"{}\"\n", engine_name, model_name)); + result.push_str(&format!( + "\n[{}]\nmodel = \"{}\"\n", + engine_name, model_name + )); } if !config.ends_with('\n') && result.ends_with('\n') { diff --git a/src/setup/parakeet.rs b/src/setup/parakeet.rs index 4ffea28c..9c85bdbe 100644 --- a/src/setup/parakeet.rs +++ b/src/setup/parakeet.rs @@ -1,291 +1,207 @@ -//! Parakeet backend management for voxtype +//! Parakeet/ONNX backend management. //! -//! Switches between Whisper and Parakeet binaries by updating the symlink. -//! Parakeet binaries are stored in /usr/lib/voxtype/ alongside Whisper variants. +//! User-facing wrapper around [`super::binary`] for the legacy +//! `voxtype setup onnx`/`voxtype setup parakeet` CLI. -use std::fs; -use std::os::unix::fs::symlink; +use super::binary::{self, EngineFamily, Variant}; use std::path::Path; -use std::process::Command; -const VOXTYPE_LIB_DIR: &str = "/usr/lib/voxtype"; -const VOXTYPE_BIN: &str = "/usr/bin/voxtype"; - -/// Parakeet backend variants +/// Parakeet backend variants exposed to existing callers (status formatting, +/// CLI dispatch). Each maps to one [`Variant`] in the `Onnx` family. #[derive(Debug, Clone, Copy, PartialEq)] pub enum ParakeetBackend { Avx2, Avx512, + /// CUDA 12.x (NVIDIA, ort built against libcudart.so.12) + Cuda12, + /// CUDA 13.x (NVIDIA, ort built against libcudart.so.13, requires driver 580+) + Cuda13, + /// Unversioned CUDA binary (source-built or pre-0.7.0). 
Cuda, - Rocm, + Migraphx, /// Custom binary (source-compiled without specific suffix) Custom, } impl ParakeetBackend { - fn binary_name(&self) -> &'static str { + fn variant(self) -> Variant { match self { - ParakeetBackend::Avx2 => "voxtype-onnx-avx2", - ParakeetBackend::Avx512 => "voxtype-onnx-avx512", - ParakeetBackend::Cuda => "voxtype-onnx-cuda", - ParakeetBackend::Rocm => "voxtype-onnx-rocm", - ParakeetBackend::Custom => "voxtype-onnx", + ParakeetBackend::Avx2 => Variant::OnnxAvx2, + ParakeetBackend::Avx512 => Variant::OnnxAvx512, + ParakeetBackend::Cuda12 => Variant::OnnxCuda12, + ParakeetBackend::Cuda13 => Variant::OnnxCuda13, + ParakeetBackend::Cuda => Variant::OnnxCuda, + ParakeetBackend::Migraphx => Variant::OnnxMigraphx, + ParakeetBackend::Custom => Variant::OnnxNative, } } - pub fn display_name(&self) -> &'static str { - match self { - ParakeetBackend::Avx2 => "ONNX (AVX2)", - ParakeetBackend::Avx512 => "ONNX (AVX-512)", - ParakeetBackend::Cuda => "ONNX (CUDA)", - ParakeetBackend::Rocm => "ONNX (ROCm)", - ParakeetBackend::Custom => "ONNX (Custom)", + fn from_variant(v: Variant) -> Option { + match v { + Variant::OnnxAvx2 => Some(ParakeetBackend::Avx2), + Variant::OnnxAvx512 => Some(ParakeetBackend::Avx512), + Variant::OnnxCuda12 => Some(ParakeetBackend::Cuda12), + Variant::OnnxCuda13 => Some(ParakeetBackend::Cuda13), + Variant::OnnxCuda => Some(ParakeetBackend::Cuda), + Variant::OnnxMigraphx => Some(ParakeetBackend::Migraphx), + Variant::OnnxNative => Some(ParakeetBackend::Custom), + _ => None, } } - fn whisper_equivalent(&self) -> &'static str { + pub fn display_name(&self) -> &'static str { + self.variant().display() + } + + fn whisper_equivalent(&self) -> Variant { match self { - ParakeetBackend::Avx2 => "voxtype-avx2", - ParakeetBackend::Avx512 => "voxtype-avx512", - ParakeetBackend::Cuda => "voxtype-vulkan", // CUDA users likely have GPU, fall back to vulkan - ParakeetBackend::Rocm => "voxtype-vulkan", // ROCm users have AMD GPU, fall back to 
vulkan - ParakeetBackend::Custom => "voxtype-native", // Source builds: natively compiled, no CPU tier + ParakeetBackend::Avx2 => Variant::WhisperAvx2, + ParakeetBackend::Avx512 => Variant::WhisperAvx512, + // GPU users get Vulkan as the closest Whisper equivalent. + ParakeetBackend::Cuda12 + | ParakeetBackend::Cuda13 + | ParakeetBackend::Cuda + | ParakeetBackend::Migraphx => Variant::WhisperVulkan, + ParakeetBackend::Custom => Variant::WhisperNative, } } } -/// Detect if Parakeet is currently active +/// True if the active variant is in the ONNX family. pub fn is_parakeet_active() -> bool { - if let Ok(link_target) = fs::read_link(VOXTYPE_BIN) { - if let Some(target_name) = link_target.file_name() { - if let Some(name) = target_name.to_str() { - return name.contains("onnx") || name.contains("parakeet"); - } - } - } - false + binary::active_variant() + .map(|v| v.family() == EngineFamily::Onnx) + .unwrap_or(false) } -/// Detect which Parakeet backend is currently active (if any) pub fn detect_current_parakeet_backend() -> Option { - if let Ok(link_target) = fs::read_link(VOXTYPE_BIN) { - let target_name = link_target.file_name()?.to_str()?; - return match target_name { - // New ONNX names - "voxtype-onnx-avx2" => Some(ParakeetBackend::Avx2), - "voxtype-onnx-avx512" => Some(ParakeetBackend::Avx512), - "voxtype-onnx-cuda" => Some(ParakeetBackend::Cuda), - "voxtype-onnx-rocm" => Some(ParakeetBackend::Rocm), - "voxtype-onnx" => Some(ParakeetBackend::Custom), - // Legacy parakeet names (backward compat) - "voxtype-parakeet-avx2" => Some(ParakeetBackend::Avx2), - "voxtype-parakeet-avx512" => Some(ParakeetBackend::Avx512), - "voxtype-parakeet-cuda" => Some(ParakeetBackend::Cuda), - "voxtype-parakeet-rocm" => Some(ParakeetBackend::Rocm), - "voxtype-parakeet" => Some(ParakeetBackend::Custom), - _ => None, - }; - } - None -} - -/// Detect which Whisper backend is currently active -fn detect_current_whisper_backend() -> Option<&'static str> { - if let Ok(link_target) = 
fs::read_link(VOXTYPE_BIN) { - let target_name = link_target.file_name()?.to_str()?; - return match target_name { - "voxtype-avx2" => Some("voxtype-avx2"), - "voxtype-avx512" => Some("voxtype-avx512"), - "voxtype-vulkan" => Some("voxtype-vulkan"), - "voxtype-native" => Some("voxtype-native"), - _ => None, - }; - } - None + ParakeetBackend::from_variant(binary::active_variant()?) } -/// Detect available Parakeet backends pub fn detect_available_backends() -> Vec { - let mut available = Vec::new(); - - for backend in [ - ParakeetBackend::Avx2, - ParakeetBackend::Avx512, - ParakeetBackend::Cuda, - ParakeetBackend::Rocm, - ParakeetBackend::Custom, - ] { - let path = Path::new(VOXTYPE_LIB_DIR).join(backend.binary_name()); - if path.exists() { - available.push(backend); - } - } + binary::enumerate_installed() + .into_iter() + .filter_map(ParakeetBackend::from_variant) + .collect() +} - available +/// Detect which Whisper backend is currently active (legacy helper retained +/// for `show_status` output). +fn detect_current_whisper_variant() -> Option { + binary::active_variant().filter(|v| v.family() == EngineFamily::Whisper) } -/// Detect the best Parakeet backend for this system +/// Pick the best ONNX variant for this system. fn detect_best_parakeet_backend() -> Option { - let available = detect_available_backends(); + let inv = binary::inventory(); + let installed_onnx: Vec<&binary::VariantStatus> = inv + .variants + .iter() + .filter(|s| s.installed && s.variant.family() == EngineFamily::Onnx) + .collect(); - if available.is_empty() { + if installed_onnx.is_empty() { return None; } - let has_avx512 = fs::read_to_string("/proc/cpuinfo") - .map(|info| info.contains("avx512f")) - .unwrap_or(false); - - // Prefer CUDA if available and NVIDIA GPU detected. - // The CUDA binary bundles ONNX Runtime which may contain AVX-512 instructions, - // so only select it if the CPU supports AVX-512. 
- if available.contains(&ParakeetBackend::Cuda) && detect_nvidia_gpu() && has_avx512 { - return Some(ParakeetBackend::Cuda); - } - - // Prefer ROCm if available and AMD GPU detected. - // The ROCm binary bundles ONNX Runtime which contains AVX-512 instructions, - // so only select it if the CPU supports AVX-512. - if available.contains(&ParakeetBackend::Rocm) && detect_amd_gpu() && has_avx512 { - return Some(ParakeetBackend::Rocm); - } - - // Check for AVX-512 CPU-only backend - if available.contains(&ParakeetBackend::Avx512) && has_avx512 { - return Some(ParakeetBackend::Avx512); - } - - // Fall back to AVX2 - if available.contains(&ParakeetBackend::Avx2) { - return Some(ParakeetBackend::Avx2); - } - - // Fall back to Native (source-compiled generic binary) - if available.contains(&ParakeetBackend::Custom) { - return Some(ParakeetBackend::Custom); - } - - // Last resort: whatever is available - available.first().copied() -} - -/// Detect if NVIDIA GPU is present -fn detect_nvidia_gpu() -> bool { - // Check for nvidia-smi - if let Ok(output) = Command::new("nvidia-smi") - .arg("--query-gpu=name") - .arg("--format=csv,noheader") - .output() - { - return output.status.success() && !output.stdout.is_empty(); - } - - // Check for NVIDIA device nodes - Path::new("/dev/nvidia0").exists() -} - -/// Detect if AMD GPU is present -fn detect_amd_gpu() -> bool { - // Check for AMD GPU via lspci - if let Ok(output) = Command::new("lspci").output() { - if output.status.success() { - let output_str = String::from_utf8_lossy(&output.stdout).to_lowercase(); - if output_str.contains("amd") || output_str.contains("radeon") { - return true; + // Prefer CUDA on NVIDIA hosts. cu12 vs cu13 binaries differ only in which + // ONNX Runtime prebuilt they bundle (libcudart.so.12 vs .13); pick the one + // matching the host's runtime so the EP doesn't fail to register and + // silently fall back to CPU. 
+ let host_cuda = detect_cuda_runtime_major(); + let cuda_pref: &[Variant] = match host_cuda { + Some(13) => &[Variant::OnnxCuda13, Variant::OnnxCuda, Variant::OnnxCuda12], + Some(12) => &[Variant::OnnxCuda12, Variant::OnnxCuda, Variant::OnnxCuda13], + // Host CUDA detection failed; prefer cu13 since CUDA 13 is the + // rolling-distro default. Users on cu12 can override manually. + _ => &[Variant::OnnxCuda13, Variant::OnnxCuda12, Variant::OnnxCuda], + }; + for v in cuda_pref { + if let Some(status) = installed_onnx.iter().find(|s| &s.variant == v) { + if status.runs_on_this_cpu && status.gpu_available { + return ParakeetBackend::from_variant(*v); } } } - // Check for AMD DRI render nodes - if let Ok(entries) = fs::read_dir("/dev/dri") { - for entry in entries.flatten() { - if let Some(name) = entry.file_name().to_str() { - if name.starts_with("renderD") { - // Check if it's an AMD device via sysfs - let card_num = name.trim_start_matches("renderD"); - let vendor_path = format!( - "/sys/class/drm/card{}/device/vendor", - card_num.parse::().unwrap_or(0) - 128 - ); - if let Ok(vendor) = fs::read_to_string(&vendor_path) { - // AMD vendor ID is 0x1002 - if vendor.trim() == "0x1002" { - return true; - } - } - } + // Then MIGraphX, then CPU-only backends. + let preference = [ + Variant::OnnxMigraphx, + Variant::OnnxAvx512, + Variant::OnnxAvx2, + Variant::OnnxNative, + ]; + for v in preference { + if let Some(status) = installed_onnx.iter().find(|s| s.variant == v) { + if status.runs_on_this_cpu && status.gpu_available { + return ParakeetBackend::from_variant(v); } } } - - false + // Fall back to whatever's installed even if the heuristic warns against it. 
+ installed_onnx + .first() + .and_then(|s| ParakeetBackend::from_variant(s.variant)) } -/// Switch symlink to a different binary -fn switch_binary(binary_name: &str) -> anyhow::Result<()> { - let binary_path = Path::new(VOXTYPE_LIB_DIR).join(binary_name); - - if !binary_path.exists() { - anyhow::bail!( - "Binary not found: {}\n\ - Install the appropriate voxtype package variant.", - binary_path.display() - ); - } +/// Detect the host's CUDA runtime major version by dlopen'ing libcudart. +/// Returns Some(12), Some(13), or None if CUDA isn't installed or the probe +/// fails. Used by detect_best_parakeet_backend to pick between voxtype-onnx-cuda-12 +/// and voxtype-onnx-cuda-13 based on what the host can actually run. +pub fn detect_cuda_runtime_major() -> Option { + use std::ffi::CString; + let candidates = ["libcudart.so", "libcudart.so.13", "libcudart.so.12"]; + let handle = candidates.iter().find_map(|name| { + let cstr = CString::new(*name).ok()?; + let h = unsafe { libc::dlopen(cstr.as_ptr(), libc::RTLD_LAZY) }; + if h.is_null() { None } else { Some(h) } + })?; - // Remove existing symlink - if Path::new(VOXTYPE_BIN).exists() || fs::symlink_metadata(VOXTYPE_BIN).is_ok() { - fs::remove_file(VOXTYPE_BIN).map_err(|e| { - anyhow::anyhow!( - "Failed to remove existing symlink (need sudo?): {}\n\ - Try: sudo voxtype setup onnx --enable", - e - ) - })?; + let sym_name = CString::new("cudaRuntimeGetVersion").ok()?; + let sym = unsafe { libc::dlsym(handle, sym_name.as_ptr()) }; + if sym.is_null() { + unsafe { libc::dlclose(handle) }; + return None; } - // Create new symlink - symlink(&binary_path, VOXTYPE_BIN).map_err(|e| { - anyhow::anyhow!( - "Failed to create symlink (need sudo?): {}\n\ - Try: sudo voxtype setup onnx --enable", - e - ) - })?; - - // Restore SELinux context if available - let _ = Command::new("restorecon").arg(VOXTYPE_BIN).status(); + type CudaRuntimeGetVersion = unsafe extern "C" fn(*mut i32) -> i32; + let get_version: CudaRuntimeGetVersion = 
unsafe { std::mem::transmute(sym) }; + let mut version: i32 = 0; + let result = unsafe { get_version(&mut version) }; + unsafe { libc::dlclose(handle) }; - Ok(()) + if result != 0 { + return None; + } + Some(version / 1000) } -/// Show Parakeet backend status pub fn show_status() { println!("=== Voxtype ONNX Engine Status ===\n"); - // Current engine if is_parakeet_active() { if let Some(backend) = detect_current_parakeet_backend() { println!("Active engine: Parakeet"); println!(" Backend: {}", backend.display_name()); println!( " Binary: {}", - Path::new(VOXTYPE_LIB_DIR) - .join(backend.binary_name()) + Path::new(binary::LIB_DIR) + .join(backend.variant().binary_name()) .display() ); } } else { println!("Active engine: Whisper"); - if let Some(backend) = detect_current_whisper_backend() { + if let Some(variant) = detect_current_whisper_variant() { println!( " Binary: {}", - Path::new(VOXTYPE_LIB_DIR).join(backend).display() + Path::new(binary::LIB_DIR) + .join(variant.binary_name()) + .display() ); } } - // Available ONNX backends println!("\nAvailable ONNX backends:"); let available = detect_available_backends(); let current = detect_current_parakeet_backend(); @@ -297,13 +213,14 @@ pub fn show_status() { for backend in [ ParakeetBackend::Avx2, ParakeetBackend::Avx512, + ParakeetBackend::Cuda12, + ParakeetBackend::Cuda13, ParakeetBackend::Cuda, - ParakeetBackend::Rocm, + ParakeetBackend::Migraphx, ParakeetBackend::Custom, ] { let installed = available.contains(&backend); let active = current == Some(backend); - let status = if active { "active" } else if installed { @@ -311,35 +228,29 @@ pub fn show_status() { } else { "not installed" }; - println!(" {} - {}", backend.display_name(), status); } } - // GPU detection for CUDA/ROCm + // GPU detection for CUDA/MIGraphX println!(); - let has_nvidia = detect_nvidia_gpu(); - let has_amd = detect_amd_gpu(); - let has_avx512 = fs::read_to_string("/proc/cpuinfo") - .map(|info| info.contains("avx512f")) - .unwrap_or(false); 
- - if has_nvidia { + let gpus = binary::detect_gpus(); + let cpu = binary::detect_cpu(); + if gpus.nvidia { println!("NVIDIA GPU: detected"); } - if has_amd { + if gpus.amd { println!("AMD GPU: detected"); } - if !has_nvidia && !has_amd { + if !gpus.nvidia && !gpus.amd { println!("GPU: not detected"); } - if (has_nvidia || has_amd) && !has_avx512 { - println!("\nNote: ONNX GPU binaries (CUDA/ROCm) require AVX-512 CPU support."); + if (gpus.nvidia || gpus.amd) && !cpu.avx512 { + println!("\nNote: ONNX GPU binaries (CUDA/MIGraphX) require AVX-512 CPU support."); println!(" Your CPU supports AVX2 only. Use ONNX (AVX2) for CPU-based inference,"); println!(" or use the Whisper engine with Vulkan for GPU acceleration."); } - // Usage hints println!(); if !is_parakeet_active() && !available.is_empty() { println!("To enable ONNX engines:"); @@ -350,10 +261,8 @@ pub fn show_status() { } } -/// Enable Parakeet backend pub fn enable() -> anyhow::Result<()> { let available = detect_available_backends(); - if available.is_empty() { anyhow::bail!( "No ONNX binaries installed.\n\ @@ -369,13 +278,11 @@ pub fn enable() -> anyhow::Result<()> { return Ok(()); } - // Find best ONNX backend let backend = detect_best_parakeet_backend() .ok_or_else(|| anyhow::anyhow!("No suitable ONNX backend found"))?; - switch_binary(backend.binary_name())?; + binary::switch_to(backend.variant())?; - // Regenerate systemd service if it exists if super::systemd::regenerate_service_file()? 
{ println!("Updated systemd service to use ONNX backend."); } @@ -388,63 +295,51 @@ pub fn enable() -> anyhow::Result<()> { Ok(()) } -/// Disable Parakeet backend (switch back to Whisper) pub fn disable() -> anyhow::Result<()> { if !is_parakeet_active() { println!("ONNX engine is not currently enabled (already using Whisper)."); return Ok(()); } - // Determine which Whisper backend to switch to based on current Parakeet backend - let current_parakeet = detect_current_parakeet_backend(); - let whisper_backend = match current_parakeet { - Some(backend) => backend.whisper_equivalent(), - None => "voxtype-avx2", // Default fallback - }; + let preferred = detect_current_parakeet_backend() + .map(|b| b.whisper_equivalent()) + .unwrap_or(Variant::WhisperAvx2); - // Check if the Whisper backend exists - let whisper_path = Path::new(VOXTYPE_LIB_DIR).join(whisper_backend); - let final_backend = if whisper_path.exists() { - whisper_backend + let installed = binary::enumerate_installed(); + let target = if installed.contains(&preferred) { + preferred } else { - // Try to find any available Whisper backend - for fallback in [ - "voxtype-avx512", - "voxtype-avx2", - "voxtype-vulkan", - "voxtype-native", - ] { - if Path::new(VOXTYPE_LIB_DIR).join(fallback).exists() { - eprintln!( - "Note: {} not found, using {} instead", - whisper_backend, fallback - ); - break; - } + // Fall back to any installed Whisper variant in this preference order. 
+ let order = [ + Variant::WhisperAvx512, + Variant::WhisperAvx2, + Variant::WhisperVulkan, + Variant::WhisperNative, + ]; + let chosen = order + .iter() + .find(|v| installed.contains(v)) + .copied() + .ok_or_else(|| anyhow::anyhow!("No Whisper backend found to switch to"))?; + if chosen != preferred { + eprintln!( + "Note: {} not found, using {} instead", + preferred.binary_name(), + chosen.binary_name() + ); } - // Find first available - [ - "voxtype-avx512", - "voxtype-avx2", - "voxtype-vulkan", - "voxtype-native", - ] - .iter() - .find(|b| Path::new(VOXTYPE_LIB_DIR).join(b).exists()) - .copied() - .ok_or_else(|| anyhow::anyhow!("No Whisper backend found to switch to"))? + chosen }; - switch_binary(final_backend)?; + binary::switch_to(target)?; - // Regenerate systemd service if it exists if super::systemd::regenerate_service_file()? { println!("Updated systemd service to use Whisper backend."); } println!( "Switched to Whisper ({}) backend.", - final_backend.trim_start_matches("voxtype-") + target.binary_name().trim_start_matches("voxtype-") ); println!(); println!("Restart voxtype to use Whisper:"); @@ -458,51 +353,62 @@ mod tests { use super::*; #[test] - fn test_parakeet_backend_binary_names() { - assert_eq!(ParakeetBackend::Avx2.binary_name(), "voxtype-onnx-avx2"); - assert_eq!(ParakeetBackend::Avx512.binary_name(), "voxtype-onnx-avx512"); - assert_eq!(ParakeetBackend::Cuda.binary_name(), "voxtype-onnx-cuda"); - assert_eq!(ParakeetBackend::Rocm.binary_name(), "voxtype-onnx-rocm"); - assert_eq!(ParakeetBackend::Custom.binary_name(), "voxtype-onnx"); + fn parakeet_backend_round_trip() { + for b in [ + ParakeetBackend::Avx2, + ParakeetBackend::Avx512, + ParakeetBackend::Cuda12, + ParakeetBackend::Cuda13, + ParakeetBackend::Cuda, + ParakeetBackend::Migraphx, + ParakeetBackend::Custom, + ] { + assert_eq!(ParakeetBackend::from_variant(b.variant()), Some(b)); + } } #[test] - fn test_parakeet_backend_display_names() { - 
assert_eq!(ParakeetBackend::Avx2.display_name(), "ONNX (AVX2)"); - assert_eq!(ParakeetBackend::Avx512.display_name(), "ONNX (AVX-512)"); - assert_eq!(ParakeetBackend::Cuda.display_name(), "ONNX (CUDA)"); - assert_eq!(ParakeetBackend::Rocm.display_name(), "ONNX (ROCm)"); - assert_eq!(ParakeetBackend::Custom.display_name(), "ONNX (Custom)"); + fn parakeet_backend_binary_names() { + assert_eq!(ParakeetBackend::Cuda12.variant().binary_name(), "voxtype-onnx-cuda-12"); + assert_eq!(ParakeetBackend::Cuda13.variant().binary_name(), "voxtype-onnx-cuda-13"); + assert_eq!(ParakeetBackend::Migraphx.variant().binary_name(), "voxtype-onnx-migraphx"); } #[test] - fn test_parakeet_whisper_equivalents() { - assert_eq!(ParakeetBackend::Avx2.whisper_equivalent(), "voxtype-avx2"); - assert_eq!( - ParakeetBackend::Avx512.whisper_equivalent(), - "voxtype-avx512" - ); - assert_eq!(ParakeetBackend::Cuda.whisper_equivalent(), "voxtype-vulkan"); - assert_eq!(ParakeetBackend::Rocm.whisper_equivalent(), "voxtype-vulkan"); - assert_eq!( - ParakeetBackend::Custom.whisper_equivalent(), - "voxtype-native" - ); + fn whisper_variants_dont_resolve_to_parakeet() { + for v in [ + Variant::WhisperAvx2, + Variant::WhisperAvx512, + Variant::WhisperVulkan, + Variant::WhisperNative, + ] { + assert_eq!(ParakeetBackend::from_variant(v), None); + } + } + + #[test] + fn whisper_equivalents_are_whisper() { + for b in [ + ParakeetBackend::Avx2, + ParakeetBackend::Avx512, + ParakeetBackend::Cuda12, + ParakeetBackend::Cuda13, + ParakeetBackend::Cuda, + ParakeetBackend::Migraphx, + ParakeetBackend::Custom, + ] { + assert_eq!(b.whisper_equivalent().family(), EngineFamily::Whisper); + } } #[test] - fn test_is_parakeet_active_false_when_no_symlink() { - // When /usr/bin/voxtype doesn't exist or isn't a symlink, should return false - // This test verifies the function handles missing files gracefully - assert!(!is_parakeet_active() || is_parakeet_active()); // Just verify no panic + fn 
is_parakeet_active_does_not_panic() { + let _ = is_parakeet_active(); } #[test] - fn test_detect_available_backends_returns_vec() { - // Verify function returns without panicking + fn detect_available_backends_returns_vec() { let backends = detect_available_backends(); - // On most dev machines, no parakeet binaries are installed - // Just verify it returns a valid vector assert!(backends.len() <= 5); } @@ -510,12 +416,13 @@ mod tests { fn test_backend_enum_equality() { assert_eq!(ParakeetBackend::Avx2, ParakeetBackend::Avx2); assert_ne!(ParakeetBackend::Avx2, ParakeetBackend::Avx512); - assert_ne!(ParakeetBackend::Avx512, ParakeetBackend::Cuda); + assert_ne!(ParakeetBackend::Avx512, ParakeetBackend::Cuda12); + assert_ne!(ParakeetBackend::Cuda12, ParakeetBackend::Cuda13); } #[test] fn test_backend_clone() { - let backend = ParakeetBackend::Cuda; + let backend = ParakeetBackend::Cuda12; let cloned = backend; assert_eq!(backend, cloned); } diff --git a/src/text/mod.rs b/src/text/mod.rs index 92d6d9bb..cbda27c5 100644 --- a/src/text/mod.rs +++ b/src/text/mod.rs @@ -18,6 +18,26 @@ pub struct TextProcessor { smart_auto_submit: bool, /// Pre-compiled regex for submit trigger detection submit_re: Regex, + /// Whether filler-word filtering is enabled + filter_filler_words: bool, + /// Pre-compiled regex matching any configured filler word. + /// `None` when the filter is disabled or the list is empty so the hot + /// path can early-out without touching regex. + filler_re: Option, + /// Pre-compiled regex matching duplicate spaces left behind after + /// removing fillers. Compiled once even when the filter is off so + /// rebuilding the processor stays cheap. + filler_space_re: Regex, + /// Pre-compiled regex matching " ," / " ." / " ;" / " ?" etc. left + /// behind when a filler precedes attached punctuation. + filler_punct_re: Regex, + /// Pre-compiled regex matching duplicated punctuation like ", ," that + /// can appear after removing back-to-back fillers around commas. 
+ filler_dup_punct_re: Regex, + /// Pre-compiled regex matching a connector punctuation (",;:") that ends + /// up directly before a sentence terminator (".!?") after filler removal, + /// e.g. "hello world, uh." -> "hello world,." -> "hello world.". + filler_connector_before_term_re: Regex, } impl TextProcessor { @@ -35,11 +55,48 @@ impl TextProcessor { let submit_re = Regex::new(r"(?i)(?:^|\s)submit[.!?,;]*\s*$") .expect("BUG: submit regex is a compile-time constant and must be valid"); + // Build a single alternation of all filler words. Word boundaries + // (\b) ensure "um" is removed without touching "umbrella" or "summer". + let filler_re = if config.filter_filler_words && !config.filler_words.is_empty() { + let alternation = config + .filler_words + .iter() + .filter(|w| !w.trim().is_empty()) + .map(|w| regex::escape(w.trim())) + .collect::>() + .join("|"); + if alternation.is_empty() { + None + } else { + let pattern = format!(r"(?i)\b(?:{})\b", alternation); + Regex::new(&pattern).ok() + } + } else { + None + }; + + let filler_space_re = Regex::new(r" {2,}") + .expect("BUG: whitespace regex is a compile-time constant and must be valid"); + let filler_punct_re = Regex::new(r" +([,.;:!?])") + .expect("BUG: punctuation regex is a compile-time constant and must be valid"); + let filler_dup_punct_re = Regex::new(r"([,;:])(\s*[,;:])+").expect( + "BUG: duplicate-punctuation regex is a compile-time constant and must be valid", + ); + let filler_connector_before_term_re = Regex::new(r"[,;:]+(\s*)([.!?])").expect( + "BUG: connector-before-terminator regex is a compile-time constant and must be valid", + ); + Self { spoken_punctuation: config.spoken_punctuation, replacements, smart_auto_submit: config.smart_auto_submit, submit_re, + filter_filler_words: config.filter_filler_words, + filler_re, + filler_space_re, + filler_punct_re, + filler_dup_punct_re, + filler_connector_before_term_re, } } @@ -47,6 +104,13 @@ impl TextProcessor { pub fn process(&self, text: &str) 
-> String { let mut result = text.to_string(); + // Filter filler words first, on the raw transcription, so replacements + // see text with fillers already removed. To keep a word like "um", drop + // it from `filler_words`; a replacement cannot restore a stripped word. + if self.filter_filler_words { + result = self.apply_filler_filter(&result); + } + // Apply replacements first so phrases containing spoken punctuation words // (e.g. "slash pr" → "/pr") match before those words are converted to // punctuation characters. @@ -161,6 +225,52 @@ impl TextProcessor { result } + /// Remove filler words and clean up the punctuation/whitespace they leave + /// behind. Examples: + /// "Well, um, I think" -> "Well, I think" + /// "uh hello" -> "hello" + /// "I think, uh." -> "I think." + /// "um uh hello" -> "hello" + fn apply_filler_filter(&self, text: &str) -> String { + let Some(re) = &self.filler_re else { + return text.to_string(); + }; + + // Replace each filler with a single space so the input + // "um, hello" becomes " , hello" and we can fold whitespace below. + let mut result = re.replace_all(text, " ").into_owned(); + + // Collapse "space(s) + punctuation" to just the punctuation so " , hello" -> ", hello". + result = self.filler_punct_re.replace_all(&result, "$1").into_owned(); + + // Collapse runs like ",," or ", ," that appear when fillers sit + // between commas/semicolons/colons. + result = self + .filler_dup_punct_re + .replace_all(&result, "$1") + .into_owned(); + + // A connector ("," ";" ":") sitting directly before a sentence + // terminator (".!?") is dropped: "hello world, uh." starts as + // "hello world,." and should become "hello world.". + result = self + .filler_connector_before_term_re + .replace_all(&result, "$2") + .into_owned(); + + // Collapse multiple spaces left behind to a single space.
+ result = self.filler_space_re.replace_all(&result, " ").into_owned(); + + // Trim leading/trailing whitespace and dangling connector punctuation + // produced when fillers appeared at the start/end of the utterance. + result + .trim() + .trim_start_matches([',', ';', ':']) + .trim_start() + .trim_end_matches([',', ';', ':']) + .to_string() + } + /// Apply custom word replacements (case-insensitive) fn apply_replacements(&self, text: &str) -> String { let mut result = text.to_string(); @@ -237,6 +347,7 @@ mod tests { .map(|(k, v)| (k.to_string(), v.to_string())) .collect(), smart_auto_submit: false, + ..Default::default() } } @@ -245,6 +356,7 @@ mod tests { spoken_punctuation, replacements: HashMap::new(), smart_auto_submit: true, + ..Default::default() } } @@ -410,6 +522,7 @@ mod tests { spoken_punctuation: true, replacements: HashMap::new(), smart_auto_submit: true, + ..Default::default() }; let processor = TextProcessor::new(&config); @@ -428,6 +541,7 @@ mod tests { spoken_punctuation: true, replacements: HashMap::new(), smart_auto_submit: true, + ..Default::default() }; let processor = TextProcessor::new(&config); @@ -531,4 +645,166 @@ mod tests { assert_eq!(processor.process("dash dash"), "--"); } + + fn make_filler_config(enabled: bool, words: Option>) -> TextConfig { + let filler_words = match words { + Some(words) => words.into_iter().map(String::from).collect(), + None => TextConfig::default().filler_words, + }; + TextConfig { + filter_filler_words: enabled, + filler_words, + ..Default::default() + } + } + + #[test] + fn test_filler_filter_enabled_by_default() { + // Filler-word filtering ships on by default. Existing users who want + // the old behavior must opt out via `filter_filler_words = false`. 
+ let config = TextConfig::default(); + assert!(config.filter_filler_words); + + let processor = TextProcessor::new(&config); + assert_eq!(processor.process("um hello"), "hello"); + } + + #[test] + fn test_filler_filter_default_list() { + // Sanity-check the documented default list. + let config = TextConfig::default(); + assert_eq!( + config.filler_words, + vec!["uh", "um", "er", "ah", "eh", "hmm", "hm", "mm", "mhm"] + ); + } + + #[test] + fn test_filler_filter_enabled_basic() { + let config = make_filler_config(true, None); + let processor = TextProcessor::new(&config); + + assert_eq!(processor.process("um hello world"), "hello world"); + assert_eq!(processor.process("hello uh world"), "hello world"); + assert_eq!(processor.process("hello world um"), "hello world"); + } + + #[test] + fn test_filler_filter_case_insensitive() { + let config = make_filler_config(true, None); + let processor = TextProcessor::new(&config); + + assert_eq!(processor.process("UM hello"), "hello"); + assert_eq!(processor.process("Um hello"), "hello"); + assert_eq!(processor.process("Hmm I see"), "I see"); + } + + #[test] + fn test_filler_filter_respects_word_boundaries() { + // The classic edge case: "um" inside "umbrella" must not be removed. + let config = make_filler_config(true, None); + let processor = TextProcessor::new(&config); + + assert_eq!(processor.process("umbrella"), "umbrella"); + assert_eq!(processor.process("an umbrella"), "an umbrella"); + assert_eq!(processor.process("summer"), "summer"); + assert_eq!(processor.process("hummingbird"), "hummingbird"); + assert_eq!(processor.process("erase the file"), "erase the file"); + } + + #[test] + fn test_filler_filter_punctuation_cleanup_mid_sentence() { + let config = make_filler_config(true, None); + let processor = TextProcessor::new(&config); + + // The canonical example from the brief. 
+ assert_eq!(processor.process("Well, um, I think"), "Well, I think"); + } + + #[test] + fn test_filler_filter_punctuation_cleanup_start() { + let config = make_filler_config(true, None); + let processor = TextProcessor::new(&config); + + assert_eq!(processor.process("um, hello world"), "hello world"); + assert_eq!(processor.process("uh hello world"), "hello world"); + } + + #[test] + fn test_filler_filter_punctuation_cleanup_end() { + let config = make_filler_config(true, None); + let processor = TextProcessor::new(&config); + + assert_eq!(processor.process("hello world, um"), "hello world"); + assert_eq!(processor.process("hello world, uh."), "hello world."); + } + + #[test] + fn test_filler_filter_back_to_back_fillers() { + let config = make_filler_config(true, None); + let processor = TextProcessor::new(&config); + + assert_eq!(processor.process("um uh hello"), "hello"); + // Back-to-back fillers between commas collapse to a single comma: + // "hello [um], [uh], world" -> "hello, world". This matches the + // canonical "Well, um, I think" -> "Well, I think" treatment. + assert_eq!(processor.process("hello um, uh, world"), "hello, world"); + assert_eq!(processor.process("um, uh, well"), "well"); + } + + #[test] + fn test_filler_filter_preserves_sentence_punctuation() { + let config = make_filler_config(true, None); + let processor = TextProcessor::new(&config); + + // Sentence-final punctuation must survive even when a filler sits + // immediately before it. + assert_eq!(processor.process("hello um."), "hello."); + assert_eq!(processor.process("hello um!"), "hello!"); + assert_eq!(processor.process("hello um?"), "hello?"); + } + + #[test] + fn test_filler_filter_custom_list() { + // Override the default list. "um" should now be preserved while + // "like" and "you know" are stripped. 
+ let config = make_filler_config(true, Some(vec!["like", "you know"])); + let processor = TextProcessor::new(&config); + + assert_eq!(processor.process("um like hello"), "um hello"); + assert_eq!(processor.process("hello you know world"), "hello world"); + } + + #[test] + fn test_filler_filter_empty_list_is_noop() { + // An empty list with the flag enabled should leave text untouched + // rather than panic when building the regex. + let config = make_filler_config(true, Some(vec![])); + let processor = TextProcessor::new(&config); + + assert_eq!(processor.process("um hello"), "um hello"); + } + + #[test] + fn test_filler_filter_runs_before_replacements() { + // If a user maps "uh" to "uhhh" via word_replacements, the filler + // filter strips "uh" first, so the replacement sees clean input. + let mut config = make_filler_config(true, None); + config + .replacements + .insert("hello".to_string(), "HELLO".to_string()); + let processor = TextProcessor::new(&config); + + assert_eq!(processor.process("um hello uh world"), "HELLO world"); + } + + #[test] + fn test_filler_filter_with_spoken_punctuation() { + // Pipeline interaction: filler is removed first, then "period" -> ".". + let mut config = make_filler_config(true, None); + config.spoken_punctuation = true; + let processor = TextProcessor::new(&config); + + assert_eq!(processor.process("well um I think period"), "well I think."); + } } diff --git a/src/transcribe/cohere.rs b/src/transcribe/cohere.rs new file mode 100644 index 00000000..52d004dd --- /dev/null +++ b/src/transcribe/cohere.rs @@ -0,0 +1,789 @@ +//! Cohere Transcribe speech-to-text (feature-gated) +//! +//! Uses Cohere Labs' Cohere Transcribe model via ONNX Runtime. Wired into +//! the engine factory, CLI, and `[cohere]` config section. Compile in via +//! `cargo build --features cohere` (CPU) or `--features cohere-cuda` +//! / `--features cohere-tensorrt` for GPU acceleration. +//! +//! 
## Model architecture (verified against `cstr/cohere-transcribe-onnx-int8`) +//! +//! Encoder (`cohere-encoder.int8.onnx`): +//! ```text +//! inputs: +//! audio : F32 [1, n_samples] # raw 16 kHz PCM +//! outputs: +//! n_layer_cross_k : F32 [8, 1, T_enc, 1024] # precomputed cross-attn K +//! n_layer_cross_v : F32 [8, 1, T_enc, 1024] # precomputed cross-attn V +//! ``` +//! +//! `T_enc = (n_samples / 1280) + 1`. The encoder bakes log-mel preprocessing +//! and the cross-attention projection into a single graph: feed raw PCM, +//! get cross-K/V tensors back ready to plug into the decoder. +//! +//! Decoder (`cohere-decoder.int8.onnx`): +//! ```text +//! inputs: +//! tokens : I64 [1, n_tokens] +//! in_n_layer_self_k_cache : F32 [8, 1, 8, 1024, 128] # rolling self-K cache +//! in_n_layer_self_v_cache : F32 [8, 1, 8, 1024, 128] +//! n_layer_cross_k : F32 [8, 1, T_enc, 1024] # from encoder +//! n_layer_cross_v : F32 [8, 1, T_enc, 1024] +//! offset : I64 [] # write position +//! outputs: +//! logits : F32 [1, n_tokens, 16384] +//! out_n_layer_self_k_cache : F32 [8, 1, 8, 1024, 128] +//! out_n_layer_self_v_cache : F32 [8, 1, 8, 1024, 128] +//! ``` +//! +//! Architecture constants (all fixed for this export): +//! - 8 layers, 8 heads, head dim 128, d_model 1024 +//! - Self-attention rolling cache: 1024 token capacity +//! - Vocab: 16384 (matches `tokens.txt` line count) +//! +//! Cross-attention K/V are computed once by the encoder per utterance and +//! reused at every decoder step. The self-attention cache is a fixed-size +//! ring with the `offset` scalar tracking where the next K/V slice goes. +//! +//! ## Decoder prefix +//! +//! Cohere Transcribe uses a Whisper-style multi-token decoder prefix. For +//! English transcription with punctuation/capitalization on, no timestamps, +//! no diarization, the prefix is: +//! +//! ```text +//! [<|startoftranscript|>=4, <|en|>=62, <|pnc|>=5, <|itn|>=8, +//! <|notimestamp|>=11, <|nodiarize|>=13] +//! ``` +//! +//! 
Generation continues until `<|endoftext|>=3`. +//! +//! ## Downloading the int8 model for the PoC test +//! +//! The original `CohereLabs/cohere-transcribe-03-2026` weights are gated on +//! HuggingFace (Apache 2.0 licensed but require accepting the model card). +//! The community ONNX export at `cstr/cohere-transcribe-onnx-int8` is not gated: +//! +//! ```bash +//! mkdir -p ~/.cache/voxtype-models/cohere-transcribe-int8 +//! cd ~/.cache/voxtype-models/cohere-transcribe-int8 +//! BASE=https://huggingface.co/cstr/cohere-transcribe-onnx-int8/resolve/main +//! for f in cohere-encoder.int8.onnx cohere-encoder.int8.onnx.data \ +//! cohere-decoder.int8.onnx cohere-decoder.int8.onnx.data \ +//! tokens.txt; do +//! curl -L "$BASE/$f" -o "$f" +//! done +//! ``` +//! +//! ## Running the integration test +//! +//! ```bash +//! VOXTYPE_COHERE_MODEL_DIR=~/.cache/voxtype-models/cohere-transcribe-int8 \ +//! cargo test --features cohere transcribe::cohere::tests::cohere_poc \ +//! -- --ignored --nocapture +//! ``` +//! +//! ## Configuration +//! +//! ```toml +//! engine = "cohere" +//! +//! [cohere] +//! model = "cohere-transcribe-int8" # subdir in voxtype's models dir +//! language = "en" # one of the 14 supported langs +//! threads = 4 # optional; defaults to num_cpus.min(4) +//! on_demand_loading = false +//! 
``` + +use super::Transcriber; +use crate::config::CohereConfig; +use crate::error::TranscribeError; +use ort::session::Session; +use ort::value::Tensor; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::Mutex; + +// --------------------------------------------------------------------------- +// Architecture constants (fixed for the cstr/cohere-transcribe-onnx-int8 export) +// --------------------------------------------------------------------------- + +const N_LAYERS: usize = 8; +const N_HEADS: usize = 8; +const HEAD_DIM: usize = 128; +const D_MODEL: usize = N_HEADS * HEAD_DIM; // 1024 +const SELF_KV_CACHE_LEN: usize = 1024; +const VOCAB_SIZE: usize = 16384; +const SAMPLE_RATE: usize = 16_000; + +/// `<|endoftext|>` token ID. Verified against the cstr export's tokens.txt +/// in `build_prefix_against_real_tokens_txt`. The decoder prefix and other +/// task tokens are looked up by name at runtime in `build_prefix`, so they +/// don't need const declarations here. +const TOK_EOS: i64 = 3; + +/// Cohere Transcribe officially supports 14 languages. The language tokens +/// live in `tokens.txt` as `<||>` entries; we look them up by name at +/// `new()` time so future model versions that change the IDs still work. +const SUPPORTED_LANGUAGES: &[&str] = &[ + "ar", "de", "en", "es", "fr", "hi", "it", "ja", "ko", "nl", "pt", "ru", "tr", "zh", +]; + +/// Generation safety limits. +const MAX_TOKENS_PER_SECOND: f32 = 8.0; +const ABSOLUTE_MAX_TOKENS: usize = 1024; + +// --------------------------------------------------------------------------- +// Transcriber +// --------------------------------------------------------------------------- + +/// Cohere Transcribe transcriber using ONNX Runtime. +pub struct CohereTranscriber { + encoder: Mutex, + decoder: Mutex, + /// SentencePiece tokens loaded from `tokens.txt` (id -> piece string). + tokens: HashMap, + /// Resolved decoder prefix (`<|sot|>` + language + task tokens). 
Fed in + /// the first decoder call to populate the self-attention KV cache. + prefix: Vec, +} + +impl CohereTranscriber { + /// Construct from `[cohere]` config: resolves model name → path, applies + /// thread count, builds the language-specific decoder prefix. + pub fn new(config: &CohereConfig) -> Result { + let model_dir = resolve_model_path(&config.model)?; + let threads = config.threads.unwrap_or_else(|| num_cpus::get().min(4)); + Self::with_threads_and_lang(&model_dir, threads, &config.language) + } + + /// Load the Cohere encoder + decoder + tokens from a model directory. + /// + /// Expects the cstr/cohere-transcribe-onnx-int8 layout: + /// - `cohere-encoder.int8.onnx` (+ `.onnx.data` sidecar) + /// - `cohere-decoder.int8.onnx` (+ `.onnx.data` sidecar) + /// - `tokens.txt` + pub fn from_dir(model_dir: &Path) -> Result { + Self::with_threads_and_lang(model_dir, num_cpus::get().min(4), "en") + } + + /// Load with an explicit thread count for ONNX intra-op parallelism. + pub fn with_threads(model_dir: &Path, threads: usize) -> Result { + Self::with_threads_and_lang(model_dir, threads, "en") + } + + /// Full constructor with thread count and language code. 
+ pub fn with_threads_and_lang( + model_dir: &Path, + threads: usize, + language: &str, + ) -> Result { + tracing::info!("Loading Cohere Transcribe model from {:?}", model_dir); + let start = std::time::Instant::now(); + + let encoder_file = model_dir.join("cohere-encoder.int8.onnx"); + let decoder_file = model_dir.join("cohere-decoder.int8.onnx"); + let tokens_file = model_dir.join("tokens.txt"); + + for (label, path) in [ + ("encoder", &encoder_file), + ("decoder", &decoder_file), + ("tokens.txt", &tokens_file), + ] { + if !path.exists() { + return Err(TranscribeError::ModelNotFound(format!( + "Cohere {label} not found: {}\n \ + Download from https://huggingface.co/cstr/cohere-transcribe-onnx-int8", + path.display(), + ))); + } + } + + let tokens = load_tokens(&tokens_file)?; + if tokens.len() != VOCAB_SIZE { + tracing::warn!( + "tokens.txt has {} entries; expected {}. Decoder logits dim ({}) \ + may not align with this tokens file.", + tokens.len(), + VOCAB_SIZE, + VOCAB_SIZE, + ); + } + + let encoder = build_session(&encoder_file, threads, "encoder")?; + let decoder = build_session(&decoder_file, threads, "decoder")?; + + let prefix = build_prefix(&tokens, language)?; + + tracing::info!( + "Cohere model loaded in {:.2}s ({} tokens, language='{}', prefix={:?})", + start.elapsed().as_secs_f32(), + tokens.len(), + language, + prefix, + ); + + Ok(Self { + encoder: Mutex::new(encoder), + decoder: Mutex::new(decoder), + tokens, + prefix, + }) + } + + /// Run encoder + autoregressive decoder, return generated token ids + /// (excluding the prefix and EOS). 
+ fn run_inference(&self, samples: &[f32]) -> Result, TranscribeError> { + let duration_secs = samples.len() as f32 / SAMPLE_RATE as f32; + + // ---- Encoder ---- + let encoder_start = std::time::Instant::now(); + let n_samples = samples.len(); + let audio_tensor = Tensor::::from_array(([1usize, n_samples], samples.to_vec())) + .map_err(|e| TranscribeError::InferenceFailed(format!("audio tensor: {e}")))?; + + let (cross_k_shape, cross_k_data, cross_v_shape, cross_v_data) = { + let mut encoder = self + .encoder + .lock() + .map_err(|e| TranscribeError::InferenceFailed(format!("encoder lock: {e}")))?; + let mut outputs = encoder + .run(ort::inputs!["audio" => audio_tensor]) + .map_err(|e| TranscribeError::InferenceFailed(format!("encoder run: {e}")))?; + + let cross_k_val = outputs.remove("n_layer_cross_k").ok_or_else(|| { + TranscribeError::InferenceFailed("encoder missing n_layer_cross_k".into()) + })?; + let cross_v_val = outputs.remove("n_layer_cross_v").ok_or_else(|| { + TranscribeError::InferenceFailed("encoder missing n_layer_cross_v".into()) + })?; + let (k_shape, k_data) = cross_k_val + .try_extract_tensor::() + .map_err(|e| TranscribeError::InferenceFailed(format!("extract cross_k: {e}")))?; + let (v_shape, v_data) = cross_v_val + .try_extract_tensor::() + .map_err(|e| TranscribeError::InferenceFailed(format!("extract cross_v: {e}")))?; + ( + k_shape.to_vec(), + k_data.to_vec(), + v_shape.to_vec(), + v_data.to_vec(), + ) + }; + tracing::debug!( + "Cohere encoder ran in {:.2}s, T_enc={:?}", + encoder_start.elapsed().as_secs_f32(), + cross_k_shape, + ); + + // ---- Decoder ---- + let decoder_start = std::time::Instant::now(); + + // Self-attention cache: zero-initialized rolling buffer. + // Cache shape is [N_LAYERS, batch=1, N_HEADS, SELF_KV_CACHE_LEN, HEAD_DIM]. 
+ let cache_elems = N_LAYERS * N_HEADS * SELF_KV_CACHE_LEN * HEAD_DIM; + let mut self_k_data: Vec = vec![0.0; cache_elems]; + let mut self_v_data: Vec = vec![0.0; cache_elems]; + let cache_shape: [usize; 5] = [N_LAYERS, 1, N_HEADS, SELF_KV_CACHE_LEN, HEAD_DIM]; + + // Step 1: feed the prefix tokens together so the cache populates in + // a single call. After this, offset = prefix.len(). + let mut offset: i64 = 0; + let next_after_prefix = self.decoder_step( + &self.prefix, + offset, + &mut self_k_data, + &mut self_v_data, + cache_shape, + &cross_k_shape, + &cross_k_data, + &cross_v_shape, + &cross_v_data, + )?; + offset += self.prefix.len() as i64; + + let mut generated: Vec = Vec::new(); + if next_after_prefix == TOK_EOS { + return Ok(Vec::new()); + } + generated.push(next_after_prefix); + + // Steps 2..N: feed one token per step. + let max_tokens = + ((duration_secs * MAX_TOKENS_PER_SECOND) as usize).clamp(16, ABSOLUTE_MAX_TOKENS); + for _ in 0..max_tokens { + let last = *generated.last().unwrap(); + let next = self.decoder_step( + &[last], + offset, + &mut self_k_data, + &mut self_v_data, + cache_shape, + &cross_k_shape, + &cross_k_data, + &cross_v_shape, + &cross_v_data, + )?; + offset += 1; + + if next == TOK_EOS { + break; + } + if offset as usize >= SELF_KV_CACHE_LEN { + tracing::warn!("Cohere: self-attention cache full ({}); truncating", offset); + break; + } + generated.push(next); + } + + tracing::debug!( + "Cohere decoder produced {} tokens in {:.2}s", + generated.len(), + decoder_start.elapsed().as_secs_f32(), + ); + + Ok(generated.into_iter().map(|t| t as u32).collect()) + } + + /// Single decoder forward pass. + /// + /// Updates `self_k_data` and `self_v_data` in place from the decoder's + /// output cache, and returns the predicted next-token id (greedy argmax + /// over the LAST timestep's logits). 
+ #[allow(clippy::too_many_arguments)] + fn decoder_step( + &self, + new_tokens: &[i64], + offset: i64, + self_k_data: &mut Vec, + self_v_data: &mut Vec, + cache_shape: [usize; 5], + cross_k_shape: &[i64], + cross_k_data: &[f32], + cross_v_shape: &[i64], + cross_v_data: &[f32], + ) -> Result { + let cross_k_shape_us: Vec = cross_k_shape.iter().map(|&d| d as usize).collect(); + let cross_v_shape_us: Vec = cross_v_shape.iter().map(|&d| d as usize).collect(); + let n = new_tokens.len(); + + let tokens_tensor = Tensor::::from_array(([1usize, n], new_tokens.to_vec())) + .map_err(|e| TranscribeError::InferenceFailed(format!("tokens tensor: {e}")))?; + let self_k_tensor = + Tensor::::from_array((cache_shape, std::mem::take(self_k_data))) + .map_err(|e| TranscribeError::InferenceFailed(format!("self_k tensor: {e}")))?; + let self_v_tensor = + Tensor::::from_array((cache_shape, std::mem::take(self_v_data))) + .map_err(|e| TranscribeError::InferenceFailed(format!("self_v tensor: {e}")))?; + let cross_k_tensor = + Tensor::::from_array((cross_k_shape_us.clone(), cross_k_data.to_vec())) + .map_err(|e| TranscribeError::InferenceFailed(format!("cross_k tensor: {e}")))?; + let cross_v_tensor = + Tensor::::from_array((cross_v_shape_us, cross_v_data.to_vec())) + .map_err(|e| TranscribeError::InferenceFailed(format!("cross_v tensor: {e}")))?; + let offset_tensor = Tensor::::from_array(([] as [usize; 0], vec![offset])) + .map_err(|e| TranscribeError::InferenceFailed(format!("offset tensor: {e}")))?; + + let mut decoder = self + .decoder + .lock() + .map_err(|e| TranscribeError::InferenceFailed(format!("decoder lock: {e}")))?; + + let mut outputs = decoder + .run(ort::inputs![ + "tokens" => tokens_tensor, + "in_n_layer_self_k_cache" => self_k_tensor, + "in_n_layer_self_v_cache" => self_v_tensor, + "n_layer_cross_k" => cross_k_tensor, + "n_layer_cross_v" => cross_v_tensor, + "offset" => offset_tensor, + ]) + .map_err(|e| TranscribeError::InferenceFailed(format!("decoder run: {e}")))?; + 
+ // Logits: pick the last timestep's argmax. + let logits_val = outputs + .remove("logits") + .ok_or_else(|| TranscribeError::InferenceFailed("decoder missing logits".into()))?; + let (logits_shape, logits_data) = logits_val + .try_extract_tensor::() + .map_err(|e| TranscribeError::InferenceFailed(format!("extract logits: {e}")))?; + if logits_shape.len() != 3 || logits_shape[2] as usize != VOCAB_SIZE { + return Err(TranscribeError::InferenceFailed(format!( + "unexpected logits shape: {logits_shape:?}, expected [B, T, {VOCAB_SIZE}]" + ))); + } + let n_steps = logits_shape[1] as usize; + let last_offset = (n_steps - 1) * VOCAB_SIZE; + let last_logits = &logits_data[last_offset..last_offset + VOCAB_SIZE]; + let next_id = argmax(last_logits) as i64; + + // Pull updated cache out and refill our owned buffers. + let new_k = outputs.remove("out_n_layer_self_k_cache").ok_or_else(|| { + TranscribeError::InferenceFailed("decoder missing out_n_layer_self_k_cache".into()) + })?; + let new_v = outputs.remove("out_n_layer_self_v_cache").ok_or_else(|| { + TranscribeError::InferenceFailed("decoder missing out_n_layer_self_v_cache".into()) + })?; + let (_, k_data) = new_k + .try_extract_tensor::() + .map_err(|e| TranscribeError::InferenceFailed(format!("extract self_k: {e}")))?; + let (_, v_data) = new_v + .try_extract_tensor::() + .map_err(|e| TranscribeError::InferenceFailed(format!("extract self_v: {e}")))?; + *self_k_data = k_data.to_vec(); + *self_v_data = v_data.to_vec(); + + Ok(next_id) + } + + /// Convert generated token ids into text. Filters control / language / + /// task tokens (anything in the form `<|...|>`, plus ``/``) + /// and reconstructs SentencePiece word boundaries (U+2581 → space). 
+ fn decode_tokens(&self, token_ids: &[u32]) -> String { + let mut out = String::new(); + for &id in token_ids { + let Some(piece) = self.tokens.get(&id) else { + continue; + }; + if is_special_token(piece) { + continue; + } + // Replace the SentencePiece word-boundary marker with a space. + out.push_str(&piece.replace('\u{2581}', " ")); + } + out.trim().to_string() + } +} + +impl Transcriber for CohereTranscriber { + fn transcribe(&self, samples: &[f32]) -> Result { + if samples.is_empty() { + return Err(TranscribeError::AudioFormat("Empty audio buffer".into())); + } + + let duration_secs = samples.len() as f32 / SAMPLE_RATE as f32; + tracing::debug!( + "Transcribing {:.2}s of audio ({} samples) with Cohere", + duration_secs, + samples.len(), + ); + + let start = std::time::Instant::now(); + let token_ids = self.run_inference(samples)?; + let text = self.decode_tokens(&token_ids).trim().to_string(); + tracing::info!( + "Cohere transcription completed in {:.2}s: {:?}", + start.elapsed().as_secs_f32(), + if text.chars().count() > 50 { + format!("{}...", text.chars().take(50).collect::()) + } else { + text.clone() + } + ); + Ok(text) + } +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/// Build an ONNX Runtime session for the encoder or decoder, registering +/// any GPU execution providers that were compiled in via +/// [`super::onnx_ep::register_gpu_eps`]. +/// +/// Note: there's no `cohere-migraphx` feature today because the int8 +/// model uses MatMulNBits(bits=8) which MIGraphX 7.2 can't compile. +/// On the AMD-targeted binary, Cohere runs on the CPU EP only. +fn build_session( + path: &Path, + threads: usize, + label: &str, +) -> Result { + let builder = Session::builder() + .map_err(|e| TranscribeError::InitFailed(format!("{label} builder: {e}")))? 
+ .with_intra_threads(threads) + .map_err(|e| TranscribeError::InitFailed(format!("{label} threads: {e}")))?; + + let mut builder = super::onnx_ep::register_gpu_eps(builder, "Cohere", label) + .map_err(|e| TranscribeError::InitFailed(format!("{label} EPs: {e}")))?; + + builder.commit_from_file(path).map_err(|e| { + TranscribeError::InitFailed(format!( + "Failed to load Cohere {label} from {:?}: {e}", + path + )) + }) +} + +/// Load `tokens.txt` (one ` \n` line per token). The cstr/Cohere +/// export uses the same NeMo-style layout that other ONNX-engine downloads +/// already use; tolerant of trailing whitespace or CRLF endings. +fn load_tokens(path: &Path) -> Result, TranscribeError> { + let content = std::fs::read_to_string(path).map_err(|e| { + TranscribeError::ModelNotFound(format!("Failed to read {}: {e}", path.display())) + })?; + let mut map = HashMap::new(); + for (line_no, raw) in content.lines().enumerate() { + let line = raw.trim_end_matches(['\r', '\n']); + if line.is_empty() { + continue; + } + // Format is ``. Piece may itself contain spaces + // for some special tokens, but in this export the last whitespace + // separator is unambiguous because IDs are integers. + let last_space = line.rfind(char::is_whitespace).ok_or_else(|| { + TranscribeError::ModelNotFound(format!( + "{}:{}: malformed token line: {line:?}", + path.display(), + line_no + 1, + )) + })?; + let (piece, id_str) = line.split_at(last_space); + let id: u32 = id_str.trim().parse().map_err(|_| { + TranscribeError::ModelNotFound(format!( + "{}:{}: non-integer token id in {line:?}", + path.display(), + line_no + 1, + )) + })?; + map.insert(id, piece.to_string()); + } + Ok(map) +} + +/// Build the decoder prefix sequence for a given language code. +/// +/// The prefix is `[<|sot|>, <||>, <|pnc|>, <|itn|>, <|notimestamp|>, +/// <|nodiarize|>]`. 
We resolve the language and task tokens by name from +/// `tokens.txt` rather than hard-coding IDs so the wiring survives a +/// future export that renumbers tokens. +fn build_prefix( + tokens: &HashMap, + language: &str, +) -> Result, TranscribeError> { + let lang = language.trim().to_ascii_lowercase(); + if !SUPPORTED_LANGUAGES.contains(&lang.as_str()) { + return Err(TranscribeError::InitFailed(format!( + "Cohere does not officially support language '{language}'. \ + Supported languages: {SUPPORTED_LANGUAGES:?}", + ))); + } + let lang_tag = format!("<|{lang}|>"); + let lookup = |name: &str| -> Result { + tokens + .iter() + .find_map(|(id, piece)| (piece == name).then_some(*id as i64)) + .ok_or_else(|| { + TranscribeError::InitFailed(format!( + "Cohere tokens.txt missing required special token {name:?}" + )) + }) + }; + Ok(vec![ + lookup("<|startoftranscript|>")?, + lookup(&lang_tag)?, + lookup("<|pnc|>")?, + lookup("<|itn|>")?, + lookup("<|notimestamp|>")?, + lookup("<|nodiarize|>")?, + ]) +} + +/// True for Cohere control / language / task tokens. These are stripped +/// from the decoded output so users don't see literal `<|en|>` strings. +fn is_special_token(piece: &str) -> bool { + if piece.starts_with("<|") && piece.ends_with("|>") { + return true; + } + matches!(piece, "" | "" | "" | "") +} + +/// Greedy argmax over a 1-D logits slice. +fn argmax(logits: &[f32]) -> usize { + let mut best = 0usize; + let mut best_v = f32::NEG_INFINITY; + for (i, &v) in logits.iter().enumerate() { + if v > best_v { + best_v = v; + best = i; + } + } + best +} + +/// Resolve a model name or path to a directory containing the Cohere ONNX files. 
+fn resolve_model_path(model: &str) -> Result { + let path = PathBuf::from(model); + if path.is_absolute() && path.exists() { + return Ok(path); + } + + let models_dir = crate::config::Config::models_dir(); + let candidate = models_dir.join(model); + if candidate.exists() { + return Ok(candidate); + } + + let local = PathBuf::from("models").join(model); + if local.exists() { + return Ok(local); + } + + Err(TranscribeError::ModelNotFound(format!( + "Cohere model '{}' not found. Looked in:\n - {}\n - {}\n - {}", + model, + path.display(), + candidate.display(), + local.display(), + ))) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn fixtures_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures") + } + + /// Load a 16 kHz mono WAV into f32 samples in [-1, 1]. + fn load_wav(path: &Path) -> Vec { + let reader = hound::WavReader::open(path) + .unwrap_or_else(|e| panic!("Failed to open {}: {}", path.display(), e)); + let spec = reader.spec(); + assert_eq!(spec.sample_rate, 16_000, "Expected 16 kHz audio"); + assert_eq!(spec.channels, 1, "Expected mono audio"); + + let max_val = (1i64 << (spec.bits_per_sample - 1)) as f32; + reader + .into_samples::() + .filter_map(|s| s.ok()) + .map(|s| s as f32 / max_val) + .collect() + } + + /// End-to-end PoC: load the int8 Cohere model and transcribe a fixture WAV. + /// + /// Run with: + /// ```bash + /// VOXTYPE_COHERE_MODEL_DIR=~/.cache/voxtype-models/cohere-transcribe-int8 \ + /// cargo test --features cohere transcribe::cohere::tests::cohere_poc \ + /// -- --ignored --nocapture + /// ``` + #[test] + #[ignore] + fn cohere_poc() { + let model_dir = std::env::var("VOXTYPE_COHERE_MODEL_DIR") + .map(PathBuf::from) + .unwrap_or_else(|_| { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("models") + .join("cohere-transcribe-int8") + }); + + assert!( + model_dir.exists(), + "Cohere model dir not found at {}. 
See module docs for download instructions.", + model_dir.display() + ); + + let transcriber = + CohereTranscriber::from_dir(&model_dir).expect("Failed to load Cohere transcriber"); + + let wav_path = fixtures_dir().join("vad").join("speech_hello.wav"); + let samples = load_wav(&wav_path); + assert!( + !samples.is_empty(), + "Loaded zero samples from {}", + wav_path.display() + ); + + let text = transcriber + .transcribe(&samples) + .expect("Cohere transcription failed"); + eprintln!("Cohere PoC transcription: {:?}", text); + } + + #[test] + fn resolve_model_path_not_found() { + let result = resolve_model_path("/nonexistent/cohere/path"); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + TranscribeError::ModelNotFound(_) + )); + } + + #[test] + fn argmax_picks_highest() { + assert_eq!(argmax(&[0.1, 0.5, 0.3, 0.4]), 1); + assert_eq!(argmax(&[1.0]), 0); + assert_eq!(argmax(&[-1.0, -0.5, -0.9]), 1); + } + + #[test] + fn build_prefix_rejects_unsupported_language() { + let tokens = HashMap::new(); + let err = build_prefix(&tokens, "klingon").unwrap_err(); + assert!(matches!(err, TranscribeError::InitFailed(_))); + } + + #[test] + fn build_prefix_against_real_tokens_txt() { + // Sanity check: build_prefix yields the documented IDs when the real + // tokens.txt is available. Skipped when the model isn't downloaded. 
+ let Ok(dir) = std::env::var("VOXTYPE_COHERE_MODEL_DIR").map(PathBuf::from) else { + return; + }; + let tokens_path = dir.join("tokens.txt"); + if !tokens_path.exists() { + return; + } + let tokens = load_tokens(&tokens_path).expect("tokens.txt should load"); + let prefix = build_prefix(&tokens, "en").expect("build_prefix English"); + assert_eq!(prefix, vec![4, 62, 5, 8, 11, 13]); + assert_eq!( + tokens.get(&3).map(String::as_str), + Some("<|endoftext|>"), + "EOS token id 3 should map to <|endoftext|>" + ); + } + + #[test] + fn build_prefix_lookup_uses_named_tokens() { + // Synthesize a minimal tokens.txt-equivalent map and check that + // build_prefix resolves names correctly (no hard-coded IDs). + let mut tokens = HashMap::new(); + tokens.insert(4, "<|startoftranscript|>".to_string()); + tokens.insert(5, "<|pnc|>".to_string()); + tokens.insert(8, "<|itn|>".to_string()); + tokens.insert(11, "<|notimestamp|>".to_string()); + tokens.insert(13, "<|nodiarize|>".to_string()); + tokens.insert(62, "<|en|>".to_string()); + let prefix = build_prefix(&tokens, "en").unwrap(); + assert_eq!(prefix, vec![4, 62, 5, 8, 11, 13]); + + // If the language token is missing, error rather than panic. + let mut partial = tokens.clone(); + partial.remove(&62); + assert!(build_prefix(&partial, "en").is_err()); + } + + #[test] + fn special_token_filter() { + assert!(is_special_token("<|en|>")); + assert!(is_special_token("<|startoftranscript|>")); + assert!(is_special_token("<|endoftext|>")); + assert!(is_special_token("")); + assert!(is_special_token("")); + assert!(!is_special_token("hello")); + assert!(!is_special_token("\u{2581}world")); + } +} + +// Trip-wire: keep D_MODEL aligned with N_HEADS * HEAD_DIM. 
+const _: () = { + if D_MODEL != N_HEADS * HEAD_DIM { + panic!("D_MODEL must equal N_HEADS * HEAD_DIM"); + } +}; diff --git a/src/transcribe/ctc.rs b/src/transcribe/ctc.rs index 16906f00..cb0e09a3 100644 --- a/src/transcribe/ctc.rs +++ b/src/transcribe/ctc.rs @@ -135,9 +135,8 @@ fn tokens_to_string( /// Format: each line is "token_string token_id" (space-separated). /// The token string may contain spaces, so we split from the right. pub fn load_tokens(path: &Path) -> Result, TranscribeError> { - let content = std::fs::read_to_string(path).map_err(|e| { - TranscribeError::InitFailed(format!("Failed to read tokens.txt: {}", e)) - })?; + let content = std::fs::read_to_string(path) + .map_err(|e| TranscribeError::InitFailed(format!("Failed to read tokens.txt: {}", e)))?; let mut tokens = HashMap::new(); for line in content.lines() { @@ -174,11 +173,7 @@ mod tests { fn test_load_tokens() { let temp_dir = TempDir::new().unwrap(); let tokens_path = temp_dir.path().join("tokens.txt"); - fs::write( - &tokens_path, - " 0\n 1\nhello 2\nworld 3\n", - ) - .unwrap(); + fs::write(&tokens_path, " 0\n 1\nhello 2\nworld 3\n").unwrap(); let tokens = load_tokens(&tokens_path).unwrap(); assert_eq!(tokens.get(&0), Some(&"".to_string())); diff --git a/src/transcribe/dolphin.rs b/src/transcribe/dolphin.rs index c6674cf9..97e8869b 100644 --- a/src/transcribe/dolphin.rs +++ b/src/transcribe/dolphin.rs @@ -75,15 +75,16 @@ impl DolphinTranscriber { let tokens = ctc::load_tokens(&tokens_path)?; tracing::debug!("Loaded {} tokens", tokens.len()); - // Create ONNX session + // Create ONNX session. + // No GPU EP registration: Dolphin runs on the CPU EP only. + // MIGraphX 7.2 rejects this encoder's Slice op shape, so we + // keep the engine on CPU on the AMD-targeted binary. let session = Session::builder() .map_err(|e| { TranscribeError::InitFailed(format!("ONNX session builder failed: {}", e)) })? 
.with_intra_threads(threads) - .map_err(|e| { - TranscribeError::InitFailed(format!("Failed to set threads: {}", e)) - })? + .map_err(|e| TranscribeError::InitFailed(format!("Failed to set threads: {}", e)))? .commit_from_file(&model_file) .map_err(|e| { TranscribeError::InitFailed(format!( @@ -155,21 +156,15 @@ impl Transcriber for DolphinTranscriber { // x: shape [1, T, 80] let (x_data, _offset) = features.into_raw_vec_and_offset(); - let x_tensor = - Tensor::::from_array(([1usize, num_frames, feat_dim], x_data)).map_err(|e| { - TranscribeError::InferenceFailed(format!( - "Failed to create input tensor: {}", - e - )) + let x_tensor = Tensor::::from_array(([1usize, num_frames, feat_dim], x_data)) + .map_err(|e| { + TranscribeError::InferenceFailed(format!("Failed to create input tensor: {}", e)) })?; // x_len: shape [1] (i64) - let x_len_tensor = Tensor::::from_array(([1usize], vec![num_frames as i64])) - .map_err(|e| { - TranscribeError::InferenceFailed(format!( - "Failed to create length tensor: {}", - e - )) + let x_len_tensor = + Tensor::::from_array(([1usize], vec![num_frames as i64])).map_err(|e| { + TranscribeError::InferenceFailed(format!("Failed to create length tensor: {}", e)) })?; // Run inference @@ -180,10 +175,7 @@ impl Transcriber for DolphinTranscriber { let inputs: Vec<(std::borrow::Cow, ort::session::SessionInputValue)> = vec![ (std::borrow::Cow::Borrowed("x"), x_tensor.into()), - ( - std::borrow::Cow::Borrowed("x_len"), - x_len_tensor.into(), - ), + (std::borrow::Cow::Borrowed("x_len"), x_len_tensor.into()), ]; let outputs = session.run(inputs).map_err(|e| { @@ -349,10 +341,7 @@ fn read_cmvn_from_metadata(session: &Session) -> Result<(Vec, Vec), Tr ))); } - tracing::debug!( - "Loaded CMVN stats: {} dimensions", - neg_mean.len() - ); + tracing::debug!("Loaded CMVN stats: {} dimensions", neg_mean.len()); Ok((neg_mean, inv_stddev)) } @@ -382,10 +371,7 @@ fn resolve_model_path(model: &str) -> Result { } // Check sherpa-onnx naming convention - let 
sherpa_name = format!( - "sherpa-onnx-{}-ctc-multi-lang", - model_dir_name - ); + let sherpa_name = format!("sherpa-onnx-{}-ctc-multi-lang", model_dir_name); let sherpa_path = models_dir.join(&sherpa_name); if sherpa_path.exists() { return Ok(sherpa_path); diff --git a/src/transcribe/fbank.rs b/src/transcribe/fbank.rs index 625226fc..597e38e0 100644 --- a/src/transcribe/fbank.rs +++ b/src/transcribe/fbank.rs @@ -235,11 +235,7 @@ pub fn apply_cmvn(features: &mut Array2, neg_mean: &[f32], inv_stddev: &[f3 /// /// Returns num_mels triangular filters, each with fft_size/2+1 coefficients. /// Uses the standard mel scale: mel = 1127 * ln(1 + f/700) -pub fn compute_mel_filterbank( - num_mels: usize, - fft_size: usize, - sample_rate: f32, -) -> Vec> { +pub fn compute_mel_filterbank(num_mels: usize, fft_size: usize, sample_rate: f32) -> Vec> { let num_bins = fft_size / 2 + 1; let max_freq = sample_rate / 2.0; diff --git a/src/transcribe/mod.rs b/src/transcribe/mod.rs index e42c6f5c..8f5c421c 100644 --- a/src/transcribe/mod.rs +++ b/src/transcribe/mod.rs @@ -24,15 +24,21 @@ pub mod worker; feature = "paraformer", feature = "dolphin", feature = "omnilingual", + feature = "cohere", ))] pub mod fbank; +/// Shared GPU execution-provider registration for ONNX-based engines. +#[cfg(feature = "onnx-common")] +pub mod onnx_ep; + /// Shared CTC greedy decoder for CTC-based ASR engines #[cfg(any( feature = "sensevoice", feature = "paraformer", feature = "dolphin", feature = "omnilingual", + feature = "cohere", ))] pub mod ctc; @@ -54,6 +60,11 @@ pub mod dolphin; #[cfg(feature = "omnilingual")] pub mod omnilingual; +/// Cohere Transcribe backend, selected with `engine = "cohere"` and configured by the `[cohere]` section. +/// See `src/transcribe/cohere.rs` for usage. 
+#[cfg(feature = "cohere")] +pub mod cohere; + use crate::config::{Config, TranscriptionEngine, WhisperConfig, WhisperMode}; use crate::error::TranscribeError; use crate::setup::gpu; @@ -201,6 +212,20 @@ pub fn create_transcriber(config: &Config) -> Result, Trans "Omnilingual engine requested but voxtype was not compiled with --features omnilingual" .to_string(), )), + #[cfg(feature = "cohere")] + TranscriptionEngine::Cohere => { + let cfg = config.cohere.as_ref().ok_or_else(|| { + TranscribeError::InitFailed( + "Cohere engine selected but [cohere] config section is missing".to_string(), + ) + })?; + Ok(Box::new(cohere::CohereTranscriber::new(cfg)?)) + } + #[cfg(not(feature = "cohere"))] + TranscriptionEngine::Cohere => Err(TranscribeError::InitFailed( + "Cohere engine requested but voxtype was not compiled with --features cohere" + .to_string(), + )), } } diff --git a/src/transcribe/moonshine.rs b/src/transcribe/moonshine.rs index b02a34b0..8943009d 100644 --- a/src/transcribe/moonshine.rs +++ b/src/transcribe/moonshine.rs @@ -117,7 +117,11 @@ impl MoonshineTranscriber { let tokenizer = Tokenizer::from_file(&tokenizer_path) .map_err(|e| TranscribeError::InitFailed(format!("Failed to load tokenizer: {}", e)))?; - // Create ONNX sessions + // Create ONNX sessions. + // No GPU EP registration: Moonshine runs on the CPU EP only. + // MIGraphX 7.2 can't compile the encoder-decoder `If` op (then/else + // sub-graphs have different output shapes), so we keep the engine + // on CPU on the AMD-targeted binary. 
let encoder = Session::builder() .map_err(|e| { TranscribeError::InitFailed(format!("ONNX encoder session builder failed: {}", e)) diff --git a/src/transcribe/omnilingual.rs b/src/transcribe/omnilingual.rs index 142e4819..7b80e647 100644 --- a/src/transcribe/omnilingual.rs +++ b/src/transcribe/omnilingual.rs @@ -70,15 +70,17 @@ impl OmnilingualTranscriber { let tokens = ctc::load_tokens(&tokens_path)?; tracing::debug!("Loaded {} tokens", tokens.len()); - // Create ONNX session + // Create ONNX session. + // No GPU EP registration: Omnilingual runs on the CPU EP only. + // MIGraphX 7.2 produces garbled output and pays a 65s+ first-call + // compile penalty on this graph, so we keep the engine on CPU on + // the AMD-targeted binary. let session = Session::builder() .map_err(|e| { TranscribeError::InitFailed(format!("ONNX session builder failed: {}", e)) })? .with_intra_threads(threads) - .map_err(|e| { - TranscribeError::InitFailed(format!("Failed to set threads: {}", e)) - })? + .map_err(|e| TranscribeError::InitFailed(format!("Failed to set threads: {}", e)))? 
.commit_from_file(&model_file) .map_err(|e| { TranscribeError::InitFailed(format!( @@ -124,10 +126,7 @@ impl Transcriber for OmnilingualTranscriber { // x: shape [1, num_samples] let x_tensor = Tensor::::from_array(([1usize, num_samples], normalized)).map_err(|e| { - TranscribeError::InferenceFailed(format!( - "Failed to create input tensor: {}", - e - )) + TranscribeError::InferenceFailed(format!("Failed to create input tensor: {}", e)) })?; // Run inference @@ -212,7 +211,11 @@ impl Transcriber for OmnilingualTranscriber { fn normalize_audio(samples: &[f32]) -> Vec { let n = samples.len() as f32; let mean: f32 = samples.iter().sum::() / n; - let variance: f32 = samples.iter().map(|&s| (s - mean) * (s - mean)).sum::() / n; + let variance: f32 = samples + .iter() + .map(|&s| (s - mean) * (s - mean)) + .sum::() + / n; let inv_stddev = 1.0 / (variance + 1e-5_f32).sqrt(); samples.iter().map(|&s| (s - mean) * inv_stddev).collect() diff --git a/src/transcribe/onnx_ep.rs b/src/transcribe/onnx_ep.rs new file mode 100644 index 00000000..bb597f79 --- /dev/null +++ b/src/transcribe/onnx_ep.rs @@ -0,0 +1,73 @@ +//! Shared helper that registers ONNX Runtime execution providers for any +//! ONNX-backed engine in voxtype. +//! +//! Each engine's session builder calls [`register_gpu_eps`] to attach the +//! GPU EPs that were compiled into this binary. The compile-time gating +//! lives on three marker features in `Cargo.toml`: +//! +//! - `onnx-cuda-enabled` — CUDA EP (NVIDIA) +//! - `onnx-migraphx-enabled` — MIGraphX EP (AMD) +//! - `onnx-tensorrt-enabled` — TensorRT EP (NVIDIA, optimized) +//! +//! Enabling any per-engine feature like `cohere-migraphx` or +//! `parakeet-cuda` transitively enables the matching marker, so this +//! helper sees the right EPs without each engine duplicating the cfg +//! plumbing. +//! +//! Order matters: ort tries EPs in sequence and falls through to the +//! next on registration failure. Specialized EPs (TensorRT) come before +//! 
their generic siblings (CUDA). The CPU EP is always implicit at the +//! bottom of the chain — even if every GPU EP fails to register at +//! runtime (no GPU, missing driver, missing companion .so files), ort +//! still runs the model on CPU. + +#[cfg(feature = "onnx-common")] +use ort::execution_providers::ExecutionProviderDispatch; +#[cfg(feature = "onnx-common")] +use ort::session::builder::{BuilderResult, SessionBuilder}; + +/// Register GPU EPs onto a session builder. +/// +/// `engine_label` and `session_label` are used only for logging +/// (`"Cohere encoder: registering execution providers [...]"`). Returns +/// the modified builder; if no EPs are compiled in or registration +/// fails, falls through unchanged and ort uses the CPU EP. +#[cfg(feature = "onnx-common")] +pub fn register_gpu_eps( + builder: SessionBuilder, + engine_label: &str, + session_label: &str, +) -> BuilderResult { + let providers = compiled_providers(); + if providers.is_empty() { + return Ok(builder); + } + let names: Vec<&'static str> = providers.iter().map(|(n, _)| *n).collect(); + tracing::info!("{engine_label} {session_label}: registering execution providers {names:?}"); + let dispatches: Vec<_> = providers.into_iter().map(|(_, ep)| ep).collect(); + builder.with_execution_providers(dispatches) +} + +#[cfg(feature = "onnx-common")] +fn compiled_providers() -> Vec<(&'static str, ExecutionProviderDispatch)> { + #[allow(unused_mut)] + let mut providers: Vec<(&'static str, ExecutionProviderDispatch)> = Vec::new(); + + #[cfg(feature = "onnx-tensorrt-enabled")] + { + use ort::execution_providers::{ExecutionProvider, TensorRTExecutionProvider}; + providers.push(("TensorRT", TensorRTExecutionProvider::default().build())); + } + #[cfg(feature = "onnx-cuda-enabled")] + { + use ort::execution_providers::{CUDAExecutionProvider, ExecutionProvider}; + providers.push(("CUDA", CUDAExecutionProvider::default().build())); + } + #[cfg(feature = "onnx-migraphx-enabled")] + { + use 
ort::execution_providers::{ExecutionProvider, MIGraphXExecutionProvider}; + providers.push(("MIGraphX", MIGraphXExecutionProvider::default().build())); + } + + providers +} diff --git a/src/transcribe/paraformer.rs b/src/transcribe/paraformer.rs index 24016192..ce721f99 100644 --- a/src/transcribe/paraformer.rs +++ b/src/transcribe/paraformer.rs @@ -14,8 +14,8 @@ //! Languages: zh+en (bilingual), zh+yue+en (trilingual) //! Model files: model.int8.onnx (or model.onnx), tokens.txt, am.mvn -use super::fbank::{self, FbankExtractor, LfrConfig}; use super::ctc; +use super::fbank::{self, FbankExtractor, LfrConfig}; use super::Transcriber; use crate::config::ParaformerConfig; use crate::error::TranscribeError; @@ -76,15 +76,16 @@ impl ParaformerTranscriber { let tokens = ctc::load_tokens(&tokens_path)?; tracing::debug!("Loaded {} tokens", tokens.len()); - // Create ONNX session + // Create ONNX session. + // No GPU EP registration: Paraformer runs on the CPU EP only. + // MIGraphX 7.2 segfaults during model load on this graph; we + // keep the engine on CPU on the AMD-targeted binary. let session = Session::builder() .map_err(|e| { TranscribeError::InitFailed(format!("ONNX session builder failed: {}", e)) })? .with_intra_threads(threads) - .map_err(|e| { - TranscribeError::InitFailed(format!("Failed to set threads: {}", e)) - })? + .map_err(|e| TranscribeError::InitFailed(format!("Failed to set threads: {}", e)))? 
.commit_from_file(&model_file) .map_err(|e| { TranscribeError::InitFailed(format!( @@ -169,19 +170,13 @@ impl Transcriber for ParaformerTranscriber { let (x_data, _offset) = features.into_raw_vec_and_offset(); let speech_tensor = Tensor::::from_array(([1usize, num_frames, feat_dim], x_data)) .map_err(|e| { - TranscribeError::InferenceFailed(format!( - "Failed to create speech tensor: {}", - e - )) + TranscribeError::InferenceFailed(format!("Failed to create speech tensor: {}", e)) })?; // speech_lengths: shape [1] let lengths_tensor = Tensor::::from_array(([1usize], vec![num_frames as i32])) .map_err(|e| { - TranscribeError::InferenceFailed(format!( - "Failed to create lengths tensor: {}", - e - )) + TranscribeError::InferenceFailed(format!("Failed to create lengths tensor: {}", e)) })?; // 5. Run inference @@ -272,10 +267,7 @@ fn decode_paraformer_output( } else if shape_dims.len() == 2 { // [batch, seq_len] - pre-argmaxed token IDs as f32 let seq_len = shape_dims[1] as usize; - let token_ids: Vec = data[..seq_len] - .iter() - .map(|&v| v as u32) - .collect(); + let token_ids: Vec = data[..seq_len].iter().map(|&v| v as u32).collect(); Ok(tokens_to_text(&token_ids, tokens)) } else { Err(TranscribeError::InferenceFailed(format!( @@ -349,9 +341,8 @@ fn tokens_to_text(token_ids: &[u32], tokens: &HashMap) -> String { fn read_cmvn_from_kaldi_mvn( path: &std::path::Path, ) -> Result<(Vec, Vec), TranscribeError> { - let data = std::fs::read(path).map_err(|e| { - TranscribeError::InitFailed(format!("Failed to read am.mvn: {}", e)) - })?; + let data = std::fs::read(path) + .map_err(|e| TranscribeError::InitFailed(format!("Failed to read am.mvn: {}", e)))?; let mut pos = 0; @@ -429,7 +420,12 @@ fn read_cmvn_from_kaldi_mvn( ))); } - tracing::debug!("am.mvn: {} rows x {} cols, double={}", rows, cols, is_double); + tracing::debug!( + "am.mvn: {} rows x {} cols, double={}", + rows, + cols, + is_double + ); // Read matrix data let feat_dim = cols - 1; // last column is the count 
@@ -515,9 +511,7 @@ fn read_cmvn_from_metadata(session: &Session) -> Result<(Vec, Vec), Tr })?; let inv_stddev_str = metadata.custom("inv_stddev").ok_or_else(|| { - TranscribeError::InitFailed( - "Model metadata missing 'inv_stddev' key".to_string(), - ) + TranscribeError::InitFailed("Model metadata missing 'inv_stddev' key".to_string()) })?; let neg_mean: Vec = neg_mean_str diff --git a/src/transcribe/parakeet.rs b/src/transcribe/parakeet.rs index ae69581e..824742da 100644 --- a/src/transcribe/parakeet.rs +++ b/src/transcribe/parakeet.rs @@ -12,7 +12,7 @@ use crate::config::{ParakeetConfig, ParakeetModelType}; use crate::error::TranscribeError; #[cfg(any( feature = "parakeet-cuda", - feature = "parakeet-rocm", + feature = "parakeet-migraphx", feature = "parakeet-tensorrt" ))] use parakeet_rs::ExecutionProvider; @@ -310,16 +310,16 @@ fn build_execution_config() -> Option { return None; } - #[cfg(feature = "parakeet-rocm")] + #[cfg(feature = "parakeet-migraphx")] { - tracing::info!("Configuring ROCm execution provider for AMD GPU acceleration"); - return Some(ExecutionConfig::new().with_execution_provider(ExecutionProvider::ROCm)); + tracing::info!("Configuring MIGraphX execution provider for AMD GPU acceleration"); + return Some(ExecutionConfig::new().with_execution_provider(ExecutionProvider::MIGraphX)); } #[cfg(not(any( feature = "parakeet-cuda", feature = "parakeet-tensorrt", - feature = "parakeet-rocm" + feature = "parakeet-migraphx" )))] { None @@ -391,25 +391,32 @@ fn probe_cuda_runtime() -> bool { let minor = (version % 1000) / 10; tracing::info!("Detected CUDA runtime version: {}.{}", major, minor); - // The bundled ONNX Runtime (via ort crate) is built against CUDA 12.x. - // A major version mismatch causes a segfault in ONNX Runtime's CUDA EP - // initialization - there's no way to catch this from Rust. 
- const EXPECTED_CUDA_MAJOR: i32 = 12; + // ort 2.0.0-rc.12 picks the cu12 or cu13 prebuilt at compile time from + // ORT_CUDA_VERSION (see ort-sys/build/download/resolve.rs). build.rs + // mirrors that selection into VOXTYPE_BUILD_CUDA_MAJOR so this probe + // accepts only the runtime version the bundled EP can actually talk to. + // A mismatched major would crash ort's CUDA EP during initialization. + // + // Voxtype ships separate voxtype-onnx-cuda-12 and voxtype-onnx-cuda-13 + // binaries. `voxtype setup gpu --enable` symlinks voxtype-onnx-cuda to + // whichever variant matches the host's CUDA runtime. + const EXPECTED_CUDA_MAJOR: i32 = match env!("VOXTYPE_BUILD_CUDA_MAJOR").as_bytes() { + b"13" => 13, + _ => 12, + }; if major != EXPECTED_CUDA_MAJOR { tracing::error!( - "CUDA version mismatch: found CUDA {}.{}, but the bundled ONNX Runtime \ - requires CUDA {}.x. Continuing would crash the process.\n \ + "CUDA version mismatch: found CUDA {major}.{minor}, but this binary's \ + bundled ONNX Runtime requires CUDA {EXPECTED_CUDA_MAJOR}.x. \ + Continuing would crash the process.\n \ Options:\n \ - 1. Install CUDA {} (e.g., the cuda-12 package)\n \ - 2. Use the pre-built release binary (voxtype-onnx-cuda) which bundles \ - compatible libraries\n \ - 3. Build from source with --features parakeet-load-dynamic to link against \ - your system's ONNX Runtime instead", + 1. Install the matching voxtype-onnx-cuda-{EXPECTED_CUDA_MAJOR} package\n \ + 2. Switch to voxtype-onnx-cuda-{} for your CUDA version (`voxtype setup gpu --enable` \ + auto-detects and points the symlink at the right one)\n \ + 3. 
Build from source with --features parakeet-load-dynamic to link \ + against your system's ONNX Runtime instead", major, - minor, - EXPECTED_CUDA_MAJOR, - EXPECTED_CUDA_MAJOR, ); return false; } diff --git a/src/transcribe/sensevoice.rs b/src/transcribe/sensevoice.rs index 60c76e7a..9d6efc7e 100644 --- a/src/transcribe/sensevoice.rs +++ b/src/transcribe/sensevoice.rs @@ -10,8 +10,8 @@ //! Supports languages: auto, zh, en, ja, ko, yue //! Model files: model.int8.onnx (or model.onnx), tokens.txt -use super::fbank::{self, FbankExtractor, LfrConfig}; use super::ctc::{self, CtcConfig}; +use super::fbank::{self, FbankExtractor, LfrConfig}; use super::Transcriber; use crate::config::SenseVoiceConfig; use crate::error::TranscribeError; @@ -75,15 +75,16 @@ impl SenseVoiceTranscriber { let tokens = ctc::load_tokens(&tokens_path)?; tracing::debug!("Loaded {} tokens", tokens.len()); - // Create ONNX session + // Create ONNX session. + // No GPU EP registration: SenseVoice runs on the CPU EP only. + // MIGraphX 7.2 rejects this encoder's Where-op broadcast pattern, + // so we keep the engine on CPU on the AMD-targeted binary. let session = Session::builder() .map_err(|e| { TranscribeError::InitFailed(format!("ONNX session builder failed: {}", e)) })? .with_intra_threads(threads) - .map_err(|e| { - TranscribeError::InitFailed(format!("Failed to set threads: {}", e)) - })? + .map_err(|e| TranscribeError::InitFailed(format!("Failed to set threads: {}", e)))? 
.commit_from_file(&model_file) .map_err(|e| { TranscribeError::InitFailed(format!( @@ -171,28 +172,19 @@ impl Transcriber for SenseVoiceTranscriber { let (x_data, _offset) = features.into_raw_vec_and_offset(); let x_tensor = Tensor::::from_array(([1usize, num_frames, feat_dim], x_data)) .map_err(|e| { - TranscribeError::InferenceFailed(format!( - "Failed to create input tensor: {}", - e - )) + TranscribeError::InferenceFailed(format!("Failed to create input tensor: {}", e)) })?; // x_length: shape [1] let x_length_tensor = Tensor::::from_array(([1usize], vec![num_frames as i32])) .map_err(|e| { - TranscribeError::InferenceFailed(format!( - "Failed to create length tensor: {}", - e - )) + TranscribeError::InferenceFailed(format!("Failed to create length tensor: {}", e)) })?; // language: shape [1] let language_tensor = Tensor::::from_array(([1usize], vec![self.language_id])) .map_err(|e| { - TranscribeError::InferenceFailed(format!( - "Failed to create language tensor: {}", - e - )) + TranscribeError::InferenceFailed(format!("Failed to create language tensor: {}", e)) })?; // text_norm: shape [1] @@ -212,9 +204,18 @@ impl Transcriber for SenseVoiceTranscriber { let inputs: Vec<(std::borrow::Cow, ort::session::SessionInputValue)> = vec![ (std::borrow::Cow::Borrowed("x"), x_tensor.into()), - (std::borrow::Cow::Borrowed("x_length"), x_length_tensor.into()), - (std::borrow::Cow::Borrowed("language"), language_tensor.into()), - (std::borrow::Cow::Borrowed("text_norm"), text_norm_tensor.into()), + ( + std::borrow::Cow::Borrowed("x_length"), + x_length_tensor.into(), + ), + ( + std::borrow::Cow::Borrowed("language"), + language_tensor.into(), + ), + ( + std::borrow::Cow::Borrowed("text_norm"), + text_norm_tensor.into(), + ), ]; let outputs = session.run(inputs).map_err(|e| { @@ -249,11 +250,7 @@ impl Transcriber for SenseVoiceTranscriber { } else if shape_dims.len() == 2 { // Pre-argmaxed output: each value is already a token ID let time_steps = shape_dims[1] as usize; - 
ctc::decode_pre_argmax( - &logits_data[..time_steps], - &self.tokens, - &self.ctc_config, - ) + ctc::decode_pre_argmax(&logits_data[..time_steps], &self.tokens, &self.ctc_config) } else { return Err(TranscribeError::InferenceFailed(format!( "Unexpected logits shape: {:?}", diff --git a/src/transcribe/whisper.rs b/src/transcribe/whisper.rs index 2c4a428c..9ea38b47 100644 --- a/src/transcribe/whisper.rs +++ b/src/transcribe/whisper.rs @@ -192,6 +192,10 @@ impl Transcriber for WhisperTranscriber { params.set_suppress_blank(true); params.set_suppress_nst(true); + // Prevent hallucination/looping by not conditioning on previous text + // This is especially important for short clips where Whisper can repeat itself + params.set_no_context(true); + // Set initial prompt if configured if let Some(prompt) = &self.initial_prompt { params.set_initial_prompt(prompt); diff --git a/src/tui/advanced_section.rs b/src/tui/advanced_section.rs new file mode 100644 index 00000000..294361d9 --- /dev/null +++ b/src/tui/advanced_section.rs @@ -0,0 +1,370 @@ +//! Advanced settings: less-common knobs the TUI surfaces in one place. 
+ +use crossterm::event::{KeyCode, KeyEvent}; +use ratatui::{ + layout::Rect, + style::{Color, Modifier, Style}, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph, Wrap}, + Frame, +}; + +use super::app::{Action, App}; +use super::common::{self, FeedbackLevel, FormRowSpec}; +use super::config_editor::{ConfigEditor, EditorError}; + +#[derive(Debug, Clone)] +pub struct AdvancedState { + pub gpu_isolation: bool, + pub on_demand_loading: bool, + pub flash_attention: bool, + pub eager_processing: bool, + pub gpu_device: Option, + pub field: Field, + pub feedback: Option<(FeedbackLevel, String)>, + pub dirty_since_load: bool, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Field { + GpuIsolation, + OnDemand, + FlashAttention, + Eager, + GpuDevice, +} +impl Field { + const ALL: &'static [Field] = &[ + Field::GpuIsolation, + Field::OnDemand, + Field::FlashAttention, + Field::Eager, + Field::GpuDevice, + ]; +} + +impl AdvancedState { + pub fn load() -> Result { + let ed = ConfigEditor::load()?; + Ok(Self { + gpu_isolation: ed.get_bool("whisper", "gpu_isolation").unwrap_or(false), + on_demand_loading: ed + .get_bool("whisper", "on_demand_loading") + .unwrap_or(false), + flash_attention: ed.get_bool("whisper", "flash_attention").unwrap_or(false), + eager_processing: ed + .get_bool("whisper", "eager_processing") + .unwrap_or(false), + gpu_device: ed.get_int("whisper", "gpu_device"), + field: Field::GpuIsolation, + feedback: None, + dirty_since_load: false, + }) + } + pub fn save(&mut self) -> Action { + let mut ed = match ConfigEditor::load() { + Ok(e) => e, + Err(e) => { + self.feedback = Some((FeedbackLevel::Err, format!("load: {}", e))); + return Action::None; + } + }; + ed.set_bool("whisper", "gpu_isolation", self.gpu_isolation); + ed.set_bool("whisper", "on_demand_loading", self.on_demand_loading); + ed.set_bool("whisper", "flash_attention", self.flash_attention); + ed.set_bool("whisper", "eager_processing", self.eager_processing); + match 
self.gpu_device { + Some(n) if n >= 0 => ed.set_int("whisper", "gpu_device", n), + _ => ed.unset("whisper", "gpu_device"), + } + match ed.save() { + Ok(()) => { + self.dirty_since_load = false; + self.feedback = Some(( + FeedbackLevel::Ok, + format!("Saved to {}", ed.path().display()), + )); + } + Err(e) => self.feedback = Some((FeedbackLevel::Err, format!("save: {}", e))), + } + Action::None + } + pub fn reset(&mut self) { + if let Ok(fresh) = Self::load() { + let field = self.field; + *self = fresh; + self.field = field; + self.feedback = Some((FeedbackLevel::Ok, "Reverted".to_string())); + } + } + fn move_field(&mut self, delta: i32) { + let len = Field::ALL.len() as i32; + let cur = Field::ALL.iter().position(|f| *f == self.field).unwrap_or(0) as i32; + self.field = Field::ALL[((cur + delta).rem_euclid(len)) as usize]; + } + fn cycle(&mut self, delta: i32) { + match self.field { + Field::GpuIsolation => self.gpu_isolation = !self.gpu_isolation, + Field::OnDemand => self.on_demand_loading = !self.on_demand_loading, + Field::FlashAttention => self.flash_attention = !self.flash_attention, + Field::Eager => self.eager_processing = !self.eager_processing, + Field::GpuDevice => { + let cur = self.gpu_device.unwrap_or(-1); + let next = cur + delta as i64; + self.gpu_device = if next < 0 { None } else { Some(next.min(7)) }; + } + } + self.dirty_since_load = true; + self.feedback = None; + } +} + +pub fn render(f: &mut Frame, area: Rect, app: &App) { + let state = match &app.advanced { + Some(s) => s, + None => { + let block = Block::default().borders(Borders::ALL).title("Advanced"); + let inner = block.inner(area); + f.render_widget(block, area); + f.render_widget(Paragraph::new("Failed to load config.").wrap(Wrap { trim: true }), inner); + return; + } + }; + + let rows = vec![ + FormRowSpec::new( + state.field == Field::GpuIsolation, + "GPU isolation (subprocess)", + yesno(state.gpu_isolation), + ), + FormRowSpec::new( + state.field == Field::OnDemand, + "On-demand 
model loading", + yesno(state.on_demand_loading), + ), + FormRowSpec::new( + state.field == Field::FlashAttention, + "Flash attention", + yesno(state.flash_attention), + ), + FormRowSpec::new( + state.field == Field::Eager, + "Eager input processing", + yesno(state.eager_processing), + ), + FormRowSpec::new( + state.field == Field::GpuDevice, + "GPU device index", + state + .gpu_device + .map(|n| n.to_string()) + .unwrap_or_else(|| "auto".to_string()), + ), + ]; + + let feedback_pair = state + .feedback + .as_ref() + .map(|(lvl, msg)| (*lvl, msg.as_str())); + + common::render_form_with_guidance( + f, + area, + "Advanced", + state.dirty_since_load, + feedback_pair, + &rows, + guidance_for_field(state), + ); +} + +fn yesno(b: bool) -> String { + (if b { "yes" } else { "no" }).to_string() +} + +fn heading<'a>(text: &'a str) -> Line<'a> { + Line::from(Span::styled( + text, + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + )) +} + +fn guidance_for_field(state: &AdvancedState) -> Vec> { + match state.field { + Field::GpuIsolation => vec![ + heading("GPU isolation"), + Line::from(""), + Line::from( + "Runs each transcription in a short-lived subprocess that \ + exits afterward. The GPU releases all VRAM between recordings \ + instead of holding the model resident.", + ), + Line::from(""), + Line::from(Span::styled( + "Turn it on if:", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + " • You're on a laptop with hybrid graphics and want the \ + discrete GPU to power down between dictations.", + ), + Line::from( + " • You see VRAM use creep upward over a long voxtype \ + session.", + ), + Line::from(""), + Line::from(Span::styled( + "Leave it off if:", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + " • Latency matters more than VRAM. 
Subprocess startup adds \ + ~100-300 ms per recording.", + ), + ], + Field::OnDemand => vec![ + heading("On-demand model loading"), + Line::from(""), + Line::from( + "When on, voxtype loads the model only when recording starts \ + (and unloads at idle). When off, the model stays resident \ + from daemon start.", + ), + Line::from(""), + Line::from(Span::styled( + "Turn it on if:", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + " • You only dictate occasionally and don't want the daemon \ + holding ~1-2 GB of RAM in the background.", + ), + Line::from(""), + Line::from(Span::styled( + "Leave it off if:", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + " • You dictate frequently. Resident-mode transcription \ + starts instantly; on-demand loads add a one-shot delay on \ + the first key press.", + ), + ], + Field::FlashAttention => vec![ + heading("Flash attention"), + Line::from(""), + Line::from( + "A GPU-only inference optimization that reduces memory \ + bandwidth pressure in the attention layers. Speeds up \ + transcription on capable cards.", + ), + Line::from(""), + Line::from(Span::styled( + "Turn it on if:", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + " • You're on Vulkan or CUDA with a recent GPU. \ + Particularly noticeable on large-v3 and large-v3-turbo.", + ), + Line::from(""), + Line::from(Span::styled( + "Leave it off if:", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + " • You're CPU-only or on older hardware (no benefit, may \ + cause crashes on a few drivers).", + ), + ], + Field::Eager => vec![ + heading("Eager input processing"), + Line::from(""), + Line::from( + "Voxtype starts transcribing audio chunks while you're still \ + recording, instead of waiting until you release the PTT key. 
\ + The final transcript stitches the chunks together.", + ), + Line::from(""), + Line::from(Span::styled( + "Turn it on if:", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + " • You record long-form (>10 sec) and the post-recording \ + wait feels like dead time.", + ), + Line::from(""), + Line::from(Span::styled( + "Leave it off if:", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + " • You record short bursts (a few seconds). The chunked \ + transcripts can occasionally split a sentence awkwardly.", + ), + Line::from( + " • You're on a laptop and CPU/GPU heat matters. Eager \ + mode keeps the inference engine busy during recording.", + ), + ], + Field::GpuDevice => vec![ + heading("GPU device index"), + Line::from(""), + Line::from( + "Picks which GPU voxtype targets on multi-GPU systems. The \ + default (auto) leaves the choice to the driver, which often \ + picks the integrated GPU on hybrid laptops.", + ), + Line::from(""), + Line::from(Span::styled( + "Set a specific index if:", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + " • Transcription is slower than expected and you suspect \ + the iGPU is being used. 
Try 1 (or 2) to target the \ + discrete card.", + ), + Line::from(""), + Line::from(Span::styled( + "Run `vulkaninfo --summary` or `nvidia-smi -L` to see your \ + device numbering.", + Style::default().fg(Color::Gray), + )), + ], + } +} + +pub fn handle_key(app: &mut App, key: KeyEvent) -> Action { + let state = match app.advanced.as_mut() { + Some(s) => s, + None => return Action::None, + }; + match key.code { + KeyCode::Up | KeyCode::Char('k') => { + state.move_field(-1); + Action::None + } + KeyCode::Down | KeyCode::Char('j') => { + state.move_field(1); + Action::None + } + KeyCode::Left | KeyCode::Char('h') => { + state.cycle(-1); + Action::None + } + KeyCode::Right | KeyCode::Char('l') | KeyCode::Char(' ') => { + state.cycle(1); + Action::None + } + KeyCode::Char('s') => state.save(), + KeyCode::Char('r') => { + state.reset(); + Action::None + } + _ => Action::None, + } +} diff --git a/src/tui/app.rs b/src/tui/app.rs new file mode 100644 index 00000000..b533c57c --- /dev/null +++ b/src/tui/app.rs @@ -0,0 +1,434 @@ +//! TUI application state. + +use crate::setup::binary::{self, Acceleration, EngineFamily, InstallKind, Inventory, Variant}; +use std::path::Path; + +use super::audio::AudioState; +use super::engine::EngineState; +use super::hotkey::HotkeyState; +use super::advanced_section::AdvancedState; +use super::meeting_section::MeetingState; +use super::notifications_section::NotificationsState; +use super::output_section::OutputState; +use super::section::Section; +use super::text_section::TextState; +use super::vad_section::VadState; +use super::waybar_section::WaybarState; + +/// What the event handler asks the run-loop to do next. +pub enum Action { + None, + Quit, + SwitchVariant(Variant), +} + +/// Result of the most recent variant-switch attempt, displayed as a banner. +pub struct SwitchOutcome { + pub success: bool, + pub message: String, +} + +/// Layout of the variant matrix: rows = engine family, columns = acceleration. 
+pub const ROWS: &[EngineFamily] = &[EngineFamily::Whisper, EngineFamily::Onnx]; +pub const COLS: &[Acceleration] = &[ + Acceleration::Avx2, + Acceleration::Avx512, + Acceleration::Vulkan, + Acceleration::Cuda, + Acceleration::Migraphx, +]; + +pub struct App { + pub inventory: Inventory, + /// Cursor position in the variant matrix (row, col). + pub cursor: (usize, usize), + /// True after the active variant changes; daemon should be restarted. + pub restart_needed: bool, + /// Result of the last switch attempt, if any. + pub last_switch: Option, + pub daemon_running: bool, + /// Hidden testing flag: render as Package install even when running from a + /// source build, so the variant matrix can be exercised in dev. + pub force_package_mode: bool, + /// Section currently rendered in the right pane. + pub current_section: Section, + /// Index into Section::ALL of the section being hovered in the sidebar. + /// Independent of `current_section` so the user can scroll the sidebar + /// without committing. + pub sidebar_cursor: usize, + /// True when keyboard input is steered at the sidebar (Tab toggles). + pub sidebar_focused: bool, + /// `?` toggles a centered help overlay listing every keybinding. + pub help_open: bool, + /// If the configured engine's model isn't downloaded, this holds the + /// model name so the General banner can prompt the user to fetch it. + /// Computed at load time and on `refresh_inventory()`. + pub missing_model: Option, + /// Lazily loaded Hotkey section state. None until the user opens Hotkey + /// for the first time (or load fails). 
+ pub hotkey: Option, + pub audio: Option, + pub engine: Option, + pub output: Option, + pub text: Option, + pub vad: Option, + pub meeting: Option, + pub notifications: Option, + pub waybar: Option, + pub advanced: Option, +} + +#[derive(Debug, Clone)] +pub struct MissingModel { + pub engine: String, + pub model: String, + pub setup_command: &'static str, +} + +/// Build the inventory and, if `force_package_mode` is set, override the +/// install_kind so the TUI exercises the variant-matrix code path during +/// development without needing to install the binary. +fn build_inventory(force_package_mode: bool) -> Inventory { + let mut inv = binary::inventory(); + if force_package_mode && inv.install_kind == InstallKind::Source { + inv.install_kind = InstallKind::Package; + if inv.package_lib_dir.is_none() { + inv.package_lib_dir = Some(Path::new(binary::LIB_DIR).to_path_buf()); + } + // If `enumerate_installed()` was skipped because we resolved as Source, + // populate the matrix now so cells render with real on-disk state. 
+ if inv.variants.is_empty() { + inv.variants = Variant::ALL + .iter() + .map(|&v| binary::VariantStatus { + variant: v, + binary_name: v.binary_name().to_string(), + installed: Path::new(binary::LIB_DIR).join(v.binary_name()).exists(), + runs_on_this_cpu: variant_runs_on_cpu(v, &inv.cpu), + gpu_available: variant_gpu_available(v, &inv.gpus), + active: inv.active_variant == Some(v), + }) + .collect(); + } + } + inv +} + +fn variant_runs_on_cpu(v: Variant, cpu: &binary::Cpu) -> bool { + match v.acceleration() { + Acceleration::Avx512 | Acceleration::Cuda | Acceleration::Migraphx => cpu.avx512, + _ => cpu.avx2, + } +} + +fn variant_gpu_available(v: Variant, g: &binary::Gpus) -> bool { + match v.acceleration() { + Acceleration::Cuda => g.nvidia, + Acceleration::Migraphx => g.amd, + _ => true, + } +} + +impl App { + pub fn new(force_package_mode: bool) -> Self { + let inventory = build_inventory(force_package_mode); + let cursor = initial_cursor(&inventory); + Self { + inventory, + cursor, + restart_needed: false, + last_switch: None, + daemon_running: is_daemon_running(), + force_package_mode, + current_section: Section::General, + sidebar_cursor: 0, + sidebar_focused: true, + help_open: false, + missing_model: detect_missing_model(), + hotkey: None, + audio: None, + engine: None, + output: None, + text: None, + vad: None, + meeting: None, + notifications: None, + waybar: None, + advanced: None, + } + } + + /// Ensure section-specific state is loaded the first time a section opens. 
+ pub fn ensure_section_loaded(&mut self) { + match self.current_section { + Section::Hotkey if self.hotkey.is_none() => { + self.hotkey = HotkeyState::load().ok(); + } + Section::Audio if self.audio.is_none() => { + self.audio = AudioState::load().ok(); + } + Section::Engine if self.engine.is_none() => { + self.engine = EngineState::load().ok(); + } + Section::Output if self.output.is_none() => { + self.output = OutputState::load().ok(); + } + Section::Text if self.text.is_none() => { + self.text = TextState::load().ok(); + } + Section::Vad if self.vad.is_none() => { + self.vad = VadState::load().ok(); + } + Section::Meeting if self.meeting.is_none() => { + self.meeting = MeetingState::load().ok(); + } + Section::Notifications if self.notifications.is_none() => { + self.notifications = NotificationsState::load().ok(); + } + Section::Waybar if self.waybar.is_none() => { + self.waybar = WaybarState::load().ok(); + } + Section::Advanced if self.advanced.is_none() => { + self.advanced = AdvancedState::load().ok(); + } + _ => {} + } + } + + pub fn move_sidebar(&mut self, delta: i32) { + let len = Section::ALL.len() as i32; + if len == 0 { + return; + } + let new = (self.sidebar_cursor as i32 + delta).clamp(0, len - 1); + self.sidebar_cursor = new as usize; + } + + pub fn open_hovered_section(&mut self) { + if let Some(section) = Section::ALL.get(self.sidebar_cursor).copied() { + self.current_section = section; + self.ensure_section_loaded(); + } + } + + pub fn focus_sidebar(&mut self) { + self.sidebar_focused = true; + // Keep cursor in sync with the active section so the user lands on the + // currently-open section when they Tab back to the sidebar. 
+ if let Some(idx) = Section::ALL + .iter() + .position(|s| *s == self.current_section) + { + self.sidebar_cursor = idx; + } + } + + pub fn focus_content(&mut self) { + self.sidebar_focused = false; + } + + /// True when a section is in inline-edit mode and should swallow keys + /// instead of letting global shortcuts (Esc, Tab, q) act on them. + pub fn is_editing(&self) -> bool { + match self.current_section { + Section::Engine => self.engine.as_ref().is_some_and(|s| s.editing.is_some()), + Section::Output => self.output.as_ref().is_some_and(|s| s.editing.is_some()), + Section::Hotkey => self.hotkey.as_ref().is_some_and(|s| s.editing.is_some()), + Section::Audio => self.audio.as_ref().is_some_and(|s| s.editing.is_some()), + Section::Waybar => self.waybar.as_ref().is_some_and(|s| s.editing.is_some()), + _ => false, + } + } + + pub fn refresh_inventory(&mut self) { + self.inventory = build_inventory(self.force_package_mode); + self.daemon_running = is_daemon_running(); + self.missing_model = detect_missing_model(); + } + + /// Map a (row, col) cell to a Variant if one exists for that combination. + /// Returns None for invalid pairs (e.g. Whisper × CUDA). 
pub fn variant_at(&self, row: usize, col: usize) -> Option<Variant> {
    // Out-of-range grid coordinates short-circuit to `None`.
    let family = *ROWS.get(row)?;
    let accel = *COLS.get(col)?;
    Variant::ALL
        .iter()
        .copied()
        .find(|v| v.family() == family && v.acceleration() == accel)
}

/// Move the grid cursor by (`drow`, `dcol`), clamping each axis to the valid
/// index range of `ROWS` / `COLS`.
pub fn move_cursor(&mut self, drow: i32, dcol: i32) {
    let (r, c) = self.cursor;
    // Saturating adds keep an extreme delta from overflowing before the clamp.
    let new_r = clamp_signed((r as i32).saturating_add(drow), ROWS.len());
    let new_c = clamp_signed((c as i32).saturating_add(dcol), COLS.len());
    self.cursor = (new_r, new_c);
}

/// Record the outcome of a variant switch and refresh the inventory.
///
/// On `Ok` the stored message names `variant` and `restart_needed` is set;
/// on `Err` the error string is stored as the message and `restart_needed`
/// is left as it was.
pub fn record_switch_attempt(&mut self, variant: Variant, result: Result<(), String>) {
    let (success, message) = match result {
        Ok(()) => (true, format!("Switched to {}.", variant.display())),
        Err(e) => (false, e),
    };
    if success {
        self.restart_needed = true;
    }
    self.last_switch = Some(SwitchOutcome { success, message });
    self.refresh_inventory();
}
} // end of `impl App`

/// Clamp a signed index into `0..len` and return it as `usize`.
/// Returns 0 when `len` is 0.
fn clamp_signed(v: i32, len: usize) -> usize {
    if len == 0 {
        return 0;
    }
    // A length beyond `i32::MAX` cannot be represented as an `i32` bound;
    // cap the bound instead of letting an `as` cast wrap it negative.
    let max = i32::try_from(len - 1).unwrap_or(i32::MAX);
    v.clamp(0, max) as usize
}

/// Starting grid cursor: the cell of the active variant, or (0, 0) when no
/// variant is active. An axis whose value is not found in `ROWS` / `COLS`
/// falls back to index 0.
fn initial_cursor(inv: &Inventory) -> (usize, usize) {
    let Some(active) = inv.active_variant else {
        return (0, 0);
    };
    let row = ROWS.iter().position(|f| *f == active.family()).unwrap_or(0);
    let col = COLS
        .iter()
        .position(|a| *a == active.acceleration())
        .unwrap_or(0);
    (row, col)
}

/// Detect whether the configured engine's active model file is on disk.
/// Returns the engine + model name + a setup command hint when it's missing,
/// or None when the model is present (or we can't determine it).
+fn detect_missing_model() -> Option { + use crate::config; + let cfg = config::load_config(None).ok()?; + let dir = config::Config::models_dir(); + let (engine_name, model, setup_command) = match cfg.engine { + config::TranscriptionEngine::Whisper => ( + "whisper", + cfg.whisper.model.clone(), + "voxtype setup model", + ), + config::TranscriptionEngine::Parakeet => ( + "parakeet", + cfg.parakeet + .as_ref() + .map(|c| c.model.clone()) + .unwrap_or_default(), + "voxtype setup model", + ), + config::TranscriptionEngine::Moonshine => ( + "moonshine", + cfg.moonshine + .as_ref() + .map(|c| c.model.clone()) + .unwrap_or_default(), + "voxtype setup model", + ), + config::TranscriptionEngine::SenseVoice => ( + "sensevoice", + cfg.sensevoice + .as_ref() + .map(|c| c.model.clone()) + .unwrap_or_default(), + "voxtype setup model", + ), + config::TranscriptionEngine::Paraformer => ( + "paraformer", + cfg.paraformer + .as_ref() + .map(|c| c.model.clone()) + .unwrap_or_default(), + "voxtype setup model", + ), + config::TranscriptionEngine::Dolphin => ( + "dolphin", + cfg.dolphin + .as_ref() + .map(|c| c.model.clone()) + .unwrap_or_default(), + "voxtype setup model", + ), + config::TranscriptionEngine::Omnilingual => ( + "omnilingual", + cfg.omnilingual + .as_ref() + .map(|c| c.model.clone()) + .unwrap_or_default(), + "voxtype setup model", + ), + // Cohere — checked but model layout differs by rc/0.7.0; skip the + // disk probe rather than emit a false-positive missing warning. + config::TranscriptionEngine::Cohere => return None, + }; + + if model.is_empty() { + return None; + } + + let installed = if engine_name == "whisper" { + dir.join(format!("ggml-{}.bin", model)).exists() + } else { + let p = dir.join(&model); + p.exists() + }; + if installed { + None + } else { + Some(MissingModel { + engine: engine_name.to_string(), + model, + setup_command, + }) + } +} + +/// Mirrors the check in main.rs; we duplicate it here to avoid a circular +/// dependency on a private helper. 
+fn is_daemon_running() -> bool { + let pid_path = crate::config::Config::runtime_dir().join("pid"); + let pid_str = match std::fs::read_to_string(&pid_path) { + Ok(s) => s, + Err(_) => return false, + }; + let pid: u32 = match pid_str.trim().parse() { + Ok(p) => p, + Err(_) => return false, + }; + std::path::Path::new(&format!("/proc/{}", pid)).exists() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn variant_at_finds_known_pairs() { + let app = App::new(false); + // Whisper × AVX2 = WhisperAvx2 + assert_eq!(app.variant_at(0, 0), Some(Variant::WhisperAvx2)); + // ONNX × CUDA = OnnxCuda + assert_eq!(app.variant_at(1, 3), Some(Variant::OnnxCuda)); + } + + #[test] + fn variant_at_returns_none_for_invalid_pairs() { + let app = App::new(false); + // Whisper × CUDA — no such variant + assert_eq!(app.variant_at(0, 3), None); + // ONNX × Vulkan — no such variant + assert_eq!(app.variant_at(1, 2), None); + } + + #[test] + fn move_cursor_clamps_at_edges() { + let mut app = App::new(false); + app.cursor = (0, 0); + app.move_cursor(-1, -1); + assert_eq!(app.cursor, (0, 0)); + app.move_cursor(10, 10); + assert_eq!(app.cursor, (ROWS.len() - 1, COLS.len() - 1)); + } +} diff --git a/src/tui/audio.rs b/src/tui/audio.rs new file mode 100644 index 00000000..3ade1570 --- /dev/null +++ b/src/tui/audio.rs @@ -0,0 +1,592 @@ +//! Audio settings: input device, max duration, feedback sounds, MPRIS pause. 
+ +use cpal::traits::{DeviceTrait, HostTrait}; +use crossterm::event::{KeyCode, KeyEvent}; +use ratatui::{ + layout::Rect, + style::{Color, Modifier, Style}, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph, Wrap}, + Frame, +}; + +use super::app::{Action, App}; +use super::common::{ + self, FeedbackLevel as CommonFeedback, FormRowSpec, TextInput, TextInputResult, +}; +use super::config_editor::{ConfigEditor, EditorError}; + +#[derive(Debug, Clone)] +pub struct AudioState { + pub device: String, + pub max_duration_secs: u32, + pub pause_media: bool, + pub feedback_enabled: bool, + pub feedback_theme: String, + pub feedback_volume: f32, + + pub field: Field, + pub feedback: Option, + pub dirty_since_load: bool, + /// Cached device list (default + everything cpal finds). Loaded once. + pub device_choices: Vec, + pub editing: Option, +} + +#[derive(Debug, Clone)] +pub struct TextEdit { + pub field: Field, + pub input: TextInput, +} + +#[derive(Debug, Clone)] +pub struct Feedback { + pub level: FeedbackLevel, + pub message: String, +} + +#[derive(Debug, Clone, Copy)] +pub enum FeedbackLevel { + Ok, + Err, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Field { + Device, + MaxDuration, + PauseMedia, + FeedbackEnabled, + FeedbackTheme, + FeedbackVolume, +} + +impl Field { + const ALL: &'static [Field] = &[ + Field::Device, + Field::MaxDuration, + Field::PauseMedia, + Field::FeedbackEnabled, + Field::FeedbackTheme, + Field::FeedbackVolume, + ]; +} + +const THEME_CHOICES: &[&str] = &["default", "subtle", "mechanical"]; +/// Step in seconds for the max-duration cycler. 
+const DURATION_STEP: u32 = 30; +const DURATION_MIN: u32 = 30; +const DURATION_MAX: u32 = 1800; +const VOLUME_STEP: f32 = 0.1; + +impl AudioState { + pub fn load() -> Result { + let ed = ConfigEditor::load()?; + Ok(Self { + device: ed + .get_string("audio", "device") + .unwrap_or_else(|| "default".to_string()), + max_duration_secs: ed + .get_int("audio", "max_duration_secs") + .map(|n| n.clamp(0, u32::MAX as i64) as u32) + .unwrap_or(120), + pause_media: ed.get_bool("audio", "pause_media").unwrap_or(false), + feedback_enabled: ed + .get_bool("audio.feedback", "enabled") + .unwrap_or(false), + feedback_theme: ed + .get_string("audio.feedback", "theme") + .unwrap_or_else(|| "default".to_string()), + feedback_volume: ed + .get_string("audio.feedback", "volume") + .and_then(|s| s.parse().ok()) + .or_else(|| { + ed.get_int("audio.feedback", "volume") + .map(|n| n as f32) + }) + .unwrap_or(0.7), + field: Field::Device, + feedback: None, + dirty_since_load: false, + device_choices: enumerate_input_devices(), + editing: None, + }) + } + + fn is_text_field(field: Field) -> bool { + matches!(field, Field::Device) + } + + fn start_edit_if_text_field(&mut self) -> bool { + if !Self::is_text_field(self.field) { + return false; + } + self.editing = Some(TextEdit { + field: self.field, + input: TextInput::new(self.device.clone()), + }); + true + } + + fn commit_text_edit(&mut self, field: Field, buffer: String) { + if let Field::Device = field { + let trimmed = buffer.trim(); + self.device = if trimmed.is_empty() { + "default".to_string() + } else { + trimmed.to_string() + }; + } + self.dirty_since_load = true; + self.feedback = None; + } + + pub fn save(&mut self) -> Action { + let mut ed = match ConfigEditor::load() { + Ok(e) => e, + Err(e) => { + self.feedback = Some(Feedback { + level: FeedbackLevel::Err, + message: format!("load: {}", e), + }); + return Action::None; + } + }; + ed.set_string("audio", "device", &self.device); + ed.set_int( + "audio", + "max_duration_secs", + 
self.max_duration_secs as i64, + ); + ed.set_bool("audio", "pause_media", self.pause_media); + ed.set_bool("audio.feedback", "enabled", self.feedback_enabled); + ed.set_string("audio.feedback", "theme", &self.feedback_theme); + ed.set_string( + "audio.feedback", + "volume", + &format!("{:.2}", self.feedback_volume), + ); + + match ed.save() { + Ok(()) => { + self.dirty_since_load = false; + self.feedback = Some(Feedback { + level: FeedbackLevel::Ok, + message: format!("Saved to {}", ed.path().display()), + }); + } + Err(e) => { + self.feedback = Some(Feedback { + level: FeedbackLevel::Err, + message: format!("save: {}", e), + }); + } + } + Action::None + } + + pub fn reset(&mut self) { + match Self::load() { + Ok(fresh) => { + let field = self.field; + let cached = self.device_choices.clone(); + *self = fresh; + self.field = field; + self.device_choices = cached; + self.feedback = Some(Feedback { + level: FeedbackLevel::Ok, + message: "Reverted unsaved changes".to_string(), + }); + } + Err(e) => { + self.feedback = Some(Feedback { + level: FeedbackLevel::Err, + message: format!("reload: {}", e), + }); + } + } + } + + fn move_field(&mut self, delta: i32) { + let len = Field::ALL.len() as i32; + let cur = Field::ALL.iter().position(|f| *f == self.field).unwrap_or(0) as i32; + let new = (cur + delta).rem_euclid(len); + self.field = Field::ALL[new as usize]; + } + + fn cycle(&mut self, delta: i32) { + match self.field { + Field::Device => { + if !self.device_choices.is_empty() { + let idx = self + .device_choices + .iter() + .position(|d| d == &self.device) + .map(|i| i as i32) + .unwrap_or(-1); + let new = (idx + delta).rem_euclid(self.device_choices.len() as i32); + self.device = self.device_choices[new as usize].clone(); + } + } + Field::MaxDuration => { + let next = self.max_duration_secs as i32 + delta * DURATION_STEP as i32; + self.max_duration_secs = + next.clamp(DURATION_MIN as i32, DURATION_MAX as i32) as u32; + } + Field::PauseMedia => { + self.pause_media = 
!self.pause_media; + } + Field::FeedbackEnabled => { + self.feedback_enabled = !self.feedback_enabled; + } + Field::FeedbackTheme => { + let idx = THEME_CHOICES + .iter() + .position(|t| *t == self.feedback_theme) + .map(|i| i as i32) + .unwrap_or(-1); + let new = (idx + delta).rem_euclid(THEME_CHOICES.len() as i32); + self.feedback_theme = THEME_CHOICES[new as usize].to_string(); + } + Field::FeedbackVolume => { + let next = self.feedback_volume + delta as f32 * VOLUME_STEP; + self.feedback_volume = next.clamp(0.0, 1.0); + } + } + self.dirty_since_load = true; + self.feedback = None; + } +} + +fn enumerate_input_devices() -> Vec { + // ALSA's PCM probing prints "Cannot open device /dev/dsp" and similar + // messages to stderr for every device cpal touches. Inside the TUI's + // alternate screen those lines paint over our frame and corrupt the + // next redraw. Silence stderr for the duration of the probe. + let _silenced = SilencedStderr::install(); + + let mut out = vec!["default".to_string()]; + let host = cpal::default_host(); + if let Ok(devices) = host.input_devices() { + for d in devices { + if let Ok(name) = d.name() { + if name != "default" && !out.contains(&name) { + out.push(name); + } + } + } + } + out +} + +/// RAII guard that redirects fd 2 (stderr) to /dev/null on construction and +/// restores the original fd on drop. Used to swallow noisy ALSA / cpal +/// stderr during device enumeration so it doesn't bleed into the TUI's +/// alternate screen. 
+struct SilencedStderr { + saved_fd: Option, +} + +impl SilencedStderr { + fn install() -> Self { + let null_fd = unsafe { + libc::open( + b"/dev/null\0".as_ptr() as *const libc::c_char, + libc::O_WRONLY, + ) + }; + if null_fd < 0 { + return Self { saved_fd: None }; + } + let saved = unsafe { libc::dup(libc::STDERR_FILENO) }; + if saved < 0 { + unsafe { libc::close(null_fd) }; + return Self { saved_fd: None }; + } + unsafe { libc::dup2(null_fd, libc::STDERR_FILENO) }; + unsafe { libc::close(null_fd) }; + Self { + saved_fd: Some(saved), + } + } +} + +impl Drop for SilencedStderr { + fn drop(&mut self) { + if let Some(saved) = self.saved_fd.take() { + unsafe { + libc::dup2(saved, libc::STDERR_FILENO); + libc::close(saved); + } + } + } +} + +pub fn render(f: &mut Frame, area: Rect, app: &App) { + let state = match &app.audio { + Some(s) => s, + None => { + let block = Block::default().borders(Borders::ALL).title("Audio"); + let inner = block.inner(area); + f.render_widget(block, area); + f.render_widget( + Paragraph::new("Failed to load config; check ~/.config/voxtype/config.toml.") + .wrap(Wrap { trim: true }), + inner, + ); + return; + } + }; + + let rows = vec![ + FormRowSpec::new( + state.field == Field::Device, + "Input device", + match state.editing.as_ref() { + Some(e) if e.field == Field::Device => e.input.caret_string(), + _ => state.device.clone(), + }, + ), + FormRowSpec::new( + state.field == Field::MaxDuration, + "Max recording (seconds)", + state.max_duration_secs.to_string(), + ), + FormRowSpec::new( + state.field == Field::PauseMedia, + "Pause MPRIS media on record", + yesno(state.pause_media), + ), + FormRowSpec::new( + state.field == Field::FeedbackEnabled, + "Audio feedback sounds", + if state.feedback_enabled { "on" } else { "off" }, + ), + FormRowSpec::new( + state.field == Field::FeedbackTheme, + "Sound theme", + &state.feedback_theme, + ) + .dimmed(!state.feedback_enabled), + FormRowSpec::new( + state.field == Field::FeedbackVolume, + "Volume", 
+ format!("{:.0}%", state.feedback_volume * 100.0), + ) + .dimmed(!state.feedback_enabled), + ]; + + let feedback_pair = state.feedback.as_ref().map(|fb| { + ( + match fb.level { + FeedbackLevel::Ok => CommonFeedback::Ok, + FeedbackLevel::Err => CommonFeedback::Err, + }, + fb.message.as_str(), + ) + }); + + common::render_form_with_guidance( + f, + area, + "Audio", + state.dirty_since_load, + feedback_pair, + &rows, + guidance_for_field(state), + ); +} + +fn yesno(b: bool) -> &'static str { + if b { + "yes" + } else { + "no" + } +} + +fn heading<'a>(text: &'a str) -> Line<'a> { + Line::from(Span::styled( + text, + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + )) +} + +fn guidance_for_field(state: &AudioState) -> Vec> { + match state.field { + Field::Device => { + let count = state.device_choices.len().saturating_sub(1); + vec![ + heading("Input device"), + Line::from(""), + Line::from(format!( + "Detected {} device{} via cpal.", + count, + if count == 1 { "" } else { "s" } + )), + Line::from(""), + Line::from( + "\"default\" follows whatever PipeWire/PulseAudio is set \ + to as the system default source. If you swap headsets or \ + plug in a USB mic, default will follow.", + ), + Line::from(""), + Line::from( + "Pick a specific device if you want voxtype to ignore the \ + system default and stay locked to one mic — useful when \ + you stream and don't want voxtype to grab your stream mic.", + ), + ] + } + Field::MaxDuration => vec![ + heading("Max recording duration"), + Line::from(""), + Line::from( + "Safety cap. If you accidentally lock the PTT key down (or \ + use toggle mode and forget), voxtype stops at this many \ + seconds and transcribes what it has.", + ), + Line::from(""), + Line::from( + "120-300 seconds is normal for dictation. 
Bump to 600+ for \ + meeting-mode-style long recordings.", + ), + ], + Field::PauseMedia => vec![ + heading("Pause MPRIS media on record"), + Line::from(""), + Line::from( + "Pauses Spotify, MPV, browsers, and other MPRIS players \ + while you record, then resumes them when transcription \ + finishes.", + ), + Line::from(""), + Line::from(Span::styled( + "Requires playerctl to be installed.", + Style::default().fg(Color::Gray), + )), + Line::from(""), + Line::from( + "Useful if music in the background ever bleeds into your \ + mic, or if you'd rather hear yourself dictate without \ + lyrics in the way.", + ), + ], + Field::FeedbackEnabled => vec![ + heading("Audio feedback sounds"), + Line::from(""), + Line::from( + "Plays short cue sounds when recording starts, stops, and \ + (optionally) when transcription completes. Helpful when the \ + visual indicator isn't where you're looking.", + ), + Line::from(""), + Line::from(Span::styled( + "Sound theme and volume are only used when this is on.", + Style::default().fg(Color::Gray), + )), + ], + Field::FeedbackTheme => vec![ + heading("Sound theme"), + Line::from(""), + Line::from(Span::styled( + "default: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from("Soft chime up / down. Most users keep this."), + Line::from(""), + Line::from(Span::styled( + "subtle: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from("Quieter taps. Good in shared rooms."), + Line::from(""), + Line::from(Span::styled( + "mechanical: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from("Sharp tactile clicks."), + Line::from(""), + Line::from(Span::styled( + "Custom themes can point at a directory of .wav files; \ + edit [audio.feedback] theme directly to use one.", + Style::default().fg(Color::Gray), + )), + ], + Field::FeedbackVolume => vec![ + heading("Feedback volume"), + Line::from(""), + Line::from( + "Volume of the feedback cues, 0-100%. 
Independent of system \ + volume — voxtype attenuates the sample at playback time.", + ), + Line::from(""), + Line::from( + "Tuning tip: pick the lowest volume you can still hear over \ + your typing. The cue is a confirmation, not an alert.", + ), + ], + } +} + +pub fn handle_key(app: &mut App, key: KeyEvent) -> Action { + let state = match app.audio.as_mut() { + Some(s) => s, + None => return Action::None, + }; + + if state.editing.is_some() { + return handle_edit_key(state, key); + } + + match key.code { + KeyCode::Up | KeyCode::Char('k') => { + state.move_field(-1); + Action::None + } + KeyCode::Down | KeyCode::Char('j') => { + state.move_field(1); + Action::None + } + KeyCode::Left | KeyCode::Char('h') => { + state.cycle(-1); + Action::None + } + KeyCode::Right | KeyCode::Char('l') | KeyCode::Char(' ') => { + state.cycle(1); + Action::None + } + KeyCode::Enter | KeyCode::Char('i') => { + state.start_edit_if_text_field(); + Action::None + } + KeyCode::Char('s') => state.save(), + KeyCode::Char('r') => { + state.reset(); + Action::None + } + _ => Action::None, + } +} + +fn handle_edit_key(state: &mut AudioState, key: KeyEvent) -> Action { + let Some(editing) = state.editing.as_mut() else { + return Action::None; + }; + match editing.input.handle_key(key) { + TextInputResult::Continue => Action::None, + TextInputResult::Commit => { + let buf = editing.input.buffer().to_string(); + let field = editing.field; + state.editing = None; + state.commit_text_edit(field, buf); + Action::None + } + TextInputResult::Cancel => { + state.editing = None; + Action::None + } + } +} diff --git a/src/tui/common.rs b/src/tui/common.rs new file mode 100644 index 00000000..e7184a54 --- /dev/null +++ b/src/tui/common.rs @@ -0,0 +1,467 @@ +//! Shared rendering helpers for form-style sections (Hotkey, Audio, Output, …). 
+ +#![allow(dead_code)] + +use crossterm::event::{KeyCode, KeyEvent, KeyModifiers}; +use ratatui::{ + layout::{Constraint, Direction, Layout, Rect}, + style::{Color, Modifier, Style}, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph, Wrap}, + Frame, +}; + +/// Minimal single-line text input. Owned by a section's state when a free-text +/// field is being edited; sections check whether `editing` is `Some` and route +/// keys to [`TextInput::handle_key`] while it is. +#[derive(Debug, Clone)] +pub struct TextInput { + buffer: String, + /// Byte offset within `buffer`. Always lands on a UTF-8 char boundary. + cursor: usize, + /// Original value at the time editing started — used to detect "no change" + /// and reportable on Cancel. + original: String, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TextInputResult { + /// Key was consumed but editing continues. + Continue, + /// User pressed Enter; commit `buffer()` to the underlying field. + Commit, + /// User pressed Esc; discard buffer. 
+ Cancel, +} + +impl TextInput { + pub fn new(initial: impl Into) -> Self { + let buffer: String = initial.into(); + let cursor = buffer.len(); + Self { + original: buffer.clone(), + buffer, + cursor, + } + } + + pub fn buffer(&self) -> &str { + &self.buffer + } + + pub fn changed(&self) -> bool { + self.buffer != self.original + } + + pub fn handle_key(&mut self, key: KeyEvent) -> TextInputResult { + match key.code { + KeyCode::Enter => TextInputResult::Commit, + KeyCode::Esc => TextInputResult::Cancel, + KeyCode::Char('c') if key.modifiers.contains(KeyModifiers::CONTROL) => { + TextInputResult::Cancel + } + KeyCode::Backspace => { + if self.cursor > 0 { + let prev = prev_char_boundary(&self.buffer, self.cursor); + self.buffer.replace_range(prev..self.cursor, ""); + self.cursor = prev; + } + TextInputResult::Continue + } + KeyCode::Delete => { + if self.cursor < self.buffer.len() { + let next = next_char_boundary(&self.buffer, self.cursor); + self.buffer.replace_range(self.cursor..next, ""); + } + TextInputResult::Continue + } + KeyCode::Left => { + if self.cursor > 0 { + self.cursor = prev_char_boundary(&self.buffer, self.cursor); + } + TextInputResult::Continue + } + KeyCode::Right => { + if self.cursor < self.buffer.len() { + self.cursor = next_char_boundary(&self.buffer, self.cursor); + } + TextInputResult::Continue + } + KeyCode::Home => { + self.cursor = 0; + TextInputResult::Continue + } + KeyCode::End => { + self.cursor = self.buffer.len(); + TextInputResult::Continue + } + KeyCode::Char('a') if key.modifiers.contains(KeyModifiers::CONTROL) => { + self.cursor = 0; + TextInputResult::Continue + } + KeyCode::Char('e') if key.modifiers.contains(KeyModifiers::CONTROL) => { + self.cursor = self.buffer.len(); + TextInputResult::Continue + } + KeyCode::Char('u') if key.modifiers.contains(KeyModifiers::CONTROL) => { + // Clear the line. 
+ self.buffer.clear(); + self.cursor = 0; + TextInputResult::Continue + } + KeyCode::Char('w') if key.modifiers.contains(KeyModifiers::CONTROL) => { + // Delete the previous word. + let prev_word = prev_word_boundary(&self.buffer, self.cursor); + self.buffer.replace_range(prev_word..self.cursor, ""); + self.cursor = prev_word; + TextInputResult::Continue + } + KeyCode::Char(c) if !key.modifiers.contains(KeyModifiers::CONTROL) => { + let mut tmp = [0u8; 4]; + let s = c.encode_utf8(&mut tmp); + self.buffer.insert_str(self.cursor, s); + self.cursor += s.len(); + TextInputResult::Continue + } + _ => TextInputResult::Continue, + } + } + + /// Plain-text rendering of the buffer with a `│` caret inserted at the + /// cursor position. Suitable for slotting into a form row's value column + /// where we can't easily run multi-span styling. + pub fn caret_string(&self) -> String { + let mut out = String::with_capacity(self.buffer.len() + 1); + out.push_str(&self.buffer[..self.cursor]); + out.push('│'); + out.push_str(&self.buffer[self.cursor..]); + out + } + + /// Render the buffer with a visible cursor caret. Returned line is meant + /// to slot into a form row's "value" column. + pub fn render_inline(&self) -> Line<'static> { + let (before, at, after) = split_at_cursor(&self.buffer, self.cursor); + let caret_glyph = if at.is_empty() { " ".to_string() } else { at }; + Line::from(vec![ + Span::raw(before), + Span::styled( + caret_glyph, + Style::default().bg(Color::White).fg(Color::Black), + ), + Span::raw(after), + ]) + } +} + +fn prev_char_boundary(s: &str, idx: usize) -> usize { + let mut i = idx.saturating_sub(1); + while i > 0 && !s.is_char_boundary(i) { + i -= 1; + } + i +} + +fn next_char_boundary(s: &str, idx: usize) -> usize { + let mut i = (idx + 1).min(s.len()); + while i < s.len() && !s.is_char_boundary(i) { + i += 1; + } + i +} + +fn prev_word_boundary(s: &str, idx: usize) -> usize { + let bytes = s.as_bytes(); + let mut i = idx; + // Skip trailing spaces. 
+ while i > 0 && bytes[i - 1].is_ascii_whitespace() { + i -= 1; + } + // Skip non-space characters. + while i > 0 && !bytes[i - 1].is_ascii_whitespace() { + i -= 1; + } + i +} + +#[cfg(test)] +mod tests { + use super::*; + use crossterm::event::{KeyEventKind, KeyEventState}; + + fn key(code: KeyCode) -> KeyEvent { + KeyEvent { + code, + modifiers: KeyModifiers::NONE, + kind: KeyEventKind::Press, + state: KeyEventState::NONE, + } + } + + fn ctrl(c: char) -> KeyEvent { + KeyEvent { + code: KeyCode::Char(c), + modifiers: KeyModifiers::CONTROL, + kind: KeyEventKind::Press, + state: KeyEventState::NONE, + } + } + + #[test] + fn typing_appends_to_buffer() { + let mut input = TextInput::new(""); + input.handle_key(key(KeyCode::Char('h'))); + input.handle_key(key(KeyCode::Char('i'))); + assert_eq!(input.buffer(), "hi"); + } + + #[test] + fn backspace_deletes_prev_char() { + let mut input = TextInput::new("hello"); + assert_eq!(input.handle_key(key(KeyCode::Backspace)), TextInputResult::Continue); + assert_eq!(input.buffer(), "hell"); + } + + #[test] + fn left_then_insert_inserts_mid_string() { + let mut input = TextInput::new("ac"); + input.handle_key(key(KeyCode::Left)); + input.handle_key(key(KeyCode::Char('b'))); + assert_eq!(input.buffer(), "abc"); + } + + #[test] + fn enter_signals_commit() { + let mut input = TextInput::new("done"); + assert_eq!(input.handle_key(key(KeyCode::Enter)), TextInputResult::Commit); + } + + #[test] + fn esc_signals_cancel() { + let mut input = TextInput::new("x"); + assert_eq!(input.handle_key(key(KeyCode::Esc)), TextInputResult::Cancel); + } + + #[test] + fn ctrl_u_clears() { + let mut input = TextInput::new("hello"); + input.handle_key(ctrl('u')); + assert_eq!(input.buffer(), ""); + } + + #[test] + fn ctrl_w_deletes_prev_word() { + let mut input = TextInput::new("hello world"); + input.handle_key(ctrl('w')); + assert_eq!(input.buffer(), "hello "); + } + + #[test] + fn changed_tracks_buffer_vs_original() { + let mut input = 
TextInput::new("abc"); + assert!(!input.changed()); + input.handle_key(key(KeyCode::Char('d'))); + assert!(input.changed()); + } +} + +fn split_at_cursor(s: &str, idx: usize) -> (String, String, String) { + if idx >= s.len() { + return (s.to_string(), String::new(), String::new()); + } + let mut next = idx; + while next < s.len() && !s.is_char_boundary(next + 1) { + next += 1; + } + next = (next + 1).min(s.len()); + while next < s.len() && !s.is_char_boundary(next) { + next += 1; + } + ( + s[..idx].to_string(), + s[idx..next].to_string(), + s[next..].to_string(), + ) +} + +#[derive(Debug, Clone, Copy)] +pub enum FeedbackLevel { + Ok, + Err, +} + +pub fn render_feedback(f: &mut Frame, area: Rect, level: FeedbackLevel, message: &str) { + let style = match level { + FeedbackLevel::Ok => Style::default().fg(Color::Green), + FeedbackLevel::Err => Style::default().fg(Color::Red), + }; + let prefix = match level { + FeedbackLevel::Ok => "✓ ", + FeedbackLevel::Err => "✗ ", + }; + f.render_widget( + Paragraph::new(Line::from(Span::styled( + format!("{}{}", prefix, message), + style, + ))), + area, + ); +} + +pub fn render_section_header(f: &mut Frame, area: Rect, title: &str, dirty: bool) { + let dirty_span = if dirty { + Span::styled(" • unsaved", Style::default().fg(Color::Yellow)) + } else { + Span::raw("") + }; + let line = Line::from(vec![ + Span::styled( + title.to_string(), + Style::default().add_modifier(Modifier::BOLD), + ), + dirty_span, + ]); + f.render_widget(Paragraph::new(vec![line, Line::from("")]), area); +} + +pub fn render_bottom_hint(f: &mut Frame, area: Rect, dirty: bool) { + let dirty_marker = if dirty { + Span::styled(" ●", Style::default().fg(Color::Yellow)) + } else { + Span::raw("") + }; + let line = Line::from(vec![ + Span::styled( + " ↑↓ field ←→ change s save r revert ", + Style::default().fg(Color::Gray), + ), + dirty_marker, + ]); + f.render_widget(Paragraph::new(line), area); +} + +/// Single form row: focused or unfocused, with a 
label-and-value layout that +/// matches the rest of the form sections. +pub fn form_row<'a>(focused: bool, label: &str, value: &str) -> Line<'a> { + form_row_dimmed(focused, false, label, value) +} + +/// Form row that supports a `dimmed` variant for fields disabled by another +/// toggle (e.g. the rest of the Hotkey form when the evdev listener is off). +pub fn form_row_dimmed<'a>( + focused: bool, + dimmed: bool, + label: &str, + value: &str, +) -> Line<'a> { + let dim_color = Color::DarkGray; + let label_style = if dimmed { + Style::default().fg(dim_color) + } else if focused { + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD) + } else { + Style::default() + }; + let value_style = if dimmed { + Style::default().fg(dim_color) + } else if focused { + Style::default().bg(Color::DarkGray).fg(Color::White) + } else { + Style::default().fg(Color::White) + }; + let prefix = if focused { "▸ " } else { " " }; + Line::from(vec![ + Span::styled(format!("{}{:<32}", prefix, label), label_style), + Span::styled(format!(" ◂ {} ▸", value), value_style), + ]) +} + +/// Specification for a row in a two-pane form. +pub struct FormRowSpec { + pub focused: bool, + pub dimmed: bool, + pub label: String, + pub value: String, +} + +impl FormRowSpec { + pub fn new(focused: bool, label: impl Into, value: impl Into) -> Self { + Self { + focused, + dimmed: false, + label: label.into(), + value: value.into(), + } + } + + pub fn dimmed(mut self, dimmed: bool) -> Self { + self.dimmed = dimmed; + self + } +} + +/// Render a section using the General-style two-panel layout: a form panel on +/// the left (rows, save/revert hints) and a guidance panel on the right that +/// shows context-sensitive help for the focused row. 
+/// +/// Layout (vertical): +/// 1 row feedback (only present if `feedback` is Some) +/// 2 rows section title + dirty marker +/// N rows two columns: form (Settings) on left, guidance (About) on right +/// 1 row bottom hint +pub fn render_form_with_guidance( + f: &mut Frame, + area: Rect, + title: &str, + dirty: bool, + feedback: Option<(FeedbackLevel, &str)>, + rows: &[FormRowSpec], + guidance: Vec>, +) { + let chunks = Layout::default() + .direction(Direction::Vertical) + .constraints([ + Constraint::Length(if feedback.is_some() { 2 } else { 0 }), + Constraint::Length(2), + Constraint::Min(8), + Constraint::Length(1), + ]) + .split(area); + + if let Some((lvl, msg)) = feedback { + render_feedback(f, chunks[0], lvl, msg); + } + render_section_header(f, chunks[1], title, dirty); + + // Two columns: Settings on the left, About on the right. + let body = Layout::default() + .direction(Direction::Horizontal) + .constraints([Constraint::Percentage(55), Constraint::Percentage(45)]) + .split(chunks[2]); + + render_settings_panel(f, body[0], rows); + render_guidance_panel(f, body[1], guidance); + + render_bottom_hint(f, chunks[3], dirty); +} + +fn render_settings_panel(f: &mut Frame, area: Rect, rows: &[FormRowSpec]) { + let block = Block::default().borders(Borders::ALL).title("Settings"); + let inner = block.inner(area); + f.render_widget(block, area); + + let lines: Vec = rows + .iter() + .map(|r| form_row_dimmed(r.focused, r.dimmed, &r.label, &r.value)) + .collect(); + f.render_widget(Paragraph::new(lines).wrap(Wrap { trim: false }), inner); +} + +fn render_guidance_panel(f: &mut Frame, area: Rect, lines: Vec>) { + let block = Block::default().borders(Borders::ALL).title("About"); + let inner = block.inner(area); + f.render_widget(block, area); + f.render_widget(Paragraph::new(lines).wrap(Wrap { trim: false }), inner); +} diff --git a/src/tui/compositor_bindings.rs b/src/tui/compositor_bindings.rs new file mode 100644 index 00000000..b2786608 --- /dev/null +++ 
/// Config dialect used when rendering a [`Suggestion`]. It is chosen from the
/// compositor that owns the most detected bindings, with Hyprland as the
/// fallback.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Compositor {
    Hyprland,
    Sway,
    Niri,
}

impl Compositor {
    /// Human-readable compositor name.
    pub fn name(self) -> &'static str {
        let label = match self {
            Self::Hyprland => "Hyprland",
            Self::Sway => "Sway",
            Self::Niri => "Niri",
        };
        label
    }
}
+#[derive(Debug, Clone)] +pub struct Suggestion { + pub label: String, + pub purpose: &'static str, + pub config_lines: Vec, +} + +/// Pick the most likely compositor based on the bindings already detected, +/// or default to Hyprland. +pub fn dominant_compositor(detected: &[Binding]) -> Compositor { + let mut hypr = 0; + let mut sway = 0; + let mut niri = 0; + for b in detected { + match b.compositor { + "Hyprland" => hypr += 1, + "Sway" => sway += 1, + "Niri" => niri += 1, + _ => {} + } + } + if niri > hypr && niri > sway { + Compositor::Niri + } else if sway > hypr { + Compositor::Sway + } else { + Compositor::Hyprland + } +} + +/// Look at the actions the user has already bound and suggest config snippets +/// for likely-missing ones (cancel, toggle, meeting start/stop). Suggested +/// keys come from a small candidate list, skipping any combo already bound to +/// another action in the user's compositor configs. +pub fn suggest_missing(detected: &[Binding]) -> Vec { + let comp = dominant_compositor(detected); + let occupied = enumerate_occupied_keys(comp); + let actions: std::collections::HashSet<&str> = + detected.iter().map(|b| b.action.as_str()).collect(); + + let has_start = actions.contains("record start"); + let has_stop = actions.contains("record stop"); + let has_toggle = actions.contains("record toggle"); + let has_cancel = actions.contains("record cancel"); + let has_meeting_start = actions.contains("meeting start"); + let has_meeting_stop = actions.contains("meeting stop"); + + // Track keys we've already proposed in this batch so two suggestions don't + // collide with each other. 
+ let mut taken: std::collections::HashSet = occupied.clone(); + + let mut suggestions = Vec::new(); + + if has_start && !has_stop { + suggestions.push(make_suggestion( + comp, + &mut taken, + "Stop (release of your PTT key)", + "Without a stop binding, hold-to-record never finishes — voxtype \ + will run until max_duration_secs hits.", + Role::Stop, + )); + } + if has_stop && !has_start { + suggestions.push(make_suggestion( + comp, + &mut taken, + "Start (press of your PTT key)", + "You have a stop binding but no start — recording can't begin from \ + your compositor.", + Role::Start, + )); + } + + if !has_start && !has_stop && !has_toggle { + suggestions.push(make_suggestion( + comp, + &mut taken, + "Push-to-talk (start + stop pair)", + "Hold the key while you speak; release to transcribe.", + Role::PttPair, + )); + suggestions.push(make_suggestion( + comp, + &mut taken, + "Toggle (single-key alternative)", + "Press once to start, again to stop. Better for long dictations.", + Role::Toggle, + )); + } else if !has_toggle && (has_start || has_stop) { + suggestions.push(make_suggestion( + comp, + &mut taken, + "Toggle (alternative to PTT)", + "A single-key toggle bound to a different key gives you a \ + long-dictation flow without competing with the PTT key.", + Role::Toggle, + )); + } + + if !has_cancel { + suggestions.push(make_suggestion( + comp, + &mut taken, + "Cancel (abort in-progress recording)", + "Discards audio without transcribing — useful when you trip the \ + PTT key by accident or the wrong window has focus.", + Role::Cancel, + )); + } + + if !has_meeting_start && !has_meeting_stop { + suggestions.push(make_suggestion( + comp, + &mut taken, + "Meeting mode (start + stop)", + "Long-form recording with chunked transcription. 
/// Which voxtype binding a suggestion provides. The role selects both the
/// candidate key list (`candidates_for`) and the rendered config lines
/// (`render_role`).
#[derive(Debug, Clone, Copy)]
enum Role {
    /// `voxtype record start`.
    Start,
    /// `voxtype record stop`, rendered as a key-release binding for Hyprland
    /// and Sway; Niri gets an explanatory comment instead.
    Stop,
    /// `voxtype record toggle`.
    Toggle,
    /// `voxtype record cancel`.
    Cancel,
    /// Start and stop on the same key (push-to-talk); rendered as a toggle
    /// binding for Niri.
    PttPair,
    /// `voxtype meeting start`.
    MeetingStart,
    /// `voxtype meeting stop`.
    MeetingStop,
    /// Meeting start on one key plus meeting stop on a second key.
    MeetingPair,
}
+ Role::Start | Role::Stop | Role::PttPair => &[ + "F13", "F14", "F15", "F16", "HOME", "PAUSE", "SCROLLLOCK", "INSERT", "MENU", + ], + Role::Toggle => &[ + "SUPER+SPACE", + "SUPER+SLASH", + "SUPER+SEMICOLON", + "SUPER+APOSTROPHE", + "SUPER+BACKSLASH", + "SUPER+COMMA", + "SUPER+PERIOD", + ], + Role::Cancel => &[ + "SUPER+ESCAPE", + "SUPER+BACKSPACE", + "SUPER+DELETE", + "CTRL+SUPER+ESCAPE", + ], + Role::MeetingStart => &[ + "SUPER+M", + "CTRL+SUPER+M", + "ALT+SUPER+M", + ], + Role::MeetingStop => &[ + "SHIFT+SUPER+M", + "ALT+SUPER+M", + "CTRL+SUPER+M", + ], + Role::MeetingPair => &[ + "SUPER+M", + "CTRL+SUPER+M", + "ALT+SUPER+M", + ], + } +} + +fn make_suggestion( + comp: Compositor, + taken: &mut std::collections::HashSet, + label: &str, + purpose: &'static str, + role: Role, +) -> Suggestion { + let candidates = candidates_for(role); + let mut chosen: Option<&'static str> = None; + for cand in candidates { + if !taken.contains(*cand) { + chosen = Some(cand); + break; + } + } + let key = chosen.unwrap_or(candidates[0]); + let collision = chosen.is_none(); + if !collision { + taken.insert(key.to_string()); + } + let stop_key = if matches!(role, Role::MeetingPair) { + // Pick a second key that doesn't collide with the start key just chosen. + let stop_candidates = candidates_for(Role::MeetingStop); + let mut second = None; + for cand in stop_candidates { + if !taken.contains(*cand) { + second = Some(*cand); + break; + } + } + let chosen_stop = second.unwrap_or(stop_candidates[0]); + if second.is_some() { + taken.insert(chosen_stop.to_string()); + } + Some(chosen_stop) + } else { + None + }; + + let mut config_lines = render_role(comp, role, key, stop_key.as_deref()); + if collision { + config_lines.insert( + 0, + "// All preferred candidates are already bound; pick a key that's free." 
+ .to_string(), + ); + } + + Suggestion { + label: label.to_string(), + purpose, + config_lines, + } +} + +/// Render one role into compositor-formatted binding lines, parameterized by +/// the chosen canonical key (and optional second key for paired roles). +fn render_role( + comp: Compositor, + role: Role, + key: &str, + second_key: Option<&str>, +) -> Vec { + match (comp, role) { + (Compositor::Hyprland, Role::Start) => { + vec![hyprland_bind("bindd", key, "Voxtype PTT (start)", "voxtype record start")] + } + (Compositor::Hyprland, Role::Stop) => { + vec![hyprland_bind("bindrd", key, "Voxtype PTT (stop)", "voxtype record stop")] + } + (Compositor::Hyprland, Role::PttPair) => vec![ + hyprland_bind("bindd", key, "Voxtype PTT (start)", "voxtype record start"), + hyprland_bind("bindrd", key, "Voxtype PTT (stop)", "voxtype record stop"), + ], + (Compositor::Hyprland, Role::Toggle) => { + vec![hyprland_bind("bind", key, "Voxtype toggle", "voxtype record toggle")] + } + (Compositor::Hyprland, Role::Cancel) => { + vec![hyprland_bind("bind", key, "Voxtype cancel", "voxtype record cancel")] + } + (Compositor::Hyprland, Role::MeetingStart) => { + vec![hyprland_bind("bind", key, "Voxtype meeting start", "voxtype meeting start")] + } + (Compositor::Hyprland, Role::MeetingStop) => { + vec![hyprland_bind("bind", key, "Voxtype meeting stop", "voxtype meeting stop")] + } + (Compositor::Hyprland, Role::MeetingPair) => vec![ + hyprland_bind("bind", key, "Voxtype meeting start", "voxtype meeting start"), + hyprland_bind( + "bind", + second_key.unwrap_or("SHIFT+SUPER+M"), + "Voxtype meeting stop", + "voxtype meeting stop", + ), + ], + + (Compositor::Sway, Role::Start) => { + vec![format!("bindsym {} exec voxtype record start", canonical_to_sway(key))] + } + (Compositor::Sway, Role::Stop) => vec![format!( + "bindsym --release {} exec voxtype record stop", + canonical_to_sway(key) + )], + (Compositor::Sway, Role::PttPair) => vec![ + format!("bindsym {} exec voxtype record start", 
canonical_to_sway(key)), + format!( + "bindsym --release {} exec voxtype record stop", + canonical_to_sway(key) + ), + ], + (Compositor::Sway, Role::Toggle) => vec![format!( + "bindsym {} exec voxtype record toggle", + canonical_to_sway(key) + )], + (Compositor::Sway, Role::Cancel) => vec![format!( + "bindsym {} exec voxtype record cancel", + canonical_to_sway(key) + )], + (Compositor::Sway, Role::MeetingStart) => vec![format!( + "bindsym {} exec voxtype meeting start", + canonical_to_sway(key) + )], + (Compositor::Sway, Role::MeetingStop) => vec![format!( + "bindsym {} exec voxtype meeting stop", + canonical_to_sway(key) + )], + (Compositor::Sway, Role::MeetingPair) => vec![ + format!( + "bindsym {} exec voxtype meeting start", + canonical_to_sway(key) + ), + format!( + "bindsym {} exec voxtype meeting stop", + canonical_to_sway(second_key.unwrap_or("SHIFT+SUPER+M")) + ), + ], + + (Compositor::Niri, Role::Start) => vec![format!( + "{} {{ spawn \"voxtype\" \"record\" \"start\"; }}", + canonical_to_niri(key) + )], + (Compositor::Niri, Role::Stop) => vec![format!( + "// Niri does not bind on key release; consider Role::Toggle instead." 
/// Format one Hyprland bind line that runs `cmd` via the `exec` dispatcher.
///
/// `directive` is the full keyword (`bind`, `bindd`, `bindrd`, …) and
/// `canonical_key` is a canonical combo such as `SHIFT+SUPER+M`. Modifiers are
/// written space-separated; a combo without modifiers leaves the first field
/// empty (`bindd = , F13, …`).
///
/// Hyprland only accepts a description field when the directive carries the
/// `d` flag. `label` is therefore emitted for `bindd`/`bindrd` and omitted for
/// plain `bind`, where it would otherwise be parsed as the dispatcher name.
fn hyprland_bind(directive: &str, canonical_key: &str, label: &str, cmd: &str) -> String {
    let (mods, key) = canonical_split(canonical_key);
    let mods_hypr = mods.replace('+', " ");

    // Flags are whatever follows the literal "bind" prefix. The prefix must be
    // stripped first because "bind" itself contains a 'd'.
    let has_description = directive
        .strip_prefix("bind")
        .map_or(false, |flags| flags.contains('d'));

    if has_description {
        format!("{} = {}, {}, {}, exec, {}", directive, mods_hypr, key, label, cmd)
    } else {
        format!("{} = {}, {}, exec, {}", directive, mods_hypr, key, cmd)
    }
}

/// Split a canonical "MODS+KEY" combo into (modifiers, key). The modifiers
/// come back sorted and '+'-joined; a combo without '+' yields an empty
/// modifier string.
fn canonical_split(canonical: &str) -> (String, String) {
    match canonical.rsplit_once('+') {
        None => (String::new(), canonical.to_string()),
        Some((mods, key)) => {
            let mut mods: Vec<&str> = mods.split('+').collect();
            mods.sort();
            (mods.join("+"), key.to_string())
        }
    }
}
/// Translate a canonical (uppercase) key name into the keysym spelling used
/// in Sway `bindsym` lines.
///
/// Named keys use the explicit table below. Function keys (`F` followed by at
/// least one digit) keep their casing, single-character keys are lowercased,
/// and anything else is passed through unchanged.
fn sway_key_name(canonical_key: &str) -> String {
    match canonical_key {
        "SPACE" => "space".into(),
        "ESCAPE" => "Escape".into(),
        "BACKSPACE" => "BackSpace".into(),
        "DELETE" => "Delete".into(),
        "RETURN" | "ENTER" => "Return".into(),
        "PRINT" => "Print".into(),
        "PAUSE" => "Pause".into(),
        "INSERT" => "Insert".into(),
        "HOME" => "Home".into(),
        "END" => "End".into(),
        "MENU" => "Menu".into(),
        "SCROLLLOCK" => "Scroll_Lock".into(),
        "APOSTROPHE" => "apostrophe".into(),
        "SEMICOLON" => "semicolon".into(),
        "SLASH" => "slash".into(),
        "BACKSLASH" => "backslash".into(),
        "COMMA" => "comma".into(),
        "PERIOD" => "period".into(),
        // F1-F24 keep their casing. The length check keeps the bare letter
        // "F" out of this arm: its digit suffix is empty, so `all` would be
        // vacuously true and the letter would never be lowercased.
        s if s.len() > 1
            && s.starts_with('F')
            && s[1..].chars().all(|c| c.is_ascii_digit()) =>
        {
            s.into()
        }
        // Single letters: lowercase.
        s if s.len() == 1 => s.to_lowercase(),
        s => s.into(),
    }
}
/// Translate a canonical (uppercase) key name into the spelling used in Niri
/// bind entries. Keys without an entry in the table, including function keys
/// and single letters, are returned unchanged.
fn niri_key_name(canonical_key: &str) -> String {
    const NAMED_KEYS: &[(&str, &str)] = &[
        ("SPACE", "Space"),
        ("ESCAPE", "Escape"),
        ("BACKSPACE", "BackSpace"),
        ("DELETE", "Delete"),
        ("RETURN", "Return"),
        ("ENTER", "Return"),
        ("INSERT", "Insert"),
        ("HOME", "Home"),
        ("END", "End"),
        ("PAUSE", "Pause"),
        ("MENU", "Menu"),
        ("SCROLLLOCK", "Scroll_Lock"),
        ("APOSTROPHE", "Apostrophe"),
        ("SEMICOLON", "Semicolon"),
        ("SLASH", "Slash"),
        ("BACKSLASH", "Backslash"),
        ("COMMA", "Comma"),
        ("PERIOD", "Period"),
    ];

    let spelled = NAMED_KEYS
        .iter()
        .find(|(canonical, _)| *canonical == canonical_key)
        .map(|(_, niri)| *niri)
        .unwrap_or(canonical_key);
    spelled.to_string()
}
+ let mut out = std::collections::HashSet::new(); + let home = match std::env::var("HOME") { + Ok(h) => PathBuf::from(h), + Err(_) => return out, + }; + enumerate_hyprland_keys(&home, &mut out); + enumerate_sway_keys(&home, &mut out); + enumerate_niri_keys(&home, &mut out); + out +} + +fn enumerate_hyprland_keys(home: &Path, out: &mut std::collections::HashSet) { + let dir = home.join(".config/hypr"); + let Ok(entries) = fs::read_dir(&dir) else { return }; + for entry in entries.flatten() { + let path = entry.path(); + if path.extension().and_then(|s| s.to_str()) != Some("conf") { + continue; + } + let Ok(text) = fs::read_to_string(&path) else { continue }; + for line in text.lines() { + let trimmed = line.trim(); + if trimmed.starts_with('#') { + continue; + } + let Some((lhs, rhs)) = trimmed.split_once('=') else { continue }; + if !lhs.trim().starts_with("bind") { + continue; + } + let parts: Vec<&str> = rhs.split(',').map(str::trim).collect(); + if parts.len() < 2 { + continue; + } + let mods = parts[0]; + let key = parts[1]; + if key.is_empty() { + continue; + } + out.insert(canonicalize_hyprland(mods, key)); + } + } +} + +fn enumerate_sway_keys(home: &Path, out: &mut std::collections::HashSet) { + let mut paths: Vec = Vec::new(); + let main = home.join(".config/sway/config"); + if main.exists() { + paths.push(main); + } + if let Ok(entries) = fs::read_dir(home.join(".config/sway/config.d")) { + for entry in entries.flatten() { + paths.push(entry.path()); + } + } + for path in paths { + let Ok(text) = fs::read_to_string(&path) else { continue }; + for line in text.lines() { + let trimmed = line.trim(); + if trimmed.starts_with('#') { + continue; + } + let mut parts = trimmed.split_whitespace(); + let Some(head) = parts.next() else { continue }; + if head != "bindsym" && head != "bindcode" { + continue; + } + let mut rest: Vec<&str> = parts.collect(); + while let Some(first) = rest.first() { + if first.starts_with("--") { + rest.remove(0); + } else { + break; + 
/// Convert a Hyprland modifier field and key field into canonical form:
/// uppercase, modifiers sorted and de-duplicated, everything joined by '+'.
///
/// Modifiers may be separated by whitespace, '_' or '+' (`SUPER SHIFT`,
/// `SUPER_SHIFT`). Hyprland's alternative modifier spellings are folded onto
/// the names used by the candidate lists: `CONTROL` becomes `CTRL`, and
/// `WIN`, `LOGO` and `MOD4` become `SUPER`. Unrecognised tokens (for example
/// `$mainMod` variables) are kept as written, uppercased.
fn canonicalize_hyprland(mods: &str, key: &str) -> String {
    let mut parts: Vec<String> = mods
        .split(|c: char| c.is_whitespace() || c == '_' || c == '+')
        .filter(|token| !token.is_empty())
        .map(|token| match token.to_uppercase().as_str() {
            "CONTROL" => "CTRL".to_string(),
            "WIN" | "LOGO" | "MOD4" => "SUPER".to_string(),
            other => other.to_string(),
        })
        .collect();
    parts.sort();
    // Aliases can collapse onto the same modifier (e.g. "SUPER WIN").
    parts.dedup();
    parts.push(key.to_uppercase());
    parts.join("+")
}
/// Convert a Niri key combo (e.g. `Mod+Shift+M`) into canonical form:
/// uppercase, modifiers sorted, joined by '+'.
///
/// `Mod`, `Super` and `Win` all become `SUPER`, and `Ctrl`/`Control` become
/// `CTRL`, so combos written with either spelling match the candidate lists.
/// The key `Scroll_Lock` is mapped to `SCROLLLOCK`, the canonical name that
/// `niri_key_name` renders as `Scroll_Lock`; other keys are only uppercased.
fn canonicalize_niri(combo: &str) -> String {
    // Everything before the last '+' is modifiers; the rest is the key.
    let (mods, key) = match combo.rsplit_once('+') {
        Some((mods, key)) => (mods, key),
        None => ("", combo),
    };

    let mut parts: Vec<String> = mods
        .split('+')
        .filter(|m| !m.is_empty())
        .map(|m| match m.to_lowercase().as_str() {
            "mod" | "super" | "win" => "SUPER".to_string(),
            "ctrl" | "control" => "CTRL".to_string(),
            "alt" => "ALT".to_string(),
            "shift" => "SHIFT".to_string(),
            other => other.to_uppercase(),
        })
        .collect();
    parts.sort();

    let key = match key.to_uppercase().as_str() {
        "SCROLL_LOCK" => "SCROLLLOCK".to_string(),
        other => other.to_string(),
    };
    parts.push(key);
    parts.join("+")
}
+fn parse_hyprland_line(line: &str, source: &Path) -> Option { + let trimmed = line.trim(); + if trimmed.starts_with('#') { + return None; + } + let (lhs, rhs) = trimmed.split_once('=')?; + let lhs = lhs.trim(); + if !lhs.starts_with("bind") { + return None; + } + if !rhs.contains("voxtype") { + return None; + } + // Split by commas; Hyprland tolerates whitespace. + let parts: Vec<&str> = rhs.split(',').map(str::trim).collect(); + if parts.len() < 4 { + return None; + } + let mods = parts[0]; + let key = parts[1]; + let cmd = parts.last().copied().unwrap_or(""); + let action = action_from_command(cmd)?; + let keys = if mods.is_empty() { + key.to_string() + } else { + format!("{}+{}", mods, key) + }; + Some(Binding { + compositor: "Hyprland", + keys, + action, + source: source.to_path_buf(), + }) +} + +fn detect_sway(home: &Path, out: &mut Vec) { + let main = home.join(".config/sway/config"); + if main.exists() { + if let Ok(text) = fs::read_to_string(&main) { + for line in text.lines() { + if let Some(b) = parse_sway_line(line, &main) { + out.push(b); + } + } + } + } + let conf_d = home.join(".config/sway/config.d"); + if let Ok(entries) = fs::read_dir(&conf_d) { + for entry in entries.flatten() { + let path = entry.path(); + let Ok(text) = fs::read_to_string(&path) else { + continue; + }; + for line in text.lines() { + if let Some(b) = parse_sway_line(line, &path) { + out.push(b); + } + } + } + } +} + +/// Sway `bindsym MOD+KEY exec voxtype SUBCMD ACTION` (or `bindcode`). +fn parse_sway_line(line: &str, source: &Path) -> Option { + let trimmed = line.trim(); + if trimmed.starts_with('#') { + return None; + } + if !trimmed.contains("voxtype") { + return None; + } + let mut parts = trimmed.split_whitespace(); + let head = parts.next()?; + if head != "bindsym" && head != "bindcode" { + return None; + } + // Skip optional `--release` and similar flags. 
+ let mut rest: Vec<&str> = parts.collect(); + while let Some(first) = rest.first() { + if first.starts_with("--") { + rest.remove(0); + } else { + break; + } + } + let keys = rest.first()?.to_string(); + // Find `exec` and look at what comes after `voxtype record`. + let cmd_start = rest.iter().position(|w| *w == "exec")? + 1; + let cmd = rest[cmd_start..].join(" "); + let action = action_from_command(&cmd)?; + Some(Binding { + compositor: "Sway", + keys, + action, + source: source.to_path_buf(), + }) +} + +fn detect_niri(home: &Path, out: &mut Vec) { + let path = home.join(".config/niri/config.kdl"); + let Ok(text) = fs::read_to_string(&path) else { + return; + }; + for line in text.lines() { + if let Some(b) = parse_niri_line(line, &path) { + out.push(b); + } + } +} + +/// Niri's KDL `binds { Mod+Key { spawn "voxtype" "record" "ACTION"; } }`. +/// We only handle single-line bindings, which is the common case. +fn parse_niri_line(line: &str, source: &Path) -> Option { + let trimmed = line.trim(); + if trimmed.starts_with("//") { + return None; + } + if !trimmed.contains("voxtype") || !trimmed.contains("spawn") { + return None; + } + // Form: `Mod+Key { spawn "voxtype" "record" "ACTION"; }`. + let (keys, rest) = trimmed.split_once('{')?; + let keys = keys.trim(); + if keys.is_empty() { + return None; + } + // Pull the quoted args after `spawn`. 
/// Extract the `<subcommand> <action>` pair that follows `voxtype` in a
/// shell command line, e.g. `"voxtype record start"` → `"record start"`.
///
/// The `voxtype` token is matched ASCII-case-insensitively. The two following
/// whitespace-separated words have leading and trailing non-alphanumeric
/// characters (quotes, semicolons, …) trimmed. Returns `None` when `voxtype`
/// is absent, fewer than two words follow it, or the pair is not accepted by
/// `is_known_action`.
fn action_from_command(cmd: &str) -> Option<String> {
    // ASCII-only lowercasing keeps every byte offset identical to `cmd`.
    // Unicode `to_lowercase` can change byte lengths, which would make the
    // offset below slice `cmd` at the wrong place or off a char boundary.
    let idx = cmd.to_ascii_lowercase().find("voxtype")?;
    let after = &cmd[idx + "voxtype".len()..];

    let mut words = after
        .split_whitespace()
        .map(|word| word.trim_matches(|c: char| !c.is_alphanumeric()));
    let subcmd = words.next()?;
    let leaf = words.next()?;

    let action = format!("{} {}", subcmd, leaf);
    if is_known_action(&action) {
        Some(action)
    } else {
        None
    }
}

/// Whether `action` is one of the voxtype subcommand pairs this module
/// reports.
fn is_known_action(action: &str) -> bool {
    matches!(
        action,
        "record start"
            | "record stop"
            | "record toggle"
            | "record cancel"
            | "meeting start"
            | "meeting stop"
            | "meeting pause"
            | "meeting resume"
    )
}
#[test]
fn dominant_compositor_picks_majority() {
    // Two Sway bindings against one Hyprland binding: Sway must win.
    let binding = |compositor: &'static str, action: &str| Binding {
        compositor,
        keys: "k".into(),
        action: action.into(),
        source: PathBuf::new(),
    };
    let bindings = [
        binding("Sway", "record start"),
        binding("Sway", "record stop"),
        binding("Hyprland", "record toggle"),
    ];

    let winner = dominant_compositor(&bindings);
    assert_eq!(winner, Compositor::Sway);
}
"M"), "SHIFT+SUPER+M"); + assert_eq!(canonicalize_hyprland("", "HOME"), "HOME"); + assert_eq!(canonicalize_hyprland("super", "f13"), "SUPER+F13"); + } + + #[test] + fn canonicalize_sway_normalizes_mod4_and_keys() { + assert_eq!(canonicalize_sway("Mod4+space"), "SUPER+SPACE"); + assert_eq!(canonicalize_sway("Mod4+Shift+m"), "SHIFT+SUPER+M"); + assert_eq!(canonicalize_sway("Escape"), "ESCAPE"); + } + + #[test] + fn canonicalize_niri_normalizes_mod_word() { + assert_eq!(canonicalize_niri("Mod+Shift+M"), "SHIFT+SUPER+M"); + } + + #[test] + fn make_suggestion_skips_first_candidate_if_taken() { + let mut taken: std::collections::HashSet = + ["F13", "F14"].iter().map(|s| s.to_string()).collect(); + let s = make_suggestion( + Compositor::Hyprland, + &mut taken, + "PTT", + "test", + Role::PttPair, + ); + assert!( + s.config_lines.iter().any(|l| l.contains("F15")), + "expected F15 in lines: {:?}", + s.config_lines + ); + assert!( + !s.config_lines.iter().any(|l| l.contains(" F13 ") || l.contains(" F14 ")), + "lines should not include F13/F14: {:?}", + s.config_lines + ); + } + + #[test] + fn make_suggestion_warns_when_all_candidates_taken() { + let mut taken: std::collections::HashSet = candidates_for(Role::Cancel) + .iter() + .map(|s| s.to_string()) + .collect(); + let s = make_suggestion( + Compositor::Hyprland, + &mut taken, + "Cancel", + "test", + Role::Cancel, + ); + assert!( + s.config_lines + .iter() + .any(|l| l.contains("preferred candidates are already bound")), + "expected collision warning, got: {:?}", + s.config_lines + ); + } + + #[test] + fn sway_render_uses_release_for_stop() { + let mut taken = std::collections::HashSet::new(); + let s = make_suggestion( + Compositor::Sway, + &mut taken, + "Stop", + "test", + Role::Stop, + ); + assert!(s.config_lines.iter().any(|l| l.contains("--release"))); + } + + #[test] + fn niri_pttpair_falls_back_to_toggle() { + let mut taken = std::collections::HashSet::new(); + let s = make_suggestion( + Compositor::Niri, + &mut 
#[test]
fn rejects_unknown_action() {
    // `record dance` is not a known action, so the bind line yields nothing.
    let parsed = parse_hyprland_line(
        "bindd = , HOME, ..., exec, voxtype record dance",
        dummy_path(),
    );
    assert!(parsed.is_none());
}
+ +use crate::config; +use std::fs; +use std::io::Write; +use std::path::{Path, PathBuf}; +use toml_edit::{DocumentMut, Item, Value}; + +#[derive(Debug, thiserror::Error)] +pub enum EditorError { + #[error("could not determine config path; set $XDG_CONFIG_HOME or $HOME")] + NoConfigPath, + #[error("read {path}: {source}")] + Read { + path: PathBuf, + source: std::io::Error, + }, + #[error("parse {path}: {source}")] + Parse { + path: PathBuf, + source: toml_edit::TomlError, + }, + #[error("write {path}: {source}")] + Write { + path: PathBuf, + source: std::io::Error, + }, + #[error("validate after write: {0}")] + Validate(String), +} + +pub struct ConfigEditor { + path: PathBuf, + document: DocumentMut, + dirty: bool, +} + +impl ConfigEditor { + /// Load `~/.config/voxtype/config.toml` (creating an empty document if the + /// file is missing — `save()` will write it on first edit). + pub fn load() -> Result { + let path = config::Config::default_path().ok_or(EditorError::NoConfigPath)?; + Self::load_from(path) + } + + fn load_from(path: PathBuf) -> Result { + let text = match fs::read_to_string(&path) { + Ok(s) => s, + Err(e) if e.kind() == std::io::ErrorKind::NotFound => String::new(), + Err(e) => { + return Err(EditorError::Read { + path: path.clone(), + source: e, + }) + } + }; + let document: DocumentMut = text.parse().map_err(|e| EditorError::Parse { + path: path.clone(), + source: e, + })?; + Ok(Self { + path, + document, + dirty: false, + }) + } + + pub fn path(&self) -> &Path { + &self.path + } + + pub fn is_dirty(&self) -> bool { + self.dirty + } + + pub fn set_string(&mut self, table: &str, key: &str, value: &str) { + let item = self.ensure_table(table); + item[key] = toml_edit::value(value); + self.dirty = true; + } + + pub fn set_bool(&mut self, table: &str, key: &str, value: bool) { + let item = self.ensure_table(table); + item[key] = toml_edit::value(value); + self.dirty = true; + } + + pub fn set_int(&mut self, table: &str, key: &str, value: i64) { + 
let item = self.ensure_table(table); + item[key] = toml_edit::value(value); + self.dirty = true; + } + + /// Remove a key from a table (no-op if absent). + pub fn unset(&mut self, table: &str, key: &str) { + if let Some(t) = self.table_mut(table) { + if t.remove(key).is_some() { + self.dirty = true; + } + } + } + + fn table_mut(&mut self, dotted: &str) -> Option<&mut toml_edit::Table> { + let mut current = self.document.as_table_mut(); + if dotted.is_empty() { + return Some(current); + } + for segment in dotted.split('.') { + current = current + .get_mut(segment) + .and_then(|i| i.as_table_mut())?; + } + Some(current) + } + + fn table(&self, dotted: &str) -> Option<&toml_edit::Table> { + let mut current = self.document.as_table(); + if dotted.is_empty() { + return Some(current); + } + for segment in dotted.split('.') { + current = current.get(segment).and_then(|i| i.as_table())?; + } + Some(current) + } + + /// Public read-only access to a table, for callers that need to iterate + /// arbitrary keys (e.g. the replacement-list editor walking + /// `[text.replacements]`). + pub fn raw_table(&self, dotted: &str) -> Option<&toml_edit::Table> { + self.table(dotted) + } + + pub fn get_string(&self, table: &str, key: &str) -> Option { + self.value(table, key)?.as_str().map(|s| s.to_string()) + } + + pub fn get_bool(&self, table: &str, key: &str) -> Option { + self.value(table, key)?.as_bool() + } + + pub fn get_int(&self, table: &str, key: &str) -> Option { + self.value(table, key)?.as_integer() + } + + fn value(&self, table: &str, key: &str) -> Option<&Value> { + self.table(table)?.get(key).and_then(|i| i.as_value()) + } + + /// Ensure a (possibly dotted) `[table]` path exists and return it as a + /// mutable Item. Creates intermediate tables as needed. 
+ fn ensure_table(&mut self, dotted: &str) -> &mut Item { + let segments: Vec<&str> = dotted.split('.').collect(); + let (last, rest) = segments + .split_last() + .expect("ensure_table called with empty path"); + + // Walk through (or create) intermediate tables. + let mut current: &mut toml_edit::Table = self.document.as_table_mut(); + for segment in rest { + if !current + .get(segment) + .map(|i| i.is_table()) + .unwrap_or(false) + { + current.insert(segment, Item::Table(toml_edit::Table::new())); + } + current = current[segment] + .as_table_mut() + .expect("just inserted a table"); + } + + if !current + .get(last) + .map(|i| i.is_table()) + .unwrap_or(false) + { + current.insert(last, Item::Table(toml_edit::Table::new())); + } + &mut current[last] + } + + /// Atomically write the document and validate it parses through the + /// regular `load_config` path. On validation failure the file is left + /// untouched on disk (atomic rename hasn't happened yet). + pub fn save(&mut self) -> Result<(), EditorError> { + let serialized = self.document.to_string(); + + // Validate before touching the on-disk file: parse the serialized + // text via the runtime config loader. We do this by writing to a temp + // file, loading from there, and only renaming on success. 
+ let parent = self.path.parent().ok_or_else(|| EditorError::Write { + path: self.path.clone(), + source: std::io::Error::new( + std::io::ErrorKind::Other, + "config path has no parent directory", + ), + })?; + fs::create_dir_all(parent).map_err(|e| EditorError::Write { + path: parent.to_path_buf(), + source: e, + })?; + + let mut tmp = self.path.clone(); + let mut file_name = tmp + .file_name() + .map(|n| n.to_os_string()) + .unwrap_or_default(); + file_name.push(".tmp"); + tmp.set_file_name(file_name); + + { + let mut f = fs::File::create(&tmp).map_err(|e| EditorError::Write { + path: tmp.clone(), + source: e, + })?; + f.write_all(serialized.as_bytes()) + .map_err(|e| EditorError::Write { + path: tmp.clone(), + source: e, + })?; + f.sync_all().map_err(|e| EditorError::Write { + path: tmp.clone(), + source: e, + })?; + } + + // Validate by loading via the same code path the daemon uses. + if let Err(e) = config::load_config(Some(&tmp)) { + let _ = fs::remove_file(&tmp); + return Err(EditorError::Validate(format!("{}", e))); + } + + fs::rename(&tmp, &self.path).map_err(|e| EditorError::Write { + path: self.path.clone(), + source: e, + })?; + + self.dirty = false; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write as _; + + fn temp_config(contents: &str) -> (tempfile::TempDir, PathBuf) { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("config.toml"); + let mut f = fs::File::create(&path).unwrap(); + f.write_all(contents.as_bytes()).unwrap(); + (dir, path) + } + + #[test] + fn round_trip_preserves_comments() { + let (_dir, path) = temp_config( + "# top comment\n[hotkey]\n# inline\nkey = \"HOME\"\nmode = \"toggle\"\n", + ); + let mut ed = ConfigEditor::load_from(path.clone()).unwrap(); + ed.set_string("hotkey", "key", "PAUSE"); + let serialized = ed.document.to_string(); + assert!(serialized.contains("# top comment"), "{}", serialized); + assert!(serialized.contains("# inline"), "{}", serialized); + 
assert!(serialized.contains("key = \"PAUSE\"")); + assert!(serialized.contains("mode = \"toggle\"")); + } + + #[test] + fn missing_file_starts_empty() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("does-not-exist.toml"); + let ed = ConfigEditor::load_from(path).unwrap(); + assert!(!ed.is_dirty()); + assert_eq!(ed.document.to_string(), ""); + } + + #[test] + fn ensure_table_creates_if_missing() { + let (_dir, path) = temp_config(""); + let mut ed = ConfigEditor::load_from(path).unwrap(); + ed.set_bool("notification", "on_start", true); + let s = ed.document.to_string(); + assert!(s.contains("[notification]")); + assert!(s.contains("on_start = true")); + } + + #[test] + fn dirty_tracks_writes() { + let (_dir, path) = temp_config("[hotkey]\nkey = \"HOME\"\n"); + let mut ed = ConfigEditor::load_from(path).unwrap(); + assert!(!ed.is_dirty()); + ed.set_string("hotkey", "key", "PAUSE"); + assert!(ed.is_dirty()); + } +} diff --git a/src/tui/engine.rs b/src/tui/engine.rs new file mode 100644 index 00000000..e3688697 --- /dev/null +++ b/src/tui/engine.rs @@ -0,0 +1,1772 @@ +//! Engine section: per-engine tunables for the active transcription engine. +//! +//! The first row picks the engine itself; subsequent rows show the fields +//! that engine actually has. Switching engines on the form does not change +//! the value of fields you've edited for other engines — they're held in +//! memory until you save. 
+ +use crossterm::event::{KeyCode, KeyEvent}; +use ratatui::{ + layout::Rect, + style::{Color, Modifier, Style}, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph, Wrap}, + Frame, +}; + +use super::app::{Action, App}; +use super::common::{ + self, FeedbackLevel as CommonFeedback, FormRowSpec, TextInput, TextInputResult, +}; +use super::config_editor::{ConfigEditor, EditorError}; +use crate::setup::binary::{self, EngineFamily, InstallKind, Variant}; +use crate::setup::model; + +#[derive(Debug, Clone)] +pub struct EngineState { + pub engine: String, + pub fields: AllFields, + pub cursor: usize, + pub feedback: Option, + pub dirty_since_load: bool, + /// If the chosen engine needs a different binary family than what's + /// currently active, this holds the variant we'll switch to on save. + /// `None` means no switch needed. + pub pending_variant_switch: Option, + /// True when we wanted to switch but couldn't (source build, no + /// installed variant supports the new engine, …). Surfaced as a warning + /// on the screen. + pub binary_switch_blocked: Option<&'static str>, + /// Active inline text edit. While `Some`, all keypresses route to the + /// TextInput; navigation and cycle are suspended. + pub editing: Option, +} + +#[derive(Debug, Clone)] +pub struct TextEdit { + pub field: FieldId, + pub input: TextInput, +} + +#[derive(Debug, Clone, Default)] +pub struct AllFields { + // Whisper + pub w_model: String, + pub w_mode: String, + pub w_language: String, + pub w_translate: bool, + pub w_threads: Option, + pub w_initial_prompt: Option, + pub w_flash_attention: bool, + pub w_on_demand_loading: bool, + pub w_gpu_isolation: bool, + /// Only relevant when mode = "remote". 
+ pub w_remote_endpoint: Option, + pub w_remote_api_key: Option, + pub w_remote_model: Option, + + // Parakeet + pub pk_model: String, + pub pk_model_type: Option, // "tdt", "ctc", or None for auto-detect + pub pk_on_demand_loading: bool, + /// True if the [parakeet] table existed in the config at load time. We + /// only write back to it on save if either this is true or parakeet is + /// the active engine — otherwise saves leak partial tables that fail to + /// deserialize because `model` is required. + pub pk_section_existed: bool, + + // Moonshine + pub mn_model: String, + pub mn_quantized: bool, + pub mn_threads: Option, + pub mn_on_demand_loading: bool, + pub mn_section_existed: bool, + + // SenseVoice + pub sv_model: String, + pub sv_language: String, + pub sv_use_itn: bool, + pub sv_threads: Option, + pub sv_on_demand_loading: bool, + pub sv_section_existed: bool, + + // Paraformer + pub pf_model: String, + pub pf_threads: Option, + pub pf_on_demand_loading: bool, + pub pf_section_existed: bool, + + // Dolphin + pub dol_model: String, + pub dol_threads: Option, + pub dol_on_demand_loading: bool, + pub dol_section_existed: bool, + + // Omnilingual + pub om_model: String, + pub om_threads: Option, + pub om_on_demand_loading: bool, + pub om_section_existed: bool, + + // Cohere Transcribe (ONNX, ~3 GB int8 model) + pub co_model: String, + pub co_language: String, + pub co_threads: Option, + pub co_on_demand_loading: bool, + pub co_section_existed: bool, +} + +/// Model catalogs per engine. Whisper/Parakeet/Moonshine/SenseVoice come from +/// the central setup::model registry; Paraformer/Dolphin/Omnilingual aren't +/// registered yet, so we hardcode their canonical defaults. 
+fn model_catalog(engine: &str) -> Vec<&'static str> { + match engine { + "whisper" => model::valid_model_names(), + "parakeet" => model::valid_parakeet_model_names(), + "moonshine" => model::valid_moonshine_model_names(), + "sensevoice" => model::valid_sensevoice_model_names(), + "paraformer" => vec!["paraformer-zh", "paraformer-en"], + "dolphin" => vec!["dolphin-base"], + "omnilingual" => vec!["omnilingual-300m"], + "cohere" => vec!["cohere-transcribe-int8"], + _ => Vec::new(), + } +} + +/// Default model name baked into voxtype for each ONNX engine. Used when we +/// have to materialize a fresh `[engine]` table because the user just made it +/// the active engine for the first time — those structs require `model` and +/// the validator rejects a partial table. +const fn default_model(engine: &str) -> &'static str { + match engine.as_bytes() { + b"parakeet" => "parakeet-tdt-0.6b-v3", + b"moonshine" => "base", + b"sensevoice" => "sensevoice-small", + b"paraformer" => "paraformer-zh", + b"dolphin" => "dolphin-base", + b"omnilingual" => "omnilingual-300m", + b"cohere" => "cohere-transcribe-int8", + _ => "", + } +} + +#[derive(Debug, Clone)] +pub struct Feedback { + pub level: FeedbackLevel, + pub message: String, +} +#[derive(Debug, Clone, Copy)] +pub enum FeedbackLevel { + Ok, + Err, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum FieldId { + Engine, + + // Whisper + WModel, + WMode, + WLanguage, + WTranslate, + WThreads, + WPrompt, + WFlashAttention, + WOnDemandLoading, + WGpuIsolation, + WRemoteEndpoint, + WRemoteApiKey, + WRemoteModel, + + // Parakeet + PkModel, + PkModelType, + PkOnDemandLoading, + + // Moonshine + MnModel, + MnQuantized, + MnThreads, + MnOnDemandLoading, + + // SenseVoice + SvModel, + SvLanguage, + SvUseItn, + SvThreads, + SvOnDemandLoading, + + // Paraformer + PfModel, + PfThreads, + PfOnDemandLoading, + + // Dolphin + DolModel, + DolThreads, + DolOnDemandLoading, + + // Omnilingual + OmModel, + OmThreads, + OmOnDemandLoading, + 
+ // Cohere + CoModel, + CoLanguage, + CoThreads, + CoOnDemandLoading, +} + +const ENGINE_CHOICES: &[&str] = &[ + "whisper", + "parakeet", + "moonshine", + "sensevoice", + "paraformer", + "dolphin", + "omnilingual", + "cohere", +]; + +/// Cohere Transcribe officially supports these 14 languages. Token IDs are +/// looked up by name from tokens.txt at runtime, so the TUI only needs to +/// pass the two-letter code through to [cohere] language. +const CO_LANG_CHOICES: &[&str] = &[ + "ar", "de", "en", "es", "fr", "hi", "it", "ja", "ko", "nl", "pt", "ru", "tr", "zh", +]; + +const MODE_CHOICES: &[&str] = &["local", "remote", "cli"]; +const LANG_CHOICES: &[&str] = &[ + "auto", "en", "fr", "de", "it", "es", "pt", "nl", "pl", "zh", "ja", "ko", "ru", "ar", +]; +const SV_LANG_CHOICES: &[&str] = &["auto", "zh", "en", "ja", "ko", "yue"]; +const PARAKEET_MODEL_TYPES: &[Option<&str>] = &[None, Some("tdt"), Some("ctc")]; + +fn rows_for_engine_with_mode(engine: &str, whisper_mode: &str) -> Vec { + let mut rows = vec![FieldId::Engine]; + match engine { + "whisper" => { + rows.extend_from_slice(&[ + FieldId::WModel, + FieldId::WMode, + FieldId::WLanguage, + FieldId::WTranslate, + FieldId::WThreads, + FieldId::WPrompt, + FieldId::WFlashAttention, + FieldId::WOnDemandLoading, + FieldId::WGpuIsolation, + ]); + if whisper_mode == "remote" { + rows.extend_from_slice(&[ + FieldId::WRemoteEndpoint, + FieldId::WRemoteApiKey, + FieldId::WRemoteModel, + ]); + } + } + "parakeet" => rows.extend_from_slice(&[ + FieldId::PkModel, + FieldId::PkModelType, + FieldId::PkOnDemandLoading, + ]), + "moonshine" => rows.extend_from_slice(&[ + FieldId::MnModel, + FieldId::MnQuantized, + FieldId::MnThreads, + FieldId::MnOnDemandLoading, + ]), + "sensevoice" => rows.extend_from_slice(&[ + FieldId::SvModel, + FieldId::SvLanguage, + FieldId::SvUseItn, + FieldId::SvThreads, + FieldId::SvOnDemandLoading, + ]), + "paraformer" => rows.extend_from_slice(&[ + FieldId::PfModel, + FieldId::PfThreads, + 
FieldId::PfOnDemandLoading, + ]), + "dolphin" => rows.extend_from_slice(&[ + FieldId::DolModel, + FieldId::DolThreads, + FieldId::DolOnDemandLoading, + ]), + "omnilingual" => rows.extend_from_slice(&[ + FieldId::OmModel, + FieldId::OmThreads, + FieldId::OmOnDemandLoading, + ]), + "cohere" => rows.extend_from_slice(&[ + FieldId::CoModel, + FieldId::CoLanguage, + FieldId::CoThreads, + FieldId::CoOnDemandLoading, + ]), + _ => {} + } + rows +} + +impl EngineState { + pub fn load() -> Result { + let ed = ConfigEditor::load()?; + let engine = ed + .get_string("", "engine") + .unwrap_or_else(|| "whisper".to_string()); + let fields = AllFields { + // Whisper + w_model: ed + .get_string("whisper", "model") + .unwrap_or_else(|| default_model("whisper").to_string()), + w_mode: ed + .get_string("whisper", "mode") + .unwrap_or_else(|| "local".to_string()), + w_language: ed + .get_string("whisper", "language") + .unwrap_or_else(|| "auto".to_string()), + w_translate: ed.get_bool("whisper", "translate").unwrap_or(false), + w_threads: ed.get_int("whisper", "threads"), + w_initial_prompt: ed.get_string("whisper", "initial_prompt"), + w_flash_attention: ed.get_bool("whisper", "flash_attention").unwrap_or(false), + w_on_demand_loading: ed.get_bool("whisper", "on_demand_loading").unwrap_or(false), + w_gpu_isolation: ed.get_bool("whisper", "gpu_isolation").unwrap_or(false), + w_remote_endpoint: ed.get_string("whisper", "remote_endpoint"), + w_remote_api_key: ed.get_string("whisper", "remote_api_key"), + w_remote_model: ed.get_string("whisper", "remote_model"), + + // Parakeet + pk_model: ed + .get_string("parakeet", "model") + .unwrap_or_else(|| default_model("parakeet").to_string()), + pk_model_type: ed.get_string("parakeet", "model_type"), + pk_on_demand_loading: ed + .get_bool("parakeet", "on_demand_loading") + .unwrap_or(false), + pk_section_existed: ed.get_string("parakeet", "model").is_some(), + + // Moonshine + mn_model: ed + .get_string("moonshine", "model") + .unwrap_or_else(|| 
default_model("moonshine").to_string()), + mn_quantized: ed.get_bool("moonshine", "quantized").unwrap_or(true), + mn_threads: ed.get_int("moonshine", "threads"), + mn_on_demand_loading: ed + .get_bool("moonshine", "on_demand_loading") + .unwrap_or(false), + mn_section_existed: ed.get_string("moonshine", "model").is_some(), + + // SenseVoice + sv_model: ed + .get_string("sensevoice", "model") + .unwrap_or_else(|| default_model("sensevoice").to_string()), + sv_language: ed + .get_string("sensevoice", "language") + .unwrap_or_else(|| "auto".to_string()), + sv_use_itn: ed.get_bool("sensevoice", "use_itn").unwrap_or(true), + sv_threads: ed.get_int("sensevoice", "threads"), + sv_on_demand_loading: ed + .get_bool("sensevoice", "on_demand_loading") + .unwrap_or(false), + sv_section_existed: ed.get_string("sensevoice", "model").is_some(), + + // Paraformer + pf_model: ed + .get_string("paraformer", "model") + .unwrap_or_else(|| default_model("paraformer").to_string()), + pf_threads: ed.get_int("paraformer", "threads"), + pf_on_demand_loading: ed + .get_bool("paraformer", "on_demand_loading") + .unwrap_or(false), + pf_section_existed: ed.get_string("paraformer", "model").is_some(), + + // Dolphin + dol_model: ed + .get_string("dolphin", "model") + .unwrap_or_else(|| default_model("dolphin").to_string()), + dol_threads: ed.get_int("dolphin", "threads"), + dol_on_demand_loading: ed + .get_bool("dolphin", "on_demand_loading") + .unwrap_or(false), + dol_section_existed: ed.get_string("dolphin", "model").is_some(), + + // Omnilingual + om_model: ed + .get_string("omnilingual", "model") + .unwrap_or_else(|| default_model("omnilingual").to_string()), + om_threads: ed.get_int("omnilingual", "threads"), + om_on_demand_loading: ed + .get_bool("omnilingual", "on_demand_loading") + .unwrap_or(false), + om_section_existed: ed.get_string("omnilingual", "model").is_some(), + + // Cohere + co_model: ed + .get_string("cohere", "model") + .unwrap_or_else(|| 
default_model("cohere").to_string()), + co_language: ed + .get_string("cohere", "language") + .unwrap_or_else(|| "en".to_string()), + co_threads: ed.get_int("cohere", "threads"), + co_on_demand_loading: ed + .get_bool("cohere", "on_demand_loading") + .unwrap_or(false), + co_section_existed: ed.get_string("cohere", "model").is_some(), + }; + let mut state = Self { + engine, + fields, + cursor: 0, + feedback: None, + dirty_since_load: false, + pending_variant_switch: None, + binary_switch_blocked: None, + editing: None, + }; + state.refresh_binary_match(); + Ok(state) + } + + /// Required binary family for an engine name. Whisper needs a Whisper + /// binary; everything ONNX-based needs an ONNX binary. + fn required_family(engine: &str) -> EngineFamily { + if engine == "whisper" { + EngineFamily::Whisper + } else { + EngineFamily::Onnx + } + } + + /// Recompute pending_variant_switch / binary_switch_blocked based on the + /// current engine selection. Called whenever the engine field changes. + fn refresh_binary_match(&mut self) { + self.pending_variant_switch = None; + self.binary_switch_blocked = None; + + let inv = binary::inventory(); + if inv.install_kind == InstallKind::Source { + // Source builds can't be hot-swapped; whether the running binary + // supports the chosen engine depends on its compiled features. + // Best we can do is flag it. 
+ let supported = match self.engine.as_str() { + "whisper" => true, + "parakeet" => inv.compiled_features.iter().any(|f| *f == "parakeet"), + _ => inv + .compiled_features + .iter() + .any(|f| *f == self.engine.as_str()), + }; + if !supported { + self.binary_switch_blocked = Some( + "Source build: rebuild voxtype with the corresponding \ + Cargo feature for this engine.", + ); + } + return; + } + + let needed = Self::required_family(&self.engine); + let current_family = inv.active_variant.map(|v| v.family()); + if current_family == Some(needed) { + return; // already matches + } + + // Pick the recommended variant for the needed family on this hardware. + let target = if needed == EngineFamily::Whisper { + inv.recommendation.whisper + } else { + inv.recommendation.onnx + }; + + // Confirm the recommended variant is actually installed and runnable. + let runnable = inv + .variants + .iter() + .find(|s| s.variant == target) + .map(|s| s.installed && s.runs_on_this_cpu && s.gpu_available) + .unwrap_or(false); + + if runnable { + self.pending_variant_switch = Some(target); + } else { + // Fall back to any installed variant of the right family that + // runs on this hardware. + let fallback = inv.variants.iter().find(|s| { + s.variant.family() == needed + && s.installed + && s.runs_on_this_cpu + && s.gpu_available + }); + match fallback { + Some(s) => self.pending_variant_switch = Some(s.variant), + None => { + self.binary_switch_blocked = Some( + "No installed binary supports this engine on this \ + hardware. Install the matching voxtype variant first.", + ); + } + } + } + } + + pub fn save(&mut self) -> Action { + let mut ed = match ConfigEditor::load() { + Ok(e) => e, + Err(e) => { + self.feedback = Some(Feedback { + level: FeedbackLevel::Err, + message: format!("load: {}", e), + }); + return Action::None; + } + }; + + // Active engine at root. 
+ ed.set_string("", "engine", &self.engine); + + let f = &self.fields; + + // Whisper — always written; voxtype assumes a Whisper config exists. + ed.set_string("whisper", "model", &f.w_model); + ed.set_string("whisper", "mode", &f.w_mode); + ed.set_string("whisper", "language", &f.w_language); + ed.set_bool("whisper", "translate", f.w_translate); + match f.w_threads { + Some(n) => ed.set_int("whisper", "threads", n), + None => ed.unset("whisper", "threads"), + } + match &f.w_initial_prompt { + Some(p) if !p.is_empty() => ed.set_string("whisper", "initial_prompt", p), + _ => ed.unset("whisper", "initial_prompt"), + } + ed.set_bool("whisper", "flash_attention", f.w_flash_attention); + ed.set_bool("whisper", "on_demand_loading", f.w_on_demand_loading); + ed.set_bool("whisper", "gpu_isolation", f.w_gpu_isolation); + match &f.w_remote_endpoint { + Some(v) if !v.is_empty() => ed.set_string("whisper", "remote_endpoint", v), + _ => ed.unset("whisper", "remote_endpoint"), + } + match &f.w_remote_api_key { + Some(v) if !v.is_empty() => ed.set_string("whisper", "remote_api_key", v), + _ => ed.unset("whisper", "remote_api_key"), + } + match &f.w_remote_model { + Some(v) if !v.is_empty() => ed.set_string("whisper", "remote_model", v), + _ => ed.unset("whisper", "remote_model"), + } + + // Parakeet — only touch the table if it already existed or the user + // is making it the active engine. Now that the user can edit model + // here directly, the model field is always written when we touch the + // table. 
+ if self.engine == "parakeet" || f.pk_section_existed { + ed.set_string("parakeet", "model", &f.pk_model); + match &f.pk_model_type { + Some(m) => ed.set_string("parakeet", "model_type", m), + None => ed.unset("parakeet", "model_type"), + } + ed.set_bool("parakeet", "on_demand_loading", f.pk_on_demand_loading); + } + + // Moonshine + if self.engine == "moonshine" || f.mn_section_existed { + ed.set_string("moonshine", "model", &f.mn_model); + ed.set_bool("moonshine", "quantized", f.mn_quantized); + match f.mn_threads { + Some(n) => ed.set_int("moonshine", "threads", n), + None => ed.unset("moonshine", "threads"), + } + ed.set_bool("moonshine", "on_demand_loading", f.mn_on_demand_loading); + } + + // SenseVoice + if self.engine == "sensevoice" || f.sv_section_existed { + ed.set_string("sensevoice", "model", &f.sv_model); + ed.set_string("sensevoice", "language", &f.sv_language); + ed.set_bool("sensevoice", "use_itn", f.sv_use_itn); + match f.sv_threads { + Some(n) => ed.set_int("sensevoice", "threads", n), + None => ed.unset("sensevoice", "threads"), + } + ed.set_bool("sensevoice", "on_demand_loading", f.sv_on_demand_loading); + } + + // Paraformer + if self.engine == "paraformer" || f.pf_section_existed { + ed.set_string("paraformer", "model", &f.pf_model); + match f.pf_threads { + Some(n) => ed.set_int("paraformer", "threads", n), + None => ed.unset("paraformer", "threads"), + } + ed.set_bool("paraformer", "on_demand_loading", f.pf_on_demand_loading); + } + + // Dolphin + if self.engine == "dolphin" || f.dol_section_existed { + ed.set_string("dolphin", "model", &f.dol_model); + match f.dol_threads { + Some(n) => ed.set_int("dolphin", "threads", n), + None => ed.unset("dolphin", "threads"), + } + ed.set_bool("dolphin", "on_demand_loading", f.dol_on_demand_loading); + } + + // Omnilingual + if self.engine == "omnilingual" || f.om_section_existed { + ed.set_string("omnilingual", "model", &f.om_model); + match f.om_threads { + Some(n) => ed.set_int("omnilingual", 
"threads", n), + None => ed.unset("omnilingual", "threads"), + } + ed.set_bool("omnilingual", "on_demand_loading", f.om_on_demand_loading); + } + + // Cohere + if self.engine == "cohere" || f.co_section_existed { + ed.set_string("cohere", "model", &f.co_model); + ed.set_string("cohere", "language", &f.co_language); + match f.co_threads { + Some(n) => ed.set_int("cohere", "threads", n), + None => ed.unset("cohere", "threads"), + } + ed.set_bool("cohere", "on_demand_loading", f.co_on_demand_loading); + } + + match ed.save() { + Ok(()) => { + self.dirty_since_load = false; + let pending = self.pending_variant_switch.take(); + self.feedback = Some(Feedback { + level: FeedbackLevel::Ok, + message: match pending { + Some(v) => format!( + "Saved. Switching binary to {} (will prompt for sudo)…", + v.display() + ), + None => format!("Saved to {}", ed.path().display()), + }, + }); + if let Some(v) = pending { + return Action::SwitchVariant(v); + } + } + Err(e) => { + self.feedback = Some(Feedback { + level: FeedbackLevel::Err, + message: format!("save: {}", e), + }); + } + } + Action::None + } + + pub fn reset(&mut self) { + match Self::load() { + Ok(fresh) => { + let cursor = self.cursor; + *self = fresh; + let max = self.rows().len().saturating_sub(1); + self.cursor = cursor.min(max); + self.refresh_binary_match(); + self.feedback = Some(Feedback { + level: FeedbackLevel::Ok, + message: "Reverted unsaved changes".to_string(), + }); + } + Err(e) => { + self.feedback = Some(Feedback { + level: FeedbackLevel::Err, + message: format!("reload: {}", e), + }); + } + } + } + + fn move_cursor(&mut self, delta: i32) { + let len = self.rows().len() as i32; + if len == 0 { + return; + } + let new = (self.cursor as i32 + delta).rem_euclid(len); + self.cursor = new as usize; + } + + /// Visible rows for the current engine. Whisper has extra rows when + /// running in remote mode; everything else is constant per engine. 
+ fn rows(&self) -> Vec { + rows_for_engine_with_mode(&self.engine, &self.fields.w_mode) + } + + fn current_field(&self) -> FieldId { + let rows = self.rows(); + rows.get(self.cursor).copied().unwrap_or(FieldId::Engine) + } + + /// True if the focused field is a free-text field that should be edited + /// with the inline TextInput rather than a cycle list. + fn is_text_field(field: FieldId) -> bool { + matches!( + field, + FieldId::WPrompt + | FieldId::WRemoteEndpoint + | FieldId::WRemoteApiKey + | FieldId::WRemoteModel + ) + } + + fn start_edit_if_text_field(&mut self) -> bool { + let field = self.current_field(); + if !Self::is_text_field(field) { + return false; + } + let initial = match field { + FieldId::WPrompt => self.fields.w_initial_prompt.clone().unwrap_or_default(), + FieldId::WRemoteEndpoint => self.fields.w_remote_endpoint.clone().unwrap_or_default(), + FieldId::WRemoteApiKey => self.fields.w_remote_api_key.clone().unwrap_or_default(), + FieldId::WRemoteModel => self.fields.w_remote_model.clone().unwrap_or_default(), + _ => String::new(), + }; + self.editing = Some(TextEdit { + field, + input: TextInput::new(initial), + }); + true + } + + fn commit_text_edit(&mut self, field: FieldId, buffer: String) { + let trimmed = buffer.trim(); + let opt = if trimmed.is_empty() { + None + } else { + Some(buffer.clone()) + }; + match field { + FieldId::WPrompt => self.fields.w_initial_prompt = opt, + FieldId::WRemoteEndpoint => self.fields.w_remote_endpoint = opt, + FieldId::WRemoteApiKey => self.fields.w_remote_api_key = opt, + FieldId::WRemoteModel => self.fields.w_remote_model = opt, + _ => {} + } + self.dirty_since_load = true; + self.feedback = None; + } + + fn cycle(&mut self, delta: i32) { + let field = self.current_field(); + let f = &mut self.fields; + match field { + FieldId::Engine => { + let idx = ENGINE_CHOICES + .iter() + .position(|c| *c == self.engine) + .map(|i| i as i32) + .unwrap_or(0); + let n = (idx + delta).rem_euclid(ENGINE_CHOICES.len() as 
i32); + self.engine = ENGINE_CHOICES[n as usize].to_string(); + // Clamp cursor into the new engine's row range; keep it at row 1 + // when present so the user lands on the first engine-specific + // field. + let max = self.rows().len().saturating_sub(1); + self.cursor = self.cursor.min(max); + self.refresh_binary_match(); + } + FieldId::WModel => f.w_model = cycle_model("whisper", &f.w_model, delta), + FieldId::WMode => f.w_mode = cycle_str(MODE_CHOICES, &f.w_mode, delta), + FieldId::WLanguage => f.w_language = cycle_str(LANG_CHOICES, &f.w_language, delta), + FieldId::WTranslate => f.w_translate = !f.w_translate, + FieldId::WThreads => f.w_threads = cycle_threads(f.w_threads, delta), + FieldId::WPrompt => { + // Free-text field: enter inline edit mode instead of cycling + // through hardcoded presets. + self.editing = Some(TextEdit { + field: FieldId::WPrompt, + input: TextInput::new( + f.w_initial_prompt.clone().unwrap_or_default(), + ), + }); + return; // Don't mark dirty until commit. 
+ } + FieldId::WFlashAttention => f.w_flash_attention = !f.w_flash_attention, + FieldId::WOnDemandLoading => f.w_on_demand_loading = !f.w_on_demand_loading, + FieldId::WGpuIsolation => f.w_gpu_isolation = !f.w_gpu_isolation, + FieldId::WRemoteEndpoint + | FieldId::WRemoteApiKey + | FieldId::WRemoteModel => { + self.start_edit_if_text_field(); + return; + } + + FieldId::PkModel => f.pk_model = cycle_model("parakeet", &f.pk_model, delta), + FieldId::PkModelType => { + let idx = PARAKEET_MODEL_TYPES + .iter() + .position(|c| c.as_deref() == f.pk_model_type.as_deref()) + .map(|i| i as i32) + .unwrap_or(0); + let n = (idx + delta).rem_euclid(PARAKEET_MODEL_TYPES.len() as i32); + f.pk_model_type = PARAKEET_MODEL_TYPES[n as usize].map(|s| s.to_string()); + } + FieldId::PkOnDemandLoading => f.pk_on_demand_loading = !f.pk_on_demand_loading, + + FieldId::MnModel => f.mn_model = cycle_model("moonshine", &f.mn_model, delta), + FieldId::MnQuantized => f.mn_quantized = !f.mn_quantized, + FieldId::MnThreads => f.mn_threads = cycle_threads(f.mn_threads, delta), + FieldId::MnOnDemandLoading => f.mn_on_demand_loading = !f.mn_on_demand_loading, + + FieldId::SvModel => f.sv_model = cycle_model("sensevoice", &f.sv_model, delta), + FieldId::SvLanguage => { + f.sv_language = cycle_str(SV_LANG_CHOICES, &f.sv_language, delta) + } + FieldId::SvUseItn => f.sv_use_itn = !f.sv_use_itn, + FieldId::SvThreads => f.sv_threads = cycle_threads(f.sv_threads, delta), + FieldId::SvOnDemandLoading => f.sv_on_demand_loading = !f.sv_on_demand_loading, + + FieldId::PfModel => f.pf_model = cycle_model("paraformer", &f.pf_model, delta), + FieldId::PfThreads => f.pf_threads = cycle_threads(f.pf_threads, delta), + FieldId::PfOnDemandLoading => f.pf_on_demand_loading = !f.pf_on_demand_loading, + + FieldId::DolModel => f.dol_model = cycle_model("dolphin", &f.dol_model, delta), + FieldId::DolThreads => f.dol_threads = cycle_threads(f.dol_threads, delta), + FieldId::DolOnDemandLoading => f.dol_on_demand_loading = 
!f.dol_on_demand_loading, + + FieldId::OmModel => f.om_model = cycle_model("omnilingual", &f.om_model, delta), + FieldId::OmThreads => f.om_threads = cycle_threads(f.om_threads, delta), + FieldId::OmOnDemandLoading => f.om_on_demand_loading = !f.om_on_demand_loading, + + FieldId::CoModel => f.co_model = cycle_model("cohere", &f.co_model, delta), + FieldId::CoLanguage => { + f.co_language = cycle_str(CO_LANG_CHOICES, &f.co_language, delta) + } + FieldId::CoThreads => f.co_threads = cycle_threads(f.co_threads, delta), + FieldId::CoOnDemandLoading => f.co_on_demand_loading = !f.co_on_demand_loading, + } + self.dirty_since_load = true; + self.feedback = None; + } +} + +fn cycle_str(choices: &[&'static str], current: &str, delta: i32) -> String { + if choices.is_empty() { + return current.to_string(); + } + let idx = choices + .iter() + .position(|c| *c == current) + .map(|i| i as i32) + .unwrap_or(-1); + let new = (idx + delta).rem_euclid(choices.len() as i32); + choices[new as usize].to_string() +} + +fn cycle_model(engine: &str, current: &str, delta: i32) -> String { + let names = model_catalog(engine); + if names.is_empty() { + return current.to_string(); + } + let idx = names + .iter() + .position(|c| *c == current) + .map(|i| i as i32) + .unwrap_or(-1); + let new = (idx + delta).rem_euclid(names.len() as i32); + names[new as usize].to_string() +} + +fn cycle_threads(current: Option, delta: i32) -> Option { + let cur = current.unwrap_or(0); + let next = cur + delta as i64; + if next <= 0 { + None + } else { + Some(next.min(64)) + } +} + +pub fn render(f: &mut Frame, area: Rect, app: &App) { + let state = match &app.engine { + Some(s) => s, + None => { + let block = Block::default().borders(Borders::ALL).title("Engine"); + let inner = block.inner(area); + f.render_widget(block, area); + f.render_widget( + Paragraph::new("Failed to load config; check ~/.config/voxtype/config.toml.") + .wrap(Wrap { trim: true }), + inner, + ); + return; + } + }; + + let rows: Vec = 
state.rows()
        .iter()
        .enumerate()
        .map(|(i, fid)| {
            let (label, value) = field_label_value(state, *fid);
            FormRowSpec::new(i == state.cursor, label, value)
        })
        .collect();

    let feedback_pair = state.feedback.as_ref().map(|fb| {
        (
            match fb.level {
                FeedbackLevel::Ok => CommonFeedback::Ok,
                FeedbackLevel::Err => CommonFeedback::Err,
            },
            fb.message.as_str(),
        )
    });

    common::render_form_with_guidance(
        f,
        area,
        "Engine",
        state.dirty_since_load,
        feedback_pair,
        &rows,
        guidance(state),
    );
}

/// Engines whose ONNX graphs MIGraphX 7.2 can't compile cleanly. On the
/// AMD-targeted binary they fall back to CPU even though the GPU is
/// registered for Parakeet. Surfaces a small `(CPU on AMD GPU)` tag in
/// the picker. Cohere is on this list because the int8 community export
/// uses MatMulNBits(bits=8) which MIGraphX 7.2 rejects; will move off
/// once an int4 / FP16 export ships.
const AMD_CPU_ONLY_ENGINES: &[&str] = &[
    "moonshine",
    "sensevoice",
    "paraformer",
    "dolphin",
    "omnilingual",
    "cohere",
];

/// Engine name as shown in the form. When the `onnx-migraphx-enabled`
/// feature is compiled in and `engine` is listed in `AMD_CPU_ONLY_ENGINES`,
/// the name is suffixed with ` (CPU on AMD GPU)`; otherwise it is returned
/// unchanged.
fn engine_value_for_display(engine: &str) -> String {
    if cfg!(feature = "onnx-migraphx-enabled") && AMD_CPU_ONLY_ENGINES.contains(&engine) {
        format!("{} (CPU on AMD GPU)", engine)
    } else {
        engine.to_string()
    }
}

/// Build the `(label, value)` pair rendered for one form row.
///
/// The label is a fixed string per field. The value is the current setting
/// formatted for display: booleans as yes/no, unset thread counts as `auto`,
/// unset optional strings as a placeholder. A text field that is currently
/// being edited shows the live input buffer (masked for the API key).
fn field_label_value(state: &EngineState, fid: FieldId) -> (&'static str, String) {
    let f = &state.fields;
    match fid {
        FieldId::Engine => ("Engine", engine_value_for_display(&state.engine)),

        FieldId::WModel => ("Whisper · model", f.w_model.clone()),
        FieldId::WMode => ("Whisper · execution mode", f.w_mode.clone()),
        FieldId::WLanguage => ("Whisper · language", f.w_language.clone()),
        FieldId::WTranslate => ("Whisper · translate to English", yesno(f.w_translate)),
        FieldId::WThreads => (
            "Whisper · threads",
            f.w_threads
                .map(|n| n.to_string())
                .unwrap_or_else(|| "auto".to_string()),
        ),
        FieldId::WPrompt => (
            "Whisper · initial prompt",
            match state.editing.as_ref() {
                Some(e) if e.field == FieldId::WPrompt => e.input.caret_string(),
                _ => f
                    .w_initial_prompt
                    .as_deref()
                    .map(|s| {
                        // Preview only the first 30 characters. Cut at a char
                        // boundary: slicing at a fixed byte offset panics when
                        // that byte falls inside a multi-byte UTF-8 character,
                        // which any non-ASCII prompt can trigger.
                        match s.char_indices().nth(30) {
                            Some((cut, _)) => format!("{}…", &s[..cut]),
                            None => s.to_string(),
                        }
                    })
                    .unwrap_or_else(|| "(none)".to_string()),
            },
        ),
        FieldId::WFlashAttention => ("Whisper · flash attention", yesno(f.w_flash_attention)),
        FieldId::WOnDemandLoading => ("Whisper · on-demand model load", yesno(f.w_on_demand_loading)),
        FieldId::WGpuIsolation => ("Whisper · GPU isolation", yesno(f.w_gpu_isolation)),
        FieldId::WRemoteEndpoint => (
            "Whisper · remote endpoint",
            match state.editing.as_ref() {
                Some(e) if e.field == FieldId::WRemoteEndpoint => e.input.caret_string(),
                _ => f
                    .w_remote_endpoint
                    .clone()
                    .unwrap_or_else(|| "(unset)".to_string()),
            },
        ),
        FieldId::WRemoteApiKey => (
            "Whisper · remote API key",
            match state.editing.as_ref() {
                Some(e) if e.field == FieldId::WRemoteApiKey => mask(&e.input.caret_string()),
                // Never echo a stored key; only report whether one is set.
                _ => match f.w_remote_api_key.as_deref() {
                    None | Some("") => "(unset)".to_string(),
                    Some(_) => "•••••• (set; press Enter to edit)".to_string(),
                },
            },
        ),
        FieldId::WRemoteModel => (
            "Whisper · remote model",
            match state.editing.as_ref() {
                Some(e) if e.field == FieldId::WRemoteModel => e.input.caret_string(),
                _ => f
                    .w_remote_model
                    .clone()
                    .unwrap_or_else(|| "(unset)".to_string()),
            },
        ),

        FieldId::PkModel => ("Parakeet · model", f.pk_model.clone()),
        FieldId::PkModelType => (
            "Parakeet · model architecture",
            f.pk_model_type
                .as_deref()
                .unwrap_or("auto-detect")
                .to_string(),
        ),
        FieldId::PkOnDemandLoading => {
            ("Parakeet · on-demand model load", yesno(f.pk_on_demand_loading))
        }

        FieldId::MnModel => ("Moonshine · model", f.mn_model.clone()),
        FieldId::MnQuantized => ("Moonshine · use quantized model", yesno(f.mn_quantized)),
        FieldId::MnThreads => (
            "Moonshine · threads",
            f.mn_threads
                .map(|n| n.to_string())
                .unwrap_or_else(|| "auto".to_string()),
        ),
        FieldId::MnOnDemandLoading => {
            ("Moonshine · on-demand model load", yesno(f.mn_on_demand_loading))
        }

        FieldId::SvModel => ("SenseVoice · model", f.sv_model.clone()),
        FieldId::SvLanguage => ("SenseVoice · language", f.sv_language.clone()),
        FieldId::SvUseItn => (
            "SenseVoice · inverse text normalization",
            yesno(f.sv_use_itn),
        ),
        FieldId::SvThreads => (
            "SenseVoice · threads",
            f.sv_threads
                .map(|n| n.to_string())
                .unwrap_or_else(|| "auto".to_string()),
        ),
        FieldId::SvOnDemandLoading => {
            ("SenseVoice · on-demand model load", yesno(f.sv_on_demand_loading))
        }

        FieldId::PfModel => ("Paraformer · model", f.pf_model.clone()),
        FieldId::PfThreads => (
            "Paraformer · threads",
            f.pf_threads
                .map(|n| n.to_string())
                .unwrap_or_else(|| "auto".to_string()),
        ),
        FieldId::PfOnDemandLoading => {
            ("Paraformer · on-demand model load", yesno(f.pf_on_demand_loading))
        }

        FieldId::DolModel => ("Dolphin · model", f.dol_model.clone()),
        FieldId::DolThreads => (
            "Dolphin · threads",
            f.dol_threads
                .map(|n| n.to_string())
                .unwrap_or_else(|| "auto".to_string()),
        ),
        FieldId::DolOnDemandLoading => {
            ("Dolphin · on-demand model load", yesno(f.dol_on_demand_loading))
        }

        FieldId::OmModel => ("Omnilingual · model", f.om_model.clone()),
        FieldId::OmThreads => (
            "Omnilingual · threads",
            f.om_threads
                .map(|n| n.to_string())
                .unwrap_or_else(|| "auto".to_string()),
        ),
        FieldId::OmOnDemandLoading => {
            ("Omnilingual · on-demand model load", yesno(f.om_on_demand_loading))
        }

        FieldId::CoModel => ("Cohere · model", f.co_model.clone()),
        FieldId::CoLanguage => ("Cohere · language", f.co_language.clone()),
        FieldId::CoThreads => (
            "Cohere · threads",
            f.co_threads
                .map(|n| n.to_string())
                .unwrap_or_else(|| "auto".to_string()),
        ),
        FieldId::CoOnDemandLoading => (
            "Cohere · on-demand model load",
            yesno(f.co_on_demand_loading),
        ),
    }
}

fn yesno(b: bool) 
-> String { + (if b { "yes" } else { "no" }).to_string() +} + +/// Mask a secret value for on-screen display while editing — show only the +/// final character so the user can verify they're typing what they intended, +/// but don't render the full key in the form row. +fn mask(s: &str) -> String { + if s.is_empty() { + return s.to_string(); + } + let last = s.chars().last().map(|c| c.to_string()).unwrap_or_default(); + let bullets: String = std::iter::repeat('•').take(s.chars().count() - 1).collect(); + format!("{}{}", bullets, last) +} + +fn heading(text: impl Into) -> Line<'static> { + Line::from(Span::styled( + text.into(), + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + )) +} + +fn engine_guidance(state: &EngineState) -> Vec> { + let mut lines: Vec> = Vec::new(); + + // Banner about a pending binary switch (or a blocked one) goes first so + // the user sees it without scrolling. + if let Some(target) = state.pending_variant_switch { + lines.push(Line::from(Span::styled( + "⚠ Binary switch required", + Style::default() + .fg(Color::Yellow) + .add_modifier(Modifier::BOLD), + ))); + lines.push(Line::from(format!( + "This engine needs the {} family. 
Saving will also switch the binary to:", + family_name(EngineState::required_family(&state.engine)) + ))); + lines.push(Line::from(Span::styled( + format!(" {} ({})", target.display(), target.binary_name()), + Style::default().fg(Color::Cyan), + ))); + lines.push(Line::from(Span::styled( + " Press s to save; pkexec will prompt for sudo.", + Style::default().fg(Color::Gray), + ))); + lines.push(Line::from("")); + } else if let Some(reason) = state.binary_switch_blocked { + lines.push(Line::from(Span::styled( + "⚠ Cannot switch binary", + Style::default() + .fg(Color::Red) + .add_modifier(Modifier::BOLD), + ))); + lines.push(Line::from(reason.to_string())); + lines.push(Line::from("")); + } + + lines.push(heading("Active engine")); + lines.push(Line::from("")); + for (name, desc) in [ + ( + "whisper", + "OpenAI Whisper via whisper.cpp. Default. Multilingual; best \ + general-purpose accuracy.", + ), + ( + "parakeet", + "NVIDIA Parakeet TDT/CTC via ONNX Runtime. Tops the Open ASR \ + Leaderboard for English.", + ), + ( + "moonshine", + "Useful Sensors Moonshine. Encoder-decoder, low-latency, small \ + footprint. Good for English dictation.", + ), + ( + "sensevoice", + "Alibaba SenseVoice-Small. Strong on Chinese / Japanese / Korean \ + / Cantonese / English in one model.", + ), + ( + "paraformer / dolphin / omnilingual", + "Specialized FunASR models. Paraformer focuses on Chinese, \ + Dolphin is dictation-tuned, Omnilingual covers 1600 languages.", + ), + ( + "cohere", + "Cohere Transcribe (Cohere Labs). #1 on the Open ASR Leaderboard \ + for English (5.42 WER). 14 languages. ~3 GB on disk.", + ), + ] { + lines.push(Line::from(Span::styled( + format!("{}: ", name), + Style::default().add_modifier(Modifier::BOLD), + ))); + lines.push(Line::from(desc.to_string())); + lines.push(Line::from("")); + } + lines.push(Line::from(Span::styled( + "Engine choice and binary family are linked. 
The TUI will swap \ + the binary for you when needed.", + Style::default().fg(Color::Gray), + ))); + lines +} + +fn family_name(family: EngineFamily) -> &'static str { + match family { + EngineFamily::Whisper => "Whisper", + EngineFamily::Onnx => "ONNX", + } +} + +fn guidance(state: &EngineState) -> Vec> { + let f = &state.fields; + match state.current_field() { + FieldId::Engine => engine_guidance(state), + + FieldId::WModel => model_guidance("whisper", &f.w_model), + FieldId::PkModel => model_guidance("parakeet", &f.pk_model), + FieldId::MnModel => model_guidance("moonshine", &f.mn_model), + FieldId::SvModel => model_guidance("sensevoice", &f.sv_model), + FieldId::PfModel => model_guidance("paraformer", &f.pf_model), + FieldId::DolModel => model_guidance("dolphin", &f.dol_model), + FieldId::OmModel => model_guidance("omnilingual", &f.om_model), + FieldId::CoModel => model_guidance("cohere", &f.co_model), + + FieldId::WMode => vec![ + heading("Whisper · execution mode"), + Line::from(""), + Line::from(Span::styled( + "local: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from("Transcribe in-process via whisper-rs. Default; offline."), + Line::from(""), + Line::from(Span::styled( + "remote: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + "Send audio to an OpenAI-compatible Whisper API. Set \ + [whisper] remote_endpoint and remote_api_key first.", + ), + Line::from(""), + Line::from(Span::styled( + "cli: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + "Shell out to a `whisper` CLI binary. Useful for testing \ + custom builds without rebuilding voxtype.", + ), + ], + FieldId::WLanguage => vec![ + heading("Whisper · language"), + Line::from(""), + Line::from( + "Two-letter language code or `auto`. Auto-detect costs ~50ms \ + on the first chunk; lock to a code when you only ever \ + dictate in one language.", + ), + Line::from(""), + Line::from(Span::styled( + "Multi-language allowlists (e.g. 
\"en,fr,de\") can be set in \ + config.toml as an array.", + Style::default().fg(Color::Gray), + )), + ], + FieldId::WTranslate => vec![ + heading("Whisper · translate to English"), + Line::from(""), + Line::from( + "When on, Whisper translates non-English speech to English in \ + the transcript.", + ), + Line::from(""), + Line::from( + "Useful for multilingual meetings where you want a single \ + English transcript.", + ), + ], + FieldId::WThreads => vec![ + heading("Whisper · threads"), + Line::from(""), + Line::from( + "Number of CPU threads Whisper uses. `auto` lets voxtype \ + pick (typically your physical-core count).", + ), + Line::from(""), + Line::from( + "Lower this to leave headroom for other work. Bump it for max \ + throughput on a CPU-only setup.", + ), + ], + FieldId::WPrompt => { + let mut lines = vec![ + heading("Whisper · initial prompt"), + Line::from(""), + Line::from( + "Hints Whisper about terminology, capitalization, or formatting. \ + Whisper biases its output toward what the prompt establishes.", + ), + Line::from(""), + Line::from( + "Useful for proper nouns and technical terms. Examples: \ + \"Voxtype, Hyprland, Claude.\" or \"Transcribe with proper \ + capitalization and punctuation.\"", + ), + Line::from(""), + Line::from(Span::styled( + "Press Enter or i to edit. While editing: type to insert, \ + Backspace/Delete to remove, Enter commits, Esc cancels. 
\ + Ctrl-W deletes the previous word; Ctrl-U clears the line.", + Style::default().fg(Color::Gray), + )), + ]; + if state.editing.as_ref().is_some_and(|e| e.field == FieldId::WPrompt) { + lines.insert( + 0, + Line::from(Span::styled( + "✎ Editing — Enter to commit, Esc to cancel", + Style::default() + .fg(Color::Yellow) + .add_modifier(Modifier::BOLD), + )), + ); + lines.insert(1, Line::from("")); + } + lines + } + FieldId::WFlashAttention => vec![ + heading("Whisper · flash attention"), + Line::from(""), + Line::from( + "GPU optimization that reduces attention-layer memory \ + bandwidth. Faster on capable cards, especially on large-v3.", + ), + Line::from(""), + Line::from( + "No effect on CPU runs. A few older driver combos crash with \ + it on; turn it off if Whisper hangs.", + ), + ], + FieldId::WOnDemandLoading => vec![ + heading("Whisper · on-demand model loading"), + Line::from(""), + Line::from( + "Loads the model only when recording starts; unloads at idle. \ + Frees ~1-2 GB of RAM between dictations.", + ), + Line::from(""), + Line::from( + "Adds a one-shot delay on the first key press of each \ + dictation. Worth it for sporadic dictation; not worth it \ + for constant use.", + ), + ], + FieldId::WGpuIsolation => vec![ + heading("Whisper · GPU isolation"), + Line::from(""), + Line::from( + "Each transcription runs in a short-lived subprocess that \ + exits afterward, releasing all VRAM.", + ), + Line::from(""), + Line::from( + "Useful on hybrid-graphics laptops to let the discrete GPU \ + power down between dictations. 
Adds ~100-300ms of subprocess \ + startup per transcription.", + ), + ], + + FieldId::PkModelType => vec![ + heading("Parakeet · model architecture"), + Line::from(""), + Line::from(Span::styled( + "auto-detect: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + "voxtype inspects the model directory and picks tdt or ctc \ + based on which ONNX files are present.", + ), + Line::from(""), + Line::from(Span::styled( + "tdt: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + "Token-and-Duration Transducer. Recommended; what Parakeet's \ + reference checkpoints use.", + ), + Line::from(""), + Line::from(Span::styled( + "ctc: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + "CTC encoder-only models. Smaller, faster, slightly lower \ + accuracy.", + ), + Line::from(""), + Line::from(Span::styled( + "Leave at auto-detect unless you have reason to override.", + Style::default().fg(Color::Gray), + )), + ], + FieldId::PkOnDemandLoading => vec![ + heading("Parakeet · on-demand model loading"), + Line::from(""), + Line::from( + "Loads the Parakeet model only when recording starts; unloads \ + at idle.", + ), + Line::from(""), + Line::from( + "Same trade-off as Whisper: frees memory between dictations \ + at the cost of first-keystroke latency.", + ), + ], + + FieldId::MnQuantized => vec![ + heading("Moonshine · use quantized model"), + Line::from(""), + Line::from( + "Moonshine ships int8-quantized weights alongside full \ + precision. 
Quantized is ~2-3x faster on CPU at a small \ + accuracy cost.", + ), + Line::from(""), + Line::from( + "Falls back to full precision if the quantized files aren't \ + present in the model directory.", + ), + ], + FieldId::MnThreads => threads_guidance("Moonshine"), + FieldId::MnOnDemandLoading => on_demand_guidance("Moonshine"), + + FieldId::SvLanguage => vec![ + heading("SenseVoice · language"), + Line::from(""), + Line::from( + "SenseVoice is multilingual across CJK + English. Pick a \ + specific language to skip the language-detection step.", + ), + Line::from(""), + Line::from(Span::styled( + "auto: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from("Detect per-recording (default)."), + Line::from(Span::styled( + "zh / yue: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from("Mandarin / Cantonese."), + Line::from(Span::styled( + "ja / ko: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from("Japanese / Korean."), + Line::from(Span::styled( + "en: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from("English."), + ], + FieldId::SvUseItn => vec![ + heading("SenseVoice · inverse text normalization"), + Line::from(""), + Line::from( + "Adds punctuation and converts spoken numbers/dates to their \ + written form (\"twenty twenty-six\" → \"2026\").", + ), + Line::from(""), + Line::from( + "Recommended on. 
Turn off if you want raw token output for \ + your own post-processing.", + ), + ], + FieldId::SvThreads => threads_guidance("SenseVoice"), + FieldId::SvOnDemandLoading => on_demand_guidance("SenseVoice"), + + FieldId::PfThreads => threads_guidance("Paraformer"), + FieldId::PfOnDemandLoading => on_demand_guidance("Paraformer"), + + FieldId::DolThreads => threads_guidance("Dolphin"), + FieldId::DolOnDemandLoading => on_demand_guidance("Dolphin"), + + FieldId::OmThreads => threads_guidance("Omnilingual"), + FieldId::OmOnDemandLoading => on_demand_guidance("Omnilingual"), + + FieldId::CoLanguage => vec![ + heading("Cohere · language"), + Line::from(""), + Line::from( + "Cohere Transcribe officially supports 14 languages. Pick the \ + one you'll be dictating in; Cohere does not auto-detect.", + ), + Line::from(""), + Line::from(Span::styled( + "Supported codes:", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + " ar (Arabic), de (German), en (English), es (Spanish), \ + fr (French), hi (Hindi), it (Italian), ja (Japanese), \ + ko (Korean), nl (Dutch), pt (Portuguese), ru (Russian), \ + tr (Turkish), zh (Chinese)", + ), + ], + FieldId::CoThreads => threads_guidance("Cohere"), + FieldId::CoOnDemandLoading => on_demand_guidance("Cohere"), + + FieldId::WRemoteEndpoint => vec![ + heading("Whisper · remote endpoint"), + Line::from(""), + Line::from( + "OpenAI-compatible Whisper API base URL. voxtype POSTs audio \ + multipart/form-data to /audio/transcriptions.", + ), + Line::from(""), + Line::from( + "Examples: https://api.openai.com/v1, http://localhost:9000/v1 \ + (whisper.cpp server), https://api.groq.com/openai/v1.", + ), + Line::from(""), + Line::from(Span::styled( + "Press Enter to edit. Esc cancels.", + Style::default().fg(Color::Gray), + )), + ], + FieldId::WRemoteApiKey => vec![ + heading("Whisper · remote API key"), + Line::from(""), + Line::from( + "Bearer token for the remote endpoint. 
Stored as plain text \ + in config.toml — protect that file accordingly.", + ), + Line::from(""), + Line::from( + "If you'd rather not have it on disk, set the \ + VOXTYPE_WHISPER_API_KEY environment variable instead and \ + leave this unset.", + ), + Line::from(""), + Line::from(Span::styled( + "Press Enter to edit. Display is masked while not editing.", + Style::default().fg(Color::Gray), + )), + ], + FieldId::WRemoteModel => vec![ + heading("Whisper · remote model"), + Line::from(""), + Line::from( + "Model name to send with each request (the `model` field in \ + the multipart form). Defaults to whisper-1 if unset.", + ), + Line::from(""), + Line::from( + "Common values: whisper-1 (OpenAI), whisper-large-v3 \ + (Groq, Together), whisper.cpp (whisper.cpp server). Check \ + your provider's docs.", + ), + ], + } +} + +fn model_guidance(engine: &str, current: &str) -> Vec> { + let catalog = model_catalog(engine); + let installed = installed_models_for(engine); + let mut lines = vec![ + heading(format!("{} · model", display_engine(engine))), + Line::from(""), + Line::from(format!( + "Inference checkpoint voxtype loads for {}. 
←→ cycles through \ + the models voxtype knows about; pick whichever balances accuracy \ + and speed for your hardware.", + display_engine(engine) + )), + Line::from(""), + ]; + if !catalog.is_empty() { + lines.push(Line::from(Span::styled( + "Available ( ● = installed, · = not downloaded )", + Style::default().add_modifier(Modifier::BOLD), + ))); + for name in &catalog { + let active = *name == current; + let inst = installed.iter().any(|i| i == name); + let cursor = if active { "▸ " } else { " " }; + let marker = if inst { "●" } else { "·" }; + let suffix = if inst { "" } else { " (not downloaded)" }; + let style = if !inst { + Style::default().fg(Color::Gray) + } else if active { + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD) + } else { + Style::default() + }; + lines.push(Line::from(Span::styled( + format!(" {}{} {}{}", cursor, marker, name, suffix), + style, + ))); + } + lines.push(Line::from("")); + } + lines.push(Line::from(Span::styled( + "Models you haven't downloaded yet show up here too. Switch to one, \ + save, then run `voxtype setup model` to fetch the weights.", + Style::default().fg(Color::Gray), + ))); + if engine == "cohere" { + lines.push(Line::from("")); + lines.push(Line::from(Span::styled( + "⚠ Cohere's int8 model is ~3 GB on disk — heaviest of the bundled \ + engines. Make sure you've got the space before downloading.", + Style::default().fg(Color::Yellow), + ))); + } + lines +} + +/// Installed models on disk for a given engine. Mirrors the inventory the old +/// Models section used to show. 
+fn installed_models_for(engine: &str) -> Vec { + use crate::config::Config; + let dir = Config::models_dir(); + let catalog = model_catalog(engine); + catalog + .into_iter() + .filter(|name| { + if engine == "whisper" { + dir.join(format!("ggml-{}.bin", name)).exists() + } else { + let p = dir.join(name); + p.exists() || p.is_dir() + } + }) + .map(|s| s.to_string()) + .collect() +} + +fn display_engine(engine: &str) -> &'static str { + match engine { + "whisper" => "Whisper", + "parakeet" => "Parakeet", + "moonshine" => "Moonshine", + "sensevoice" => "SenseVoice", + "paraformer" => "Paraformer", + "dolphin" => "Dolphin", + "omnilingual" => "Omnilingual", + "cohere" => "Cohere", + _ => "Engine", + } +} + +fn threads_guidance(engine: &str) -> Vec> { + vec![ + heading(format!("{} · threads", engine)), + Line::from(""), + Line::from(format!( + "Number of CPU threads ONNX Runtime uses for {} inference. \ + `auto` lets voxtype pick (typically physical-core count).", + engine + )), + Line::from(""), + Line::from( + "Lower it to leave CPU headroom for other tasks. Bump to your \ + core count for max throughput.", + ), + ] +} + +fn on_demand_guidance(engine: &str) -> Vec> { + vec![ + heading(format!("{} · on-demand model loading", engine)), + Line::from(""), + Line::from(format!( + "Load the {} model only when recording starts; unload at idle.", + engine + )), + Line::from(""), + Line::from( + "Frees memory between dictations at the cost of first-keystroke \ + latency. Worth it for sporadic dictation.", + ), + ] +} + +pub fn handle_key(app: &mut App, key: KeyEvent) -> Action { + let state = match app.engine.as_mut() { + Some(s) => s, + None => return Action::None, + }; + + // While inline-editing a text field, route every key into the input until + // the user commits or cancels. Esc / Ctrl-C cancel; Enter commits. 
+ if state.editing.is_some() { + return handle_edit_key(state, key); + } + + match key.code { + KeyCode::Up | KeyCode::Char('k') => { + state.move_cursor(-1); + Action::None + } + KeyCode::Down | KeyCode::Char('j') => { + state.move_cursor(1); + Action::None + } + KeyCode::Left | KeyCode::Char('h') => { + state.cycle(-1); + Action::None + } + KeyCode::Right | KeyCode::Char('l') | KeyCode::Char(' ') => { + state.cycle(1); + Action::None + } + // `i` / Enter open inline-edit on text-editable fields. + KeyCode::Enter | KeyCode::Char('i') => { + if state.start_edit_if_text_field() { + Action::None + } else { + Action::None + } + } + KeyCode::Char('s') => state.save(), + KeyCode::Char('r') => { + state.reset(); + Action::None + } + _ => Action::None, + } +} + +fn handle_edit_key(state: &mut EngineState, key: KeyEvent) -> Action { + let Some(editing) = state.editing.as_mut() else { + return Action::None; + }; + match editing.input.handle_key(key) { + TextInputResult::Continue => Action::None, + TextInputResult::Commit => { + let buf = editing.input.buffer().to_string(); + let field = editing.field; + state.editing = None; + state.commit_text_edit(field, buf); + Action::None + } + TextInputResult::Cancel => { + state.editing = None; + Action::None + } + } +} diff --git a/src/tui/general.rs b/src/tui/general.rs new file mode 100644 index 00000000..e49d20fc --- /dev/null +++ b/src/tui/general.rs @@ -0,0 +1,713 @@ +//! General settings screen: install info, daemon status, variant matrix. 

use crate::setup::binary::{Acceleration, EngineFamily, InstallKind, Variant};
use crossterm::event::{KeyCode, KeyEvent};
use ratatui::{
    layout::{Constraint, Direction, Layout, Rect},
    style::{Color, Modifier, Style},
    text::{Line, Span},
    widgets::{Block, Borders, Paragraph, Wrap},
    Frame,
};

use super::app::{Action, App, COLS, ROWS};

/// Draw the General screen: status banner (only when there is something to
/// report), install/daemon info, the variant matrix beside its hint pane,
/// then the legend and the help line.
pub fn render(f: &mut Frame, area: Rect, app: &App) {
    let chunks = Layout::default()
        .direction(Direction::Vertical)
        .constraints([
            Constraint::Length(banner_height(app)),
            Constraint::Length(8), // install/daemon info
            Constraint::Min(8),    // variant matrix
            Constraint::Length(2), // legend
            Constraint::Length(1), // section help
        ])
        .split(area);

    render_banner(f, chunks[0], app);
    render_info(f, chunks[1], app);

    // Side-by-side: variant matrix on the left, hint pane on the right.
    let body = Layout::default()
        .direction(Direction::Horizontal)
        .constraints([Constraint::Percentage(48), Constraint::Percentage(52)])
        .split(chunks[2]);
    render_matrix(f, body[0], app);
    render_hint(f, body[1], app);

    render_legend(f, chunks[3]);
    render_help(f, chunks[4]);
}

/// Rows to reserve for the status banner; 0 hides it.
///
/// `render_banner` pushes one line for the last switch outcome, one for the
/// restart notice and two for a missing model, inside a fully bordered block
/// (two more rows). A fixed height of 4 clipped the later lines whenever
/// more than one status was present, so the height now follows the content.
/// It never drops below the previous 4 rows, which keeps the spare row for a
/// wrapped message.
fn banner_height(app: &App) -> u16 {
    let mut content_rows: u16 = 0;
    if app.last_switch.is_some() {
        content_rows += 1;
    }
    if app.restart_needed {
        content_rows += 1;
    }
    if app.missing_model.is_some() {
        content_rows += 2;
    }
    if content_rows == 0 {
        0
    } else {
        // +2 for the top and bottom border of the "Status" block.
        (content_rows + 2).max(4)
    }
}

fn render_banner(f: &mut Frame, area: Rect, app: &App) {
    if area.height == 0 {
        return;
    }
    let mut lines = Vec::new();

    if let Some(outcome) = &app.last_switch {
        let style = if outcome.success {
            Style::default().fg(Color::Green)
        } else {
            Style::default().fg(Color::Red)
        };
        let prefix = if outcome.success { "✓ " } else { "✗ " };
        lines.push(Line::from(Span::styled(
            format!("{}{}", prefix, outcome.message),
            style,
        )));
    }

    if app.restart_needed {
        lines.push(Line::from(Span::styled(
            " Daemon restart required: systemctl --user restart voxtype",
            Style::default().fg(Color::Yellow),
        )));
    }

    if let Some(missing) = 
&app.missing_model { + lines.push(Line::from(Span::styled( + format!( + "⚠ Active {} model not downloaded: {}", + missing.engine, missing.model + ), + Style::default() + .fg(Color::Yellow) + .add_modifier(Modifier::BOLD), + ))); + lines.push(Line::from(Span::styled( + format!(" Run `{}` to fetch it.", missing.setup_command), + Style::default().fg(Color::Gray), + ))); + } + + let block = Block::default().borders(Borders::ALL).title("Status"); + f.render_widget(Paragraph::new(lines).block(block).wrap(Wrap { trim: true }), area); +} + +fn render_info(f: &mut Frame, area: Rect, app: &App) { + let inv = &app.inventory; + let install_kind = match inv.install_kind { + InstallKind::Package => "package", + InstallKind::Source => "source", + }; + + let daemon_dot = if app.daemon_running { + Span::styled("●", Style::default().fg(Color::Green)) + } else { + Span::styled("●", Style::default().fg(Color::Red)) + }; + let daemon_text = if app.daemon_running { + "running" + } else { + "stopped" + }; + + let active = inv + .active_variant + .map(|v| format!("{} ({})", v.display(), v.binary_name())) + .unwrap_or_else(|| "unknown (symlink missing or unrecognized)".to_string()); + + let rec = &inv.recommendation; + let recommended = format!( + "{} / {}", + rec.whisper.display(), + rec.onnx.display() + ); + + let lines = vec![ + Line::from(vec![ + Span::raw("Daemon: "), + daemon_dot, + Span::raw(format!(" {}", daemon_text)), + ]), + Line::from(format!( + "Install: {} ({})", + inv.binary_path.display(), + install_kind + )), + Line::from(format!("Active: {}", active)), + Line::from(vec![ + Span::raw("Recommended: "), + Span::styled("★ ", Style::default().fg(Color::Cyan)), + Span::styled(recommended, Style::default().fg(Color::Cyan)), + Span::styled(" (Whisper / ONNX)", Style::default().fg(Color::Gray)), + ]), + Line::from(format!( + "CPU: AVX2={} AVX-512={}", + inv.cpu.avx2, inv.cpu.avx512 + )), + Line::from(format!( + "GPU: NVIDIA={} AMD={}", + inv.gpus.nvidia, inv.gpus.amd + )), + ]; + 
+ let block = Block::default().borders(Borders::ALL).title("Install"); + f.render_widget(Paragraph::new(lines).block(block), area); +} + +fn render_matrix(f: &mut Frame, area: Rect, app: &App) { + let block = Block::default().borders(Borders::ALL).title("Variant"); + let inner = block.inner(area); + f.render_widget(block, area); + + if app.inventory.install_kind == InstallKind::Source { + let para = Paragraph::new(vec![ + Line::from("Source build detected."), + Line::from(""), + Line::from("Variant switching applies only to package installs"), + Line::from("(/usr/lib/voxtype/voxtype-*). To enable a different"), + Line::from("engine, rebuild with the appropriate Cargo features."), + Line::from(""), + Line::from(format!( + "Compiled features: {}", + if app.inventory.compiled_features.is_empty() { + "(none)".to_string() + } else { + app.inventory.compiled_features.join(", ") + } + )), + ]) + .wrap(Wrap { trim: true }); + f.render_widget(para, inner); + return; + } + + let mut lines = Vec::new(); + + // Header row + let mut header = vec![Span::raw(format!("{:<10}", ""))]; + for col in COLS { + header.push(Span::styled( + format!("{:<10}", accel_label(*col)), + Style::default().add_modifier(Modifier::BOLD), + )); + } + lines.push(Line::from(header)); + + // One row per engine family + for (r, family) in ROWS.iter().enumerate() { + let mut spans = vec![Span::styled( + format!("{:<10}", family_label(*family)), + Style::default().add_modifier(Modifier::BOLD), + )]; + for (c, _accel) in COLS.iter().enumerate() { + let cell = render_cell(app, r, c); + let is_cursor = app.cursor == (r, c); + let style = if is_cursor { + Style::default() + .bg(Color::DarkGray) + .fg(Color::White) + .add_modifier(Modifier::BOLD) + } else { + Style::default() + }; + spans.push(Span::styled(format!("{:<10}", cell), style)); + } + lines.push(Line::from(spans)); + } + + f.render_widget(Paragraph::new(lines), inner); +} + +fn render_cell(app: &App, row: usize, col: usize) -> String { + let 
Some(variant) = app.variant_at(row, col) else { + return "—".to_string(); + }; + + let status = app + .inventory + .variants + .iter() + .find(|s| s.variant == variant); + + let glyph = match status { + Some(s) if s.active => "● active", + Some(s) if !s.installed => "·", + Some(s) if !s.runs_on_this_cpu => "⚠ CPU", + Some(s) if !s.gpu_available => "⚠ GPU", + Some(_) => "✓", + None => "·", + }; + + if is_recommended(variant, &app.inventory.recommendation) { + format!("★ {}", glyph) + } else { + glyph.to_string() + } +} + +fn is_recommended(v: Variant, r: &crate::setup::binary::Recommendation) -> bool { + v == r.whisper || v == r.onnx +} + +fn render_hint(f: &mut Frame, area: Rect, app: &App) { + let block = Block::default().borders(Borders::ALL).title("About"); + let inner = block.inner(area); + f.render_widget(block, area); + + if app.inventory.install_kind == InstallKind::Source { + return; + } + + let (r, c) = app.cursor; + let lines: Vec = match app.variant_at(r, c) { + Some(variant) => variant_hint_lines(variant, app), + None => na_hint_lines(r, c), + }; + + f.render_widget(Paragraph::new(lines).wrap(Wrap { trim: false }), inner); +} + +fn variant_hint_lines<'a>(variant: Variant, app: &App) -> Vec> { + let hint = variant_hint(variant); + let status = app.inventory.variants.iter().find(|s| s.variant == variant); + + let status_line = match status { + Some(s) if s.active => Line::from(Span::styled( + "● Currently active", + Style::default().fg(Color::Green), + )), + Some(s) if !s.installed => Line::from(Span::styled( + "· Not installed on this system", + Style::default().fg(Color::Gray), + )), + Some(s) if !s.runs_on_this_cpu => Line::from(Span::styled( + "⚠ Won't run: CPU lacks required instructions", + Style::default().fg(Color::Yellow), + )), + Some(s) if !s.gpu_available => Line::from(Span::styled( + "⚠ Won't accelerate: required GPU not detected", + Style::default().fg(Color::Yellow), + )), + Some(_) => Line::from(Span::styled( + "✓ Ready to switch (Enter)", 
+ Style::default().fg(Color::Cyan), + )), + None => Line::from(""), + }; + + let rec = &app.inventory.recommendation; + let mut lines: Vec = Vec::new(); + if variant == rec.whisper { + lines.push(Line::from(Span::styled( + "★ Recommended for Whisper on this hardware", + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + ))); + lines.push(Line::from(Span::styled( + rec.whisper_reason.to_string(), + Style::default().fg(Color::Cyan), + ))); + lines.push(Line::from("")); + } + if variant == rec.onnx { + lines.push(Line::from(Span::styled( + "★ Recommended for ONNX engines on this hardware", + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + ))); + lines.push(Line::from(Span::styled( + rec.onnx_reason.to_string(), + Style::default().fg(Color::Cyan), + ))); + lines.push(Line::from("")); + } + lines.push(Line::from(Span::styled( + hint.headline.to_string(), + Style::default().add_modifier(Modifier::BOLD), + ))); + lines.push(Line::from("")); + for paragraph in hint.body { + lines.push(Line::from(paragraph.to_string())); + lines.push(Line::from("")); + } + lines.push(Line::from(vec![ + Span::styled("Models: ", Style::default().fg(Color::Gray)), + Span::raw(hint.models.to_string()), + ])); + lines.push(Line::from(vec![ + Span::styled("Speed: ", Style::default().fg(Color::Gray)), + Span::raw(hint.speed.to_string()), + ])); + lines.push(Line::from(vec![ + Span::styled("Hardware: ", Style::default().fg(Color::Gray)), + Span::raw(hint.hardware.to_string()), + ])); + + // Only show concrete model picks on the recommended cells, where the user + // is most likely to act on them. On non-recommended cells the static + // `models:` line above is enough. 
+ if variant == rec.whisper || variant == rec.onnx { + let models = recommended_models(variant); + lines.push(Line::from("")); + lines.push(Line::from(Span::styled( + "Recommended models", + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + ))); + lines.push(Line::from(vec![ + Span::styled(" English: ", Style::default().fg(Color::Gray)), + Span::raw(models.english.to_string()), + ])); + lines.push(Line::from(vec![ + Span::styled(" European: ", Style::default().fg(Color::Gray)), + Span::raw(models.european.to_string()), + ])); + lines.push(Line::from(vec![ + Span::styled(" Asian: ", Style::default().fg(Color::Gray)), + Span::raw(models.asian.to_string()), + ])); + if let Some(note) = models.note { + lines.push(Line::from("")); + lines.push(Line::from(Span::styled( + note.to_string(), + Style::default().fg(Color::Gray), + ))); + } + } + + lines.push(Line::from("")); + lines.push(status_line); + lines +} + +fn na_hint_lines<'a>(row: usize, col: usize) -> Vec> { + let family = ROWS.get(row).copied(); + let accel = COLS.get(col).copied(); + let suggestion = match (family, accel) { + (Some(EngineFamily::Whisper), Some(Acceleration::Cuda)) => { + "For NVIDIA GPU acceleration with Whisper, use Vulkan — it covers \ + NVIDIA, AMD, and Intel GPUs in a single binary." + } + (Some(EngineFamily::Whisper), Some(Acceleration::Migraphx)) => { + "For AMD GPU acceleration with Whisper, use Vulkan — voxtype's \ + whisper.cpp build uses Vulkan instead of ROCm." + } + (Some(EngineFamily::Onnx), Some(Acceleration::Vulkan)) => { + "ONNX Runtime does not ship a Vulkan execution provider. Use \ + ONNX (CUDA) for NVIDIA, ONNX (MIGraphX) for AMD, or ONNX (AVX2/AVX-512) \ + for CPU." 
+ } + _ => "This combination is not built.", + }; + vec![ + Line::from(Span::styled( + "Not applicable", + Style::default() + .fg(Color::Gray) + .add_modifier(Modifier::BOLD), + )), + Line::from(""), + Line::from(suggestion.to_string()), + ] +} + +struct VariantHint { + headline: &'static str, + body: &'static [&'static str], + models: &'static str, + speed: &'static str, + hardware: &'static str, +} + +/// Recommended models for a given variant, broken out by language family. +/// Shown in the hint pane only when the cursor is on a *recommended* cell — +/// the goal is to give users a jumping-off point ("if I switch to this +/// binary, what should I download?") rather than a complete model catalog. +struct ModelRecommendations { + english: &'static str, + european: &'static str, + asian: &'static str, + /// Optional advice tied to the acceleration tier (e.g. prefer int8 on AVX2). + note: Option<&'static str>, +} + +fn recommended_models(v: Variant) -> ModelRecommendations { + match v { + // ---- Whisper family ---- + Variant::WhisperAvx2 | Variant::WhisperNative => ModelRecommendations { + english: "small.en (or base.en for low-power CPUs)", + european: "small (covers FR, DE, IT, ES, NL, PL, PT and more)", + asian: "medium (CJK accuracy improves a lot at medium+)", + note: Some( + "AVX2-only CPU: large-v3 will run but isn't realtime. 
Stick to \ + small/medium unless you have a GPU.", + ), + }, + Variant::WhisperAvx512 => ModelRecommendations { + english: "large-v3-turbo (fast and accurate)", + european: "large-v3-turbo (strong on most EU languages)", + asian: "large-v3 (better CJK than turbo for the same size)", + note: Some( + "AVX-512 makes large-v3-turbo practical on CPU; large-v3 is \ + slower but more accurate on non-English.", + ), + }, + Variant::WhisperVulkan => ModelRecommendations { + english: "large-v3-turbo", + european: "large-v3-turbo (FR, DE, IT, ES, NL, PL, PT, etc.)", + asian: "large-v3 (CJK; turbo is slightly weaker on Asian languages)", + note: Some( + "GPU acceleration removes the size penalty; pick whichever \ + model gives you the accuracy you need.", + ), + }, + + // ---- ONNX family ---- + Variant::OnnxAvx2 | Variant::OnnxNative => ModelRecommendations { + english: "parakeet-tdt-0.6b-v3-int8 (quantized; ~50% faster on CPU)", + european: "dolphin-base (multi-language CTC, dictation-tuned)", + asian: "sensevoice-small (zh, en, ja, ko, yue in one model)", + note: Some( + "On AVX2-only CPUs the int8 Parakeet variant is the practical \ + default. 
Omnilingual is also viable but heavier; Cohere is \ + the heaviest at ~3 GB but ranks #1 on the Open ASR \ + Leaderboard.", + ), + }, + Variant::OnnxAvx512 => ModelRecommendations { + english: "parakeet-tdt-0.6b-v3 (top of the Open ASR Leaderboard)", + european: "omnilingual-300m (1600 languages, including all of EU)", + asian: "sensevoice-small (zh, en, ja, ko, yue)", + note: Some( + "AVX-512 lets you run full-precision Parakeet at real-time \ + speed without a GPU.", + ), + }, + Variant::OnnxCuda12 | Variant::OnnxCuda13 | Variant::OnnxCuda => ModelRecommendations { + english: "parakeet-tdt-0.6b-v3", + european: "omnilingual-300m (1600 languages)", + asian: "sensevoice-small (zh/en/ja/ko/yue) or paraformer-zh for Chinese-only", + note: Some( + "CUDA inference is so fast on Parakeet that English dictation \ + is effectively instantaneous; use the largest model that fits \ + your VRAM.", + ), + }, + Variant::OnnxMigraphx => ModelRecommendations { + english: "parakeet-tdt-0.6b-v3", + european: "omnilingual-300m (1600 languages)", + asian: "sensevoice-small (zh, en, ja, ko, yue)", + note: Some( + "MIGraphX execution provider is new and may not register on all \ + driver versions; if you see ORT registration errors, fall back \ + to ONNX (AVX-512) on CPU.", + ), + }, + } +} + +fn variant_hint(v: Variant) -> VariantHint { + match v { + Variant::WhisperAvx2 => VariantHint { + headline: "Whisper on AVX2 CPUs", + body: &[ + "Baseline Whisper build. Runs on any x86-64 CPU since ~2013 \ + (Haswell/Excavator and newer). Pick this if your CPU lacks \ + AVX-512 and you don't have a GPU worth using.", + ], + models: "tiny, base, small, medium, large-v3, large-v3-turbo (and .en variants)", + speed: "Real-time on small/base; large-v3 is slow without a GPU", + hardware: "Any x86-64 CPU with AVX2", + }, + Variant::WhisperAvx512 => VariantHint { + headline: "Whisper on AVX-512 CPUs", + body: &[ + "Fastest CPU-only Whisper. 
Roughly 1.5-2x throughput over the \ + AVX2 build on supported chips. Use this if you don't have a \ + capable GPU but do have a recent Intel or AMD CPU.", + ], + models: "Same as AVX2; large-v3-turbo becomes practical for live use", + speed: "Best CPU performance; ~1.5-2x AVX2", + hardware: "Intel Tiger/Ice Lake+, AMD Zen 4+", + }, + Variant::WhisperVulkan => VariantHint { + headline: "Whisper with Vulkan GPU", + body: &[ + "Vendor-agnostic GPU acceleration via Vulkan compute shaders. \ + Works on NVIDIA, AMD, and Intel GPUs (including integrated \ + graphics that support Vulkan).", + "Best general-purpose pick for desktops and gaming laptops.", + ], + models: "All Whisper models; large-v3-turbo runs comfortably", + speed: "5-10x CPU on a discrete GPU; falls back to CPU if Vulkan unavailable", + hardware: "Any Vulkan 1.2 GPU; ~2 GB VRAM for large-v3", + }, + Variant::WhisperNative => VariantHint { + headline: "Whisper (source build)", + body: &[ + "A locally compiled Whisper binary with whatever Cargo features \ + you enabled. 
Reported when no specific tier suffix matches.", + ], + models: "Whatever your build supports", + speed: "Depends on build flags (RUSTFLAGS, GPU features)", + hardware: "Whatever you compiled for", + }, + Variant::OnnxAvx2 => VariantHint { + headline: "ONNX engines on AVX2 CPUs", + body: &[ + "CPU inference for the ONNX Runtime engine family: Parakeet, \ + Moonshine, SenseVoice, Paraformer, Dolphin, Omnilingual, and \ + Cohere Transcribe.", + "Pick this when you don't have a GPU but want a faster, more \ + accurate alternative to Whisper.", + ], + models: "parakeet-tdt-0.6b-v3, moonshine-base/tiny, sense-voice-small, paraformer-zh, dolphin-base, omnilingual", + speed: "Parakeet is ~2-3x faster than Whisper-large at higher accuracy", + hardware: "Any x86-64 CPU with AVX2", + }, + Variant::OnnxAvx512 => VariantHint { + headline: "ONNX engines on AVX-512 CPUs", + body: &[ + "Same engine set as ONNX (AVX2), built against a newer toolchain \ + that takes advantage of AVX-512 where ONNX Runtime can use it.", + ], + models: "Same as ONNX (AVX2)", + speed: "Modest gain over AVX2 build; ORT does runtime SIMD dispatch", + hardware: "Intel Tiger/Ice Lake+, AMD Zen 4+", + }, + Variant::OnnxCuda12 | Variant::OnnxCuda13 | Variant::OnnxCuda => VariantHint { + headline: "ONNX engines on NVIDIA CUDA", + body: &[ + "GPU inference via the CUDA execution provider. Best choice for \ + anyone with a recent NVIDIA card running Parakeet or another \ + ONNX engine.", + "voxtype ships separate cu12 and cu13 binaries; pick the one \ + matching your installed CUDA runtime. 
The unversioned variant \ + is for source builds and pre-0.7.0 installs.", + "Note: this binary bundles an ONNX Runtime built with AVX-512, \ + so the CPU also needs AVX-512 to load it cleanly.", + ], + models: "Same as ONNX (AVX2)", + speed: "10-20x AVX2 on capable GPUs; Parakeet faster than real-time even at large sizes", + hardware: "NVIDIA GPU + matching CUDA 12.x or 13.x driver; AVX-512 CPU", + }, + Variant::OnnxMigraphx => VariantHint { + headline: "ONNX engines on AMD MIGraphX", + body: &[ + "GPU inference for AMD GPUs via the MIGraphX execution provider \ + (replaces the ROCm EP that was retired in voxtype 0.7.0).", + "Note: this binary bundles an ONNX Runtime built with AVX-512, \ + so the CPU also needs AVX-512 to load it cleanly. MIGraphX \ + support is new — if the provider fails to register on your \ + driver/card combo, fall back to ONNX (AVX-512) on CPU or \ + switch the engine to Whisper (Vulkan).", + ], + models: "Same as ONNX (AVX2)", + speed: "Comparable to CUDA on similarly-tier GPUs", + hardware: "AMD GPU with MIGraphX-capable driver; AVX-512 CPU", + }, + Variant::OnnxNative => VariantHint { + headline: "ONNX engines (source build)", + body: &[ + "Locally compiled ONNX engine binary with whatever Cargo \ + features you enabled. 
Reported when no specific tier suffix \ + matches.", + ], + models: "Whatever your build supports", + speed: "Depends on build flags", + hardware: "Whatever you compiled for", + }, + } +} + +fn render_legend(f: &mut Frame, area: Rect) { + let line = Line::from(vec![ + Span::styled("★ recommended ", Style::default().fg(Color::Cyan)), + Span::raw("● active "), + Span::raw("✓ ready "), + Span::raw("⚠ CPU/GPU mismatch "), + Span::raw("· not installed "), + Span::raw("— not applicable"), + ]); + f.render_widget(Paragraph::new(line), area); +} + +fn render_help(f: &mut Frame, area: Rect) { + let line = Line::from(Span::styled( + " ↑↓←→ navigate matrix Enter switch r refresh ", + Style::default().fg(Color::Gray), + )); + f.render_widget(Paragraph::new(line), area); +} + +fn family_label(f: EngineFamily) -> &'static str { + match f { + EngineFamily::Whisper => "Whisper", + EngineFamily::Onnx => "ONNX", + } +} + +fn accel_label(a: Acceleration) -> &'static str { + match a { + Acceleration::Avx2 => "AVX2", + Acceleration::Avx512 => "AVX-512", + Acceleration::Vulkan => "Vulkan", + Acceleration::Cuda => "CUDA", + Acceleration::Migraphx => "MIGraphX", + Acceleration::Native => "native", + } +} + +pub fn handle_key(app: &mut App, key: KeyEvent) -> Action { + match key.code { + KeyCode::Char('r') => { + app.refresh_inventory(); + Action::None + } + KeyCode::Up | KeyCode::Char('k') => { + app.move_cursor(-1, 0); + Action::None + } + KeyCode::Down | KeyCode::Char('j') => { + app.move_cursor(1, 0); + Action::None + } + KeyCode::Left | KeyCode::Char('h') => { + app.move_cursor(0, -1); + Action::None + } + KeyCode::Right | KeyCode::Char('l') => { + app.move_cursor(0, 1); + Action::None + } + KeyCode::Enter => match selected_actionable_variant(app) { + Some(v) => Action::SwitchVariant(v), + None => Action::None, + }, + _ => Action::None, + } +} + +/// Variant under the cursor, but only if switching to it makes sense: +/// - exists in the matrix +/// - is installed +/// - runs on this 
CPU +/// - has a compatible GPU (if required) +/// - is not already active +fn selected_actionable_variant(app: &App) -> Option { + if app.inventory.install_kind == InstallKind::Source { + return None; + } + let (r, c) = app.cursor; + let v = app.variant_at(r, c)?; + let status = app.inventory.variants.iter().find(|s| s.variant == v)?; + if status.active || !status.installed || !status.runs_on_this_cpu || !status.gpu_available { + return None; + } + Some(v) +} diff --git a/src/tui/hotkey.rs b/src/tui/hotkey.rs new file mode 100644 index 00000000..6395f171 --- /dev/null +++ b/src/tui/hotkey.rs @@ -0,0 +1,725 @@ +//! Hotkey settings: PTT key, mode, cancel/modifier keys, evdev enable. + +use crossterm::event::{KeyCode, KeyEvent}; +use ratatui::{ + layout::Rect, + style::{Color, Modifier, Style}, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph, Wrap}, + Frame, +}; + +use super::app::{Action, App}; +use super::common::{ + self, FeedbackLevel as CommonFeedback, FormRowSpec, TextInput, TextInputResult, +}; +use super::compositor_bindings; +use super::config_editor::{ConfigEditor, EditorError}; + +/// In-memory copy of the hotkey state, owned by `App`. Edits mutate this; `s` +/// commits via [`ConfigEditor`] and rolls back on validation error. +#[derive(Debug, Clone)] +pub struct HotkeyState { + pub key: String, + pub mode: Mode, + pub enabled: bool, + pub cancel_key: Option, + pub modifier: Option, + /// Status banner shown after Save / Reset, cleared on the next edit. 
+ pub feedback: Option, + pub dirty_since_load: bool, + pub field: Field, + pub editing: Option, +} + +#[derive(Debug, Clone)] +pub struct TextEdit { + pub field: Field, + pub input: TextInput, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Mode { + PushToTalk, + Toggle, +} + +#[derive(Debug, Clone)] +pub struct Feedback { + pub level: FeedbackLevel, + pub message: String, +} + +#[derive(Debug, Clone, Copy)] +pub enum FeedbackLevel { + Ok, + Err, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Field { + Enabled, + Key, + Mode, + CancelKey, + Modifier, +} + +impl Field { + const ALL: &'static [Field] = &[ + Field::Enabled, + Field::Key, + Field::Mode, + Field::CancelKey, + Field::Modifier, + ]; +} + +/// Sensible PTT key choices, in order. Values match what voxtype's evdev +/// listener accepts (KEY_* names without the prefix). +const KEY_CHOICES: &[&str] = &[ + "HOME", + "PAUSE", + "SCROLLLOCK", + "INSERT", + "MENU", + "F13", + "F14", + "F15", + "F16", + "RIGHTCTRL", + "RIGHTALT", + "RIGHTMETA", + "CAPSLOCK", +]; + +const CANCEL_CHOICES: &[Option<&str>] = &[ + None, + Some("ESC"), + Some("BACKSPACE"), + Some("F12"), + Some("DELETE"), + Some("END"), +]; + +const MODIFIER_CHOICES: &[Option<&str>] = &[ + None, + Some("LEFTSHIFT"), + Some("RIGHTSHIFT"), + Some("LEFTCTRL"), + Some("LEFTALT"), + Some("LEFTMETA"), +]; + +impl HotkeyState { + pub fn load() -> Result { + let ed = ConfigEditor::load()?; + Ok(Self { + key: ed + .get_string("hotkey", "key") + .unwrap_or_else(|| "HOME".to_string()), + mode: match ed.get_string("hotkey", "mode").as_deref() { + Some("toggle") => Mode::Toggle, + _ => Mode::PushToTalk, + }, + enabled: ed.get_bool("hotkey", "enabled").unwrap_or(true), + cancel_key: ed.get_string("hotkey", "cancel_key"), + modifier: ed.get_string("hotkey", "model_modifier"), + feedback: None, + dirty_since_load: false, + field: Field::Enabled, + editing: None, + }) + } + + pub fn save(&mut self) -> Action { + let mut ed = match 
ConfigEditor::load() { + Ok(e) => e, + Err(e) => { + self.feedback = Some(Feedback { + level: FeedbackLevel::Err, + message: format!("load: {}", e), + }); + return Action::None; + } + }; + ed.set_string("hotkey", "key", &self.key); + ed.set_string( + "hotkey", + "mode", + match self.mode { + Mode::PushToTalk => "push_to_talk", + Mode::Toggle => "toggle", + }, + ); + ed.set_bool("hotkey", "enabled", self.enabled); + match &self.cancel_key { + Some(k) => ed.set_string("hotkey", "cancel_key", k), + None => ed.unset("hotkey", "cancel_key"), + } + match &self.modifier { + Some(k) => ed.set_string("hotkey", "model_modifier", k), + None => ed.unset("hotkey", "model_modifier"), + } + + match ed.save() { + Ok(()) => { + self.dirty_since_load = false; + self.feedback = Some(Feedback { + level: FeedbackLevel::Ok, + message: format!("Saved to {}", ed.path().display()), + }); + } + Err(e) => { + self.feedback = Some(Feedback { + level: FeedbackLevel::Err, + message: format!("save: {}", e), + }); + } + } + Action::None + } + + pub fn reset(&mut self) { + match Self::load() { + Ok(fresh) => { + let field = self.field; + *self = fresh; + self.field = field; + self.feedback = Some(Feedback { + level: FeedbackLevel::Ok, + message: "Reverted unsaved changes".to_string(), + }); + } + Err(e) => { + self.feedback = Some(Feedback { + level: FeedbackLevel::Err, + message: format!("reload: {}", e), + }); + } + } + } + + fn move_field(&mut self, delta: i32) { + let len = Field::ALL.len() as i32; + let cur = Field::ALL.iter().position(|f| *f == self.field).unwrap_or(0) as i32; + let new = (cur + delta).rem_euclid(len); + self.field = Field::ALL[new as usize]; + } + + /// Cycle the value of the focused field by `delta` (-1 for ← / +1 for →). + fn is_text_field(field: Field) -> bool { + // Free-text on Key / CancelKey / Modifier so users can type custom + // KEY_* names that aren't in the curated cycle list. 
+ matches!(field, Field::Key | Field::CancelKey | Field::Modifier) + } + + fn start_edit_if_text_field(&mut self) -> bool { + // Edit only makes sense when the listener is enabled — otherwise + // these fields are dimmed/inert. + if !self.enabled || !Self::is_text_field(self.field) { + return false; + } + let initial = match self.field { + Field::Key => self.key.clone(), + Field::CancelKey => self.cancel_key.clone().unwrap_or_default(), + Field::Modifier => self.modifier.clone().unwrap_or_default(), + _ => String::new(), + }; + self.editing = Some(TextEdit { + field: self.field, + input: TextInput::new(initial), + }); + true + } + + fn commit_text_edit(&mut self, field: Field, buffer: String) { + let trimmed = buffer.trim(); + match field { + Field::Key => { + if !trimmed.is_empty() { + self.key = trimmed.to_uppercase(); + } + } + Field::CancelKey => { + self.cancel_key = if trimmed.is_empty() { + None + } else { + Some(trimmed.to_uppercase()) + }; + } + Field::Modifier => { + self.modifier = if trimmed.is_empty() { + None + } else { + Some(trimmed.to_uppercase()) + }; + } + _ => {} + } + self.dirty_since_load = true; + self.feedback = None; + } + + fn cycle(&mut self, delta: i32) { + // When the evdev listener is off, only the Enabled toggle responds — + // the rest of the form is greyed out and inert. 
/// Steps through `choices` by `delta`, wrapping at both ends, and returns the
/// newly selected entry.
///
/// When `current` is not in `choices` (a custom key typed by the user), a
/// forward step enters the list at its first entry and a backward step
/// enters it at its last entry. An empty `choices` returns `current`
/// unchanged.
fn cycle_str(choices: &[&'static str], current: &str, delta: i32) -> String {
    if choices.is_empty() {
        return current.to_string();
    }
    let len = choices.len() as i32;
    let next = match choices.iter().position(|c| *c == current) {
        Some(i) => (i as i32 + delta).rem_euclid(len),
        // Custom value: treat it as sitting just past the end, so `-1`
        // selects the last entry rather than the second-to-last.
        None if delta < 0 => (len + delta).rem_euclid(len),
        // Custom value, forward (or zero) step: treat it as sitting just
        // before the start, so `+1` selects the first entry.
        None => (delta - 1).rem_euclid(len),
    };
    choices[next as usize].to_string()
}

/// Steps through `choices` by `delta`, wrapping at both ends, and returns the
/// newly selected entry (`None` entries are returned as `None`).
///
/// A `current` value that is not in `choices` is treated as if it were the
/// first entry. An empty `choices` returns `current` unchanged.
fn cycle_opt(
    choices: &[Option<&'static str>],
    current: Option<&str>,
    delta: i32,
) -> Option<String> {
    if choices.is_empty() {
        return current.map(str::to_string);
    }
    let start = choices
        .iter()
        .position(|c| *c == current)
        .unwrap_or(0) as i32;
    let next = (start + delta).rem_euclid(choices.len() as i32) as usize;
    choices[next].map(str::to_string)
}
+ let greyout = !state.enabled; + + let rows = vec![ + FormRowSpec::new( + state.field == Field::Enabled, + "Built-in evdev listener", + if state.enabled { "enabled" } else { "disabled" }, + ), + FormRowSpec::new( + state.field == Field::Key, + "Push-to-talk key", + match state.editing.as_ref() { + Some(e) if e.field == Field::Key => e.input.caret_string(), + _ => display_key(&state.key), + }, + ) + .dimmed(greyout), + FormRowSpec::new( + state.field == Field::Mode, + "Mode", + match state.mode { + Mode::PushToTalk => "Push-to-talk (hold)", + Mode::Toggle => "Toggle (press to start/stop)", + }, + ) + .dimmed(greyout), + FormRowSpec::new( + state.field == Field::CancelKey, + "Cancel key", + match state.editing.as_ref() { + Some(e) if e.field == Field::CancelKey => e.input.caret_string(), + _ => state + .cancel_key + .as_deref() + .unwrap_or("(none)") + .to_string(), + }, + ) + .dimmed(greyout), + FormRowSpec::new( + state.field == Field::Modifier, + "Modifier (secondary model)", + match state.editing.as_ref() { + Some(e) if e.field == Field::Modifier => e.input.caret_string(), + _ => state + .modifier + .as_deref() + .unwrap_or("(none)") + .to_string(), + }, + ) + .dimmed(greyout), + ]; + + let feedback_pair = state + .feedback + .as_ref() + .map(|fb| (to_common_level(fb.level), fb.message.as_str())); + + let guidance = guidance_for_field(state); + + common::render_form_with_guidance( + f, + area, + "Hotkey", + state.dirty_since_load, + feedback_pair, + &rows, + guidance, + ); +} + +fn to_common_level(level: FeedbackLevel) -> CommonFeedback { + match level { + FeedbackLevel::Ok => CommonFeedback::Ok, + FeedbackLevel::Err => CommonFeedback::Err, + } +} + +/// Right-pane explanation for the focused field. 
+fn guidance_for_field(state: &HotkeyState) -> Vec> { + match state.field { + Field::Enabled => guidance_enabled(state), + Field::Key => guidance_key(state), + Field::Mode => guidance_mode(state), + Field::CancelKey => guidance_cancel(state), + Field::Modifier => guidance_modifier(state), + } +} + +fn heading<'a>(text: &'a str) -> Line<'a> { + Line::from(Span::styled( + text, + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + )) +} + +fn guidance_enabled<'a>(state: &'a HotkeyState) -> Vec> { + let mut lines = vec![ + heading("Built-in evdev listener"), + Line::from(""), + Line::from( + "When enabled, voxtype reads keyboard events directly from \ + /dev/input/event* (your user must be in the `input` group). It \ + owns the chosen PTT key globally — no compositor binding needed.", + ), + Line::from(""), + Line::from( + "When disabled, voxtype reads no keys. Bind your compositor (\ + Hyprland, Sway, Niri, KDE shortcuts) to call:", + ), + Line::from(Span::styled( + " voxtype record start voxtype record stop", + Style::default().fg(Color::Gray), + )), + Line::from(Span::styled( + " voxtype record toggle voxtype record cancel", + Style::default().fg(Color::Gray), + )), + Line::from(""), + ]; + + let bindings = compositor_bindings::detect(); + if !bindings.is_empty() { + lines.push(Line::from(Span::styled( + "Compositor bindings detected", + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + ))); + for b in &bindings { + let file = b + .source + .file_name() + .and_then(|s| s.to_str()) + .unwrap_or(""); + lines.push(Line::from(format!( + " • [{}] {} → voxtype {}", + b.compositor, b.keys, b.action + ))); + lines.push(Line::from(Span::styled( + format!(" from {}", file), + Style::default().fg(Color::Gray), + ))); + } + lines.push(Line::from("")); + } else if !state.enabled { + lines.push(Line::from(Span::styled( + "No compositor bindings detected — voxtype will not receive any \ + PTT key events.", + Style::default().fg(Color::Red), 
+ ))); + lines.push(Line::from("")); + } + + let suggestions = compositor_bindings::suggest_missing(&bindings); + if !suggestions.is_empty() { + let comp = compositor_bindings::dominant_compositor(&bindings); + lines.push(Line::from(Span::styled( + format!("Suggested additions ({} format)", comp.name()), + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + ))); + lines.push(Line::from("")); + for s in &suggestions { + lines.push(Line::from(Span::styled( + format!(" ▸ {}", s.label), + Style::default().add_modifier(Modifier::BOLD), + ))); + lines.push(Line::from(format!(" {}", s.purpose))); + for cfg in &s.config_lines { + lines.push(Line::from(Span::styled( + format!(" {}", cfg), + Style::default().fg(Color::Gray), + ))); + } + lines.push(Line::from("")); + } + } + + if !state.enabled { + lines.push(Line::from(Span::styled( + "Compositor mode active: the rest of this section is ignored.", + Style::default().fg(Color::Yellow), + ))); + } + lines +} + +fn guidance_key<'a>(state: &'a HotkeyState) -> Vec> { + let mut lines = vec![ + heading("Push-to-talk key"), + Line::from(""), + Line::from( + "Pick a key your fingers reach for without thinking. 
HOME, PAUSE, \ + SCROLLLOCK, F13 are popular because they don't conflict with \ + editor shortcuts.", + ), + Line::from(""), + Line::from( + "RIGHT* keys (RIGHTCTRL, RIGHTALT, RIGHTMETA) work well if you \ + touch-type with your left hand on the home row.", + ), + Line::from(""), + Line::from(Span::styled( + "Custom keys can be set in config.toml directly using KEY_* \ + names from .", + Style::default().fg(Color::Gray), + )), + ]; + if !state.enabled { + lines.push(Line::from("")); + lines.push(Line::from(Span::styled( + "(Ignored: evdev listener is disabled.)", + Style::default().fg(Color::Yellow), + ))); + } + lines +} + +fn guidance_mode<'a>(state: &'a HotkeyState) -> Vec> { + let mut lines = vec![ + heading("Activation mode"), + Line::from(""), + Line::from(Span::styled( + "Push-to-talk: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + "Hold the key while you speak; release to transcribe. Most \ + responsive — voice never starts running while you're thinking.", + ), + Line::from(""), + Line::from(Span::styled( + "Toggle: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + "Press once to start, press again to stop. Friendlier for long \ + dictation sessions but easy to leave running by accident.", + ), + ]; + if !state.enabled { + lines.push(Line::from("")); + lines.push(Line::from(Span::styled( + "(Ignored: evdev listener is disabled.)", + Style::default().fg(Color::Yellow), + ))); + } + lines +} + +fn guidance_cancel<'a>(state: &'a HotkeyState) -> Vec> { + let mut lines = vec![ + heading("Cancel key"), + Line::from(""), + Line::from( + "Aborts an in-progress recording or transcription and discards \ + audio without typing anything. Useful when you trip the PTT key \ + by accident or the wrong window is focused.", + ), + Line::from(""), + Line::from( + "ESC is the obvious pick. 
F12 / DELETE / END are good alternatives \ + if ESC is bound to something else in the foreground app.", + ), + Line::from(""), + Line::from(Span::styled( + "(none) leaves cancellation off — kill the recording with \ + `voxtype record cancel` instead.", + Style::default().fg(Color::Gray), + )), + ]; + if !state.enabled { + lines.push(Line::from("")); + lines.push(Line::from(Span::styled( + "(Ignored: evdev listener is disabled.)", + Style::default().fg(Color::Yellow), + ))); + } + lines +} + +fn guidance_modifier<'a>(state: &'a HotkeyState) -> Vec> { + let mut lines = vec![ + heading("Secondary-model modifier"), + Line::from(""), + Line::from( + "When this key is held alongside the PTT key, voxtype switches to \ + the [whisper] secondary_model for that recording.", + ), + Line::from(""), + Line::from( + "Common usage: large-v3 as your main model for accuracy, \ + small.en under the modifier for instant short notes.", + ), + Line::from(""), + Line::from(Span::styled( + "(none) disables the modifier behavior; the PTT key always uses \ + the primary model.", + Style::default().fg(Color::Gray), + )), + ]; + if !state.enabled { + lines.push(Line::from("")); + lines.push(Line::from(Span::styled( + "(Ignored: evdev listener is disabled.)", + Style::default().fg(Color::Yellow), + ))); + } + lines +} + +fn display_key(key: &str) -> String { + if KEY_CHOICES.iter().any(|c| *c == key) { + key.to_string() + } else { + format!("{} (custom)", key) + } +} + +pub fn handle_key(app: &mut App, key: KeyEvent) -> Action { + let state = match app.hotkey.as_mut() { + Some(s) => s, + None => return Action::None, + }; + + if state.editing.is_some() { + return handle_edit_key(state, key); + } + + match key.code { + KeyCode::Up | KeyCode::Char('k') => { + state.move_field(-1); + Action::None + } + KeyCode::Down | KeyCode::Char('j') => { + state.move_field(1); + Action::None + } + KeyCode::Left | KeyCode::Char('h') => { + state.cycle(-1); + Action::None + } + KeyCode::Right | 
KeyCode::Char('l') | KeyCode::Char(' ') => { + state.cycle(1); + Action::None + } + KeyCode::Enter | KeyCode::Char('i') => { + state.start_edit_if_text_field(); + Action::None + } + KeyCode::Char('s') => state.save(), + KeyCode::Char('r') => { + state.reset(); + Action::None + } + _ => Action::None, + } +} + +fn handle_edit_key(state: &mut HotkeyState, key: KeyEvent) -> Action { + let Some(editing) = state.editing.as_mut() else { + return Action::None; + }; + match editing.input.handle_key(key) { + TextInputResult::Continue => Action::None, + TextInputResult::Commit => { + let buf = editing.input.buffer().to_string(); + let field = editing.field; + state.editing = None; + state.commit_text_edit(field, buf); + Action::None + } + TextInputResult::Cancel => { + state.editing = None; + Action::None + } + } +} diff --git a/src/tui/meeting_section.rs b/src/tui/meeting_section.rs new file mode 100644 index 00000000..1bb398f6 --- /dev/null +++ b/src/tui/meeting_section.rs @@ -0,0 +1,268 @@ +//! Meeting mode settings: enabled, audio source, diarization on/off. 
+ +use crossterm::event::{KeyCode, KeyEvent}; +use ratatui::{ + layout::Rect, + style::{Color, Modifier, Style}, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph, Wrap}, + Frame, +}; + +use super::app::{Action, App}; +use super::common::{self, FeedbackLevel, FormRowSpec}; +use super::config_editor::{ConfigEditor, EditorError}; + +#[derive(Debug, Clone)] +pub struct MeetingState { + pub enabled: bool, + pub diarization_enabled: bool, + pub audio_source: String, + pub field: Field, + pub feedback: Option<(FeedbackLevel, String)>, + pub dirty_since_load: bool, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Field { + Enabled, + Diarization, + AudioSource, +} +impl Field { + const ALL: &'static [Field] = &[Field::Enabled, Field::Diarization, Field::AudioSource]; +} +const SOURCES: &[&str] = &["mic", "system", "both"]; + +impl MeetingState { + pub fn load() -> Result { + let ed = ConfigEditor::load()?; + Ok(Self { + enabled: ed.get_bool("meeting", "enabled").unwrap_or(false), + diarization_enabled: ed + .get_bool("meeting.diarization", "enabled") + .unwrap_or(false), + audio_source: ed + .get_string("meeting.audio", "source") + .unwrap_or_else(|| "mic".to_string()), + field: Field::Enabled, + feedback: None, + dirty_since_load: false, + }) + } + pub fn save(&mut self) -> Action { + let mut ed = match ConfigEditor::load() { + Ok(e) => e, + Err(e) => { + self.feedback = Some((FeedbackLevel::Err, format!("load: {}", e))); + return Action::None; + } + }; + ed.set_bool("meeting", "enabled", self.enabled); + ed.set_bool("meeting.diarization", "enabled", self.diarization_enabled); + ed.set_string("meeting.audio", "source", &self.audio_source); + match ed.save() { + Ok(()) => { + self.dirty_since_load = false; + self.feedback = Some(( + FeedbackLevel::Ok, + format!("Saved to {}", ed.path().display()), + )); + } + Err(e) => self.feedback = Some((FeedbackLevel::Err, format!("save: {}", e))), + } + Action::None + } + pub fn reset(&mut self) { + if let 
Ok(fresh) = Self::load() { + let field = self.field; + *self = fresh; + self.field = field; + self.feedback = Some((FeedbackLevel::Ok, "Reverted".to_string())); + } + } + fn move_field(&mut self, delta: i32) { + let len = Field::ALL.len() as i32; + let cur = Field::ALL.iter().position(|f| *f == self.field).unwrap_or(0) as i32; + self.field = Field::ALL[((cur + delta).rem_euclid(len)) as usize]; + } + fn cycle(&mut self, delta: i32) { + match self.field { + Field::Enabled => self.enabled = !self.enabled, + Field::Diarization => self.diarization_enabled = !self.diarization_enabled, + Field::AudioSource => { + let idx = SOURCES + .iter() + .position(|s| *s == self.audio_source) + .map(|i| i as i32) + .unwrap_or(0); + self.audio_source = SOURCES + [(idx + delta).rem_euclid(SOURCES.len() as i32) as usize] + .to_string(); + } + } + self.dirty_since_load = true; + self.feedback = None; + } +} + +pub fn render(f: &mut Frame, area: Rect, app: &App) { + let state = match &app.meeting { + Some(s) => s, + None => { + let block = Block::default().borders(Borders::ALL).title("Meeting"); + let inner = block.inner(area); + f.render_widget(block, area); + f.render_widget(Paragraph::new("Failed to load config.").wrap(Wrap { trim: true }), inner); + return; + } + }; + + let dim_when_off = !state.enabled; + let rows = vec![ + FormRowSpec::new(state.field == Field::Enabled, "Meeting mode", yesno(state.enabled)), + FormRowSpec::new( + state.field == Field::Diarization, + "Speaker diarization", + yesno(state.diarization_enabled), + ) + .dimmed(dim_when_off), + FormRowSpec::new( + state.field == Field::AudioSource, + "Audio source", + &state.audio_source, + ) + .dimmed(dim_when_off), + ]; + + let feedback_pair = state + .feedback + .as_ref() + .map(|(lvl, msg)| (*lvl, msg.as_str())); + + common::render_form_with_guidance( + f, + area, + "Meeting Mode", + state.dirty_since_load, + feedback_pair, + &rows, + guidance_for_field(state), + ); +} + +fn yesno(b: bool) -> String { + (if b { "yes" 
} else { "no" }).to_string() +} + +fn heading<'a>(text: &'a str) -> Line<'a> { + Line::from(Span::styled( + text, + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + )) +} + +fn guidance_for_field(state: &MeetingState) -> Vec> { + match state.field { + Field::Enabled => vec![ + heading("Meeting mode"), + Line::from(""), + Line::from( + "Long-form recording mode. Voxtype chunks audio into \ + segments, transcribes each, and stitches a continuous \ + transcript with timestamps.", + ), + Line::from(""), + Line::from( + "Persists segments to ~/.local/share/voxtype/meetings/ so a \ + crash doesn't lose your transcript.", + ), + Line::from(""), + Line::from(Span::styled( + "Other [meeting.*] fields (chunk duration, summary command, \ + storage path) live in config.toml directly.", + Style::default().fg(Color::Gray), + )), + ], + Field::Diarization => vec![ + heading("Speaker diarization"), + Line::from(""), + Line::from( + "Tags each segment with a speaker label (Speaker 1, \ + Speaker 2, …) so the transcript reads like dialogue.", + ), + Line::from(""), + Line::from( + "Uses an ONNX speaker-embedding model (ECAPA-TDNN) plus \ + clustering. Requires the ml-diarization feature in your \ + build.", + ), + Line::from(""), + Line::from(Span::styled( + "Off by default — adds CPU cost and isn't useful for \ + single-speaker dictation.", + Style::default().fg(Color::Gray), + )), + ], + Field::AudioSource => vec![ + heading("Audio source"), + Line::from(""), + Line::from(Span::styled( + "mic: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from("Microphone only. Standard interview/podcast capture."), + Line::from(""), + Line::from(Span::styled( + "system: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + "System audio (loopback) only. 
Captures meeting playback \ + from Zoom/Meet/etc.", + ), + Line::from(""), + Line::from(Span::styled( + "both: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + "Mic + system loopback. Voxtype runs GTCRN echo cancellation \ + to keep your voice from doubling.", + ), + ], + } +} + +pub fn handle_key(app: &mut App, key: KeyEvent) -> Action { + let state = match app.meeting.as_mut() { + Some(s) => s, + None => return Action::None, + }; + match key.code { + KeyCode::Up | KeyCode::Char('k') => { + state.move_field(-1); + Action::None + } + KeyCode::Down | KeyCode::Char('j') => { + state.move_field(1); + Action::None + } + KeyCode::Left | KeyCode::Char('h') => { + state.cycle(-1); + Action::None + } + KeyCode::Right | KeyCode::Char('l') | KeyCode::Char(' ') => { + state.cycle(1); + Action::None + } + KeyCode::Char('s') => state.save(), + KeyCode::Char('r') => { + state.reset(); + Action::None + } + _ => Action::None, + } +} diff --git a/src/tui/mod.rs b/src/tui/mod.rs new file mode 100644 index 00000000..788aac13 --- /dev/null +++ b/src/tui/mod.rs @@ -0,0 +1,428 @@ +//! Terminal UI for `voxtype configure`. +//! +//! Renders an interactive view over voxtype settings. The General section +//! (variant picker + daemon status) is functional today; remaining sections +//! ship as placeholders and will be filled in over subsequent PRs. 
+ +mod advanced_section; +mod app; +mod audio; +mod common; +mod compositor_bindings; +mod config_editor; +mod engine; +mod general; +mod hotkey; +mod meeting_section; +mod notifications_section; +mod output_section; +mod section; +mod sidebar; +mod text_section; +mod vad_section; +mod waybar_section; + +#[allow(unused_imports)] +pub(crate) use config_editor::{ConfigEditor, EditorError}; + +use crossterm::{ + event::{ + self, DisableMouseCapture, EnableMouseCapture, Event, KeyCode, KeyEvent, KeyModifiers, + MouseButton, MouseEvent, MouseEventKind, + }, + execute, + terminal::{disable_raw_mode, enable_raw_mode, EnterAlternateScreen, LeaveAlternateScreen}, +}; +use ratatui::{ + layout::{Constraint, Direction, Layout, Rect}, + style::{Color, Style}, + text::{Line, Span}, + widgets::Paragraph, + backend::CrosstermBackend, + Frame, Terminal, +}; +use std::io::{self, Stdout}; +use std::time::Duration; + +use app::{Action, App}; +use section::Section; + +type Tui = Terminal>; + +pub fn run(force_package_mode: bool) -> anyhow::Result<()> { + let mut terminal = enter_terminal()?; + let result = event_loop(&mut terminal, force_package_mode); + leave_terminal(&mut terminal)?; + result +} + +fn enter_terminal() -> anyhow::Result { + enable_raw_mode()?; + let mut stdout = io::stdout(); + execute!(stdout, EnterAlternateScreen, EnableMouseCapture)?; + Ok(Terminal::new(CrosstermBackend::new(stdout))?) +} + +fn leave_terminal(terminal: &mut Tui) -> anyhow::Result<()> { + disable_raw_mode()?; + execute!( + terminal.backend_mut(), + LeaveAlternateScreen, + DisableMouseCapture + )?; + terminal.show_cursor()?; + Ok(()) +} + +fn event_loop(terminal: &mut Tui, force_package_mode: bool) -> anyhow::Result<()> { + let mut app = App::new(force_package_mode); + let mut last_general_refresh = std::time::Instant::now(); + let general_refresh_interval = Duration::from_secs(2); + + loop { + terminal.draw(|f| draw(f, &app))?; + + if !event::poll(Duration::from_millis(250))? { + // Idle tick. 
Refresh the General-screen state (daemon status, + // active variant, inventory) so the green/red dot stays current + // without the user pressing `r`. + if app.current_section == Section::General + && last_general_refresh.elapsed() >= general_refresh_interval + { + app.refresh_inventory(); + last_general_refresh = std::time::Instant::now(); + } + continue; + } + match event::read()? { + Event::Key(key) => { + if !matches!( + key.kind, + crossterm::event::KeyEventKind::Press | crossterm::event::KeyEventKind::Repeat + ) { + continue; + } + + // Global shortcuts handled before delegating to the focused pane. + if let Some(action) = handle_global_key(&mut app, key) { + match dispatch_action(terminal, &mut app, action)? { + LoopControl::Continue => continue, + LoopControl::Quit => return Ok(()), + } + } + + let action = if app.sidebar_focused { + handle_sidebar_key(&mut app, key) + } else { + handle_section_key(&mut app, key) + }; + + match dispatch_action(terminal, &mut app, action)? { + LoopControl::Continue => {} + LoopControl::Quit => return Ok(()), + } + } + Event::Mouse(mouse) => { + handle_mouse(&mut app, mouse); + } + _ => {} + } + } +} + +enum LoopControl { + Continue, + Quit, +} + +fn dispatch_action( + terminal: &mut Tui, + app: &mut App, + action: Action, +) -> anyhow::Result { + match action { + Action::Quit => Ok(LoopControl::Quit), + Action::SwitchVariant(variant) => { + // Drop out of the alternate screen so pkexec can prompt. + leave_terminal(terminal)?; + let outcome = run_pkexec_switch(variant); + *terminal = enter_terminal()?; + terminal.clear()?; + app.record_switch_attempt(variant, outcome); + Ok(LoopControl::Continue) + } + Action::None => Ok(LoopControl::Continue), + } +} + +fn handle_global_key(app: &mut App, key: KeyEvent) -> Option { + // While the active section is inline-editing a text field, swallow + // global shortcuts so the user can type 'q', press Esc, etc. into the + // input. The section's handle_key gets the key instead. 
+ if app.is_editing() { + return None; + } + + // Help overlay: any key dismisses it (including ?). + if app.help_open { + app.help_open = false; + return Some(Action::None); + } + if matches!(key.code, KeyCode::Char('?')) { + app.help_open = true; + return Some(Action::None); + } + + match (key.code, key.modifiers) { + (KeyCode::Char('q'), KeyModifiers::NONE) => Some(Action::Quit), + (KeyCode::Char('c'), m) if m.contains(KeyModifiers::CONTROL) => Some(Action::Quit), + (KeyCode::Tab, _) => { + if app.sidebar_focused { + app.focus_content(); + } else { + app.focus_sidebar(); + } + Some(Action::None) + } + (KeyCode::Esc, _) => { + if !app.sidebar_focused { + // First Esc returns focus to sidebar, second quits. + app.focus_sidebar(); + Some(Action::None) + } else { + Some(Action::Quit) + } + } + _ => None, + } +} + +fn handle_sidebar_key(app: &mut App, key: KeyEvent) -> Action { + match key.code { + KeyCode::Up | KeyCode::Char('k') => { + app.move_sidebar(-1); + app.open_hovered_section(); + Action::None + } + KeyCode::Down | KeyCode::Char('j') => { + app.move_sidebar(1); + app.open_hovered_section(); + Action::None + } + KeyCode::Enter | KeyCode::Right | KeyCode::Char('l') => { + app.open_hovered_section(); + app.focus_content(); + Action::None + } + _ => Action::None, + } +} + +fn handle_mouse(app: &mut App, mouse: MouseEvent) { + // Ignore mouse input while help overlay is open or a text field is editing. + if app.help_open || app.is_editing() { + return; + } + if !matches!(mouse.kind, MouseEventKind::Down(MouseButton::Left)) { + return; + } + + let col = mouse.column; + let row = mouse.row; + + // Title bar occupies row 0; sidebar inner rows begin at absolute row 1. 
+ if col < sidebar::WIDTH && row >= 1 { + let idx = (row - 1) as usize; + if idx < Section::ALL.len() { + app.sidebar_cursor = idx; + app.open_hovered_section(); + app.focus_sidebar(); + } + return; + } + + if col >= sidebar::WIDTH { + app.focus_content(); + } +} + +fn handle_section_key(app: &mut App, key: KeyEvent) -> Action { + match app.current_section { + Section::General => general::handle_key(app, key), + Section::Hotkey => hotkey::handle_key(app, key), + Section::Audio => audio::handle_key(app, key), + Section::Engine => engine::handle_key(app, key), + Section::Output => output_section::handle_key(app, key), + Section::Text => text_section::handle_key(app, key), + Section::Vad => vad_section::handle_key(app, key), + Section::Meeting => meeting_section::handle_key(app, key), + Section::Notifications => notifications_section::handle_key(app, key), + Section::Waybar => waybar_section::handle_key(app, key), + Section::Advanced => advanced_section::handle_key(app, key), + } +} + +fn draw(f: &mut Frame, app: &App) { + let outer = Layout::default() + .direction(Direction::Vertical) + .constraints([ + Constraint::Length(1), // title bar + Constraint::Min(0), // body (sidebar + content) + Constraint::Length(1), // footer / help + ]) + .split(f.area()); + + render_title(f, outer[0]); + + let body = Layout::default() + .direction(Direction::Horizontal) + .constraints([Constraint::Length(sidebar::WIDTH), Constraint::Min(0)]) + .split(outer[1]); + + sidebar::render(f, body[0], app); + render_section(f, body[1], app); + + render_footer(f, outer[2], app); + + if app.help_open { + render_help_overlay(f); + } +} + +fn render_help_overlay(f: &mut Frame) { + let area = f.area(); + // Centered modal: ~70% width, ~85% height, capped at 78x30. 
+ let w = area.width.saturating_sub(8).min(78); + let h = area.height.saturating_sub(4).min(30); + let x = area.x + area.width.saturating_sub(w) / 2; + let y = area.y + area.height.saturating_sub(h) / 2; + let rect = Rect { + x, + y, + width: w, + height: h, + }; + + // Clear under the modal so it overpaints whatever's behind. + f.render_widget(ratatui::widgets::Clear, rect); + + let block = ratatui::widgets::Block::default() + .borders(ratatui::widgets::Borders::ALL) + .border_style(Style::default().fg(Color::Cyan)) + .title(" Voxtype Configuration — Help "); + let inner = block.inner(rect); + f.render_widget(block, rect); + + let bold = Style::default().add_modifier(ratatui::style::Modifier::BOLD); + let dim = Style::default().fg(Color::Gray); + + let lines = vec![ + Line::from(Span::styled("Global", bold)), + Line::from(" Tab Toggle focus between sidebar and section"), + Line::from(" Esc Sidebar focus / quit from sidebar"), + Line::from(" q, Ctrl-C Quit"), + Line::from(" ? Toggle this help"), + Line::from(""), + Line::from(Span::styled("Sidebar", bold)), + Line::from(" ↑↓ / jk Navigate sections"), + Line::from(" Enter, →, l Open section / focus content"), + Line::from(""), + Line::from(Span::styled("Section forms", bold)), + Line::from(" ↑↓ / jk Navigate fields"), + Line::from(" ←→ / hl Cycle field value"), + Line::from(" Space Toggle / advance"), + Line::from(" Enter, i Edit text field"), + Line::from(" s Save changes to config.toml"), + Line::from(" r Revert unsaved changes"), + Line::from(""), + Line::from(Span::styled("Inline text editing", bold)), + Line::from(" type Insert at cursor"), + Line::from(" ←→ Move cursor"), + Line::from(" Home / End Beginning / end of line"), + Line::from(" Backspace Delete previous char"), + Line::from(" Delete Delete next char"), + Line::from(" Ctrl-W Delete previous word"), + Line::from(" Ctrl-U Clear line"), + Line::from(" Enter Commit"), + Line::from(" Esc, Ctrl-C Cancel"), + Line::from(""), + Line::from(Span::styled("Press 
any key to dismiss.", dim)), + ]; + f.render_widget(Paragraph::new(lines), inner); +} + +fn render_title(f: &mut Frame, area: Rect) { + let line = Line::from(vec![ + Span::raw(" Voxtype Configuration"), + Span::styled( + " · ", + Style::default().fg(Color::DarkGray), + ), + Span::styled( + "edit settings without leaving the terminal", + Style::default().fg(Color::DarkGray), + ), + ]); + f.render_widget(Paragraph::new(line), area); +} + +fn render_footer(f: &mut Frame, area: Rect, app: &App) { + let line = if app.sidebar_focused { + // Show the highlighted section's summary alongside the keymap so the + // user sees what each section covers without opening it. + let summary = Section::ALL + .get(app.sidebar_cursor) + .map(|s| s.summary()) + .unwrap_or(""); + Line::from(vec![ + Span::styled( + " ↑↓ Enter open Tab content ? help q quit ", + Style::default().fg(Color::Gray), + ), + Span::styled( + format!("│ {}", summary), + Style::default().fg(Color::Cyan), + ), + ]) + } else { + Line::from(Span::styled( + " Tab / Esc back to sidebar ? 
help q quit ", + Style::default().fg(Color::Gray), + )) + }; + f.render_widget(Paragraph::new(line), area); +} + +fn render_section(f: &mut Frame, area: Rect, app: &App) { + match app.current_section { + Section::General => general::render(f, area, app), + Section::Hotkey => hotkey::render(f, area, app), + Section::Audio => audio::render(f, area, app), + Section::Engine => engine::render(f, area, app), + Section::Output => output_section::render(f, area, app), + Section::Text => text_section::render(f, area, app), + Section::Vad => vad_section::render(f, area, app), + Section::Meeting => meeting_section::render(f, area, app), + Section::Notifications => notifications_section::render(f, area, app), + Section::Waybar => waybar_section::render(f, area, app), + Section::Advanced => advanced_section::render(f, area, app), + } +} + +fn run_pkexec_switch(variant: crate::setup::binary::Variant) -> Result<(), String> { + let exe = std::env::current_exe().map_err(|e| format!("current_exe: {}", e))?; + let status = std::process::Command::new("pkexec") + .arg(exe) + .arg("setup") + .arg("variant") + .arg("--to") + .arg(variant.binary_name()) + .status() + .map_err(|e| format!("failed to launch pkexec: {} (is polkit installed?)", e))?; + + if status.success() { + Ok(()) + } else { + Err(format!("pkexec exited with {}", status)) + } +} diff --git a/src/tui/notifications_section.rs b/src/tui/notifications_section.rs new file mode 100644 index 00000000..c1c1067e --- /dev/null +++ b/src/tui/notifications_section.rs @@ -0,0 +1,259 @@ +//! Desktop notifications section. 
+ +use crossterm::event::{KeyCode, KeyEvent}; +use ratatui::{ + layout::Rect, + style::{Color, Modifier, Style}, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph, Wrap}, + Frame, +}; + +use super::app::{Action, App}; +use super::common::{self, FeedbackLevel, FormRowSpec}; +use super::config_editor::{ConfigEditor, EditorError}; + +#[derive(Debug, Clone)] +pub struct NotificationsState { + pub on_recording_start: bool, + pub on_recording_stop: bool, + pub on_transcription: bool, + pub show_engine_icon: bool, + pub field: Field, + pub feedback: Option<(FeedbackLevel, String)>, + pub dirty_since_load: bool, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Field { + OnStart, + OnStop, + OnTranscription, + ShowEngineIcon, +} +impl Field { + const ALL: &'static [Field] = &[ + Field::OnStart, + Field::OnStop, + Field::OnTranscription, + Field::ShowEngineIcon, + ]; +} + +const TABLE: &str = "output.notification"; + +impl NotificationsState { + pub fn load() -> Result { + let ed = ConfigEditor::load()?; + Ok(Self { + on_recording_start: ed.get_bool(TABLE, "on_recording_start").unwrap_or(false), + on_recording_stop: ed.get_bool(TABLE, "on_recording_stop").unwrap_or(false), + on_transcription: ed.get_bool(TABLE, "on_transcription").unwrap_or(true), + show_engine_icon: ed.get_bool(TABLE, "show_engine_icon").unwrap_or(false), + field: Field::OnStart, + feedback: None, + dirty_since_load: false, + }) + } + pub fn save(&mut self) -> Action { + let mut ed = match ConfigEditor::load() { + Ok(e) => e, + Err(e) => { + self.feedback = Some((FeedbackLevel::Err, format!("load: {}", e))); + return Action::None; + } + }; + ed.set_bool(TABLE, "on_recording_start", self.on_recording_start); + ed.set_bool(TABLE, "on_recording_stop", self.on_recording_stop); + ed.set_bool(TABLE, "on_transcription", self.on_transcription); + ed.set_bool(TABLE, "show_engine_icon", self.show_engine_icon); + match ed.save() { + Ok(()) => { + self.dirty_since_load = false; + self.feedback = 
Some(( + FeedbackLevel::Ok, + format!("Saved to {}", ed.path().display()), + )); + } + Err(e) => self.feedback = Some((FeedbackLevel::Err, format!("save: {}", e))), + } + Action::None + } + pub fn reset(&mut self) { + if let Ok(fresh) = Self::load() { + let field = self.field; + *self = fresh; + self.field = field; + self.feedback = Some((FeedbackLevel::Ok, "Reverted".to_string())); + } + } + fn move_field(&mut self, delta: i32) { + let len = Field::ALL.len() as i32; + let cur = Field::ALL.iter().position(|f| *f == self.field).unwrap_or(0) as i32; + self.field = Field::ALL[((cur + delta).rem_euclid(len)) as usize]; + } + fn cycle(&mut self) { + match self.field { + Field::OnStart => self.on_recording_start = !self.on_recording_start, + Field::OnStop => self.on_recording_stop = !self.on_recording_stop, + Field::OnTranscription => self.on_transcription = !self.on_transcription, + Field::ShowEngineIcon => self.show_engine_icon = !self.show_engine_icon, + } + self.dirty_since_load = true; + self.feedback = None; + } +} + +pub fn render(f: &mut Frame, area: Rect, app: &App) { + let state = match &app.notifications { + Some(s) => s, + None => { + let block = Block::default().borders(Borders::ALL).title("Notifications"); + let inner = block.inner(area); + f.render_widget(block, area); + f.render_widget(Paragraph::new("Failed to load config.").wrap(Wrap { trim: true }), inner); + return; + } + }; + + let rows = vec![ + FormRowSpec::new( + state.field == Field::OnStart, + "On recording start", + yesno(state.on_recording_start), + ), + FormRowSpec::new( + state.field == Field::OnStop, + "On recording stop", + yesno(state.on_recording_stop), + ), + FormRowSpec::new( + state.field == Field::OnTranscription, + "Show transcribed text", + yesno(state.on_transcription), + ), + FormRowSpec::new( + state.field == Field::ShowEngineIcon, + "Engine icon in title", + yesno(state.show_engine_icon), + ), + ]; + + let feedback_pair = state + .feedback + .as_ref() + .map(|(lvl, msg)| (*lvl, 
msg.as_str())); + + common::render_form_with_guidance( + f, + area, + "Desktop Notifications", + state.dirty_since_load, + feedback_pair, + &rows, + guidance_for_field(state), + ); +} + +fn yesno(b: bool) -> String { + (if b { "yes" } else { "no" }).to_string() +} + +fn heading<'a>(text: &'a str) -> Line<'a> { + Line::from(Span::styled( + text, + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + )) +} + +fn guidance_for_field(state: &NotificationsState) -> Vec> { + match state.field { + Field::OnStart => vec![ + heading("On recording start"), + Line::from(""), + Line::from( + "Fires a desktop notification the moment voxtype begins \ + capturing audio.", + ), + Line::from(""), + Line::from( + "Useful when you have audio feedback off and want a visual \ + cue. Most users leave this off — the recording indicator in \ + Waybar covers it.", + ), + ], + Field::OnStop => vec![ + heading("On recording stop"), + Line::from(""), + Line::from( + "Notifies when voxtype stops recording and starts \ + transcribing. Helpful when transcription takes a few \ + seconds — you know voxtype heard the stop.", + ), + ], + Field::OnTranscription => vec![ + heading("Show transcribed text"), + Line::from(""), + Line::from( + "After transcription completes, posts the transcript text \ + in a desktop notification.", + ), + Line::from(""), + Line::from( + "Most useful when output goes to the wrong window (e.g. you \ + changed focus mid-dictation). 
The notification is the \ + receipt.", + ), + Line::from(""), + Line::from(Span::styled( + "Notifications go through libnotify, so they respect mako/\ + dunst/KDE/GNOME settings.", + Style::default().fg(Color::Gray), + )), + ], + Field::ShowEngineIcon => vec![ + heading("Engine icon in title"), + Line::from(""), + Line::from( + "Prefixes the notification title with an engine icon \ + (🦜 for Parakeet, 🗣️ for Whisper) so you can see at a \ + glance which engine produced the transcript.", + ), + Line::from(""), + Line::from( + "Helpful when you switch engines often or run multiple \ + voxtype configurations side by side.", + ), + ], + } +} + +pub fn handle_key(app: &mut App, key: KeyEvent) -> Action { + let state = match app.notifications.as_mut() { + Some(s) => s, + None => return Action::None, + }; + match key.code { + KeyCode::Up | KeyCode::Char('k') => { + state.move_field(-1); + Action::None + } + KeyCode::Down | KeyCode::Char('j') => { + state.move_field(1); + Action::None + } + KeyCode::Left | KeyCode::Right | KeyCode::Char('h') | KeyCode::Char('l') + | KeyCode::Char(' ') => { + state.cycle(); + Action::None + } + KeyCode::Char('s') => state.save(), + KeyCode::Char('r') => { + state.reset(); + Action::None + } + _ => Action::None, + } +} diff --git a/src/tui/output_section.rs b/src/tui/output_section.rs new file mode 100644 index 00000000..f967b9b5 --- /dev/null +++ b/src/tui/output_section.rs @@ -0,0 +1,566 @@ +//! Output section: how transcribed text is delivered. 
+ +use crossterm::event::{KeyCode, KeyEvent}; +use ratatui::{ + layout::Rect, + style::{Color, Modifier, Style}, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph, Wrap}, + Frame, +}; + +use super::app::{Action, App}; +use super::common::{self, FormRowSpec, TextInput, TextInputResult}; +use super::config_editor::{ConfigEditor, EditorError}; + +#[derive(Debug, Clone)] +pub struct OutputState { + pub mode: String, + pub fallback_to_clipboard: bool, + pub auto_submit: bool, + pub shift_enter_newlines: bool, + pub pre_type_delay_ms: i64, + pub append_text: Option, + pub post_process_command: Option, + pub field: Field, + pub feedback: Option, + pub dirty_since_load: bool, + pub editing: Option, +} + +#[derive(Debug, Clone)] +pub struct TextEdit { + pub field: Field, + pub input: TextInput, +} + +#[derive(Debug, Clone)] +pub struct Feedback { + pub level: FeedbackLevel, + pub message: String, +} +#[derive(Debug, Clone, Copy)] +pub enum FeedbackLevel { + Ok, + Err, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Field { + Mode, + Fallback, + AutoSubmit, + ShiftEnterNewlines, + PreTypeDelay, + AppendText, + PostProcess, +} + +impl Field { + const ALL: &'static [Field] = &[ + Field::Mode, + Field::Fallback, + Field::AutoSubmit, + Field::ShiftEnterNewlines, + Field::PreTypeDelay, + Field::AppendText, + Field::PostProcess, + ]; +} + +const MODE_CHOICES: &[&str] = &["type", "clipboard", "paste", "file"]; +const APPEND_CHOICES: &[Option<&str>] = &[None, Some(" "), Some("\n"), Some(". 
")]; +const POST_PROCESS_PRESETS: &[Option<&str>] = &[ + None, + Some("ollama run llama3.2 'Polish: '"), + Some("sed 's/uh, //g'"), +]; +const DELAY_STEP: i64 = 25; + +impl OutputState { + pub fn load() -> Result { + let ed = ConfigEditor::load()?; + Ok(Self { + mode: ed + .get_string("output", "mode") + .unwrap_or_else(|| "type".to_string()), + fallback_to_clipboard: ed + .get_bool("output", "fallback_to_clipboard") + .unwrap_or(true), + auto_submit: ed.get_bool("output", "auto_submit").unwrap_or(false), + shift_enter_newlines: ed + .get_bool("output", "shift_enter_newlines") + .unwrap_or(false), + pre_type_delay_ms: ed.get_int("output", "pre_type_delay_ms").unwrap_or(0), + append_text: ed.get_string("output", "append_text"), + post_process_command: ed.get_string("post_process", "command"), + field: Field::Mode, + feedback: None, + dirty_since_load: false, + editing: None, + }) + } + + fn is_text_field(field: Field) -> bool { + matches!(field, Field::AppendText | Field::PostProcess) + } + + fn start_edit_if_text_field(&mut self) -> bool { + if !Self::is_text_field(self.field) { + return false; + } + let initial = match self.field { + Field::AppendText => self.append_text.clone().unwrap_or_default(), + Field::PostProcess => self.post_process_command.clone().unwrap_or_default(), + _ => String::new(), + }; + self.editing = Some(TextEdit { + field: self.field, + input: TextInput::new(initial), + }); + true + } + + fn commit_text_edit(&mut self, field: Field, buffer: String) { + match field { + Field::AppendText => { + self.append_text = if buffer.is_empty() { None } else { Some(buffer) }; + } + Field::PostProcess => { + self.post_process_command = if buffer.trim().is_empty() { + None + } else { + Some(buffer) + }; + } + _ => {} + } + self.dirty_since_load = true; + self.feedback = None; + } + + pub fn save(&mut self) -> Action { + let mut ed = match ConfigEditor::load() { + Ok(e) => e, + Err(e) => { + self.feedback = Some(Feedback { + level: FeedbackLevel::Err, + 
message: format!("load: {}", e), + }); + return Action::None; + } + }; + ed.set_string("output", "mode", &self.mode); + ed.set_bool( + "output", + "fallback_to_clipboard", + self.fallback_to_clipboard, + ); + ed.set_bool("output", "auto_submit", self.auto_submit); + ed.set_bool("output", "shift_enter_newlines", self.shift_enter_newlines); + ed.set_int("output", "pre_type_delay_ms", self.pre_type_delay_ms); + match &self.append_text { + Some(t) => ed.set_string("output", "append_text", t), + None => ed.unset("output", "append_text"), + } + match &self.post_process_command { + Some(c) if !c.is_empty() => ed.set_string("post_process", "command", c), + _ => ed.unset("post_process", "command"), + } + match ed.save() { + Ok(()) => { + self.dirty_since_load = false; + self.feedback = Some(Feedback { + level: FeedbackLevel::Ok, + message: format!("Saved to {}", ed.path().display()), + }); + } + Err(e) => { + self.feedback = Some(Feedback { + level: FeedbackLevel::Err, + message: format!("save: {}", e), + }); + } + } + Action::None + } + + pub fn reset(&mut self) { + match Self::load() { + Ok(fresh) => { + let field = self.field; + *self = fresh; + self.field = field; + self.feedback = Some(Feedback { + level: FeedbackLevel::Ok, + message: "Reverted unsaved changes".to_string(), + }); + } + Err(e) => { + self.feedback = Some(Feedback { + level: FeedbackLevel::Err, + message: format!("reload: {}", e), + }); + } + } + } + + fn move_field(&mut self, delta: i32) { + let len = Field::ALL.len() as i32; + let cur = Field::ALL.iter().position(|f| *f == self.field).unwrap_or(0) as i32; + let new = (cur + delta).rem_euclid(len); + self.field = Field::ALL[new as usize]; + } + + fn cycle(&mut self, delta: i32) { + match self.field { + Field::Mode => { + let idx = MODE_CHOICES + .iter() + .position(|c| *c == self.mode) + .map(|i| i as i32) + .unwrap_or(0); + let n = (idx + delta).rem_euclid(MODE_CHOICES.len() as i32); + self.mode = MODE_CHOICES[n as usize].to_string(); + } + 
Field::Fallback => self.fallback_to_clipboard = !self.fallback_to_clipboard, + Field::AutoSubmit => self.auto_submit = !self.auto_submit, + Field::ShiftEnterNewlines => self.shift_enter_newlines = !self.shift_enter_newlines, + Field::PreTypeDelay => { + self.pre_type_delay_ms = + (self.pre_type_delay_ms + delta as i64 * DELAY_STEP).clamp(0, 5000); + } + Field::AppendText => { + let idx = APPEND_CHOICES + .iter() + .position(|c| c.as_deref() == self.append_text.as_deref()) + .map(|i| i as i32) + .unwrap_or(0); + let n = (idx + delta).rem_euclid(APPEND_CHOICES.len() as i32); + self.append_text = APPEND_CHOICES[n as usize].map(|s| s.to_string()); + } + Field::PostProcess => { + let idx = POST_PROCESS_PRESETS + .iter() + .position(|c| c.as_deref() == self.post_process_command.as_deref()) + .map(|i| i as i32) + .unwrap_or(0); + let n = (idx + delta).rem_euclid(POST_PROCESS_PRESETS.len() as i32); + self.post_process_command = + POST_PROCESS_PRESETS[n as usize].map(|s| s.to_string()); + } + } + self.dirty_since_load = true; + self.feedback = None; + } +} + +pub fn render(f: &mut Frame, area: Rect, app: &App) { + let state = match &app.output { + Some(s) => s, + None => { + let block = Block::default().borders(Borders::ALL).title("Output"); + let inner = block.inner(area); + f.render_widget(block, area); + f.render_widget( + Paragraph::new("Failed to load config; check ~/.config/voxtype/config.toml.") + .wrap(Wrap { trim: true }), + inner, + ); + return; + } + }; + + let rows = vec![ + FormRowSpec::new(state.field == Field::Mode, "Output mode", &state.mode), + FormRowSpec::new( + state.field == Field::Fallback, + "Fallback to clipboard", + yesno(state.fallback_to_clipboard), + ), + FormRowSpec::new( + state.field == Field::AutoSubmit, + "Auto-submit (press Enter)", + yesno(state.auto_submit), + ), + FormRowSpec::new( + state.field == Field::ShiftEnterNewlines, + "Newlines as Shift+Enter", + yesno(state.shift_enter_newlines), + ), + FormRowSpec::new( + state.field == 
Field::PreTypeDelay,
+            "Pre-type delay (ms)",
+            state.pre_type_delay_ms.to_string(),
+        ),
+        FormRowSpec::new(
+            state.field == Field::AppendText,
+            "Append after each",
+            match state.editing.as_ref() {
+                Some(e) if e.field == Field::AppendText => e.input.caret_string(),
+                _ => display_append(state.append_text.as_deref()),
+            },
+        ),
+        FormRowSpec::new(
+            state.field == Field::PostProcess,
+            "Post-process command",
+            match state.editing.as_ref() {
+                Some(e) if e.field == Field::PostProcess => e.input.caret_string(),
+                _ => state
+                    .post_process_command
+                    .as_deref()
+                    .map(|s| {
+                        // Cut on a char boundary. Slicing at a fixed byte
+                        // offset (`&s[..30]`) panics when byte 30 lands inside
+                        // a multi-byte character of the command.
+                        match s.char_indices().nth(30) {
+                            Some((cut, _)) => format!("{}…", &s[..cut]),
+                            None => s.to_string(),
+                        }
+                    })
+                    .unwrap_or_else(|| "(none)".to_string()),
+            },
+        ),
+    ];
+
+    // Translate this module's feedback level into the one `common` expects.
+    let feedback_pair = state.feedback.as_ref().map(|fb| {
+        (
+            match fb.level {
+                FeedbackLevel::Ok => common::FeedbackLevel::Ok,
+                FeedbackLevel::Err => common::FeedbackLevel::Err,
+            },
+            fb.message.as_str(),
+        )
+    });
+
+    common::render_form_with_guidance(
+        f,
+        area,
+        "Output",
+        state.dirty_since_load,
+        feedback_pair,
+        &rows,
+        guidance_for_field(state),
+    );
+}
+
+/// Renders a boolean as `"yes"` / `"no"` for display in a form row.
+fn yesno(b: bool) -> String {
+    (if b { "yes" } else { "no" }).to_string()
+}
+
+/// Display form of the append-text setting: `None` becomes "(none)", a single
+/// space becomes "space", a single newline becomes "newline", and any other
+/// value is shown Debug-quoted.
+fn display_append(s: Option<&str>) -> String {
+    match s {
+        None => "(none)".to_string(),
+        Some(" ") => "space".to_string(),
+        Some("\n") => "newline".to_string(),
+        Some(other) => format!("{:?}", other),
+    }
+}
+
+/// Wraps `text` in a single bold cyan line.
+fn heading<'a>(text: &'a str) -> Line<'a> {
+    Line::from(Span::styled(
+        text,
+        Style::default()
+            .fg(Color::Cyan)
+            .add_modifier(Modifier::BOLD),
+    ))
+}
+
+/// Returns the guidance lines for the field currently selected in `state`;
+/// `render` hands them to `common::render_form_with_guidance`.
+fn guidance_for_field(state: &OutputState) -> Vec<Line<'static>> {
+    match state.field {
+        Field::Mode => vec![
+            heading("Output mode"),
+            Line::from(""),
+            Line::from(Span::styled(
+                "type: ",
+                Style::default().add_modifier(Modifier::BOLD),
+            )),
+            Line::from(
+                "Simulates keyboard typing via wtype → dotool → ydotool fallback. 
\ + Default; works in most apps.", + ), + Line::from(""), + Line::from(Span::styled( + "clipboard: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from("Puts text on the clipboard only — you paste it yourself."), + Line::from(""), + Line::from(Span::styled( + "paste: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from("Clipboard + Ctrl+V. Faster than typing for long transcripts."), + Line::from(""), + Line::from(Span::styled( + "file: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + "Appends to a file. Set [output] file_path before using.", + ), + ], + Field::Fallback => vec![ + heading("Fallback to clipboard"), + Line::from(""), + Line::from( + "When the active output method fails (no compositor support, \ + no daemon running, etc.), drop the transcript on the \ + clipboard so you don't lose it.", + ), + Line::from(""), + Line::from( + "Recommend keeping this on. The only reason to disable is if \ + you want voxtype to fail loudly when typing breaks — useful \ + in scripted setups.", + ), + ], + Field::AutoSubmit => vec![ + heading("Auto-submit"), + Line::from(""), + Line::from( + "After typing the transcript, press Enter automatically.", + ), + Line::from(""), + Line::from( + "Useful for chat boxes (Slack, Discord, terminal prompts) \ + where you'd hit Enter anyway. Skip if you typically want to \ + review/edit before sending.", + ), + ], + Field::ShiftEnterNewlines => vec![ + heading("Newlines as Shift+Enter"), + Line::from(""), + Line::from( + "Convert any newline in the transcript to Shift+Enter \ + instead of regular Enter.", + ), + Line::from(""), + Line::from( + "Match this to apps where Enter submits and Shift+Enter \ + inserts a newline (Cursor, Slack, Discord, ChatGPT, …). \ + Otherwise multi-line dictations submit prematurely.", + ), + ], + Field::PreTypeDelay => vec![ + heading("Pre-type delay"), + Line::from(""), + Line::from( + "Milliseconds to wait before voxtype starts typing. 
Helps \ + some compositors that drop the first character if the \ + virtual keyboard hasn't fully initialized.", + ), + Line::from(""), + Line::from( + "0 is the default. If you see the first character of \ + transcripts dropped, bump to 50-100ms.", + ), + ], + Field::AppendText => vec![ + heading("Append after each transcription"), + Line::from(""), + Line::from( + "Adds a fixed string after every transcription, before \ + auto-submit fires. Lets you tack on a separator without \ + saying it.", + ), + Line::from(""), + Line::from( + "space: dictate sentences incrementally and end up with \ + \"Sentence one. Sentence two.\" without manual spacing.", + ), + Line::from(""), + Line::from( + "newline: list-style notes where each PTT press should \ + start a new line.", + ), + ], + Field::PostProcess => vec![ + heading("Post-process command"), + Line::from(""), + Line::from( + "Pipes the transcript through an external command before \ + output. The transcript goes in via stdin; the command's \ + stdout is what gets typed.", + ), + Line::from(""), + Line::from( + "Common uses: local LLM cleanup (Ollama), filler-word \ + stripping (sed), markdown formatting.", + ), + Line::from(""), + Line::from(Span::styled( + "TUI cycles a few presets. 
Edit the command body in \ + [post_process] command in config.toml directly.", + Style::default().fg(Color::Gray), + )), + ], + } +} + +impl From for super::common::FeedbackLevel { + fn from(v: FeedbackLevel) -> Self { + match v { + FeedbackLevel::Ok => super::common::FeedbackLevel::Ok, + FeedbackLevel::Err => super::common::FeedbackLevel::Err, + } + } +} + +pub fn handle_key(app: &mut App, key: KeyEvent) -> Action { + let state = match app.output.as_mut() { + Some(s) => s, + None => return Action::None, + }; + + if state.editing.is_some() { + return handle_edit_key(state, key); + } + + match key.code { + KeyCode::Up | KeyCode::Char('k') => { + state.move_field(-1); + Action::None + } + KeyCode::Down | KeyCode::Char('j') => { + state.move_field(1); + Action::None + } + KeyCode::Left | KeyCode::Char('h') => { + state.cycle(-1); + Action::None + } + KeyCode::Right | KeyCode::Char('l') | KeyCode::Char(' ') => { + state.cycle(1); + Action::None + } + KeyCode::Enter | KeyCode::Char('i') => { + state.start_edit_if_text_field(); + Action::None + } + KeyCode::Char('s') => state.save(), + KeyCode::Char('r') => { + state.reset(); + Action::None + } + _ => Action::None, + } +} + +fn handle_edit_key(state: &mut OutputState, key: KeyEvent) -> Action { + let Some(editing) = state.editing.as_mut() else { + return Action::None; + }; + match editing.input.handle_key(key) { + TextInputResult::Continue => Action::None, + TextInputResult::Commit => { + let buf = editing.input.buffer().to_string(); + let field = editing.field; + state.editing = None; + state.commit_text_edit(field, buf); + Action::None + } + TextInputResult::Cancel => { + state.editing = None; + Action::None + } + } +} diff --git a/src/tui/section.rs b/src/tui/section.rs new file mode 100644 index 00000000..bbc44100 --- /dev/null +++ b/src/tui/section.rs @@ -0,0 +1,66 @@ +//! The set of configuration sections shown in the sidebar. 
+ +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Section { + General, + Engine, + Hotkey, + Audio, + Output, + Text, + Vad, + Meeting, + Notifications, + Waybar, + Advanced, +} + +impl Section { + pub const ALL: &'static [Section] = &[ + Section::General, + Section::Engine, + Section::Hotkey, + Section::Audio, + Section::Output, + Section::Text, + Section::Vad, + Section::Meeting, + Section::Notifications, + Section::Waybar, + Section::Advanced, + ]; + + pub const fn label(self) -> &'static str { + match self { + Section::General => "General", + Section::Engine => "Engine", + Section::Hotkey => "Hotkey", + Section::Audio => "Audio", + Section::Output => "Output", + Section::Text => "Text", + Section::Vad => "VAD", + Section::Meeting => "Meeting", + Section::Notifications => "Notifications", + Section::Waybar => "Waybar", + Section::Advanced => "Advanced", + } + } + + /// One-line description shown when the cursor is on the section in the + /// sidebar but the section hasn't been opened yet. + pub const fn summary(self) -> &'static str { + match self { + Section::General => "Engine, variant binary, daemon status", + Section::Engine => "Engine + model + per-engine tuning", + Section::Hotkey => "Push-to-talk key, mode, modifier, cancel key", + Section::Audio => "Input device, max duration, feedback, MPRIS", + Section::Output => "Mode, driver order, post-processing, profiles", + Section::Text => "Spoken punctuation, replacements", + Section::Vad => "Silero VAD, energy thresholds, eager processing", + Section::Meeting => "Meeting mode: audio source, diarization, summary", + Section::Notifications => "Desktop notifications and expire times", + Section::Waybar => "Status integration: icon theme, overrides", + Section::Advanced => "GPU isolation, flash attention, on-demand loading", + } + } +} diff --git a/src/tui/sidebar.rs b/src/tui/sidebar.rs new file mode 100644 index 00000000..d3a970de --- /dev/null +++ b/src/tui/sidebar.rs @@ -0,0 +1,57 @@ +//! 
Left-hand sidebar navigation. + +use ratatui::{ + layout::Rect, + style::{Color, Modifier, Style}, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph}, + Frame, +}; + +use super::app::App; +use super::section::Section; + +/// Width of the sidebar column. Wide enough for "Notifications" + a marker +/// without wrapping. +pub const WIDTH: u16 = 18; + +pub fn render(f: &mut Frame, area: Rect, app: &App) { + let block = Block::default() + .borders(Borders::RIGHT) + .style(Style::default()); + let inner = block.inner(area); + f.render_widget(block, area); + + let mut lines: Vec = Vec::new(); + for section in Section::ALL { + let active = *section == app.current_section; + let focused_in_sidebar = app.sidebar_focused; + let style = if active && focused_in_sidebar { + Style::default() + .bg(Color::Blue) + .fg(Color::White) + .add_modifier(Modifier::BOLD) + } else if active { + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD) + } else if focused_in_sidebar + && hovered_section(app).map(|s| s == *section).unwrap_or(false) + { + Style::default().bg(Color::DarkGray).fg(Color::White) + } else { + Style::default().fg(Color::Gray) + }; + let marker = if active { "▶ " } else { " " }; + lines.push(Line::from(vec![Span::styled( + format!("{}{}", marker, section.label()), + style, + )])); + } + + f.render_widget(Paragraph::new(lines), inner); +} + +fn hovered_section(app: &App) -> Option
{ + Section::ALL.get(app.sidebar_cursor).copied() +} diff --git a/src/tui/text_section.rs b/src/tui/text_section.rs new file mode 100644 index 00000000..8c8c0080 --- /dev/null +++ b/src/tui/text_section.rs @@ -0,0 +1,527 @@ +//! Text-processing settings: spoken punctuation, smart auto-submit, and an +//! inline editor for the [text.replacements] map. + +use crossterm::event::{KeyCode, KeyEvent}; +use ratatui::{ + layout::Rect, + style::{Color, Modifier, Style}, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph, Wrap}, + Frame, +}; + +use super::app::{Action, App}; +use super::common::{self, FeedbackLevel, FormRowSpec, TextInput, TextInputResult}; +use super::config_editor::{ConfigEditor, EditorError}; + +#[derive(Debug, Clone)] +pub struct TextState { + pub spoken_punctuation: bool, + pub smart_auto_submit: bool, + /// Sorted by key for stable display. The user can edit keys/values via + /// the inline editor below. + pub replacements: Vec<(String, String)>, + /// Set of original keys at load time, so save() can detect deletions. + pub original_keys: Vec, + pub cursor: usize, + pub feedback: Option<(FeedbackLevel, String)>, + pub dirty_since_load: bool, + pub editing: Option, +} + +/// Editing state for the replacement list. Users edit the key first, then +/// the value; commit on the value commits the whole pair. +#[derive(Debug, Clone)] +pub struct ReplacementEdit { + pub target: EditTarget, + pub phase: EditPhase, + pub key_buffer: String, + pub input: TextInput, +} + +#[derive(Debug, Clone, Copy)] +pub enum EditTarget { + /// Editing the replacement at this index in `replacements`. + Existing(usize), + /// Adding a new replacement at the end of the list. + New, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum EditPhase { + Key, + Value, +} + +/// Row-position vocabulary. Position 0 is the first toggle, and the last +/// position is always the "+ Add new replacement" row. 
+fn toggle_count() -> usize { + 2 +} + +fn add_row_index(replacements: &[(String, String)]) -> usize { + toggle_count() + replacements.len() +} + +fn total_rows(replacements: &[(String, String)]) -> usize { + add_row_index(replacements) + 1 +} + +impl TextState { + pub fn load() -> Result { + let ed = ConfigEditor::load()?; + let replacements = read_replacements(&ed); + let original_keys: Vec = replacements.iter().map(|(k, _)| k.clone()).collect(); + Ok(Self { + spoken_punctuation: ed.get_bool("text", "spoken_punctuation").unwrap_or(false), + smart_auto_submit: ed.get_bool("text", "smart_auto_submit").unwrap_or(false), + replacements, + original_keys, + cursor: 0, + feedback: None, + dirty_since_load: false, + editing: None, + }) + } + + pub fn save(&mut self) -> Action { + let mut ed = match ConfigEditor::load() { + Ok(e) => e, + Err(e) => { + self.feedback = Some((FeedbackLevel::Err, format!("load: {}", e))); + return Action::None; + } + }; + ed.set_bool("text", "spoken_punctuation", self.spoken_punctuation); + ed.set_bool("text", "smart_auto_submit", self.smart_auto_submit); + + // Replacements: write every current entry, then unset any original + // keys that are no longer in the list (deletions). 
+        let current_keys: std::collections::HashSet<&String> =
+            self.replacements.iter().map(|(k, _)| k).collect();
+        for original in &self.original_keys {
+            if !current_keys.contains(original) {
+                ed.unset("text.replacements", original);
+            }
+        }
+        for (k, v) in &self.replacements {
+            ed.set_string("text.replacements", k, v);
+        }
+
+        match ed.save() {
+            Ok(()) => {
+                self.dirty_since_load = false;
+                // Re-baseline the key set so the next save only unsets keys
+                // deleted after this point.
+                self.original_keys = self.replacements.iter().map(|(k, _)| k.clone()).collect();
+                self.feedback = Some((
+                    FeedbackLevel::Ok,
+                    format!("Saved to {}", ed.path().display()),
+                ));
+            }
+            Err(e) => self.feedback = Some((FeedbackLevel::Err, format!("save: {}", e))),
+        }
+        Action::None
+    }
+
+    /// Discards in-memory edits by reloading the state via `Self::load()`.
+    ///
+    /// The cursor is kept, clamped to the reloaded row count. A failed reload
+    /// leaves the current state in place and reports the error as feedback
+    /// instead of being dropped silently.
+    pub fn reset(&mut self) {
+        match Self::load() {
+            Ok(fresh) => {
+                let cursor = self.cursor.min(total_rows(&fresh.replacements).saturating_sub(1));
+                *self = fresh;
+                self.cursor = cursor;
+                self.feedback = Some((FeedbackLevel::Ok, "Reverted unsaved changes".to_string()));
+            }
+            Err(e) => self.feedback = Some((FeedbackLevel::Err, format!("reload: {}", e))),
+        }
+    }
+
+    /// Moves the cursor by `delta` rows, wrapping at both ends.
+    fn move_field(&mut self, delta: i32) {
+        let len = total_rows(&self.replacements) as i32;
+        let new = (self.cursor as i32 + delta).rem_euclid(len);
+        self.cursor = new as usize;
+    }
+
+    /// Flips the toggle under the cursor. On any other row this is a no-op:
+    /// the dirty flag and the current feedback are left untouched.
+    fn cycle(&mut self) {
+        match self.cursor {
+            0 => self.spoken_punctuation = !self.spoken_punctuation,
+            1 => self.smart_auto_submit = !self.smart_auto_submit,
+            // Replacement / add rows don't cycle; return before marking the
+            // state dirty, since nothing changed.
+            _ => return,
+        }
+        self.dirty_since_load = true;
+        self.feedback = None;
+    }
+
+    /// Opens the inline editor in the key phase for the row under the cursor:
+    /// a new entry on the add row, the existing pair on a replacement row.
+    /// Does nothing on the toggle rows.
+    fn start_edit(&mut self) {
+        let target = if self.cursor == add_row_index(&self.replacements) {
+            EditTarget::New
+        } else if self.cursor >= toggle_count() {
+            EditTarget::Existing(self.cursor - toggle_count())
+        } else {
+            return; // toggles, not editable as text
+        };
+
+        let initial_key = match target {
+            EditTarget::Existing(i) => self.replacements[i].0.clone(),
+            EditTarget::New => String::new(),
+        };
+
+        self.editing = Some(ReplacementEdit {
+            target,
+            phase: EditPhase::Key,
+            key_buffer: String::new(),
+            input: TextInput::new(initial_key),
+        });
+    }
+
+ fn delete_replacement_at_cursor(&mut self) { + if self.cursor >= toggle_count() && self.cursor < add_row_index(&self.replacements) { + let idx = self.cursor - toggle_count(); + self.replacements.remove(idx); + self.dirty_since_load = true; + self.feedback = None; + // Clamp cursor in case we removed the last entry. + let max = total_rows(&self.replacements).saturating_sub(1); + if self.cursor > max { + self.cursor = max; + } + } + } + + /// Called when the inline TextInput commits. Advances the edit phase or + /// finalizes the replacement. + fn commit_edit(&mut self) { + let Some(edit) = self.editing.take() else { + return; + }; + let buf = edit.input.buffer().to_string(); + match edit.phase { + EditPhase::Key => { + let trimmed = buf.trim().to_string(); + if trimmed.is_empty() { + // Empty key → cancel the whole flow. + self.feedback = None; + return; + } + let initial_value = match edit.target { + EditTarget::Existing(i) => self.replacements[i].1.clone(), + EditTarget::New => String::new(), + }; + self.editing = Some(ReplacementEdit { + target: edit.target, + phase: EditPhase::Value, + key_buffer: trimmed, + input: TextInput::new(initial_value), + }); + } + EditPhase::Value => { + let key = edit.key_buffer; + let value = buf; + if value.is_empty() { + // Empty value is allowed but doesn't make much sense; treat + // as a cancel for the new-entry flow. + if let EditTarget::New = edit.target { + return; + } + } + match edit.target { + EditTarget::Existing(i) => { + // Key may have changed; rewrite the entry in place. + self.replacements[i] = (key, value); + } + EditTarget::New => { + self.replacements.push((key, value)); + } + } + self.replacements.sort_by(|a, b| a.0.cmp(&b.0)); + self.dirty_since_load = true; + self.feedback = None; + } + } + } +} + +fn read_replacements(ed: &ConfigEditor) -> Vec<(String, String)> { + // Walk the [text.replacements] table directly via toml_edit, since the + // ConfigEditor accessor only returns single keyed values. 
+ let mut out: Vec<(String, String)> = Vec::new(); + if let Some(table) = ed.raw_table("text.replacements") { + for (k, v) in table.iter() { + if let Some(s) = v.as_value().and_then(|v| v.as_str()) { + out.push((k.to_string(), s.to_string())); + } + } + } + out.sort_by(|a, b| a.0.cmp(&b.0)); + out +} + +pub fn render(f: &mut Frame, area: Rect, app: &App) { + let state = match &app.text { + Some(s) => s, + None => { + let block = Block::default().borders(Borders::ALL).title("Text"); + let inner = block.inner(area); + f.render_widget(block, area); + f.render_widget( + Paragraph::new("Failed to load config; check ~/.config/voxtype/config.toml.") + .wrap(Wrap { trim: true }), + inner, + ); + return; + } + }; + + let editing_idx = state.editing.as_ref().and_then(|e| match e.target { + EditTarget::Existing(i) => Some(i), + EditTarget::New => None, + }); + let editing_new = matches!(state.editing.as_ref().map(|e| e.target), Some(EditTarget::New)); + + let mut rows: Vec = Vec::new(); + + rows.push(FormRowSpec::new( + state.cursor == 0, + "Spoken punctuation conversion", + yesno(state.spoken_punctuation), + )); + rows.push(FormRowSpec::new( + state.cursor == 1, + "Smart auto-submit on \"submit\"", + yesno(state.smart_auto_submit), + )); + + for (i, (k, v)) in state.replacements.iter().enumerate() { + let row_idx = toggle_count() + i; + let label = format!("\"{}\"", k); + let value = if editing_idx == Some(i) { + replacement_edit_value(state) + } else { + format!("→ \"{}\"", v) + }; + rows.push(FormRowSpec::new(state.cursor == row_idx, label, value)); + } + + let add_idx = add_row_index(&state.replacements); + let add_label = if editing_new { + "(new entry)".to_string() + } else { + "+ Add new replacement".to_string() + }; + let add_value = if editing_new { + replacement_edit_value(state) + } else { + "press Enter".to_string() + }; + rows.push(FormRowSpec::new(state.cursor == add_idx, add_label, add_value)); + + let feedback_pair = state + .feedback + .as_ref() + .map(|(lvl, 
msg)| (*lvl, msg.as_str())); + + common::render_form_with_guidance( + f, + area, + "Text", + state.dirty_since_load, + feedback_pair, + &rows, + guidance(state), + ); +} + +fn replacement_edit_value(state: &TextState) -> String { + let Some(edit) = state.editing.as_ref() else { + return String::new(); + }; + match edit.phase { + EditPhase::Key => format!("editing key: {}", edit.input.caret_string()), + EditPhase::Value => format!( + "\"{}\" → {}", + edit.key_buffer, + edit.input.caret_string() + ), + } +} + +fn yesno(b: bool) -> String { + (if b { "yes" } else { "no" }).to_string() +} + +fn heading(text: impl Into) -> Line<'static> { + Line::from(Span::styled( + text.into(), + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + )) +} + +fn guidance(state: &TextState) -> Vec> { + let total = total_rows(&state.replacements); + let on_replacement_row = state.cursor >= toggle_count() && state.cursor < total - 1; + let on_add_row = state.cursor == total - 1; + + if let Some(edit) = state.editing.as_ref() { + let header = match edit.phase { + EditPhase::Key => "✎ Editing key — Enter for value, Esc to cancel", + EditPhase::Value => "✎ Editing value — Enter to commit, Esc to cancel", + }; + return vec![ + Line::from(Span::styled( + header, + Style::default() + .fg(Color::Yellow) + .add_modifier(Modifier::BOLD), + )), + Line::from(""), + Line::from( + "Replacements run as a case-insensitive substring match \ + across the transcript before output. 
The dictated word goes \ + on the left, the replacement on the right.", + ), + Line::from(""), + Line::from(Span::styled( + "Examples:", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from(" \"vox type\" → \"voxtype\""), + Line::from(" \"a i\" → \"AI\""), + Line::from(" \"slack\" → \"Slack\""), + ]; + } + + if state.cursor == 0 { + return vec![ + heading("Spoken punctuation"), + Line::from(""), + Line::from( + "Maps words like \"period\", \"comma\", \"question mark\", \ + \"new line\" to their symbol equivalents in the transcript.", + ), + Line::from(""), + Line::from( + "Useful when the model can't reliably punctuate from prosody \ + (smaller Whisper models, accented speech).", + ), + ]; + } + + if state.cursor == 1 { + return vec![ + heading("Smart auto-submit"), + Line::from(""), + Line::from( + "Watches for \"submit\" at the end of a recording. If found, \ + voxtype strips it and presses Enter for you.", + ), + Line::from(""), + Line::from( + "Pair with [output] auto_submit = false: most dictations \ + don't auto-send, but ending with \"submit\" explicitly fires \ + Enter.", + ), + ]; + } + + if on_replacement_row { + let idx = state.cursor - toggle_count(); + let (k, v) = &state.replacements[idx]; + return vec![ + heading("Custom replacement"), + Line::from(""), + Line::from(format!(" \"{}\" → \"{}\"", k, v)), + Line::from(""), + Line::from( + "Press Enter to edit (key first, then value). Press d to \ + delete this entry.", + ), + Line::from(""), + Line::from(Span::styled( + "Replacements run before output. Match is case-insensitive \ + and operates on the whole transcript text.", + Style::default().fg(Color::Gray), + )), + ]; + } + + if on_add_row { + return vec![ + heading("Add a replacement"), + Line::from(""), + Line::from( + "Press Enter to start a new entry. 
You'll be prompted for the \ + key first, then the value.", + ), + Line::from(""), + Line::from(Span::styled( + "Examples:", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from(" \"vox type\" → \"voxtype\""), + Line::from(" \"hyperland\" → \"Hyprland\""), + Line::from(" \"github\" → \"GitHub\""), + ]; + } + + Vec::new() +} + +pub fn handle_key(app: &mut App, key: KeyEvent) -> Action { + let state = match app.text.as_mut() { + Some(s) => s, + None => return Action::None, + }; + + if state.editing.is_some() { + return handle_edit_key(state, key); + } + + match key.code { + KeyCode::Up | KeyCode::Char('k') => { + state.move_field(-1); + Action::None + } + KeyCode::Down | KeyCode::Char('j') => { + state.move_field(1); + Action::None + } + KeyCode::Left | KeyCode::Right | KeyCode::Char('h') | KeyCode::Char('l') + | KeyCode::Char(' ') => { + state.cycle(); + Action::None + } + KeyCode::Enter | KeyCode::Char('i') => { + // Enter on toggles flips them; on replacement rows starts edit. + if state.cursor < toggle_count() { + state.cycle(); + } else { + state.start_edit(); + } + Action::None + } + KeyCode::Char('d') | KeyCode::Delete => { + state.delete_replacement_at_cursor(); + Action::None + } + KeyCode::Char('s') => state.save(), + KeyCode::Char('r') => { + state.reset(); + Action::None + } + _ => Action::None, + } +} + +fn handle_edit_key(state: &mut TextState, key: KeyEvent) -> Action { + let Some(editing) = state.editing.as_mut() else { + return Action::None; + }; + match editing.input.handle_key(key) { + TextInputResult::Continue => Action::None, + TextInputResult::Commit => { + state.commit_edit(); + Action::None + } + TextInputResult::Cancel => { + state.editing = None; + Action::None + } + } +} diff --git a/src/tui/vad_section.rs b/src/tui/vad_section.rs new file mode 100644 index 00000000..88835f70 --- /dev/null +++ b/src/tui/vad_section.rs @@ -0,0 +1,270 @@ +//! Voice Activity Detection settings. 
+ +use crossterm::event::{KeyCode, KeyEvent}; +use ratatui::{ + layout::Rect, + style::{Color, Modifier, Style}, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph, Wrap}, + Frame, +}; + +use super::app::{Action, App}; +use super::common::{self, FeedbackLevel, FormRowSpec}; +use super::config_editor::{ConfigEditor, EditorError}; + +#[derive(Debug, Clone)] +pub struct VadState { + pub enabled: bool, + pub backend: String, + pub threshold: f32, + pub field: Field, + pub feedback: Option<(FeedbackLevel, String)>, + pub dirty_since_load: bool, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Field { + Enabled, + Backend, + Threshold, +} +impl Field { + const ALL: &'static [Field] = &[Field::Enabled, Field::Backend, Field::Threshold]; +} +const BACKEND_CHOICES: &[&str] = &["auto", "energy", "whisper"]; + +impl VadState { + pub fn load() -> Result { + let ed = ConfigEditor::load()?; + Ok(Self { + enabled: ed.get_bool("vad", "enabled").unwrap_or(false), + backend: ed + .get_string("vad", "backend") + .unwrap_or_else(|| "auto".to_string()), + threshold: ed + .get_string("vad", "threshold") + .and_then(|s| s.parse().ok()) + .or_else(|| ed.get_int("vad", "threshold").map(|n| n as f32)) + .unwrap_or(0.5), + field: Field::Enabled, + feedback: None, + dirty_since_load: false, + }) + } + + pub fn save(&mut self) -> Action { + let mut ed = match ConfigEditor::load() { + Ok(e) => e, + Err(e) => { + self.feedback = Some((FeedbackLevel::Err, format!("load: {}", e))); + return Action::None; + } + }; + ed.set_bool("vad", "enabled", self.enabled); + ed.set_string("vad", "backend", &self.backend); + ed.set_string("vad", "threshold", &format!("{:.2}", self.threshold)); + match ed.save() { + Ok(()) => { + self.dirty_since_load = false; + self.feedback = Some(( + FeedbackLevel::Ok, + format!("Saved to {}", ed.path().display()), + )); + } + Err(e) => self.feedback = Some((FeedbackLevel::Err, format!("save: {}", e))), + } + Action::None + } + + pub fn reset(&mut self) { 
+ match Self::load() { + Ok(fresh) => { + let field = self.field; + *self = fresh; + self.field = field; + self.feedback = Some((FeedbackLevel::Ok, "Reverted".to_string())); + } + Err(e) => self.feedback = Some((FeedbackLevel::Err, format!("reload: {}", e))), + } + } + + fn move_field(&mut self, delta: i32) { + let len = Field::ALL.len() as i32; + let cur = Field::ALL.iter().position(|f| *f == self.field).unwrap_or(0) as i32; + let new = (cur + delta).rem_euclid(len); + self.field = Field::ALL[new as usize]; + } + + fn cycle(&mut self, delta: i32) { + match self.field { + Field::Enabled => self.enabled = !self.enabled, + Field::Backend => { + let idx = BACKEND_CHOICES + .iter() + .position(|c| *c == self.backend) + .map(|i| i as i32) + .unwrap_or(0); + let n = (idx + delta).rem_euclid(BACKEND_CHOICES.len() as i32); + self.backend = BACKEND_CHOICES[n as usize].to_string(); + } + Field::Threshold => { + self.threshold = (self.threshold + delta as f32 * 0.05).clamp(0.0, 1.0); + } + } + self.dirty_since_load = true; + self.feedback = None; + } +} + +pub fn render(f: &mut Frame, area: Rect, app: &App) { + let state = match &app.vad { + Some(s) => s, + None => { + let block = Block::default().borders(Borders::ALL).title("VAD"); + let inner = block.inner(area); + f.render_widget(block, area); + f.render_widget( + Paragraph::new("Failed to load config.").wrap(Wrap { trim: true }), + inner, + ); + return; + } + }; + + let dim_when_off = !state.enabled; + let rows = vec![ + FormRowSpec::new(state.field == Field::Enabled, "Enabled", yesno(state.enabled)), + FormRowSpec::new(state.field == Field::Backend, "Backend", &state.backend) + .dimmed(dim_when_off), + FormRowSpec::new( + state.field == Field::Threshold, + "Speech threshold", + format!("{:.2}", state.threshold), + ) + .dimmed(dim_when_off), + ]; + + let feedback_pair = state + .feedback + .as_ref() + .map(|(lvl, msg)| (*lvl, msg.as_str())); + + common::render_form_with_guidance( + f, + area, + "Voice Activity Detection", 
+ state.dirty_since_load, + feedback_pair, + &rows, + guidance_for_field(state), + ); +} + +fn yesno(b: bool) -> String { + (if b { "yes" } else { "no" }).to_string() +} + +fn heading<'a>(text: &'a str) -> Line<'a> { + Line::from(Span::styled( + text, + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + )) +} + +fn guidance_for_field(state: &VadState) -> Vec> { + match state.field { + Field::Enabled => vec![ + heading("Voice Activity Detection"), + Line::from(""), + Line::from( + "Filters out silence-only recordings before transcription. \ + Without VAD, Whisper sometimes hallucinates phrases like \ + \"Thank you.\" on a clip with no speech.", + ), + Line::from(""), + Line::from( + "Keep this on if you sometimes accidentally tap the PTT key \ + without speaking, or use toggle mode and forget you started \ + a recording.", + ), + ], + Field::Backend => vec![ + heading("VAD backend"), + Line::from(""), + Line::from(Span::styled( + "auto: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + "Whisper VAD for the Whisper engine; Energy VAD for ONNX. \ + Pick this unless you want to override.", + ), + Line::from(""), + Line::from(Span::styled( + "energy: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + "RMS-amplitude threshold. Fast, no model needed, works with \ + any engine. Less accurate in noisy environments.", + ), + Line::from(""), + Line::from(Span::styled( + "whisper: ", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from( + "Silero VAD via whisper-rs. Most accurate. Requires \ + ggml-silero-vad.bin (run `voxtype setup vad` to fetch).", + ), + ], + Field::Threshold => vec![ + heading("Speech threshold"), + Line::from(""), + Line::from( + "0.0-1.0. Higher values mean voxtype demands more confident \ + speech detection before transcribing.", + ), + Line::from(""), + Line::from( + "0.5 is the default and works for most setups. 
Bump higher \ + (0.65-0.75) if voxtype occasionally transcribes background \ + noise. Lower (0.35-0.45) if it's rejecting your real speech.", + ), + ], + } +} + +pub fn handle_key(app: &mut App, key: KeyEvent) -> Action { + let state = match app.vad.as_mut() { + Some(s) => s, + None => return Action::None, + }; + match key.code { + KeyCode::Up | KeyCode::Char('k') => { + state.move_field(-1); + Action::None + } + KeyCode::Down | KeyCode::Char('j') => { + state.move_field(1); + Action::None + } + KeyCode::Left | KeyCode::Char('h') => { + state.cycle(-1); + Action::None + } + KeyCode::Right | KeyCode::Char('l') | KeyCode::Char(' ') => { + state.cycle(1); + Action::None + } + KeyCode::Char('s') => state.save(), + KeyCode::Char('r') => { + state.reset(); + Action::None + } + _ => Action::None, + } +} diff --git a/src/tui/waybar_section.rs b/src/tui/waybar_section.rs new file mode 100644 index 00000000..24e695f4 --- /dev/null +++ b/src/tui/waybar_section.rs @@ -0,0 +1,418 @@ +//! Waybar status integration: icon theme + per-state icon overrides. 
+ +use crossterm::event::{KeyCode, KeyEvent}; +use ratatui::{ + layout::Rect, + style::{Color, Modifier, Style}, + text::{Line, Span}, + widgets::{Block, Borders, Paragraph, Wrap}, + Frame, +}; + +use super::app::{Action, App}; +use super::common::{self, FeedbackLevel, FormRowSpec, TextInput, TextInputResult}; +use super::config_editor::{ConfigEditor, EditorError}; + +#[derive(Debug, Clone)] +pub struct WaybarState { + pub icon_theme: String, + pub icon_idle: Option, + pub icon_recording: Option, + pub icon_transcribing: Option, + pub icon_stopped: Option, + pub field: Field, + pub feedback: Option<(FeedbackLevel, String)>, + pub dirty_since_load: bool, + pub editing: Option, +} + +#[derive(Debug, Clone)] +pub struct TextEdit { + pub field: Field, + pub input: TextInput, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Field { + Theme, + IconIdle, + IconRecording, + IconTranscribing, + IconStopped, +} + +impl Field { + const ALL: &'static [Field] = &[ + Field::Theme, + Field::IconIdle, + Field::IconRecording, + Field::IconTranscribing, + Field::IconStopped, + ]; +} + +const THEMES: &[&str] = &[ + "emoji", + "nerd-font", + "material", + "phosphor", + "codicons", + "omarchy", + "minimal", + "dots", + "arrows", + "text", +]; + +impl WaybarState { + pub fn load() -> Result { + let ed = ConfigEditor::load()?; + Ok(Self { + icon_theme: ed + .get_string("status", "icon_theme") + .unwrap_or_else(|| "emoji".to_string()), + icon_idle: ed.get_string("status.icons", "idle"), + icon_recording: ed.get_string("status.icons", "recording"), + icon_transcribing: ed.get_string("status.icons", "transcribing"), + icon_stopped: ed.get_string("status.icons", "stopped"), + field: Field::Theme, + feedback: None, + dirty_since_load: false, + editing: None, + }) + } + + pub fn save(&mut self) -> Action { + let mut ed = match ConfigEditor::load() { + Ok(e) => e, + Err(e) => { + self.feedback = Some((FeedbackLevel::Err, format!("load: {}", e))); + return Action::None; + } + }; + 
ed.set_string("status", "icon_theme", &self.icon_theme); + for (key, val) in [ + ("idle", &self.icon_idle), + ("recording", &self.icon_recording), + ("transcribing", &self.icon_transcribing), + ("stopped", &self.icon_stopped), + ] { + match val { + Some(v) if !v.is_empty() => ed.set_string("status.icons", key, v), + _ => ed.unset("status.icons", key), + } + } + match ed.save() { + Ok(()) => { + self.dirty_since_load = false; + self.feedback = Some(( + FeedbackLevel::Ok, + format!("Saved to {}", ed.path().display()), + )); + } + Err(e) => self.feedback = Some((FeedbackLevel::Err, format!("save: {}", e))), + } + Action::None + } + + pub fn reset(&mut self) { + if let Ok(fresh) = Self::load() { + let field = self.field; + *self = fresh; + self.field = field; + self.feedback = Some((FeedbackLevel::Ok, "Reverted".to_string())); + } + } + + fn move_field(&mut self, delta: i32) { + let len = Field::ALL.len() as i32; + let cur = Field::ALL.iter().position(|f| *f == self.field).unwrap_or(0) as i32; + self.field = Field::ALL[((cur + delta).rem_euclid(len)) as usize]; + } + + fn cycle(&mut self, delta: i32) { + match self.field { + Field::Theme => { + let idx = THEMES + .iter() + .position(|t| *t == self.icon_theme) + .map(|i| i as i32) + .unwrap_or(0); + self.icon_theme = THEMES + [((idx + delta).rem_euclid(THEMES.len() as i32)) as usize] + .to_string(); + } + // Icon overrides are free-text; ←→ kicks off inline edit. 
+ Field::IconIdle | Field::IconRecording | Field::IconTranscribing | Field::IconStopped => { + self.start_edit_if_text_field(); + return; + } + } + self.dirty_since_load = true; + self.feedback = None; + } + + fn is_text_field(field: Field) -> bool { + matches!( + field, + Field::IconIdle + | Field::IconRecording + | Field::IconTranscribing + | Field::IconStopped + ) + } + + fn start_edit_if_text_field(&mut self) -> bool { + if !Self::is_text_field(self.field) { + return false; + } + let initial = match self.field { + Field::IconIdle => self.icon_idle.clone().unwrap_or_default(), + Field::IconRecording => self.icon_recording.clone().unwrap_or_default(), + Field::IconTranscribing => self.icon_transcribing.clone().unwrap_or_default(), + Field::IconStopped => self.icon_stopped.clone().unwrap_or_default(), + _ => String::new(), + }; + self.editing = Some(TextEdit { + field: self.field, + input: TextInput::new(initial), + }); + true + } + + fn commit_text_edit(&mut self, field: Field, buffer: String) { + let opt = if buffer.is_empty() { + None + } else { + Some(buffer) + }; + match field { + Field::IconIdle => self.icon_idle = opt, + Field::IconRecording => self.icon_recording = opt, + Field::IconTranscribing => self.icon_transcribing = opt, + Field::IconStopped => self.icon_stopped = opt, + _ => {} + } + self.dirty_since_load = true; + self.feedback = None; + } +} + +pub fn render(f: &mut Frame, area: Rect, app: &App) { + let state = match &app.waybar { + Some(s) => s, + None => { + let block = Block::default().borders(Borders::ALL).title("Waybar"); + let inner = block.inner(area); + f.render_widget(block, area); + f.render_widget( + Paragraph::new("Failed to load config.").wrap(Wrap { trim: true }), + inner, + ); + return; + } + }; + + let icon_value = |field: Field, value: &Option| -> String { + match state.editing.as_ref() { + Some(e) if e.field == field => e.input.caret_string(), + _ => value.clone().unwrap_or_else(|| "(theme default)".to_string()), + } + }; + + 
let rows = vec![ + FormRowSpec::new(state.field == Field::Theme, "Icon theme", &state.icon_theme), + FormRowSpec::new( + state.field == Field::IconIdle, + "Override · idle", + icon_value(Field::IconIdle, &state.icon_idle), + ), + FormRowSpec::new( + state.field == Field::IconRecording, + "Override · recording", + icon_value(Field::IconRecording, &state.icon_recording), + ), + FormRowSpec::new( + state.field == Field::IconTranscribing, + "Override · transcribing", + icon_value(Field::IconTranscribing, &state.icon_transcribing), + ), + FormRowSpec::new( + state.field == Field::IconStopped, + "Override · stopped", + icon_value(Field::IconStopped, &state.icon_stopped), + ), + ]; + + let feedback_pair = state + .feedback + .as_ref() + .map(|(lvl, msg)| (*lvl, msg.as_str())); + + common::render_form_with_guidance( + f, + area, + "Waybar / Status", + state.dirty_since_load, + feedback_pair, + &rows, + guidance(state), + ); +} + +fn heading(text: impl Into) -> Line<'static> { + Line::from(Span::styled( + text.into(), + Style::default() + .fg(Color::Cyan) + .add_modifier(Modifier::BOLD), + )) +} + +fn guidance(state: &WaybarState) -> Vec> { + match state.field { + Field::Theme => vec![ + heading("Icon theme"), + Line::from(""), + Line::from( + "The glyph set `voxtype status --follow` emits to your status \ + bar. 
Match it to whatever your bar's font supports.", + ), + Line::from(""), + Line::from(Span::styled( + "Common picks:", + Style::default().add_modifier(Modifier::BOLD), + )), + Line::from(" • emoji — works everywhere, no special font needed."), + Line::from(" • nerd-font — for users on a Nerd Font."), + Line::from(" • phosphor — Phosphor icon font."), + Line::from(" • omarchy — matches Omarchy's stock ricing."), + Line::from(" • text — plain ASCII, no glyphs at all."), + Line::from(""), + Line::from(Span::styled( + "Run `voxtype setup waybar` for ready-to-paste Waybar config.", + Style::default().fg(Color::Gray), + )), + ], + Field::IconIdle => icon_guidance( + state, + "idle", + "Shown when voxtype is loaded but not actively recording or \ + transcribing.", + ), + Field::IconRecording => icon_guidance( + state, + "recording", + "Shown while voxtype is capturing audio.", + ), + Field::IconTranscribing => icon_guidance( + state, + "transcribing", + "Shown while voxtype is running inference on a captured clip.", + ), + Field::IconStopped => icon_guidance( + state, + "stopped", + "Shown when the daemon isn't running. Useful for spotting that \ + voxtype crashed or wasn't started.", + ), + } +} + +fn icon_guidance(state: &WaybarState, label: &str, purpose: &str) -> Vec> { + let mut lines = vec![ + heading(format!("Override · {}", label)), + Line::from(""), + Line::from(purpose.to_string()), + Line::from(""), + Line::from(format!( + "Set a glyph here to override the {} theme's choice for this \ + state. Leave empty to fall back to the theme default.", + state.icon_theme + )), + Line::from(""), + Line::from(Span::styled( + "Press Enter or i to edit. Type any unicode glyph (emoji, Nerd \ + Font glyph, ASCII). 
Esc cancels.", + Style::default().fg(Color::Gray), + )), + ]; + if state + .editing + .as_ref() + .map(|e| e.field == state.field) + .unwrap_or(false) + { + lines.insert( + 0, + Line::from(Span::styled( + "✎ Editing — Enter to commit, Esc to cancel", + Style::default() + .fg(Color::Yellow) + .add_modifier(Modifier::BOLD), + )), + ); + lines.insert(1, Line::from("")); + } + lines +} + +pub fn handle_key(app: &mut App, key: KeyEvent) -> Action { + let state = match app.waybar.as_mut() { + Some(s) => s, + None => return Action::None, + }; + + if state.editing.is_some() { + return handle_edit_key(state, key); + } + + match key.code { + KeyCode::Up | KeyCode::Char('k') => { + state.move_field(-1); + Action::None + } + KeyCode::Down | KeyCode::Char('j') => { + state.move_field(1); + Action::None + } + KeyCode::Left | KeyCode::Char('h') => { + state.cycle(-1); + Action::None + } + KeyCode::Right | KeyCode::Char('l') | KeyCode::Char(' ') => { + state.cycle(1); + Action::None + } + KeyCode::Enter | KeyCode::Char('i') => { + state.start_edit_if_text_field(); + Action::None + } + KeyCode::Char('s') => state.save(), + KeyCode::Char('r') => { + state.reset(); + Action::None + } + _ => Action::None, + } +} + +fn handle_edit_key(state: &mut WaybarState, key: KeyEvent) -> Action { + let Some(editing) = state.editing.as_mut() else { + return Action::None; + }; + match editing.input.handle_key(key) { + TextInputResult::Continue => Action::None, + TextInputResult::Commit => { + let buf = editing.input.buffer().to_string(); + let field = editing.field; + state.editing = None; + state.commit_text_edit(field, buf); + Action::None + } + TextInputResult::Cancel => { + state.editing = None; + Action::None + } + } +} diff --git a/src/vad/mod.rs b/src/vad/mod.rs index 0e016bcc..60200e4c 100644 --- a/src/vad/mod.rs +++ b/src/vad/mod.rs @@ -62,7 +62,8 @@ pub fn create_vad(config: &Config) -> Result VadBackend::Energy, + | TranscriptionEngine::Omnilingual + | TranscriptionEngine::Cohere => 
VadBackend::Energy, } } explicit => explicit, diff --git a/website/appcast.xml b/website/appcast.xml new file mode 100644 index 00000000..b8316d84 --- /dev/null +++ b/website/appcast.xml @@ -0,0 +1,36 @@ + + + + Voxtype Updates + https://voxtype.io/appcast.xml + Push-to-talk voice-to-text for macOS + en + + + + Version 0.5.0 + 0.5.0 + 0.5.0 + 11.0 + Mon, 20 Jan 2026 00:00:00 +0000 + Voxtype 0.5.0 - macOS Release +

First official macOS release of Voxtype!

+
    +
  • Universal binary (Intel + Apple Silicon)
  • +
  • Metal GPU acceleration for fast transcription
  • +
  • LaunchAgent for automatic startup
  • +
+

For more details, visit voxtype.io/news

+ ]]>
+ +
+ + + +
+
diff --git a/website/download/index.html b/website/download/index.html index 6bf1ac74..4c9a8666 100644 --- a/website/download/index.html +++ b/website/download/index.html @@ -520,6 +520,22 @@

# In configuration.nix
environment.systemPackages = [ pkgs.voxtype ];

# Or try it
nix-shell -p voxtype
+ + +
+

+ + + + + + macOS +

+

Install via Homebrew (beta)

+
+ # Add the tap
brew tap peteonrails/voxtype

# Install
brew install --cask voxtype
+
+

@@ -543,8 +559,9 @@

Linux

macOS

    -
  • macOS 12 Monterey or later
  • -
  • Apple Silicon (M1/M2/M3) or Intel
  • +
  • macOS 13 Ventura or later
  • +
  • Apple Silicon (M1/M2/M3/M4) or Intel
  • +
  • Homebrew package manager
  • 4GB RAM minimum
  • 500MB disk space
diff --git a/website/index.html b/website/index.html index 6d09d4e8..da356d75 100644 --- a/website/index.html +++ b/website/index.html @@ -1094,7 +1094,7 @@

Legal