Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docs/CONFIGURATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -2170,11 +2170,22 @@ Echo cancellation mode for removing speaker bleed-through from the microphone si
- `"auto"` - Use GTCRN neural speech enhancement on mic audio before transcription, followed by a phrase-level transcript dedup pass. The GTCRN model (~523 KB) is automatically downloaded on first `voxtype meeting start`.
- `"disabled"` - No enhancement. Use this if you have system-level echo cancellation configured (e.g., PipeWire's `echo-cancel` module) or if you don't use loopback capture.

### vad_threshold

**Type:** Float
**Default:** `0.01`
**Required:** No

RMS threshold for meeting chunk voice activity detection. Lower values are more permissive and can help quiet microphones; higher values skip more low-level noise before transcription. Set to `0.0` to disable this pre-transcription gate.

For quiet USB/XLR mics, try `0.001`.

**Example:**
```toml
[meeting.audio]
loopback_device = "auto"
echo_cancel = "auto" # GTCRN enhancement + transcript dedup
vad_threshold = 0.001 # Optional: quiet mic tuning
```

---
Expand Down
4 changes: 4 additions & 0 deletions docs/MEETING_MODE.md
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,10 @@ mic_device = "default"
# Loopback device for capturing remote participants' audio
# "auto" = auto-detect, "disabled" = mic only, or a specific device name
loopback_device = "auto"

# RMS threshold for meeting voice activity detection (default: 0.01)
# Lower to 0.001 for quiet mics; set 0.0 to disable this pre-transcription gate
vad_threshold = 0.01
```

Setting `loopback_device = "auto"` lets voxtype capture system audio (the other side of a call). When loopback is active, speaker attribution can distinguish between "You" (from the mic) and "Remote" (from system audio).
Expand Down
1 change: 1 addition & 0 deletions docs/USER_MANUAL.md
Original file line number Diff line number Diff line change
Expand Up @@ -1983,6 +1983,7 @@ max_duration_mins = 180 # Maximum meeting length (0 = unlimited)
mic_device = "default" # Microphone (uses audio.device if not set)
loopback_device = "auto" # Capture remote participants: "auto", "disabled", or device name
echo_cancel = "auto" # GTCRN neural enhancement + transcript dedup
vad_threshold = 0.01 # Lower to 0.001 for quiet mics; 0.0 disables meeting VAD

[meeting.diarization]
enabled = true
Expand Down
13 changes: 13 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1330,6 +1330,11 @@ pub struct MeetingAudioConfig {
/// and set this to "disabled".
#[serde(default = "default_echo_cancel")]
pub echo_cancel: String,

/// RMS threshold for meeting chunk voice activity detection.
/// Lower values are more permissive; 0.0 disables the pre-transcription gate.
#[serde(default = "default_meeting_vad_threshold")]
pub vad_threshold: f32,
}

fn default_mic_device() -> String {
Expand All @@ -1344,12 +1349,17 @@ fn default_echo_cancel() -> String {
"auto".to_string()
}

/// Serde default for `MeetingAudioConfig::vad_threshold` — the RMS gate
/// applied to meeting chunks before transcription (documented as `0.01`).
fn default_meeting_vad_threshold() -> f32 {
    1e-2
}

impl Default for MeetingAudioConfig {
fn default() -> Self {
Self {
mic_device: default_mic_device(),
loopback_device: default_loopback(),
echo_cancel: default_echo_cancel(),
vad_threshold: default_meeting_vad_threshold(),
}
}
}
Expand Down Expand Up @@ -3505,6 +3515,7 @@ mod tests {
let config = MeetingAudioConfig::default();
assert_eq!(config.mic_device, "default");
assert_eq!(config.loopback_device, "auto");
assert_eq!(config.vad_threshold, 0.01);
}

#[test]
Expand Down Expand Up @@ -3592,6 +3603,7 @@ mod tests {
[meeting.audio]
mic_device = "hw:1"
loopback_device = "disabled"
vad_threshold = 0.001

[meeting.diarization]
enabled = false
Expand All @@ -3607,6 +3619,7 @@ mod tests {
let config: Config = toml::from_str(toml_str).unwrap();
assert_eq!(config.meeting.audio.mic_device, "hw:1");
assert_eq!(config.meeting.audio.loopback_device, "disabled");
assert_eq!(config.meeting.audio.vad_threshold, 0.001);
assert!(!config.meeting.diarization.enabled);
assert_eq!(config.meeting.diarization.backend, "ml");
assert_eq!(config.meeting.diarization.max_speakers, 5);
Expand Down
202 changes: 132 additions & 70 deletions src/daemon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,7 @@ impl Daemon {
},
retain_audio: self.config.meeting.retain_audio,
max_duration_mins: self.config.meeting.max_duration_mins,
vad_threshold: self.config.meeting.audio.vad_threshold,
diarization: diarization_config,
};

Expand All @@ -790,7 +791,18 @@ impl Daemon {
"disabled" | "" => None,
other => Some(other),
};
match audio::DualCapture::new(&self.config.audio, loopback_device) {
let mut meeting_audio_config = self.config.audio.clone();
let meeting_mic_device = self.config.meeting.audio.mic_device.as_str();
if !matches!(meeting_mic_device, "default" | "") {
tracing::info!(
"Meeting mic override: {} (dictation uses {})",
meeting_mic_device,
self.config.audio.device
);
meeting_audio_config.device =
self.config.meeting.audio.mic_device.clone();
}
match audio::DualCapture::new(&meeting_audio_config, loopback_device) {
Ok(mut capture) => {
if let Err(e) = capture.start().await {
tracing::error!("Failed to start meeting audio: {}", e);
Expand Down Expand Up @@ -873,12 +885,24 @@ impl Daemon {

/// Stop the current meeting
async fn stop_meeting(&mut self) -> Result<()> {
if let Some(mut daemon) = self.meeting_daemon.take() {
// Stop audio capture
if self.meeting_daemon.is_some() {
// Stop audio capture and keep any samples that arrived since the last poll.
if let Some(mut capture) = self.meeting_audio_capture.take() {
let _ = capture.stop().await;
match capture.stop().await {
Ok(dual_samples) => {
self.meeting_mic_buffer.extend(dual_samples.mic);
self.meeting_loopback_buffer.extend(dual_samples.loopback);
}
Err(e) => {
tracing::warn!("Failed to stop meeting audio cleanly: {}", e);
}
}
}

// Flush the final partial chunk so speech near stop is not dropped.
self.process_buffered_meeting_audio(true).await;

let mut daemon = self.meeting_daemon.take().expect("checked above");
match daemon.stop().await {
Ok(meeting_id) => {
self.update_meeting_state("idle", None);
Expand Down Expand Up @@ -964,6 +988,109 @@ impl Daemon {
16000 * self.config.meeting.chunk_duration_secs as usize
}

/// Feed one mic/loopback chunk pair into the active meeting pipeline.
///
/// Order matters and is deliberate: (1) optionally enhance the mic audio
/// with GTCRN, (2) transcribe the mic chunk, (3) transcribe the loopback
/// chunk, (4) if loopback produced segments, dedup echoed phrases out of
/// the mic transcript. Either chunk may be empty, in which case that
/// source is skipped. If no meeting daemon is active, both chunks are
/// silently dropped.
async fn process_meeting_audio_pair(&mut self, mic_chunk: Vec<f32>, loopback_chunk: Vec<f32>) {
    // `mut` is only exercised by the GTCRN path below; suppress the
    // unused_mut lint when the onnx-common feature is compiled out.
    #[cfg_attr(not(feature = "onnx-common"), allow(unused_mut))]
    let mut mic_chunk = mic_chunk;

    // Enhance mic audio with GTCRN if available (removes echo/noise)
    #[cfg(feature = "onnx-common")]
    {
        if !mic_chunk.is_empty() {
            if let Some(ref enhancer) = self.speech_enhancer {
                match enhancer.enhance(&mic_chunk) {
                    Ok(enhanced) => {
                        tracing::debug!(
                            "GTCRN enhanced mic chunk ({} samples)",
                            enhanced.len()
                        );
                        mic_chunk = enhanced;
                    }
                    Err(e) => {
                        // Enhancement is best-effort: fall back to the raw
                        // mic samples rather than dropping the chunk.
                        tracing::warn!("GTCRN enhancement failed, using raw mic: {}", e);
                    }
                }
            }
        }
    }

    if let Some(ref mut daemon) = self.meeting_daemon {
        // Set only when the loopback chunk yields at least one transcript
        // segment; gates the bleed-through dedup pass below.
        let mut had_loopback = false;

        if !mic_chunk.is_empty() {
            match daemon
                .process_chunk_with_source(mic_chunk, meeting::data::AudioSource::Microphone)
                .await
            {
                Ok(Some(segments)) => {
                    tracing::debug!("Processed mic chunk with {} segments", segments.len());
                }
                Ok(None) => {}
                Err(e) => {
                    // Log and continue; a failed mic chunk should not stop
                    // loopback processing for the same time window.
                    tracing::error!("Error processing mic chunk: {}", e);
                }
            }
        }

        if !loopback_chunk.is_empty() {
            match daemon
                .process_chunk_with_source(loopback_chunk, meeting::data::AudioSource::Loopback)
                .await
            {
                Ok(Some(segments)) => {
                    tracing::debug!(
                        "Processed loopback chunk with {} segments",
                        segments.len()
                    );
                    if !segments.is_empty() {
                        had_loopback = true;
                    }
                }
                Ok(None) => {}
                Err(e) => {
                    tracing::error!("Error processing loopback chunk: {}", e);
                }
            }
        }

        // Dedup bleed-through: strip echoed phrases from mic segments
        if had_loopback {
            if let Some(ref mut meeting) = daemon.current_meeting_mut() {
                let removed = meeting.transcript.dedup_bleed_through();
                if removed > 0 {
                    tracing::info!("Removed {} bleed-through word(s) via dedup", removed);
                }
            }
        }
    }
}

/// Drain the queued meeting audio buffers into fixed-size chunks and hand
/// each mic/loopback pair to the transcription pipeline.
///
/// When `include_tail` is true, any remaining partial chunk is flushed as
/// well — used at meeting stop so speech near the end is not dropped.
async fn process_buffered_meeting_audio(&mut self, include_tail: bool) {
    let samples_per_chunk = self.meeting_chunk_samples();

    // Emit full-size chunks while enough mic audio is queued. The
    // loopback buffer is drained in lockstep, up to the same length.
    loop {
        if self.meeting_mic_buffer.len() < samples_per_chunk {
            break;
        }
        let mic: Vec<f32> = self.meeting_mic_buffer.drain(..samples_per_chunk).collect();
        let take = samples_per_chunk.min(self.meeting_loopback_buffer.len());
        let loopback: Vec<f32> = self.meeting_loopback_buffer.drain(..take).collect();
        self.process_meeting_audio_pair(mic, loopback).await;
    }

    if !include_tail {
        return;
    }

    // Flush whatever is left, even if shorter than a full chunk.
    let mic_tail = std::mem::take(&mut self.meeting_mic_buffer);
    let loopback_tail = std::mem::take(&mut self.meeting_loopback_buffer);
    if mic_tail.is_empty() && loopback_tail.is_empty() {
        return;
    }
    tracing::debug!(
        mic_samples = mic_tail.len(),
        loopback_samples = loopback_tail.len(),
        "Processing final meeting audio tail"
    );
    self.process_meeting_audio_pair(mic_tail, loopback_tail)
        .await;
}

/// Reset state to idle and run post_output_command to reset compositor submap
/// Call this when exiting from recording/transcribing without normal output flow
async fn reset_to_idle(&mut self, state: &mut State) {
Expand Down Expand Up @@ -2683,72 +2810,7 @@ impl Daemon {
self.meeting_mic_buffer.extend(dual_samples.mic);
self.meeting_loopback_buffer.extend(dual_samples.loopback);

// Check if mic buffer has enough samples for a chunk
let chunk_samples = self.meeting_chunk_samples();
if self.meeting_mic_buffer.len() >= chunk_samples {
let mic_chunk: Vec<f32> = self.meeting_mic_buffer.drain(..chunk_samples).collect();

// Also drain loopback buffer up to the same amount
let loopback_len = self.meeting_loopback_buffer.len().min(chunk_samples);
let loopback_chunk: Vec<f32> = self.meeting_loopback_buffer.drain(..loopback_len).collect();

// Enhance mic audio with GTCRN if available (removes echo/noise)
#[cfg(feature = "onnx-common")]
let mic_chunk = if let Some(ref enhancer) = self.speech_enhancer {
match enhancer.enhance(&mic_chunk) {
Ok(enhanced) => {
tracing::debug!("GTCRN enhanced mic chunk ({} samples)", enhanced.len());
enhanced
}
Err(e) => {
tracing::warn!("GTCRN enhancement failed, using raw mic: {}", e);
mic_chunk
}
}
} else {
mic_chunk
};

if let Some(ref mut daemon) = self.meeting_daemon {
// Process mic chunk
let mut had_loopback = false;
match daemon.process_chunk_with_source(mic_chunk, meeting::data::AudioSource::Microphone).await {
Ok(Some(segments)) => {
tracing::debug!("Processed mic chunk with {} segments", segments.len());
}
Ok(None) => {}
Err(e) => {
tracing::error!("Error processing mic chunk: {}", e);
}
}

// Process loopback chunk if non-empty
if !loopback_chunk.is_empty() {
match daemon.process_chunk_with_source(loopback_chunk, meeting::data::AudioSource::Loopback).await {
Ok(Some(segments)) => {
tracing::debug!("Processed loopback chunk with {} segments", segments.len());
if !segments.is_empty() {
had_loopback = true;
}
}
Ok(None) => {}
Err(e) => {
tracing::error!("Error processing loopback chunk: {}", e);
}
}
}

// Dedup bleed-through: strip echoed phrases from mic segments
if had_loopback {
if let Some(ref mut meeting) = daemon.current_meeting_mut() {
let removed = meeting.transcript.dedup_bleed_through();
if removed > 0 {
tracing::info!("Removed {} bleed-through word(s) via dedup", removed);
}
}
}
}
}
self.process_buffered_meeting_audio(false).await;
}

// Check meeting timeout
Expand Down
1 change: 1 addition & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1359,6 +1359,7 @@ async fn run_meeting_command(config: &config::Config, action: MeetingAction) ->
},
retain_audio: config.meeting.retain_audio,
max_duration_mins: config.meeting.max_duration_mins,
vad_threshold: config.meeting.audio.vad_threshold,
diarization: None,
};

Expand Down
12 changes: 10 additions & 2 deletions src/meeting/chunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,14 @@ impl ChunkProcessor {

// Check for speech
if !self.vad.contains_speech(&samples) {
tracing::debug!("Chunk {} has no speech, skipping", chunk_id);
tracing::debug!(
chunk_id,
source = %source,
duration_secs = samples.len() as f32 / self.config.sample_rate as f32,
rms = VoiceActivityDetector::calculate_rms(&samples),
threshold = self.config.vad_threshold,
"Meeting chunk skipped: no speech detected"
);
return Ok(ProcessedChunk {
chunk_id,
segments: vec![],
Expand All @@ -276,7 +283,8 @@ impl ChunkProcessor {

// Transcribe the chunk
tracing::info!(
"Transcribing chunk {} ({:.1}s of audio)",
"Transcribing {:?} chunk {} ({:.1}s of audio)",
source,
chunk_id,
samples.len() as f32 / self.config.sample_rate as f32
);
Expand Down
Loading