peteonrails · materemias · Apr 23, 2026 · Apr 23, 2026
@@ -2688,7 +2688,13 @@ impl Daemon {
                         if self.meeting_mic_buffer.len() >= chunk_samples {
                             let mic_chunk: Vec<f32> = self.meeting_mic_buffer.drain(..chunk_samples).collect();
 
-                            // Also drain loopback buffer up to the same amount
+                            // Also drain loopback buffer up to the same amount.
+                            // Do NOT pad to chunk_samples — diluting real speech
+                            // with silence can drop the VAD speech-frame ratio
+                            // below threshold and cause remote audio to be
+                            // discarded. Instead we reconcile timestamp offsets
+                            // after both sources have been dispatched via
+                            // sync_source_offsets.
                             let loopback_len = self.meeting_loopback_buffer.len().min(chunk_samples);
                             let loopback_chunk: Vec<f32> = self.meeting_loopback_buffer.drain(..loopback_len).collect();
 
@@ -2722,7 +2728,12 @@ impl Daemon {
                                     }
                                 }
 
-                                // Process loopback chunk if non-empty
+                                // Process loopback chunk if we have any real samples.
+                                // If the monitor is lagging we simply skip — the
+                                // sync_source_offsets call below bumps loopback's
+                                // timestamp offset to match mic so the next
+                                // loopback chunk lands at the correct wall-clock
+                                // position.
                                 if !loopback_chunk.is_empty() {
                                     match daemon.process_chunk_with_source(loopback_chunk, meeting::data::AudioSource::Loopback).await {
                                         Ok(Some(segments)) => {
@@ -2738,6 +2749,11 @@ impl Daemon {
                                     }
                                 }
 
+                                // Reconcile per-source offsets so any source that
+                                // received a short or skipped chunk this iteration
+                                // catches up to wall-clock before the next one.
+                                daemon.sync_source_offsets();
+
                                 // Dedup bleed-through: strip echoed phrases from mic segments
                                 if had_loopback {
                                     if let Some(ref mut meeting) = daemon.current_meeting_mut() {

@@ -110,6 +110,11 @@ pub struct MeetingDaemon {
     /// Previous chunk's post-processed text, tracked per audio source
     /// so mic and loopback contexts don't bleed into each other
     last_chunk_text: HashMap<AudioSource, String>,
+    /// Cumulative audio duration consumed per source, in milliseconds.
+    /// Used to compute per-source start offsets so mic and loopback
+    /// timelines stay anchored to real wall-clock elapsed time instead
+    /// of being pushed forward by the other source's segments.
+    source_offsets: HashMap<AudioSource, u64>,
 }
 
 impl MeetingDaemon {
@@ -157,6 +162,7 @@ impl MeetingDaemon {
             event_tx,
             post_processor,
             last_chunk_text: HashMap::new(),
+            source_offsets: HashMap::new(),
         })
     }
 
@@ -224,6 +230,7 @@ impl MeetingDaemon {
 
         self.state = std::mem::take(&mut self.state).stop();
         self.last_chunk_text.clear();
+        self.source_offsets.clear();
 
         // Finalize meeting
         if let Some(ref mut meeting) = self.current_meeting {
@@ -270,6 +277,18 @@ impl MeetingDaemon {
         self.current_meeting.as_ref().map(|m| m.metadata.id)
     }
 
+    /// Align all per-source timestamp offsets to the furthest-advanced source.
+    ///
+    /// Call this after dispatching the per-iteration chunks for all sources so
+    /// any source that received a short or empty chunk (e.g. loopback monitor
+    /// lagging at startup) catches up to wall-clock before the next iteration.
+    pub fn sync_source_offsets(&mut self) {
+        let max_offset = self.source_offsets.values().copied().max().unwrap_or(0);
+        for offset in self.source_offsets.values_mut() {
+            *offset = max_offset;
+        }
+    }
+
     /// Get mutable access to current meeting data (for dedup, etc.)
     pub fn current_meeting_mut(&mut self) -> Option<&mut MeetingData> {
         self.current_meeting.as_mut()
@@ -304,12 +323,21 @@ impl MeetingDaemon {
             ..Default::default()
         };
 
-        // Calculate start offset
-        let start_offset_ms = if let Some(ref meeting) = self.current_meeting {
-            meeting.transcript.duration_ms()
-        } else {
-            0
-        };
+        // Start offset is tracked per source: each source has its own wall-clock
+        // timeline. Deriving this from transcript.duration_ms() would conflate
+        // mic and loopback, pushing every new chunk past the other source's end
+        // and roughly doubling apparent meeting length on dual-track captures.
+        let start_offset_ms = *self.source_offsets.entry(source).or_insert(0);
+
+        // Advance the per-source offset up front, based on the input sample
+        // count, so the offset tracks wall-clock even when transcription errors
+        // or VAD skips this chunk (the caller has already drained these samples
+        // from its buffer, so the time has elapsed regardless).
+        let audio_duration_ms =
+            (samples.len() as f64 / chunk_config.sample_rate as f64 * 1000.0) as u64;
+        if let Some(offset) = self.source_offsets.get_mut(&source) {
+            *offset += audio_duration_ms;
+        }
 
         let mut processor = ChunkProcessor::new(chunk_config, transcriber.clone());
         let mut buffer = processor.new_buffer(chunk_id, source, start_offset_ms);