Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions src/daemon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2688,7 +2688,13 @@ impl Daemon {
if self.meeting_mic_buffer.len() >= chunk_samples {
let mic_chunk: Vec<f32> = self.meeting_mic_buffer.drain(..chunk_samples).collect();

// Also drain loopback buffer up to the same amount
// Also drain loopback buffer up to the same amount.
// Do NOT pad to chunk_samples — diluting real speech
// with silence can drop the VAD speech-frame ratio
// below threshold and cause remote audio to be
// discarded. Instead we reconcile timestamp offsets
// after both sources have been dispatched via
// sync_source_offsets.
let loopback_len = self.meeting_loopback_buffer.len().min(chunk_samples);
let loopback_chunk: Vec<f32> = self.meeting_loopback_buffer.drain(..loopback_len).collect();

Expand Down Expand Up @@ -2722,7 +2728,12 @@ impl Daemon {
}
}

// Process loopback chunk if non-empty
// Process loopback chunk if we have any real samples.
// If the monitor is lagging we simply skip — the
// sync_source_offsets call below bumps loopback's
// timestamp offset to match mic so the next
// loopback chunk lands at the correct wall-clock
// position.
if !loopback_chunk.is_empty() {
match daemon.process_chunk_with_source(loopback_chunk, meeting::data::AudioSource::Loopback).await {
Ok(Some(segments)) => {
Expand All @@ -2738,6 +2749,11 @@ impl Daemon {
}
}

// Reconcile per-source offsets so any source that
// received a short or skipped chunk this iteration
// catches up to wall-clock before the next one.
daemon.sync_source_offsets();

// Dedup bleed-through: strip echoed phrases from mic segments
if had_loopback {
if let Some(ref mut meeting) = daemon.current_meeting_mut() {
Expand Down
40 changes: 34 additions & 6 deletions src/meeting/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ pub struct MeetingDaemon {
/// Previous chunk's post-processed text, tracked per audio source
/// so mic and loopback contexts don't bleed into each other
last_chunk_text: HashMap<AudioSource, String>,
/// Cumulative audio duration consumed per source, in milliseconds.
/// Used to compute per-source start offsets so mic and loopback
/// timelines stay anchored to real wall-clock elapsed time instead
/// of being pushed forward by the other source's segments.
source_offsets: HashMap<AudioSource, u64>,
}

impl MeetingDaemon {
Expand Down Expand Up @@ -157,6 +162,7 @@ impl MeetingDaemon {
event_tx,
post_processor,
last_chunk_text: HashMap::new(),
source_offsets: HashMap::new(),
})
}

Expand Down Expand Up @@ -224,6 +230,7 @@ impl MeetingDaemon {

self.state = std::mem::take(&mut self.state).stop();
self.last_chunk_text.clear();
self.source_offsets.clear();

// Finalize meeting
if let Some(ref mut meeting) = self.current_meeting {
Expand Down Expand Up @@ -270,6 +277,18 @@ impl MeetingDaemon {
self.current_meeting.as_ref().map(|m| m.metadata.id)
}

/// Align all per-source timestamp offsets to the furthest-advanced source.
///
/// Call this after dispatching the per-iteration chunks for all sources so
/// any source that received a short or empty chunk (e.g. loopback monitor
/// lagging at startup) catches up to wall-clock before the next iteration.
pub fn sync_source_offsets(&mut self) {
let max_offset = self.source_offsets.values().copied().max().unwrap_or(0);
for offset in self.source_offsets.values_mut() {
*offset = max_offset;
}
}

/// Get mutable access to current meeting data (for dedup, etc.)
pub fn current_meeting_mut(&mut self) -> Option<&mut MeetingData> {
self.current_meeting.as_mut()
Expand Down Expand Up @@ -304,12 +323,21 @@ impl MeetingDaemon {
..Default::default()
};

// Calculate start offset
let start_offset_ms = if let Some(ref meeting) = self.current_meeting {
meeting.transcript.duration_ms()
} else {
0
};
// Start offset is tracked per source: each source has its own wall-clock
// timeline. Deriving this from transcript.duration_ms() would conflate
// mic and loopback, pushing every new chunk past the other source's end
// and roughly doubling apparent meeting length on dual-track captures.
let start_offset_ms = *self.source_offsets.entry(source).or_insert(0);

// Advance the per-source offset up front, based on the input sample
// count, so the offset tracks wall-clock even when transcription errors
// or VAD skips this chunk (the caller has already drained these samples
// from its buffer, so the time has elapsed regardless).
let audio_duration_ms =
(samples.len() as f64 / chunk_config.sample_rate as f64 * 1000.0) as u64;
if let Some(offset) = self.source_offsets.get_mut(&source) {
*offset += audio_duration_ms;
}

let mut processor = ChunkProcessor::new(chunk_config, transcriber.clone());
let mut buffer = processor.new_buffer(chunk_id, source, start_offset_ms);
Expand Down