Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/rosetta-issue-99.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@livekit/agents": minor
---

feat(voice): add dynamic endpointing to the Node.js SDK
114 changes: 94 additions & 20 deletions agents/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -350,45 +350,119 @@ export class AsyncIterableQueue<T> implements AsyncIterableIterator<T> {
}
}

type ExpFilterConfig = {
alpha?: number;
initial?: number;
min?: number;
max?: number;
};

/** @internal */
// Ref: source livekit-agents/livekit/agents/utils/exp_filter.py - 5-64
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Ref comment uses wrong format in utils.ts

Same CLAUDE.md format violation — uses // Ref: source instead of // Ref: python and missing lines suffix.

Suggested change
// Ref: source livekit-agents/livekit/agents/utils/exp_filter.py - 5-64
// Ref: python livekit-agents/livekit/agents/utils/exp_filter.py - 5-64 lines
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

export class ExpFilter {
#alpha: number;
#max?: number;
#filtered?: number = undefined;
#alphaValue: number;
#maxValue?: number;
#minValue?: number;
#filteredValue?: number;

constructor(alpha: number, max?: number);
constructor(alpha: number, config?: ExpFilterConfig);
constructor(alpha: number, maxOrConfig?: number | ExpFilterConfig) {
this.assertAlpha(alpha);
this.#alphaValue = alpha;

if (typeof maxOrConfig === 'number') {
this.#maxValue = maxOrConfig;
return;
}

constructor(alpha: number, max?: number) {
this.#alpha = alpha;
this.#max = max;
this.#maxValue = maxOrConfig?.max;
this.#minValue = maxOrConfig?.min;
this.#filteredValue = maxOrConfig?.initial;
}

reset(alpha?: number) {
if (alpha) {
this.#alpha = alpha;
reset(alpha?: number): void;
reset(config?: ExpFilterConfig): void;
reset(alphaOrConfig?: number | ExpFilterConfig): void {
if (typeof alphaOrConfig === 'number') {
this.assertAlpha(alphaOrConfig);
this.#alphaValue = alphaOrConfig;
return;
}

if (alphaOrConfig?.alpha !== undefined) {
this.assertAlpha(alphaOrConfig.alpha);
this.#alphaValue = alphaOrConfig.alpha;
}
if (alphaOrConfig?.initial !== undefined) {
this.#filteredValue = alphaOrConfig.initial;
}
if (alphaOrConfig?.min !== undefined) {
this.#minValue = alphaOrConfig.min;
}
if (alphaOrConfig?.max !== undefined) {
this.#maxValue = alphaOrConfig.max;
}
this.#filtered = undefined;
}

apply(exp: number, sample: number): number {
if (this.#filtered) {
const a = this.#alpha ** exp;
this.#filtered = a * this.#filtered + (1 - a) * sample;
apply(exp: number, sample: number): number;
apply(exp: number, sample?: number): number {
const nextSample = sample ?? this.#filteredValue;
if (nextSample === undefined) {
throw new Error('sample or initial value must be given.');
}

if (this.#filteredValue === undefined) {
this.#filteredValue = nextSample;
} else {
this.#filtered = sample;
const alpha = this.#alphaValue ** exp;
this.#filteredValue = alpha * this.#filteredValue + (1 - alpha) * nextSample;
}

if (this.#max && this.#filtered > this.#max) {
this.#filtered = this.#max;
if (this.#maxValue !== undefined && this.#filteredValue > this.#maxValue) {
this.#filteredValue = this.#maxValue;
}
if (this.#minValue !== undefined && this.#filteredValue < this.#minValue) {
this.#filteredValue = this.#minValue;
}

return this.#filtered;
return this.#filteredValue;
}

updateBase(alpha: number): void {
this.assertAlpha(alpha);
this.#alphaValue = alpha;
}

get filtered(): number | undefined {
return this.#filtered;
return this.#filteredValue;
}

get value(): number | undefined {
return this.#filteredValue;
}

get alpha(): number {
return this.#alphaValue;
}

set alpha(alpha: number) {
this.#alpha = alpha;
this.assertAlpha(alpha);
this.#alphaValue = alpha;
}

get min(): number | undefined {
return this.#minValue;
}

get max(): number | undefined {
return this.#maxValue;
}

private assertAlpha(alpha: number): void {
if (!(alpha > 0 && alpha <= 1)) {
throw new Error('alpha must be in (0, 1].');
}
}
}

Expand Down
96 changes: 59 additions & 37 deletions agents/src/voice/agent_activity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ import {
} from './generation.js';
import type { TimedString } from './io.js';
import { SpeechHandle } from './speech_handle.js';
import { type EndpointingOptions, createEndpointing } from './turn_config/endpointing.js';
import { setParticipantSpanAttributes } from './utils.js';

export const agentActivityStorage = new AsyncLocalStorage<AgentActivity>();
Expand Down Expand Up @@ -469,6 +470,7 @@ export class AgentActivity implements RecognitionHooks {
this.vad.on('metrics_collected', this.onMetricsCollected);
}

// Ref: source livekit-agents/livekit/agents/voice/agent_activity.py - 768-784
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Ref comments use wrong format in agent_activity.ts

Same CLAUDE.md format violation — uses // Ref: source instead of // Ref: python and missing lines suffix. This affects all 6 Ref comments added to this file (lines 473, 753, 936, 959, 1845, 2127).

Suggested change
// Ref: source livekit-agents/livekit/agents/voice/agent_activity.py - 768-784
// Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 768-784 lines
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

this.audioRecognition = new AudioRecognition({
recognitionHooks: this,
// Disable stt node if stt is not provided
Expand All @@ -483,6 +485,17 @@ export class AgentActivity implements RecognitionHooks {
maxEndpointingDelay:
this.agent.turnHandling?.endpointing?.maxDelay ??
this.agentSession.sessionOptions.turnHandling.endpointing.maxDelay,
endpointing: createEndpointing({
mode:
this.agent.turnHandling?.endpointing?.mode ??
this.agentSession.sessionOptions.turnHandling.endpointing.mode,
minDelay:
this.agent.turnHandling?.endpointing?.minDelay ??
this.agentSession.sessionOptions.turnHandling.endpointing.minDelay,
maxDelay:
this.agent.turnHandling?.endpointing?.maxDelay ??
this.agentSession.sessionOptions.turnHandling.endpointing.maxDelay,
}),
rootSpanContext: this.agentSession.rootSpanContext,
sttModel: this.stt?.label,
sttProvider: this.getSttProvider(),
Expand Down Expand Up @@ -661,20 +674,6 @@ export class AgentActivity implements RecognitionHooks {
return this.agent.turnHandling ?? this.agentSession.sessionOptions.turnHandling;
}

// get minEndpointingDelay(): number {
// return (
// this.agent.turnHandling?.endpointing?.minDelay ??
// this.agentSession.sessionOptions.turnHandling.endpointing.minDelay
// );
// }

// get maxEndpointingDelay(): number {
// return (
// this.agent.turnHandling?.endpointing?.maxDelay ??
// this.agentSession.sessionOptions.turnHandling.endpointing.maxDelay
// );
// }

get toolCtx(): ToolContext {
return this.agent.toolCtx;
}
Expand Down Expand Up @@ -721,6 +720,15 @@ export class AgentActivity implements RecognitionHooks {
}: {
toolChoice?: ToolChoice | null;
turnDetection?: TurnDetectionMode;
}): void;
updateOptions({
toolChoice,
turnDetection,
endpointing,
}: {
toolChoice?: ToolChoice | null;
turnDetection?: TurnDetectionMode;
endpointing?: EndpointingOptions;
}): void {
if (toolChoice !== undefined) {
this.toolChoice = toolChoice;
Expand All @@ -742,8 +750,12 @@ export class AgentActivity implements RecognitionHooks {
}
}

// Ref: source livekit-agents/livekit/agents/voice/agent_activity.py - 477-482
if (this.audioRecognition) {
this.audioRecognition.updateOptions({ turnDetection: this.turnDetectionMode });
this.audioRecognition.updateOptions({
endpointing: endpointing ? createEndpointing(endpointing) : undefined,
turnDetection: this.turnDetectionMode,
});
}
}

Expand Down Expand Up @@ -921,13 +933,10 @@ export class AgentActivity implements RecognitionHooks {
this.logger.info('onInputSpeechStarted');

if (!this.vad) {
// Ref: source livekit-agents/livekit/agents/voice/agent_activity.py - 1490-1498
this.agentSession._updateUserState('speaking');
if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
this.audioRecognition.onStartOfOverlapSpeech(
0,
Date.now(),
this.agentSession._userSpeakingSpan,
);
if (this.audioRecognition) {
this.audioRecognition.onStartOfSpeech(Date.now(), 0, this.agentSession._userSpeakingSpan);
}
}

Expand All @@ -947,8 +956,9 @@ export class AgentActivity implements RecognitionHooks {
this.logger.info(ev, 'onInputSpeechStopped');

if (!this.vad) {
if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
this.audioRecognition.onEndOfOverlapSpeech(Date.now(), this.agentSession._userSpeakingSpan);
// Ref: source livekit-agents/livekit/agents/voice/agent_activity.py - 1508-1516
if (this.audioRecognition) {
this.audioRecognition.onEndOfSpeech(Date.now(), this.agentSession._userSpeakingSpan);
}
this.agentSession._updateUserState('listening');
}
Expand Down Expand Up @@ -1832,13 +1842,17 @@ export class AgentActivity implements RecognitionHooks {
let replyTtsGenData: _TTSGenerationData | null = null;

const onFirstFrame = (startedSpeakingAt?: number) => {
replyStartedSpeakingAt = startedSpeakingAt ?? Date.now();
// Ref: source livekit-agents/livekit/agents/voice/agent_activity.py - 2183-2197
const agentSpeechStartedAt = startedSpeakingAt ?? Date.now();
replyStartedSpeakingAt = agentSpeechStartedAt;
this.agentSession._updateAgentState('speaking', {
startTime: startedSpeakingAt,
otelContext: speechHandle._agentTurnContext,
});
if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
this.audioRecognition.onStartOfAgentSpeech();
if (this.audioRecognition) {
this.audioRecognition.onStartOfAgentSpeech(agentSpeechStartedAt);
}
if (this.isInterruptionDetectionEnabled) {
this.isInterruptionByAudioActivityEnabled = false;
}
};
Expand Down Expand Up @@ -1924,10 +1938,12 @@ export class AgentActivity implements RecognitionHooks {

if (this.agentSession.agentState === 'speaking') {
this.agentSession._updateAgentState('listening');
if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
if (this.audioRecognition) {
this.audioRecognition.onEndOfAgentSpeech(Date.now());
}
this.restoreInterruptionByAudioActivity();
if (this.isInterruptionDetectionEnabled) {
this.restoreInterruptionByAudioActivity();
}
}
}

Expand Down Expand Up @@ -2108,13 +2124,17 @@ export class AgentActivity implements RecognitionHooks {

let agentStartedSpeakingAt: number | undefined;
const onFirstFrame = (startedSpeakingAt?: number) => {
agentStartedSpeakingAt = startedSpeakingAt ?? Date.now();
// Ref: source livekit-agents/livekit/agents/voice/agent_activity.py - 2183-2197
const agentSpeechStartedAt = startedSpeakingAt ?? Date.now();
agentStartedSpeakingAt = agentSpeechStartedAt;
this.agentSession._updateAgentState('speaking', {
startTime: startedSpeakingAt,
otelContext: speechHandle._agentTurnContext,
});
if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
this.audioRecognition.onStartOfAgentSpeech();
if (this.audioRecognition) {
this.audioRecognition.onStartOfAgentSpeech(agentSpeechStartedAt);
}
if (this.isInterruptionDetectionEnabled) {
this.isInterruptionByAudioActivityEnabled = false;
}
};
Expand Down Expand Up @@ -2271,8 +2291,10 @@ export class AgentActivity implements RecognitionHooks {

if (this.agentSession.agentState === 'speaking') {
this.agentSession._updateAgentState('listening');
if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
if (this.audioRecognition) {
this.audioRecognition.onEndOfAgentSpeech(Date.now());
}
if (this.isInterruptionDetectionEnabled) {
this.restoreInterruptionByAudioActivity();
}
}
Expand Down Expand Up @@ -2314,11 +2336,11 @@ export class AgentActivity implements RecognitionHooks {
this.agentSession._updateAgentState('thinking');
} else if (this.agentSession.agentState === 'speaking') {
this.agentSession._updateAgentState('listening');
if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
{
this.audioRecognition.onEndOfAgentSpeech(Date.now());
this.restoreInterruptionByAudioActivity();
}
if (this.audioRecognition) {
this.audioRecognition.onEndOfAgentSpeech(Date.now());
}
if (this.isInterruptionDetectionEnabled) {
this.restoreInterruptionByAudioActivity();
}
}

Expand Down
Loading
Loading