feat(vela): support end-to-end mocked turn cancellation

This commit is contained in:
2026-04-08 19:49:31 +02:00
parent ff78fc4c8f
commit 0d5b53be00
7 changed files with 197 additions and 9 deletions

View File

@@ -22,7 +22,8 @@ function createSessionRecord() {
audioChunkCount: 0, audioChunkCount: 0,
started: false, started: false,
mockedTurnInFlight: false, mockedTurnInFlight: false,
mockedTurnTimers: [] mockedTurnTimers: [],
activeMockedTurnId: null
}; };
} }
@@ -33,11 +34,17 @@ function clearMockedTurn(session) {
session.mockedTurnTimers = []; session.mockedTurnTimers = [];
session.mockedTurnInFlight = false; session.mockedTurnInFlight = false;
session.activeMockedTurnId = null;
} }
function scheduleMockedTurnStep(session, delay, callback) { function scheduleMockedTurnStep(session, turnId, delay, callback) {
const timer = setTimeout(() => { const timer = setTimeout(() => {
session.mockedTurnTimers = session.mockedTurnTimers.filter((activeTimer) => activeTimer !== timer); session.mockedTurnTimers = session.mockedTurnTimers.filter((activeTimer) => activeTimer !== timer);
if (!session.mockedTurnInFlight || session.activeMockedTurnId !== turnId) {
return;
}
callback(); callback();
}, delay); }, delay);
@@ -53,23 +60,25 @@ function startMockedTurn(socket, session) {
clearMockedTurn(session); clearMockedTurn(session);
session.audioChunkCount = 0; session.audioChunkCount = 0;
session.mockedTurnInFlight = true; session.mockedTurnInFlight = true;
const turnId = crypto.randomUUID();
session.activeMockedTurnId = turnId;
updateSessionState(socket, session, 'listening'); updateSessionState(socket, session, 'listening');
scheduleMockedTurnStep(session, 75, () => { scheduleMockedTurnStep(session, turnId, 75, () => {
sendSocketMessage(socket, 'transcript.final', { text: MOCKED_USER_TRANSCRIPT }); sendSocketMessage(socket, 'transcript.final', { text: MOCKED_USER_TRANSCRIPT });
updateSessionState(socket, session, 'thinking'); updateSessionState(socket, session, 'thinking');
}); });
scheduleMockedTurnStep(session, 150, () => { scheduleMockedTurnStep(session, turnId, 150, () => {
updateSessionState(socket, session, 'speaking'); updateSessionState(socket, session, 'speaking');
sendSocketMessage(socket, 'response.text.delta', { text: '[mocked assistant] ' }); sendSocketMessage(socket, 'response.text.delta', { text: '[mocked assistant] ' });
}); });
scheduleMockedTurnStep(session, 225, () => { scheduleMockedTurnStep(session, turnId, 225, () => {
sendSocketMessage(socket, 'response.text.delta', { text: MOCKED_ASSISTANT_RESPONSE.replace('[mocked assistant] ', '') }); sendSocketMessage(socket, 'response.text.delta', { text: MOCKED_ASSISTANT_RESPONSE.replace('[mocked assistant] ', '') });
}); });
scheduleMockedTurnStep(session, 300, () => { scheduleMockedTurnStep(session, turnId, 300, () => {
sendSocketMessage(socket, 'response.completed', {}); sendSocketMessage(socket, 'response.completed', {});
clearMockedTurn(session); clearMockedTurn(session);
updateSessionState(socket, session, 'idle'); updateSessionState(socket, session, 'idle');

View File

@@ -405,3 +405,67 @@ test('websocket rejects a second mocked turn while one is in flight', async () =
await server.close(); await server.close();
} }
}); });
// Integration test: `response.cancel` sent in the middle of a mocked turn must bring the
// session back to `idle`, suppress every later event of that turn, and leave the same
// socket usable for a fresh `mocked.turn.trigger`.
test('websocket cancel stops an active mocked turn and allows a new one without reconnecting', async () => {
const server = await startServer();
try {
const client = await connectWebSocket(server.port);
// Discard the two messages the gateway sends right after connect
// (presumably `session.ready` and the initial `session.state` — confirm against the earlier tests).
await client.nextMessage();
await client.nextMessage();
// Start a mocked turn and follow it up to the `thinking` phase.
client.sendJson({ type: 'mocked.turn.trigger', payload: {} });
assert.deepEqual(await client.nextMessage(), {
type: 'session.state',
payload: { value: 'listening' }
});
assert.deepEqual(await client.nextMessage(), {
type: 'transcript.final',
payload: { text: '[mocked user] What is the current mocked vertical slice?' }
});
assert.deepEqual(await client.nextMessage(), {
type: 'session.state',
payload: { value: 'thinking' }
});
// Cancel while the turn is still in flight; the only reply expected is the return to `idle`.
// NOTE(review): this relies on the cancel reaching the gateway before the next scheduled
// mocked step fires — looks like a short window; confirm it is wide enough on slow CI hosts.
client.sendJson({ type: 'response.cancel', payload: {} });
assert.deepEqual(await client.nextMessage(), {
type: 'session.state',
payload: { value: 'idle' }
});
// Nothing else may arrive for the cancelled turn: waiting 150 ms for a message must end in
// the helper's timeout rejection.
await assert.rejects(() => client.nextMessage(150), /timed out waiting for websocket message/);
// A new turn can be started on the same socket without reconnecting.
client.sendJson({ type: 'mocked.turn.trigger', payload: {} });
assert.deepEqual(await client.nextMessage(), {
type: 'session.state',
payload: { value: 'listening' }
});
await client.close();
} finally {
// Always shut the server down, even when an assertion above failed.
await server.close();
}
});
// Integration test: `response.cancel` with no mocked turn in flight must be a silent no-op —
// no message comes back, and the session can still start a mocked turn afterwards.
test('websocket safely accepts cancel when no turn is active', async () => {
const server = await startServer();
try {
const client = await connectWebSocket(server.port);
// Discard the two messages the gateway sends right after connect
// (presumably `session.ready` and the initial `session.state` — confirm against the earlier tests).
await client.nextMessage();
await client.nextMessage();
// Cancel while idle: the test expects no reply at all within 150 ms.
client.sendJson({ type: 'response.cancel', payload: {} });
await assert.rejects(() => client.nextMessage(150), /timed out waiting for websocket message/);
// The idle cancel must not have left the session in a state that blocks a new turn.
client.sendJson({ type: 'mocked.turn.trigger', payload: {} });
assert.deepEqual(await client.nextMessage(), {
type: 'session.state',
payload: { value: 'listening' }
});
await client.close();
} finally {
// Always shut the server down, even when an assertion above failed.
await server.close();
}
});

View File

@@ -46,3 +46,34 @@ test('voice session shell covers the mocked transcript/response slice', async ({
await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT); await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT);
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE); await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
}); });
// End-to-end test: drive the UI through connect → mocked turn → cancel → second mocked turn,
// checking that the cancel control is only enabled during a turn and that text rendered
// before the cancel stays on screen.
test('voice session shell can cancel an active mocked turn and start another one', async ({ page }) => {
await page.goto('/');
await expect(page.getByTestId('hydration-status')).toHaveText('ready');
// Before any connection exists the cancel control must be disabled.
await expect(page.getByTestId('cancel-turn-button')).toBeDisabled();
await page.getByTestId('connect-button').click();
await expect(page.getByTestId('connection-state')).toHaveText('connected');
await expect(page.getByTestId('mocked-turn-button')).toBeEnabled();
// Start a mocked turn and wait until transcript and the first response fragment are rendered.
await page.getByTestId('mocked-turn-button').click();
await expect(page.getByTestId('mocked-turn-status')).toHaveText('running');
await expect(page.getByTestId('cancel-turn-button')).toBeEnabled();
await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT);
await expect(page.getByTestId('assistant-response')).toContainText('[mocked assistant]');
// NOTE(review): the click below happens only after the first response fragment is visible.
// If the mocked turn finishes on its own before the click lands, the button is presumably
// disabled again and the click would wait until it times out — looks like a potential source
// of flakiness; confirm against the gateway's mocked step delays.
await page.getByTestId('cancel-turn-button').click();
// After cancel: session and turn indicator are idle, cancel is disabled, trigger is enabled.
await expect(page.getByTestId('gateway-session-state')).toHaveText('idle');
await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle');
await expect(page.getByTestId('cancel-turn-button')).toBeDisabled();
await expect(page.getByTestId('mocked-turn-button')).toBeEnabled();
// Text that was already rendered must survive the cancel.
await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT);
await expect(page.getByTestId('assistant-response')).toContainText('[mocked assistant]');
// A second turn on the same connection must run to completion with the full response text.
await page.getByTestId('mocked-turn-button').click();
await expect(page.getByTestId('mocked-turn-status')).toHaveText('running');
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle');
});

View File

@@ -62,6 +62,13 @@
sessionReadyReceived && sessionReadyReceived &&
!mockedTurnInFlight; !mockedTurnInFlight;
// Reactive flag behind the cancel button's `disabled` state: true only while the socket is
// open, the UI is `connected`, `session.ready` has arrived, and a mocked turn is in flight.
// The `typeof WebSocket` check comes first so `WebSocket.OPEN` is never read where the
// global is undefined (presumably during server-side rendering — confirm).
$: canCancelMockedTurn =
typeof WebSocket !== 'undefined' &&
connectionState === 'connected' &&
socket?.readyState === WebSocket.OPEN &&
sessionReadyReceived &&
mockedTurnInFlight;
function clearSocketHandlers(targetSocket) { function clearSocketHandlers(targetSocket) {
targetSocket.onopen = null; targetSocket.onopen = null;
targetSocket.onmessage = null; targetSocket.onmessage = null;
@@ -105,6 +112,23 @@
socket.send(JSON.stringify(createMessageEnvelope('mocked.turn.trigger', {}))); socket.send(JSON.stringify(createMessageEnvelope('mocked.turn.trigger', {})));
} }
/**
 * Send a `response.cancel` envelope to the gateway over the current socket.
 *
 * The request is refused — with the reason recorded in `connectionDetail` and
 * `lastError` — when there is no socket, the socket is not open, the UI is not in
 * the `connected` state, or `session.ready` has not been received yet. When the
 * request goes through, `lastError` is reset to `'none'` before the message is sent.
 */
function cancelActiveResponse() {
  // `Boolean(socket)` short-circuits first so `readyState` is never read on a missing socket.
  const transportReady =
    Boolean(socket) && socket.readyState === WebSocket.OPEN && connectionState === 'connected';

  if (!transportReady) {
    connectionDetail = 'Connect to the gateway before cancelling a mocked turn.';
    lastError = 'response.cancel requires an active WebSocket connection';
    return;
  }

  if (sessionReadyReceived) {
    lastError = 'none';
    const cancelEnvelope = createMessageEnvelope('response.cancel', {});
    socket.send(JSON.stringify(cancelEnvelope));
    return;
  }

  connectionDetail = 'Wait for the gateway session to be ready before cancelling a mocked turn.';
  lastError = 'response.cancel requires session.ready';
}
function connect() { function connect() {
if (typeof window === 'undefined') { if (typeof window === 'undefined') {
return; return;
@@ -301,6 +325,9 @@
<button data-testid="mocked-turn-button" on:click={triggerMockedTurn} disabled={!canTriggerMockedTurn}> <button data-testid="mocked-turn-button" on:click={triggerMockedTurn} disabled={!canTriggerMockedTurn}>
Run mocked turn Run mocked turn
</button> </button>
<button data-testid="cancel-turn-button" on:click={cancelActiveResponse} disabled={!canCancelMockedTurn}>
Cancel active turn
</button>
</div> </div>
<div class="conversation"> <div class="conversation">

View File

@@ -154,4 +154,48 @@ describe('voice session shell', () => {
expect(socket.sent).toHaveLength(1); expect(socket.sent).toHaveLength(1);
expect(JSON.parse(socket.sent[0]).type).toBe('mocked.turn.trigger'); expect(JSON.parse(socket.sent[0]).type).toBe('mocked.turn.trigger');
}); });
// Component test against a mock socket: the cancel button is enabled only while a mocked
// turn is running, clicking it sends `response.cancel`, and the transcript/response text
// rendered before the cancel is still shown once the gateway reports `idle`.
it('shows cancel control during an active mocked turn and preserves rendered text after cancel', async () => {
render(VoiceSessionShell);
await fireEvent.click(getByTestId('connect-button'));
// Presumably the socket instance the component created on connect — confirm in MockWebSocket.
const socket = MockWebSocket.latest();
// Simulate the gateway handshake: open, `session.ready`, then the initial `idle` state.
socket.open();
socket.message(createMessageEnvelope('session.ready', { sessionId: 'session-cancel' }));
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
// With no turn running the cancel control must be disabled.
await waitFor(() => {
expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(true);
});
// Start a turn; the first outbound message must be the trigger.
await fireEvent.click(getByTestId('mocked-turn-button'));
expect(JSON.parse(socket.sent[0]).type).toBe('mocked.turn.trigger');
// Feed the server-side events of a turn up to the first response fragment.
socket.message(createMessageEnvelope('session.state', { value: 'listening' }));
socket.message(createMessageEnvelope('transcript.final', { text: 'Keep this transcript.' }));
socket.message(createMessageEnvelope('session.state', { value: 'thinking' }));
socket.message(createMessageEnvelope('session.state', { value: 'speaking' }));
socket.message(createMessageEnvelope('response.text.delta', { text: 'Partial response' }));
await waitFor(() => {
expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(false);
expect(getByTestId('assistant-response').textContent).toBe('Partial response');
});
// Cancel: the second outbound message must be `response.cancel`.
await fireEvent.click(getByTestId('cancel-turn-button'));
expect(JSON.parse(socket.sent[1]).type).toBe('response.cancel');
// The gateway answers a cancel by returning the session to `idle`; simulate that reply.
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
await waitFor(() => {
expect(getByTestId('mocked-turn-status').textContent).toBe('idle');
expect(getByTestId('gateway-session-state').textContent).toBe('idle');
expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(true);
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(false);
// Already-rendered text is kept after the cancel.
expect(getByTestId('user-transcript').textContent).toBe('Keep this transcript.');
expect(getByTestId('assistant-response').textContent).toBe('Partial response');
});
});
}); });

View File

@@ -41,7 +41,7 @@ Prove the end-to-end interaction model with mocked or stubbed providers.
- implement mocked STT flow for partial transcript events - implement mocked STT flow for partial transcript events
- implement mocked LLM response streaming beyond the fixed deterministic slice - implement mocked LLM response streaming beyond the fixed deterministic slice
- implement stubbed audio playback or placeholder TTS output - implement stubbed audio playback or placeholder TTS output
- implement interrupt handling across the mocked pipeline - [x] implement interrupt handling across the mocked pipeline
### Exit Criteria ### Exit Criteria
@@ -188,5 +188,7 @@ Polish the system after the core voice loop is reliable.
- `apps/vela-gateway` now boots as a minimal Fastify app with `/` and `/health` endpoints - `apps/vela-gateway` now boots as a minimal Fastify app with `/` and `/health` endpoints
- `apps/vela-gateway` now exposes a minimal `/ws` WebSocket session skeleton with ephemeral in-memory sessions and defensive message handling - `apps/vela-gateway` now exposes a minimal `/ws` WebSocket session skeleton with ephemeral in-memory sessions and defensive message handling
- `apps/vela-gateway` now accepts `mocked.turn.trigger` and emits protocol-valid mocked transcript/response events with one in-flight mocked turn per session - `apps/vela-gateway` now accepts `mocked.turn.trigger` and emits protocol-valid mocked transcript/response events with one in-flight mocked turn per session
- `apps/vela-ui` now exposes a cancel control for active mocked turns and keeps already-rendered transcript/response text visible after cancellation
- `apps/vela-gateway` now honors `response.cancel` during mocked turns by stopping pending mocked response events, returning the session to `idle`, and allowing a new mocked turn on the same socket
- `apps/vela-protocol` now provides the shared WebSocket event contract for the UI and gateway - `apps/vela-protocol` now provides the shared WebSocket event contract for the UI and gateway
- backend framework choice is now concrete: Fastify - backend framework choice is now concrete: Fastify

View File

@@ -62,7 +62,8 @@ type ClientEvent =
- a mocked turn emits deterministic `transcript.final`, `response.text.delta`, `response.completed`, and `session.state` events in protocol-valid order - a mocked turn emits deterministic `transcript.final`, `response.text.delta`, `response.completed`, and `session.state` events in protocol-valid order
- `input_audio.append` updates the ephemeral session record and moves the session to `listening` - `input_audio.append` updates the ephemeral session record and moves the session to `listening`
- `input_audio.commit` resets the minimal buffered state and returns the session to `idle` - `input_audio.commit` resets the minimal buffered state and returns the session to `idle`
- `response.cancel` resets the minimal session state back to `idle` - `response.cancel` is safe to send even when no mocked turn is active
- `response.cancel` stops any still-pending mocked turn events for the active turn and resets the minimal session state back to `idle`
- a second mocked-turn trigger during an active mocked turn produces `error` with code `mocked_turn_in_flight` - a second mocked-turn trigger during an active mocked turn produces `error` with code `mocked_turn_in_flight`
- malformed JSON produces `error` with code `invalid_json` - malformed JSON produces `error` with code `invalid_json`
- invalid envelopes or unsupported client event names produce `error` with code `invalid_message` - invalid envelopes or unsupported client event names produce `error` with code `invalid_message`
@@ -85,6 +86,8 @@ Notes:
- this UI state is transport-oriented and is separate from the shared gateway `session.state` payload - this UI state is transport-oriented and is separate from the shared gateway `session.state` payload
- `session.state` currently reflects the gateway session phase (`idle`, `listening`, `thinking`, `speaking`) - `session.state` currently reflects the gateway session phase (`idle`, `listening`, `thinking`, `speaking`)
- the UI disables the mocked-turn control until `session.ready` arrives, while disconnected, or while a mocked turn is already in flight - the UI disables the mocked-turn control until `session.ready` arrives, while disconnected, or while a mocked turn is already in flight
- the UI shows a cancel control and enables it only while a mocked turn is active
- after cancel returns the gateway to `idle`, the UI clears the active-turn indicator but keeps any transcript or response text that was already rendered
- the UI treats malformed server messages, browser WebSocket errors, and gateway `error` events as safe error states instead of throwing - the UI treats malformed server messages, browser WebSocket errors, and gateway `error` events as safe error states instead of throwing
### Server → Client ### Server → Client
@@ -134,7 +137,7 @@ Notes:
- the content is intentionally fixed and obviously mocked - the content is intentionally fixed and obviously mocked
- no audio, STT, LLM, TTS, or external providers participate in this flow - no audio, STT, LLM, TTS, or external providers participate in this flow
- `response.cancel` can stop the mocked turn early and return the session to `idle` - `response.cancel` can stop the mocked turn early, suppress any later mocked response events for that turn, and return the session to `idle`
## Contract Scope for This Increment ## Contract Scope for This Increment
@@ -162,6 +165,14 @@ idle
→ idle → idle
``` ```
Current mocked-pipeline behavior:
- during an active mocked turn, `response.cancel` returns the session to `idle` immediately
- any mocked turn timers that have not fired yet are dropped, so no later `response.text.delta` or `response.completed` events are emitted for the cancelled turn
- once `idle` is restored, the same WebSocket session can start another mocked turn without reconnecting
More general future-state expectations:
`response.cancel` can occur at: `response.cancel` can occur at:
- listening → restart - listening → restart