diff --git a/apps/vela-gateway/src/index.js b/apps/vela-gateway/src/index.js index e7f5148..5ee489f 100644 --- a/apps/vela-gateway/src/index.js +++ b/apps/vela-gateway/src/index.js @@ -22,7 +22,8 @@ function createSessionRecord() { audioChunkCount: 0, started: false, mockedTurnInFlight: false, - mockedTurnTimers: [] + mockedTurnTimers: [], + activeMockedTurnId: null }; } @@ -33,11 +34,17 @@ function clearMockedTurn(session) { session.mockedTurnTimers = []; session.mockedTurnInFlight = false; + session.activeMockedTurnId = null; } -function scheduleMockedTurnStep(session, delay, callback) { +function scheduleMockedTurnStep(session, turnId, delay, callback) { const timer = setTimeout(() => { session.mockedTurnTimers = session.mockedTurnTimers.filter((activeTimer) => activeTimer !== timer); + + if (!session.mockedTurnInFlight || session.activeMockedTurnId !== turnId) { + return; + } + callback(); }, delay); @@ -53,23 +60,25 @@ function startMockedTurn(socket, session) { clearMockedTurn(session); session.audioChunkCount = 0; session.mockedTurnInFlight = true; + const turnId = crypto.randomUUID(); + session.activeMockedTurnId = turnId; updateSessionState(socket, session, 'listening'); - scheduleMockedTurnStep(session, 75, () => { + scheduleMockedTurnStep(session, turnId, 75, () => { sendSocketMessage(socket, 'transcript.final', { text: MOCKED_USER_TRANSCRIPT }); updateSessionState(socket, session, 'thinking'); }); - scheduleMockedTurnStep(session, 150, () => { + scheduleMockedTurnStep(session, turnId, 150, () => { updateSessionState(socket, session, 'speaking'); sendSocketMessage(socket, 'response.text.delta', { text: '[mocked assistant] ' }); }); - scheduleMockedTurnStep(session, 225, () => { + scheduleMockedTurnStep(session, turnId, 225, () => { sendSocketMessage(socket, 'response.text.delta', { text: MOCKED_ASSISTANT_RESPONSE.replace('[mocked assistant] ', '') }); }); - scheduleMockedTurnStep(session, 300, () => { + scheduleMockedTurnStep(session, turnId, 300, () => 
{ sendSocketMessage(socket, 'response.completed', {}); clearMockedTurn(session); updateSessionState(socket, session, 'idle'); diff --git a/apps/vela-gateway/test/websocket-session.test.js b/apps/vela-gateway/test/websocket-session.test.js index 7330812..689d542 100644 --- a/apps/vela-gateway/test/websocket-session.test.js +++ b/apps/vela-gateway/test/websocket-session.test.js @@ -405,3 +405,67 @@ test('websocket rejects a second mocked turn while one is in flight', async () = await server.close(); } }); + +test('websocket cancel stops an active mocked turn and allows a new one without reconnecting', async () => { + const server = await startServer(); + + try { + const client = await connectWebSocket(server.port); + await client.nextMessage(); + await client.nextMessage(); + + client.sendJson({ type: 'mocked.turn.trigger', payload: {} }); + assert.deepEqual(await client.nextMessage(), { + type: 'session.state', + payload: { value: 'listening' } + }); + assert.deepEqual(await client.nextMessage(), { + type: 'transcript.final', + payload: { text: '[mocked user] What is the current mocked vertical slice?' 
} + }); + assert.deepEqual(await client.nextMessage(), { + type: 'session.state', + payload: { value: 'thinking' } + }); + + client.sendJson({ type: 'response.cancel', payload: {} }); + assert.deepEqual(await client.nextMessage(), { + type: 'session.state', + payload: { value: 'idle' } + }); + await assert.rejects(() => client.nextMessage(150), /timed out waiting for websocket message/); + + client.sendJson({ type: 'mocked.turn.trigger', payload: {} }); + assert.deepEqual(await client.nextMessage(), { + type: 'session.state', + payload: { value: 'listening' } + }); + + await client.close(); + } finally { + await server.close(); + } +}); + +test('websocket safely accepts cancel when no turn is active', async () => { + const server = await startServer(); + + try { + const client = await connectWebSocket(server.port); + await client.nextMessage(); + await client.nextMessage(); + + client.sendJson({ type: 'response.cancel', payload: {} }); + await assert.rejects(() => client.nextMessage(150), /timed out waiting for websocket message/); + + client.sendJson({ type: 'mocked.turn.trigger', payload: {} }); + assert.deepEqual(await client.nextMessage(), { + type: 'session.state', + payload: { value: 'listening' } + }); + + await client.close(); + } finally { + await server.close(); + } +}); diff --git a/apps/vela-ui/e2e/voice-session.spec.js b/apps/vela-ui/e2e/voice-session.spec.js index 56eacdb..bf72cd3 100644 --- a/apps/vela-ui/e2e/voice-session.spec.js +++ b/apps/vela-ui/e2e/voice-session.spec.js @@ -46,3 +46,34 @@ test('voice session shell covers the mocked transcript/response slice', async ({ await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT); await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE); }); + +test('voice session shell can cancel an active mocked turn and start another one', async ({ page }) => { + await page.goto('/'); + await 
expect(page.getByTestId('hydration-status')).toHaveText('ready'); + + await expect(page.getByTestId('cancel-turn-button')).toBeDisabled(); + await page.getByTestId('connect-button').click(); + + await expect(page.getByTestId('connection-state')).toHaveText('connected'); + await expect(page.getByTestId('mocked-turn-button')).toBeEnabled(); + + await page.getByTestId('mocked-turn-button').click(); + await expect(page.getByTestId('mocked-turn-status')).toHaveText('running'); + await expect(page.getByTestId('cancel-turn-button')).toBeEnabled(); + await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT); + await expect(page.getByTestId('assistant-response')).toContainText('[mocked assistant]'); + + await page.getByTestId('cancel-turn-button').click(); + + await expect(page.getByTestId('gateway-session-state')).toHaveText('idle'); + await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle'); + await expect(page.getByTestId('cancel-turn-button')).toBeDisabled(); + await expect(page.getByTestId('mocked-turn-button')).toBeEnabled(); + await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT); + await expect(page.getByTestId('assistant-response')).toContainText('[mocked assistant]'); + + await page.getByTestId('mocked-turn-button').click(); + await expect(page.getByTestId('mocked-turn-status')).toHaveText('running'); + await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE); + await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle'); +}); diff --git a/apps/vela-ui/src/lib/VoiceSessionShell.svelte b/apps/vela-ui/src/lib/VoiceSessionShell.svelte index 5dd82b8..184fe3d 100644 --- a/apps/vela-ui/src/lib/VoiceSessionShell.svelte +++ b/apps/vela-ui/src/lib/VoiceSessionShell.svelte @@ -62,6 +62,13 @@ sessionReadyReceived && !mockedTurnInFlight; + $: canCancelMockedTurn = + typeof WebSocket !== 'undefined' && + connectionState === 'connected' && + socket?.readyState 
=== WebSocket.OPEN && + sessionReadyReceived && + mockedTurnInFlight; + function clearSocketHandlers(targetSocket) { targetSocket.onopen = null; targetSocket.onmessage = null; @@ -105,6 +112,23 @@ socket.send(JSON.stringify(createMessageEnvelope('mocked.turn.trigger', {}))); } + function cancelActiveResponse() { + if (!socket || socket.readyState !== WebSocket.OPEN || connectionState !== 'connected') { + connectionDetail = 'Connect to the gateway before cancelling a mocked turn.'; + lastError = 'response.cancel requires an active WebSocket connection'; + return; + } + + if (!sessionReadyReceived) { + connectionDetail = 'Wait for the gateway session to be ready before cancelling a mocked turn.'; + lastError = 'response.cancel requires session.ready'; + return; + } + + lastError = 'none'; + socket.send(JSON.stringify(createMessageEnvelope('response.cancel', {}))); + } + function connect() { if (typeof window === 'undefined') { return; @@ -301,6 +325,9 @@ +
diff --git a/apps/vela-ui/tests/voice-session.test.js b/apps/vela-ui/tests/voice-session.test.js index 82ce725..93d1cde 100644 --- a/apps/vela-ui/tests/voice-session.test.js +++ b/apps/vela-ui/tests/voice-session.test.js @@ -154,4 +154,48 @@ describe('voice session shell', () => { expect(socket.sent).toHaveLength(1); expect(JSON.parse(socket.sent[0]).type).toBe('mocked.turn.trigger'); }); + + it('shows cancel control during an active mocked turn and preserves rendered text after cancel', async () => { + render(VoiceSessionShell); + + await fireEvent.click(getByTestId('connect-button')); + const socket = MockWebSocket.latest(); + socket.open(); + socket.message(createMessageEnvelope('session.ready', { sessionId: 'session-cancel' })); + socket.message(createMessageEnvelope('session.state', { value: 'idle' })); + + await waitFor(() => { + expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(true); + }); + + await fireEvent.click(getByTestId('mocked-turn-button')); + + expect(JSON.parse(socket.sent[0]).type).toBe('mocked.turn.trigger'); + + socket.message(createMessageEnvelope('session.state', { value: 'listening' })); + socket.message(createMessageEnvelope('transcript.final', { text: 'Keep this transcript.' 
})); + socket.message(createMessageEnvelope('session.state', { value: 'thinking' })); + socket.message(createMessageEnvelope('session.state', { value: 'speaking' })); + socket.message(createMessageEnvelope('response.text.delta', { text: 'Partial response' })); + + await waitFor(() => { + expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(false); + expect(getByTestId('assistant-response').textContent).toBe('Partial response'); + }); + + await fireEvent.click(getByTestId('cancel-turn-button')); + + expect(JSON.parse(socket.sent[1]).type).toBe('response.cancel'); + + socket.message(createMessageEnvelope('session.state', { value: 'idle' })); + + await waitFor(() => { + expect(getByTestId('mocked-turn-status').textContent).toBe('idle'); + expect(getByTestId('gateway-session-state').textContent).toBe('idle'); + expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(true); + expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(false); + expect(getByTestId('user-transcript').textContent).toBe('Keep this transcript.'); + expect(getByTestId('assistant-response').textContent).toBe('Partial response'); + }); + }); }); diff --git a/docs/backlog.md b/docs/backlog.md index 1d98a81..3073bba 100644 --- a/docs/backlog.md +++ b/docs/backlog.md @@ -41,7 +41,7 @@ Prove the end-to-end interaction model with mocked or stubbed providers. - implement mocked STT flow for partial transcript events - implement mocked LLM response streaming beyond the fixed deterministic slice - implement stubbed audio playback or placeholder TTS output -- implement interrupt handling across the mocked pipeline +- [x] implement interrupt handling across the mocked pipeline ### Exit Criteria @@ -188,5 +188,7 @@ Polish the system after the core voice loop is reliable. 
- `apps/vela-gateway` now boots as a minimal Fastify app with `/` and `/health` endpoints - `apps/vela-gateway` now exposes a minimal `/ws` WebSocket session skeleton with ephemeral in-memory sessions and defensive message handling - `apps/vela-gateway` now accepts `mocked.turn.trigger` and emits protocol-valid mocked transcript/response events with one in-flight mocked turn per session +- `apps/vela-ui` now exposes a cancel control for active mocked turns and keeps already-rendered transcript/response text visible after cancellation +- `apps/vela-gateway` now honors `response.cancel` during mocked turns by stopping pending mocked response events, returning the session to `idle`, and allowing a new mocked turn on the same socket - `apps/vela-protocol` now provides the shared WebSocket event contract for the UI and gateway - backend framework choice is now concrete: Fastify diff --git a/docs/protocol.md b/docs/protocol.md index a808f4b..3d3b864 100644 --- a/docs/protocol.md +++ b/docs/protocol.md @@ -62,7 +62,8 @@ type ClientEvent = - a mocked turn emits deterministic `transcript.final`, `response.text.delta`, `response.completed`, and `session.state` events in protocol-valid order - `input_audio.append` updates the ephemeral session record and moves the session to `listening` - `input_audio.commit` resets the minimal buffered state and returns the session to `idle` -- `response.cancel` resets the minimal session state back to `idle` +- `response.cancel` is safe to send even when no mocked turn is active +- `response.cancel` stops any still-pending mocked turn events for the active turn and resets the minimal session state back to `idle` - a second mocked-turn trigger during an active mocked turn produces `error` with code `mocked_turn_in_flight` - malformed JSON produces `error` with code `invalid_json` - invalid envelopes or unsupported client event names produce `error` with code `invalid_message` @@ -85,6 +86,8 @@ Notes: - this UI state is transport-oriented and 
is separate from the shared gateway `session.state` payload - `session.state` currently reflects the gateway session phase (`idle`, `listening`, `thinking`, `speaking`) - the UI disables the mocked-turn control until `session.ready` arrives, while disconnected, or while a mocked turn is already in flight +- the UI shows a cancel control and enables it only while a mocked turn is active +- after cancel returns the gateway to `idle`, the UI clears the active-turn indicator but keeps any transcript or response text that was already rendered - the UI treats malformed server messages, browser WebSocket errors, and gateway `error` events as safe error states instead of throwing ### Server → Client @@ -134,7 +137,7 @@ Notes: - the content is intentionally fixed and obviously mocked - no audio, STT, LLM, TTS, or external providers participate in this flow -- `response.cancel` can stop the mocked turn early and return the session to `idle` +- `response.cancel` can stop the mocked turn early, suppress any later mocked transcript or response events for that turn, and return the session to `idle` ## Contract Scope for This Increment @@ -162,6 +165,14 @@ idle → idle ``` +Current mocked-pipeline behavior: + +- during an active mocked turn, `response.cancel` returns the session to `idle` immediately +- any mocked-turn timers that have not fired yet are dropped, so no later `transcript.final`, `response.text.delta`, `response.completed`, or `thinking`/`speaking` `session.state` events are emitted for the cancelled turn +- once `idle` is restored, the same WebSocket session can start another mocked turn without reconnecting + +More general future-state expectations: + `response.cancel` can occur at: - listening → restart