feat(vela): support end-to-end mocked turn cancellation
This commit is contained in:
@@ -22,7 +22,8 @@ function createSessionRecord() {
|
|||||||
audioChunkCount: 0,
|
audioChunkCount: 0,
|
||||||
started: false,
|
started: false,
|
||||||
mockedTurnInFlight: false,
|
mockedTurnInFlight: false,
|
||||||
mockedTurnTimers: []
|
mockedTurnTimers: [],
|
||||||
|
activeMockedTurnId: null
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -33,11 +34,17 @@ function clearMockedTurn(session) {
|
|||||||
|
|
||||||
session.mockedTurnTimers = [];
|
session.mockedTurnTimers = [];
|
||||||
session.mockedTurnInFlight = false;
|
session.mockedTurnInFlight = false;
|
||||||
|
session.activeMockedTurnId = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scheduleMockedTurnStep(session, delay, callback) {
|
function scheduleMockedTurnStep(session, turnId, delay, callback) {
|
||||||
const timer = setTimeout(() => {
|
const timer = setTimeout(() => {
|
||||||
session.mockedTurnTimers = session.mockedTurnTimers.filter((activeTimer) => activeTimer !== timer);
|
session.mockedTurnTimers = session.mockedTurnTimers.filter((activeTimer) => activeTimer !== timer);
|
||||||
|
|
||||||
|
if (!session.mockedTurnInFlight || session.activeMockedTurnId !== turnId) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
callback();
|
callback();
|
||||||
}, delay);
|
}, delay);
|
||||||
|
|
||||||
@@ -53,23 +60,25 @@ function startMockedTurn(socket, session) {
|
|||||||
clearMockedTurn(session);
|
clearMockedTurn(session);
|
||||||
session.audioChunkCount = 0;
|
session.audioChunkCount = 0;
|
||||||
session.mockedTurnInFlight = true;
|
session.mockedTurnInFlight = true;
|
||||||
|
const turnId = crypto.randomUUID();
|
||||||
|
session.activeMockedTurnId = turnId;
|
||||||
updateSessionState(socket, session, 'listening');
|
updateSessionState(socket, session, 'listening');
|
||||||
|
|
||||||
scheduleMockedTurnStep(session, 75, () => {
|
scheduleMockedTurnStep(session, turnId, 75, () => {
|
||||||
sendSocketMessage(socket, 'transcript.final', { text: MOCKED_USER_TRANSCRIPT });
|
sendSocketMessage(socket, 'transcript.final', { text: MOCKED_USER_TRANSCRIPT });
|
||||||
updateSessionState(socket, session, 'thinking');
|
updateSessionState(socket, session, 'thinking');
|
||||||
});
|
});
|
||||||
|
|
||||||
scheduleMockedTurnStep(session, 150, () => {
|
scheduleMockedTurnStep(session, turnId, 150, () => {
|
||||||
updateSessionState(socket, session, 'speaking');
|
updateSessionState(socket, session, 'speaking');
|
||||||
sendSocketMessage(socket, 'response.text.delta', { text: '[mocked assistant] ' });
|
sendSocketMessage(socket, 'response.text.delta', { text: '[mocked assistant] ' });
|
||||||
});
|
});
|
||||||
|
|
||||||
scheduleMockedTurnStep(session, 225, () => {
|
scheduleMockedTurnStep(session, turnId, 225, () => {
|
||||||
sendSocketMessage(socket, 'response.text.delta', { text: MOCKED_ASSISTANT_RESPONSE.replace('[mocked assistant] ', '') });
|
sendSocketMessage(socket, 'response.text.delta', { text: MOCKED_ASSISTANT_RESPONSE.replace('[mocked assistant] ', '') });
|
||||||
});
|
});
|
||||||
|
|
||||||
scheduleMockedTurnStep(session, 300, () => {
|
scheduleMockedTurnStep(session, turnId, 300, () => {
|
||||||
sendSocketMessage(socket, 'response.completed', {});
|
sendSocketMessage(socket, 'response.completed', {});
|
||||||
clearMockedTurn(session);
|
clearMockedTurn(session);
|
||||||
updateSessionState(socket, session, 'idle');
|
updateSessionState(socket, session, 'idle');
|
||||||
|
|||||||
@@ -405,3 +405,67 @@ test('websocket rejects a second mocked turn while one is in flight', async () =
|
|||||||
await server.close();
|
await server.close();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('websocket cancel stops an active mocked turn and allows a new one without reconnecting', async () => {
|
||||||
|
const server = await startServer();
|
||||||
|
|
||||||
|
try {
|
||||||
|
const client = await connectWebSocket(server.port);
|
||||||
|
await client.nextMessage();
|
||||||
|
await client.nextMessage();
|
||||||
|
|
||||||
|
client.sendJson({ type: 'mocked.turn.trigger', payload: {} });
|
||||||
|
assert.deepEqual(await client.nextMessage(), {
|
||||||
|
type: 'session.state',
|
||||||
|
payload: { value: 'listening' }
|
||||||
|
});
|
||||||
|
assert.deepEqual(await client.nextMessage(), {
|
||||||
|
type: 'transcript.final',
|
||||||
|
payload: { text: '[mocked user] What is the current mocked vertical slice?' }
|
||||||
|
});
|
||||||
|
assert.deepEqual(await client.nextMessage(), {
|
||||||
|
type: 'session.state',
|
||||||
|
payload: { value: 'thinking' }
|
||||||
|
});
|
||||||
|
|
||||||
|
client.sendJson({ type: 'response.cancel', payload: {} });
|
||||||
|
assert.deepEqual(await client.nextMessage(), {
|
||||||
|
type: 'session.state',
|
||||||
|
payload: { value: 'idle' }
|
||||||
|
});
|
||||||
|
await assert.rejects(() => client.nextMessage(150), /timed out waiting for websocket message/);
|
||||||
|
|
||||||
|
client.sendJson({ type: 'mocked.turn.trigger', payload: {} });
|
||||||
|
assert.deepEqual(await client.nextMessage(), {
|
||||||
|
type: 'session.state',
|
||||||
|
payload: { value: 'listening' }
|
||||||
|
});
|
||||||
|
|
||||||
|
await client.close();
|
||||||
|
} finally {
|
||||||
|
await server.close();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
test('websocket safely accepts cancel when no turn is active', async () => {
|
||||||
|
const server = await startServer();
|
||||||
|
|
||||||
|
try {
|
||||||
|
const client = await connectWebSocket(server.port);
|
||||||
|
await client.nextMessage();
|
||||||
|
await client.nextMessage();
|
||||||
|
|
||||||
|
client.sendJson({ type: 'response.cancel', payload: {} });
|
||||||
|
await assert.rejects(() => client.nextMessage(150), /timed out waiting for websocket message/);
|
||||||
|
|
||||||
|
client.sendJson({ type: 'mocked.turn.trigger', payload: {} });
|
||||||
|
assert.deepEqual(await client.nextMessage(), {
|
||||||
|
type: 'session.state',
|
||||||
|
payload: { value: 'listening' }
|
||||||
|
});
|
||||||
|
|
||||||
|
await client.close();
|
||||||
|
} finally {
|
||||||
|
await server.close();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|||||||
@@ -46,3 +46,34 @@ test('voice session shell covers the mocked transcript/response slice', async ({
|
|||||||
await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT);
|
await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT);
|
||||||
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
|
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('voice session shell can cancel an active mocked turn and start another one', async ({ page }) => {
|
||||||
|
await page.goto('/');
|
||||||
|
await expect(page.getByTestId('hydration-status')).toHaveText('ready');
|
||||||
|
|
||||||
|
await expect(page.getByTestId('cancel-turn-button')).toBeDisabled();
|
||||||
|
await page.getByTestId('connect-button').click();
|
||||||
|
|
||||||
|
await expect(page.getByTestId('connection-state')).toHaveText('connected');
|
||||||
|
await expect(page.getByTestId('mocked-turn-button')).toBeEnabled();
|
||||||
|
|
||||||
|
await page.getByTestId('mocked-turn-button').click();
|
||||||
|
await expect(page.getByTestId('mocked-turn-status')).toHaveText('running');
|
||||||
|
await expect(page.getByTestId('cancel-turn-button')).toBeEnabled();
|
||||||
|
await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT);
|
||||||
|
await expect(page.getByTestId('assistant-response')).toContainText('[mocked assistant]');
|
||||||
|
|
||||||
|
await page.getByTestId('cancel-turn-button').click();
|
||||||
|
|
||||||
|
await expect(page.getByTestId('gateway-session-state')).toHaveText('idle');
|
||||||
|
await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle');
|
||||||
|
await expect(page.getByTestId('cancel-turn-button')).toBeDisabled();
|
||||||
|
await expect(page.getByTestId('mocked-turn-button')).toBeEnabled();
|
||||||
|
await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT);
|
||||||
|
await expect(page.getByTestId('assistant-response')).toContainText('[mocked assistant]');
|
||||||
|
|
||||||
|
await page.getByTestId('mocked-turn-button').click();
|
||||||
|
await expect(page.getByTestId('mocked-turn-status')).toHaveText('running');
|
||||||
|
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
|
||||||
|
await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle');
|
||||||
|
});
|
||||||
|
|||||||
@@ -62,6 +62,13 @@
|
|||||||
sessionReadyReceived &&
|
sessionReadyReceived &&
|
||||||
!mockedTurnInFlight;
|
!mockedTurnInFlight;
|
||||||
|
|
||||||
|
$: canCancelMockedTurn =
|
||||||
|
typeof WebSocket !== 'undefined' &&
|
||||||
|
connectionState === 'connected' &&
|
||||||
|
socket?.readyState === WebSocket.OPEN &&
|
||||||
|
sessionReadyReceived &&
|
||||||
|
mockedTurnInFlight;
|
||||||
|
|
||||||
function clearSocketHandlers(targetSocket) {
|
function clearSocketHandlers(targetSocket) {
|
||||||
targetSocket.onopen = null;
|
targetSocket.onopen = null;
|
||||||
targetSocket.onmessage = null;
|
targetSocket.onmessage = null;
|
||||||
@@ -105,6 +112,23 @@
|
|||||||
socket.send(JSON.stringify(createMessageEnvelope('mocked.turn.trigger', {})));
|
socket.send(JSON.stringify(createMessageEnvelope('mocked.turn.trigger', {})));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function cancelActiveResponse() {
|
||||||
|
if (!socket || socket.readyState !== WebSocket.OPEN || connectionState !== 'connected') {
|
||||||
|
connectionDetail = 'Connect to the gateway before cancelling a mocked turn.';
|
||||||
|
lastError = 'response.cancel requires an active WebSocket connection';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!sessionReadyReceived) {
|
||||||
|
connectionDetail = 'Wait for the gateway session to be ready before cancelling a mocked turn.';
|
||||||
|
lastError = 'response.cancel requires session.ready';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
lastError = 'none';
|
||||||
|
socket.send(JSON.stringify(createMessageEnvelope('response.cancel', {})));
|
||||||
|
}
|
||||||
|
|
||||||
function connect() {
|
function connect() {
|
||||||
if (typeof window === 'undefined') {
|
if (typeof window === 'undefined') {
|
||||||
return;
|
return;
|
||||||
@@ -301,6 +325,9 @@
|
|||||||
<button data-testid="mocked-turn-button" on:click={triggerMockedTurn} disabled={!canTriggerMockedTurn}>
|
<button data-testid="mocked-turn-button" on:click={triggerMockedTurn} disabled={!canTriggerMockedTurn}>
|
||||||
Run mocked turn
|
Run mocked turn
|
||||||
</button>
|
</button>
|
||||||
|
<button data-testid="cancel-turn-button" on:click={cancelActiveResponse} disabled={!canCancelMockedTurn}>
|
||||||
|
Cancel active turn
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="conversation">
|
<div class="conversation">
|
||||||
|
|||||||
@@ -154,4 +154,48 @@ describe('voice session shell', () => {
|
|||||||
expect(socket.sent).toHaveLength(1);
|
expect(socket.sent).toHaveLength(1);
|
||||||
expect(JSON.parse(socket.sent[0]).type).toBe('mocked.turn.trigger');
|
expect(JSON.parse(socket.sent[0]).type).toBe('mocked.turn.trigger');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('shows cancel control during an active mocked turn and preserves rendered text after cancel', async () => {
|
||||||
|
render(VoiceSessionShell);
|
||||||
|
|
||||||
|
await fireEvent.click(getByTestId('connect-button'));
|
||||||
|
const socket = MockWebSocket.latest();
|
||||||
|
socket.open();
|
||||||
|
socket.message(createMessageEnvelope('session.ready', { sessionId: 'session-cancel' }));
|
||||||
|
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
|
||||||
|
|
||||||
|
await waitFor(() => {
|
||||||
|
expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
await fireEvent.click(getByTestId('mocked-turn-button'));
|
||||||
|
|
||||||
|
expect(JSON.parse(socket.sent[0]).type).toBe('mocked.turn.trigger');
|
||||||
|
|
||||||
|
socket.message(createMessageEnvelope('session.state', { value: 'listening' }));
|
||||||
|
socket.message(createMessageEnvelope('transcript.final', { text: 'Keep this transcript.' }));
|
||||||
|
socket.message(createMessageEnvelope('session.state', { value: 'thinking' }));
|
||||||
|
socket.message(createMessageEnvelope('session.state', { value: 'speaking' }));
|
||||||
|
socket.message(createMessageEnvelope('response.text.delta', { text: 'Partial response' }));
|
||||||
|
|
||||||
|
await waitFor(() => {
|
||||||
|
expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(false);
|
||||||
|
expect(getByTestId('assistant-response').textContent).toBe('Partial response');
|
||||||
|
});
|
||||||
|
|
||||||
|
await fireEvent.click(getByTestId('cancel-turn-button'));
|
||||||
|
|
||||||
|
expect(JSON.parse(socket.sent[1]).type).toBe('response.cancel');
|
||||||
|
|
||||||
|
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
|
||||||
|
|
||||||
|
await waitFor(() => {
|
||||||
|
expect(getByTestId('mocked-turn-status').textContent).toBe('idle');
|
||||||
|
expect(getByTestId('gateway-session-state').textContent).toBe('idle');
|
||||||
|
expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(true);
|
||||||
|
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(false);
|
||||||
|
expect(getByTestId('user-transcript').textContent).toBe('Keep this transcript.');
|
||||||
|
expect(getByTestId('assistant-response').textContent).toBe('Partial response');
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ Prove the end-to-end interaction model with mocked or stubbed providers.
|
|||||||
- implement mocked STT flow for partial transcript events
|
- implement mocked STT flow for partial transcript events
|
||||||
- implement mocked LLM response streaming beyond the fixed deterministic slice
|
- implement mocked LLM response streaming beyond the fixed deterministic slice
|
||||||
- implement stubbed audio playback or placeholder TTS output
|
- implement stubbed audio playback or placeholder TTS output
|
||||||
- implement interrupt handling across the mocked pipeline
|
- [x] implement interrupt handling across the mocked pipeline
|
||||||
|
|
||||||
### Exit Criteria
|
### Exit Criteria
|
||||||
|
|
||||||
@@ -188,5 +188,7 @@ Polish the system after the core voice loop is reliable.
|
|||||||
- `apps/vela-gateway` now boots as a minimal Fastify app with `/` and `/health` endpoints
|
- `apps/vela-gateway` now boots as a minimal Fastify app with `/` and `/health` endpoints
|
||||||
- `apps/vela-gateway` now exposes a minimal `/ws` WebSocket session skeleton with ephemeral in-memory sessions and defensive message handling
|
- `apps/vela-gateway` now exposes a minimal `/ws` WebSocket session skeleton with ephemeral in-memory sessions and defensive message handling
|
||||||
- `apps/vela-gateway` now accepts `mocked.turn.trigger` and emits protocol-valid mocked transcript/response events with one in-flight mocked turn per session
|
- `apps/vela-gateway` now accepts `mocked.turn.trigger` and emits protocol-valid mocked transcript/response events with one in-flight mocked turn per session
|
||||||
|
- `apps/vela-ui` now exposes a cancel control for active mocked turns and keeps already-rendered transcript/response text visible after cancellation
|
||||||
|
- `apps/vela-gateway` now honors `response.cancel` during mocked turns by stopping pending mocked response events, returning the session to `idle`, and allowing a new mocked turn on the same socket
|
||||||
- `apps/vela-protocol` now provides the shared WebSocket event contract for the UI and gateway
|
- `apps/vela-protocol` now provides the shared WebSocket event contract for the UI and gateway
|
||||||
- backend framework choice is now concrete: Fastify
|
- backend framework choice is now concrete: Fastify
|
||||||
|
|||||||
@@ -62,7 +62,8 @@ type ClientEvent =
|
|||||||
- a mocked turn emits deterministic `transcript.final`, `response.text.delta`, `response.completed`, and `session.state` events in protocol-valid order
|
- a mocked turn emits deterministic `transcript.final`, `response.text.delta`, `response.completed`, and `session.state` events in protocol-valid order
|
||||||
- `input_audio.append` updates the ephemeral session record and moves the session to `listening`
|
- `input_audio.append` updates the ephemeral session record and moves the session to `listening`
|
||||||
- `input_audio.commit` resets the minimal buffered state and returns the session to `idle`
|
- `input_audio.commit` resets the minimal buffered state and returns the session to `idle`
|
||||||
- `response.cancel` resets the minimal session state back to `idle`
|
- `response.cancel` is safe to send even when no mocked turn is active
|
||||||
|
- `response.cancel` stops any still-pending mocked turn events for the active turn and resets the minimal session state back to `idle`
|
||||||
- a second mocked-turn trigger during an active mocked turn produces `error` with code `mocked_turn_in_flight`
|
- a second mocked-turn trigger during an active mocked turn produces `error` with code `mocked_turn_in_flight`
|
||||||
- malformed JSON produces `error` with code `invalid_json`
|
- malformed JSON produces `error` with code `invalid_json`
|
||||||
- invalid envelopes or unsupported client event names produce `error` with code `invalid_message`
|
- invalid envelopes or unsupported client event names produce `error` with code `invalid_message`
|
||||||
@@ -85,6 +86,8 @@ Notes:
|
|||||||
- this UI state is transport-oriented and is separate from the shared gateway `session.state` payload
|
- this UI state is transport-oriented and is separate from the shared gateway `session.state` payload
|
||||||
- `session.state` currently reflects the gateway session phase (`idle`, `listening`, `thinking`, `speaking`)
|
- `session.state` currently reflects the gateway session phase (`idle`, `listening`, `thinking`, `speaking`)
|
||||||
- the UI disables the mocked-turn control until `session.ready` arrives, while disconnected, or while a mocked turn is already in flight
|
- the UI disables the mocked-turn control until `session.ready` arrives, while disconnected, or while a mocked turn is already in flight
|
||||||
|
- the UI shows a cancel control and enables it only while a mocked turn is active
|
||||||
|
- after cancel returns the gateway to `idle`, the UI clears the active-turn indicator but keeps any transcript or response text that was already rendered
|
||||||
- the UI treats malformed server messages, browser WebSocket errors, and gateway `error` events as safe error states instead of throwing
|
- the UI treats malformed server messages, browser WebSocket errors, and gateway `error` events as safe error states instead of throwing
|
||||||
|
|
||||||
### Server → Client
|
### Server → Client
|
||||||
@@ -134,7 +137,7 @@ Notes:
|
|||||||
|
|
||||||
- the content is intentionally fixed and obviously mocked
|
- the content is intentionally fixed and obviously mocked
|
||||||
- no audio, STT, LLM, TTS, or external providers participate in this flow
|
- no audio, STT, LLM, TTS, or external providers participate in this flow
|
||||||
- `response.cancel` can stop the mocked turn early and return the session to `idle`
|
- `response.cancel` can stop the mocked turn early, suppress any later mocked response events for that turn, and return the session to `idle`
|
||||||
|
|
||||||
## Contract Scope for This Increment
|
## Contract Scope for This Increment
|
||||||
|
|
||||||
@@ -162,6 +165,14 @@ idle
|
|||||||
→ idle
|
→ idle
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Current mocked-pipeline behavior:
|
||||||
|
|
||||||
|
- during an active mocked turn, `response.cancel` returns the session to `idle` immediately
|
||||||
|
- any mocked turn timers that have not fired yet are dropped, so no later `response.text.delta` or `response.completed` events are emitted for the cancelled turn
|
||||||
|
- once `idle` is restored, the same WebSocket session can start another mocked turn without reconnecting
|
||||||
|
|
||||||
|
More general future-state expectations:
|
||||||
|
|
||||||
`response.cancel` can occur at:
|
`response.cancel` can occur at:
|
||||||
|
|
||||||
- listening → restart
|
- listening → restart
|
||||||
|
|||||||
Reference in New Issue
Block a user