feat(vela): start mocked response flow after push-to-talk commit
This commit is contained in:
@@ -67,21 +67,23 @@ function scheduleMockedTurnStep(session, turnId, delay, callback) {
|
||||
session.mockedTurnTimers.push(timer);
|
||||
}
|
||||
|
||||
function startMockedTurn(socket, session) {
|
||||
function startMockedTurn(socket, session, { transcript = MOCKED_USER_TRANSCRIPT, includeListeningState = true } = {}) {
|
||||
if (session.mockedTurnInFlight) {
|
||||
sendSocketError(socket, 'mocked_turn_in_flight', 'Only one mocked turn can run per session at a time.');
|
||||
return;
|
||||
}
|
||||
|
||||
clearMockedTurn(session);
|
||||
session.audioChunkCount = 0;
|
||||
session.mockedTurnInFlight = true;
|
||||
const turnId = crypto.randomUUID();
|
||||
session.activeMockedTurnId = turnId;
|
||||
updateSessionState(socket, session, 'listening');
|
||||
|
||||
if (includeListeningState) {
|
||||
updateSessionState(socket, session, 'listening');
|
||||
}
|
||||
|
||||
scheduleMockedTurnStep(session, turnId, 75, () => {
|
||||
sendSocketMessage(socket, 'transcript.final', { text: MOCKED_USER_TRANSCRIPT });
|
||||
sendSocketMessage(socket, 'transcript.final', { text: transcript });
|
||||
updateSessionState(socket, session, 'thinking');
|
||||
});
|
||||
|
||||
@@ -264,11 +266,12 @@ function handleClientMessage(socket, session, rawMessage) {
|
||||
break;
|
||||
}
|
||||
|
||||
sendSocketMessage(socket, 'transcript.final', {
|
||||
text: createPlaceholderFinalTranscript(session.audioChunkCount)
|
||||
});
|
||||
const finalTranscript = createPlaceholderFinalTranscript(session.audioChunkCount);
|
||||
session.audioChunkCount = 0;
|
||||
updateSessionState(socket, session, 'idle');
|
||||
startMockedTurn(socket, session, {
|
||||
transcript: finalTranscript,
|
||||
includeListeningState: false
|
||||
});
|
||||
break;
|
||||
case 'response.cancel':
|
||||
clearMockedTurn(session);
|
||||
|
||||
@@ -303,6 +303,26 @@ test('websocket handles valid and invalid client messages safely', async () => {
|
||||
type: 'transcript.final',
|
||||
payload: { text: '[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'session.state',
|
||||
payload: { value: 'thinking' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'session.state',
|
||||
payload: { value: 'speaking' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'response.text.delta',
|
||||
payload: { text: '[mocked assistant] ' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'response.text.delta',
|
||||
payload: { text: 'This is a deterministic mocked response from the gateway vertical slice.' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'response.completed',
|
||||
payload: {}
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'session.state',
|
||||
payload: { value: 'idle' }
|
||||
@@ -358,6 +378,26 @@ test('websocket accepts a placeholder input cycle before a mocked turn on the sa
|
||||
type: 'transcript.final',
|
||||
payload: { text: '[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'session.state',
|
||||
payload: { value: 'thinking' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'session.state',
|
||||
payload: { value: 'speaking' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'response.text.delta',
|
||||
payload: { text: '[mocked assistant] ' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'response.text.delta',
|
||||
payload: { text: 'This is a deterministic mocked response from the gateway vertical slice.' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'response.completed',
|
||||
payload: {}
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'session.state',
|
||||
payload: { value: 'idle' }
|
||||
@@ -404,6 +444,37 @@ test('websocket emits deterministic partials for repeated appends and a determin
|
||||
type: 'transcript.final',
|
||||
payload: { text: '[mocked final] Placeholder push-to-talk transcript completed from 2 appended chunks.' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'session.state',
|
||||
payload: { value: 'thinking' }
|
||||
});
|
||||
|
||||
client.sendJson({ type: 'input_audio.commit', payload: {} });
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'error',
|
||||
payload: {
|
||||
code: 'mocked_turn_in_flight',
|
||||
message: 'Wait for the mocked turn to finish before committing input.',
|
||||
retryable: true
|
||||
}
|
||||
});
|
||||
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'session.state',
|
||||
payload: { value: 'speaking' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'response.text.delta',
|
||||
payload: { text: '[mocked assistant] ' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'response.text.delta',
|
||||
payload: { text: 'This is a deterministic mocked response from the gateway vertical slice.' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'response.completed',
|
||||
payload: {}
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'session.state',
|
||||
payload: { value: 'idle' }
|
||||
@@ -414,7 +485,30 @@ test('websocket emits deterministic partials for repeated appends and a determin
|
||||
type: 'transcript.final',
|
||||
payload: { text: '[mocked final] Placeholder push-to-talk transcript completed without appended audio.' }
|
||||
});
|
||||
await assert.rejects(() => client.nextMessage(150), /timed out waiting for websocket message/);
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'session.state',
|
||||
payload: { value: 'thinking' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'session.state',
|
||||
payload: { value: 'speaking' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'response.text.delta',
|
||||
payload: { text: '[mocked assistant] ' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'response.text.delta',
|
||||
payload: { text: 'This is a deterministic mocked response from the gateway vertical slice.' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'response.completed',
|
||||
payload: {}
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'session.state',
|
||||
payload: { value: 'idle' }
|
||||
});
|
||||
|
||||
await client.close();
|
||||
} finally {
|
||||
@@ -542,6 +636,53 @@ test('websocket cancel stops an active mocked turn and allows a new one without
|
||||
}
|
||||
});
|
||||
|
||||
test('websocket cancel stops a push-to-talk commit response and allows another turn', async () => {
|
||||
const server = await startServer();
|
||||
|
||||
try {
|
||||
const client = await connectWebSocket(server.port);
|
||||
await client.nextMessage();
|
||||
await client.nextMessage();
|
||||
|
||||
client.sendJson({ type: 'input_audio.append', payload: { chunk: 'chunk-1' } });
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'session.state',
|
||||
payload: { value: 'listening' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'transcript.partial',
|
||||
payload: { text: '[mocked partial] Placeholder push-to-talk transcript in progress.' }
|
||||
});
|
||||
|
||||
client.sendJson({ type: 'input_audio.commit', payload: {} });
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'transcript.final',
|
||||
payload: { text: '[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.' }
|
||||
});
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'session.state',
|
||||
payload: { value: 'thinking' }
|
||||
});
|
||||
|
||||
client.sendJson({ type: 'response.cancel', payload: {} });
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'session.state',
|
||||
payload: { value: 'idle' }
|
||||
});
|
||||
await assert.rejects(() => client.nextMessage(150), /timed out waiting for websocket message/);
|
||||
|
||||
client.sendJson({ type: 'mocked.turn.trigger', payload: {} });
|
||||
assert.deepEqual(await client.nextMessage(), {
|
||||
type: 'session.state',
|
||||
payload: { value: 'listening' }
|
||||
});
|
||||
|
||||
await client.close();
|
||||
} finally {
|
||||
await server.close();
|
||||
}
|
||||
});
|
||||
|
||||
test('websocket safely accepts cancel when no turn is active', async () => {
|
||||
const server = await startServer();
|
||||
|
||||
|
||||
@@ -105,6 +105,7 @@ test('voice session shell supports a placeholder mic-control cycle before anothe
|
||||
await expect(page.getByTestId('user-transcript')).toHaveText(
|
||||
'[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
|
||||
);
|
||||
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
|
||||
await expect(page.getByTestId('partial-transcript')).toHaveText('none');
|
||||
await expect(page.getByTestId('gateway-session-state')).toHaveText('idle');
|
||||
await expect(page.getByTestId('mocked-turn-button')).toBeEnabled();
|
||||
@@ -114,3 +115,34 @@ test('voice session shell supports a placeholder mic-control cycle before anothe
|
||||
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
|
||||
await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle');
|
||||
});
|
||||
|
||||
test('voice session shell can cancel a push-to-talk mocked response and start another turn', async ({ page }) => {
|
||||
await page.goto('/');
|
||||
await expect(page.getByTestId('hydration-status')).toHaveText('ready');
|
||||
|
||||
await page.getByTestId('connect-button').click();
|
||||
await expect(page.getByTestId('connection-state')).toHaveText('connected');
|
||||
|
||||
await page.getByTestId('mic-control-button').dispatchEvent('mousedown');
|
||||
await expect(page.getByTestId('gateway-session-state')).toHaveText('listening');
|
||||
await page.getByTestId('mic-control-button').dispatchEvent('mouseup');
|
||||
|
||||
await expect(page.getByTestId('user-transcript')).toHaveText(
|
||||
'[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
|
||||
);
|
||||
await expect(page.getByTestId('cancel-turn-button')).toBeEnabled();
|
||||
await expect(page.getByTestId('assistant-response')).toContainText('[mocked assistant]');
|
||||
|
||||
await page.getByTestId('cancel-turn-button').click();
|
||||
await expect(page.getByTestId('gateway-session-state')).toHaveText('idle');
|
||||
await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle');
|
||||
await expect(page.getByTestId('user-transcript')).toHaveText(
|
||||
'[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
|
||||
);
|
||||
await expect(page.getByTestId('assistant-response')).toContainText('[mocked assistant]');
|
||||
await expect(page.getByTestId('mocked-turn-button')).toBeEnabled();
|
||||
|
||||
await page.getByTestId('mocked-turn-button').click();
|
||||
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
|
||||
await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle');
|
||||
});
|
||||
|
||||
@@ -53,6 +53,7 @@
|
||||
let inProgressPartialTranscript = 'none';
|
||||
let mockedAssistantResponse = 'none';
|
||||
let mockedTurnInFlight = false;
|
||||
let assistantResponseExpected = false;
|
||||
let mockedConversationRenderOrder = [];
|
||||
let micControlActive = false;
|
||||
let hydrationStatus = 'mounting';
|
||||
@@ -78,6 +79,7 @@
|
||||
connectionState === 'connected' &&
|
||||
socket?.readyState === WebSocket.OPEN &&
|
||||
sessionReadyReceived &&
|
||||
gatewaySessionState === 'idle' &&
|
||||
!mockedTurnInFlight;
|
||||
|
||||
function clearSocketHandlers(targetSocket) {
|
||||
@@ -96,6 +98,7 @@
|
||||
inProgressPartialTranscript = 'none';
|
||||
mockedAssistantResponse = 'none';
|
||||
mockedTurnInFlight = false;
|
||||
assistantResponseExpected = false;
|
||||
mockedConversationRenderOrder = [];
|
||||
micControlActive = false;
|
||||
}
|
||||
@@ -125,6 +128,7 @@
|
||||
}
|
||||
|
||||
connectionDetail = 'Mic control shell released. Sending placeholder input_audio.commit.';
|
||||
assistantResponseExpected = true;
|
||||
socket.send(JSON.stringify(createMessageEnvelope('input_audio.commit', {})));
|
||||
}
|
||||
|
||||
@@ -150,6 +154,7 @@
|
||||
inProgressPartialTranscript = 'none';
|
||||
mockedAssistantResponse = 'waiting for mocked response…';
|
||||
mockedTurnInFlight = true;
|
||||
assistantResponseExpected = true;
|
||||
lastError = 'none';
|
||||
socket.send(JSON.stringify(createMessageEnvelope('mocked.turn.trigger', {})));
|
||||
}
|
||||
@@ -235,6 +240,7 @@
|
||||
gatewaySessionState = message.payload.value;
|
||||
if (message.payload.value === 'idle') {
|
||||
mockedTurnInFlight = false;
|
||||
assistantResponseExpected = false;
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -242,7 +248,8 @@
|
||||
if (message.type === 'transcript.final') {
|
||||
inProgressPartialTranscript = 'none';
|
||||
mockedUserTranscript = message.payload.text;
|
||||
if (mockedTurnInFlight) {
|
||||
if (assistantResponseExpected) {
|
||||
mockedTurnInFlight = true;
|
||||
mockedAssistantResponse = '…';
|
||||
}
|
||||
if (!mockedConversationRenderOrder.includes('transcript')) {
|
||||
@@ -276,12 +283,14 @@
|
||||
|
||||
if (message.type === 'response.completed') {
|
||||
mockedTurnInFlight = false;
|
||||
assistantResponseExpected = false;
|
||||
return;
|
||||
}
|
||||
|
||||
if (message.type === 'error') {
|
||||
if (message.payload.retryable === false) {
|
||||
mockedTurnInFlight = false;
|
||||
assistantResponseExpected = false;
|
||||
connectionState = 'error';
|
||||
connectionDetail = 'Gateway reported a protocol error.';
|
||||
} else {
|
||||
|
||||
@@ -93,7 +93,7 @@ describe('voice session shell', () => {
|
||||
});
|
||||
});
|
||||
|
||||
it('runs a placeholder mic-control cycle and keeps mocked turn usable on the same socket', async () => {
|
||||
it('runs a placeholder mic-control cycle, streams mocked assistant text, and keeps mocked turn usable on the same socket', async () => {
|
||||
render(VoiceSessionShell);
|
||||
|
||||
await fireEvent.click(getByTestId('connect-button'));
|
||||
@@ -154,6 +154,15 @@ describe('voice session shell', () => {
|
||||
text: '[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
|
||||
})
|
||||
);
|
||||
socket.message(createMessageEnvelope('session.state', { value: 'thinking' }));
|
||||
socket.message(createMessageEnvelope('session.state', { value: 'speaking' }));
|
||||
socket.message(createMessageEnvelope('response.text.delta', { text: '[mocked assistant] ' }));
|
||||
socket.message(
|
||||
createMessageEnvelope('response.text.delta', {
|
||||
text: 'This is a deterministic mocked response from the gateway vertical slice.'
|
||||
})
|
||||
);
|
||||
socket.message(createMessageEnvelope('response.completed', {}));
|
||||
|
||||
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
|
||||
|
||||
@@ -163,6 +172,9 @@ describe('voice session shell', () => {
|
||||
expect(getByTestId('user-transcript').textContent).toBe(
|
||||
'[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
|
||||
);
|
||||
expect(getByTestId('assistant-response').textContent).toBe(
|
||||
'[mocked assistant] This is a deterministic mocked response from the gateway vertical slice.'
|
||||
);
|
||||
expect(getByTestId('partial-transcript').textContent).toBe('none');
|
||||
});
|
||||
|
||||
@@ -172,6 +184,61 @@ describe('voice session shell', () => {
|
||||
expect(JSON.parse(socket.sent[2]).type).toBe('mocked.turn.trigger');
|
||||
});
|
||||
|
||||
it('keeps rendered push-to-talk transcript and assistant text visible after cancel', async () => {
|
||||
render(VoiceSessionShell);
|
||||
|
||||
await fireEvent.click(getByTestId('connect-button'));
|
||||
const socket = MockWebSocket.latest();
|
||||
socket.open();
|
||||
socket.message(createMessageEnvelope('session.ready', { sessionId: 'session-ptt-cancel' }));
|
||||
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
|
||||
|
||||
await waitFor(() => {
|
||||
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(false);
|
||||
});
|
||||
|
||||
await fireEvent.mouseDown(getByTestId('mic-control-button'));
|
||||
socket.message(createMessageEnvelope('session.state', { value: 'listening' }));
|
||||
socket.message(
|
||||
createMessageEnvelope('transcript.partial', {
|
||||
text: '[mocked partial] Placeholder push-to-talk transcript in progress.'
|
||||
})
|
||||
);
|
||||
|
||||
await fireEvent.mouseUp(getByTestId('mic-control-button'));
|
||||
expect(JSON.parse(socket.sent[1]).type).toBe('input_audio.commit');
|
||||
|
||||
socket.message(
|
||||
createMessageEnvelope('transcript.final', {
|
||||
text: '[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
|
||||
})
|
||||
);
|
||||
socket.message(createMessageEnvelope('session.state', { value: 'thinking' }));
|
||||
socket.message(createMessageEnvelope('session.state', { value: 'speaking' }));
|
||||
socket.message(createMessageEnvelope('response.text.delta', { text: '[mocked assistant] ' }));
|
||||
|
||||
await waitFor(() => {
|
||||
expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(false);
|
||||
expect(getByTestId('assistant-response').textContent).toBe('[mocked assistant] ');
|
||||
});
|
||||
|
||||
await fireEvent.click(getByTestId('cancel-turn-button'));
|
||||
expect(JSON.parse(socket.sent[2]).type).toBe('response.cancel');
|
||||
|
||||
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
|
||||
|
||||
await waitFor(() => {
|
||||
expect(getByTestId('gateway-session-state').textContent).toBe('idle');
|
||||
expect(getByTestId('mocked-turn-status').textContent).toBe('idle');
|
||||
expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(true);
|
||||
expect(getByTestId('user-transcript').textContent).toBe(
|
||||
'[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
|
||||
);
|
||||
expect(getByTestId('assistant-response').textContent).toBe('[mocked assistant] ');
|
||||
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
it('renders mocked transcript before assistant response for a connected session', async () => {
|
||||
render(VoiceSessionShell);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user