feat(vela): mock push-to-talk transcript updates

This commit is contained in:
2026-04-08 20:13:36 +02:00
parent 103bb11954
commit 98bcc543f5
8 changed files with 179 additions and 6 deletions

View File

@@ -14,6 +14,22 @@ const WEBSOCKET_GUID = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11';
const MOCKED_USER_TRANSCRIPT = '[mocked user] What is the current mocked vertical slice?';
const MOCKED_ASSISTANT_RESPONSE = '[mocked assistant] This is a deterministic mocked response from the gateway vertical slice.';
/**
 * Builds the deterministic placeholder text for an in-progress
 * push-to-talk transcript.
 *
 * @param {number} audioChunkCount - Number of audio chunks counted so far.
 * @returns {string} The single-chunk wording when the count is exactly 1,
 *   otherwise a wording that embeds the chunk count.
 */
function createPlaceholderPartialTranscript(audioChunkCount) {
  // Only an exact numeric 1 gets the wording without a chunk count.
  if (audioChunkCount === 1) {
    return '[mocked partial] Placeholder push-to-talk transcript in progress.';
  }

  return `[mocked partial] Placeholder push-to-talk transcript in progress (${audioChunkCount} chunks).`;
}
/**
 * Builds the deterministic placeholder text for a completed
 * push-to-talk transcript.
 *
 * @param {number} audioChunkCount - Number of audio chunks appended before
 *   the commit.
 * @returns {string} A dedicated message for zero chunks, a singular message
 *   for exactly one chunk, and a pluralized message embedding the count
 *   for every other value.
 */
function createPlaceholderFinalTranscript(audioChunkCount) {
  // `switch` compares with strict equality, matching the `===` checks it replaces.
  switch (audioChunkCount) {
    case 0:
      return '[mocked final] Placeholder push-to-talk transcript completed without appended audio.';
    case 1:
      return '[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.';
    default:
      return `[mocked final] Placeholder push-to-talk transcript completed from ${audioChunkCount} appended chunks.`;
  }
}
function createSessionRecord() {
return {
id: crypto.randomUUID(),
@@ -238,6 +254,9 @@ function handleClientMessage(socket, session, rawMessage) {
session.audioChunkCount += 1;
updateSessionState(socket, session, 'listening');
sendSocketMessage(socket, 'transcript.partial', {
text: createPlaceholderPartialTranscript(session.audioChunkCount)
});
break;
case 'input_audio.commit':
if (session.mockedTurnInFlight) {
@@ -245,6 +264,9 @@ function handleClientMessage(socket, session, rawMessage) {
break;
}
sendSocketMessage(socket, 'transcript.final', {
text: createPlaceholderFinalTranscript(session.audioChunkCount)
});
session.audioChunkCount = 0;
updateSessionState(socket, session, 'idle');
break;

View File

@@ -293,8 +293,16 @@ test('websocket handles valid and invalid client messages safely', async () => {
type: 'session.state',
payload: { value: 'listening' }
});
assert.deepEqual(await client.nextMessage(), {
type: 'transcript.partial',
payload: { text: '[mocked partial] Placeholder push-to-talk transcript in progress.' }
});
client.sendJson({ type: 'input_audio.commit', payload: {} });
assert.deepEqual(await client.nextMessage(), {
type: 'transcript.final',
payload: { text: '[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.' }
});
assert.deepEqual(await client.nextMessage(), {
type: 'session.state',
payload: { value: 'idle' }
@@ -340,8 +348,16 @@ test('websocket accepts a placeholder input cycle before a mocked turn on the sa
type: 'session.state',
payload: { value: 'listening' }
});
assert.deepEqual(await client.nextMessage(), {
type: 'transcript.partial',
payload: { text: '[mocked partial] Placeholder push-to-talk transcript in progress.' }
});
client.sendJson({ type: 'input_audio.commit', payload: {} });
assert.deepEqual(await client.nextMessage(), {
type: 'transcript.final',
payload: { text: '[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.' }
});
assert.deepEqual(await client.nextMessage(), {
type: 'session.state',
payload: { value: 'idle' }
@@ -359,6 +375,53 @@ test('websocket accepts a placeholder input cycle before a mocked turn on the sa
}
});
// Walks one websocket connection through two appends, a commit, and then a
// second commit with no append in between, checking every message the server
// sends back and the order in which it arrives.
test('websocket emits deterministic partials for repeated appends and a deterministic final for commit without append', async () => {
  const server = await startServer();

  try {
    const client = await connectWebSocket(server.port);

    // Reads the next server message and compares it with the expected envelope.
    const expectNextMessage = async (type, payload) => {
      assert.deepEqual(await client.nextMessage(), { type, payload });
    };

    // Consume the two messages received before any client input is sent.
    await client.nextMessage();
    await client.nextMessage();

    // First append: a state change to "listening", then the single-chunk partial.
    client.sendJson({ type: 'input_audio.append', payload: { chunk: 'chunk-1' } });
    await expectNextMessage('session.state', { value: 'listening' });
    await expectNextMessage('transcript.partial', {
      text: '[mocked partial] Placeholder push-to-talk transcript in progress.'
    });

    // Second append: the very next message must already be the partial,
    // this time carrying the chunk count.
    client.sendJson({ type: 'input_audio.append', payload: { chunk: 'chunk-2' } });
    await expectNextMessage('transcript.partial', {
      text: '[mocked partial] Placeholder push-to-talk transcript in progress (2 chunks).'
    });

    // Commit after two appends: final transcript first, then the return to "idle".
    client.sendJson({ type: 'input_audio.commit', payload: {} });
    await expectNextMessage('transcript.final', {
      text: '[mocked final] Placeholder push-to-talk transcript completed from 2 appended chunks.'
    });
    await expectNextMessage('session.state', { value: 'idle' });

    // Commit again without appending anything: only the "without appended
    // audio" final is expected, and nothing else may follow within 150 ms.
    client.sendJson({ type: 'input_audio.commit', payload: {} });
    await expectNextMessage('transcript.final', {
      text: '[mocked final] Placeholder push-to-talk transcript completed without appended audio.'
    });
    await assert.rejects(() => client.nextMessage(150), /timed out waiting for websocket message/);

    await client.close();
  } finally {
    await server.close();
  }
});
test('websocket mocked turn emits deterministic transcript and response events in order', async () => {
const server = await startServer();

View File

@@ -92,10 +92,20 @@ test('voice session shell supports a placeholder mic-control cycle before anothe
await page.getByTestId('mic-control-button').dispatchEvent('mousedown');
await expect(page.getByTestId('mic-control-status')).toHaveText('holding');
await expect(page.getByTestId('gateway-session-state')).toHaveText('listening');
await expect(page.getByTestId('partial-transcript')).toHaveText(
'[mocked partial] Placeholder push-to-talk transcript in progress.'
);
await expect(page.getByTestId('user-transcript')).toHaveText(
'[mocked partial] Placeholder push-to-talk transcript in progress.'
);
await expect(page.getByTestId('mocked-turn-button')).toBeDisabled();
await page.getByTestId('mic-control-button').dispatchEvent('mouseup');
await expect(page.getByTestId('mic-control-status')).toHaveText('idle');
await expect(page.getByTestId('user-transcript')).toHaveText(
'[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
);
await expect(page.getByTestId('partial-transcript')).toHaveText('none');
await expect(page.getByTestId('gateway-session-state')).toHaveText('idle');
await expect(page.getByTestId('mocked-turn-button')).toBeEnabled();

View File

@@ -50,6 +50,7 @@
let socket = null;
let connectionAttempts = 0;
let mockedUserTranscript = 'none';
let inProgressPartialTranscript = 'none';
let mockedAssistantResponse = 'none';
let mockedTurnInFlight = false;
let mockedConversationRenderOrder = [];
@@ -92,6 +93,7 @@
sessionReadyReceived = false;
lastServerEvent = 'none';
mockedUserTranscript = 'none';
inProgressPartialTranscript = 'none';
mockedAssistantResponse = 'none';
mockedTurnInFlight = false;
mockedConversationRenderOrder = [];
@@ -145,6 +147,7 @@
}
mockedUserTranscript = 'waiting for mocked transcript…';
inProgressPartialTranscript = 'none';
mockedAssistantResponse = 'waiting for mocked response…';
mockedTurnInFlight = true;
lastError = 'none';
@@ -237,9 +240,23 @@
}
if (message.type === 'transcript.final') {
inProgressPartialTranscript = 'none';
mockedUserTranscript = message.payload.text;
mockedAssistantResponse = '…';
mockedConversationRenderOrder = [...mockedConversationRenderOrder, 'transcript'];
if (mockedTurnInFlight) {
mockedAssistantResponse = '…';
}
if (!mockedConversationRenderOrder.includes('transcript')) {
mockedConversationRenderOrder = [...mockedConversationRenderOrder, 'transcript'];
}
return;
}
if (message.type === 'transcript.partial') {
inProgressPartialTranscript = message.payload.text;
mockedUserTranscript = message.payload.text;
if (!mockedConversationRenderOrder.includes('transcript')) {
mockedConversationRenderOrder = [...mockedConversationRenderOrder, 'transcript'];
}
return;
}
@@ -395,6 +412,10 @@
<span>Mocked user transcript</span>
<p data-testid="user-transcript">{mockedUserTranscript}</p>
</div>
<div>
<span>In-progress partial transcript</span>
<p data-testid="partial-transcript">{inProgressPartialTranscript}</p>
</div>
<div>
<span>Mocked assistant response</span>
<p data-testid="assistant-response">{mockedAssistantResponse}</p>

View File

@@ -123,10 +123,21 @@ describe('voice session shell', () => {
expect(getByTestId('mic-control-status').textContent).toBe('holding');
socket.message(createMessageEnvelope('session.state', { value: 'listening' }));
socket.message(
createMessageEnvelope('transcript.partial', {
text: '[mocked partial] Placeholder push-to-talk transcript in progress.'
})
);
await waitFor(() => {
expect(getByTestId('gateway-session-state').textContent).toBe('listening');
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(true);
expect(getByTestId('user-transcript').textContent).toBe(
'[mocked partial] Placeholder push-to-talk transcript in progress.'
);
expect(getByTestId('partial-transcript').textContent).toBe(
'[mocked partial] Placeholder push-to-talk transcript in progress.'
);
});
await fireEvent.mouseUp(getByTestId('mic-control-button'));
@@ -138,11 +149,21 @@ describe('voice session shell', () => {
});
expect(getByTestId('mic-control-status').textContent).toBe('idle');
socket.message(
createMessageEnvelope('transcript.final', {
text: '[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
})
);
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
await waitFor(() => {
expect(getByTestId('gateway-session-state').textContent).toBe('idle');
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(false);
expect(getByTestId('user-transcript').textContent).toBe(
'[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
);
expect(getByTestId('partial-transcript').textContent).toBe('none');
});
await fireEvent.click(getByTestId('mocked-turn-button'));
@@ -171,6 +192,11 @@ describe('voice session shell', () => {
expect(sentMessage.type).toBe('mocked.turn.trigger');
socket.message(createMessageEnvelope('session.state', { value: 'listening' }));
socket.message(
createMessageEnvelope('transcript.partial', {
text: '[mocked partial] Placeholder push-to-talk transcript in progress.'
})
);
socket.message(createMessageEnvelope('transcript.final', { text: 'Turn on the office lamp.' }));
socket.message(createMessageEnvelope('session.state', { value: 'thinking' }));
socket.message(createMessageEnvelope('session.state', { value: 'speaking' }));
@@ -181,6 +207,7 @@ describe('voice session shell', () => {
await waitFor(() => {
expect(getByTestId('user-transcript').textContent).toBe('Turn on the office lamp.');
expect(getByTestId('partial-transcript').textContent).toBe('none');
expect(getByTestId('assistant-response').textContent).toBe('Mocked assistant response.');
expect(getByTestId('conversation-render-order').textContent).toBe('transcript>response');
expect(getByTestId('gateway-session-state').textContent).toBe('idle');