feat(vela): mock push-to-talk transcript updates

This commit is contained in:
2026-04-08 20:13:36 +02:00
parent 103bb11954
commit 98bcc543f5
8 changed files with 179 additions and 6 deletions

View File

@@ -14,6 +14,22 @@ const WEBSOCKET_GUID = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11';
const MOCKED_USER_TRANSCRIPT = '[mocked user] What is the current mocked vertical slice?'; const MOCKED_USER_TRANSCRIPT = '[mocked user] What is the current mocked vertical slice?';
const MOCKED_ASSISTANT_RESPONSE = '[mocked assistant] This is a deterministic mocked response from the gateway vertical slice.'; const MOCKED_ASSISTANT_RESPONSE = '[mocked assistant] This is a deterministic mocked response from the gateway vertical slice.';
/**
 * Builds the deterministic placeholder text carried by a mocked
 * `transcript.partial` event.
 *
 * @param {number} audioChunkCount - Chunks appended so far in the current placeholder turn.
 * @returns {string} The plain in-progress text when exactly one chunk has been
 *   appended; for any other value, the same text with the chunk count in parentheses.
 */
function createPlaceholderPartialTranscript(audioChunkCount) {
  // Any count other than exactly 1 is reported explicitly in the text.
  if (audioChunkCount !== 1) {
    return `[mocked partial] Placeholder push-to-talk transcript in progress (${audioChunkCount} chunks).`;
  }

  return '[mocked partial] Placeholder push-to-talk transcript in progress.';
}
/**
 * Builds the deterministic placeholder text carried by a mocked
 * `transcript.final` event.
 *
 * @param {number} audioChunkCount - Chunks appended during the placeholder turn being committed.
 * @returns {string} A dedicated message for zero chunks, a singular message for
 *   exactly one chunk, and a plural message embedding the count otherwise.
 */
function createPlaceholderFinalTranscript(audioChunkCount) {
  // `switch` compares with strict equality, so only the numbers 0 and 1 hit the
  // special cases; every other value falls through to the plural wording.
  switch (audioChunkCount) {
    case 0:
      return '[mocked final] Placeholder push-to-talk transcript completed without appended audio.';
    case 1:
      return '[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.';
    default:
      return `[mocked final] Placeholder push-to-talk transcript completed from ${audioChunkCount} appended chunks.`;
  }
}
function createSessionRecord() { function createSessionRecord() {
return { return {
id: crypto.randomUUID(), id: crypto.randomUUID(),
@@ -238,6 +254,9 @@ function handleClientMessage(socket, session, rawMessage) {
session.audioChunkCount += 1; session.audioChunkCount += 1;
updateSessionState(socket, session, 'listening'); updateSessionState(socket, session, 'listening');
sendSocketMessage(socket, 'transcript.partial', {
text: createPlaceholderPartialTranscript(session.audioChunkCount)
});
break; break;
case 'input_audio.commit': case 'input_audio.commit':
if (session.mockedTurnInFlight) { if (session.mockedTurnInFlight) {
@@ -245,6 +264,9 @@ function handleClientMessage(socket, session, rawMessage) {
break; break;
} }
sendSocketMessage(socket, 'transcript.final', {
text: createPlaceholderFinalTranscript(session.audioChunkCount)
});
session.audioChunkCount = 0; session.audioChunkCount = 0;
updateSessionState(socket, session, 'idle'); updateSessionState(socket, session, 'idle');
break; break;

View File

@@ -293,8 +293,16 @@ test('websocket handles valid and invalid client messages safely', async () => {
type: 'session.state', type: 'session.state',
payload: { value: 'listening' } payload: { value: 'listening' }
}); });
assert.deepEqual(await client.nextMessage(), {
type: 'transcript.partial',
payload: { text: '[mocked partial] Placeholder push-to-talk transcript in progress.' }
});
client.sendJson({ type: 'input_audio.commit', payload: {} }); client.sendJson({ type: 'input_audio.commit', payload: {} });
assert.deepEqual(await client.nextMessage(), {
type: 'transcript.final',
payload: { text: '[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.' }
});
assert.deepEqual(await client.nextMessage(), { assert.deepEqual(await client.nextMessage(), {
type: 'session.state', type: 'session.state',
payload: { value: 'idle' } payload: { value: 'idle' }
@@ -340,8 +348,16 @@ test('websocket accepts a placeholder input cycle before a mocked turn on the sa
type: 'session.state', type: 'session.state',
payload: { value: 'listening' } payload: { value: 'listening' }
}); });
assert.deepEqual(await client.nextMessage(), {
type: 'transcript.partial',
payload: { text: '[mocked partial] Placeholder push-to-talk transcript in progress.' }
});
client.sendJson({ type: 'input_audio.commit', payload: {} }); client.sendJson({ type: 'input_audio.commit', payload: {} });
assert.deepEqual(await client.nextMessage(), {
type: 'transcript.final',
payload: { text: '[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.' }
});
assert.deepEqual(await client.nextMessage(), { assert.deepEqual(await client.nextMessage(), {
type: 'session.state', type: 'session.state',
payload: { value: 'idle' } payload: { value: 'idle' }
@@ -359,6 +375,53 @@ test('websocket accepts a placeholder input cycle before a mocked turn on the sa
} }
}); });
// Exercises two edge cases of the placeholder push-to-talk flow on one socket:
// (1) several appends inside a single turn, and (2) a commit with no preceding append.
test('websocket emits deterministic partials for repeated appends and a deterministic final for commit without append', async () => {
const server = await startServer();
try {
const client = await connectWebSocket(server.port);
// Discard the first two messages received after connecting; their contents are
// not asserted here (presumably the session.ready / initial session.state handshake).
await client.nextMessage();
await client.nextMessage();
// First append of the turn: expect the listening state, then the single-chunk partial.
client.sendJson({ type: 'input_audio.append', payload: { chunk: 'chunk-1' } });
assert.deepEqual(await client.nextMessage(), {
type: 'session.state',
payload: { value: 'listening' }
});
assert.deepEqual(await client.nextMessage(), {
type: 'transcript.partial',
payload: { text: '[mocked partial] Placeholder push-to-talk transcript in progress.' }
});
// Second append in the same turn: the very next message is expected to be the
// partial carrying the chunk count, with no additional session.state before it.
client.sendJson({ type: 'input_audio.append', payload: { chunk: 'chunk-2' } });
assert.deepEqual(await client.nextMessage(), {
type: 'transcript.partial',
payload: { text: '[mocked partial] Placeholder push-to-talk transcript in progress (2 chunks).' }
});
// Commit after two appends: the final reports the plural chunk count, followed
// by the transition back to idle.
client.sendJson({ type: 'input_audio.commit', payload: {} });
assert.deepEqual(await client.nextMessage(), {
type: 'transcript.final',
payload: { text: '[mocked final] Placeholder push-to-talk transcript completed from 2 appended chunks.' }
});
assert.deepEqual(await client.nextMessage(), {
type: 'session.state',
payload: { value: 'idle' }
});
// Commit with nothing appended since the previous commit: expect the
// "without appended audio" final.
client.sendJson({ type: 'input_audio.commit', payload: {} });
assert.deepEqual(await client.nextMessage(), {
type: 'transcript.final',
payload: { text: '[mocked final] Placeholder push-to-talk transcript completed without appended audio.' }
});
// Nothing else should follow that final: nextMessage(150) is expected to reject
// with its timeout error (150 is presumably milliseconds — confirm against the helper).
await assert.rejects(() => client.nextMessage(150), /timed out waiting for websocket message/);
await client.close();
} finally {
// Always shut the server down, even when an assertion above fails.
await server.close();
}
});
test('websocket mocked turn emits deterministic transcript and response events in order', async () => { test('websocket mocked turn emits deterministic transcript and response events in order', async () => {
const server = await startServer(); const server = await startServer();

View File

@@ -92,10 +92,20 @@ test('voice session shell supports a placeholder mic-control cycle before anothe
await page.getByTestId('mic-control-button').dispatchEvent('mousedown'); await page.getByTestId('mic-control-button').dispatchEvent('mousedown');
await expect(page.getByTestId('mic-control-status')).toHaveText('holding'); await expect(page.getByTestId('mic-control-status')).toHaveText('holding');
await expect(page.getByTestId('gateway-session-state')).toHaveText('listening'); await expect(page.getByTestId('gateway-session-state')).toHaveText('listening');
await expect(page.getByTestId('partial-transcript')).toHaveText(
'[mocked partial] Placeholder push-to-talk transcript in progress.'
);
await expect(page.getByTestId('user-transcript')).toHaveText(
'[mocked partial] Placeholder push-to-talk transcript in progress.'
);
await expect(page.getByTestId('mocked-turn-button')).toBeDisabled(); await expect(page.getByTestId('mocked-turn-button')).toBeDisabled();
await page.getByTestId('mic-control-button').dispatchEvent('mouseup'); await page.getByTestId('mic-control-button').dispatchEvent('mouseup');
await expect(page.getByTestId('mic-control-status')).toHaveText('idle'); await expect(page.getByTestId('mic-control-status')).toHaveText('idle');
await expect(page.getByTestId('user-transcript')).toHaveText(
'[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
);
await expect(page.getByTestId('partial-transcript')).toHaveText('none');
await expect(page.getByTestId('gateway-session-state')).toHaveText('idle'); await expect(page.getByTestId('gateway-session-state')).toHaveText('idle');
await expect(page.getByTestId('mocked-turn-button')).toBeEnabled(); await expect(page.getByTestId('mocked-turn-button')).toBeEnabled();

View File

@@ -50,6 +50,7 @@
let socket = null; let socket = null;
let connectionAttempts = 0; let connectionAttempts = 0;
let mockedUserTranscript = 'none'; let mockedUserTranscript = 'none';
let inProgressPartialTranscript = 'none';
let mockedAssistantResponse = 'none'; let mockedAssistantResponse = 'none';
let mockedTurnInFlight = false; let mockedTurnInFlight = false;
let mockedConversationRenderOrder = []; let mockedConversationRenderOrder = [];
@@ -92,6 +93,7 @@
sessionReadyReceived = false; sessionReadyReceived = false;
lastServerEvent = 'none'; lastServerEvent = 'none';
mockedUserTranscript = 'none'; mockedUserTranscript = 'none';
inProgressPartialTranscript = 'none';
mockedAssistantResponse = 'none'; mockedAssistantResponse = 'none';
mockedTurnInFlight = false; mockedTurnInFlight = false;
mockedConversationRenderOrder = []; mockedConversationRenderOrder = [];
@@ -145,6 +147,7 @@
} }
mockedUserTranscript = 'waiting for mocked transcript…'; mockedUserTranscript = 'waiting for mocked transcript…';
inProgressPartialTranscript = 'none';
mockedAssistantResponse = 'waiting for mocked response…'; mockedAssistantResponse = 'waiting for mocked response…';
mockedTurnInFlight = true; mockedTurnInFlight = true;
lastError = 'none'; lastError = 'none';
@@ -237,9 +240,23 @@
} }
if (message.type === 'transcript.final') { if (message.type === 'transcript.final') {
inProgressPartialTranscript = 'none';
mockedUserTranscript = message.payload.text; mockedUserTranscript = message.payload.text;
mockedAssistantResponse = '…'; if (mockedTurnInFlight) {
mockedConversationRenderOrder = [...mockedConversationRenderOrder, 'transcript']; mockedAssistantResponse = '…';
}
if (!mockedConversationRenderOrder.includes('transcript')) {
mockedConversationRenderOrder = [...mockedConversationRenderOrder, 'transcript'];
}
return;
}
if (message.type === 'transcript.partial') {
inProgressPartialTranscript = message.payload.text;
mockedUserTranscript = message.payload.text;
if (!mockedConversationRenderOrder.includes('transcript')) {
mockedConversationRenderOrder = [...mockedConversationRenderOrder, 'transcript'];
}
return; return;
} }
@@ -395,6 +412,10 @@
<span>Mocked user transcript</span> <span>Mocked user transcript</span>
<p data-testid="user-transcript">{mockedUserTranscript}</p> <p data-testid="user-transcript">{mockedUserTranscript}</p>
</div> </div>
<div>
<span>In-progress partial transcript</span>
<p data-testid="partial-transcript">{inProgressPartialTranscript}</p>
</div>
<div> <div>
<span>Mocked assistant response</span> <span>Mocked assistant response</span>
<p data-testid="assistant-response">{mockedAssistantResponse}</p> <p data-testid="assistant-response">{mockedAssistantResponse}</p>

View File

@@ -123,10 +123,21 @@ describe('voice session shell', () => {
expect(getByTestId('mic-control-status').textContent).toBe('holding'); expect(getByTestId('mic-control-status').textContent).toBe('holding');
socket.message(createMessageEnvelope('session.state', { value: 'listening' })); socket.message(createMessageEnvelope('session.state', { value: 'listening' }));
socket.message(
createMessageEnvelope('transcript.partial', {
text: '[mocked partial] Placeholder push-to-talk transcript in progress.'
})
);
await waitFor(() => { await waitFor(() => {
expect(getByTestId('gateway-session-state').textContent).toBe('listening'); expect(getByTestId('gateway-session-state').textContent).toBe('listening');
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(true); expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(true);
expect(getByTestId('user-transcript').textContent).toBe(
'[mocked partial] Placeholder push-to-talk transcript in progress.'
);
expect(getByTestId('partial-transcript').textContent).toBe(
'[mocked partial] Placeholder push-to-talk transcript in progress.'
);
}); });
await fireEvent.mouseUp(getByTestId('mic-control-button')); await fireEvent.mouseUp(getByTestId('mic-control-button'));
@@ -138,11 +149,21 @@ describe('voice session shell', () => {
}); });
expect(getByTestId('mic-control-status').textContent).toBe('idle'); expect(getByTestId('mic-control-status').textContent).toBe('idle');
socket.message(
createMessageEnvelope('transcript.final', {
text: '[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
})
);
socket.message(createMessageEnvelope('session.state', { value: 'idle' })); socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
await waitFor(() => { await waitFor(() => {
expect(getByTestId('gateway-session-state').textContent).toBe('idle'); expect(getByTestId('gateway-session-state').textContent).toBe('idle');
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(false); expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(false);
expect(getByTestId('user-transcript').textContent).toBe(
'[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
);
expect(getByTestId('partial-transcript').textContent).toBe('none');
}); });
await fireEvent.click(getByTestId('mocked-turn-button')); await fireEvent.click(getByTestId('mocked-turn-button'));
@@ -171,6 +192,11 @@ describe('voice session shell', () => {
expect(sentMessage.type).toBe('mocked.turn.trigger'); expect(sentMessage.type).toBe('mocked.turn.trigger');
socket.message(createMessageEnvelope('session.state', { value: 'listening' })); socket.message(createMessageEnvelope('session.state', { value: 'listening' }));
socket.message(
createMessageEnvelope('transcript.partial', {
text: '[mocked partial] Placeholder push-to-talk transcript in progress.'
})
);
socket.message(createMessageEnvelope('transcript.final', { text: 'Turn on the office lamp.' })); socket.message(createMessageEnvelope('transcript.final', { text: 'Turn on the office lamp.' }));
socket.message(createMessageEnvelope('session.state', { value: 'thinking' })); socket.message(createMessageEnvelope('session.state', { value: 'thinking' }));
socket.message(createMessageEnvelope('session.state', { value: 'speaking' })); socket.message(createMessageEnvelope('session.state', { value: 'speaking' }));
@@ -181,6 +207,7 @@ describe('voice session shell', () => {
await waitFor(() => { await waitFor(() => {
expect(getByTestId('user-transcript').textContent).toBe('Turn on the office lamp.'); expect(getByTestId('user-transcript').textContent).toBe('Turn on the office lamp.');
expect(getByTestId('partial-transcript').textContent).toBe('none');
expect(getByTestId('assistant-response').textContent).toBe('Mocked assistant response.'); expect(getByTestId('assistant-response').textContent).toBe('Mocked assistant response.');
expect(getByTestId('conversation-render-order').textContent).toBe('transcript>response'); expect(getByTestId('conversation-render-order').textContent).toBe('transcript>response');
expect(getByTestId('gateway-session-state').textContent).toBe('idle'); expect(getByTestId('gateway-session-state').textContent).toBe('idle');

View File

@@ -36,7 +36,7 @@ The repository now includes separate runnable workspaces for the UI and gateway
- PWA enabled - PWA enabled
- WebSocket client - WebSocket client
The current implementation is a minimal SvelteKit app with a single voice-session shell page. The shipped UI can open and close a browser WebSocket connection to the gateway `/ws` endpoint, show explicit connection status (`not connected`, `connecting`, `connected`, `disconnected`, `error`), expose mic control shell interactions that emit placeholder `input_audio.append` / `input_audio.commit` events, trigger one deterministic mocked turn while connected, and render the mocked user transcript plus mocked assistant response for the active session. This remains a shell only: there is no real microphone capture, real provider integration, or audio playback yet. The current implementation is a minimal SvelteKit app with a single voice-session shell page. The shipped UI can open and close a browser WebSocket connection to the gateway `/ws` endpoint, show explicit connection status (`not connected`, `connecting`, `connected`, `disconnected`, `error`), expose mic control shell interactions that emit placeholder `input_audio.append` / `input_audio.commit` events, trigger one deterministic mocked turn while connected, render deterministic placeholder partial/final transcripts for the push-to-talk shell, and render the mocked user transcript plus mocked assistant response for the existing mocked-turn path. This remains a shell only: there is no real microphone capture, real provider integration, or audio playback yet.
#### Responsibilities #### Responsibilities
@@ -105,6 +105,7 @@ The current implementation is a minimal Fastify service with `/`, `/health`, and
- WebSocket upgrades on `/ws` create an ephemeral session immediately - WebSocket upgrades on `/ws` create an ephemeral session immediately
- the gateway sends `session.ready` followed by `session.state` (`idle`) when the socket is established - the gateway sends `session.ready` followed by `session.state` (`idle`) when the socket is established
- valid minimal client events, including placeholder `input_audio.append` / `input_audio.commit`, can move the session between `idle` and `listening` - valid minimal client events, including placeholder `input_audio.append` / `input_audio.commit`, can move the session between `idle` and `listening`
- placeholder `input_audio.append` emits deterministic mocked `transcript.partial` events and `input_audio.commit` emits one deterministic mocked `transcript.final`
- `mocked.turn.trigger` drives a fixed transcript/response event sequence over the existing shared protocol - `mocked.turn.trigger` drives a fixed transcript/response event sequence over the existing shared protocol
- only one mocked turn is allowed in flight per session at a time - only one mocked turn is allowed in flight per session at a time
- invalid JSON, invalid envelopes, and malformed frames are handled defensively so the process stays up - invalid JSON, invalid envelopes, and malformed frames are handled defensively so the process stays up
@@ -115,12 +116,13 @@ The current implementation is a minimal Fastify service with `/`, `/health`, and
- exposes connect, disconnect, mic-control shell interactions, and mocked-turn controls - exposes connect, disconnect, mic-control shell interactions, and mocked-turn controls
- does not request microphone permission or capture real microphone audio - does not request microphone permission or capture real microphone audio
- only emits placeholder `input_audio.append` / `input_audio.commit` events; it does not send real audio data or play back audio - only emits placeholder `input_audio.append` / `input_audio.commit` events; it does not send real audio data or play back audio
- renders the latest placeholder partial transcript during a push-to-talk shell turn and replaces it with the final deterministic transcript on commit
- reads mocked transcript and mocked response events from the shared protocol contract - reads mocked transcript and mocked response events from the shared protocol contract
## Voice Pipeline ## Voice Pipeline
```text ```text
Mic control shell / mocked turn button → Placeholder `input_audio.append` / `input_audio.commit` or mocked session flow → Transcript events → Response text events → UI Mic control shell / mocked turn button → Placeholder `input_audio.append` / `input_audio.commit` or mocked session flow → Deterministic transcript events → Mocked response text events when using mocked.turn.trigger → UI
``` ```
This mocked vertical slice intentionally stands in for the future real pipeline: This mocked vertical slice intentionally stands in for the future real pipeline:

View File

@@ -38,7 +38,7 @@ Prove the end-to-end interaction model with mocked or stubbed providers.
- [x] create a minimal UI with mic control - [x] create a minimal UI with mic control
- [x] create a gateway WebSocket session skeleton - [x] create a gateway WebSocket session skeleton
- [x] implement a mocked transcript/response vertical slice over the existing WebSocket session - [x] implement a mocked transcript/response vertical slice over the existing WebSocket session
- implement mocked STT flow for partial transcript events - [x] implement mocked STT flow for partial transcript events
- implement mocked LLM response streaming beyond the fixed deterministic slice - implement mocked LLM response streaming beyond the fixed deterministic slice
- implement stubbed audio playback or placeholder TTS output - implement stubbed audio playback or placeholder TTS output
- [x] implement interrupt handling across the mocked pipeline - [x] implement interrupt handling across the mocked pipeline
@@ -190,6 +190,8 @@ Polish the system after the core voice loop is reliable.
- `apps/vela-gateway` now exposes a minimal `/ws` WebSocket session skeleton with ephemeral in-memory sessions and defensive message handling - `apps/vela-gateway` now exposes a minimal `/ws` WebSocket session skeleton with ephemeral in-memory sessions and defensive message handling
- `apps/vela-gateway` now accepts `mocked.turn.trigger` and emits protocol-valid mocked transcript/response events with one in-flight mocked turn per session - `apps/vela-gateway` now accepts `mocked.turn.trigger` and emits protocol-valid mocked transcript/response events with one in-flight mocked turn per session
- `apps/vela-gateway` now supports placeholder input-audio append/commit cycles before running another mocked turn on the same socket - `apps/vela-gateway` now supports placeholder input-audio append/commit cycles before running another mocked turn on the same socket
- `apps/vela-gateway` now emits deterministic `transcript.partial` events for placeholder `input_audio.append` messages and exactly one deterministic `transcript.final` for each placeholder `input_audio.commit`
- `apps/vela-ui` now renders the latest placeholder partial transcript during the push-to-talk shell turn and replaces it with the deterministic final transcript on commit
- `apps/vela-ui` now exposes a cancel control for active mocked turns and keeps already-rendered transcript/response text visible after cancellation - `apps/vela-ui` now exposes a cancel control for active mocked turns and keeps already-rendered transcript/response text visible after cancellation
- `apps/vela-gateway` now honors `response.cancel` during mocked turns by stopping pending mocked response events, returning the session to `idle`, and allowing a new mocked turn on the same socket - `apps/vela-gateway` now honors `response.cancel` during mocked turns by stopping pending mocked response events, returning the session to `idle`, and allowing a new mocked turn on the same socket
- `apps/vela-protocol` now provides the shared WebSocket event contract for the UI and gateway - `apps/vela-protocol` now provides the shared WebSocket event contract for the UI and gateway

View File

@@ -62,7 +62,8 @@ type ClientEvent =
- `mocked.turn.trigger` is accepted only when no other mocked turn is already in flight for that session - `mocked.turn.trigger` is accepted only when no other mocked turn is already in flight for that session
- a mocked turn emits deterministic `transcript.final`, `response.text.delta`, `response.completed`, and `session.state` events in protocol-valid order - a mocked turn emits deterministic `transcript.final`, `response.text.delta`, `response.completed`, and `session.state` events in protocol-valid order
- `input_audio.append` updates the ephemeral session record and moves the session to `listening` - `input_audio.append` updates the ephemeral session record and moves the session to `listening`
- `input_audio.commit` resets the minimal buffered state and returns the session to `idle` - each accepted `input_audio.append` emits one deterministic `transcript.partial` for the current placeholder turn
- `input_audio.commit` emits exactly one deterministic `transcript.final`, resets the minimal buffered state, and returns the session to `idle`
- after a completed placeholder input cycle, the same socket can still send `mocked.turn.trigger` - after a completed placeholder input cycle, the same socket can still send `mocked.turn.trigger`
- `response.cancel` is safe to send even when no mocked turn is active - `response.cancel` is safe to send even when no mocked turn is active
- `response.cancel` stops any still-pending mocked turn events for the active turn and resets the minimal session state back to `idle` - `response.cancel` stops any still-pending mocked turn events for the active turn and resets the minimal session state back to `idle`
@@ -90,6 +91,8 @@ Notes:
- the UI disables the mocked-turn control until `session.ready` arrives, while disconnected, or while a mocked turn is already in flight - the UI disables the mocked-turn control until `session.ready` arrives, while disconnected, or while a mocked turn is already in flight
- the UI disables the mic control while disconnected, before `session.ready`, or while a mocked turn is already in flight - the UI disables the mic control while disconnected, before `session.ready`, or while a mocked turn is already in flight
- pressing the mic control sends one placeholder `input_audio.append` chunk and releasing it sends `input_audio.commit` - pressing the mic control sends one placeholder `input_audio.append` chunk and releasing it sends `input_audio.commit`
- while a placeholder push-to-talk turn is in progress, the UI renders the latest `transcript.partial`
- after placeholder commit, the UI renders the `transcript.final` and clears the partial-only display
- the UI copy explicitly labels the mic button as a control shell and not real microphone capture - the UI copy explicitly labels the mic button as a control shell and not real microphone capture
- the UI shows a cancel control and enables it only while a mocked turn is active - the UI shows a cancel control and enables it only while a mocked turn is active
- after cancel returns the gateway to `idle`, the UI clears the active-turn indicator but keeps any transcript or response text that was already rendered - after cancel returns the gateway to `idle`, the UI clears the active-turn indicator but keeps any transcript or response text that was already rendered
@@ -144,6 +147,29 @@ Notes:
- no audio, STT, LLM, TTS, or external providers participate in this flow - no audio, STT, LLM, TTS, or external providers participate in this flow
- `response.cancel` can stop the mocked turn early, suppress any later mocked response events for that turn, and return the session to `idle` - `response.cancel` can stop the mocked turn early, suppress any later mocked response events for that turn, and return the session to `idle`
### Deterministic placeholder push-to-talk transcript sequence
For this increment, the existing mic-control shell still sends placeholder `input_audio.append` on press and `input_audio.commit` on release. The gateway now translates that shell flow into deterministic mocked transcript events only; it does not start an assistant response:
```text
input_audio.append #1
→ session.state(listening) when entering the turn
→ transcript.partial("[mocked partial] Placeholder push-to-talk transcript in progress.")
input_audio.append #N (N > 1)
→ transcript.partial("[mocked partial] Placeholder push-to-talk transcript in progress (N chunks).")
input_audio.commit after N appends
→ transcript.final("[mocked final] Placeholder push-to-talk transcript completed from N appended chunk(s).")
→ session.state(idle)
```
Safe deterministic edge cases for this mocked placeholder flow:
- commit without any prior append is accepted and emits `transcript.final("[mocked final] Placeholder push-to-talk transcript completed without appended audio.")`
- repeated appends during one placeholder turn are accepted and each append replaces the latest partial transcript with a chunk-count-based deterministic value
- placeholder commit does not automatically start assistant thinking, response streaming, or audio playback
## Contract Scope for This Increment ## Contract Scope for This Increment
This contract is intentionally limited to the smallest event set needed to unblock: This contract is intentionally limited to the smallest event set needed to unblock: