feat(vela): retire legacy mocked turn trigger
This commit is contained in:
@@ -11,7 +11,6 @@ const {
|
|||||||
|
|
||||||
const WEBSOCKET_ROUTE = '/ws';
|
const WEBSOCKET_ROUTE = '/ws';
|
||||||
const WEBSOCKET_GUID = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11';
|
const WEBSOCKET_GUID = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11';
|
||||||
const MOCKED_USER_TRANSCRIPT = '[mocked user] What is the current mocked vertical slice?';
|
|
||||||
const MOCKED_ASSISTANT_RESPONSE = '[mocked assistant] This is a deterministic mocked response from the gateway vertical slice.';
|
const MOCKED_ASSISTANT_RESPONSE = '[mocked assistant] This is a deterministic mocked response from the gateway vertical slice.';
|
||||||
|
|
||||||
function createPlaceholderPartialTranscript(audioChunkCount) {
|
function createPlaceholderPartialTranscript(audioChunkCount) {
|
||||||
@@ -67,7 +66,7 @@ function scheduleMockedTurnStep(session, turnId, delay, callback) {
|
|||||||
session.mockedTurnTimers.push(timer);
|
session.mockedTurnTimers.push(timer);
|
||||||
}
|
}
|
||||||
|
|
||||||
function startMockedTurn(socket, session, { transcript = MOCKED_USER_TRANSCRIPT, includeListeningState = true } = {}) {
|
function startMockedTurn(socket, session, { transcript, includeListeningState = true } = {}) {
|
||||||
if (session.mockedTurnInFlight) {
|
if (session.mockedTurnInFlight) {
|
||||||
sendSocketError(socket, 'mocked_turn_in_flight', 'Only one mocked turn can run per session at a time.');
|
sendSocketError(socket, 'mocked_turn_in_flight', 'Only one mocked turn can run per session at a time.');
|
||||||
return;
|
return;
|
||||||
@@ -246,7 +245,11 @@ function handleClientMessage(socket, session, rawMessage) {
|
|||||||
sendSocketMessage(socket, 'session.state', { value: session.state });
|
sendSocketMessage(socket, 'session.state', { value: session.state });
|
||||||
break;
|
break;
|
||||||
case 'mocked.turn.trigger':
|
case 'mocked.turn.trigger':
|
||||||
startMockedTurn(socket, session);
|
sendSocketError(
|
||||||
|
socket,
|
||||||
|
'unsupported_mocked_turn_trigger',
|
||||||
|
'mocked.turn.trigger is no longer supported; use input_audio.append and input_audio.commit instead.'
|
||||||
|
);
|
||||||
break;
|
break;
|
||||||
case 'input_audio.append':
|
case 'input_audio.append':
|
||||||
if (session.mockedTurnInFlight) {
|
if (session.mockedTurnInFlight) {
|
||||||
|
|||||||
@@ -355,7 +355,7 @@ test('websocket handles valid and invalid client messages safely', async () => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
test('websocket accepts a placeholder input cycle before a mocked turn on the same socket', async () => {
|
test('websocket accepts repeated placeholder input cycles on the same socket', async () => {
|
||||||
const server = await startServer();
|
const server = await startServer();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -403,11 +403,15 @@ test('websocket accepts a placeholder input cycle before a mocked turn on the sa
|
|||||||
payload: { value: 'idle' }
|
payload: { value: 'idle' }
|
||||||
});
|
});
|
||||||
|
|
||||||
client.sendJson({ type: 'mocked.turn.trigger', payload: {} });
|
client.sendJson({ type: 'input_audio.append', payload: { chunk: 'placeholder-control-shell-chunk-2' } });
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
assert.deepEqual(await client.nextMessage(), {
|
||||||
type: 'session.state',
|
type: 'session.state',
|
||||||
payload: { value: 'listening' }
|
payload: { value: 'listening' }
|
||||||
});
|
});
|
||||||
|
assert.deepEqual(await client.nextMessage(), {
|
||||||
|
type: 'transcript.partial',
|
||||||
|
payload: { text: '[mocked partial] Placeholder push-to-talk transcript in progress.' }
|
||||||
|
});
|
||||||
|
|
||||||
await client.close();
|
await client.close();
|
||||||
} finally {
|
} finally {
|
||||||
@@ -516,7 +520,7 @@ test('websocket emits deterministic partials for repeated appends and a determin
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
test('websocket mocked turn emits deterministic transcript and response events in order', async () => {
|
test('websocket rejects the retired mocked.turn.trigger path deterministically', async () => {
|
||||||
const server = await startServer();
|
const server = await startServer();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -526,110 +530,17 @@ test('websocket mocked turn emits deterministic transcript and response events i
|
|||||||
|
|
||||||
client.sendJson({ type: 'mocked.turn.trigger', payload: {} });
|
client.sendJson({ type: 'mocked.turn.trigger', payload: {} });
|
||||||
|
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
|
||||||
type: 'session.state',
|
|
||||||
payload: { value: 'listening' }
|
|
||||||
});
|
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
|
||||||
type: 'transcript.final',
|
|
||||||
payload: { text: '[mocked user] What is the current mocked vertical slice?' }
|
|
||||||
});
|
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
|
||||||
type: 'session.state',
|
|
||||||
payload: { value: 'thinking' }
|
|
||||||
});
|
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
|
||||||
type: 'session.state',
|
|
||||||
payload: { value: 'speaking' }
|
|
||||||
});
|
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
|
||||||
type: 'response.text.delta',
|
|
||||||
payload: { text: '[mocked assistant] ' }
|
|
||||||
});
|
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
|
||||||
type: 'response.text.delta',
|
|
||||||
payload: { text: 'This is a deterministic mocked response from the gateway vertical slice.' }
|
|
||||||
});
|
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
|
||||||
type: 'response.completed',
|
|
||||||
payload: {}
|
|
||||||
});
|
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
|
||||||
type: 'session.state',
|
|
||||||
payload: { value: 'idle' }
|
|
||||||
});
|
|
||||||
|
|
||||||
await client.close();
|
|
||||||
} finally {
|
|
||||||
await server.close();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
test('websocket rejects a second mocked turn while one is in flight', async () => {
|
|
||||||
const server = await startServer();
|
|
||||||
|
|
||||||
try {
|
|
||||||
const client = await connectWebSocket(server.port);
|
|
||||||
await client.nextMessage();
|
|
||||||
await client.nextMessage();
|
|
||||||
|
|
||||||
client.sendJson({ type: 'mocked.turn.trigger', payload: {} });
|
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
|
||||||
type: 'session.state',
|
|
||||||
payload: { value: 'listening' }
|
|
||||||
});
|
|
||||||
|
|
||||||
client.sendJson({ type: 'mocked.turn.trigger', payload: {} });
|
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
assert.deepEqual(await client.nextMessage(), {
|
||||||
type: 'error',
|
type: 'error',
|
||||||
payload: {
|
payload: {
|
||||||
code: 'mocked_turn_in_flight',
|
code: 'unsupported_mocked_turn_trigger',
|
||||||
message: 'Only one mocked turn can run per session at a time.',
|
message:
|
||||||
|
'mocked.turn.trigger is no longer supported; use input_audio.append and input_audio.commit instead.',
|
||||||
retryable: true
|
retryable: true
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
await client.close();
|
|
||||||
} finally {
|
|
||||||
await server.close();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
test('websocket cancel stops an active mocked turn and allows a new one without reconnecting', async () => {
|
|
||||||
const server = await startServer();
|
|
||||||
|
|
||||||
try {
|
|
||||||
const client = await connectWebSocket(server.port);
|
|
||||||
await client.nextMessage();
|
|
||||||
await client.nextMessage();
|
|
||||||
|
|
||||||
client.sendJson({ type: 'mocked.turn.trigger', payload: {} });
|
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
|
||||||
type: 'session.state',
|
|
||||||
payload: { value: 'listening' }
|
|
||||||
});
|
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
|
||||||
type: 'transcript.final',
|
|
||||||
payload: { text: '[mocked user] What is the current mocked vertical slice?' }
|
|
||||||
});
|
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
|
||||||
type: 'session.state',
|
|
||||||
payload: { value: 'thinking' }
|
|
||||||
});
|
|
||||||
|
|
||||||
client.sendJson({ type: 'response.cancel', payload: {} });
|
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
|
||||||
type: 'session.state',
|
|
||||||
payload: { value: 'idle' }
|
|
||||||
});
|
|
||||||
await assert.rejects(() => client.nextMessage(150), /timed out waiting for websocket message/);
|
await assert.rejects(() => client.nextMessage(150), /timed out waiting for websocket message/);
|
||||||
|
|
||||||
client.sendJson({ type: 'mocked.turn.trigger', payload: {} });
|
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
|
||||||
type: 'session.state',
|
|
||||||
payload: { value: 'listening' }
|
|
||||||
});
|
|
||||||
|
|
||||||
await client.close();
|
await client.close();
|
||||||
} finally {
|
} finally {
|
||||||
await server.close();
|
await server.close();
|
||||||
@@ -671,11 +582,15 @@ test('websocket cancel stops a push-to-talk commit response and allows another t
|
|||||||
});
|
});
|
||||||
await assert.rejects(() => client.nextMessage(150), /timed out waiting for websocket message/);
|
await assert.rejects(() => client.nextMessage(150), /timed out waiting for websocket message/);
|
||||||
|
|
||||||
client.sendJson({ type: 'mocked.turn.trigger', payload: {} });
|
client.sendJson({ type: 'input_audio.append', payload: { chunk: 'chunk-2' } });
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
assert.deepEqual(await client.nextMessage(), {
|
||||||
type: 'session.state',
|
type: 'session.state',
|
||||||
payload: { value: 'listening' }
|
payload: { value: 'listening' }
|
||||||
});
|
});
|
||||||
|
assert.deepEqual(await client.nextMessage(), {
|
||||||
|
type: 'transcript.partial',
|
||||||
|
payload: { text: '[mocked partial] Placeholder push-to-talk transcript in progress.' }
|
||||||
|
});
|
||||||
|
|
||||||
await client.close();
|
await client.close();
|
||||||
} finally {
|
} finally {
|
||||||
@@ -694,11 +609,15 @@ test('websocket safely accepts cancel when no turn is active', async () => {
|
|||||||
client.sendJson({ type: 'response.cancel', payload: {} });
|
client.sendJson({ type: 'response.cancel', payload: {} });
|
||||||
await assert.rejects(() => client.nextMessage(150), /timed out waiting for websocket message/);
|
await assert.rejects(() => client.nextMessage(150), /timed out waiting for websocket message/);
|
||||||
|
|
||||||
client.sendJson({ type: 'mocked.turn.trigger', payload: {} });
|
client.sendJson({ type: 'input_audio.append', payload: { chunk: 'chunk-1' } });
|
||||||
assert.deepEqual(await client.nextMessage(), {
|
assert.deepEqual(await client.nextMessage(), {
|
||||||
type: 'session.state',
|
type: 'session.state',
|
||||||
payload: { value: 'listening' }
|
payload: { value: 'listening' }
|
||||||
});
|
});
|
||||||
|
assert.deepEqual(await client.nextMessage(), {
|
||||||
|
type: 'transcript.partial',
|
||||||
|
payload: { text: '[mocked partial] Placeholder push-to-talk transcript in progress.' }
|
||||||
|
});
|
||||||
|
|
||||||
await client.close();
|
await client.close();
|
||||||
} finally {
|
} finally {
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ Current status:
|
|||||||
- SvelteKit app boots in the Yarn workspace
|
- SvelteKit app boots in the Yarn workspace
|
||||||
- root page shows a minimal voice-session shell with connect/disconnect controls
|
- root page shows a minimal voice-session shell with connect/disconnect controls
|
||||||
- the shell can connect to the gateway `/ws` endpoint and display developer-visible session status
|
- the shell can connect to the gateway `/ws` endpoint and display developer-visible session status
|
||||||
- the shell can trigger one deterministic mocked turn and render the mocked transcript plus assistant response
|
- the current mocked interaction path is push-to-talk only and renders the mocked transcript plus assistant response
|
||||||
- Vitest covers connect/disconnect plus the deterministic mocked transcript/response UI flow without requiring a browser harness
|
- Vitest covers connect/disconnect plus the deterministic mocked transcript/response UI flow without requiring a browser harness
|
||||||
- Playwright remains optional for deeper browser-level checks
|
- Playwright remains optional for deeper browser-level checks
|
||||||
- microphone capture and audio playback remain future increments
|
- microphone capture and audio playback remain future increments
|
||||||
|
|||||||
@@ -1,87 +1,15 @@
|
|||||||
import { expect, test } from '@playwright/test';
|
import { expect, test } from '@playwright/test';
|
||||||
|
|
||||||
const MOCKED_USER_TRANSCRIPT = '[mocked user] What is the current mocked vertical slice?';
|
|
||||||
const MOCKED_ASSISTANT_RESPONSE =
|
const MOCKED_ASSISTANT_RESPONSE =
|
||||||
'[mocked assistant] This is a deterministic mocked response from the gateway vertical slice.';
|
'[mocked assistant] This is a deterministic mocked response from the gateway vertical slice.';
|
||||||
|
|
||||||
test('voice session shell covers the mocked transcript/response slice', async ({ page }) => {
|
test('voice session shell supports the placeholder push-to-talk path', async ({ page }) => {
|
||||||
await page.goto('/');
|
await page.goto('/');
|
||||||
await expect(page.getByTestId('hydration-status')).toHaveText('ready');
|
await expect(page.getByTestId('hydration-status')).toHaveText('ready');
|
||||||
|
|
||||||
await expect(page.getByTestId('connection-state')).toHaveText('not connected');
|
await expect(page.getByTestId('connection-state')).toHaveText('not connected');
|
||||||
await expect(page.getByTestId('mocked-turn-button')).toBeDisabled();
|
|
||||||
await expect(page.getByTestId('session-id')).toHaveText('not assigned');
|
|
||||||
await expect(page.getByTestId('gateway-session-state')).toHaveText('not received');
|
|
||||||
|
|
||||||
await page.getByTestId('connect-button').click();
|
|
||||||
|
|
||||||
await expect(page.getByTestId('connection-state')).toHaveText('connected');
|
|
||||||
await expect(page.getByTestId('gateway-session-state')).toHaveText('idle');
|
|
||||||
await expect(page.getByTestId('session-id')).not.toHaveText('not assigned');
|
|
||||||
await expect(page.getByTestId('mocked-turn-button')).toBeEnabled();
|
|
||||||
const sessionId = await page.getByTestId('session-id').textContent();
|
|
||||||
|
|
||||||
await page.getByTestId('mocked-turn-button').click();
|
|
||||||
|
|
||||||
await expect(page.getByTestId('mocked-turn-status')).toHaveText('running');
|
|
||||||
await expect(page.getByTestId('user-transcript')).toHaveText('waiting for mocked transcript…');
|
|
||||||
await expect(page.getByTestId('assistant-response')).toHaveText('waiting for mocked response…');
|
|
||||||
|
|
||||||
await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT);
|
|
||||||
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
|
|
||||||
await expect(page.getByTestId('conversation-render-order')).toHaveText('transcript>response');
|
|
||||||
await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle');
|
|
||||||
|
|
||||||
await page.getByTestId('disconnect-button').click();
|
|
||||||
|
|
||||||
await expect(page.getByTestId('connection-state')).toHaveText('disconnected');
|
|
||||||
await expect(page.getByTestId('connection-detail')).toHaveText('Gateway WebSocket is closed.');
|
|
||||||
await expect(page.getByTestId('gateway-session-state')).toHaveText('idle');
|
|
||||||
await expect(page.getByTestId('session-id')).toHaveText(sessionId ?? '');
|
|
||||||
await expect(page.getByTestId('mocked-turn-button')).toBeDisabled();
|
|
||||||
await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT);
|
|
||||||
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
|
|
||||||
await expect(page.getByTestId('session-id')).toHaveText(sessionId ?? '');
|
|
||||||
await expect(page.getByTestId('gateway-session-state')).toHaveText('idle');
|
|
||||||
await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT);
|
|
||||||
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
|
|
||||||
});
|
|
||||||
|
|
||||||
test('voice session shell can cancel an active mocked turn and start another one', async ({ page }) => {
|
|
||||||
await page.goto('/');
|
|
||||||
await expect(page.getByTestId('hydration-status')).toHaveText('ready');
|
|
||||||
|
|
||||||
await expect(page.getByTestId('cancel-turn-button')).toBeDisabled();
|
|
||||||
await page.getByTestId('connect-button').click();
|
|
||||||
|
|
||||||
await expect(page.getByTestId('connection-state')).toHaveText('connected');
|
|
||||||
await expect(page.getByTestId('mocked-turn-button')).toBeEnabled();
|
|
||||||
|
|
||||||
await page.getByTestId('mocked-turn-button').click();
|
|
||||||
await expect(page.getByTestId('mocked-turn-status')).toHaveText('running');
|
|
||||||
await expect(page.getByTestId('cancel-turn-button')).toBeEnabled();
|
|
||||||
await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT);
|
|
||||||
await expect(page.getByTestId('assistant-response')).toContainText('[mocked assistant]');
|
|
||||||
|
|
||||||
await page.getByTestId('cancel-turn-button').click();
|
|
||||||
|
|
||||||
await expect(page.getByTestId('gateway-session-state')).toHaveText('idle');
|
|
||||||
await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle');
|
|
||||||
await expect(page.getByTestId('cancel-turn-button')).toBeDisabled();
|
|
||||||
await expect(page.getByTestId('mocked-turn-button')).toBeEnabled();
|
|
||||||
await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT);
|
|
||||||
await expect(page.getByTestId('assistant-response')).toContainText('[mocked assistant]');
|
|
||||||
|
|
||||||
await page.getByTestId('mocked-turn-button').click();
|
|
||||||
await expect(page.getByTestId('mocked-turn-status')).toHaveText('running');
|
|
||||||
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
|
|
||||||
await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle');
|
|
||||||
});
|
|
||||||
|
|
||||||
test('voice session shell supports a placeholder mic-control cycle before another mocked turn', async ({ page }) => {
|
|
||||||
await page.goto('/');
|
|
||||||
await expect(page.getByTestId('hydration-status')).toHaveText('ready');
|
|
||||||
await expect(page.getByTestId('mic-control-button')).toBeDisabled();
|
await expect(page.getByTestId('mic-control-button')).toBeDisabled();
|
||||||
|
await expect(page.getByTestId('mocked-turn-button')).toHaveCount(0);
|
||||||
|
|
||||||
await page.getByTestId('connect-button').click();
|
await page.getByTestId('connect-button').click();
|
||||||
|
|
||||||
@@ -95,10 +23,6 @@ test('voice session shell supports a placeholder mic-control cycle before anothe
|
|||||||
await expect(page.getByTestId('partial-transcript')).toHaveText(
|
await expect(page.getByTestId('partial-transcript')).toHaveText(
|
||||||
'[mocked partial] Placeholder push-to-talk transcript in progress.'
|
'[mocked partial] Placeholder push-to-talk transcript in progress.'
|
||||||
);
|
);
|
||||||
await expect(page.getByTestId('user-transcript')).toHaveText(
|
|
||||||
'[mocked partial] Placeholder push-to-talk transcript in progress.'
|
|
||||||
);
|
|
||||||
await expect(page.getByTestId('mocked-turn-button')).toBeDisabled();
|
|
||||||
|
|
||||||
await page.getByTestId('mic-control-button').dispatchEvent('mouseup');
|
await page.getByTestId('mic-control-button').dispatchEvent('mouseup');
|
||||||
await expect(page.getByTestId('mic-control-status')).toHaveText('idle');
|
await expect(page.getByTestId('mic-control-status')).toHaveText('idle');
|
||||||
@@ -108,15 +32,10 @@ test('voice session shell supports a placeholder mic-control cycle before anothe
|
|||||||
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
|
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
|
||||||
await expect(page.getByTestId('partial-transcript')).toHaveText('none');
|
await expect(page.getByTestId('partial-transcript')).toHaveText('none');
|
||||||
await expect(page.getByTestId('gateway-session-state')).toHaveText('idle');
|
await expect(page.getByTestId('gateway-session-state')).toHaveText('idle');
|
||||||
await expect(page.getByTestId('mocked-turn-button')).toBeEnabled();
|
await expect(page.getByTestId('mic-control-button')).toBeEnabled();
|
||||||
|
|
||||||
await page.getByTestId('mocked-turn-button').click();
|
|
||||||
await expect(page.getByTestId('mocked-turn-status')).toHaveText('running');
|
|
||||||
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
|
|
||||||
await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle');
|
|
||||||
});
|
});
|
||||||
|
|
||||||
test('voice session shell can cancel a push-to-talk mocked response and start another turn', async ({ page }) => {
|
test('voice session shell can cancel a push-to-talk mocked response and start another push-to-talk turn', async ({ page }) => {
|
||||||
await page.goto('/');
|
await page.goto('/');
|
||||||
await expect(page.getByTestId('hydration-status')).toHaveText('ready');
|
await expect(page.getByTestId('hydration-status')).toHaveText('ready');
|
||||||
|
|
||||||
@@ -140,9 +59,11 @@ test('voice session shell can cancel a push-to-talk mocked response and start an
|
|||||||
'[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
|
'[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
|
||||||
);
|
);
|
||||||
await expect(page.getByTestId('assistant-response')).toContainText('[mocked assistant]');
|
await expect(page.getByTestId('assistant-response')).toContainText('[mocked assistant]');
|
||||||
await expect(page.getByTestId('mocked-turn-button')).toBeEnabled();
|
await expect(page.getByTestId('mic-control-button')).toBeEnabled();
|
||||||
|
|
||||||
await page.getByTestId('mocked-turn-button').click();
|
await page.getByTestId('mic-control-button').dispatchEvent('mousedown');
|
||||||
|
await expect(page.getByTestId('gateway-session-state')).toHaveText('listening');
|
||||||
|
await page.getByTestId('mic-control-button').dispatchEvent('mouseup');
|
||||||
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
|
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
|
||||||
await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle');
|
await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle');
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -58,15 +58,6 @@
|
|||||||
let micControlActive = false;
|
let micControlActive = false;
|
||||||
let hydrationStatus = 'mounting';
|
let hydrationStatus = 'mounting';
|
||||||
|
|
||||||
$: canTriggerMockedTurn =
|
|
||||||
typeof WebSocket !== 'undefined' &&
|
|
||||||
connectionState === 'connected' &&
|
|
||||||
socket?.readyState === WebSocket.OPEN &&
|
|
||||||
sessionReadyReceived &&
|
|
||||||
gatewaySessionState === 'idle' &&
|
|
||||||
!micControlActive &&
|
|
||||||
!mockedTurnInFlight;
|
|
||||||
|
|
||||||
$: canCancelMockedTurn =
|
$: canCancelMockedTurn =
|
||||||
typeof WebSocket !== 'undefined' &&
|
typeof WebSocket !== 'undefined' &&
|
||||||
connectionState === 'connected' &&
|
connectionState === 'connected' &&
|
||||||
@@ -132,33 +123,6 @@
|
|||||||
socket.send(JSON.stringify(createMessageEnvelope('input_audio.commit', {})));
|
socket.send(JSON.stringify(createMessageEnvelope('input_audio.commit', {})));
|
||||||
}
|
}
|
||||||
|
|
||||||
function triggerMockedTurn() {
|
|
||||||
if (!socket || socket.readyState !== WebSocket.OPEN || connectionState !== 'connected') {
|
|
||||||
connectionDetail = 'Connect to the gateway before triggering a mocked turn.';
|
|
||||||
lastError = 'mocked turn requires an active WebSocket connection';
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!sessionReadyReceived) {
|
|
||||||
connectionDetail = 'Wait for the gateway session to be ready before triggering a mocked turn.';
|
|
||||||
lastError = 'mocked turn requires session.ready';
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (mockedTurnInFlight) {
|
|
||||||
connectionDetail = 'A mocked turn is already running for this session.';
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
mockedUserTranscript = 'waiting for mocked transcript…';
|
|
||||||
inProgressPartialTranscript = 'none';
|
|
||||||
mockedAssistantResponse = 'waiting for mocked response…';
|
|
||||||
mockedTurnInFlight = true;
|
|
||||||
assistantResponseExpected = true;
|
|
||||||
lastError = 'none';
|
|
||||||
socket.send(JSON.stringify(createMessageEnvelope('mocked.turn.trigger', {})));
|
|
||||||
}
|
|
||||||
|
|
||||||
function cancelActiveResponse() {
|
function cancelActiveResponse() {
|
||||||
if (!socket || socket.readyState !== WebSocket.OPEN || connectionState !== 'connected') {
|
if (!socket || socket.readyState !== WebSocket.OPEN || connectionState !== 'connected') {
|
||||||
connectionDetail = 'Connect to the gateway before cancelling a mocked turn.';
|
connectionDetail = 'Connect to the gateway before cancelling a mocked turn.';
|
||||||
@@ -363,9 +327,8 @@
|
|||||||
<p class="eyebrow">Vela UI</p>
|
<p class="eyebrow">Vela UI</p>
|
||||||
<h1>Voice session shell</h1>
|
<h1>Voice session shell</h1>
|
||||||
<p>
|
<p>
|
||||||
This minimal browser shell can connect to the gateway WebSocket, trigger one deterministic
|
This minimal browser shell can connect to the gateway WebSocket and expose a push-to-talk
|
||||||
mocked turn, and expose a push-to-talk control shell that only sends placeholder protocol
|
control shell that only sends placeholder protocol events for the active session.
|
||||||
events for the active session.
|
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p class="contract-note">
|
<p class="contract-note">
|
||||||
@@ -408,9 +371,6 @@
|
|||||||
>
|
>
|
||||||
Disconnect
|
Disconnect
|
||||||
</button>
|
</button>
|
||||||
<button data-testid="mocked-turn-button" on:click={triggerMockedTurn} disabled={!canTriggerMockedTurn}>
|
|
||||||
Run mocked turn
|
|
||||||
</button>
|
|
||||||
<button data-testid="cancel-turn-button" on:click={cancelActiveResponse} disabled={!canCancelMockedTurn}>
|
<button data-testid="cancel-turn-button" on:click={cancelActiveResponse} disabled={!canCancelMockedTurn}>
|
||||||
Cancel active turn
|
Cancel active turn
|
||||||
</button>
|
</button>
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ describe('voice session shell', () => {
|
|||||||
render(VoiceSessionShell);
|
render(VoiceSessionShell);
|
||||||
|
|
||||||
expect(getByTestId('connection-state').textContent).toBe('not connected');
|
expect(getByTestId('connection-state').textContent).toBe('not connected');
|
||||||
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(true);
|
expect(screen.queryByTestId('mocked-turn-button')).toBeNull();
|
||||||
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(true);
|
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(true);
|
||||||
|
|
||||||
await fireEvent.click(getByTestId('connect-button'));
|
await fireEvent.click(getByTestId('connect-button'));
|
||||||
@@ -78,7 +78,7 @@ describe('voice session shell', () => {
|
|||||||
|
|
||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
expect(getByTestId('connection-state').textContent).toBe('connected');
|
expect(getByTestId('connection-state').textContent).toBe('connected');
|
||||||
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(false);
|
expect(screen.queryByTestId('mocked-turn-button')).toBeNull();
|
||||||
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(false);
|
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -87,13 +87,13 @@ describe('voice session shell', () => {
|
|||||||
|
|
||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
expect(getByTestId('connection-state').textContent).toBe('disconnected');
|
expect(getByTestId('connection-state').textContent).toBe('disconnected');
|
||||||
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(true);
|
expect(screen.queryByTestId('mocked-turn-button')).toBeNull();
|
||||||
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(true);
|
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(true);
|
||||||
expect(getByTestId('session-id').textContent).toBe('session-123');
|
expect(getByTestId('session-id').textContent).toBe('session-123');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('runs a placeholder mic-control cycle, streams mocked assistant text, and keeps mocked turn usable on the same socket', async () => {
|
it('runs a placeholder mic-control cycle, streams mocked assistant text, and leaves push-to-talk usable on the same socket', async () => {
|
||||||
render(VoiceSessionShell);
|
render(VoiceSessionShell);
|
||||||
|
|
||||||
await fireEvent.click(getByTestId('connect-button'));
|
await fireEvent.click(getByTestId('connect-button'));
|
||||||
@@ -131,7 +131,6 @@ describe('voice session shell', () => {
|
|||||||
|
|
||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
expect(getByTestId('gateway-session-state').textContent).toBe('listening');
|
expect(getByTestId('gateway-session-state').textContent).toBe('listening');
|
||||||
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(true);
|
|
||||||
expect(getByTestId('user-transcript').textContent).toBe(
|
expect(getByTestId('user-transcript').textContent).toBe(
|
||||||
'[mocked partial] Placeholder push-to-talk transcript in progress.'
|
'[mocked partial] Placeholder push-to-talk transcript in progress.'
|
||||||
);
|
);
|
||||||
@@ -168,7 +167,7 @@ describe('voice session shell', () => {
|
|||||||
|
|
||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
expect(getByTestId('gateway-session-state').textContent).toBe('idle');
|
expect(getByTestId('gateway-session-state').textContent).toBe('idle');
|
||||||
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(false);
|
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(false);
|
||||||
expect(getByTestId('user-transcript').textContent).toBe(
|
expect(getByTestId('user-transcript').textContent).toBe(
|
||||||
'[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
|
'[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
|
||||||
);
|
);
|
||||||
@@ -177,11 +176,6 @@ describe('voice session shell', () => {
|
|||||||
);
|
);
|
||||||
expect(getByTestId('partial-transcript').textContent).toBe('none');
|
expect(getByTestId('partial-transcript').textContent).toBe('none');
|
||||||
});
|
});
|
||||||
|
|
||||||
await fireEvent.click(getByTestId('mocked-turn-button'));
|
|
||||||
|
|
||||||
expect(socket.sent).toHaveLength(3);
|
|
||||||
expect(JSON.parse(socket.sent[2]).type).toBe('mocked.turn.trigger');
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('keeps rendered push-to-talk transcript and assistant text visible after cancel', async () => {
|
it('keeps rendered push-to-talk transcript and assistant text visible after cancel', async () => {
|
||||||
@@ -239,118 +233,33 @@ describe('voice session shell', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('renders mocked transcript before assistant response for a connected session', async () => {
|
it('shows protocol errors returned for the retired mocked turn trigger path', async () => {
|
||||||
render(VoiceSessionShell);
|
render(VoiceSessionShell);
|
||||||
|
|
||||||
await fireEvent.click(getByTestId('connect-button'));
|
await fireEvent.click(getByTestId('connect-button'));
|
||||||
const socket = MockWebSocket.latest();
|
const socket = MockWebSocket.latest();
|
||||||
socket.open();
|
socket.open();
|
||||||
socket.message(createMessageEnvelope('session.ready', { sessionId: 'session-456' }));
|
socket.message(createMessageEnvelope('session.ready', { sessionId: 'session-legacy' }));
|
||||||
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
|
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
|
||||||
|
|
||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
expect(getByTestId('connection-state').textContent).toBe('connected');
|
expect(getByTestId('connection-state').textContent).toBe('connected');
|
||||||
});
|
});
|
||||||
|
|
||||||
await fireEvent.click(getByTestId('mocked-turn-button'));
|
|
||||||
|
|
||||||
expect(socket.sent).toHaveLength(1);
|
|
||||||
const sentMessage = JSON.parse(socket.sent[0]);
|
|
||||||
expect(sentMessage.type).toBe('mocked.turn.trigger');
|
|
||||||
|
|
||||||
socket.message(createMessageEnvelope('session.state', { value: 'listening' }));
|
|
||||||
socket.message(
|
socket.message(
|
||||||
createMessageEnvelope('transcript.partial', {
|
createMessageEnvelope('error', {
|
||||||
text: '[mocked partial] Placeholder push-to-talk transcript in progress.'
|
code: 'unsupported_mocked_turn_trigger',
|
||||||
|
message:
|
||||||
|
'mocked.turn.trigger is no longer supported; use input_audio.append and input_audio.commit instead.',
|
||||||
|
retryable: true
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
socket.message(createMessageEnvelope('transcript.final', { text: 'Turn on the office lamp.' }));
|
|
||||||
socket.message(createMessageEnvelope('session.state', { value: 'thinking' }));
|
|
||||||
socket.message(createMessageEnvelope('session.state', { value: 'speaking' }));
|
|
||||||
socket.message(createMessageEnvelope('response.text.delta', { text: 'Mocked ' }));
|
|
||||||
socket.message(createMessageEnvelope('response.text.delta', { text: 'assistant response.' }));
|
|
||||||
socket.message(createMessageEnvelope('response.completed', { reason: 'mocked_turn_complete' }));
|
|
||||||
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
|
|
||||||
|
|
||||||
await waitFor(() => {
|
await waitFor(() => {
|
||||||
expect(getByTestId('user-transcript').textContent).toBe('Turn on the office lamp.');
|
expect(getByTestId('last-error').textContent).toBe(
|
||||||
expect(getByTestId('partial-transcript').textContent).toBe('none');
|
'unsupported_mocked_turn_trigger: mocked.turn.trigger is no longer supported; use input_audio.append and input_audio.commit instead.'
|
||||||
expect(getByTestId('assistant-response').textContent).toBe('Mocked assistant response.');
|
);
|
||||||
expect(getByTestId('conversation-render-order').textContent).toBe('transcript>response');
|
|
||||||
expect(getByTestId('gateway-session-state').textContent).toBe('idle');
|
expect(getByTestId('gateway-session-state').textContent).toBe('idle');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('blocks mocked turn trigger before session.ready and allows it after session.ready', async () => {
|
|
||||||
render(VoiceSessionShell);
|
|
||||||
|
|
||||||
await fireEvent.click(getByTestId('connect-button'));
|
|
||||||
const socket = MockWebSocket.latest();
|
|
||||||
socket.open();
|
|
||||||
|
|
||||||
await waitFor(() => {
|
|
||||||
expect(getByTestId('connection-state').textContent).toBe('connected');
|
|
||||||
});
|
|
||||||
|
|
||||||
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(true);
|
|
||||||
await fireEvent.click(getByTestId('mocked-turn-button'));
|
|
||||||
expect(socket.sent).toHaveLength(0);
|
|
||||||
expect(getByTestId('last-error').textContent).toBe('mocked turn requires session.ready');
|
|
||||||
|
|
||||||
socket.message(createMessageEnvelope('session.ready', { sessionId: 'session-789' }));
|
|
||||||
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
|
|
||||||
|
|
||||||
await waitFor(() => {
|
|
||||||
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(false);
|
|
||||||
});
|
|
||||||
|
|
||||||
await fireEvent.click(getByTestId('mocked-turn-button'));
|
|
||||||
|
|
||||||
expect(socket.sent).toHaveLength(1);
|
|
||||||
expect(JSON.parse(socket.sent[0]).type).toBe('mocked.turn.trigger');
|
|
||||||
});
|
|
||||||
|
|
||||||
it('shows cancel control during an active mocked turn and preserves rendered text after cancel', async () => {
|
|
||||||
render(VoiceSessionShell);
|
|
||||||
|
|
||||||
await fireEvent.click(getByTestId('connect-button'));
|
|
||||||
const socket = MockWebSocket.latest();
|
|
||||||
socket.open();
|
|
||||||
socket.message(createMessageEnvelope('session.ready', { sessionId: 'session-cancel' }));
|
|
||||||
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
|
|
||||||
|
|
||||||
await waitFor(() => {
|
|
||||||
expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(true);
|
|
||||||
});
|
|
||||||
|
|
||||||
await fireEvent.click(getByTestId('mocked-turn-button'));
|
|
||||||
|
|
||||||
expect(JSON.parse(socket.sent[0]).type).toBe('mocked.turn.trigger');
|
|
||||||
|
|
||||||
socket.message(createMessageEnvelope('session.state', { value: 'listening' }));
|
|
||||||
socket.message(createMessageEnvelope('transcript.final', { text: 'Keep this transcript.' }));
|
|
||||||
socket.message(createMessageEnvelope('session.state', { value: 'thinking' }));
|
|
||||||
socket.message(createMessageEnvelope('session.state', { value: 'speaking' }));
|
|
||||||
socket.message(createMessageEnvelope('response.text.delta', { text: 'Partial response' }));
|
|
||||||
|
|
||||||
await waitFor(() => {
|
|
||||||
expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(false);
|
|
||||||
expect(getByTestId('assistant-response').textContent).toBe('Partial response');
|
|
||||||
});
|
|
||||||
|
|
||||||
await fireEvent.click(getByTestId('cancel-turn-button'));
|
|
||||||
|
|
||||||
expect(JSON.parse(socket.sent[1]).type).toBe('response.cancel');
|
|
||||||
|
|
||||||
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
|
|
||||||
|
|
||||||
await waitFor(() => {
|
|
||||||
expect(getByTestId('mocked-turn-status').textContent).toBe('idle');
|
|
||||||
expect(getByTestId('gateway-session-state').textContent).toBe('idle');
|
|
||||||
expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(true);
|
|
||||||
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(false);
|
|
||||||
expect(getByTestId('user-transcript').textContent).toBe('Keep this transcript.');
|
|
||||||
expect(getByTestId('assistant-response').textContent).toBe('Partial response');
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -36,14 +36,13 @@ The repository now includes separate runnable workspaces for the UI and gateway
|
|||||||
- PWA enabled
|
- PWA enabled
|
||||||
- WebSocket client
|
- WebSocket client
|
||||||
|
|
||||||
The current implementation is a minimal SvelteKit app with a single voice-session shell page. The shipped UI can open and close a browser WebSocket connection to the gateway `/ws` endpoint, show explicit connection status (`not connected`, `connecting`, `connected`, `disconnected`, `error`), expose mic control shell interactions that emit placeholder `input_audio.append` / `input_audio.commit` events, trigger one deterministic mocked turn while connected, render deterministic placeholder partial/final transcripts for the push-to-talk shell, and stream the mocked assistant response both for `mocked.turn.trigger` and for push-to-talk commits. This remains a shell only: there is no real microphone capture, real provider integration, or audio playback yet.
|
The current implementation is a minimal SvelteKit app with a single voice-session shell page. The shipped UI can open and close a browser WebSocket connection to the gateway `/ws` endpoint, show explicit connection status (`not connected`, `connecting`, `connected`, `disconnected`, `error`), expose mic control shell interactions that emit placeholder `input_audio.append` / `input_audio.commit` events, render deterministic placeholder partial/final transcripts for the push-to-talk shell, and stream the mocked assistant response after push-to-talk commit. This remains a shell only: there is no real microphone capture, real provider integration, or audio playback yet.
|
||||||
|
|
||||||
#### Responsibilities
|
#### Responsibilities
|
||||||
|
|
||||||
Current shell responsibilities:
|
Current shell responsibilities:
|
||||||
|
|
||||||
- connection state rendering
|
- connection state rendering
|
||||||
- mocked-turn trigger rendering with disconnected/in-flight guards
|
|
||||||
- mocked transcript and mocked assistant response rendering
|
- mocked transcript and mocked assistant response rendering
|
||||||
- developer-oriented session metadata rendering
|
- developer-oriented session metadata rendering
|
||||||
- browser session connect/disconnect controls
|
- browser session connect/disconnect controls
|
||||||
@@ -62,7 +61,6 @@ Current shell:
|
|||||||
- developer-focused voice-session panel
|
- developer-focused voice-session panel
|
||||||
- connect button
|
- connect button
|
||||||
- disconnect button
|
- disconnect button
|
||||||
- mocked-turn button
|
|
||||||
- connection status indicator
|
- connection status indicator
|
||||||
- mocked transcript display
|
- mocked transcript display
|
||||||
- mocked assistant response display
|
- mocked assistant response display
|
||||||
@@ -106,14 +104,14 @@ The current implementation is a minimal Fastify service with `/`, `/health`, and
|
|||||||
- the gateway sends `session.ready` followed by `session.state` (`idle`) when the socket is established
|
- the gateway sends `session.ready` followed by `session.state` (`idle`) when the socket is established
|
||||||
- valid minimal client events, including placeholder `input_audio.append` / `input_audio.commit`, can move the session through the mocked turn states on one socket
|
- valid minimal client events, including placeholder `input_audio.append` / `input_audio.commit`, can move the session through the mocked turn states on one socket
|
||||||
- placeholder `input_audio.append` emits deterministic mocked `transcript.partial` events and `input_audio.commit` emits one deterministic mocked `transcript.final` before starting the existing mocked assistant response stream
|
- placeholder `input_audio.append` emits deterministic mocked `transcript.partial` events and `input_audio.commit` emits one deterministic mocked `transcript.final` before starting the existing mocked assistant response stream
|
||||||
- `mocked.turn.trigger` drives a fixed transcript/response event sequence over the existing shared protocol
|
|
||||||
- only one mocked turn is allowed in flight per session at a time
|
- only one mocked turn is allowed in flight per session at a time
|
||||||
- invalid JSON, invalid envelopes, and malformed frames are handled defensively so the process stays up
|
- invalid JSON, invalid envelopes, and malformed frames are handled defensively so the process stays up
|
||||||
|
- retired `mocked.turn.trigger` messages are rejected with a deterministic recoverable error
|
||||||
|
|
||||||
### Current UI shell behavior
|
### Current UI shell behavior
|
||||||
|
|
||||||
- renders a minimal developer-focused voice-session panel
|
- renders a minimal developer-focused voice-session panel
|
||||||
- exposes connect, disconnect, mic-control shell interactions, and mocked-turn controls
|
- exposes connect, disconnect, and mic-control shell interactions
|
||||||
- does not request microphone permission or capture real microphone audio
|
- does not request microphone permission or capture real microphone audio
|
||||||
- only emits placeholder `input_audio.append` / `input_audio.commit` events; it does not send real audio data or play back audio
|
- only emits placeholder `input_audio.append` / `input_audio.commit` events; it does not send real audio data or play back audio
|
||||||
- renders the latest placeholder partial transcript during a push-to-talk shell turn, replaces it with the final deterministic transcript on commit, and appends streamed mocked assistant text for that same push-to-talk turn
|
- renders the latest placeholder partial transcript during a push-to-talk shell turn, replaces it with the final deterministic transcript on commit, and appends streamed mocked assistant text for that same push-to-talk turn
|
||||||
@@ -122,7 +120,7 @@ The current implementation is a minimal Fastify service with `/`, `/health`, and
|
|||||||
## Voice Pipeline
|
## Voice Pipeline
|
||||||
|
|
||||||
```text
|
```text
|
||||||
Mic control shell / mocked turn button → Placeholder `input_audio.append` / `input_audio.commit` or mocked session flow → Deterministic transcript events → Shared mocked response engine → Mocked response text events → UI
|
Mic control shell → Placeholder `input_audio.append` / `input_audio.commit` → Deterministic transcript events → Shared mocked response engine → Mocked response text events → UI
|
||||||
```
|
```
|
||||||
|
|
||||||
This mocked vertical slice intentionally stands in for the future real pipeline:
|
This mocked vertical slice intentionally stands in for the future real pipeline:
|
||||||
|
|||||||
@@ -183,16 +183,15 @@ Polish the system after the core voice loop is reliable.
|
|||||||
|
|
||||||
- `apps/vela-ui` now boots as a minimal SvelteKit app with a starter page
|
- `apps/vela-ui` now boots as a minimal SvelteKit app with a starter page
|
||||||
- `apps/vela-ui` now includes a minimal voice-session shell that can connect to the gateway `/ws` endpoint and display developer-visible session status
|
- `apps/vela-ui` now includes a minimal voice-session shell that can connect to the gateway `/ws` endpoint and display developer-visible session status
|
||||||
- `apps/vela-ui` can now trigger one deterministic mocked turn while connected and render the mocked transcript plus assistant response for the active session
|
|
||||||
- `apps/vela-ui` now exposes a visible push-to-talk mic control shell that sends placeholder `input_audio.append` / `input_audio.commit` events without requesting browser mic permission or capturing real audio
|
- `apps/vela-ui` now exposes a visible push-to-talk mic control shell that sends placeholder `input_audio.append` / `input_audio.commit` events without requesting browser mic permission or capturing real audio
|
||||||
- `apps/vela-ui` now includes browser-level coverage for the mocked transcript/response slice, including connect, disconnect, and disconnected-state trigger guarding
|
- `apps/vela-ui` now includes browser-level coverage for the placeholder push-to-talk mocked transcript/response slice, including connect, disconnect, and cancel behavior
|
||||||
- `apps/vela-gateway` now boots as a minimal Fastify app with `/` and `/health` endpoints
|
- `apps/vela-gateway` now boots as a minimal Fastify app with `/` and `/health` endpoints
|
||||||
- `apps/vela-gateway` now exposes a minimal `/ws` WebSocket session skeleton with ephemeral in-memory sessions and defensive message handling
|
- `apps/vela-gateway` now exposes a minimal `/ws` WebSocket session skeleton with ephemeral in-memory sessions and defensive message handling
|
||||||
- `apps/vela-gateway` now accepts `mocked.turn.trigger` and emits protocol-valid mocked transcript/response events with one in-flight mocked turn per session
|
- `apps/vela-gateway` now rejects retired `mocked.turn.trigger` requests with a deterministic recoverable error instead of starting a mocked turn
|
||||||
- `apps/vela-gateway` now supports placeholder input-audio append/commit cycles before running another mocked turn on the same socket
|
- `apps/vela-gateway` now supports repeated placeholder input-audio append/commit cycles on the same socket
|
||||||
- `apps/vela-gateway` now emits deterministic `transcript.partial` events for placeholder `input_audio.append` messages and, after each accepted `input_audio.commit`, reuses the mocked response engine to stream a deterministic assistant reply for that push-to-talk turn
|
- `apps/vela-gateway` now emits deterministic `transcript.partial` events for placeholder `input_audio.append` messages and, after each accepted `input_audio.commit`, reuses the mocked response engine to stream a deterministic assistant reply for that push-to-talk turn
|
||||||
- `apps/vela-ui` now renders the latest placeholder partial transcript during the push-to-talk shell turn, replaces it with the deterministic final transcript on commit, and shows streamed assistant text for the same push-to-talk flow
|
- `apps/vela-ui` now renders the latest placeholder partial transcript during the push-to-talk shell turn, replaces it with the deterministic final transcript on commit, and shows streamed assistant text for that same push-to-talk flow
|
||||||
- `apps/vela-ui` now exposes a cancel control for active mocked turns and mocked push-to-talk responses, and keeps already-rendered transcript/response text visible after cancellation
|
- `apps/vela-ui` now exposes a cancel control for active push-to-talk-triggered mocked responses, and keeps already-rendered transcript/response text visible after cancellation
|
||||||
- `apps/vela-gateway` now honors `response.cancel` during mocked turns and push-to-talk-triggered mocked responses by stopping pending mocked response events, returning the session to `idle`, and allowing a new turn on the same socket
|
- `apps/vela-gateway` now honors `response.cancel` during push-to-talk-triggered mocked responses by stopping pending mocked response events, returning the session to `idle`, and allowing a new turn on the same socket
|
||||||
- `apps/vela-protocol` now provides the shared WebSocket event contract for the UI and gateway
|
- `apps/vela-protocol` now provides the shared WebSocket event contract for the UI and gateway
|
||||||
- backend framework choice is now concrete: Fastify
|
- backend framework choice is now concrete: Fastify
|
||||||
|
|||||||
@@ -43,6 +43,7 @@ Vela is a fully local, voice-first assistant system with:
|
|||||||
|
|
||||||
- browser-based PWA
|
- browser-based PWA
|
||||||
- push-to-talk interaction
|
- push-to-talk interaction
|
||||||
|
- the current mocked vertical slice starts turns only through the placeholder push-to-talk shell
|
||||||
- transcript and response display
|
- transcript and response display
|
||||||
- playback of streamed or returned audio
|
- playback of streamed or returned audio
|
||||||
|
|
||||||
|
|||||||
@@ -15,8 +15,8 @@ Current UI baseline:
|
|||||||
|
|
||||||
- the browser opens a WebSocket directly to `/ws`
|
- the browser opens a WebSocket directly to `/ws`
|
||||||
- the UI tracks connection status separately from gateway session status
|
- the UI tracks connection status separately from gateway session status
|
||||||
- the UI can send `mocked.turn.trigger` after `session.ready` while connected to request one deterministic mocked turn for the active session
|
|
||||||
- the UI exposes a push-to-talk mic control shell that sends placeholder `input_audio.append` on press and `input_audio.commit` on release without capturing real audio
|
- the UI exposes a push-to-talk mic control shell that sends placeholder `input_audio.append` on press and `input_audio.commit` on release without capturing real audio
|
||||||
|
- the push-to-talk shell is the only supported mocked turn entry path from the shipped UI
|
||||||
|
|
||||||
## WebSocket Message Envelope
|
## WebSocket Message Envelope
|
||||||
|
|
||||||
@@ -50,7 +50,7 @@ type ClientEvent =
|
|||||||
#### Client event intent
|
#### Client event intent
|
||||||
|
|
||||||
- `session.start` initializes a voice session without locking in transport or auth details yet
|
- `session.start` initializes a voice session without locking in transport or auth details yet
|
||||||
- `mocked.turn.trigger` asks the gateway to run one obviously mocked, deterministic transcript/response turn
|
- `mocked.turn.trigger` is a retired legacy event name that the gateway now rejects with a deterministic recoverable error
|
||||||
- `input_audio.append` carries a chunk of captured input audio as an encoded string
|
- `input_audio.append` carries a chunk of captured input audio as an encoded string
|
||||||
- `input_audio.commit` marks the current buffered user turn as ready for downstream processing
|
- `input_audio.commit` marks the current buffered user turn as ready for downstream processing
|
||||||
- `response.cancel` interrupts the active listen/think/speak flow
|
- `response.cancel` interrupts the active listen/think/speak flow
|
||||||
@@ -59,15 +59,13 @@ type ClientEvent =
|
|||||||
|
|
||||||
- on connect, the gateway creates an ephemeral in-memory session and emits `session.ready` plus `session.state`
|
- on connect, the gateway creates an ephemeral in-memory session and emits `session.ready` plus `session.state`
|
||||||
- `session.start` is accepted as an idempotent session acknowledgment and re-sends readiness/state
|
- `session.start` is accepted as an idempotent session acknowledgment and re-sends readiness/state
|
||||||
- `mocked.turn.trigger` is accepted only when no other mocked turn is already in flight for that session
|
- `mocked.turn.trigger` is rejected deterministically with `error.code = unsupported_mocked_turn_trigger`
|
||||||
- a mocked turn emits deterministic `transcript.final`, `response.text.delta`, `response.completed`, and `session.state` events in protocol-valid order
|
|
||||||
- `input_audio.append` updates the ephemeral session record and moves the session to `listening`
|
- `input_audio.append` updates the ephemeral session record and moves the session to `listening`
|
||||||
- each accepted `input_audio.append` emits one deterministic `transcript.partial` for the current placeholder turn
|
- each accepted `input_audio.append` emits one deterministic `transcript.partial` for the current placeholder turn
|
||||||
- `input_audio.commit` emits exactly one deterministic `transcript.final` and then starts the same deterministic mocked assistant response stream used by `mocked.turn.trigger`
|
- `input_audio.commit` emits exactly one deterministic `transcript.final` and then starts the deterministic mocked assistant response stream for that push-to-talk turn
|
||||||
- after a completed placeholder input cycle, the same socket can still send `mocked.turn.trigger`
|
- after a completed placeholder input cycle, the same socket can start another placeholder push-to-talk turn without reconnecting
|
||||||
- `response.cancel` is safe to send even when no mocked turn is active
|
- `response.cancel` is safe to send even when no mocked turn is active
|
||||||
- `response.cancel` stops any still-pending mocked turn events for the active turn and resets the minimal session state back to `idle`
|
- `response.cancel` stops any still-pending mocked turn events for the active turn and resets the minimal session state back to `idle`
|
||||||
- a second mocked-turn trigger during an active mocked turn produces `error` with code `mocked_turn_in_flight`
|
|
||||||
- malformed JSON produces `error` with code `invalid_json`
|
- malformed JSON produces `error` with code `invalid_json`
|
||||||
- invalid envelopes or unsupported client event names produce `error` with code `invalid_message`
|
- invalid envelopes or unsupported client event names produce `error` with code `invalid_message`
|
||||||
- malformed WebSocket frames are rejected without crashing the gateway process
|
- malformed WebSocket frames are rejected without crashing the gateway process
|
||||||
@@ -88,7 +86,6 @@ Notes:
|
|||||||
|
|
||||||
- this UI state is transport-oriented and is separate from the shared gateway `session.state` payload
|
- this UI state is transport-oriented and is separate from the shared gateway `session.state` payload
|
||||||
- `session.state` currently reflects the gateway session phase (`idle`, `listening`, `thinking`, `speaking`)
|
- `session.state` currently reflects the gateway session phase (`idle`, `listening`, `thinking`, `speaking`)
|
||||||
- the UI disables the mocked-turn control until `session.ready` arrives, while disconnected, or while a mocked turn is already in flight
|
|
||||||
- the UI disables the mic control while disconnected, before `session.ready`, or while a mocked turn is already in flight
|
- the UI disables the mic control while disconnected, before `session.ready`, or while a mocked turn is already in flight
|
||||||
- pressing the mic control sends one placeholder `input_audio.append` chunk and releasing it sends `input_audio.commit`
|
- pressing the mic control sends one placeholder `input_audio.append` chunk and releasing it sends `input_audio.commit`
|
||||||
- while a placeholder push-to-talk turn is in progress, the UI renders the latest `transcript.partial`
|
- while a placeholder push-to-talk turn is in progress, the UI renders the latest `transcript.partial`
|
||||||
@@ -126,26 +123,19 @@ type ServerEvent =
|
|||||||
- `response.completed` marks the current assistant turn as done
|
- `response.completed` marks the current assistant turn as done
|
||||||
- `error` is the minimal recoverable failure shape for both UI and gateway work
|
- `error` is the minimal recoverable failure shape for both UI and gateway work
|
||||||
|
|
||||||
### Deterministic mocked turn sequence
|
### Legacy mocked turn trigger rejection
|
||||||
|
|
||||||
For this increment, `mocked.turn.trigger` produces one fixed interaction for the active session:
|
For this increment, direct `mocked.turn.trigger` requests no longer start a mocked turn:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
session.state(listening)
|
mocked.turn.trigger
|
||||||
→ transcript.final("[mocked user] What is the current mocked vertical slice?")
|
→ error(code="unsupported_mocked_turn_trigger", message="mocked.turn.trigger is no longer supported; use input_audio.append and input_audio.commit instead.")
|
||||||
→ session.state(thinking)
|
|
||||||
→ session.state(speaking)
|
|
||||||
→ response.text.delta("[mocked assistant] ")
|
|
||||||
→ response.text.delta("This is a deterministic mocked response from the gateway vertical slice.")
|
|
||||||
→ response.completed
|
|
||||||
→ session.state(idle)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Notes:
|
Notes:
|
||||||
|
|
||||||
- the content is intentionally fixed and obviously mocked
|
- this rejection is deterministic and recoverable
|
||||||
- no audio, STT, LLM, TTS, or external providers participate in this flow
|
- the session remains available for the supported push-to-talk flow on the same socket
|
||||||
- `response.cancel` can stop the mocked turn early, suppress any later mocked response events for that turn, and return the session to `idle`
|
|
||||||
|
|
||||||
### Deterministic placeholder push-to-talk transcript and mocked response sequence
|
### Deterministic placeholder push-to-talk transcript and mocked response sequence
|
||||||
|
|
||||||
@@ -173,8 +163,8 @@ Safe deterministic edge cases for this mocked placeholder flow:
|
|||||||
|
|
||||||
- commit without any prior append is accepted and emits `transcript.final("[mocked final] Placeholder push-to-talk transcript completed without appended audio.")`
|
- commit without any prior append is accepted and emits `transcript.final("[mocked final] Placeholder push-to-talk transcript completed without appended audio.")`
|
||||||
- repeated appends during one placeholder turn are accepted and each append replaces the latest partial transcript with a chunk-count-based deterministic value
|
- repeated appends during one placeholder turn are accepted and each append replaces the latest partial transcript with a chunk-count-based deterministic value
|
||||||
- after the final transcript, placeholder commit follows the same mocked `thinking → speaking → response.text.delta* → response.completed → idle` path as `mocked.turn.trigger`
|
- after the final transcript, placeholder commit follows the deterministic mocked `thinking → speaking → response.text.delta* → response.completed → idle` path
|
||||||
- `response.cancel` can interrupt this mocked post-commit response path the same way it interrupts `mocked.turn.trigger`; already-rendered transcript or assistant text is not retracted
|
- `response.cancel` can interrupt this mocked post-commit response path; already-rendered transcript or assistant text is not retracted
|
||||||
|
|
||||||
## Contract Scope for This Increment
|
## Contract Scope for This Increment
|
||||||
|
|
||||||
@@ -207,7 +197,7 @@ Current mocked-pipeline behavior:
|
|||||||
- during an active mocked turn, `response.cancel` returns the session to `idle` immediately
|
- during an active mocked turn, `response.cancel` returns the session to `idle` immediately
|
||||||
- any mocked turn timers that have not fired yet are dropped, so no later `response.text.delta` or `response.completed` events are emitted for the cancelled turn
|
- any mocked turn timers that have not fired yet are dropped, so no later `response.text.delta` or `response.completed` events are emitted for the cancelled turn
|
||||||
- the same cancellation behavior applies when a mocked turn was started by `input_audio.commit`
|
- the same cancellation behavior applies when a mocked turn was started by `input_audio.commit`
|
||||||
- once `idle` is restored, the same WebSocket session can start another mocked turn without reconnecting
|
- once `idle` is restored, the same WebSocket session can start another placeholder push-to-talk turn without reconnecting
|
||||||
|
|
||||||
More general future-state expectations:
|
More general future-state expectations:
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user