feat(vela): add mocked turn transcript response slice

This commit is contained in:
2026-04-08 19:39:20 +02:00
parent 4b11703c93
commit ff78fc4c8f
20 changed files with 997 additions and 372 deletions

2
.gitignore vendored
View File

@@ -5,3 +5,5 @@ dist/
build/ build/
.svelte-kit/ .svelte-kit/
.vite/ .vite/
playwright-report/
test-results/

View File

@@ -7,4 +7,5 @@ Current status:
- Fastify server boots in the Yarn workspace - Fastify server boots in the Yarn workspace
- `/` and `/health` endpoints provide a runnable service baseline - `/` and `/health` endpoints provide a runnable service baseline
- WebSocket session skeleton wiring now exists - WebSocket session skeleton wiring now exists
- Full WebSocket session orchestration and behavior remain future work - the gateway accepts one deterministic mocked-turn trigger per session and emits protocol-valid transcript/response events
- Full provider-backed WebSocket session orchestration remains future work

View File

@@ -11,6 +11,8 @@ const {
const WEBSOCKET_ROUTE = '/ws'; const WEBSOCKET_ROUTE = '/ws';
const WEBSOCKET_GUID = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11'; const WEBSOCKET_GUID = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11';
// Fixed transcript text emitted as the `transcript.final` payload of a mocked turn.
const MOCKED_USER_TRANSCRIPT = '[mocked user] What is the current mocked vertical slice?';
// Full mocked assistant reply. startMockedTurn streams it as two deltas: the literal
// '[mocked assistant] ' prefix, then this string with that prefix removed, so the text
// must keep starting with exactly that prefix for the deltas to add up to it.
const MOCKED_ASSISTANT_RESPONSE = '[mocked assistant] This is a deterministic mocked response from the gateway vertical slice.';
function createSessionRecord() { function createSessionRecord() {
return { return {
@@ -18,10 +20,62 @@ function createSessionRecord() {
connectedAt: new Date().toISOString(), connectedAt: new Date().toISOString(),
state: 'idle', state: 'idle',
audioChunkCount: 0, audioChunkCount: 0,
started: false started: false,
mockedTurnInFlight: false,
mockedTurnTimers: []
}; };
} }
/**
 * Cancels every pending mocked-turn timer tracked on the session and marks the
 * mocked turn as no longer running.
 *
 * @param {{ mockedTurnTimers: Array, mockedTurnInFlight: boolean }} session
 *   Session record whose mocked-turn bookkeeping is reset in place.
 */
function clearMockedTurn(session) {
  session.mockedTurnTimers.forEach((pendingTimer) => {
    clearTimeout(pendingTimer);
  });

  session.mockedTurnInFlight = false;
  // Start over with a fresh list rather than emptying the old array.
  session.mockedTurnTimers = [];
}
/**
 * Schedules one step of a mocked turn and tracks its timer on the session so the
 * step can be cancelled later.
 *
 * @param {{ mockedTurnTimers: Array }} session - Session record that owns the timer list.
 * @param {number} delay - Delay in milliseconds handed to `setTimeout`.
 * @param {() => void} callback - Step to run when the timer fires.
 */
function scheduleMockedTurnStep(session, delay, callback) {
  let handle;

  function runStep() {
    // Drop this timer from the tracked list before running the step, so the step
    // itself sees only the timers that are still pending.
    session.mockedTurnTimers = session.mockedTurnTimers.filter((pending) => pending !== handle);
    callback();
  }

  handle = setTimeout(runStep, delay);
  session.mockedTurnTimers.push(handle);
}
/**
 * Starts the deterministic mocked turn for a session.
 *
 * If a mocked turn is already running, a `mocked_turn_in_flight` error is sent and
 * nothing else happens. Otherwise the session moves to `listening` immediately and
 * four timed steps are scheduled: final transcript + `thinking` (75 ms), `speaking` +
 * first response delta (150 ms), second response delta (225 ms), and
 * `response.completed` + cleanup + `idle` (300 ms).
 *
 * @param {*} socket - Connection passed through to the send/state helpers.
 * @param {object} session - Session record whose mocked-turn state is updated in place.
 */
function startMockedTurn(socket, session) {
  if (session.mockedTurnInFlight) {
    sendSocketError(socket, 'mocked_turn_in_flight', 'Only one mocked turn can run per session at a time.');
    return;
  }

  clearMockedTurn(session);
  session.audioChunkCount = 0;
  session.mockedTurnInFlight = true;
  updateSessionState(socket, session, 'listening');

  // Each entry is [delay in ms, step]; every delay is measured from now, not from
  // the previous step.
  const timeline = [
    [
      75,
      () => {
        sendSocketMessage(socket, 'transcript.final', { text: MOCKED_USER_TRANSCRIPT });
        updateSessionState(socket, session, 'thinking');
      }
    ],
    [
      150,
      () => {
        updateSessionState(socket, session, 'speaking');
        sendSocketMessage(socket, 'response.text.delta', { text: '[mocked assistant] ' });
      }
    ],
    [
      225,
      () => {
        // Second delta carries the reply without the prefix already sent above.
        const remainder = MOCKED_ASSISTANT_RESPONSE.replace('[mocked assistant] ', '');
        sendSocketMessage(socket, 'response.text.delta', { text: remainder });
      }
    ],
    [
      300,
      () => {
        sendSocketMessage(socket, 'response.completed', {});
        clearMockedTurn(session);
        updateSessionState(socket, session, 'idle');
      }
    ]
  ];

  for (const [delayMs, step] of timeline) {
    scheduleMockedTurnStep(session, delayMs, step);
  }
}
function createWebSocketAcceptValue(key) { function createWebSocketAcceptValue(key) {
return crypto.createHash('sha1').update(`${key}${WEBSOCKET_GUID}`).digest('base64'); return crypto.createHash('sha1').update(`${key}${WEBSOCKET_GUID}`).digest('base64');
} }
@@ -164,15 +218,29 @@ function handleClientMessage(socket, session, rawMessage) {
sendSocketMessage(socket, 'session.ready', { sessionId: session.id }); sendSocketMessage(socket, 'session.ready', { sessionId: session.id });
sendSocketMessage(socket, 'session.state', { value: session.state }); sendSocketMessage(socket, 'session.state', { value: session.state });
break; break;
case 'mocked.turn.trigger':
startMockedTurn(socket, session);
break;
case 'input_audio.append': case 'input_audio.append':
if (session.mockedTurnInFlight) {
sendSocketError(socket, 'mocked_turn_in_flight', 'Wait for the mocked turn to finish before sending more input.');
break;
}
session.audioChunkCount += 1; session.audioChunkCount += 1;
updateSessionState(socket, session, 'listening'); updateSessionState(socket, session, 'listening');
break; break;
case 'input_audio.commit': case 'input_audio.commit':
if (session.mockedTurnInFlight) {
sendSocketError(socket, 'mocked_turn_in_flight', 'Wait for the mocked turn to finish before committing input.');
break;
}
session.audioChunkCount = 0; session.audioChunkCount = 0;
updateSessionState(socket, session, 'idle'); updateSessionState(socket, session, 'idle');
break; break;
case 'response.cancel': case 'response.cancel':
clearMockedTurn(session);
session.audioChunkCount = 0; session.audioChunkCount = 0;
updateSessionState(socket, session, 'idle'); updateSessionState(socket, session, 'idle');
break; break;
@@ -241,6 +309,7 @@ function registerWebSocketSessionRoute(app) {
} }
closed = true; closed = true;
clearMockedTurn(session);
app.websocketSessions.delete(session.id); app.websocketSessions.delete(session.id);
app.log.info({ sessionId: session.id }, 'websocket session disconnected'); app.log.info({ sessionId: session.id }, 'websocket session disconnected');
}; };

View File

@@ -326,3 +326,82 @@ test('websocket handles valid and invalid client messages safely', async () => {
await server.close(); await server.close();
} }
}); });
// End-to-end check of the mocked turn: after one trigger, the gateway must emit exactly
// this sequence of events, in this order.
test('websocket mocked turn emits deterministic transcript and response events in order', async () => {
  const expectedEvents = [
    { type: 'session.state', payload: { value: 'listening' } },
    {
      type: 'transcript.final',
      payload: { text: '[mocked user] What is the current mocked vertical slice?' }
    },
    { type: 'session.state', payload: { value: 'thinking' } },
    { type: 'session.state', payload: { value: 'speaking' } },
    { type: 'response.text.delta', payload: { text: '[mocked assistant] ' } },
    {
      type: 'response.text.delta',
      payload: { text: 'This is a deterministic mocked response from the gateway vertical slice.' }
    },
    { type: 'response.completed', payload: {} },
    { type: 'session.state', payload: { value: 'idle' } }
  ];

  const server = await startServer();

  try {
    const client = await connectWebSocket(server.port);

    // Discard the first two messages the gateway sends on a new connection
    // (presumably session.ready and the initial session.state; they are not asserted here).
    await client.nextMessage();
    await client.nextMessage();

    client.sendJson({ type: 'mocked.turn.trigger', payload: {} });

    for (const expectedEvent of expectedEvents) {
      assert.deepEqual(await client.nextMessage(), expectedEvent);
    }

    await client.close();
  } finally {
    await server.close();
  }
});
// A second trigger sent while the first mocked turn is still running must be answered
// with a retryable `mocked_turn_in_flight` error.
test('websocket rejects a second mocked turn while one is in flight', async () => {
  const triggerMessage = { type: 'mocked.turn.trigger', payload: {} };
  const server = await startServer();

  try {
    const client = await connectWebSocket(server.port);

    // Discard the first two messages the gateway sends on a new connection
    // (presumably session.ready and the initial session.state; they are not asserted here).
    await client.nextMessage();
    await client.nextMessage();

    client.sendJson(triggerMessage);
    const firstReply = await client.nextMessage();
    assert.deepEqual(firstReply, {
      type: 'session.state',
      payload: { value: 'listening' }
    });

    // The first turn is still in flight at this point, so this trigger is rejected.
    client.sendJson(triggerMessage);
    const secondReply = await client.nextMessage();
    assert.deepEqual(secondReply, {
      type: 'error',
      payload: {
        code: 'mocked_turn_in_flight',
        message: 'Only one mocked turn can run per session at a time.',
        retryable: true
      }
    });

    await client.close();
  } finally {
    await server.close();
  }
});

View File

@@ -4,6 +4,7 @@ const SESSION_STATES = Object.freeze(['idle', 'listening', 'thinking', 'speaking
const CLIENT_EVENT_TYPES = Object.freeze([ const CLIENT_EVENT_TYPES = Object.freeze([
'session.start', 'session.start',
'mocked.turn.trigger',
'input_audio.append', 'input_audio.append',
'input_audio.commit', 'input_audio.commit',
'response.cancel' 'response.cancel'

View File

@@ -7,6 +7,7 @@ export type MessageEnvelope<TType extends string, TPayload> = {
export type ClientEventPayloads = { export type ClientEventPayloads = {
'session.start': Record<string, never>; 'session.start': Record<string, never>;
'mocked.turn.trigger': Record<string, never>;
'input_audio.append': { 'input_audio.append': {
chunk: string; chunk: string;
}; };

View File

@@ -4,6 +4,7 @@ export const SESSION_STATES = Object.freeze(['idle', 'listening', 'thinking', 's
export const CLIENT_EVENT_TYPES = Object.freeze([ export const CLIENT_EVENT_TYPES = Object.freeze([
'session.start', 'session.start',
'mocked.turn.trigger',
'input_audio.append', 'input_audio.append',
'input_audio.commit', 'input_audio.commit',
'response.cancel' 'response.cancel'

View File

@@ -7,4 +7,7 @@ Current status:
- SvelteKit app boots in the Yarn workspace - SvelteKit app boots in the Yarn workspace
- root page shows a minimal voice-session shell with connect/disconnect controls - root page shows a minimal voice-session shell with connect/disconnect controls
- the shell can connect to the gateway `/ws` endpoint and display developer-visible session status - the shell can connect to the gateway `/ws` endpoint and display developer-visible session status
- microphone capture, transcript rendering, and audio playback remain future increments - the shell can trigger one deterministic mocked turn and render the mocked transcript plus assistant response
- Vitest covers connect/disconnect plus the deterministic mocked transcript/response UI flow without requiring a browser harness
- Playwright remains optional for deeper browser-level checks
- microphone capture and audio playback remain future increments

View File

@@ -0,0 +1,48 @@
import { expect, test } from '@playwright/test';
const MOCKED_USER_TRANSCRIPT = '[mocked user] What is the current mocked vertical slice?';
const MOCKED_ASSISTANT_RESPONSE =
'[mocked assistant] This is a deterministic mocked response from the gateway vertical slice.';
// Browser-level walkthrough of the mocked slice: initial state, connect, run one mocked
// turn, then disconnect and confirm the rendered conversation is kept.
test('voice session shell covers the mocked transcript/response slice', async ({ page }) => {
  await page.goto('/');

  // Before connecting: the component is mounted and nothing is available yet.
  await expect(page.getByTestId('hydration-status')).toHaveText('ready');
  await expect(page.getByTestId('connection-state')).toHaveText('not connected');
  await expect(page.getByTestId('mocked-turn-button')).toBeDisabled();
  await expect(page.getByTestId('session-id')).toHaveText('not assigned');
  await expect(page.getByTestId('gateway-session-state')).toHaveText('not received');

  // Connect and wait for the gateway session to become usable.
  await page.getByTestId('connect-button').click();
  await expect(page.getByTestId('connection-state')).toHaveText('connected');
  await expect(page.getByTestId('gateway-session-state')).toHaveText('idle');
  await expect(page.getByTestId('session-id')).not.toHaveText('not assigned');
  await expect(page.getByTestId('mocked-turn-button')).toBeEnabled();
  const sessionId = await page.getByTestId('session-id').textContent();

  // Run the mocked turn.
  await page.getByTestId('mocked-turn-button').click();
  // NOTE(review): the next three assertions observe transient UI states that only last
  // until the gateway's first mocked events arrive (the gateway schedules them tens of
  // milliseconds after the trigger). If this spec turns out flaky, look here first.
  await expect(page.getByTestId('mocked-turn-status')).toHaveText('running');
  await expect(page.getByTestId('user-transcript')).toHaveText('waiting for mocked transcript…');
  await expect(page.getByTestId('assistant-response')).toHaveText('waiting for mocked response…');

  // Final rendered conversation for the turn.
  await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT);
  await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
  await expect(page.getByTestId('conversation-render-order')).toHaveText('transcript>response');
  await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle');

  // Disconnect: the connection closes, but the session details and conversation stay
  // on screen. Each post-disconnect expectation is asserted once.
  await page.getByTestId('disconnect-button').click();
  await expect(page.getByTestId('connection-state')).toHaveText('disconnected');
  await expect(page.getByTestId('connection-detail')).toHaveText('Gateway WebSocket is closed.');
  await expect(page.getByTestId('gateway-session-state')).toHaveText('idle');
  await expect(page.getByTestId('session-id')).toHaveText(sessionId ?? '');
  await expect(page.getByTestId('mocked-turn-button')).toBeDisabled();
  await expect(page.getByTestId('user-transcript')).toHaveText(MOCKED_USER_TRANSCRIPT);
  await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
});

View File

@@ -4,22 +4,28 @@
"version": "0.0.0", "version": "0.0.0",
"description": "Minimal SvelteKit app for the Vela browser UI.", "description": "Minimal SvelteKit app for the Vela browser UI.",
"type": "module", "type": "module",
"scripts": { "scripts": {
"dev": "svelte-kit sync && vite dev", "dev": "svelte-kit sync && vite dev",
"build": "svelte-kit sync && vite build", "build": "svelte-kit sync && vite build",
"preview": "vite preview", "preview": "vite preview",
"check": "svelte-kit sync && svelte-check --tsconfig ./jsconfig.json" "check": "svelte-kit sync && svelte-check --tsconfig ./jsconfig.json",
}, "test": "vitest run",
"test:e2e": "playwright test"
},
"dependencies": { "dependencies": {
"@vela/protocol": "0.0.0", "@vela/protocol": "0.0.0",
"@sveltejs/adapter-auto": "^3.3.1", "@sveltejs/adapter-auto": "^3.3.1",
"@sveltejs/kit": "^2.17.1", "@sveltejs/kit": "^2.17.1",
"svelte": "^5.19.5" "svelte": "^5.19.5"
}, },
"devDependencies": { "devDependencies": {
"@sveltejs/vite-plugin-svelte": "^5.0.3", "@playwright/test": "^1.54.2",
"svelte-check": "^4.1.4", "@sveltejs/vite-plugin-svelte": "^5.0.3",
"typescript": "^5.7.3", "@testing-library/svelte": "^5.2.8",
"vite": "^6.0.11" "jsdom": "^26.1.0",
} "svelte-check": "^4.1.4",
"typescript": "^5.7.3",
"vite": "^6.0.11",
"vitest": "^3.2.4"
}
} }

View File

@@ -0,0 +1,41 @@
import { defineConfig } from '@playwright/test';
import { fileURLToPath } from 'node:url';
import path from 'node:path';
// Directory containing this config file, and the repository root two levels above it.
const workspaceDir = path.dirname(fileURLToPath(import.meta.url));
const repoRoot = path.resolve(workspaceDir, '..', '..');

// Optional override: run against a specific Chromium binary when the variable is set.
const chromiumExecutablePath = process.env.PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH;
const launchOptions = chromiumExecutablePath ? { executablePath: chromiumExecutablePath } : undefined;

// Gateway process the UI connects to; considered up once /health responds.
const gatewayServer = {
  command: `HOST=127.0.0.1 PORT=3001 npm run start --workspace vela-gateway --prefix "${repoRoot}"`,
  url: 'http://127.0.0.1:3001/health',
  reuseExistingServer: true,
  timeout: 15_000
};

// UI dev server, pointed at the gateway WebSocket endpoint above.
const uiDevServer = {
  command: `VITE_VELA_GATEWAY_WS_URL=ws://127.0.0.1:3001/ws npm run dev --workspace vela-ui --prefix "${repoRoot}" -- --host 127.0.0.1 --port 4173`,
  url: 'http://127.0.0.1:4173',
  reuseExistingServer: true,
  timeout: 15_000
};

export default defineConfig({
  testDir: './e2e',
  timeout: 15_000,
  expect: {
    timeout: 5_000
  },
  // Single worker, no parallelism.
  fullyParallel: false,
  workers: 1,
  reporter: 'line',
  use: {
    baseURL: 'http://127.0.0.1:4173',
    browserName: 'chromium',
    headless: true,
    launchOptions
  },
  webServer: [gatewayServer, uiDevServer]
});

View File

@@ -0,0 +1,480 @@
<script>
import { onDestroy, onMount } from 'svelte';
import {
CLIENT_EVENT_TYPES,
PROTOCOL_PACKAGE_NAME,
SERVER_EVENT_TYPES,
SESSION_STATES,
createMessageEnvelope,
isMessageEnvelope,
isServerEventType
} from '@vela/protocol';
// Port the gateway is expected on during local development. Kept as a string because
// it is compared against `window.location.port`, which is a string.
const DEFAULT_GATEWAY_PORT = '3001';
// URL returned when no override is configured and there is no `window` to derive one from.
const FALLBACK_GATEWAY_URL = `ws://localhost:${DEFAULT_GATEWAY_PORT}/ws`;
// Optional explicit gateway WebSocket URL; takes precedence over any derived URL when set.
const configuredGatewayUrl = import.meta.env.VITE_VELA_GATEWAY_WS_URL;
/**
 * Picks the gateway WebSocket URL to connect to.
 *
 * Order of precedence: the configured URL, then the fixed fallback when there is no
 * `window`, then a URL derived from the page location. On localhost/127.0.0.1 pages
 * not already served from the gateway port, the gateway port is substituted; otherwise
 * the page's own host is used.
 *
 * @returns {string} WebSocket URL ending in `/ws` (or the configured URL as given).
 */
function resolveGatewayWebSocketUrl() {
  if (configuredGatewayUrl) {
    return configuredGatewayUrl;
  }

  if (typeof window === 'undefined') {
    return FALLBACK_GATEWAY_URL;
  }

  const { protocol, hostname, port, host } = window.location;
  // Secure pages get a secure socket scheme.
  const scheme = protocol === 'https:' ? 'wss:' : 'ws:';
  const servedLocally = hostname === 'localhost' || hostname === '127.0.0.1';
  const authority =
    servedLocally && port !== DEFAULT_GATEWAY_PORT ? `${hostname}:${DEFAULT_GATEWAY_PORT}` : host;

  return `${scheme}//${authority}/ws`;
}
/**
 * Builds the one-line summary shown for a socket close event.
 *
 * @param {{ code: number, wasClean: boolean, reason?: string }} event - Close event.
 * @returns {string} e.g. `code 1000, clean yes (client disconnect)`; the parenthesised
 *   part is omitted when the event carries no reason.
 */
function formatCloseReason(event) {
  const { code, wasClean, reason } = event;
  const cleanLabel = wasClean ? 'yes' : 'no';
  const summary = `code ${code}, clean ${cleanLabel}`;

  if (!reason) {
    return summary;
  }

  return `${summary} (${reason})`;
}
// --- Connection status shown in the developer-facing meta panel ---
let gatewayWebSocketUrl = resolveGatewayWebSocketUrl();
let connectionState = 'not connected';
let connectionDetail = 'Socket is idle.';
// Last `session.state` value reported by the gateway.
let gatewaySessionState = 'not received';
let sessionId = 'not assigned';
// Set once `session.ready` arrives; required before a mocked turn may be triggered.
let sessionReadyReceived = false;
let lastServerEvent = 'none';
let lastError = 'none';
let lastClose = 'not closed';
// The active WebSocket, or null when there is none.
let socket = null;
let connectionAttempts = 0;
// --- Mocked turn rendering state ---
let mockedUserTranscript = 'none';
let mockedAssistantResponse = 'none';
// True from the moment a mocked turn is triggered until it completes, errors or the socket closes.
let mockedTurnInFlight = false;
// Records which parts ('transcript', 'response') have been rendered, in arrival order.
let mockedConversationRenderOrder = [];
// Flips to 'ready' in onMount so tests can wait for the component to be mounted.
let hydrationStatus = 'mounting';
// Reactive: the "Run mocked turn" button is enabled only when the socket is open, the
// session is ready and no mocked turn is running. The `typeof WebSocket` guard keeps the
// expression safe where WebSocket is not defined.
$: canTriggerMockedTurn =
typeof WebSocket !== 'undefined' &&
connectionState === 'connected' &&
socket?.readyState === WebSocket.OPEN &&
sessionReadyReceived &&
!mockedTurnInFlight;
/**
 * Detaches all four event handlers from a socket so it can no longer call back into
 * this component.
 *
 * @param {object} targetSocket - Socket whose `onopen`, `onmessage`, `onerror` and
 *   `onclose` properties are set to null.
 */
function clearSocketHandlers(targetSocket) {
  for (const handlerName of ['onopen', 'onmessage', 'onerror', 'onclose']) {
    targetSocket[handlerName] = null;
  }
}
/**
 * Returns every per-session display value to its initial placeholder. Called before
 * a new connection attempt so nothing from a previous session is shown.
 */
function resetSessionStatus() {
  // Mocked conversation rendering.
  mockedConversationRenderOrder = [];
  mockedTurnInFlight = false;
  mockedAssistantResponse = 'none';
  mockedUserTranscript = 'none';

  // Gateway session details.
  lastServerEvent = 'none';
  sessionReadyReceived = false;
  sessionId = 'not assigned';
  gatewaySessionState = 'not received';
}
/**
 * Asks the gateway to run one mocked turn on the active session.
 *
 * Does nothing except update the status text when there is no open, connected socket,
 * when `session.ready` has not been received yet, or when a mocked turn is already
 * running. Otherwise it resets the rendered conversation to its "waiting" placeholders,
 * marks the turn as in flight and sends a `mocked.turn.trigger` envelope.
 */
function triggerMockedTurn() {
  if (!socket || socket.readyState !== WebSocket.OPEN || connectionState !== 'connected') {
    connectionDetail = 'Connect to the gateway before triggering a mocked turn.';
    lastError = 'mocked turn requires an active WebSocket connection';
    return;
  }

  if (!sessionReadyReceived) {
    connectionDetail = 'Wait for the gateway session to be ready before triggering a mocked turn.';
    lastError = 'mocked turn requires session.ready';
    return;
  }

  if (mockedTurnInFlight) {
    connectionDetail = 'A mocked turn is already running for this session.';
    return;
  }

  mockedUserTranscript = 'waiting for mocked transcript…';
  mockedAssistantResponse = 'waiting for mocked response…';
  // Start the render-order record afresh for this turn. Without this, a second turn in
  // the same session would append to the previous turn's entries and the order hook
  // would read "transcript>response>transcript".
  mockedConversationRenderOrder = [];
  mockedTurnInFlight = true;
  lastError = 'none';
  socket.send(JSON.stringify(createMessageEnvelope('mocked.turn.trigger', {})));
}
/**
 * Opens a new WebSocket to the gateway and wires up its handlers.
 *
 * Does nothing when there is no `window`, or when a socket already exists and the
 * connection is connecting/connected. Every handler first checks that its socket is
 * still the active one, so events from a replaced socket are ignored.
 */
function connect() {
if (typeof window === 'undefined') {
return;
}
if (socket && (connectionState === 'connecting' || connectionState === 'connected')) {
return;
}
// Fresh attempt: re-resolve the URL and clear everything left from a previous session.
gatewayWebSocketUrl = resolveGatewayWebSocketUrl();
resetSessionStatus();
lastError = 'none';
lastClose = 'not closed';
connectionState = 'connecting';
connectionDetail = 'Opening WebSocket connection to gateway.';
connectionAttempts += 1;
const nextSocket = new WebSocket(gatewayWebSocketUrl);
socket = nextSocket;
nextSocket.onopen = () => {
if (socket !== nextSocket) {
return;
}
connectionState = 'connected';
connectionDetail = 'Gateway WebSocket is open.';
};
nextSocket.onmessage = ({ data }) => {
// Messages from a replaced socket and non-string frames are dropped without comment.
if (socket !== nextSocket || typeof data !== 'string') {
return;
}
let message;
try {
message = JSON.parse(data);
} catch {
connectionState = 'error';
connectionDetail = 'Received non-JSON message from gateway.';
lastError = 'invalid server message: JSON parse failed';
return;
}
// Anything that is not a valid envelope with a known server event type is an error.
if (!isMessageEnvelope(message) || !isServerEventType(message.type)) {
connectionState = 'error';
connectionDetail = 'Received unsupported message from gateway.';
lastError = 'invalid server message: envelope or event type mismatch';
return;
}
lastServerEvent = message.type;
if (message.type === 'session.ready') {
sessionReadyReceived = true;
sessionId = message.payload.sessionId;
return;
}
if (message.type === 'session.state') {
gatewaySessionState = message.payload.value;
// The gateway going back to idle also ends any mocked turn tracked on this side.
if (message.payload.value === 'idle') {
mockedTurnInFlight = false;
}
return;
}
if (message.type === 'transcript.final') {
mockedUserTranscript = message.payload.text;
// Swap the response placeholder for an ellipsis until the first delta arrives.
mockedAssistantResponse = '…';
mockedConversationRenderOrder = [...mockedConversationRenderOrder, 'transcript'];
return;
}
if (message.type === 'response.text.delta') {
// Record 'response' only once, however many deltas arrive.
if (!mockedConversationRenderOrder.includes('response')) {
mockedConversationRenderOrder = [...mockedConversationRenderOrder, 'response'];
}
// The first delta replaces whichever placeholder is showing; later deltas are appended.
mockedAssistantResponse =
mockedAssistantResponse === 'none' ||
mockedAssistantResponse === 'waiting for mocked response…' ||
mockedAssistantResponse === '…'
? message.payload.text
: `${mockedAssistantResponse}${message.payload.text}`;
return;
}
if (message.type === 'response.completed') {
mockedTurnInFlight = false;
return;
}
if (message.type === 'error') {
// Only errors explicitly marked `retryable: false` put the connection into the error
// state; any other error is reported as recoverable and leaves the turn state alone.
if (message.payload.retryable === false) {
mockedTurnInFlight = false;
connectionState = 'error';
connectionDetail = 'Gateway reported a protocol error.';
} else {
connectionDetail = 'Gateway reported a recoverable protocol error.';
}
lastError = `${message.payload.code}: ${message.payload.message}`;
}
};
nextSocket.onerror = () => {
if (socket !== nextSocket) {
return;
}
connectionState = 'error';
connectionDetail = 'Browser reported a WebSocket error.';
lastError = 'browser websocket error';
};
nextSocket.onclose = (event) => {
if (socket !== nextSocket) {
return;
}
lastClose = formatCloseReason(event);
mockedTurnInFlight = false;
// Keep an earlier error state visible instead of overwriting it with 'disconnected'.
connectionState = connectionState === 'error' ? 'error' : 'disconnected';
connectionDetail =
connectionState === 'error' ? 'Socket closed after an error.' : 'Gateway WebSocket is closed.';
// Detach the handlers and forget the socket so a later connect() starts clean.
clearSocketHandlers(nextSocket);
socket = null;
};
}
/**
 * Closes the active socket on user request.
 *
 * With no socket, the UI is simply marked as disconnected. With a socket, the close
 * is requested only while it is open or still connecting; the final state is set by
 * the socket's own close handler.
 */
function disconnect() {
  if (socket) {
    connectionDetail = 'Closing WebSocket connection.';

    const closableStates = [WebSocket.OPEN, WebSocket.CONNECTING];
    if (closableStates.includes(socket.readyState)) {
      socket.close(1000, 'client disconnect');
    }
    return;
  }

  connectionState = 'disconnected';
  connectionDetail = 'No active socket to close.';
}
// On teardown, silently drop the active socket: detach its handlers first so the close
// does not write to component state, then close it if it is open or still connecting.
onDestroy(() => {
  const activeSocket = socket;
  if (!activeSocket) {
    return;
  }

  socket = null;
  clearSocketHandlers(activeSocket);

  const { readyState } = activeSocket;
  if (readyState === WebSocket.OPEN || readyState === WebSocket.CONNECTING) {
    activeSocket.close(1000, 'page dispose');
  }
});

// Expose a "mounted" marker that tests can wait for.
onMount(() => {
  hydrationStatus = 'ready';
});
</script>
<div class="page">
<section class="card">
<p class="eyebrow">Vela UI</p>
<h1>Voice session shell</h1>
<p>
This minimal browser shell can connect to the gateway WebSocket, trigger one deterministic
mocked turn, and render the mocked transcript plus assistant response for the active session.
</p>
<p class="contract-note">
Shared protocol package loaded with {CLIENT_EVENT_TYPES.length} client event types and
{SERVER_EVENT_TYPES.length} server event types across {SESSION_STATES.length} gateway session
states.
</p>
<div class="controls">
<button
data-testid="connect-button"
on:click={connect}
disabled={connectionState === 'connecting' || connectionState === 'connected'}
>
Connect
</button>
<button
data-testid="disconnect-button"
on:click={disconnect}
disabled={!socket && connectionState !== 'connected' && connectionState !== 'connecting'}
>
Disconnect
</button>
<button data-testid="mocked-turn-button" on:click={triggerMockedTurn} disabled={!canTriggerMockedTurn}>
Run mocked turn
</button>
</div>
<div class="conversation">
<div>
<span>Mocked user transcript</span>
<p data-testid="user-transcript">{mockedUserTranscript}</p>
</div>
<div>
<span>Mocked assistant response</span>
<p data-testid="assistant-response">{mockedAssistantResponse}</p>
</div>
</div>
<p class="test-hook" data-testid="conversation-render-order">{mockedConversationRenderOrder.join('>') || 'none'}</p>
<p class="test-hook" data-testid="hydration-status">{hydrationStatus}</p>
<div class="meta">
<div>
<span>UI connection state</span>
<strong data-testid="connection-state">{connectionState}</strong>
</div>
<div>
<span>Connection detail</span>
<strong data-testid="connection-detail">{connectionDetail}</strong>
</div>
<div>
<span>Gateway WebSocket URL</span>
<strong data-testid="gateway-url">{gatewayWebSocketUrl}</strong>
</div>
<div>
<span>Session ID</span>
<strong data-testid="session-id">{sessionId}</strong>
</div>
<div>
<span>Gateway session state</span>
<strong data-testid="gateway-session-state">{gatewaySessionState}</strong>
</div>
<div>
<span>Last server event</span>
<strong data-testid="last-server-event">{lastServerEvent}</strong>
</div>
<div>
<span>Last error</span>
<strong data-testid="last-error">{lastError}</strong>
</div>
<div>
<span>Last close</span>
<strong data-testid="last-close">{lastClose}</strong>
</div>
<div>
<span>Connection attempts</span>
<strong data-testid="connection-attempts">{connectionAttempts}</strong>
</div>
<div>
<span>Mocked turn status</span>
<strong data-testid="mocked-turn-status">{mockedTurnInFlight ? 'running' : 'idle'}</strong>
</div>
<div>
<span>Protocol package</span>
<strong>{PROTOCOL_PACKAGE_NAME}</strong>
</div>
</div>
</section>
</div>
<style>
:global(body) {
margin: 0;
font-family: Inter, system-ui, sans-serif;
background: #08111f;
color: #e6eef8;
}
.page {
min-height: 100vh;
display: grid;
place-items: center;
padding: 2rem;
}
.card {
max-width: 52rem;
padding: 2rem;
border: 1px solid #1f3147;
border-radius: 1rem;
background: linear-gradient(180deg, #0d1728 0%, #0a1321 100%);
box-shadow: 0 24px 80px rgba(0, 0, 0, 0.35);
}
.eyebrow {
margin: 0 0 0.5rem;
text-transform: uppercase;
letter-spacing: 0.12em;
font-size: 0.8rem;
color: #8bb9ff;
}
h1 {
margin: 0 0 1rem;
font-size: clamp(2rem, 5vw, 3rem);
}
p {
margin: 0;
line-height: 1.6;
color: #c7d6e8;
}
.contract-note {
margin-top: 1rem;
font-size: 0.95rem;
color: #9ab4d1;
}
.controls {
margin-top: 1.5rem;
display: flex;
gap: 0.75rem;
flex-wrap: wrap;
}
button {
padding: 0.75rem 1rem;
border-radius: 999px;
border: 1px solid #36516f;
background: #102138;
color: #e6eef8;
font: inherit;
cursor: pointer;
}
button:disabled {
cursor: not-allowed;
opacity: 0.55;
}
.conversation {
margin-top: 1.5rem;
display: grid;
gap: 1rem;
}
.conversation div,
.meta div {
padding: 1rem;
border-radius: 0.75rem;
background: rgba(139, 185, 255, 0.08);
}
.test-hook {
position: absolute;
left: -9999px;
top: auto;
width: 1px;
height: 1px;
overflow: hidden;
}
.meta {
margin-top: 1.5rem;
display: grid;
gap: 1rem;
grid-template-columns: repeat(auto-fit, minmax(14rem, 1fr));
}
span {
display: block;
font-size: 0.85rem;
color: #8da3bf;
margin-bottom: 0.35rem;
}
strong {
font-size: 1rem;
word-break: break-word;
}
</style>

View File

@@ -7,352 +7,7 @@
</svelte:head> </svelte:head>
<script> <script>
import { onDestroy } from 'svelte'; import VoiceSessionShell from '$lib/VoiceSessionShell.svelte';
import {
CLIENT_EVENT_TYPES,
PROTOCOL_PACKAGE_NAME,
SERVER_EVENT_TYPES,
SESSION_STATES,
isMessageEnvelope,
isServerEventType
} from '@vela/protocol';
const DEFAULT_GATEWAY_PORT = '3001';
const FALLBACK_GATEWAY_URL = `ws://localhost:${DEFAULT_GATEWAY_PORT}/ws`;
const configuredGatewayUrl = import.meta.env.VITE_VELA_GATEWAY_WS_URL;
function resolveGatewayWebSocketUrl() {
if (configuredGatewayUrl) {
return configuredGatewayUrl;
}
if (typeof window === 'undefined') {
return FALLBACK_GATEWAY_URL;
}
const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
const isLocalhost = ['localhost', '127.0.0.1'].includes(window.location.hostname);
if (isLocalhost && window.location.port !== DEFAULT_GATEWAY_PORT) {
return `${protocol}//${window.location.hostname}:${DEFAULT_GATEWAY_PORT}/ws`;
}
return `${protocol}//${window.location.host}/ws`;
}
function formatCloseReason(event) {
const reason = event.reason ? ` (${event.reason})` : '';
return `code ${event.code}, clean ${event.wasClean ? 'yes' : 'no'}${reason}`;
}
let gatewayWebSocketUrl = resolveGatewayWebSocketUrl();
let connectionState = 'not connected';
let connectionDetail = 'Socket is idle.';
let gatewaySessionState = 'not received';
let sessionId = 'not assigned';
let lastServerEvent = 'none';
let lastError = 'none';
let lastClose = 'not closed';
let socket = null;
let connectionAttempts = 0;
function clearSocketHandlers(targetSocket) {
targetSocket.onopen = null;
targetSocket.onmessage = null;
targetSocket.onerror = null;
targetSocket.onclose = null;
}
function resetSessionStatus() {
gatewaySessionState = 'not received';
sessionId = 'not assigned';
lastServerEvent = 'none';
}
function connect() {
if (typeof window === 'undefined') {
return;
}
if (socket && (connectionState === 'connecting' || connectionState === 'connected')) {
return;
}
gatewayWebSocketUrl = resolveGatewayWebSocketUrl();
resetSessionStatus();
lastError = 'none';
lastClose = 'not closed';
connectionState = 'connecting';
connectionDetail = 'Opening WebSocket connection to gateway.';
connectionAttempts += 1;
const nextSocket = new WebSocket(gatewayWebSocketUrl);
socket = nextSocket;
nextSocket.onopen = () => {
if (socket !== nextSocket) {
return;
}
connectionState = 'connected';
connectionDetail = 'Gateway WebSocket is open.';
};
nextSocket.onmessage = ({ data }) => {
if (socket !== nextSocket || typeof data !== 'string') {
return;
}
let message;
try {
message = JSON.parse(data);
} catch {
connectionState = 'error';
connectionDetail = 'Received non-JSON message from gateway.';
lastError = 'invalid server message: JSON parse failed';
return;
}
if (!isMessageEnvelope(message) || !isServerEventType(message.type)) {
connectionState = 'error';
connectionDetail = 'Received unsupported message from gateway.';
lastError = 'invalid server message: envelope or event type mismatch';
return;
}
lastServerEvent = message.type;
if (message.type === 'session.ready') {
sessionId = message.payload.sessionId;
return;
}
if (message.type === 'session.state') {
gatewaySessionState = message.payload.value;
return;
}
if (message.type === 'error') {
connectionState = 'error';
connectionDetail = 'Gateway reported a protocol error.';
lastError = `${message.payload.code}: ${message.payload.message}`;
}
};
nextSocket.onerror = () => {
if (socket !== nextSocket) {
return;
}
connectionState = 'error';
connectionDetail = 'Browser reported a WebSocket error.';
lastError = 'browser websocket error';
};
nextSocket.onclose = (event) => {
if (socket !== nextSocket) {
return;
}
lastClose = formatCloseReason(event);
connectionState = connectionState === 'error' ? 'error' : 'disconnected';
connectionDetail = connectionState === 'error'
? 'Socket closed after an error.'
: 'Gateway WebSocket is closed.';
clearSocketHandlers(nextSocket);
socket = null;
};
}
function disconnect() {
if (!socket) {
connectionState = 'disconnected';
connectionDetail = 'No active socket to close.';
return;
}
connectionDetail = 'Closing WebSocket connection.';
if (socket.readyState === WebSocket.OPEN || socket.readyState === WebSocket.CONNECTING) {
socket.close(1000, 'client disconnect');
}
}
// Teardown hook: release the gateway socket when the component is destroyed.
onDestroy(() => {
  const departingSocket = socket;
  if (!departingSocket) {
    return;
  }

  // Handlers are cleared and the shared reference dropped before closing.
  // NOTE(review): clearSocketHandlers is defined elsewhere; presumably it
  // detaches the on* callbacks so no state updates run after teardown.
  clearSocketHandlers(departingSocket);
  socket = null;

  const { readyState } = departingSocket;
  const needsClose = readyState === WebSocket.OPEN || readyState === WebSocket.CONNECTING;
  if (needsClose) {
    departingSocket.close(1000, 'page dispose');
  }
});
</script> </script>
<div class="page"> <VoiceSessionShell />
<section class="card">
<p class="eyebrow">Vela UI</p>
<h1>Voice session shell</h1>
<p>
This minimal browser shell can connect to the gateway WebSocket and expose developer-visible
session status. Microphone capture, transcript rendering, and audio playback remain future
increments.
</p>
<p class="contract-note">
Shared protocol package loaded with {CLIENT_EVENT_TYPES.length} client event types and
{SERVER_EVENT_TYPES.length} server event types across {SESSION_STATES.length} gateway session
states.
</p>
<div class="controls">
<button on:click={connect} disabled={connectionState === 'connecting' || connectionState === 'connected'}>
Connect
</button>
<button on:click={disconnect} disabled={!socket && connectionState !== 'connected' && connectionState !== 'connecting'}>
Disconnect
</button>
</div>
<div class="meta">
<div>
<span>UI connection state</span>
<strong>{connectionState}</strong>
</div>
<div>
<span>Connection detail</span>
<strong>{connectionDetail}</strong>
</div>
<div>
<span>Gateway WebSocket URL</span>
<strong>{gatewayWebSocketUrl}</strong>
</div>
<div>
<span>Session ID</span>
<strong>{sessionId}</strong>
</div>
<div>
<span>Gateway session state</span>
<strong>{gatewaySessionState}</strong>
</div>
<div>
<span>Last server event</span>
<strong>{lastServerEvent}</strong>
</div>
<div>
<span>Last error</span>
<strong>{lastError}</strong>
</div>
<div>
<span>Last close</span>
<strong>{lastClose}</strong>
</div>
<div>
<span>Connection attempts</span>
<strong>{connectionAttempts}</strong>
</div>
<div>
<span>Protocol package</span>
<strong>{PROTOCOL_PACKAGE_NAME}</strong>
</div>
</div>
</section>
</div>
<style>
:global(body) {
margin: 0;
font-family: Inter, system-ui, sans-serif;
background: #08111f;
color: #e6eef8;
}
.page {
min-height: 100vh;
display: grid;
place-items: center;
padding: 2rem;
}
.card {
max-width: 42rem;
padding: 2rem;
border: 1px solid #1f3147;
border-radius: 1rem;
background: linear-gradient(180deg, #0d1728 0%, #0a1321 100%);
box-shadow: 0 24px 80px rgba(0, 0, 0, 0.35);
}
.eyebrow {
margin: 0 0 0.5rem;
text-transform: uppercase;
letter-spacing: 0.12em;
font-size: 0.8rem;
color: #8bb9ff;
}
h1 {
margin: 0 0 1rem;
font-size: clamp(2rem, 5vw, 3rem);
}
p {
margin: 0;
line-height: 1.6;
color: #c7d6e8;
}
.contract-note {
margin-top: 1rem;
}
.controls {
margin-top: 1.5rem;
display: flex;
gap: 0.75rem;
flex-wrap: wrap;
}
button {
padding: 0.8rem 1.1rem;
border: 1px solid #2b4a6b;
border-radius: 0.75rem;
background: #12233a;
color: #e6eef8;
font: inherit;
cursor: pointer;
}
button:disabled {
opacity: 0.55;
cursor: not-allowed;
}
.meta {
margin-top: 1.5rem;
display: grid;
grid-template-columns: repeat(auto-fit, minmax(14rem, 1fr));
gap: 1rem;
}
.meta div {
padding: 1rem;
border-radius: 0.75rem;
background: rgba(139, 185, 255, 0.08);
}
span {
display: block;
font-size: 0.85rem;
color: #8da3bf;
margin-bottom: 0.35rem;
}
strong {
font-size: 1rem;
}
</style>

View File

@@ -0,0 +1,157 @@
import { cleanup, fireEvent, render, screen, waitFor } from '@testing-library/svelte';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { createMessageEnvelope } from '@vela/protocol';
import VoiceSessionShell from '../src/lib/VoiceSessionShell.svelte';
/**
 * Minimal in-memory stand-in for the browser WebSocket used by the UI tests.
 *
 * Every constructed instance is recorded in `MockWebSocket.instances` so a
 * test can grab the socket the component created and drive it by hand via
 * `open()`, `message()` and `close()`. Outgoing frames are collected in
 * `sent` instead of being transmitted.
 */
class MockWebSocket {
  // Ready-state constants mirroring the numeric values of the real API.
  static CONNECTING = 0;
  static OPEN = 1;
  static CLOSING = 2;
  static CLOSED = 3;

  // Registry of all sockets created since the last reset().
  static instances = [];

  /** Returns the most recently constructed socket, or undefined if none. */
  static latest() {
    const { instances } = MockWebSocket;
    return instances[instances.length - 1];
  }

  /** Forgets every previously constructed socket. */
  static reset() {
    MockWebSocket.instances = [];
  }

  /**
   * @param {string} url - Address the caller asked to connect to.
   */
  constructor(url) {
    Object.assign(this, {
      url,
      readyState: MockWebSocket.CONNECTING,
      sent: [],
      onopen: null,
      onmessage: null,
      onerror: null,
      onclose: null
    });
    MockWebSocket.instances.push(this);
  }

  /** Records an outgoing frame without sending it anywhere. */
  send(message) {
    this.sent.push(message);
  }

  /** Marks the socket as open and invokes the open handler, if one is set. */
  open() {
    this.readyState = MockWebSocket.OPEN;
    if (this.onopen != null) {
      this.onopen();
    }
  }

  /** Delivers `payload` to the message handler as a JSON-encoded `data` string. */
  message(payload) {
    if (this.onmessage != null) {
      const data = JSON.stringify(payload);
      this.onmessage({ data });
    }
  }

  /** Marks the socket as closed and invokes the close handler with a close-event-like object. */
  close(code = 1000, reason = 'client disconnect', wasClean = true) {
    this.readyState = MockWebSocket.CLOSED;
    if (this.onclose != null) {
      this.onclose({ code, reason, wasClean });
    }
  }
}
/**
 * Shorthand for `screen.getByTestId`, used throughout the suite.
 *
 * @param {string} id - Test id to look up.
 * @returns Whatever `screen.getByTestId` returns for that id.
 */
function getByTestId(id) {
  const match = screen.getByTestId(id);
  return match;
}
// Component-level coverage for the voice session shell, driven through a
// mocked global WebSocket so no gateway is required.
describe('voice session shell', () => {
  // Small readers/actions over test ids to keep the scenarios compact.
  const textOf = (testId) => getByTestId(testId).textContent;
  const isDisabled = (testId) => getByTestId(testId).hasAttribute('disabled');
  const click = (testId) => fireEvent.click(getByTestId(testId));

  // Synchronously opens the socket the component just created.
  const openLatestSocket = () => {
    const created = MockWebSocket.latest();
    created.open();
    return created;
  };

  // Synchronously delivers one server event to the component.
  const deliver = (target, type, payload) => {
    target.message(createMessageEnvelope(type, payload));
  };

  // Replays the gateway greeting: session.ready followed by session.state(idle).
  const announceSession = (target, sessionId) => {
    deliver(target, 'session.ready', { sessionId });
    deliver(target, 'session.state', { value: 'idle' });
  };

  beforeEach(() => {
    MockWebSocket.reset();
    vi.stubGlobal('WebSocket', MockWebSocket);
  });

  afterEach(() => {
    // Unmount first, then restore the real globals.
    cleanup();
    vi.unstubAllGlobals();
  });

  it('keeps mocked turn unavailable while disconnected and after disconnect', async () => {
    render(VoiceSessionShell);

    expect(textOf('connection-state')).toBe('not connected');
    expect(isDisabled('mocked-turn-button')).toBe(true);

    await click('connect-button');
    const socket = openLatestSocket();
    announceSession(socket, 'session-123');

    await waitFor(() => {
      expect(textOf('connection-state')).toBe('connected');
      expect(isDisabled('mocked-turn-button')).toBe(false);
    });

    await click('disconnect-button');
    socket.close(1000, 'client disconnect', true);

    await waitFor(() => {
      expect(textOf('connection-state')).toBe('disconnected');
      expect(isDisabled('mocked-turn-button')).toBe(true);
      expect(textOf('session-id')).toBe('session-123');
    });
  });

  it('renders mocked transcript before assistant response for a connected session', async () => {
    render(VoiceSessionShell);

    await click('connect-button');
    const socket = openLatestSocket();
    announceSession(socket, 'session-456');

    await waitFor(() => {
      expect(textOf('connection-state')).toBe('connected');
    });

    await click('mocked-turn-button');
    expect(socket.sent).toHaveLength(1);
    const sentMessage = JSON.parse(socket.sent[0]);
    expect(sentMessage.type).toBe('mocked.turn.trigger');

    // Server events for one mocked turn, delivered in this exact order.
    const turnEvents = [
      ['session.state', { value: 'listening' }],
      ['transcript.final', { text: 'Turn on the office lamp.' }],
      ['session.state', { value: 'thinking' }],
      ['session.state', { value: 'speaking' }],
      ['response.text.delta', { text: 'Mocked ' }],
      ['response.text.delta', { text: 'assistant response.' }],
      ['response.completed', { reason: 'mocked_turn_complete' }],
      ['session.state', { value: 'idle' }]
    ];
    for (const [type, payload] of turnEvents) {
      deliver(socket, type, payload);
    }

    await waitFor(() => {
      expect(textOf('user-transcript')).toBe('Turn on the office lamp.');
      expect(textOf('assistant-response')).toBe('Mocked assistant response.');
      expect(textOf('conversation-render-order')).toBe('transcript>response');
      expect(textOf('gateway-session-state')).toBe('idle');
    });
  });

  it('blocks mocked turn trigger before session.ready and allows it after session.ready', async () => {
    render(VoiceSessionShell);

    await click('connect-button');
    const socket = openLatestSocket();

    await waitFor(() => {
      expect(textOf('connection-state')).toBe('connected');
    });

    // Transport is open but the gateway has not announced a session yet.
    expect(isDisabled('mocked-turn-button')).toBe(true);
    await click('mocked-turn-button');
    expect(socket.sent).toHaveLength(0);
    expect(textOf('last-error')).toBe('mocked turn requires session.ready');

    announceSession(socket, 'session-789');

    await waitFor(() => {
      expect(isDisabled('mocked-turn-button')).toBe(false);
    });

    await click('mocked-turn-button');
    expect(socket.sent).toHaveLength(1);
    expect(JSON.parse(socket.sent[0]).type).toBe('mocked.turn.trigger');
  });
});

View File

@@ -2,5 +2,13 @@ import { sveltekit } from '@sveltejs/kit/vite';
import { defineConfig } from 'vite'; import { defineConfig } from 'vite';
export default defineConfig({ export default defineConfig({
plugins: [sveltekit()] plugins: [sveltekit()],
resolve: {
conditions: ['browser']
},
test: {
environment: 'jsdom',
include: ['tests/**/*.test.js'],
exclude: ['e2e/**']
}
}); });

View File

@@ -36,13 +36,15 @@ The repository now includes separate runnable workspaces for the UI and gateway
- PWA enabled - PWA enabled
- WebSocket client - WebSocket client
The current implementation is a minimal SvelteKit app with a single voice-session shell page. The shipped UI can open and close a browser WebSocket connection to the gateway `/ws` endpoint, show explicit connection status (`not connected`, `connecting`, `connected`, `disconnected`, `error`), and surface session metadata for developers. Microphone capture, transcript rendering, interrupt controls, streamed assistant response display, and audio playback are not part of the current shell and remain future work. The current implementation is a minimal SvelteKit app with a single voice-session shell page. The shipped UI can open and close a browser WebSocket connection to the gateway `/ws` endpoint, show explicit connection status (`not connected`, `connecting`, `connected`, `disconnected`, `error`), trigger one deterministic mocked turn while connected, and render the mocked user transcript plus mocked assistant response for the active session. Microphone capture, real provider integration, and audio playback are still future work.
#### Responsibilities #### Responsibilities
Current shell responsibilities: Current shell responsibilities:
- connection state rendering - connection state rendering
- mocked-turn trigger rendering with disconnected/in-flight guards
- mocked transcript and mocked assistant response rendering
- developer-oriented session metadata rendering - developer-oriented session metadata rendering
- browser session connect/disconnect controls - browser session connect/disconnect controls
@@ -60,7 +62,10 @@ Current shell:
- developer-focused voice-session panel - developer-focused voice-session panel
- connect button - connect button
- disconnect button - disconnect button
- mocked-turn button
- connection status indicator - connection status indicator
- mocked transcript display
- mocked assistant response display
- session metadata display - session metadata display
Future interactive voice screen: Future interactive voice screen:
@@ -82,7 +87,7 @@ Future interactive voice screen:
- Fastify (Node) - Fastify (Node)
- WebSocket-based session layer - WebSocket-based session layer
The current implementation is a minimal Fastify service with `/`, `/health`, and a documented `/ws` WebSocket session endpoint. The gateway keeps one ephemeral in-memory session record per live socket connection and removes it on disconnect. The current implementation is a minimal Fastify service with `/`, `/health`, and a documented `/ws` WebSocket session endpoint. The gateway keeps one ephemeral in-memory session record per live socket connection, removes it on disconnect, and can run one deterministic mocked turn per session without involving any external providers.
#### Responsibilities #### Responsibilities
@@ -100,18 +105,26 @@ The current implementation is a minimal Fastify service with `/`, `/health`, and
- WebSocket upgrades on `/ws` create an ephemeral session immediately - WebSocket upgrades on `/ws` create an ephemeral session immediately
- the gateway sends `session.ready` followed by `session.state` (`idle`) when the socket is established - the gateway sends `session.ready` followed by `session.state` (`idle`) when the socket is established
- valid minimal client events can move the session between `idle` and `listening` - valid minimal client events can move the session between `idle` and `listening`
- `mocked.turn.trigger` drives a fixed transcript/response event sequence over the existing shared protocol
- only one mocked turn is allowed in flight per session at a time
- invalid JSON, invalid envelopes, and malformed frames are handled defensively so the process stays up - invalid JSON, invalid envelopes, and malformed frames are handled defensively so the process stays up
### Current UI shell behavior ### Current UI shell behavior
- renders a minimal developer-focused voice-session panel - renders a minimal developer-focused voice-session panel
- exposes connect and disconnect controls only - exposes connect, disconnect, and mocked-turn controls
- does not request microphone permission - does not request microphone permission
- does not send or process audio data - does not send or process audio data
- reads `session.ready`, `session.state`, and `error` messages from the shared protocol contract - reads mocked transcript and mocked response events from the shared protocol contract
## Voice Pipeline ## Voice Pipeline
```text
Mocked turn button → Gateway mocked session flow → Transcript events → Response text events → UI
```
This mocked vertical slice intentionally stands in for the future real pipeline:
```text ```text
Mic → Gateway → STT → Transcript Mic → Gateway → STT → Transcript
→ LLM → Tool Calls → Results → LLM → Tool Calls → Results

View File

@@ -34,10 +34,12 @@ Prove the end-to-end interaction model with mocked or stubbed providers.
- [x] bootstrap `vela-ui` as a runnable SvelteKit app in the Yarn workspace - [x] bootstrap `vela-ui` as a runnable SvelteKit app in the Yarn workspace
- [x] bootstrap `vela-gateway` as a runnable Fastify app in the Yarn workspace - [x] bootstrap `vela-gateway` as a runnable Fastify app in the Yarn workspace
- [x] add the first UI voice-session shell with connect/disconnect controls and explicit WebSocket status - [x] add the first UI voice-session shell with connect/disconnect controls and explicit WebSocket status
- create a minimal UI with mic control, transcript, and response text - [x] create a minimal mocked-turn UI with transcript and response text over the shared WebSocket session
- create a minimal UI with mic control
- [x] create a gateway WebSocket session skeleton - [x] create a gateway WebSocket session skeleton
- implement mocked STT flow for partial and final transcript events - [x] implement a mocked transcript/response vertical slice over the existing WebSocket session
- implement mocked LLM response streaming - implement mocked STT flow for partial transcript events
- implement mocked LLM response streaming beyond the fixed deterministic slice
- implement stubbed audio playback or placeholder TTS output - implement stubbed audio playback or placeholder TTS output
- implement interrupt handling across the mocked pipeline - implement interrupt handling across the mocked pipeline
@@ -181,7 +183,10 @@ Polish the system after the core voice loop is reliable.
- `apps/vela-ui` now boots as a minimal SvelteKit app with a starter page - `apps/vela-ui` now boots as a minimal SvelteKit app with a starter page
- `apps/vela-ui` now includes a minimal voice-session shell that can connect to the gateway `/ws` endpoint and display developer-visible session status - `apps/vela-ui` now includes a minimal voice-session shell that can connect to the gateway `/ws` endpoint and display developer-visible session status
- `apps/vela-ui` can now trigger one deterministic mocked turn while connected and render the mocked transcript plus assistant response for the active session
- `apps/vela-ui` now includes browser-level coverage for the mocked transcript/response slice, including connect, disconnect, and disconnected-state trigger guarding
- `apps/vela-gateway` now boots as a minimal Fastify app with `/` and `/health` endpoints - `apps/vela-gateway` now boots as a minimal Fastify app with `/` and `/health` endpoints
- `apps/vela-gateway` now exposes a minimal `/ws` WebSocket session skeleton with ephemeral in-memory sessions and defensive message handling - `apps/vela-gateway` now exposes a minimal `/ws` WebSocket session skeleton with ephemeral in-memory sessions and defensive message handling
- `apps/vela-gateway` now accepts `mocked.turn.trigger` and emits protocol-valid mocked transcript/response events with one in-flight mocked turn per session
- `apps/vela-protocol` now provides the shared WebSocket event contract for the UI and gateway - `apps/vela-protocol` now provides the shared WebSocket event contract for the UI and gateway
- backend framework choice is now concrete: Fastify - backend framework choice is now concrete: Fastify

View File

@@ -15,7 +15,7 @@ Current UI baseline:
- the browser opens a WebSocket directly to `/ws` - the browser opens a WebSocket directly to `/ws`
- the UI tracks connection status separately from gateway session status - the UI tracks connection status separately from gateway session status
- the UI currently consumes server events but does not send `session.start` or any audio events yet - the UI can send `mocked.turn.trigger` after `session.ready` while connected to request one deterministic mocked turn for the active session
## WebSocket Message Envelope ## WebSocket Message Envelope
@@ -40,6 +40,7 @@ This increment intentionally keeps the envelope minimal:
```ts ```ts
type ClientEvent = type ClientEvent =
| { type: "session.start"; payload: {} } | { type: "session.start"; payload: {} }
| { type: "mocked.turn.trigger"; payload: {} }
| { type: "input_audio.append"; payload: { chunk: string } } | { type: "input_audio.append"; payload: { chunk: string } }
| { type: "input_audio.commit"; payload: {} } | { type: "input_audio.commit"; payload: {} }
| { type: "response.cancel"; payload: {} }; | { type: "response.cancel"; payload: {} };
@@ -48,6 +49,7 @@ type ClientEvent =
#### Client event intent #### Client event intent
- `session.start` initializes a voice session without locking in transport or auth details yet - `session.start` initializes a voice session without locking in transport or auth details yet
- `mocked.turn.trigger` asks the gateway to run one obviously mocked, deterministic transcript/response turn
- `input_audio.append` carries a chunk of captured input audio as an encoded string - `input_audio.append` carries a chunk of captured input audio as an encoded string
- `input_audio.commit` marks the current buffered user turn as ready for downstream processing - `input_audio.commit` marks the current buffered user turn as ready for downstream processing
- `response.cancel` interrupts the active listen/think/speak flow - `response.cancel` interrupts the active listen/think/speak flow
@@ -56,9 +58,12 @@ type ClientEvent =
- on connect, the gateway creates an ephemeral in-memory session and emits `session.ready` plus `session.state` - on connect, the gateway creates an ephemeral in-memory session and emits `session.ready` plus `session.state`
- `session.start` is accepted as an idempotent session acknowledgment and re-sends readiness/state - `session.start` is accepted as an idempotent session acknowledgment and re-sends readiness/state
- `mocked.turn.trigger` is accepted only when no other mocked turn is already in flight for that session
- a mocked turn emits deterministic `transcript.final`, `response.text.delta`, `response.completed`, and `session.state` events in protocol-valid order
- `input_audio.append` updates the ephemeral session record and moves the session to `listening` - `input_audio.append` updates the ephemeral session record and moves the session to `listening`
- `input_audio.commit` resets the minimal buffered state and returns the session to `idle` - `input_audio.commit` resets the minimal buffered state and returns the session to `idle`
- `response.cancel` resets the minimal session state back to `idle` - `response.cancel` resets the minimal session state back to `idle`
- a second mocked-turn trigger during an active mocked turn produces `error` with code `mocked_turn_in_flight`
- malformed JSON produces `error` with code `invalid_json` - malformed JSON produces `error` with code `invalid_json`
- invalid envelopes or unsupported client event names produce `error` with code `invalid_message` - invalid envelopes or unsupported client event names produce `error` with code `invalid_message`
- malformed WebSocket frames are rejected without crashing the gateway process - malformed WebSocket frames are rejected without crashing the gateway process
@@ -79,6 +84,7 @@ Notes:
- this UI state is transport-oriented and is separate from the shared gateway `session.state` payload - this UI state is transport-oriented and is separate from the shared gateway `session.state` payload
- `session.state` currently reflects the gateway session phase (`idle`, `listening`, `thinking`, `speaking`) - `session.state` currently reflects the gateway session phase (`idle`, `listening`, `thinking`, `speaking`)
- the UI keeps the mocked-turn control disabled in three cases: before `session.ready` has arrived, whenever the socket is disconnected, and while a mocked turn is already in flight
- the UI treats malformed server messages, browser WebSocket errors, and gateway `error` events as safe error states instead of throwing - the UI treats malformed server messages, browser WebSocket errors, and gateway `error` events as safe error states instead of throwing
### Server → Client ### Server → Client
@@ -109,6 +115,27 @@ type ServerEvent =
- `response.completed` marks the current assistant turn as done - `response.completed` marks the current assistant turn as done
- `error` is the minimal recoverable failure shape for both UI and gateway work - `error` is the minimal recoverable failure shape for both UI and gateway work
### Deterministic mocked turn sequence
For this increment, `mocked.turn.trigger` produces one fixed interaction for the active session:
```text
session.state(listening)
→ transcript.final("[mocked user] What is the current mocked vertical slice?")
→ session.state(thinking)
→ session.state(speaking)
→ response.text.delta("[mocked assistant] ")
→ response.text.delta("This is a deterministic mocked response from the gateway vertical slice.")
→ response.completed
→ session.state(idle)
```
Notes:
- the content is intentionally fixed and obviously mocked
- no audio, STT, LLM, TTS, or external providers participate in this flow
- `response.cancel` can stop the mocked turn early and return the session to `idle`
## Contract Scope for This Increment ## Contract Scope for This Increment
This contract is intentionally limited to the smallest event set needed to unblock: This contract is intentionally limited to the smallest event set needed to unblock:
@@ -118,6 +145,7 @@ This contract is intentionally limited to the smallest event set needed to unblo
Explicitly deferred for later increments: Explicitly deferred for later increments:
- freeform typed user input
- tool-calling events - tool-calling events
- streamed TTS/output-audio events - streamed TTS/output-audio events
- reconnect/resume semantics - reconnect/resume semantics

View File

@@ -61,13 +61,14 @@ Additional verification commands:
mise exec -- yarn check:ui mise exec -- yarn check:ui
mise exec -- yarn build:ui mise exec -- yarn build:ui
mise exec -- yarn build:gateway mise exec -- yarn build:gateway
npm run test --workspace vela-ui
``` ```
## Notes ## Notes
- the concrete framework choices are now SvelteKit for `vela-ui` and Fastify for `vela-gateway` - the concrete framework choices are now SvelteKit for `vela-ui` and Fastify for `vela-gateway`
- the UI is intentionally minimal and currently includes only a developer-facing WebSocket voice-session shell - the UI is intentionally minimal and currently includes only a developer-facing WebSocket voice-session shell
- the UI does not yet include mic capture, transcript rendering, assistant response rendering, or audio playback - the UI now renders a deterministic mocked transcript and assistant response flow, but does not yet include mic capture or audio playback
- the gateway now exposes the minimal shared-protocol `/ws` WebSocket contract used by that shell - the gateway now exposes the minimal shared-protocol `/ws` WebSocket contract used by that shell
- if your shell is configured for mise activation, plain `yarn` commands can be used after `mise install` - if your shell is configured for mise activation, plain `yarn` commands can be used after `mise install`
- update this document when the repo layout or package manager workflow changes - update this document when the repo layout or package manager workflow changes

View File

@@ -212,6 +212,13 @@
resolved "https://registry.yarnpkg.com/@pinojs/redact/-/redact-0.4.0.tgz#c3de060dd12640dcc838516aa2a6803cc7b2e9d6" resolved "https://registry.yarnpkg.com/@pinojs/redact/-/redact-0.4.0.tgz#c3de060dd12640dcc838516aa2a6803cc7b2e9d6"
integrity sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg== integrity sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg==
"@playwright/test@^1.54.2":
version "1.59.1"
resolved "https://registry.yarnpkg.com/@playwright/test/-/test-1.59.1.tgz#5c4d38eac84a61527af466602ae20277685a02d6"
integrity sha512-PG6q63nQg5c9rIi4/Z5lR5IVF7yU5MqmKaPOe0HSc0O2cX1fPi96sUQu5j7eo4gKCkB2AnNGoWt7y4/Xx3Kcqg==
dependencies:
playwright "1.59.1"
"@polka/url@^1.0.0-next.24": "@polka/url@^1.0.0-next.24":
version "1.0.0-next.29" version "1.0.0-next.29"
resolved "https://registry.yarnpkg.com/@polka/url/-/url-1.0.0-next.29.tgz#5a40109a1ab5f84d6fd8fc928b19f367cbe7e7b1" resolved "https://registry.yarnpkg.com/@polka/url/-/url-1.0.0-next.29.tgz#5a40109a1ab5f84d6fd8fc928b19f367cbe7e7b1"
@@ -631,6 +638,11 @@ find-my-way@^9.0.0:
fast-querystring "^1.0.0" fast-querystring "^1.0.0"
safe-regex2 "^5.0.0" safe-regex2 "^5.0.0"
fsevents@2.3.2:
version "2.3.2"
resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.2.tgz#8a526f78b8fdf4623b709e0b975c52c24c02fd1a"
integrity sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==
fsevents@~2.3.2, fsevents@~2.3.3: fsevents@~2.3.2, fsevents@~2.3.3:
version "2.3.3" version "2.3.3"
resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.3.tgz#cac6407785d03675a2a5e1a5305c697b347d90d6" resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.3.tgz#cac6407785d03675a2a5e1a5305c697b347d90d6"
@@ -755,6 +767,20 @@ pino-std-serializers@^7.0.0:
sonic-boom "^4.0.1" sonic-boom "^4.0.1"
thread-stream "^4.0.0" thread-stream "^4.0.0"
playwright-core@1.59.1:
version "1.59.1"
resolved "https://registry.yarnpkg.com/playwright-core/-/playwright-core-1.59.1.tgz#d8a2b28bcb8f2bd08ef3df93b02ae83c813244b2"
integrity sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg==
playwright@1.59.1:
version "1.59.1"
resolved "https://registry.yarnpkg.com/playwright/-/playwright-1.59.1.tgz#f7b0ca61637ae25264cec370df671bbe1f368a4a"
integrity sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw==
dependencies:
playwright-core "1.59.1"
optionalDependencies:
fsevents "2.3.2"
postcss@^8.5.3: postcss@^8.5.3:
version "8.5.9" version "8.5.9"
resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.5.9.tgz#f6ee9e0b94f0f19c97d2f172bfbd7fc71fe1cca4" resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.5.9.tgz#f6ee9e0b94f0f19c97d2f172bfbd7fc71fe1cca4"