feat(vela-ui): add voice session shell

Add a minimal UI shell that connects to the gateway WebSocket and exposes developer-visible session state. Align the architecture, protocol, setup, integration, and backlog docs with the current UI increment.
This commit is contained in:
2026-04-08 18:40:45 +02:00
parent fa5a458003
commit 4b11703c93
7 changed files with 317 additions and 20 deletions

View File

@@ -5,5 +5,6 @@ This workspace contains the Vela browser UI as a minimal SvelteKit app.
Current status: Current status:
- SvelteKit app boots in the Yarn workspace - SvelteKit app boots in the Yarn workspace
- root page shows the initial Vela UI starter screen - root page shows a minimal voice-session shell with connect/disconnect controls
- PWA features and voice interaction flows remain future increments - the shell can connect to the gateway `/ws` endpoint and display developer-visible session status
- microphone capture, transcript rendering, and audio playback remain future increments

View File

@@ -2,43 +2,262 @@
<title>Vela UI</title> <title>Vela UI</title>
<meta <meta
name="description" name="description"
content="Minimal starter UI for the Vela voice assistant." content="Minimal voice-session shell for the Vela voice assistant."
/> />
</svelte:head> </svelte:head>
<script> <script>
import { onDestroy } from 'svelte';
import { import {
CLIENT_EVENT_TYPES, CLIENT_EVENT_TYPES,
PROTOCOL_PACKAGE_NAME, PROTOCOL_PACKAGE_NAME,
SERVER_EVENT_TYPES SERVER_EVENT_TYPES,
SESSION_STATES,
isMessageEnvelope,
isServerEventType
} from '@vela/protocol'; } from '@vela/protocol';
const appStatus = 'Bootstrapped'; const DEFAULT_GATEWAY_PORT = '3001';
const nextFocus = `Build the voice session shell on top of ${PROTOCOL_PACKAGE_NAME}.`; const FALLBACK_GATEWAY_URL = `ws://localhost:${DEFAULT_GATEWAY_PORT}/ws`;
const configuredGatewayUrl = import.meta.env.VITE_VELA_GATEWAY_WS_URL;
/**
 * Work out which WebSocket URL the shell should use to reach the gateway.
 *
 * Resolution order:
 *   1. the explicitly configured URL, when one is set;
 *   2. the fixed fallback URL when no `window` is available;
 *   3. the default gateway port on the same hostname when the page is served from
 *      `localhost` / `127.0.0.1` on a different port;
 *   4. otherwise `/ws` on the page's own host.
 *
 * The `ws:` / `wss:` scheme follows the page protocol (`wss:` only for `https:`).
 *
 * @returns {string} WebSocket URL for the gateway `/ws` endpoint.
 */
function resolveGatewayWebSocketUrl() {
  if (configuredGatewayUrl) {
    return configuredGatewayUrl;
  }

  if (typeof window === 'undefined') {
    return FALLBACK_GATEWAY_URL;
  }

  const { protocol: pageProtocol, hostname, host, port } = window.location;
  const scheme = pageProtocol === 'https:' ? 'wss:' : 'ws:';
  const servedFromLoopback = hostname === 'localhost' || hostname === '127.0.0.1';

  // A loopback page on any port other than the gateway's is pointed at the default gateway port.
  const authority =
    servedFromLoopback && port !== DEFAULT_GATEWAY_PORT
      ? `${hostname}:${DEFAULT_GATEWAY_PORT}`
      : host;

  return `${scheme}//${authority}/ws`;
}
/**
 * Render a WebSocket close event as a short developer-facing summary.
 *
 * @param {{ code: number, wasClean: boolean, reason: string }} event - Close event fields to report.
 * @returns {string} Text such as `code 1000, clean yes (client disconnect)`; the parenthesised
 *   reason is omitted when the event carries no reason.
 */
function formatCloseReason(event) {
  const { code, wasClean, reason } = event;
  const summary = [`code ${code}`, `clean ${wasClean ? 'yes' : 'no'}`].join(', ');

  if (!reason) {
    return summary;
  }

  return `${summary} (${reason})`;
}
// Component state for the voice-session shell; the template renders these values directly.

// URL used for the gateway socket; re-resolved on every connect() call.
let gatewayWebSocketUrl = resolveGatewayWebSocketUrl();
// Transport status shown in the UI: 'not connected', 'connecting', 'connected',
// 'disconnected', or 'error'.
let connectionState = 'not connected';
// Human-readable explanation of the current connectionState.
let connectionDetail = 'Socket is idle.';
// Value of the most recent `session.state` payload received from the gateway.
let gatewaySessionState = 'not received';
// Session identifier taken from the gateway's `session.ready` payload.
let sessionId = 'not assigned';
// Type of the most recent valid server event.
let lastServerEvent = 'none';
// Most recent error text (parse failure, envelope mismatch, gateway error, or browser error).
let lastError = 'none';
// Summary of the most recent close event, as produced by formatCloseReason().
let lastClose = 'not closed';
// Active WebSocket instance, or null when no socket is owned by the shell.
let socket = null;
// Number of times connect() has started opening a socket.
let connectionAttempts = 0;
/**
 * Detach every event handler this shell installs on a socket so that later events from
 * that socket no longer reach the component.
 *
 * @param {WebSocket} targetSocket - Socket whose `onopen`, `onmessage`, `onerror`, and
 *   `onclose` handlers are reset to `null`.
 */
function clearSocketHandlers(targetSocket) {
  for (const handlerName of ['onopen', 'onmessage', 'onerror', 'onclose']) {
    targetSocket[handlerName] = null;
  }
}
/**
 * Return the gateway-derived session fields to their placeholder values.
 *
 * Only the values learned from server events are reset (session id, session state, last
 * server event); transport status, error, and close fields are left alone.
 */
function resetSessionStatus() {
  sessionId = 'not assigned';
  gatewaySessionState = 'not received';
  lastServerEvent = 'none';
}
/**
 * Open a WebSocket to the gateway and mirror its lifecycle into the shell's status fields.
 *
 * Does nothing when no `window` is available, or while a socket is already connecting or
 * connected. If a previous socket is still held (the shell can be in the 'error' state while
 * that socket remains open), it is detached and closed before the new one is created so the
 * old connection is not left open.
 *
 * Every handler ignores events from a socket that is no longer the active one.
 * Malformed messages, gateway `error` events, browser socket errors, and a failing
 * WebSocket constructor all end in the 'error' state instead of throwing.
 */
function connect() {
  if (typeof window === 'undefined') {
    return;
  }

  if (socket && (connectionState === 'connecting' || connectionState === 'connected')) {
    return;
  }

  // Reaching this point with a socket means the shell is in a non-active state (for example
  // 'error' after a gateway `error` event) while the socket itself may still be open.
  // Replacing the reference without closing it would leave that connection open.
  if (socket) {
    const staleSocket = socket;

    clearSocketHandlers(staleSocket);
    socket = null;

    if (staleSocket.readyState === WebSocket.OPEN || staleSocket.readyState === WebSocket.CONNECTING) {
      staleSocket.close(1000, 'client reconnect');
    }
  }

  gatewayWebSocketUrl = resolveGatewayWebSocketUrl();
  resetSessionStatus();
  lastError = 'none';
  lastClose = 'not closed';
  connectionState = 'connecting';
  connectionDetail = 'Opening WebSocket connection to gateway.';
  connectionAttempts += 1;

  let nextSocket;

  try {
    nextSocket = new WebSocket(gatewayWebSocketUrl);
  } catch (error) {
    // The constructor throws synchronously when the URL cannot be used; report it as an
    // error state so the shell does not stay stuck on 'connecting'.
    connectionState = 'error';
    connectionDetail = 'Could not open a WebSocket connection to gateway.';
    lastError = `websocket setup failed: ${error instanceof Error ? error.message : String(error)}`;
    return;
  }

  socket = nextSocket;

  nextSocket.onopen = () => {
    if (socket !== nextSocket) {
      return;
    }

    connectionState = 'connected';
    connectionDetail = 'Gateway WebSocket is open.';
  };

  nextSocket.onmessage = ({ data }) => {
    // Non-string frames are ignored.
    if (socket !== nextSocket || typeof data !== 'string') {
      return;
    }

    let message;

    try {
      message = JSON.parse(data);
    } catch {
      connectionState = 'error';
      connectionDetail = 'Received non-JSON message from gateway.';
      lastError = 'invalid server message: JSON parse failed';
      return;
    }

    if (!isMessageEnvelope(message) || !isServerEventType(message.type)) {
      connectionState = 'error';
      connectionDetail = 'Received unsupported message from gateway.';
      lastError = 'invalid server message: envelope or event type mismatch';
      return;
    }

    lastServerEvent = message.type;

    if (message.type === 'session.ready') {
      sessionId = message.payload.sessionId;
      return;
    }

    if (message.type === 'session.state') {
      gatewaySessionState = message.payload.value;
      return;
    }

    if (message.type === 'error') {
      connectionState = 'error';
      connectionDetail = 'Gateway reported a protocol error.';
      lastError = `${message.payload.code}: ${message.payload.message}`;
    }
  };

  nextSocket.onerror = () => {
    if (socket !== nextSocket) {
      return;
    }

    connectionState = 'error';
    connectionDetail = 'Browser reported a WebSocket error.';
    lastError = 'browser websocket error';
  };

  nextSocket.onclose = (event) => {
    if (socket !== nextSocket) {
      return;
    }

    lastClose = formatCloseReason(event);
    // An earlier error stays visible after the close instead of being replaced by 'disconnected'.
    connectionState = connectionState === 'error' ? 'error' : 'disconnected';
    connectionDetail = connectionState === 'error'
      ? 'Socket closed after an error.'
      : 'Gateway WebSocket is closed.';
    clearSocketHandlers(nextSocket);
    socket = null;
  };
}
/**
 * Ask the active gateway socket to close with code 1000 and reason `client disconnect`.
 *
 * With no active socket the shell is simply marked 'disconnected'. With a socket, only the
 * detail text changes here; the socket's own close handler is what updates the connection
 * state. A socket that is neither open nor connecting is not closed again.
 */
function disconnect() {
  if (socket) {
    connectionDetail = 'Closing WebSocket connection.';

    const closableStates = [WebSocket.OPEN, WebSocket.CONNECTING];
    if (closableStates.includes(socket.readyState)) {
      socket.close(1000, 'client disconnect');
    }

    return;
  }

  connectionState = 'disconnected';
  connectionDetail = 'No active socket to close.';
}
// Release the gateway socket when the page component is destroyed.
onDestroy(() => {
  const activeSocket = socket;

  if (!activeSocket) {
    return;
  }

  // Handlers are detached before closing, so the close handler does not run for this teardown.
  clearSocketHandlers(activeSocket);
  socket = null;

  const isStillLive = [WebSocket.OPEN, WebSocket.CONNECTING].includes(activeSocket.readyState);
  if (isStillLive) {
    activeSocket.close(1000, 'page dispose');
  }
});
</script> </script>
<div class="page"> <div class="page">
<section class="card"> <section class="card">
<p class="eyebrow">Vela UI</p> <p class="eyebrow">Vela UI</p>
<h1>Minimal SvelteKit starter</h1> <h1>Voice session shell</h1>
<p> <p>
This workspace now runs as the browser shell for Vela. The voice controls, transcript, and This minimal browser shell can connect to the gateway WebSocket and expose developer-visible
streaming session UI will be added in later increments. session status. Microphone capture, transcript rendering, and audio playback remain future
increments.
</p> </p>
<p class="contract-note"> <p class="contract-note">
Shared protocol package loaded with {CLIENT_EVENT_TYPES.length} client event types and Shared protocol package loaded with {CLIENT_EVENT_TYPES.length} client event types and
{SERVER_EVENT_TYPES.length} server event types. {SERVER_EVENT_TYPES.length} server event types across {SESSION_STATES.length} gateway session
states.
</p> </p>
<div class="controls">
<button on:click={connect} disabled={connectionState === 'connecting' || connectionState === 'connected'}>
Connect
</button>
<button on:click={disconnect} disabled={!socket && connectionState !== 'connected' && connectionState !== 'connecting'}>
Disconnect
</button>
</div>
<div class="meta"> <div class="meta">
<div> <div>
<span>Status</span> <span>UI connection state</span>
<strong>{appStatus}</strong> <strong>{connectionState}</strong>
</div> </div>
<div> <div>
<span>Next</span> <span>Connection detail</span>
<strong>{nextFocus}</strong> <strong>{connectionDetail}</strong>
</div>
<div>
<span>Gateway WebSocket URL</span>
<strong>{gatewayWebSocketUrl}</strong>
</div>
<div>
<span>Session ID</span>
<strong>{sessionId}</strong>
</div>
<div>
<span>Gateway session state</span>
<strong>{gatewaySessionState}</strong>
</div>
<div>
<span>Last server event</span>
<strong>{lastServerEvent}</strong>
</div>
<div>
<span>Last error</span>
<strong>{lastError}</strong>
</div>
<div>
<span>Last close</span>
<strong>{lastClose}</strong>
</div>
<div>
<span>Connection attempts</span>
<strong>{connectionAttempts}</strong>
</div>
<div>
<span>Protocol package</span>
<strong>{PROTOCOL_PACKAGE_NAME}</strong>
</div> </div>
</div> </div>
</section> </section>
@@ -91,9 +310,32 @@
margin-top: 1rem; margin-top: 1rem;
} }
.controls {
margin-top: 1.5rem;
display: flex;
gap: 0.75rem;
flex-wrap: wrap;
}
button {
padding: 0.8rem 1.1rem;
border: 1px solid #2b4a6b;
border-radius: 0.75rem;
background: #12233a;
color: #e6eef8;
font: inherit;
cursor: pointer;
}
button:disabled {
opacity: 0.55;
cursor: not-allowed;
}
.meta { .meta {
margin-top: 1.5rem; margin-top: 1.5rem;
display: grid; display: grid;
grid-template-columns: repeat(auto-fit, minmax(14rem, 1fr));
gap: 1rem; gap: 1rem;
} }

View File

@@ -36,18 +36,35 @@ The repository now includes separate runnable workspaces for the UI and gateway
- PWA enabled - PWA enabled
- WebSocket client - WebSocket client
The current implementation is a minimal SvelteKit app with a single starter page. PWA behavior, microphone capture, and the WebSocket client are later increments. The current implementation is a minimal SvelteKit app with a single voice-session shell page. The shipped UI can open and close a browser WebSocket connection to the gateway `/ws` endpoint, show explicit connection status (`not connected`, `connecting`, `connected`, `disconnected`, `error`), and surface session metadata for developers. Microphone capture, transcript rendering, interrupt controls, streamed assistant response display, and audio playback are not part of the current shell and remain future work.
#### Responsibilities #### Responsibilities
Current shell responsibilities:
- connection state rendering
- developer-oriented session metadata rendering
- browser session connect/disconnect controls
Future UI responsibilities:
- audio capture from microphone - audio capture from microphone
- audio playback for TTS - audio playback for TTS
- UI state rendering - broader voice-session UI state rendering
- session management
- interrupt handling - interrupt handling
#### Main Screen #### Main Screen
Current shell:
- developer-focused voice-session panel
- connect button
- disconnect button
- connection status indicator
- session metadata display
Future interactive voice screen:
- large mic button - large mic button
- live transcript - live transcript
- streamed assistant response text - streamed assistant response text
@@ -85,6 +102,14 @@ The current implementation is a minimal Fastify service with `/`, `/health`, and
- valid minimal client events can move the session between `idle` and `listening` - valid minimal client events can move the session between `idle` and `listening`
- invalid JSON, invalid envelopes, and malformed frames are handled defensively so the process stays up - invalid JSON, invalid envelopes, and malformed frames are handled defensively so the process stays up
### Current UI shell behavior
- renders a minimal developer-focused voice-session panel
- exposes connect and disconnect controls only
- does not request microphone permission
- does not send or process audio data
- reads `session.ready`, `session.state`, and `error` messages from the shared protocol contract
## Voice Pipeline ## Voice Pipeline
```text ```text

View File

@@ -33,7 +33,8 @@ Prove the end-to-end interaction model with mocked or stubbed providers.
- [x] bootstrap `vela-ui` as a runnable SvelteKit app in the Yarn workspace - [x] bootstrap `vela-ui` as a runnable SvelteKit app in the Yarn workspace
- [x] bootstrap `vela-gateway` as a runnable Fastify app in the Yarn workspace - [x] bootstrap `vela-gateway` as a runnable Fastify app in the Yarn workspace
- create a minimal UI with mic control, state indicator, transcript, and response text - [x] add the first UI voice-session shell with connect/disconnect controls and explicit WebSocket status
- create a minimal UI with mic control, transcript, and response text
- [x] create a gateway WebSocket session skeleton - [x] create a gateway WebSocket session skeleton
- implement mocked STT flow for partial and final transcript events - implement mocked STT flow for partial and final transcript events
- implement mocked LLM response streaming - implement mocked LLM response streaming
@@ -179,6 +180,7 @@ Polish the system after the core voice loop is reliable.
## Current Progress Notes ## Current Progress Notes
- `apps/vela-ui` now boots as a minimal SvelteKit app with a starter page - `apps/vela-ui` now boots as a minimal SvelteKit app with a starter page
- `apps/vela-ui` now includes a minimal voice-session shell that can connect to the gateway `/ws` endpoint and display developer-visible session status
- `apps/vela-gateway` now boots as a minimal Fastify app with `/` and `/health` endpoints - `apps/vela-gateway` now boots as a minimal Fastify app with `/` and `/health` endpoints
- `apps/vela-gateway` now exposes a minimal `/ws` WebSocket session skeleton with ephemeral in-memory sessions and defensive message handling - `apps/vela-gateway` now exposes a minimal `/ws` WebSocket session skeleton with ephemeral in-memory sessions and defensive message handling
- `apps/vela-protocol` now provides the shared WebSocket event contract for the UI and gateway - `apps/vela-protocol` now provides the shared WebSocket event contract for the UI and gateway

View File

@@ -5,6 +5,7 @@
- `vela-ui` is implemented as a SvelteKit application - `vela-ui` is implemented as a SvelteKit application
- `vela-gateway` is implemented as a Fastify service - `vela-gateway` is implemented as a Fastify service
- `vela-gateway` now exposes `/ws` as the minimal WebSocket session entrypoint using the shared `@vela/protocol` contract - `vela-gateway` now exposes `/ws` as the minimal WebSocket session entrypoint using the shared `@vela/protocol` contract
- `vela-ui` now opens a minimal browser WebSocket client against that `/ws` entrypoint and surfaces connection/session status for developers
- current integration work beyond the gateway WebSocket/session baseline remains future implementation - current integration work beyond the gateway WebSocket/session baseline remains future implementation
## Gateway Session Contract ## Gateway Session Contract
@@ -14,6 +15,7 @@
- message format: `@vela/protocol` `MessageEnvelope<{ type, payload }>` - message format: `@vela/protocol` `MessageEnvelope<{ type, payload }>`
- current server behavior: acknowledge connect with `session.ready` and `session.state` - current server behavior: acknowledge connect with `session.ready` and `session.state`
- safety baseline: invalid JSON, invalid envelopes, and malformed frames return protocol errors or close that socket without taking down the service - safety baseline: invalid JSON, invalid envelopes, and malformed frames return protocol errors or close that socket without taking down the service
- current UI behavior: connect/disconnect only, no microphone access, no audio payloads, and safe error-state handling for `open`/`error`/`close`
## STT (Speech-to-Text) ## STT (Speech-to-Text)

View File

@@ -11,6 +11,12 @@ Current gateway baseline:
- the gateway sends `session.ready` and `session.state` immediately after a successful socket upgrade - the gateway sends `session.ready` and `session.state` immediately after a successful socket upgrade
- the gateway accepts JSON text messages only in the shared envelope shape - the gateway accepts JSON text messages only in the shared envelope shape
Current UI baseline:
- the browser opens a WebSocket directly to `/ws`
- the UI tracks connection status separately from gateway session status
- the UI currently consumes server events but does not send `session.start` or any audio events yet
## WebSocket Message Envelope ## WebSocket Message Envelope
Every WebSocket message uses one envelope format: Every WebSocket message uses one envelope format:
@@ -57,6 +63,24 @@ type ClientEvent =
- invalid envelopes or unsupported client event names produce `error` with code `invalid_message` - invalid envelopes or unsupported client event names produce `error` with code `invalid_message`
- malformed WebSocket frames are rejected without crashing the gateway process - malformed WebSocket frames are rejected without crashing the gateway process
### UI connection shell behavior
The UI currently exposes a small browser-side connection state machine for the WebSocket transport:
```text
not connected
→ connecting
→ connected
→ disconnected
→ error
```
Notes:
- this UI state is transport-oriented and is separate from the shared gateway `session.state` payload
- `session.state` currently reflects the gateway session phase (`idle`, `listening`, `thinking`, `speaking`)
- the UI treats malformed server messages, browser WebSocket errors, and gateway `error` events as safe error states instead of throwing
### Server → Client ### Server → Client
```ts ```ts

View File

@@ -66,7 +66,8 @@ mise exec -- yarn build:gateway
## Notes ## Notes
- the concrete framework choices are now SvelteKit for `vela-ui` and Fastify for `vela-gateway` - the concrete framework choices are now SvelteKit for `vela-ui` and Fastify for `vela-gateway`
- the UI is intentionally minimal and does not yet include mic capture, transcript rendering, or WebSocket session state - the UI is intentionally minimal and currently includes only a developer-facing WebSocket voice-session shell
- the gateway is intentionally minimal and does not yet expose the planned WebSocket contract - the UI does not yet include mic capture, transcript rendering, assistant response rendering, or audio playback
- the gateway now exposes the minimal shared-protocol `/ws` WebSocket contract used by that shell
- if your shell is configured for mise activation, plain `yarn` commands can be used after `mise install` - if your shell is configured for mise activation, plain `yarn` commands can be used after `mise install`
- update this document when the repo layout or package manager workflow changes - update this document when the repo layout or package manager workflow changes