feat(protocol): add shared WebSocket contract package

This commit is contained in:
2026-04-08 18:15:31 +02:00
parent ffab815f6b
commit 4fd27db11e
10 changed files with 286 additions and 26 deletions

View File

@@ -9,6 +9,7 @@
"build": "node -e \"console.log('vela-gateway: no build step required')\"" "build": "node -e \"console.log('vela-gateway: no build step required')\""
}, },
"dependencies": { "dependencies": {
"@vela/protocol": "0.0.0",
"fastify": "^5.2.1" "fastify": "^5.2.1"
} }
} }

View File

@@ -1,4 +1,9 @@
const Fastify = require('fastify'); const Fastify = require('fastify');
const {
CLIENT_EVENT_TYPES,
PROTOCOL_PACKAGE_NAME,
SERVER_EVENT_TYPES
} = require('@vela/protocol');
function buildServer() { function buildServer() {
const app = Fastify({ logger: true }); const app = Fastify({ logger: true });
@@ -7,7 +12,12 @@ function buildServer() {
service: 'vela-gateway', service: 'vela-gateway',
status: 'ok', status: 'ok',
transport: 'http', transport: 'http',
next: 'websocket session skeleton' next: 'websocket session skeleton',
protocol: {
package: PROTOCOL_PACKAGE_NAME,
clientEventCount: CLIENT_EVENT_TYPES.length,
serverEventCount: SERVER_EVENT_TYPES.length
}
})); }));
app.get('/health', async () => ({ status: 'ok' })); app.get('/health', async () => ({ status: 'ok' }));

View File

@@ -0,0 +1,13 @@
{
"name": "@vela/protocol",
"private": true,
"version": "0.0.0",
"type": "module",
"exports": {
".": {
"types": "./src/index.d.ts",
"import": "./src/index.js",
"require": "./src/index.cjs"
}
}
}

View File

@@ -0,0 +1,52 @@
/** Published name of this package, exposed so consumers can report which contract they loaded. */
const PROTOCOL_PACKAGE_NAME = '@vela/protocol';

/** Frozen list of every session phase. */
const SESSION_STATES = Object.freeze([
  'idle',
  'listening',
  'thinking',
  'speaking'
]);

/** Frozen list of the event names a client sends to the server. */
const CLIENT_EVENT_TYPES = Object.freeze([
  'session.start',
  'input_audio.append',
  'input_audio.commit',
  'response.cancel'
]);

/** Frozen list of the event names the server sends to a client. */
const SERVER_EVENT_TYPES = Object.freeze([
  'session.ready',
  'session.state',
  'transcript.partial',
  'transcript.final',
  'response.text.delta',
  'response.completed',
  'error'
]);
/**
 * Wraps an event type and its body in the `{ type, payload }` envelope.
 * No validation is performed on either argument.
 *
 * @param type - Event name to place in the envelope.
 * @param payload - Event body to place in the envelope.
 * @returns A new object with exactly the keys `type` and `payload`.
 */
function createMessageEnvelope(type, payload) {
  const envelope = { type: type, payload: payload };
  return envelope;
}
/**
 * Structural check for the message envelope shape.
 *
 * Passes only for a non-null object whose `type` is a string and which has a
 * `payload` property (own or inherited, any value). The `type` string is not
 * checked against the known event names.
 *
 * @param value - Arbitrary value to inspect.
 * @returns `true` when the value looks like an envelope, otherwise `false`.
 */
function isMessageEnvelope(value) {
  // Reject null/undefined, primitives and functions up front.
  if (!value || typeof value !== 'object') {
    return false;
  }

  const hasStringType = typeof value.type === 'string';
  return hasStringType && 'payload' in value;
}
/**
 * Reports whether a name is one of the client-to-server event names.
 *
 * @param type - Candidate event name.
 * @returns `true` when `type` is listed in `CLIENT_EVENT_TYPES`.
 */
function isClientEventType(type) {
  const isKnownClientEvent = CLIENT_EVENT_TYPES.includes(type);
  return isKnownClientEvent;
}
/**
 * Reports whether a name is one of the server-to-client event names.
 *
 * @param type - Candidate event name.
 * @returns `true` when `type` is listed in `SERVER_EVENT_TYPES`.
 */
function isServerEventType(type) {
  const isKnownServerEvent = SERVER_EVENT_TYPES.includes(type);
  return isKnownServerEvent;
}
// CommonJS export surface: four constants followed by four helper functions.
module.exports = {
PROTOCOL_PACKAGE_NAME,
SESSION_STATES,
CLIENT_EVENT_TYPES,
SERVER_EVENT_TYPES,
createMessageEnvelope,
isMessageEnvelope,
isClientEventType,
isServerEventType
};

68
apps/vela-protocol/src/index.d.ts vendored Normal file
View File

@@ -0,0 +1,68 @@
/** Coarse phase of a session, expressed as one of four string literals. */
export type SessionState = 'idle' | 'listening' | 'thinking' | 'speaking';
/**
 * Generic message wrapper pairing an event name with its body.
 *
 * @typeParam TType - String literal type identifying the event.
 * @typeParam TPayload - Shape of the body carried with that event.
 */
export type MessageEnvelope<TType extends string, TPayload> = {
// Event name used to discriminate between message kinds.
type: TType;
// Event body; its shape depends on `type`.
payload: TPayload;
};
/**
 * Maps each client-to-server event name to the shape of its payload.
 * Entries typed `Record<string, never>` describe a payload with no fields.
 */
export type ClientEventPayloads = {
'session.start': Record<string, never>;
'input_audio.append': {
// Audio data as a string; the encoding is not specified by this type.
chunk: string;
};
'input_audio.commit': Record<string, never>;
'response.cancel': Record<string, never>;
};
/**
 * Maps each server-to-client event name to the shape of its payload.
 * Entries typed `Record<string, never>` describe a payload with no fields.
 */
export type ServerEventPayloads = {
'session.ready': {
// Identifier of the session this event announces.
sessionId: string;
};
'session.state': {
// Current session phase.
value: SessionState;
};
'transcript.partial': {
text: string;
};
'transcript.final': {
text: string;
};
'response.text.delta': {
text: string;
};
'response.completed': Record<string, never>;
'error': {
code: string;
message: string;
// Optional flag; when omitted, no retry hint is given.
retryable?: boolean;
};
};
/** Union of all client-to-server event names, derived from the payload map keys. */
export type ClientEventType = keyof ClientEventPayloads;
/** Union of all server-to-client event names, derived from the payload map keys. */
export type ServerEventType = keyof ServerEventPayloads;
/**
 * Discriminated union of every client envelope: one `MessageEnvelope` member
 * per client event name, each paired with that event's payload type.
 */
export type ClientEvent = {
[Type in ClientEventType]: MessageEnvelope<Type, ClientEventPayloads[Type]>;
}[ClientEventType];
/**
 * Discriminated union of every server envelope: one `MessageEnvelope` member
 * per server event name, each paired with that event's payload type.
 */
export type ServerEvent = {
[Type in ServerEventType]: MessageEnvelope<Type, ServerEventPayloads[Type]>;
}[ServerEventType];
/** Package name constant, typed as the exact literal. */
export const PROTOCOL_PACKAGE_NAME: '@vela/protocol';
/** Read-only runtime list of session states. */
export const SESSION_STATES: readonly SessionState[];
/** Read-only runtime list of client-to-server event names. */
export const CLIENT_EVENT_TYPES: readonly ClientEventType[];
/** Read-only runtime list of server-to-client event names. */
export const SERVER_EVENT_TYPES: readonly ServerEventType[];
/**
 * Builds an envelope for a client event.
 *
 * @param type - Client event name; selects the required payload shape.
 * @param payload - Body matching `ClientEventPayloads[TType]`.
 * @returns An envelope typed with the given event name and payload.
 */
export function createMessageEnvelope<TType extends ClientEventType>(
type: TType,
payload: ClientEventPayloads[TType]
): MessageEnvelope<TType, ClientEventPayloads[TType]>;
/**
 * Builds an envelope for a server event.
 *
 * @param type - Server event name; selects the required payload shape.
 * @param payload - Body matching `ServerEventPayloads[TType]`.
 * @returns An envelope typed with the given event name and payload.
 */
export function createMessageEnvelope<TType extends ServerEventType>(
type: TType,
payload: ServerEventPayloads[TType]
): MessageEnvelope<TType, ServerEventPayloads[TType]>;
/**
 * Type guard narrowing an unknown value to a generic envelope. The narrowed
 * type leaves `type` as `string` and `payload` as `unknown`, so callers still
 * need to check the event name and payload shape themselves.
 */
export function isMessageEnvelope(value: unknown): value is MessageEnvelope<string, unknown>;
/** Type guard narrowing a string to a client-to-server event name. */
export function isClientEventType(type: string): type is ClientEventType;
/** Type guard narrowing a string to a server-to-client event name. */
export function isServerEventType(type: string): type is ServerEventType;

View File

@@ -0,0 +1,41 @@
/** Published name of this package, exposed so consumers can report which contract they loaded. */
export const PROTOCOL_PACKAGE_NAME = '@vela/protocol';

/** Frozen list of every session phase. */
export const SESSION_STATES = Object.freeze([
  'idle',
  'listening',
  'thinking',
  'speaking'
]);

/** Frozen list of the event names a client sends to the server. */
export const CLIENT_EVENT_TYPES = Object.freeze([
  'session.start',
  'input_audio.append',
  'input_audio.commit',
  'response.cancel'
]);

/** Frozen list of the event names the server sends to a client. */
export const SERVER_EVENT_TYPES = Object.freeze([
  'session.ready',
  'session.state',
  'transcript.partial',
  'transcript.final',
  'response.text.delta',
  'response.completed',
  'error'
]);
/**
 * Wraps an event type and its body in the `{ type, payload }` envelope.
 * No validation is performed on either argument.
 *
 * @param type - Event name to place in the envelope.
 * @param payload - Event body to place in the envelope.
 * @returns A new object with exactly the keys `type` and `payload`.
 */
export function createMessageEnvelope(type, payload) {
  const envelope = { type: type, payload: payload };
  return envelope;
}
/**
 * Structural check for the message envelope shape.
 *
 * Passes only for a non-null object whose `type` is a string and which has a
 * `payload` property (own or inherited, any value). The `type` string is not
 * checked against the known event names.
 *
 * @param value - Arbitrary value to inspect.
 * @returns `true` when the value looks like an envelope, otherwise `false`.
 */
export function isMessageEnvelope(value) {
  // Reject null/undefined, primitives and functions up front.
  if (!value || typeof value !== 'object') {
    return false;
  }

  const hasStringType = typeof value.type === 'string';
  return hasStringType && 'payload' in value;
}
/**
 * Reports whether a name is one of the client-to-server event names.
 *
 * @param type - Candidate event name.
 * @returns `true` when `type` is listed in `CLIENT_EVENT_TYPES`.
 */
export function isClientEventType(type) {
  const isKnownClientEvent = CLIENT_EVENT_TYPES.includes(type);
  return isKnownClientEvent;
}
/**
 * Reports whether a name is one of the server-to-client event names.
 *
 * @param type - Candidate event name.
 * @returns `true` when `type` is listed in `SERVER_EVENT_TYPES`.
 */
export function isServerEventType(type) {
  const isKnownServerEvent = SERVER_EVENT_TYPES.includes(type);
  return isKnownServerEvent;
}

View File

@@ -11,6 +11,7 @@
"check": "svelte-kit sync && svelte-check --tsconfig ./jsconfig.json" "check": "svelte-kit sync && svelte-check --tsconfig ./jsconfig.json"
}, },
"dependencies": { "dependencies": {
"@vela/protocol": "0.0.0",
"@sveltejs/adapter-auto": "^3.3.1", "@sveltejs/adapter-auto": "^3.3.1",
"@sveltejs/kit": "^2.17.1", "@sveltejs/kit": "^2.17.1",
"svelte": "^5.19.5" "svelte": "^5.19.5"

View File

@@ -7,8 +7,14 @@
</svelte:head> </svelte:head>
<script> <script>
import {
CLIENT_EVENT_TYPES,
PROTOCOL_PACKAGE_NAME,
SERVER_EVENT_TYPES
} from '@vela/protocol';
const appStatus = 'Bootstrapped'; const appStatus = 'Bootstrapped';
const nextFocus = 'Wire the voice session contract to the gateway.'; const nextFocus = `Build the voice session shell on top of ${PROTOCOL_PACKAGE_NAME}.`;
</script> </script>
<div class="page"> <div class="page">
@@ -20,6 +26,11 @@
streaming session UI will be added in later increments. streaming session UI will be added in later increments.
</p> </p>
<p class="contract-note">
Shared protocol package loaded with {CLIENT_EVENT_TYPES.length} client event types and
{SERVER_EVENT_TYPES.length} server event types.
</p>
<div class="meta"> <div class="meta">
<div> <div>
<span>Status</span> <span>Status</span>
@@ -76,6 +87,10 @@
color: #c7d6e8; color: #c7d6e8;
} }
.contract-note {
margin-top: 1rem;
}
.meta { .meta {
margin-top: 1.5rem; margin-top: 1.5rem;
display: grid; display: grid;

View File

@@ -11,7 +11,7 @@ Establish the boundaries, protocol, and state model for the system before integr
### Backlog Items ### Backlog Items
- [x] define repository structure for `vela-ui` and `vela-gateway` - [x] define repository structure for `vela-ui` and `vela-gateway`
- define the WebSocket event contract used by the UI and gateway - [x] define the WebSocket event contract used by the UI and gateway via a shared package
- define the session state machine and interrupt semantics - define the session state machine and interrupt semantics
- define provider adapter interfaces for STT, LLM, TTS, and tools - define provider adapter interfaces for STT, LLM, TTS, and tools
- document error handling and cancellation behavior - document error handling and cancellation behavior
@@ -180,4 +180,5 @@ Polish the system after the core voice loop is reliable.
- `apps/vela-ui` now boots as a minimal SvelteKit app with a starter page - `apps/vela-ui` now boots as a minimal SvelteKit app with a starter page
- `apps/vela-gateway` now boots as a minimal Fastify app with `/` and `/health` endpoints - `apps/vela-gateway` now boots as a minimal Fastify app with `/` and `/health` endpoints
- `apps/vela-protocol` now provides the shared WebSocket event contract for the UI and gateway
- backend framework choice is now concrete: Fastify - backend framework choice is now concrete: Fastify

View File

@@ -2,31 +2,87 @@
## Event Protocol ## Event Protocol
The shared code-level contract lives in the Yarn workspace package `@vela/protocol` so both the
gateway and UI import the same event names and envelope shape.
## WebSocket Message Envelope
Every WebSocket message uses one envelope format:
```ts
type MessageEnvelope<TType extends string, TPayload> = {
type: TType;
payload: TPayload;
};
```
This increment intentionally keeps the envelope minimal:
- `type` identifies the event
- `payload` carries the event body
- no sequence numbers, timestamps, or protocol version fields yet
- future changes should be additive when possible
### Client → Server ### Client → Server
```ts ```ts
type ClientEvent = type ClientEvent =
| { type: "start_listening" } | { type: "session.start"; payload: {} }
| { type: "stop_listening" } | { type: "input_audio.append"; payload: { chunk: string } }
| { type: "audio_chunk"; data: string } // PCM16 base64 | { type: "input_audio.commit"; payload: {} }
| { type: "interrupt" }; | { type: "response.cancel"; payload: {} };
``` ```
#### Client event intent
- `session.start` initializes a voice session without locking in transport or auth details yet
- `input_audio.append` carries a chunk of captured input audio as an encoded string
- `input_audio.commit` marks the current buffered user turn as ready for downstream processing
- `response.cancel` interrupts the active listen/think/speak flow
### Server → Client ### Server → Client
```ts ```ts
type ServerEvent = type ServerEvent =
| { type: "state"; value: "idle" | "listening" | "thinking" | "speaking" } | { type: "session.ready"; payload: { sessionId: string } }
| { type: "partial_transcript"; text: string } | {
| { type: "final_transcript"; text: string } type: "session.state";
| { type: "assistant_text_delta"; text: string } payload: { value: "idle" | "listening" | "thinking" | "speaking" };
| { type: "tool_call_started"; tool: string } }
| { type: "tool_call_finished"; tool: string; result: unknown } | { type: "transcript.partial"; payload: { text: string } }
| { type: "tts_audio_chunk"; data: string } | { type: "transcript.final"; payload: { text: string } }
| { type: "assistant_done" } | { type: "response.text.delta"; payload: { text: string } }
| { type: "error"; message: string }; | { type: "response.completed"; payload: {} }
| {
type: "error";
payload: { code: string; message: string; retryable?: boolean };
};
``` ```
#### Server event intent
- `session.ready` confirms that the gateway created a session identity
- `session.state` exposes the coarse session phase needed by the later UI shell
- `transcript.partial` and `transcript.final` support incremental and completed user text display
- `response.text.delta` supports streamed assistant text without committing to audio output details yet
- `response.completed` marks the current assistant turn as done
- `error` is the minimal recoverable failure shape for both UI and gateway work
## Contract Scope for This Increment
This contract is intentionally limited to the smallest event set needed to unblock:
- the later gateway WebSocket session skeleton
- the later UI voice-session shell
Explicitly deferred for later increments:
- tool-calling events
- streamed TTS/output-audio events
- reconnect/resume semantics
- protocol version negotiation
- provider-specific metadata fields
## State Machine ## State Machine
```text ```text
@@ -37,13 +93,13 @@ idle
→ idle → idle
``` ```
Interrupt can occur at: `response.cancel` can occur at:
- listening → restart - listening → restart
- thinking → cancel - thinking → cancel
- speaking → stop immediately - speaking → stop immediately
## Interrupt Handling Requirements ## `response.cancel` Handling Requirements
- immediate stop of TTS playback - immediate stop of TTS playback
- immediate stop of LLM streaming - immediate stop of LLM streaming
@@ -51,12 +107,14 @@ Interrupt can occur at:
### Mechanism ### Mechanism
The `interrupt` event cancels: The `response.cancel` event cancels:
- TTS process - TTS process
- current LLM request - current LLM request
- tool execution when possible - tool execution when possible
This shared contract uses `response.cancel` consistently for that cancellation signal.
## Protocol Notes for Implementation ## Protocol Notes for Implementation
- keep the protocol backward compatible when possible - keep the protocol backward compatible when possible