feat(protocol): add shared WebSocket contract package
This commit is contained in:
@@ -8,7 +8,8 @@
|
|||||||
"start": "node src/index.js",
|
"start": "node src/index.js",
|
||||||
"build": "node -e \"console.log('vela-gateway: no build step required')\""
|
"build": "node -e \"console.log('vela-gateway: no build step required')\""
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"fastify": "^5.2.1"
|
"@vela/protocol": "0.0.0",
|
||||||
}
|
"fastify": "^5.2.1"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,9 @@
|
|||||||
const Fastify = require('fastify');
|
const Fastify = require('fastify');
|
||||||
|
const {
|
||||||
|
CLIENT_EVENT_TYPES,
|
||||||
|
PROTOCOL_PACKAGE_NAME,
|
||||||
|
SERVER_EVENT_TYPES
|
||||||
|
} = require('@vela/protocol');
|
||||||
|
|
||||||
function buildServer() {
|
function buildServer() {
|
||||||
const app = Fastify({ logger: true });
|
const app = Fastify({ logger: true });
|
||||||
@@ -7,7 +12,12 @@ function buildServer() {
|
|||||||
service: 'vela-gateway',
|
service: 'vela-gateway',
|
||||||
status: 'ok',
|
status: 'ok',
|
||||||
transport: 'http',
|
transport: 'http',
|
||||||
next: 'websocket session skeleton'
|
next: 'websocket session skeleton',
|
||||||
|
protocol: {
|
||||||
|
package: PROTOCOL_PACKAGE_NAME,
|
||||||
|
clientEventCount: CLIENT_EVENT_TYPES.length,
|
||||||
|
serverEventCount: SERVER_EVENT_TYPES.length
|
||||||
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
app.get('/health', async () => ({ status: 'ok' }));
|
app.get('/health', async () => ({ status: 'ok' }));
|
||||||
|
|||||||
13
apps/vela-protocol/package.json
Normal file
13
apps/vela-protocol/package.json
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
{
|
||||||
|
"name": "@vela/protocol",
|
||||||
|
"private": true,
|
||||||
|
"version": "0.0.0",
|
||||||
|
"type": "module",
|
||||||
|
"exports": {
|
||||||
|
".": {
|
||||||
|
"types": "./src/index.d.ts",
|
||||||
|
"import": "./src/index.js",
|
||||||
|
"require": "./src/index.cjs"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
52
apps/vela-protocol/src/index.cjs
Normal file
52
apps/vela-protocol/src/index.cjs
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
// Name of this package as a string constant, exported so consumers can report which contract package they loaded.
const PROTOCOL_PACKAGE_NAME = '@vela/protocol';
|
||||||
|
|
||||||
|
// Session state names; the array is frozen so it cannot be modified at runtime.
const SESSION_STATES = Object.freeze(['idle', 'listening', 'thinking', 'speaking']);
|
||||||
|
|
||||||
|
// Event type names for client events (frozen array).
// isClientEventType() tests membership against this list.
const CLIENT_EVENT_TYPES = Object.freeze([
|
||||||
|
'session.start',
|
||||||
|
'input_audio.append',
|
||||||
|
'input_audio.commit',
|
||||||
|
'response.cancel'
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Event type names for server events (frozen array).
// isServerEventType() tests membership against this list.
const SERVER_EVENT_TYPES = Object.freeze([
|
||||||
|
'session.ready',
|
||||||
|
'session.state',
|
||||||
|
'transcript.partial',
|
||||||
|
'transcript.final',
|
||||||
|
'response.text.delta',
|
||||||
|
'response.completed',
|
||||||
|
'error'
|
||||||
|
]);
|
||||||
|
|
||||||
|
/**
 * Wraps an event type and its payload in a `{ type, payload }` envelope object.
 *
 * No validation happens here: `type` is not checked against the known event
 * type lists and `payload` is stored as given.
 *
 * @param type - Event type identifier to place in the envelope.
 * @param payload - Event body to place in the envelope.
 * @returns A new object containing exactly the given `type` and `payload`.
 */
function createMessageEnvelope(type, payload) {
|
||||||
|
return { type, payload };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Structural check for the message envelope shape.
 *
 * Returns `true` only when `value` is truthy, has `typeof` 'object', carries a
 * string `type` property, and has a `payload` property. The `in` operator is
 * used for `payload`, so an inherited property also counts and the payload
 * value itself is never inspected. The leading truthiness test rejects `null`,
 * whose `typeof` is also 'object'.
 *
 * This does not verify that `type` is a known client or server event type.
 *
 * @param value - Any value to test.
 * @returns `true` if `value` has the envelope shape, otherwise `false`.
 */
function isMessageEnvelope(value) {
|
||||||
|
return Boolean(
|
||||||
|
value &&
|
||||||
|
typeof value === 'object' &&
|
||||||
|
typeof value.type === 'string' &&
|
||||||
|
'payload' in value
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `type` is one of the entries of CLIENT_EVENT_TYPES.
 *
 * @param type - Candidate event type; compared by exact (case-sensitive) match.
 * @returns `true` when `type` is listed in CLIENT_EVENT_TYPES, otherwise `false`.
 */
function isClientEventType(type) {
|
||||||
|
return CLIENT_EVENT_TYPES.includes(type);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `type` is one of the entries of SERVER_EVENT_TYPES.
 *
 * @param type - Candidate event type; compared by exact (case-sensitive) match.
 * @returns `true` when `type` is listed in SERVER_EVENT_TYPES, otherwise `false`.
 */
function isServerEventType(type) {
|
||||||
|
return SERVER_EVENT_TYPES.includes(type);
|
||||||
|
}
|
||||||
|
|
||||||
|
// CommonJS export surface: the four constants and four helper functions defined in this file.
module.exports = {
|
||||||
|
PROTOCOL_PACKAGE_NAME,
|
||||||
|
SESSION_STATES,
|
||||||
|
CLIENT_EVENT_TYPES,
|
||||||
|
SERVER_EVENT_TYPES,
|
||||||
|
createMessageEnvelope,
|
||||||
|
isMessageEnvelope,
|
||||||
|
isClientEventType,
|
||||||
|
isServerEventType
|
||||||
|
};
|
||||||
68
apps/vela-protocol/src/index.d.ts
vendored
Normal file
68
apps/vela-protocol/src/index.d.ts
vendored
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
/** The four session state names, as a string-literal union. */
export type SessionState = 'idle' | 'listening' | 'thinking' | 'speaking';
|
||||||
|
|
||||||
|
/**
 * Generic message envelope: an event `type` tag paired with its `payload`.
 *
 * @typeParam TType - String (literal) type of the event tag.
 * @typeParam TPayload - Type of the event body carried alongside the tag.
 */
export type MessageEnvelope<TType extends string, TPayload> = {
|
||||||
|
type: TType;
|
||||||
|
payload: TPayload;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Maps each client event type name to the shape of its payload.
 *
 * Entries typed `Record<string, never>` carry an empty object as payload;
 * only 'input_audio.append' has a field (`chunk`, a string).
 */
export type ClientEventPayloads = {
|
||||||
|
'session.start': Record<string, never>;
|
||||||
|
'input_audio.append': {
|
||||||
|
chunk: string;
|
||||||
|
};
|
||||||
|
'input_audio.commit': Record<string, never>;
|
||||||
|
'response.cancel': Record<string, never>;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Maps each server event type name to the shape of its payload.
 *
 * 'response.completed' carries an empty object (`Record<string, never>`).
 * The 'error' payload always has `code` and `message`; `retryable` is optional.
 */
export type ServerEventPayloads = {
|
||||||
|
'session.ready': {
|
||||||
|
sessionId: string;
|
||||||
|
};
|
||||||
|
'session.state': {
|
||||||
|
value: SessionState;
|
||||||
|
};
|
||||||
|
'transcript.partial': {
|
||||||
|
text: string;
|
||||||
|
};
|
||||||
|
'transcript.final': {
|
||||||
|
text: string;
|
||||||
|
};
|
||||||
|
'response.text.delta': {
|
||||||
|
text: string;
|
||||||
|
};
|
||||||
|
'response.completed': Record<string, never>;
|
||||||
|
'error': {
|
||||||
|
code: string;
|
||||||
|
message: string;
|
||||||
|
retryable?: boolean;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Union of the client event type names, derived from the keys of ClientEventPayloads. */
export type ClientEventType = keyof ClientEventPayloads;
|
||||||
|
/** Union of the server event type names, derived from the keys of ServerEventPayloads. */
export type ServerEventType = keyof ServerEventPayloads;
|
||||||
|
|
||||||
|
/**
 * Discriminated union of all client envelopes.
 *
 * Built by mapping every ClientEventType to its MessageEnvelope and then
 * indexing the mapped type by ClientEventType, so each `type` literal is
 * paired with its own payload shape.
 */
export type ClientEvent = {
|
||||||
|
[Type in ClientEventType]: MessageEnvelope<Type, ClientEventPayloads[Type]>;
|
||||||
|
}[ClientEventType];
|
||||||
|
|
||||||
|
/**
 * Discriminated union of all server envelopes.
 *
 * Built by mapping every ServerEventType to its MessageEnvelope and then
 * indexing the mapped type by ServerEventType, so each `type` literal is
 * paired with its own payload shape.
 */
export type ServerEvent = {
|
||||||
|
[Type in ServerEventType]: MessageEnvelope<Type, ServerEventPayloads[Type]>;
|
||||||
|
}[ServerEventType];
|
||||||
|
|
||||||
|
/** Package name constant, typed as the string literal '@vela/protocol'. */
export const PROTOCOL_PACKAGE_NAME: '@vela/protocol';
|
||||||
|
/** Read-only array of session state names. */
export const SESSION_STATES: readonly SessionState[];
|
||||||
|
/** Read-only array of client event type names. */
export const CLIENT_EVENT_TYPES: readonly ClientEventType[];
|
||||||
|
/** Read-only array of server event type names. */
export const SERVER_EVENT_TYPES: readonly ServerEventType[];
|
||||||
|
|
||||||
|
/**
 * Builds a message envelope for a client event.
 *
 * @param type - Client event type; selects the required payload shape.
 * @param payload - Payload matching `ClientEventPayloads[TType]`.
 * @returns An envelope typed with the given `type` and its payload shape.
 */
export function createMessageEnvelope<TType extends ClientEventType>(
|
||||||
|
type: TType,
|
||||||
|
payload: ClientEventPayloads[TType]
|
||||||
|
): MessageEnvelope<TType, ClientEventPayloads[TType]>;
|
||||||
|
/**
 * Builds a message envelope for a server event.
 *
 * @param type - Server event type; selects the required payload shape.
 * @param payload - Payload matching `ServerEventPayloads[TType]`.
 * @returns An envelope typed with the given `type` and its payload shape.
 */
export function createMessageEnvelope<TType extends ServerEventType>(
|
||||||
|
type: TType,
|
||||||
|
payload: ServerEventPayloads[TType]
|
||||||
|
): MessageEnvelope<TType, ServerEventPayloads[TType]>;
|
||||||
|
|
||||||
|
/**
 * Type guard that narrows an unknown value to `MessageEnvelope<string, unknown>`.
 * The narrowed type leaves `type` as a plain string and `payload` as unknown.
 */
export function isMessageEnvelope(value: unknown): value is MessageEnvelope<string, unknown>;
|
||||||
|
/** Type guard that narrows a string to ClientEventType. */
export function isClientEventType(type: string): type is ClientEventType;
|
||||||
|
/** Type guard that narrows a string to ServerEventType. */
export function isServerEventType(type: string): type is ServerEventType;
|
||||||
41
apps/vela-protocol/src/index.js
Normal file
41
apps/vela-protocol/src/index.js
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
// Name of this package as a string constant, exported so consumers can report which contract package they loaded.
export const PROTOCOL_PACKAGE_NAME = '@vela/protocol';
|
||||||
|
|
||||||
|
// Session state names; the array is frozen so it cannot be modified at runtime.
export const SESSION_STATES = Object.freeze(['idle', 'listening', 'thinking', 'speaking']);
|
||||||
|
|
||||||
|
// Event type names for client events (frozen array).
// isClientEventType() tests membership against this list.
export const CLIENT_EVENT_TYPES = Object.freeze([
|
||||||
|
'session.start',
|
||||||
|
'input_audio.append',
|
||||||
|
'input_audio.commit',
|
||||||
|
'response.cancel'
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Event type names for server events (frozen array).
// isServerEventType() tests membership against this list.
export const SERVER_EVENT_TYPES = Object.freeze([
|
||||||
|
'session.ready',
|
||||||
|
'session.state',
|
||||||
|
'transcript.partial',
|
||||||
|
'transcript.final',
|
||||||
|
'response.text.delta',
|
||||||
|
'response.completed',
|
||||||
|
'error'
|
||||||
|
]);
|
||||||
|
|
||||||
|
/**
 * Wraps an event type and its payload in a `{ type, payload }` envelope object.
 *
 * No validation happens here: `type` is not checked against the known event
 * type lists and `payload` is stored as given.
 *
 * @param type - Event type identifier to place in the envelope.
 * @param payload - Event body to place in the envelope.
 * @returns A new object containing exactly the given `type` and `payload`.
 */
export function createMessageEnvelope(type, payload) {
|
||||||
|
return { type, payload };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Structural check for the message envelope shape.
 *
 * Returns `true` only when `value` is truthy, has `typeof` 'object', carries a
 * string `type` property, and has a `payload` property. The `in` operator is
 * used for `payload`, so an inherited property also counts and the payload
 * value itself is never inspected. The leading truthiness test rejects `null`,
 * whose `typeof` is also 'object'.
 *
 * This does not verify that `type` is a known client or server event type.
 *
 * @param value - Any value to test.
 * @returns `true` if `value` has the envelope shape, otherwise `false`.
 */
export function isMessageEnvelope(value) {
|
||||||
|
return Boolean(
|
||||||
|
value &&
|
||||||
|
typeof value === 'object' &&
|
||||||
|
typeof value.type === 'string' &&
|
||||||
|
'payload' in value
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `type` is one of the entries of CLIENT_EVENT_TYPES.
 *
 * @param type - Candidate event type; compared by exact (case-sensitive) match.
 * @returns `true` when `type` is listed in CLIENT_EVENT_TYPES, otherwise `false`.
 */
export function isClientEventType(type) {
|
||||||
|
return CLIENT_EVENT_TYPES.includes(type);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `type` is one of the entries of SERVER_EVENT_TYPES.
 *
 * @param type - Candidate event type; compared by exact (case-sensitive) match.
 * @returns `true` when `type` is listed in SERVER_EVENT_TYPES, otherwise `false`.
 */
export function isServerEventType(type) {
|
||||||
|
return SERVER_EVENT_TYPES.includes(type);
|
||||||
|
}
|
||||||
@@ -10,10 +10,11 @@
|
|||||||
"preview": "vite preview",
|
"preview": "vite preview",
|
||||||
"check": "svelte-kit sync && svelte-check --tsconfig ./jsconfig.json"
|
"check": "svelte-kit sync && svelte-check --tsconfig ./jsconfig.json"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@sveltejs/adapter-auto": "^3.3.1",
|
"@vela/protocol": "0.0.0",
|
||||||
"@sveltejs/kit": "^2.17.1",
|
"@sveltejs/adapter-auto": "^3.3.1",
|
||||||
"svelte": "^5.19.5"
|
"@sveltejs/kit": "^2.17.1",
|
||||||
|
"svelte": "^5.19.5"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@sveltejs/vite-plugin-svelte": "^5.0.3",
|
"@sveltejs/vite-plugin-svelte": "^5.0.3",
|
||||||
|
|||||||
@@ -7,8 +7,14 @@
|
|||||||
</svelte:head>
|
</svelte:head>
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
|
import {
|
||||||
|
CLIENT_EVENT_TYPES,
|
||||||
|
PROTOCOL_PACKAGE_NAME,
|
||||||
|
SERVER_EVENT_TYPES
|
||||||
|
} from '@vela/protocol';
|
||||||
|
|
||||||
const appStatus = 'Bootstrapped';
|
const appStatus = 'Bootstrapped';
|
||||||
const nextFocus = 'Wire the voice session contract to the gateway.';
|
const nextFocus = `Build the voice session shell on top of ${PROTOCOL_PACKAGE_NAME}.`;
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="page">
|
<div class="page">
|
||||||
@@ -20,6 +26,11 @@
|
|||||||
streaming session UI will be added in later increments.
|
streaming session UI will be added in later increments.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
<p class="contract-note">
|
||||||
|
Shared protocol package loaded with {CLIENT_EVENT_TYPES.length} client event types and
|
||||||
|
{SERVER_EVENT_TYPES.length} server event types.
|
||||||
|
</p>
|
||||||
|
|
||||||
<div class="meta">
|
<div class="meta">
|
||||||
<div>
|
<div>
|
||||||
<span>Status</span>
|
<span>Status</span>
|
||||||
@@ -76,6 +87,10 @@
|
|||||||
color: #c7d6e8;
|
color: #c7d6e8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.contract-note {
|
||||||
|
margin-top: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
.meta {
|
.meta {
|
||||||
margin-top: 1.5rem;
|
margin-top: 1.5rem;
|
||||||
display: grid;
|
display: grid;
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ Establish the boundaries, protocol, and state model for the system before integr
|
|||||||
### Backlog Items
|
### Backlog Items
|
||||||
|
|
||||||
- [x] define repository structure for `vela-ui` and `vela-gateway`
|
- [x] define repository structure for `vela-ui` and `vela-gateway`
|
||||||
- define the WebSocket event contract used by the UI and gateway
|
- [x] define the WebSocket event contract used by the UI and gateway via the shared `@vela/protocol` package
|
||||||
- define the session state machine and interrupt semantics
|
- define the session state machine and interrupt semantics
|
||||||
- define provider adapter interfaces for STT, LLM, TTS, and tools
|
- define provider adapter interfaces for STT, LLM, TTS, and tools
|
||||||
- document error handling and cancellation behavior
|
- document error handling and cancellation behavior
|
||||||
@@ -180,4 +180,5 @@ Polish the system after the core voice loop is reliable.
|
|||||||
|
|
||||||
- `apps/vela-ui` now boots as a minimal SvelteKit app with a starter page
|
- `apps/vela-ui` now boots as a minimal SvelteKit app with a starter page
|
||||||
- `apps/vela-gateway` now boots as a minimal Fastify app with `/` and `/health` endpoints
|
- `apps/vela-gateway` now boots as a minimal Fastify app with `/` and `/health` endpoints
|
||||||
|
- `apps/vela-protocol` now provides the shared WebSocket event contract for the UI and gateway
|
||||||
- backend framework choice is now concrete: Fastify
|
- backend framework choice is now concrete: Fastify
|
||||||
|
|||||||
@@ -2,31 +2,87 @@
|
|||||||
|
|
||||||
## Event Protocol
|
## Event Protocol
|
||||||
|
|
||||||
|
The shared code-level contract lives in the Yarn workspace package `@vela/protocol` so both the
|
||||||
|
gateway and UI import the same event names and envelope shape.
|
||||||
|
|
||||||
|
## WebSocket Message Envelope
|
||||||
|
|
||||||
|
Every WebSocket message uses one envelope format:
|
||||||
|
|
||||||
|
```ts
|
||||||
|
type MessageEnvelope<TType extends string, TPayload> = {
|
||||||
|
type: TType;
|
||||||
|
payload: TPayload;
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
This increment intentionally keeps the envelope minimal:
|
||||||
|
|
||||||
|
- `type` identifies the event
|
||||||
|
- `payload` carries the event body
|
||||||
|
- no sequence numbers, timestamps, or protocol version fields yet
|
||||||
|
- future changes should be additive when possible
|
||||||
|
|
||||||
### Client → Server
|
### Client → Server
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
type ClientEvent =
|
type ClientEvent =
|
||||||
| { type: "start_listening" }
|
| { type: "session.start"; payload: Record<string, never> }
|
||||||
| { type: "stop_listening" }
|
| { type: "input_audio.append"; payload: { chunk: string } }
|
||||||
| { type: "audio_chunk"; data: string } // PCM16 base64
|
| { type: "input_audio.commit"; payload: Record<string, never> }
|
||||||
| { type: "interrupt" };
|
| { type: "response.cancel"; payload: Record<string, never> };
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Client event intent
|
||||||
|
|
||||||
|
- `session.start` initializes a voice session without locking in transport or auth details yet
|
||||||
|
- `input_audio.append` carries a chunk of captured input audio as an encoded string
|
||||||
|
- `input_audio.commit` marks the current buffered user turn as ready for downstream processing
|
||||||
|
- `response.cancel` interrupts the active listen/think/speak flow
|
||||||
|
|
||||||
### Server → Client
|
### Server → Client
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
type ServerEvent =
|
type ServerEvent =
|
||||||
| { type: "state"; value: "idle" | "listening" | "thinking" | "speaking" }
|
| { type: "session.ready"; payload: { sessionId: string } }
|
||||||
| { type: "partial_transcript"; text: string }
|
| {
|
||||||
| { type: "final_transcript"; text: string }
|
type: "session.state";
|
||||||
| { type: "assistant_text_delta"; text: string }
|
payload: { value: "idle" | "listening" | "thinking" | "speaking" };
|
||||||
| { type: "tool_call_started"; tool: string }
|
}
|
||||||
| { type: "tool_call_finished"; tool: string; result: unknown }
|
| { type: "transcript.partial"; payload: { text: string } }
|
||||||
| { type: "tts_audio_chunk"; data: string }
|
| { type: "transcript.final"; payload: { text: string } }
|
||||||
| { type: "assistant_done" }
|
| { type: "response.text.delta"; payload: { text: string } }
|
||||||
| { type: "error"; message: string };
|
| { type: "response.completed"; payload: Record<string, never> }
|
||||||
|
| {
|
||||||
|
type: "error";
|
||||||
|
payload: { code: string; message: string; retryable?: boolean };
|
||||||
|
};
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Server event intent
|
||||||
|
|
||||||
|
- `session.ready` confirms that the gateway created a session identity
|
||||||
|
- `session.state` exposes the coarse session phase needed by the later UI shell
|
||||||
|
- `transcript.partial` and `transcript.final` support incremental and completed user text display
|
||||||
|
- `response.text.delta` supports streamed assistant text without committing to audio output details yet
|
||||||
|
- `response.completed` marks the current assistant turn as done
|
||||||
|
- `error` is the minimal recoverable failure shape for both UI and gateway work
|
||||||
|
|
||||||
|
## Contract Scope for This Increment
|
||||||
|
|
||||||
|
This contract is intentionally limited to the smallest event set needed to unblock:
|
||||||
|
|
||||||
|
- the later gateway WebSocket session skeleton
|
||||||
|
- the later UI voice-session shell
|
||||||
|
|
||||||
|
Explicitly deferred for later increments:
|
||||||
|
|
||||||
|
- tool-calling events
|
||||||
|
- streamed TTS/output-audio events
|
||||||
|
- reconnect/resume semantics
|
||||||
|
- protocol version negotiation
|
||||||
|
- provider-specific metadata fields
|
||||||
|
|
||||||
## State Machine
|
## State Machine
|
||||||
|
|
||||||
```text
|
```text
|
||||||
@@ -37,13 +93,13 @@ idle
|
|||||||
→ idle
|
→ idle
|
||||||
```
|
```
|
||||||
|
|
||||||
Interrupt can occur at:
|
`response.cancel` can occur at:
|
||||||
|
|
||||||
- listening → restart
|
- listening → restart
|
||||||
- thinking → cancel
|
- thinking → cancel
|
||||||
- speaking → stop immediately
|
- speaking → stop immediately
|
||||||
|
|
||||||
## Interrupt Handling Requirements
|
## `response.cancel` Handling Requirements
|
||||||
|
|
||||||
- immediate stop of TTS playback
|
- immediate stop of TTS playback
|
||||||
- immediate stop of LLM streaming
|
- immediate stop of LLM streaming
|
||||||
@@ -51,12 +107,14 @@ Interrupt can occur at:
|
|||||||
|
|
||||||
### Mechanism
|
### Mechanism
|
||||||
|
|
||||||
The `interrupt` event cancels:
|
The `response.cancel` event cancels:
|
||||||
|
|
||||||
- TTS process
|
- TTS process
|
||||||
- current LLM request
|
- current LLM request
|
||||||
- tool execution when possible
|
- tool execution when possible
|
||||||
|
|
||||||
|
This shared contract uses `response.cancel` consistently for that cancellation signal.
|
||||||
|
|
||||||
## Protocol Notes for Implementation
|
## Protocol Notes for Implementation
|
||||||
|
|
||||||
- keep the protocol backward compatible when possible
|
- keep the protocol backward compatible when possible
|
||||||
|
|||||||
Reference in New Issue
Block a user