feat(protocol): add shared WebSocket contract package
This commit is contained in:
@@ -8,7 +8,8 @@
|
||||
"start": "node src/index.js",
|
||||
"build": "node -e \"console.log('vela-gateway: no build step required')\""
|
||||
},
|
||||
"dependencies": {
|
||||
"fastify": "^5.2.1"
|
||||
}
|
||||
"dependencies": {
|
||||
"@vela/protocol": "0.0.0",
|
||||
"fastify": "^5.2.1"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
const Fastify = require('fastify');
|
||||
const {
|
||||
CLIENT_EVENT_TYPES,
|
||||
PROTOCOL_PACKAGE_NAME,
|
||||
SERVER_EVENT_TYPES
|
||||
} = require('@vela/protocol');
|
||||
|
||||
function buildServer() {
|
||||
const app = Fastify({ logger: true });
|
||||
@@ -7,7 +12,12 @@ function buildServer() {
|
||||
service: 'vela-gateway',
|
||||
status: 'ok',
|
||||
transport: 'http',
|
||||
next: 'websocket session skeleton'
|
||||
next: 'websocket session skeleton',
|
||||
protocol: {
|
||||
package: PROTOCOL_PACKAGE_NAME,
|
||||
clientEventCount: CLIENT_EVENT_TYPES.length,
|
||||
serverEventCount: SERVER_EVENT_TYPES.length
|
||||
}
|
||||
}));
|
||||
|
||||
app.get('/health', async () => ({ status: 'ok' }));
|
||||
|
||||
13
apps/vela-protocol/package.json
Normal file
13
apps/vela-protocol/package.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"name": "@vela/protocol",
|
||||
"private": true,
|
||||
"version": "0.0.0",
|
||||
"type": "module",
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./src/index.d.ts",
|
||||
"import": "./src/index.js",
|
||||
"require": "./src/index.cjs"
|
||||
}
|
||||
}
|
||||
}
|
||||
52
apps/vela-protocol/src/index.cjs
Normal file
52
apps/vela-protocol/src/index.cjs
Normal file
@@ -0,0 +1,52 @@
|
||||
/** Name of this package. */
const PROTOCOL_PACKAGE_NAME = '@vela/protocol';

/** Session phases carried by the `session.state` server event (frozen). */
const SESSION_STATES = Object.freeze(['idle', 'listening', 'thinking', 'speaking']);

/** Names of the client-to-server events (frozen). */
const CLIENT_EVENT_TYPES = Object.freeze([
  'session.start',
  'input_audio.append',
  'input_audio.commit',
  'response.cancel'
]);

/** Names of the server-to-client events (frozen). */
const SERVER_EVENT_TYPES = Object.freeze([
  'session.ready',
  'session.state',
  'transcript.partial',
  'transcript.final',
  'response.text.delta',
  'response.completed',
  'error'
]);
|
||||
|
||||
/**
 * Wraps an event name and its body in the `{ type, payload }` envelope.
 *
 * @param {string} type Event name stored in the envelope's `type` field.
 * @param {*} payload Event body stored, as given, in the envelope's `payload` field.
 * @returns {{ type: string, payload: * }} A new envelope object.
 */
function createMessageEnvelope(type, payload) {
  return { type, payload };
}
|
||||
|
||||
/**
 * Reports whether `value` has the envelope shape: a non-null object whose
 * `type` is a string and which has a `payload` property.
 *
 * Only the envelope shape is checked; the event name and the payload contents
 * are not validated here.
 *
 * @param {*} value Candidate value.
 * @returns {boolean} `true` when the shape matches, otherwise `false`.
 */
function isMessageEnvelope(value) {
  return Boolean(
    // The truthiness check comes first so `null` (whose typeof is 'object')
    // never reaches the property reads below.
    value &&
      typeof value === 'object' &&
      typeof value.type === 'string' &&
      // `in` accepts a present-but-undefined payload.
      'payload' in value
  );
}
|
||||
|
||||
/**
 * Reports whether `type` is one of the names listed in `CLIENT_EVENT_TYPES`.
 *
 * @param {string} type Event name to look up.
 * @returns {boolean} `true` when the name is listed, otherwise `false`.
 */
function isClientEventType(type) {
  return CLIENT_EVENT_TYPES.includes(type);
}
|
||||
|
||||
/**
 * Reports whether `type` is one of the names listed in `SERVER_EVENT_TYPES`.
 *
 * @param {string} type Event name to look up.
 * @returns {boolean} `true` when the name is listed, otherwise `false`.
 */
function isServerEventType(type) {
  return SERVER_EVENT_TYPES.includes(type);
}
|
||||
|
||||
module.exports = {
|
||||
PROTOCOL_PACKAGE_NAME,
|
||||
SESSION_STATES,
|
||||
CLIENT_EVENT_TYPES,
|
||||
SERVER_EVENT_TYPES,
|
||||
createMessageEnvelope,
|
||||
isMessageEnvelope,
|
||||
isClientEventType,
|
||||
isServerEventType
|
||||
};
|
||||
68
apps/vela-protocol/src/index.d.ts
vendored
Normal file
68
apps/vela-protocol/src/index.d.ts
vendored
Normal file
@@ -0,0 +1,68 @@
|
||||
/** Session phase reported by the `session.state` server event. */
export type SessionState = 'idle' | 'listening' | 'thinking' | 'speaking';
|
||||
|
||||
/**
 * Envelope used for every WebSocket message: an event name plus its body.
 *
 * @typeParam TType - Event name literal.
 * @typeParam TPayload - Shape of the event body.
 */
export type MessageEnvelope<TType extends string, TPayload> = {
  /** Identifies the event. */
  type: TType;
  /** Carries the event body. */
  payload: TPayload;
};
|
||||
|
||||
/**
 * Payload shape for each client-to-server event, keyed by event name.
 * `Record<string, never>` marks events whose payload is an empty object.
 */
export type ClientEventPayloads = {
  'session.start': Record<string, never>;
  'input_audio.append': {
    /** One chunk of input audio, as a string. */
    chunk: string;
  };
  'input_audio.commit': Record<string, never>;
  'response.cancel': Record<string, never>;
};
|
||||
|
||||
/**
 * Payload shape for each server-to-client event, keyed by event name.
 * `Record<string, never>` marks events whose payload is an empty object.
 */
export type ServerEventPayloads = {
  'session.ready': {
    /** Identifier of the session. */
    sessionId: string;
  };
  'session.state': {
    /** Current session phase. */
    value: SessionState;
  };
  'transcript.partial': {
    text: string;
  };
  'transcript.final': {
    text: string;
  };
  'response.text.delta': {
    text: string;
  };
  'response.completed': Record<string, never>;
  'error': {
    code: string;
    message: string;
    /** Optional; may be omitted. */
    retryable?: boolean;
  };
};
|
||||
|
||||
/** Union of the client-to-server event names (the keys of `ClientEventPayloads`). */
export type ClientEventType = keyof ClientEventPayloads;

/** Union of the server-to-client event names (the keys of `ServerEventPayloads`). */
export type ServerEventType = keyof ServerEventPayloads;
|
||||
|
||||
/**
 * Union of all client envelopes. The mapped type builds one envelope per event
 * name and the trailing index collapses them into a union, so `type` acts as
 * the discriminant that selects the matching `payload` shape.
 */
export type ClientEvent = {
  [Type in ClientEventType]: MessageEnvelope<Type, ClientEventPayloads[Type]>;
}[ClientEventType];
|
||||
|
||||
/**
 * Union of all server envelopes. The mapped type builds one envelope per event
 * name and the trailing index collapses them into a union, so `type` acts as
 * the discriminant that selects the matching `payload` shape.
 */
export type ServerEvent = {
  [Type in ServerEventType]: MessageEnvelope<Type, ServerEventPayloads[Type]>;
}[ServerEventType];
|
||||
|
||||
/** Name of this package. */
export const PROTOCOL_PACKAGE_NAME: '@vela/protocol';

/** Runtime list of session states. */
export const SESSION_STATES: readonly SessionState[];

/** Runtime list of client-to-server event names. */
export const CLIENT_EVENT_TYPES: readonly ClientEventType[];

/** Runtime list of server-to-client event names. */
export const SERVER_EVENT_TYPES: readonly ServerEventType[];
|
||||
|
||||
/**
 * Builds a `{ type, payload }` envelope for a client event.
 *
 * @param type - Client event name.
 * @param payload - Body whose shape is tied to `type` through `ClientEventPayloads`.
 * @returns The envelope, typed by the given event name.
 */
export function createMessageEnvelope<TType extends ClientEventType>(
  type: TType,
  payload: ClientEventPayloads[TType]
): MessageEnvelope<TType, ClientEventPayloads[TType]>;
/**
 * Builds a `{ type, payload }` envelope for a server event.
 *
 * @param type - Server event name.
 * @param payload - Body whose shape is tied to `type` through `ServerEventPayloads`.
 * @returns The envelope, typed by the given event name.
 */
export function createMessageEnvelope<TType extends ServerEventType>(
  type: TType,
  payload: ServerEventPayloads[TType]
): MessageEnvelope<TType, ServerEventPayloads[TType]>;
|
||||
|
||||
/**
 * Narrows an unknown value to the generic envelope shape. The event name and
 * payload stay untyped (`string` / `unknown`) after narrowing.
 */
export function isMessageEnvelope(value: unknown): value is MessageEnvelope<string, unknown>;

/** Narrows a string to a client-to-server event name. */
export function isClientEventType(type: string): type is ClientEventType;

/** Narrows a string to a server-to-client event name. */
export function isServerEventType(type: string): type is ServerEventType;
|
||||
41
apps/vela-protocol/src/index.js
Normal file
41
apps/vela-protocol/src/index.js
Normal file
@@ -0,0 +1,41 @@
|
||||
/** Name of this package. */
export const PROTOCOL_PACKAGE_NAME = '@vela/protocol';

/** Session phases carried by the `session.state` server event (frozen). */
export const SESSION_STATES = Object.freeze(['idle', 'listening', 'thinking', 'speaking']);

/** Names of the client-to-server events (frozen). */
export const CLIENT_EVENT_TYPES = Object.freeze([
  'session.start',
  'input_audio.append',
  'input_audio.commit',
  'response.cancel'
]);

/** Names of the server-to-client events (frozen). */
export const SERVER_EVENT_TYPES = Object.freeze([
  'session.ready',
  'session.state',
  'transcript.partial',
  'transcript.final',
  'response.text.delta',
  'response.completed',
  'error'
]);
|
||||
|
||||
/**
 * Wraps an event name and its body in the `{ type, payload }` envelope.
 *
 * @param {string} type Event name stored in the envelope's `type` field.
 * @param {*} payload Event body stored, as given, in the envelope's `payload` field.
 * @returns {{ type: string, payload: * }} A new envelope object.
 */
export function createMessageEnvelope(type, payload) {
  return { type, payload };
}
|
||||
|
||||
/**
 * Reports whether `value` has the envelope shape: a non-null object whose
 * `type` is a string and which has a `payload` property.
 *
 * Only the envelope shape is checked; the event name and the payload contents
 * are not validated here.
 *
 * @param {*} value Candidate value.
 * @returns {boolean} `true` when the shape matches, otherwise `false`.
 */
export function isMessageEnvelope(value) {
  return Boolean(
    // The truthiness check comes first so `null` (whose typeof is 'object')
    // never reaches the property reads below.
    value &&
      typeof value === 'object' &&
      typeof value.type === 'string' &&
      // `in` accepts a present-but-undefined payload.
      'payload' in value
  );
}
|
||||
|
||||
/**
 * Reports whether `type` is one of the names listed in `CLIENT_EVENT_TYPES`.
 *
 * @param {string} type Event name to look up.
 * @returns {boolean} `true` when the name is listed, otherwise `false`.
 */
export function isClientEventType(type) {
  return CLIENT_EVENT_TYPES.includes(type);
}
|
||||
|
||||
/**
 * Reports whether `type` is one of the names listed in `SERVER_EVENT_TYPES`.
 *
 * @param {string} type Event name to look up.
 * @returns {boolean} `true` when the name is listed, otherwise `false`.
 */
export function isServerEventType(type) {
  return SERVER_EVENT_TYPES.includes(type);
}
|
||||
@@ -10,10 +10,11 @@
|
||||
"preview": "vite preview",
|
||||
"check": "svelte-kit sync && svelte-check --tsconfig ./jsconfig.json"
|
||||
},
|
||||
"dependencies": {
|
||||
"@sveltejs/adapter-auto": "^3.3.1",
|
||||
"@sveltejs/kit": "^2.17.1",
|
||||
"svelte": "^5.19.5"
|
||||
"dependencies": {
|
||||
"@vela/protocol": "0.0.0",
|
||||
"@sveltejs/adapter-auto": "^3.3.1",
|
||||
"@sveltejs/kit": "^2.17.1",
|
||||
"svelte": "^5.19.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@sveltejs/vite-plugin-svelte": "^5.0.3",
|
||||
|
||||
@@ -7,8 +7,14 @@
|
||||
</svelte:head>
|
||||
|
||||
<script>
|
||||
import {
|
||||
CLIENT_EVENT_TYPES,
|
||||
PROTOCOL_PACKAGE_NAME,
|
||||
SERVER_EVENT_TYPES
|
||||
} from '@vela/protocol';
|
||||
|
||||
const appStatus = 'Bootstrapped';
|
||||
const nextFocus = 'Wire the voice session contract to the gateway.';
|
||||
const nextFocus = `Build the voice session shell on top of ${PROTOCOL_PACKAGE_NAME}.`;
|
||||
</script>
|
||||
|
||||
<div class="page">
|
||||
@@ -20,6 +26,11 @@
|
||||
streaming session UI will be added in later increments.
|
||||
</p>
|
||||
|
||||
<p class="contract-note">
|
||||
Shared protocol package loaded with {CLIENT_EVENT_TYPES.length} client event types and
|
||||
{SERVER_EVENT_TYPES.length} server event types.
|
||||
</p>
|
||||
|
||||
<div class="meta">
|
||||
<div>
|
||||
<span>Status</span>
|
||||
@@ -76,6 +87,10 @@
|
||||
color: #c7d6e8;
|
||||
}
|
||||
|
||||
.contract-note {
|
||||
margin-top: 1rem;
|
||||
}
|
||||
|
||||
.meta {
|
||||
margin-top: 1.5rem;
|
||||
display: grid;
|
||||
|
||||
@@ -11,7 +11,7 @@ Establish the boundaries, protocol, and state model for the system before integr
|
||||
### Backlog Items
|
||||
|
||||
- [x] define repository structure for `vela-ui` and `vela-gateway`
|
||||
- define the WebSocket event contract used by the UI and gateway
|
||||
- [x] define the WebSocket event contract used by the UI and gateway via shared package
|
||||
- define the session state machine and interrupt semantics
|
||||
- define provider adapter interfaces for STT, LLM, TTS, and tools
|
||||
- document error handling and cancellation behavior
|
||||
@@ -180,4 +180,5 @@ Polish the system after the core voice loop is reliable.
|
||||
|
||||
- `apps/vela-ui` now boots as a minimal SvelteKit app with a starter page
|
||||
- `apps/vela-gateway` now boots as a minimal Fastify app with `/` and `/health` endpoints
|
||||
- `apps/vela-protocol` now provides the shared WebSocket event contract for the UI and gateway
|
||||
- backend framework choice is now concrete: Fastify
|
||||
|
||||
@@ -2,31 +2,87 @@
|
||||
|
||||
## Event Protocol
|
||||
|
||||
The shared code-level contract lives in the Yarn workspace package `@vela/protocol` so both the
|
||||
gateway and UI import the same event names and envelope shape.
|
||||
|
||||
## WebSocket Message Envelope
|
||||
|
||||
Every WebSocket message uses one envelope format:
|
||||
|
||||
```ts
|
||||
type MessageEnvelope<TType extends string, TPayload> = {
|
||||
type: TType;
|
||||
payload: TPayload;
|
||||
};
|
||||
```
|
||||
|
||||
This increment intentionally keeps the envelope minimal:
|
||||
|
||||
- `type` identifies the event
|
||||
- `payload` carries the event body
|
||||
- no sequence numbers, timestamps, or protocol version fields yet
|
||||
- future changes should be additive when possible
|
||||
|
||||
### Client → Server
|
||||
|
||||
```ts
|
||||
type ClientEvent =
|
||||
| { type: "start_listening" }
|
||||
| { type: "stop_listening" }
|
||||
| { type: "audio_chunk"; data: string } // PCM16 base64
|
||||
| { type: "interrupt" };
|
||||
| { type: "session.start"; payload: Record<string, never> }
|
||||
| { type: "input_audio.append"; payload: { chunk: string } }
|
||||
| { type: "input_audio.commit"; payload: Record<string, never> }
|
||||
| { type: "response.cancel"; payload: Record<string, never> };
|
||||
```
|
||||
|
||||
#### Client event intent
|
||||
|
||||
- `session.start` initializes a voice session without locking in transport or auth details yet
|
||||
- `input_audio.append` carries a chunk of captured input audio as an encoded string
|
||||
- `input_audio.commit` marks the current buffered user turn as ready for downstream processing
|
||||
- `response.cancel` interrupts the active listen/think/speak flow
|
||||
|
||||
### Server → Client
|
||||
|
||||
```ts
|
||||
type ServerEvent =
|
||||
| { type: "state"; value: "idle" | "listening" | "thinking" | "speaking" }
|
||||
| { type: "partial_transcript"; text: string }
|
||||
| { type: "final_transcript"; text: string }
|
||||
| { type: "assistant_text_delta"; text: string }
|
||||
| { type: "tool_call_started"; tool: string }
|
||||
| { type: "tool_call_finished"; tool: string; result: unknown }
|
||||
| { type: "tts_audio_chunk"; data: string }
|
||||
| { type: "assistant_done" }
|
||||
| { type: "error"; message: string };
|
||||
| { type: "session.ready"; payload: { sessionId: string } }
|
||||
| {
|
||||
type: "session.state";
|
||||
payload: { value: "idle" | "listening" | "thinking" | "speaking" };
|
||||
}
|
||||
| { type: "transcript.partial"; payload: { text: string } }
|
||||
| { type: "transcript.final"; payload: { text: string } }
|
||||
| { type: "response.text.delta"; payload: { text: string } }
|
||||
| { type: "response.completed"; payload: Record<string, never> }
|
||||
| {
|
||||
type: "error";
|
||||
payload: { code: string; message: string; retryable?: boolean };
|
||||
};
|
||||
```
|
||||
|
||||
#### Server event intent
|
||||
|
||||
- `session.ready` confirms that the gateway created a session identity
|
||||
- `session.state` exposes the coarse session phase needed by the later UI shell
|
||||
- `transcript.partial` and `transcript.final` support incremental and completed user text display
|
||||
- `response.text.delta` supports streamed assistant text without committing to audio output details yet
|
||||
- `response.completed` marks the current assistant turn as done
|
||||
- `error` is the minimal recoverable failure shape for both UI and gateway work
|
||||
|
||||
## Contract Scope for This Increment
|
||||
|
||||
This contract is intentionally limited to the smallest event set needed to unblock:
|
||||
|
||||
- the later gateway WebSocket session skeleton
|
||||
- the later UI voice-session shell
|
||||
|
||||
Explicitly deferred for later increments:
|
||||
|
||||
- tool-calling events
|
||||
- streamed TTS/output-audio events
|
||||
- reconnect/resume semantics
|
||||
- protocol version negotiation
|
||||
- provider-specific metadata fields
|
||||
|
||||
## State Machine
|
||||
|
||||
```text
|
||||
@@ -37,13 +93,13 @@ idle
|
||||
→ idle
|
||||
```
|
||||
|
||||
Interrupt can occur at:
|
||||
`response.cancel` can occur at:
|
||||
|
||||
- listening → restart
|
||||
- thinking → cancel
|
||||
- speaking → stop immediately
|
||||
|
||||
## Interrupt Handling Requirements
|
||||
## `response.cancel` Handling Requirements
|
||||
|
||||
- immediate stop of TTS playback
|
||||
- immediate stop of LLM streaming
|
||||
@@ -51,12 +107,14 @@ Interrupt can occur at:
|
||||
|
||||
### Mechanism
|
||||
|
||||
The `interrupt` event cancels:
|
||||
The `response.cancel` event cancels:
|
||||
|
||||
- TTS process
|
||||
- current LLM request
|
||||
- tool execution when possible
|
||||
|
||||
This shared contract uses `response.cancel` consistently for that cancellation signal.
|
||||
|
||||
## Protocol Notes for Implementation
|
||||
|
||||
- keep the protocol backward compatible when possible
|
||||
|
||||
Reference in New Issue
Block a user