const Fastify = require('fastify'); const crypto = require('node:crypto'); const { CLIENT_EVENT_TYPES, PROTOCOL_PACKAGE_NAME, SERVER_EVENT_TYPES, createMessageEnvelope, isClientEventType, isMessageEnvelope } = require('@vela/protocol'); const WEBSOCKET_ROUTE = '/ws'; const WEBSOCKET_GUID = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11'; function createSessionRecord() { return { id: crypto.randomUUID(), connectedAt: new Date().toISOString(), state: 'idle', audioChunkCount: 0, started: false }; } function createWebSocketAcceptValue(key) { return crypto.createHash('sha1').update(`${key}${WEBSOCKET_GUID}`).digest('base64'); } function encodeWebSocketFrame(payload, { opcode = 0x1 } = {}) { const body = Buffer.from(payload); const length = body.length; if (length >= 65536) { throw new Error('payload too large for minimal websocket implementation'); } if (length < 126) { return Buffer.concat([Buffer.from([0x80 | opcode, length]), body]); } const header = Buffer.alloc(4); header[0] = 0x80 | opcode; header[1] = 126; header.writeUInt16BE(length, 2); return Buffer.concat([header, body]); } function decodeWebSocketFrames(buffer) { const messages = []; let offset = 0; while (offset + 2 <= buffer.length) { const firstByte = buffer[offset]; const secondByte = buffer[offset + 1]; const fin = (firstByte & 0x80) !== 0; const opcode = firstByte & 0x0f; const masked = (secondByte & 0x80) !== 0; let payloadLength = secondByte & 0x7f; let headerLength = 2; if (!fin) { throw new Error('fragmented websocket frames are not supported'); } if (payloadLength === 126) { if (offset + 4 > buffer.length) { break; } payloadLength = buffer.readUInt16BE(offset + 2); headerLength = 4; } else if (payloadLength === 127) { throw new Error('64-bit websocket payloads are not supported'); } if (!masked) { throw new Error('client websocket frames must be masked'); } const maskOffset = offset + headerLength; const payloadOffset = maskOffset + 4; const frameLength = headerLength + 4 + payloadLength; if (offset + frameLength > buffer.length) { break; } const mask = buffer.subarray(maskOffset, payloadOffset); const payload = Buffer.from(buffer.subarray(payloadOffset, payloadOffset + payloadLength)); for (let index = 0; index < payload.length; index += 1) { payload[index] ^= mask[index % 4]; } messages.push({ opcode, payload }); offset += frameLength; } return { messages, remaining: buffer.subarray(offset) }; } function sendSocketMessage(socket, type, payload) { const frame = encodeWebSocketFrame(JSON.stringify(createMessageEnvelope(type, payload))); socket.write(frame); } function sendSocketError(socket, code, message, retryable = true) { sendSocketMessage(socket, 'error', { code, message, retryable }); } function updateSessionState(socket, session, nextState) { if (session.state === nextState) { return; } session.state = nextState; sendSocketMessage(socket, 'session.state', { value: session.state }); } function validateClientMessage(message) { if (!isMessageEnvelope(message)) { return 'message must match the shared envelope shape'; } if (!isClientEventType(message.type)) { return `unsupported client event type: ${message.type}`; } if (!message.payload || typeof message.payload !== 'object' || Array.isArray(message.payload)) { return 'message payload must be an object'; } if (message.type === 'input_audio.append' && typeof message.payload.chunk !== 'string') { return 'input_audio.append payload.chunk must be a string'; } return null; } function handleClientMessage(socket, session, rawMessage) { let message; try { message = JSON.parse(rawMessage); } catch { sendSocketError(socket, 'invalid_json', 'Message must be valid JSON.'); return; } const validationError = validateClientMessage(message); if (validationError) { sendSocketError(socket, 'invalid_message', validationError); return; } switch (message.type) { case 'session.start': session.started = true; sendSocketMessage(socket, 'session.ready', { sessionId: session.id }); sendSocketMessage(socket, 'session.state', { value: session.state }); break; case 'input_audio.append': session.audioChunkCount += 1; updateSessionState(socket, session, 'listening'); break; case 'input_audio.commit': session.audioChunkCount = 0; updateSessionState(socket, session, 'idle'); break; case 'response.cancel': session.audioChunkCount = 0; updateSessionState(socket, session, 'idle'); break; default: sendSocketError(socket, 'unhandled_message', `No handler is implemented for ${message.type}.`); } } function registerWebSocketSessionRoute(app) { app.get(WEBSOCKET_ROUTE, async (_request, reply) => { reply.code(426); return { service: 'vela-gateway', transport: 'websocket', route: WEBSOCKET_ROUTE, protocol: PROTOCOL_PACKAGE_NAME, messageEnvelope: 'MessageEnvelope<{type, payload}>', note: 'Connect with a WebSocket client to establish an ephemeral session.' }; }); app.server.on('upgrade', (request, socket) => { const origin = `http://${request.headers.host ?? 'localhost'}`; const { pathname } = new URL(request.url ?? '/', origin); if (pathname !== WEBSOCKET_ROUTE) { socket.destroy(); return; } if ((request.headers.upgrade ?? '').toLowerCase() !== 'websocket') { socket.destroy(); return; } const websocketKey = request.headers['sec-websocket-key']; if (typeof websocketKey !== 'string') { socket.destroy(); return; } const session = createSessionRecord(); app.websocketSessions.set(session.id, session); socket.write( [ 'HTTP/1.1 101 Switching Protocols', 'Upgrade: websocket', 'Connection: Upgrade', `Sec-WebSocket-Accept: ${createWebSocketAcceptValue(websocketKey)}`, '\r\n' ].join('\r\n') ); app.log.info({ sessionId: session.id, route: WEBSOCKET_ROUTE }, 'websocket session connected'); sendSocketMessage(socket, 'session.ready', { sessionId: session.id }); sendSocketMessage(socket, 'session.state', { value: session.state }); let frameBuffer = Buffer.alloc(0); let closed = false; const cleanup = () => { if (closed) { return; } closed = true; app.websocketSessions.delete(session.id); app.log.info({ sessionId: session.id }, 'websocket session disconnected'); }; socket.on('data', (chunk) => { frameBuffer = Buffer.concat([frameBuffer, chunk]); try { const decoded = decodeWebSocketFrames(frameBuffer); frameBuffer = decoded.remaining; for (const message of decoded.messages) { if (message.opcode === 0x8) { socket.end(encodeWebSocketFrame('', { opcode: 0x8 })); return; } if (message.opcode === 0x9) { socket.write(encodeWebSocketFrame(message.payload, { opcode: 0xA })); continue; } if (message.opcode !== 0x1) { sendSocketError(socket, 'unsupported_message', 'Only text messages are supported.'); continue; } handleClientMessage(socket, session, message.payload.toString('utf8')); } } catch (error) { app.log.warn({ err: error, sessionId: session.id }, 'closing malformed websocket session'); sendSocketError(socket, 'invalid_frame', error.message, false); socket.end(encodeWebSocketFrame('', { opcode: 0x8 })); } }); socket.on('end', cleanup); socket.on('close', cleanup); socket.on('error', (error) => { app.log.warn({ err: error, sessionId: session.id }, 'websocket session socket error'); cleanup(); }); }); } function buildServer() { const app = Fastify({ logger: true }); app.decorate('websocketSessions', new Map()); app.get('/', async () => ({ service: 'vela-gateway', status: 'ok', transport: 'http', next: 'websocket session skeleton', protocol: { package: PROTOCOL_PACKAGE_NAME, clientEventCount: CLIENT_EVENT_TYPES.length, serverEventCount: SERVER_EVENT_TYPES.length } })); app.get('/health', async () => ({ status: 'ok' })); registerWebSocketSessionRoute(app); return app; } async function start() { const app = buildServer(); const port = Number(process.env.PORT ?? 3001); const host = process.env.HOST ?? '0.0.0.0'; try { await app.listen({ port, host }); } catch (error) { app.log.error(error); process.exit(1); } } if (require.main === module) { start(); } module.exports = { buildServer };