// Files: assistant/apps/vela-ui/tests/voice-session.test.js
//
// 290 lines
// 10 KiB
// JavaScript

import { cleanup, fireEvent, render, screen, waitFor } from '@testing-library/svelte';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { createMessageEnvelope } from '@vela/protocol';
import VoiceSessionShell from '../src/lib/VoiceSessionShell.svelte';
/**
 * In-memory replacement for the global `WebSocket` used by these tests.
 *
 * Every constructed socket is recorded in `MockWebSocket.instances`, outgoing
 * frames are collected in `sent`, and the `open()` / `message()` / `close()`
 * methods let a test fire the matching `on*` handler synchronously.
 */
class MockWebSocket {
  // Ready-state values, numbered like the browser WebSocket constants.
  static CONNECTING = 0;
  static OPEN = 1;
  static CLOSING = 2;
  static CLOSED = 3;

  // Sockets created since the last reset(), oldest first.
  static instances = [];

  /** Returns the most recently constructed socket, or `undefined` if none. */
  static latest() {
    const { instances } = MockWebSocket;
    return instances[instances.length - 1];
  }

  /** Forgets every previously constructed socket. */
  static reset() {
    MockWebSocket.instances = [];
  }

  /**
   * @param {string} url - Address the code under test asked to connect to.
   */
  constructor(url) {
    Object.assign(this, {
      url,
      readyState: MockWebSocket.CONNECTING,
      sent: [],
      onopen: null,
      onmessage: null,
      onerror: null,
      onclose: null,
    });
    MockWebSocket.instances.push(this);
  }

  /** Records an outgoing frame instead of transmitting it. */
  send(message) {
    this.sent.push(message);
  }

  /** Marks the socket as open and invokes `onopen`, if one is set. */
  open() {
    this.readyState = MockWebSocket.OPEN;
    if (this.onopen != null) {
      this.onopen();
    }
  }

  /**
   * Delivers `payload` to `onmessage` as a JSON string in `event.data`.
   * Nothing is serialized when no handler is attached.
   */
  message(payload) {
    if (this.onmessage == null) {
      return;
    }
    this.onmessage({ data: JSON.stringify(payload) });
  }

  /**
   * Marks the socket as closed and invokes `onclose` with a close-event-like
   * object built from the arguments.
   */
  close(code = 1000, reason = 'client disconnect', wasClean = true) {
    this.readyState = MockWebSocket.CLOSED;
    if (this.onclose != null) {
      this.onclose({ code, reason, wasClean });
    }
  }
}
/**
 * Shorthand for `screen.getByTestId`, so assertions in this file stay compact.
 *
 * @param {string} id - Test id to look up.
 * @returns whatever `screen.getByTestId(id)` returns.
 */
const getByTestId = (id) => screen.getByTestId(id);
// Component tests for VoiceSessionShell, driven through MockWebSocket.
// Each test renders the shell, operates its controls by test id, and plays
// incoming protocol messages into the captured socket with socket.message().
describe('voice session shell', () => {
// Start every test with an empty socket registry and with the global
// WebSocket replaced by the mock.
beforeEach(() => {
MockWebSocket.reset();
vi.stubGlobal('WebSocket', MockWebSocket);
});
// Tear down the rendered component and undo the global stub.
afterEach(() => {
cleanup();
vi.unstubAllGlobals();
});
it('keeps mocked turn unavailable while disconnected and after disconnect', async () => {
render(VoiceSessionShell);
// Before connecting: both action buttons are disabled.
expect(getByTestId('connection-state').textContent).toBe('not connected');
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(true);
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(true);
// Connect, then simulate the socket opening followed by session.ready and
// an idle session.state message.
await fireEvent.click(getByTestId('connect-button'));
const socket = MockWebSocket.latest();
socket.open();
socket.message(createMessageEnvelope('session.ready', { sessionId: 'session-123' }));
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
await waitFor(() => {
expect(getByTestId('connection-state').textContent).toBe('connected');
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(false);
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(false);
});
// Disconnect and simulate the close event: the buttons are disabled again
// while the previously received session id is still displayed.
await fireEvent.click(getByTestId('disconnect-button'));
socket.close(1000, 'client disconnect', true);
await waitFor(() => {
expect(getByTestId('connection-state').textContent).toBe('disconnected');
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(true);
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(true);
expect(getByTestId('session-id').textContent).toBe('session-123');
});
});
it('runs a placeholder mic-control cycle and keeps mocked turn usable on the same socket', async () => {
render(VoiceSessionShell);
await fireEvent.click(getByTestId('connect-button'));
const socket = MockWebSocket.latest();
socket.open();
await waitFor(() => {
expect(getByTestId('connection-state').textContent).toBe('connected');
});
// Socket is open but session.ready has not arrived yet: mic control stays disabled.
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(true);
socket.message(createMessageEnvelope('session.ready', { sessionId: 'session-mic' }));
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
await waitFor(() => {
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(false);
});
// Pressing the mic control sends exactly one input_audio.append frame with
// the placeholder chunk and switches the mic status to 'holding'.
await fireEvent.mouseDown(getByTestId('mic-control-button'));
expect(socket.sent).toHaveLength(1);
expect(JSON.parse(socket.sent[0])).toEqual({
type: 'input_audio.append',
payload: { chunk: 'placeholder-control-shell-chunk' }
});
expect(getByTestId('mic-control-status').textContent).toBe('holding');
// While the session state is 'listening', the mocked-turn button is disabled
// and the partial text is shown in both transcript fields.
socket.message(createMessageEnvelope('session.state', { value: 'listening' }));
socket.message(
createMessageEnvelope('transcript.partial', {
text: '[mocked partial] Placeholder push-to-talk transcript in progress.'
})
);
await waitFor(() => {
expect(getByTestId('gateway-session-state').textContent).toBe('listening');
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(true);
expect(getByTestId('user-transcript').textContent).toBe(
'[mocked partial] Placeholder push-to-talk transcript in progress.'
);
expect(getByTestId('partial-transcript').textContent).toBe(
'[mocked partial] Placeholder push-to-talk transcript in progress.'
);
});
// Releasing the mic control sends input_audio.commit as the second frame
// and returns the mic status to 'idle'.
await fireEvent.mouseUp(getByTestId('mic-control-button'));
expect(socket.sent).toHaveLength(2);
expect(JSON.parse(socket.sent[1])).toEqual({
type: 'input_audio.commit',
payload: {}
});
expect(getByTestId('mic-control-status').textContent).toBe('idle');
// Final transcript plus idle state: the partial field reads 'none' and the
// mocked-turn button is enabled again.
socket.message(
createMessageEnvelope('transcript.final', {
text: '[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
})
);
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
await waitFor(() => {
expect(getByTestId('gateway-session-state').textContent).toBe('idle');
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(false);
expect(getByTestId('user-transcript').textContent).toBe(
'[mocked final] Placeholder push-to-talk transcript completed from 1 appended chunk.'
);
expect(getByTestId('partial-transcript').textContent).toBe('none');
});
// The mocked turn trigger goes out on the same socket as the third frame.
await fireEvent.click(getByTestId('mocked-turn-button'));
expect(socket.sent).toHaveLength(3);
expect(JSON.parse(socket.sent[2]).type).toBe('mocked.turn.trigger');
});
it('renders mocked transcript before assistant response for a connected session', async () => {
render(VoiceSessionShell);
await fireEvent.click(getByTestId('connect-button'));
const socket = MockWebSocket.latest();
socket.open();
socket.message(createMessageEnvelope('session.ready', { sessionId: 'session-456' }));
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
await waitFor(() => {
expect(getByTestId('connection-state').textContent).toBe('connected');
});
await fireEvent.click(getByTestId('mocked-turn-button'));
expect(socket.sent).toHaveLength(1);
const sentMessage = JSON.parse(socket.sent[0]);
expect(sentMessage.type).toBe('mocked.turn.trigger');
// Play a complete mocked turn: listening -> partial -> final -> thinking ->
// speaking -> two text deltas -> completed -> idle.
socket.message(createMessageEnvelope('session.state', { value: 'listening' }));
socket.message(
createMessageEnvelope('transcript.partial', {
text: '[mocked partial] Placeholder push-to-talk transcript in progress.'
})
);
socket.message(createMessageEnvelope('transcript.final', { text: 'Turn on the office lamp.' }));
socket.message(createMessageEnvelope('session.state', { value: 'thinking' }));
socket.message(createMessageEnvelope('session.state', { value: 'speaking' }));
socket.message(createMessageEnvelope('response.text.delta', { text: 'Mocked ' }));
socket.message(createMessageEnvelope('response.text.delta', { text: 'assistant response.' }));
socket.message(createMessageEnvelope('response.completed', { reason: 'mocked_turn_complete' }));
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
// The two deltas are expected to appear joined as one response string, and
// the render-order marker must report the transcript ahead of the response.
await waitFor(() => {
expect(getByTestId('user-transcript').textContent).toBe('Turn on the office lamp.');
expect(getByTestId('partial-transcript').textContent).toBe('none');
expect(getByTestId('assistant-response').textContent).toBe('Mocked assistant response.');
expect(getByTestId('conversation-render-order').textContent).toBe('transcript>response');
expect(getByTestId('gateway-session-state').textContent).toBe('idle');
});
});
it('blocks mocked turn trigger before session.ready and allows it after session.ready', async () => {
render(VoiceSessionShell);
await fireEvent.click(getByTestId('connect-button'));
const socket = MockWebSocket.latest();
socket.open();
await waitFor(() => {
expect(getByTestId('connection-state').textContent).toBe('connected');
});
// Socket is open but session.ready has not arrived: the button is disabled,
// and a click on it must send nothing and surface an error message.
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(true);
await fireEvent.click(getByTestId('mocked-turn-button'));
expect(socket.sent).toHaveLength(0);
expect(getByTestId('last-error').textContent).toBe('mocked turn requires session.ready');
// After session.ready and an idle state the same click sends the trigger.
socket.message(createMessageEnvelope('session.ready', { sessionId: 'session-789' }));
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
await waitFor(() => {
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(false);
});
await fireEvent.click(getByTestId('mocked-turn-button'));
expect(socket.sent).toHaveLength(1);
expect(JSON.parse(socket.sent[0]).type).toBe('mocked.turn.trigger');
});
it('shows cancel control during an active mocked turn and preserves rendered text after cancel', async () => {
render(VoiceSessionShell);
await fireEvent.click(getByTestId('connect-button'));
const socket = MockWebSocket.latest();
socket.open();
socket.message(createMessageEnvelope('session.ready', { sessionId: 'session-cancel' }));
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
// With the session idle, the cancel button is disabled.
await waitFor(() => {
expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(true);
});
await fireEvent.click(getByTestId('mocked-turn-button'));
expect(JSON.parse(socket.sent[0]).type).toBe('mocked.turn.trigger');
// Advance the turn up to a first response delta, without completing it.
socket.message(createMessageEnvelope('session.state', { value: 'listening' }));
socket.message(createMessageEnvelope('transcript.final', { text: 'Keep this transcript.' }));
socket.message(createMessageEnvelope('session.state', { value: 'thinking' }));
socket.message(createMessageEnvelope('session.state', { value: 'speaking' }));
socket.message(createMessageEnvelope('response.text.delta', { text: 'Partial response' }));
await waitFor(() => {
expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(false);
expect(getByTestId('assistant-response').textContent).toBe('Partial response');
});
// Cancelling sends response.cancel as the second frame. Once the state is
// idle again, the transcript and the partial response are still displayed.
await fireEvent.click(getByTestId('cancel-turn-button'));
expect(JSON.parse(socket.sent[1]).type).toBe('response.cancel');
socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
await waitFor(() => {
expect(getByTestId('mocked-turn-status').textContent).toBe('idle');
expect(getByTestId('gateway-session-state').textContent).toBe('idle');
expect(getByTestId('cancel-turn-button').hasAttribute('disabled')).toBe(true);
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(false);
expect(getByTestId('user-transcript').textContent).toBe('Keep this transcript.');
expect(getByTestId('assistant-response').textContent).toBe('Partial response');
});
});
});