feat(vela-ui): add placeholder push-to-talk control shell

This commit is contained in:
2026-04-08 20:04:32 +02:00
parent 0d5b53be00
commit 103bb11954
7 changed files with 204 additions and 8 deletions

View File

@@ -77,3 +77,30 @@ test('voice session shell can cancel an active mocked turn and start another one
await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
await expect(page.getByTestId('mocked-turn-status')).toHaveText('idle');
});
// End-to-end: a press/release on the placeholder mic control moves the displayed gateway
// session state to 'listening' and back to 'idle', after which a mocked turn still completes.
test('voice session shell supports a placeholder mic-control cycle before another mocked turn', async ({ page }) => {
  // Locators are resolved lazily on each use, so they can be named once up front.
  const micButton = page.getByTestId('mic-control-button');
  const micStatus = page.getByTestId('mic-control-status');
  const gatewayState = page.getByTestId('gateway-session-state');
  const mockedTurnButton = page.getByTestId('mocked-turn-button');
  const mockedTurnStatus = page.getByTestId('mocked-turn-status');

  // Before connecting, the mic control is disabled.
  await page.goto('/');
  await expect(page.getByTestId('hydration-status')).toHaveText('ready');
  await expect(micButton).toBeDisabled();

  // Connecting enables the mic control once the session is idle.
  await page.getByTestId('connect-button').click();
  await expect(page.getByTestId('connection-state')).toHaveText('connected');
  await expect(gatewayState).toHaveText('idle');
  await expect(micButton).toBeEnabled();

  // Press: the shell reports a hold and the mocked turn is locked out.
  await micButton.dispatchEvent('mousedown');
  await expect(micStatus).toHaveText('holding');
  await expect(gatewayState).toHaveText('listening');
  await expect(mockedTurnButton).toBeDisabled();

  // Release: everything returns to idle and the mocked turn is available again.
  await micButton.dispatchEvent('mouseup');
  await expect(micStatus).toHaveText('idle');
  await expect(gatewayState).toHaveText('idle');
  await expect(mockedTurnButton).toBeEnabled();

  // A mocked turn still runs to completion on the same session.
  await mockedTurnButton.click();
  await expect(mockedTurnStatus).toHaveText('running');
  await expect(page.getByTestId('assistant-response')).toHaveText(MOCKED_ASSISTANT_RESPONSE);
  await expect(mockedTurnStatus).toHaveText('idle');
});

View File

@@ -53,6 +53,7 @@
let mockedAssistantResponse = 'none';
let mockedTurnInFlight = false;
let mockedConversationRenderOrder = [];
// True while the placeholder mic control is being held: set by startMicControl and cleared by
// stopMicControl. Rendered as 'holding' / 'idle' in the mic-control-status field.
let micControlActive = false;
let hydrationStatus = 'mounting';
$: canTriggerMockedTurn =
@@ -60,6 +61,8 @@
connectionState === 'connected' &&
socket?.readyState === WebSocket.OPEN &&
sessionReadyReceived &&
gatewaySessionState === 'idle' &&
!micControlActive &&
!mockedTurnInFlight;
$: canCancelMockedTurn =
@@ -69,6 +72,13 @@
sessionReadyReceived &&
mockedTurnInFlight;
// Whether the placeholder mic control may be used. Drives the mic button's `disabled`
// attribute and is the first guard in startMicControl.
// Unlike canTriggerMockedTurn, this does not require gatewaySessionState === 'idle'.
// NOTE(review): presumably so the button stays enabled, and can still receive the release
// event, while the gateway reports 'listening' during a hold — confirm.
$: canUseMicControl =
typeof WebSocket !== 'undefined' &&
connectionState === 'connected' &&
socket?.readyState === WebSocket.OPEN &&
sessionReadyReceived &&
!mockedTurnInFlight;
function clearSocketHandlers(targetSocket) {
targetSocket.onopen = null;
targetSocket.onmessage = null;
@@ -85,6 +95,35 @@
mockedAssistantResponse = 'none';
mockedTurnInFlight = false;
mockedConversationRenderOrder = [];
micControlActive = false;
}
/**
 * Begins a placeholder push-to-talk hold.
 *
 * Marks the mic control as active, resets `lastError`, updates `connectionDetail`, and sends a
 * single `input_audio.append` envelope whose payload is a fixed placeholder chunk string.
 *
 * Does nothing when the control is unavailable, when a hold is already in progress, or when
 * the socket is not currently open.
 */
function startMicControl() {
  if (!canUseMicControl || micControlActive) {
    return;
  }

  // canUseMicControl is a derived value that may lag behind the socket's live readyState, so
  // re-read the socket here (mirroring stopMicControl). A hold must not be reported for a
  // message that cannot be delivered.
  if (!socket || socket.readyState !== WebSocket.OPEN) {
    return;
  }

  micControlActive = true;
  lastError = 'none';
  connectionDetail = 'Mic control shell active. Sending placeholder input_audio.append only.';
  socket.send(
    JSON.stringify(createMessageEnvelope('input_audio.append', { chunk: 'placeholder-control-shell-chunk' }))
  );
}
/**
 * Ends a placeholder push-to-talk hold.
 *
 * Clears the active flag whenever a hold is in progress. The `input_audio.commit` envelope is
 * sent, and `connectionDetail` updated, only if the socket is open, the connection state is
 * 'connected', and the session-ready flag is set. Does nothing when no hold is in progress.
 */
function stopMicControl() {
  if (micControlActive) {
    micControlActive = false;

    // Read the live socket state rather than a derived flag before sending.
    const sessionIsLive =
      Boolean(socket) &&
      socket.readyState === WebSocket.OPEN &&
      connectionState === 'connected' &&
      sessionReadyReceived;

    if (sessionIsLive) {
      connectionDetail = 'Mic control shell released. Sending placeholder input_audio.commit.';
      const commitEnvelope = createMessageEnvelope('input_audio.commit', {});
      socket.send(JSON.stringify(commitEnvelope));
    }
  }
}
function triggerMockedTurn() {
@@ -252,6 +291,7 @@
lastClose = formatCloseReason(event);
mockedTurnInFlight = false;
micControlActive = false;
connectionState = connectionState === 'error' ? 'error' : 'disconnected';
connectionDetail =
connectionState === 'error' ? 'Socket closed after an error.' : 'Gateway WebSocket is closed.';
@@ -298,7 +338,13 @@
<h1>Voice session shell</h1>
<p>
This minimal browser shell can connect to the gateway WebSocket, trigger one deterministic
mocked turn, and render the mocked transcript plus assistant response for the active session.
mocked turn, and expose a push-to-talk control shell that only sends placeholder protocol
events for the active session.
</p>
<p class="contract-note">
The mic button in this increment does not request browser microphone permission and does not
capture real audio.
</p>
<p class="contract-note">
@@ -308,6 +354,20 @@
</p>
<div class="controls">
<!--
  Placeholder push-to-talk control: press to call startMicControl, release to call
  stopMicControl. Mouse and touch holds are supported. Holding Space or Enter does the same
  for keyboard users, and losing focus releases the hold so it cannot get stuck.
-->
<button
  data-testid="mic-control-button"
  type="button"
  class:mic-active={micControlActive}
  on:mousedown={startMicControl}
  on:mouseup={stopMicControl}
  on:mouseleave={stopMicControl}
  on:touchstart|preventDefault={startMicControl}
  on:touchend|preventDefault={stopMicControl}
  on:touchcancel|preventDefault={stopMicControl}
  on:keydown={(event) => {
    if ((event.key === ' ' || event.key === 'Enter') && !event.repeat) {
      startMicControl();
    }
  }}
  on:keyup={(event) => {
    if (event.key === ' ' || event.key === 'Enter') {
      stopMicControl();
    }
  }}
  on:blur={stopMicControl}
  disabled={!canUseMicControl}
>
  {micControlActive ? 'Release mic control shell' : 'Hold to send placeholder mic control'}
</button>
<button
data-testid="connect-button"
on:click={connect}
@@ -381,6 +441,10 @@
<span>Connection attempts</span>
<strong data-testid="connection-attempts">{connectionAttempts}</strong>
</div>
<div>
<span>Mic control shell</span>
<strong data-testid="mic-control-status">{micControlActive ? 'holding' : 'idle'}</strong>
</div>
<div>
<span>Mocked turn status</span>
<strong data-testid="mocked-turn-status">{mockedTurnInFlight ? 'running' : 'idle'}</strong>
@@ -464,6 +528,11 @@
opacity: 0.55;
}
/* Highlight for the mic button while a hold is in progress; the class is toggled by
   class:mic-active={micControlActive} on the button. */
button.mic-active {
background: #7d2034;
border-color: #ff7d9a;
}
.conversation {
margin-top: 1.5rem;
display: grid;

View File

@@ -68,6 +68,7 @@ describe('voice session shell', () => {
expect(getByTestId('connection-state').textContent).toBe('not connected');
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(true);
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(true);
await fireEvent.click(getByTestId('connect-button'));
const socket = MockWebSocket.latest();
@@ -78,6 +79,7 @@ describe('voice session shell', () => {
await waitFor(() => {
expect(getByTestId('connection-state').textContent).toBe('connected');
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(false);
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(false);
});
await fireEvent.click(getByTestId('disconnect-button'));
@@ -86,10 +88,69 @@ describe('voice session shell', () => {
await waitFor(() => {
expect(getByTestId('connection-state').textContent).toBe('disconnected');
expect(getByTestId('mocked-turn-button').hasAttribute('disabled')).toBe(true);
expect(getByTestId('mic-control-button').hasAttribute('disabled')).toBe(true);
expect(getByTestId('session-id').textContent).toBe('session-123');
});
});
// Unit-level walk through one press/release cycle against the mock socket, checking the exact
// envelopes sent and that a mocked turn can still be triggered afterwards on the same socket.
it('runs a placeholder mic-control cycle and keeps mocked turn usable on the same socket', async () => {
  // Query on every call so assertions always see the currently rendered element.
  const textOf = (testId) => getByTestId(testId).textContent;
  const isDisabled = (testId) => getByTestId(testId).hasAttribute('disabled');

  render(VoiceSessionShell);
  await fireEvent.click(getByTestId('connect-button'));

  const socket = MockWebSocket.latest();
  const sentMessage = (index) => JSON.parse(socket.sent[index]);

  socket.open();
  await waitFor(() => {
    expect(textOf('connection-state')).toBe('connected');
  });

  // Connected but no session.ready yet: the mic control is still disabled.
  expect(isDisabled('mic-control-button')).toBe(true);

  socket.message(createMessageEnvelope('session.ready', { sessionId: 'session-mic' }));
  socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
  await waitFor(() => {
    expect(isDisabled('mic-control-button')).toBe(false);
  });

  // Press: exactly one placeholder append goes out and the shell reports a hold.
  await fireEvent.mouseDown(getByTestId('mic-control-button'));
  expect(socket.sent).toHaveLength(1);
  expect(sentMessage(0)).toEqual({
    type: 'input_audio.append',
    payload: { chunk: 'placeholder-control-shell-chunk' }
  });
  expect(textOf('mic-control-status')).toBe('holding');

  socket.message(createMessageEnvelope('session.state', { value: 'listening' }));
  await waitFor(() => {
    expect(textOf('gateway-session-state')).toBe('listening');
    expect(isDisabled('mocked-turn-button')).toBe(true);
  });

  // Release: a commit follows and the shell returns to idle.
  await fireEvent.mouseUp(getByTestId('mic-control-button'));
  expect(socket.sent).toHaveLength(2);
  expect(sentMessage(1)).toEqual({
    type: 'input_audio.commit',
    payload: {}
  });
  expect(textOf('mic-control-status')).toBe('idle');

  socket.message(createMessageEnvelope('session.state', { value: 'idle' }));
  await waitFor(() => {
    expect(textOf('gateway-session-state')).toBe('idle');
    expect(isDisabled('mocked-turn-button')).toBe(false);
  });

  // The mocked turn is still usable on the same socket.
  await fireEvent.click(getByTestId('mocked-turn-button'));
  expect(socket.sent).toHaveLength(3);
  expect(sentMessage(2).type).toBe('mocked.turn.trigger');
});
it('renders mocked transcript before assistant response for a connected session', async () => {
render(VoiceSessionShell);