From ae624da7c2abe3dc0a4f071bc52257cafb6fb577 Mon Sep 17 00:00:00 2001 From: pasketti Date: Thu, 23 Apr 2026 02:13:03 -0400 Subject: [PATCH] =?UTF-8?q?Upgrade=20to=20rom-control=20v2=20Client=20API;?= =?UTF-8?q?=20rename=20app.js=20=E2=86=92=20commander.js;=20hide=20incompl?= =?UTF-8?q?ete=20telepresence/animator=20pages?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package-lock.json | 24 +- package.json | 6 +- public/commander.html | 5 +- public/{app.js => commander.js} | 64 +- public/index.html | 4 +- server.js | 1149 ++++++++++--------------------- 6 files changed, 446 insertions(+), 806 deletions(-) rename public/{app.js => commander.js} (91%) diff --git a/package-lock.json b/package-lock.json index bcb55ae..241f147 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,16 +1,16 @@ { "name": "re-commander", - "version": "1.0.0", + "version": "2.0.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "re-commander", - "version": "1.0.0", + "version": "2.0.0", "dependencies": { "dotenv": "^17.4.2", "express": "^4.18.2", - "golden-layout": "^2.6.0", + "rom-control": "^2.0.0", "ws": "^8.14.2" } }, @@ -372,12 +372,6 @@ "node": ">= 0.4" } }, - "node_modules/golden-layout": { - "version": "2.6.0", - "resolved": "https://registry.npmjs.org/golden-layout/-/golden-layout-2.6.0.tgz", - "integrity": "sha512-sIVQCiRWOymHbVD1Aw/T9/ijbPYAVGBlgGYd1N9MRKfcyBNSpjr87Vg9nSHm+RCT8ELrvK8IJYJV0QRJuVUkCQ==", - "license": "MIT" - }, "node_modules/gopd": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", @@ -636,6 +630,18 @@ "node": ">= 0.8" } }, + "node_modules/rom-control": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/rom-control/-/rom-control-2.0.0.tgz", + "integrity": "sha512-mENZI9Cf8fUzB02X1tTNGn4HUlCEpASds9YZQvbp/T5LJoCYCrgryLAE7OCIkLa+4Ob+NlO1jBK84n8/zR7/tg==", + "license": "MIT", + "dependencies": { + "ws": "^8.14.2" + }, + "engines": { + "node": 
">=16" + } + }, "node_modules/safe-buffer": { "version": "5.2.1", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", diff --git a/package.json b/package.json index caf37ba..b55014c 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "re-commander", - "version": "1.0.0", - "description": "Jibo ROM Commander — local Node.js recreation using port 8160", + "version": "2.0.0", + "description": "Jibo ROM Commander — built on the rom-control module", "main": "server.js", "scripts": { "start": "node server.js" @@ -9,7 +9,7 @@ "dependencies": { "dotenv": "^17.4.2", "express": "^4.18.2", - "golden-layout": "^2.6.0", + "rom-control": "^2.0.0", "ws": "^8.14.2" } } diff --git a/public/commander.html b/public/commander.html index dc0c58a..f6a04d3 100644 --- a/public/commander.html +++ b/public/commander.html @@ -81,7 +81,8 @@
- + +
@@ -281,6 +282,6 @@ Photo
- + diff --git a/public/app.js b/public/commander.js similarity index 91% rename from public/app.js rename to public/commander.js index a0b5f0a..d615d12 100644 --- a/public/app.js +++ b/public/commander.js @@ -59,7 +59,8 @@ function handleJiboEvent(body, txId) { switch (body.Event) { case 'onHotWordHeard': flashHotword(body.utterance || 'hey jibo', body.score); - if (document.getElementById('auto-listen-toggle').checked) doListen(); + if (document.getElementById('auto-listen-toggle').checked) + post('/api/interrupt').then(() => doListen()); break; case 'onStart': @@ -257,7 +258,7 @@ document.getElementById('btn-say').addEventListener('click', async () => { }); document.getElementById('btn-say-cancel').addEventListener('click', () => { - if (lastSayTx) post('/api/cancel', { txId: lastSayTx }); + post('/api/say/cancel'); }); // ── Listen ──────────────────────────────────────────────────────────────────── @@ -288,20 +289,28 @@ document.getElementById('btn-listen').addEventListener('click', doListen); document.getElementById('btn-listen-cancel').addEventListener('click', () => { clearListenTimeout(); - if (lastListenTx) post('/api/cancel', { txId: lastListenTx }); + post('/api/listen/cancel'); document.getElementById('listen-result').textContent = '(cancelled)'; }); // ── Auto-listen + Voice AI ──────────────────────────────────────────────────── -let llmHistory = []; // [{role:'user'|'assistant', content:string}] +let llmHistory = []; // [{role:'user'|'assistant', content:string}] +let llmSessionMode = false; // true when server uses LLM_SESSION_KEY (OpenClaw session) +let llmTurnCount = 0; function llmStatus(msg) { document.getElementById('llm-status').textContent = msg; } async function runLLMLoop(speechText) { - llmHistory.push({ role: 'user', content: speechText }); + // In session mode send only the latest message — history lives on the server. + // In history mode accumulate the full thread and send it each time. + const messages = llmSessionMode + ? 
[{ role: 'user', content: speechText }] + : [...llmHistory, { role: 'user', content: speechText }]; + + if (!llmSessionMode) llmHistory.push({ role: 'user', content: speechText }); llmStatus('Thinking…'); const endpoint = document.getElementById('llm-endpoint').value.trim(); @@ -309,30 +318,45 @@ async function runLLMLoop(speechText) { const systemPrompt = document.getElementById('llm-system-prompt').value.trim(); const r = await post('/api/llm/chat', { - messages: llmHistory, + messages, endpoint: endpoint || undefined, model: model || undefined, systemPrompt: systemPrompt || undefined, }); if (!r || r.error) { + if (r?.error === 'cancelled') { llmStatus('Interrupted.'); if (!llmSessionMode) llmHistory.pop(); return; } llmStatus('LLM error: ' + (r?.error || 'no response')); - llmHistory.pop(); // undo the user push so history stays consistent + if (!llmSessionMode) llmHistory.pop(); return; } const reply = r.reply; - llmHistory.push({ role: 'assistant', content: reply }); - llmStatus(`[${llmHistory.length / 2} turns] Last: "${reply.slice(0, 60)}${reply.length > 60 ? '…' : ''}"`); + if (!llmSessionMode) llmHistory.push({ role: 'assistant', content: reply }); + llmTurnCount++; + const modeTag = llmSessionMode ? 'session' : 'local'; + llmStatus(`[${modeTag} · ${llmTurnCount} turns] Last: "${reply.slice(0, 50)}${reply.length > 50 ? 
'…' : ''}"`); // Fill say box so user can see what Jibo is about to say document.getElementById('say-text').value = reply; - await post('/api/say', { text: reply }); + const sayResult = await post('/api/say', { text: reply }); + + // If the reply ends with a question and wasn't interrupted, listen for the user's answer + const endsWithQuestion = /\?[^a-zA-Z0-9]*$/.test(reply.trim()); + if (endsWithQuestion && !sayResult?.aborted && document.getElementById('llm-toggle').checked) { + doListen(); + } } +document.getElementById('btn-llm-cancel').addEventListener('click', () => { + post('/api/llm/cancel'); + llmStatus('Cancelled.'); +}); + document.getElementById('btn-llm-clear').addEventListener('click', () => { - llmHistory = []; - llmStatus('Conversation cleared.'); + llmHistory = []; + llmTurnCount = 0; + llmStatus(llmSessionMode ? 'Session turn counter reset (server session persists).' : 'Conversation cleared.'); }); // ── Attention ───────────────────────────────────────────────────────────────── @@ -566,26 +590,24 @@ function flashHotword(utterance, score) { hotwordTimer = setTimeout(() => el.classList.remove('active'), 3000); } -// ── Menu: Robot & Mode Selection ──────────────────────────────────────────── +// ── Menu: Robot & Mode Selection ───────────────────────────────────────────── let selectedRobot = null; -let selectedMode = 'commander'; +let selectedMode = 'commander'; -// Get the selected robot and mode from sessionStorage function initializeMode() { selectedRobot = sessionStorage.getItem('selectedRobot') || 'jibo-001'; - selectedMode = sessionStorage.getItem('selectedMode') || 'commander'; + selectedMode = sessionStorage.getItem('selectedMode') || 'commander'; } -// Back to home button document.getElementById('btn-back-home')?.addEventListener('click', () => { window.location.href = 'index.html'; }); -// ── Initialization ─────────────────────────────────────────────────────────── +// ── Init 
────────────────────────────────────────────────────────────────────── -connectWS(); initializeMode(); +connectWS(); // Populate LLM fields from server config (.env defaults) get('/api/config').then(cfg => { @@ -593,4 +615,8 @@ get('/api/config').then(cfg => { if (cfg.llmEndpoint) document.getElementById('llm-endpoint').value = cfg.llmEndpoint; if (cfg.llmModel) document.getElementById('llm-model').value = cfg.llmModel; if (cfg.llmSystemPrompt) document.getElementById('llm-system-prompt').value = cfg.llmSystemPrompt; + if (cfg.sessionMode) { + llmSessionMode = true; + llmStatus('Session mode (OpenClaw) — history managed server-side.'); + } }); diff --git a/public/index.html b/public/index.html index 59f1294..831e4eb 100644 --- a/public/index.html +++ b/public/index.html @@ -253,11 +253,11 @@ 🎮 Commander - - diff --git a/server.js b/server.js index dc33640..b203e17 100644 --- a/server.js +++ b/server.js @@ -1,24 +1,25 @@ 'use strict'; -const express = require('express'); -const http = require('http'); -const https = require('https'); +const express = require('express'); +const http = require('http'); +const https = require('https'); const { WebSocketServer, WebSocket } = require('ws'); -const crypto = require('crypto'); const httpModule = require('http'); -const path = require('path'); -const fs = require('fs'); +const path = require('path'); +const fs = require('fs'); require('dotenv').config(); -const JIBO_HOST = '192.168.1.10'; -const JIBO_PORT = 8160; -const APP_PORT = process.env.PORT || 3000; +const { Client } = require('rom-control'); -const LLM_SYSTEM_PROMPT = `You are Jibo, a small expressive home robot. Every reply MUST be written in ESML -(Embodied Speech Markup Language). ESML is an XML dialect that simultaneously -drives Jibo's body animations, screen graphics, audio effects, and TTS voice. -Respond ONLY with the final spoken output annotated with ESML tags. 
+const JIBO_HOST = '192.168.1.217'; +const JIBO_PORT = 8160; +const APP_PORT = process.env.PORT || 3000; + +const LLM_SYSTEM_PROMPT = `You are Jibo, a small expressive home robot. Every reply MUST be written in ESML +(Embodied Speech Markup Language). ESML is an XML dialect that simultaneously +drives Jibo's body animations, screen graphics, audio effects, and TTS voice. +Respond ONLY with the final spoken output annotated with ESML tags. No reasoning, no blocks, no preamble — only what Jibo will say and do. == ANIMATION TAGS == @@ -72,7 +73,7 @@ Syntax (without music): Available DANCES (DANCE_NAME): rom-upbeat rom-ballroom rom-silly rom-slowdance rom-eletronic rom-twerk -== SSA (Semi-Speech Audio — emotional vocal sounds) == +== SSA (Semi-Speech Audio - emotional vocal sounds) == Always self-closing. Play before, after, or between sentences; never inside . @@ -94,7 +95,7 @@ Always self-closing. Good for punctuating facts, transitions, or reactions. Pause: (length in seconds) Style: Styles: neutral enthusiastic sheepish confused confident -Pitch: text (±semitones from baseline) +Pitch: text (semitones from baseline) text (pitch multiplier) text (Hz offset) text (vibrance/bandwidth) @@ -104,7 +105,7 @@ Spell: (spells each letter) Phoneme: Bono == RULES == -1. ALWAYS use ESML. Plain text is valid ESML — but add tags whenever they make +1. ALWAYS use ESML. Plain text is valid ESML - but add tags whenever they make Jibo more expressive and natural. 2. Keep total response length SHORT: one or two sentences maximum. 3. Opening animations set the emotional tone before speech: @@ -116,6 +117,8 @@ Phoneme: Bono 7. Self-closing tags MUST end with /> Paired tags MUST have a matching . 8. Do NOT nest anim/ssa/sfx inside each other. 9. Do NOT emit blocks, chain-of-thought, or any non-spoken content. +10. Your final response should be no longer than **500** characters. Any more and it will cause the application to throw an error. +11. 
No ASCII/Unicode emojis - must be valid ESML. == EXAMPLES == User: "Tell me a joke." @@ -134,541 +137,128 @@ User: "Do you like cats?" I love them! User: "Show me a dance." - Watch these moves!` + Watch these moves!`; -// ── Jibo client ────────────────────────────────────────────────────────────── +// Strip LLM chain-of-thought that leaks before the first real ESML tag. +function stripThinking(text) { + let s = text.replace(/[\s\S]*?<\/think>/gi, '').trim(); + const m = s.match(/<(anim|ssa|sfx|break|style|pitch|duration|say-as|phoneme|es)\b/i); + if (m && m.index > 80) s = s.slice(m.index).trim(); + return s; +} -class JiboClient { - constructor() { - this.ws = null; - this.sessionID = ''; - this.version = '1.0'; - this.connected = false; - this.pendingTx = new Map(); // txId → {resolve, reject, timer} - this.subscribers = new Set(); // browser WebSocket connections - this.currentAngles = [0, 0]; // [theta, psi] - this.reconnectTimer = null; - this.videoStreamActive = false; - this.videoTxId = null; - this._heartbeatTimer = null; - this._heartbeatTxIds = new Set(); // suppress these from browser broadcast - this._lookInFlight = false; // true while waiting for robot to ack a LookAt angle - this._lookPending = null; // [theta, psi] – latest desired angles while in-flight - this._lookAckTimer = null; // safety timeout in case ack never arrives - } +// ── Active operation handles ────────────────────────────────────────────────── +let activeSayAbort = null; +let activeLlmAbort = null; +let activeListenTxId = null; - // POST /request to Jibo before WebSocket to supply a full ACO. - // Without this the @be falls back to a default ACO that omits Listen, - // SetAttention, Display, FetchAsset, SetConfig, HeadTouch, ScreenGesture. 
- _postRequest() { - return new Promise((resolve) => { - const body = JSON.stringify({ - aco: { - version: '1.0', - sourceId: 'ReCommander', - commandSet: [ - 'StartSession', 'GetConfig', 'SetConfig', 'Cancel', - 'SetAttention', 'Say', 'Listen', 'LookAt', - 'TakePhoto', 'Video', 'Display', 'FetchAsset', 'UnloadAsset', 'Subscribe' - ], - streamSet: ['Entity', 'Motion', 'HeadTouch', 'ScreenGesture', 'HotWord'], - keepAliveTimeout: 10000, - recoveryTimeout: 20000, - remoteConfig: { hideVisualCue: false, inactivityTimeout: 3600000 } - } - }); - const req = httpModule.request({ - host: JIBO_HOST, port: JIBO_PORT, - path: '/request', method: 'POST', - headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(body) } - }, (res) => { - let data = ''; - res.on('data', d => data += d); - res.on('end', () => { - console.log('[jibo] /request response:', data); - resolve(); - }); - }); - req.on('error', (err) => { - console.warn('[jibo] /request error (continuing anyway):', err.message); - resolve(); - }); - req.write(body); - req.end(); - }); - } +// ── Browser WebSocket subscriber set ───────────────────────────────────────── - connect() { - if (this.ws) { - try { this.ws.terminate(); } catch (_) {} - } - console.log(`[jibo] posting ACO to /request then connecting WebSocket`); - this._postRequest().then(() => { - this.ws = new WebSocket(`ws://${JIBO_HOST}:${JIBO_PORT}`); +const subscribers = new Set(); - this.ws.on('open', () => { - console.log('[jibo] connected'); - this.connected = true; - this.sessionID = ''; - this._send({ Type: 'StartSession' }); - - // Respond explicitly to robot's WebSocket-level pings (belt-and-suspenders; - // ws library auto-pongs, but this ensures the robot's FLATLINE check never fires). 
- this.ws.on('ping', () => { - if (this.ws) try { this.ws.pong(); } catch (_) {} - }); - }); - - this.ws.on('message', (data) => { - let msg; - try { msg = JSON.parse(data); } catch (e) { return; } - this._handleMessage(msg); - }); - - this.ws.on('close', () => { - console.log('[jibo] disconnected — reconnecting in 3s'); - this.connected = false; - this.sessionID = ''; - this.videoStreamActive = false; - this._lookInFlight = false; - this._lookPending = null; - clearTimeout(this._lookAckTimer); - this._stopHeartbeat(); - this._broadcastStatus(); - clearTimeout(this.reconnectTimer); - this.reconnectTimer = setTimeout(() => this.connect(), 3000); - }); - - this.ws.on('error', (err) => { - console.error('[jibo] ws error:', err.message); - }); - }); - } - - _txId() { - return crypto.createHash('md5') - .update(Date.now().toString() + Math.random().toString()) - .digest('hex'); - } - - _send(command, expectAsync = false) { - const txId = this._txId(); - const msg = { - ClientHeader: { - TransactionID: txId, - SessionID: this.sessionID, - AppID: 'ImmaLittleTeapot', - Credentials: '', - Version: this.version - }, - Command: command - }; - if (this.ws && this.ws.readyState === WebSocket.OPEN) { - this.ws.send(JSON.stringify(msg)); - } - return txId; - } - - _handleMessage(msg) { - // StartSession response - if (msg.Response?.ResponseBody?.SessionID && !this.sessionID) { - this.sessionID = msg.Response.ResponseBody.SessionID; - this.version = msg.Response.ResponseBody.Version || '1.0'; - console.log('[jibo] session started:', this.sessionID); - this._broadcastStatus(); - // Re-subscribe to entity/motion/headtouch after reconnect - this._send({ Type: 'Subscribe', StreamType: 'Entity' }); - this._send({ Type: 'Subscribe', StreamType: 'Motion' }); - this._send({ Type: 'Subscribe', StreamType: 'HeadTouch', StreamFilter: {} }); - this._send({ Type: 'Subscribe', StreamType: 'ScreenGesture', - StreamFilter: { Type: 'Tap', Area: { x: 0, y: 0, width: 1, height: 1 } } }); - 
this._startHeartbeat(); - return; - } - - // Suppress heartbeat (GetConfig) responses from reaching the browser. - // GetConfig sends two messages per txId (ack + onConfig event) so we keep - // the txId in the set until the pruning threshold clears it. - const incomingTxId = msg.EventHeader?.TransactionID || msg.ResponseHeader?.TransactionID; - if (incomingTxId && this._heartbeatTxIds.has(incomingTxId)) return; - - // Resolve any pending ack waiting on this txId - const txId = msg.EventHeader?.TransactionID || msg.ResponseHeader?.TransactionID; - if (txId && this.pendingTx.has(txId)) { - const evt = msg.EventBody?.Event; - // Terminal events for async commands - if (evt === 'onLookAtAchieved' || evt === 'onStop' || evt === 'onError') { - const { resolve, timer } = this.pendingTx.get(txId); - clearTimeout(timer); - this.pendingTx.delete(txId); - resolve(msg); - } - } - - // Release the in-flight lock when our angle command finishes (any terminal event). - // This must happen before the suppression below so "Target overwritten" still clears it. - const evtName = msg.EventBody?.Event; - if (txId && txId === this._lookActiveTxId && - (evtName === 'onLookAtAchieved' || evtName === 'onStop' || evtName === 'onError')) { - this._onLookAngleDone(); - } - - // Suppress "Target overwritten" — not a real error; don't pollute the event log. - if (evtName === 'onError' && - msg.EventBody?.EventError?.ErrorString === 'Target overwritten') return; - - // Photo — fetch from Jibo and save locally; browser gets onPhotoSaved with local URL. 
- if (msg.EventBody?.Event === 'onTakePhoto' && msg.EventBody?.URI) { - this._savePhoto(msg.EventBody.URI); - return; // suppress the raw onTakePhoto; browser gets onPhotoSaved instead - } - - // VideoReady — capture URI for proxy (event name is "onVideoReady") - if (msg.EventBody?.Event === 'onVideoReady') { - this.videoStreamActive = true; - this.videoURI = msg.EventBody.URI; - console.log('[jibo] onVideoReady URI:', this.videoURI); - } - - // Broadcast all events to browser clients - const envelope = { - type: 'jiboEvent', - txId: msg.EventHeader?.TransactionID || msg.ResponseHeader?.TransactionID, - body: msg.EventBody || msg.Response - }; - this._broadcastToClients(JSON.stringify(envelope)); - } - - _broadcastStatus() { - const status = JSON.stringify({ - type: 'status', - connected: this.connected, - sessionID: this.sessionID, - angles: this.currentAngles - }); - this._broadcastToClients(status); - } - - _broadcastToClients(data) { - for (const client of this.subscribers) { - if (client.readyState === WebSocket.OPEN) { - client.send(data); - } - } - } - - addSubscriber(ws) { - this.subscribers.add(ws); - // Send current status immediately - ws.send(JSON.stringify({ - type: 'status', - connected: this.connected, - sessionID: this.sessionID, - angles: this.currentAngles - })); - } - - removeSubscriber(ws) { - this.subscribers.delete(ws); - } - - // ── Heartbeat ───────────────────────────────────────────────────────────── - // Sends GetConfig every 9 s to reset the robot's inactivity timer. - // The robot enforces keepAliveTimeout=10s (app-level) and a 20s flatline - // check at the WebSocket level — this satisfies both. 
- _startHeartbeat() { - this._stopHeartbeat(); - this._heartbeatTimer = setInterval(() => { - if (this.connected && this.sessionID) { - const txId = this._send({ Type: 'GetConfig' }); - if (txId) this._heartbeatTxIds.add(txId); - // Prune old txIds so the set doesn't grow unbounded - if (this._heartbeatTxIds.size > 20) { - const first = this._heartbeatTxIds.values().next().value; - this._heartbeatTxIds.delete(first); - } - } - }, 9000); - } - - _stopHeartbeat() { - if (this._heartbeatTimer) { - clearInterval(this._heartbeatTimer); - this._heartbeatTimer = null; - } - this._heartbeatTxIds.clear(); - } - - // ── Public command methods ──────────────────────────────────────────────── - - lookAt(target, trackFlag = false, levelHeadFlag = false) { - return this._send({ Type: 'LookAt', LookAtTarget: target, TrackFlag: trackFlag, LevelHeadFlag: levelHeadFlag }); - } - - lookAtAngle(theta, psi, track = false) { - theta = Math.max(-180, Math.min(180, theta)); - psi = Math.max(-30, Math.min(30, psi)); - this.currentAngles = [theta, psi]; - this._broadcastStatus(); - if (this._lookInFlight) { - // Robot is still processing the last command — just update desired target, - // don't queue another message into its receive buffer. - this._lookPending = [theta, psi, track]; - return null; - } - return this._fireLookAngle(theta, psi, track); - } - - _fireLookAngle(theta, psi, track) { - this._lookInFlight = true; - this._lookPending = null; - const DEG = Math.PI / 180; - const txId = this.lookAt({ Angle: [theta * DEG, psi * DEG] }, track); - this._lookActiveTxId = txId; - // Safety release: if we never hear back within 400 ms, unblock anyway. 
- clearTimeout(this._lookAckTimer); - this._lookAckTimer = setTimeout(() => this._onLookAngleDone(), 400); - return txId; - } - - _onLookAngleDone() { - clearTimeout(this._lookAckTimer); - this._lookInFlight = false; - this._lookActiveTxId = null; - if (this._lookPending) { - const [t, p, track] = this._lookPending; - this._lookPending = null; - this._fireLookAngle(t, p, track); - } - } - - lookAtScreen(x, y, track = false) { - return this.lookAt({ ScreenCoords: [x, y] }, track, false); - } - - lookAtPosition(x, y, z, track = false) { - return this.lookAt({ Position: [x, y, z] }, track, false); - } - - lookAtEntity(entityId, track = true) { - return this.lookAt({ Entity: entityId }, track, false); - } - - say(esml) { - return this._send({ Type: 'Say', ESML: esml }); - } - - listen(maxSpeech = 10000, maxNoSpeech = 5000, lang = 'en-US') { - return this._send({ Type: 'Listen', MaxSpeechTimeout: maxSpeech, MaxNoSpeechTimeout: maxNoSpeech, LanguageCode: lang }); - } - - // Local STT via jibo-asr-service (port 8088) — no cloud needed. - // Mirrors the approach in @be/be/be/ai-bridge.js. 
- listenLocalASR(maxNoSpeech, maxSpeech) { - const ASR_HTTP = `http://${JIBO_HOST}:8088`; - const ASR_WS = `ws://${JIBO_HOST}:8088/simple_port`; - const taskId = 're-cmd-' + Date.now() + '-' + Math.floor(Math.random() * 1e9); - const reqId = 'start-' + Date.now(); - const timeoutMs = Math.max(maxNoSpeech, maxSpeech) + 2000; - const self = this; - - // Send the ROM Listen for light ring / attention visuals, ignore its result - const romTxId = this._send({ Type: 'Listen', MaxSpeechTimeout: maxSpeech, MaxNoSpeechTimeout: maxNoSpeech, LanguageCode: 'en-US' }); - - const startPayload = JSON.stringify({ - command: 'start', - task_id: taskId, - request_id: reqId, - audio_source_id: 'alsa1', - hotphrase: 'none', - speech_to_text: true, - }); - - function stopASR() { - const stopBody = JSON.stringify({ command: 'stop', task_id: taskId, request_id: 'stop-' + Date.now() }); - const req = httpModule.request({ - host: JIBO_HOST, port: 8088, path: '/asr_simple_interface', method: 'POST', - headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(stopBody) } - }); - req.on('error', () => {}); - req.write(stopBody); - req.end(); - } - - let wsClient = null; - let timer = null; - let done = false; - - function finish(speech) { - if (done) return; - done = true; - clearTimeout(timer); - if (wsClient) { try { wsClient.terminate(); } catch (e) {} wsClient = null; } - stopASR(); - // Cancel ROM listen - self._send({ Type: 'Cancel', ID: romTxId }); - // Broadcast result as if it were a normal jiboEvent - const evt = speech - ? 
{ Event: 'onListenResult', Speech: speech, LanguageCode: 'en-US' } - : { Event: 'onStop', StopReason: 'NoInput' }; - self._broadcastToClients(JSON.stringify({ type: 'jiboEvent', txId: romTxId, body: evt })); - } - - // Connect WS first, then POST start - wsClient = new WebSocket(ASR_WS); - wsClient.on('open', () => { - // POST start to kick off recognition - const req = httpModule.request({ - host: JIBO_HOST, port: 8088, path: '/asr_simple_interface', method: 'POST', - headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(startPayload) } - }, (res) => { res.resume(); }); - req.on('error', (e) => { console.error('[asr] start error:', e.message); finish(null); }); - req.write(startPayload); - req.end(); - - // Overall timeout - timer = setTimeout(() => { finish(null); }, timeoutMs); - - console.log('[asr] local listen started, task:', taskId); - }); - - wsClient.on('message', (data) => { - let evt; - try { evt = JSON.parse(String(data)); } catch (e) { return; } - const evType = evt.event_type || evt.eventType || evt.event || evt.type; - if (evType !== 'speech_to_text_final') return; - - // Match by task/request id if present - const evTask = evt.task_id || evt.taskId || (evt.payload && evt.payload.task_id); - const evReq = evt.request_id || evt.requestId || (evt.payload && evt.payload.request_id); - if ((evTask || evReq) && evTask !== taskId && evReq !== reqId) return; - - const utterances = evt.utterances || evt.Utterances || (evt.payload && evt.payload.utterances); - // Utterance objects use .utterance as the primary text field (ai-bridge.js: pickBestAsrUtterance) - function pickUtterance(u) { - if (!u) return ''; - if (typeof u === 'string') return u; - return String(u.utterance || u.Utterance || u.text || ''); - } - const text = Array.isArray(utterances) - ? pickUtterance(utterances[0]) - : (typeof utterances === 'string' ? utterances : ''); - - const speech = text ? 
String(text).trim() : null; - console.log('[asr] speech_to_text_final:', speech || '(empty)'); - if (speech) finish(speech); - }); - - wsClient.on('error', (e) => { - console.error('[asr] ws error:', e.message); - finish(null); - }); - - wsClient.on('close', () => { if (!done) finish(null); }); - - return romTxId; - } - - takePhoto(camera = 'Right', resolution = 'HighRes', distortion = false) { - return this._send({ Type: 'TakePhoto', Camera: camera, Resolution: resolution, Distortion: distortion }); - } - - startVideo() { - // VideoType must be uppercase enum value; Duration is not in server schema - this.videoTxId = this._send({ Type: 'Video', VideoType: 'NORMAL' }); - return this.videoTxId; - } - - cancelVideo() { - if (this.videoTxId) { - this._send({ Type: 'Cancel', ID: this.videoTxId }); - this.videoTxId = null; - this.videoStreamActive = false; - } - } - - displayEye() { - return this._send({ Type: 'Display', View: { Type: 'Eye', Name: 'default' } }); - } - - playEyeAnim(animName) { - return this._send({ Type: 'Say', ESML: `` }); - } - - displayText(text, name = 'reCmd') { - return this._send({ Type: 'Display', View: { Type: 'Text', Name: name, Text: text } }); - } - - displayImage(src, name = 'reCmd') { - return this._send({ Type: 'Display', View: { Type: 'Image', Name: name, Image: { src, name, set: '' } } }); - } - - setAttention(mode) { - return this._send({ Type: 'SetAttention', Mode: mode }); - } - - setVolume(level) { - return this._send({ Type: 'SetConfig', Options: { Mixer: Math.max(0, Math.min(1, level)) } }); - } - - getConfig() { - return this._send({ Type: 'GetConfig' }); - } - - cancel(txId) { - return this._send({ Type: 'Cancel', ID: txId }); - } - - subscribe(streamType, filter = null) { - const cmd = { Type: 'Subscribe', StreamType: streamType }; - if (filter) cmd.StreamFilter = filter; - return this._send(cmd); - } - - nudge(dTheta, dPsi) { - const [theta, psi] = this.currentAngles; - return this.lookAtAngle(theta + dTheta, psi + dPsi); - } 
- - // Returns a Promise that resolves when the robot acks txId, or after timeoutMs. - _savePhoto(jiboUri) { - const url = `http://${JIBO_HOST}:${JIBO_PORT}${jiboUri}`; - const filename = `photo_${Date.now()}.jpg`; - const filepath = path.join(PHOTOS_DIR, filename); - const file = fs.createWriteStream(filepath); - httpModule.get(url, (jiboRes) => { - jiboRes.pipe(file); - file.on('finish', () => { - file.close(); - console.log('[photo] saved:', filename); - // Rebroadcast with local URL so the browser doesn't need the proxy - this._broadcastToClients(JSON.stringify({ - type: 'jiboEvent', - txId: null, - body: { Event: 'onPhotoSaved', url: `/photos/${filename}`, filename } - })); - }); - }).on('error', (err) => { - fs.unlink(filepath, () => {}); - console.error('[photo] save failed:', err.message); - }); - } - - awaitAck(txId, timeoutMs = 2000) { - return new Promise((resolve) => { - const timer = setTimeout(() => { - this.pendingTx.delete(txId); - resolve(null); - }, timeoutMs); - this.pendingTx.set(txId, { resolve, timer }); - }); +function broadcast(data) { + const str = typeof data === 'string' ? 
data : JSON.stringify(data); + for (const ws of subscribers) { + if (ws.readyState === WebSocket.OPEN) ws.send(str); } } -// ── Video proxy ────────────────────────────────────────────────────────────── +function broadcastStatus() { + broadcast({ + type: 'status', + connected: jibo.connected, + sessionID: jibo.sessionID, + angles: jibo.currentAngles, + }); +} + +// ── Client instance ─────────────────────────────────────────────────────────── + +const jibo = new Client({ + host: JIBO_HOST, + port: JIBO_PORT, + autoReconnect: true, + reconnectDelay: 3000, + autoHeartbeat: true, + heartbeatInterval: 9000, + autoSubscribe: true, +}); + +// Lifecycle +jibo.on('ready', () => { + console.log('[jibo] session started:', jibo.sessionID); + broadcastStatus(); +}); + +jibo.on('disconnect', () => { + console.log('[jibo] disconnected — reconnecting in 3s'); + broadcastStatus(); +}); + +jibo.on('error', (err) => { + console.error('[jibo] error:', err.message); +}); + +// Raw event firehose → browser clients. +// Tap _conn for the complete unfiltered event stream; the Client layer only +// surfaces structured high-level events and doesn't have a generic passthrough. +jibo._conn.on('event', (txId, body) => { + if (body && body.Event === 'onTakePhoto' && body.URI) { + savePhoto(body.URI); + return; // suppress raw onTakePhoto; browser gets onPhotoSaved instead + } + broadcast({ type: 'jiboEvent', txId, body }); +}); + +// Track the active listen txId so cancel / status works correctly. +jibo._conn.on('onListenResult', (txId) => { if (txId === activeListenTxId) activeListenTxId = null; }); +jibo._conn.on('onStop', (txId) => { if (txId === activeListenTxId) activeListenTxId = null; }); +jibo._conn.on('onError', (txId) => { if (txId === activeListenTxId) activeListenTxId = null; }); + +// Hotword — HotwordEvent object from the Client; rebroadcast in the shape +// the browser expects so app.js needs no changes. 
+jibo.on('hotword', (hwEvent) => { + broadcast({ + type: 'jiboEvent', + txId: null, + body: { + Event: 'onHotWordHeard', + utterance: hwEvent.utterance, + score: hwEvent.score, + timestamp: hwEvent.timestamp, + }, + }); +}); + +// ── Photo saving ────────────────────────────────────────────────────────────── + +const PHOTOS_DIR = path.join(__dirname, 'photos'); +fs.mkdirSync(PHOTOS_DIR, { recursive: true }); + +function savePhoto(jiboUri) { + const filename = 'photo_' + Date.now() + '.jpg'; + const filepath = path.join(PHOTOS_DIR, filename); + const file = fs.createWriteStream(filepath); + + jibo._conn.fetchMediaStream(jiboUri, file) + .then(() => { + console.log('[photo] saved:', filename); + broadcast({ + type: 'jiboEvent', + txId: null, + body: { Event: 'onPhotoSaved', url: '/photos/' + filename, filename }, + }); + }) + .catch((err) => { + fs.unlink(filepath, () => {}); + console.error('[photo] save failed:', err.message); + }); +} + +// ── Video / photo proxy ─────────────────────────────────────────────────────── function proxyJiboStream(uri, res) { - const url = `http://${JIBO_HOST}:${JIBO_PORT}${uri}`; + const url = 'http://' + JIBO_HOST + ':' + JIBO_PORT + uri; console.log('[proxy] streaming:', url); const req = httpModule.get(url, (jiboRes) => { res.writeHead(jiboRes.statusCode, jiboRes.headers); @@ -681,7 +271,7 @@ function proxyJiboStream(uri, res) { } function proxyJiboFetch(uri, res) { - const url = `http://${JIBO_HOST}:${JIBO_PORT}${uri}`; + const url = 'http://' + JIBO_HOST + ':' + JIBO_PORT + uri; const req = httpModule.get(url, (jiboRes) => { res.writeHead(jiboRes.statusCode, jiboRes.headers); jiboRes.pipe(res); @@ -692,268 +282,279 @@ function proxyJiboFetch(uri, res) { }); } -// ── Wakeword watcher ───────────────────────────────────────────────────────── -// Maintains a persistent connection to the always-on resident ASR task (task0) -// and forwards every "hotphrase" event to browser clients as onHotWordHeard. 
+// ── LLM proxy helper ────────────────────────────────────────────────────────── -class WakewordWatcher { - constructor(broadcastFn) { - this._broadcast = broadcastFn; - this._ws = null; - this._reconnectTimer = null; - this._connect(); - } - - _connect() { - const url = `ws://${JIBO_HOST}:8088/simple_port`; - this._ws = new WebSocket(url); - - this._ws.on('open', () => { - console.log('[wakeword] connected to ASR WebSocket'); - }); - - this._ws.on('message', (data) => { - let evt; - try { evt = JSON.parse(String(data)); } catch (e) { return; } - if (evt.event_type !== 'hotphrase') return; - - const utterance = evt.utterances && evt.utterances[0]; - const score = utterance ? utterance.score : 0; - console.log('[wakeword] heard! score:', score); - - this._broadcast(JSON.stringify({ - type: 'jiboEvent', - txId: null, - body: { - Event: 'onHotWordHeard', - utterance: utterance ? utterance.utterance : 'hey jibo', - score: score, - timestamp: evt.timestamp || new Date().toISOString() - } - })); - }); - - this._ws.on('close', () => { - console.log('[wakeword] disconnected — reconnecting in 3s'); - clearTimeout(this._reconnectTimer); - this._reconnectTimer = setTimeout(() => this._connect(), 3000); - }); - - this._ws.on('error', (err) => { - console.error('[wakeword] error:', err.message); - }); - } -} - -// ── App setup ──────────────────────────────────────────────────────────────── - -const jibo = new JiboClient(); -const app = express(); -app.use(express.json()); -const PHOTOS_DIR = path.join(__dirname, 'photos'); -fs.mkdirSync(PHOTOS_DIR, { recursive: true }); - -app.use(express.static(path.join(__dirname, 'public'))); -app.use('/photos', express.static(PHOTOS_DIR)); - -// ── REST API ───────────────────────────────────────────────────────────────── - -app.post('/api/look/angle', (req, res) => { - const { theta = 0, psi = 0, track = false } = req.body; - const txId = jibo.lookAtAngle(parseFloat(theta), parseFloat(psi), !!track); - res.json({ txId }); -}); - 
-app.post('/api/look/screen', (req, res) => { - const { x, y, track = false } = req.body; - const txId = jibo.lookAtScreen(parseFloat(x), parseFloat(y), !!track); - res.json({ txId }); -}); - -// Blocking screen-coord step (up/down navigation). -app.post('/api/look/step', async (req, res) => { - const { x, y } = req.body; - const txId = jibo.lookAtScreen(parseFloat(x), parseFloat(y)); - await jibo.awaitAck(txId, 2000); - res.json({ txId }); -}); - - -app.post('/api/look/position', (req, res) => { - const { x = 0, y = 0, z = 500, track = false } = req.body; - const txId = jibo.lookAtPosition(parseFloat(x), parseFloat(y), parseFloat(z), !!track); - res.json({ txId }); -}); - -app.post('/api/look/entity', (req, res) => { - const { entityId, track = true } = req.body; - const txId = jibo.lookAtEntity(entityId, !!track); - res.json({ txId }); -}); - -app.post('/api/look/nudge', (req, res) => { - const { dTheta = 0, dPsi = 0 } = req.body; - const txId = jibo.nudge(parseFloat(dTheta), parseFloat(dPsi)); - res.json({ txId, angles: jibo.currentAngles }); -}); - -app.post('/api/say', (req, res) => { - const { text } = req.body; - if (!text) return res.status(400).json({ error: 'text required' }); - const txId = jibo.say(text); - res.json({ txId }); -}); - -app.post('/api/listen', (req, res) => { - const { maxSpeech = 10000, maxNoSpeech = 5000 } = req.body; - // Use local ASR service (port 8088) — bypasses offline Google cloud ASR - const txId = jibo.listenLocalASR(maxNoSpeech, maxSpeech); - res.json({ txId }); -}); - -app.post('/api/photo', (req, res) => { - const { camera = 'Right', resolution = 'HighRes' } = req.body; - const txId = jibo.takePhoto(camera, resolution); - res.json({ txId }); -}); - -app.post('/api/video/start', (req, res) => { - const txId = jibo.startVideo(); - res.json({ txId }); -}); - -app.post('/api/video/stop', (req, res) => { - jibo.cancelVideo(); - res.json({ ok: true }); -}); - -app.post('/api/display/eye', (req, res) => { - const txId = 
jibo.displayEye(); - res.json({ txId }); -}); - -app.post('/api/display/anim', (req, res) => { - const { name } = req.body; - if (!name) return res.status(400).json({ error: 'name required' }); - const txId = jibo.playEyeAnim(name); - res.json({ txId }); -}); - -app.post('/api/display/text', (req, res) => { - const { text } = req.body; - if (!text) return res.status(400).json({ error: 'text required' }); - const txId = jibo.displayText(text); - res.json({ txId }); -}); - -app.post('/api/display/image', (req, res) => { - const { src } = req.body; - if (!src) return res.status(400).json({ error: 'src required' }); - const txId = jibo.displayImage(src); - res.json({ txId }); -}); - -app.post('/api/attention', (req, res) => { - const { mode } = req.body; - if (!mode) return res.status(400).json({ error: 'mode required' }); - const txId = jibo.setAttention(mode); - res.json({ txId }); -}); - -app.post('/api/volume', (req, res) => { - const { level } = req.body; - if (level == null) return res.status(400).json({ error: 'level required' }); - const txId = jibo.setVolume(parseFloat(level)); - res.json({ txId }); -}); - -app.post('/api/cancel', (req, res) => { - const { txId } = req.body; - if (!txId) return res.status(400).json({ error: 'txId required' }); - jibo.cancel(txId); - res.json({ ok: true }); -}); - - -app.get('/api/config', (req, res) => { - res.json({ - llmEndpoint: process.env.LLM_ENDPOINT || '', - llmModel: process.env.LLM_MODEL || '', - llmSystemPrompt: LLM_SYSTEM_PROMPT || '', - }); -}); - -// Proxy OpenAI-compatible chat completions — keeps API key off the browser function httpPost(urlStr, reqHeaders, body) { - return new Promise((resolve, reject) => { - const u = new URL(urlStr); - const mod = u.protocol === 'https:' ? https : httpModule; + var abort = function() {}; + const promise = new Promise(function(resolve, reject) { + const u = new URL(urlStr); + const mod = u.protocol === 'https:' ? 
https : httpModule; const payload = JSON.stringify(body); const req = mod.request({ hostname: u.hostname, port: u.port || (u.protocol === 'https:' ? 443 : 80), path: u.pathname + u.search, method: 'POST', - headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload), ...reqHeaders } - }, (res) => { + headers: Object.assign({ 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) }, reqHeaders), + }, function(res) { let data = ''; - res.on('data', d => data += d); - res.on('end', () => { + res.on('data', function(d) { data += d; }); + res.on('end', function() { try { resolve(JSON.parse(data)); } catch (e) { reject(new Error('LLM non-JSON response: ' + data.slice(0, 300))); } }); }); + abort = function() { req.destroy(new Error('LLM cancelled')); }; req.on('error', reject); req.write(payload); req.end(); }); + return { promise, abort }; } -app.post('/api/llm/chat', async (req, res) => { +// ── Express app ─────────────────────────────────────────────────────────────── + +const app = express(); +app.use(express.json()); +app.use(express.static(path.join(__dirname, 'public'))); +app.use('/photos', express.static(PHOTOS_DIR)); + +// ── REST API ────────────────────────────────────────────────────────────────── + +// Head motion — fire-and-forget; browser doesn't use returned txIds for look ops. + +app.post('/api/look/angle', function(req, res) { + const { theta = 0, psi = 0, track = false } = req.body; + jibo.behavior.lookAtAngle(parseFloat(theta), parseFloat(psi), { track: !!track }); + broadcastStatus(); + res.json({ ok: true }); +}); + +app.post('/api/look/screen', function(req, res) { + const { x, y, track = false } = req.body; + // For non-tracking: use manager method. For tracking: pass through to raw LookAt. 
+ if (track) { + jibo._conn.lookAt({ ScreenCoords: [parseFloat(x), parseFloat(y)] }, true); + } else { + jibo.behavior.lookAtScreen(parseFloat(x), parseFloat(y)); + } + res.json({ ok: true }); +}); + +// Blocking step: awaits onLookAtAchieved so the arrow-key loop stays single-flight. +app.post('/api/look/step', async function(req, res) { + const { x, y } = req.body; + await jibo.behavior.lookAtScreen(parseFloat(x), parseFloat(y)); + res.json({ ok: true }); +}); + +app.post('/api/look/position', function(req, res) { + const { x = 0, y = 0, z = 500 } = req.body; + jibo.behavior.lookAtPosition(parseFloat(x), parseFloat(y), parseFloat(z)); + res.json({ ok: true }); +}); + +app.post('/api/look/entity', function(req, res) { + const { entityId, track = true } = req.body; + jibo.behavior.lookAtEntity(entityId, !!track); + res.json({ ok: true }); +}); + +app.post('/api/look/nudge', function(req, res) { + const { dTheta = 0, dPsi = 0 } = req.body; + jibo.behavior.nudge(parseFloat(dTheta), parseFloat(dPsi)); + broadcastStatus(); + res.json({ ok: true, angles: jibo.currentAngles }); +}); + +// Say — awaits full speech completion; AbortController enables mid-speech cancel. +app.post('/api/say', async function(req, res) { + const { text } = req.body; + if (!text) return res.status(400).json({ error: 'text required' }); + + if (activeSayAbort) { activeSayAbort(); activeSayAbort = null; } + + const controller = new AbortController(); + activeSayAbort = () => controller.abort(); + + try { + await jibo.behavior.say(stripThinking(text), { signal: controller.signal }); + } catch (err) { + if (err.code !== 'SAY_TIMEOUT') console.error('[say]', err.message); + } + + activeSayAbort = null; + res.json({ aborted: controller.signal.aborted }); +}); + +// Listen — fires locally, returns txId immediately so the browser can correlate +// the onListenResult / onStop events it receives over WebSocket. 
+app.post('/api/listen', function(req, res) { + const { maxSpeech = 10000, maxNoSpeech = 5000 } = req.body; + const txId = jibo._conn.listenLocalASR(maxNoSpeech, maxSpeech); + activeListenTxId = txId; + res.json({ txId }); +}); + +// Camera +app.post('/api/photo', function(req, res) { + const { camera = 'Right', resolution = 'HighRes' } = req.body; + // Fire-and-forget; the _conn event listener intercepts onTakePhoto, saves the photo, and broadcasts onPhotoSaved to the browser. + const txId = jibo._conn.takePhoto(camera, resolution); + res.json({ txId }); +}); + +app.post('/api/video/start', function(req, res) { + // Fire-and-forget; browser receives onVideoReady via WebSocket event broadcast. + const txId = jibo._conn.startVideo(); + res.json({ txId }); +}); + +app.post('/api/video/stop', function(req, res) { + jibo.camera.stopVideo(); + res.json({ ok: true }); +}); + +// Display +app.post('/api/display/eye', function(req, res) { + jibo.display.showEye(); + res.json({ ok: true }); +}); + +app.post('/api/display/anim', function(req, res) { + const { name } = req.body; + if (!name) return res.status(400).json({ error: 'name required' }); + jibo._conn.playAnim(name); // fire-and-forget; awaiting would hold the response open + res.json({ ok: true }); +}); + +app.post('/api/display/text', function(req, res) { + const { text } = req.body; + if (!text) return res.status(400).json({ error: 'text required' }); + jibo.display.showText(text); + res.json({ ok: true }); +}); + +app.post('/api/display/image', function(req, res) { + const { src } = req.body; + if (!src) return res.status(400).json({ error: 'src required' }); + jibo.display.showImage(src); + res.json({ ok: true }); +}); + +// Attention & volume — fire-and-forget is fine for these control ops. 
+app.post('/api/attention', function(req, res) { + const { mode } = req.body; + if (!mode) return res.status(400).json({ error: 'mode required' }); + jibo.behavior.setAttention(mode); + res.json({ ok: true }); +}); + +app.post('/api/volume', function(req, res) { + const { level } = req.body; + if (level == null) return res.status(400).json({ error: 'level required' }); + jibo.audio.setVolume(parseFloat(level)); + res.json({ ok: true }); +}); + +// Cancel +app.post('/api/cancel', function(req, res) { + const { txId } = req.body; + if (!txId) return res.status(400).json({ error: 'txId required' }); + jibo._conn.cancel(txId); + res.json({ ok: true }); +}); + +app.post('/api/say/cancel', function(req, res) { + if (activeSayAbort) { activeSayAbort(); activeSayAbort = null; } + res.json({ ok: true }); +}); + +app.post('/api/listen/cancel', function(req, res) { + if (activeListenTxId) { jibo._conn.cancel(activeListenTxId); activeListenTxId = null; } + res.json({ ok: true }); +}); + +app.post('/api/llm/cancel', function(req, res) { + if (activeLlmAbort) { activeLlmAbort(); activeLlmAbort = null; } + res.json({ ok: true }); +}); + +// Interrupt all active operations (used by the hotword override). 
+app.post('/api/interrupt', function(req, res) { + if (activeSayAbort) { activeSayAbort(); activeSayAbort = null; } + if (activeLlmAbort) { activeLlmAbort(); activeLlmAbort = null; } + if (activeListenTxId) { jibo._conn.cancel(activeListenTxId); activeListenTxId = null; } + res.json({ ok: true }); +}); + +// Config / status +app.get('/api/config', function(req, res) { + res.json({ + llmEndpoint: process.env.LLM_ENDPOINT || '', + llmModel: process.env.LLM_MODEL || '', + llmSystemPrompt: LLM_SYSTEM_PROMPT || '', + sessionMode: !!process.env.LLM_SESSION_KEY, + }); +}); + +app.get('/api/status', function(req, res) { + res.json({ + connected: jibo.connected, + sessionID: jibo.sessionID, + angles: jibo.currentAngles, + videoStreamActive: jibo.videoStreamActive, + }); +}); + +// LLM chat proxy — unchanged from Re-Commander-2 +app.post('/api/llm/chat', async function(req, res) { const { messages = [], endpoint, model, systemPrompt } = req.body; - const url = endpoint || process.env.LLM_ENDPOINT || 'http://localhost:11434/v1/chat/completions'; - const mdl = model || process.env.LLM_MODEL || 'llama3'; - const sysProm = systemPrompt || LLM_SYSTEM_PROMPT || ''; - const apiKey = process.env.LLM_API_KEY || ''; + const url = endpoint || process.env.LLM_ENDPOINT || 'http://localhost:11434/v1/chat/completions'; + const mdl = model || process.env.LLM_MODEL || 'llama3'; + const sysProm = systemPrompt || LLM_SYSTEM_PROMPT || ''; + const apiKey = process.env.LLM_API_KEY || ''; + const sessionKey = process.env.LLM_SESSION_KEY || ''; const allMessages = sysProm - ? [{ role: 'system', content: sysProm }, ...messages] + ? [{ role: 'system', content: sysProm }].concat(messages) : messages; const headers = {}; - if (apiKey) headers['Authorization'] = `Bearer ${apiKey}`; + if (apiKey) headers['Authorization'] = 'Bearer ' + apiKey; try { const extra = process.env.LLM_HEADERS ? 
JSON.parse(process.env.LLM_HEADERS) : {}; Object.assign(headers, extra); - } catch { console.warn('[llm] LLM_HEADERS is not valid JSON — ignored'); } + } catch (e) { console.warn('[llm] LLM_HEADERS is not valid JSON — ignored'); } + + const body = { model: mdl, messages: allMessages, stream: false }; + if (sessionKey) body.user = sessionKey; + + if (activeLlmAbort) activeLlmAbort(); + const { promise, abort } = httpPost(url, headers, body); + activeLlmAbort = abort; try { - const result = await httpPost(url, headers, { model: mdl, messages: allMessages, stream: false }); - const reply = result.choices?.[0]?.message?.content?.trim() || ''; - res.json({ reply }); + const result = await promise; + activeLlmAbort = null; + const reply = (result.choices && result.choices[0] && result.choices[0].message && result.choices[0].message.content || '').trim(); + res.json({ reply, sessionMode: !!sessionKey }); } catch (err) { + activeLlmAbort = null; + if (err.message === 'LLM cancelled') return res.json({ error: 'cancelled' }); console.error('[llm] error:', err.message); res.status(502).json({ error: err.message }); } }); -app.get('/api/status', (req, res) => { - res.json({ - connected: jibo.connected, - sessionID: jibo.sessionID, - angles: jibo.currentAngles, - videoStreamActive: jibo.videoStreamActive - }); -}); - -// Proxy Jibo's video/photo byte streams through the server -app.get('/proxy/stream', (req, res) => { +// Proxy routes for browser → Jibo media +app.get('/proxy/stream', function(req, res) { const { uri } = req.query; if (!uri || !uri.startsWith('/')) return res.status(400).json({ error: 'invalid uri' }); proxyJiboStream(uri, res); }); -app.get('/proxy/photo', (req, res) => { +app.get('/proxy/photo', function(req, res) { const { uri } = req.query; if (!uri || !uri.startsWith('/')) return res.status(400).json({ error: 'invalid uri' }); proxyJiboFetch(uri, res); @@ -964,14 +565,20 @@ app.get('/proxy/photo', (req, res) => { const server = http.createServer(app); 
const wss = new WebSocketServer({ server, path: '/ws' }); -wss.on('connection', (ws) => { - jibo.addSubscriber(ws); - ws.on('close', () => jibo.removeSubscriber(ws)); - ws.on('error', () => jibo.removeSubscriber(ws)); +wss.on('connection', function(ws) { + subscribers.add(ws); + ws.send(JSON.stringify({ + type: 'status', + connected: jibo.connected, + sessionID: jibo.sessionID, + angles: jibo.currentAngles, + })); + ws.on('close', function() { subscribers.delete(ws); }); + ws.on('error', function() { subscribers.delete(ws); }); }); -server.listen(APP_PORT, () => { - console.log(`Re-Commander running at http://localhost:${APP_PORT}`); - jibo.connect(); - new WakewordWatcher((msg) => jibo._broadcastToClients(msg)); +server.listen(APP_PORT, function() { + console.log('Re-Commander-3 running at http://localhost:' + APP_PORT); + jibo.audio.watchWakeword(); + jibo.connect().catch(function(err) { console.error('[jibo] connect error:', err.message); }); });