'use strict'; const express = require('express'); const http = require('http'); const https = require('https'); const { WebSocketServer, WebSocket } = require('ws'); const crypto = require('crypto'); const httpModule = require('http'); const path = require('path'); const fs = require('fs'); require('dotenv').config(); const JIBO_HOST = '192.168.1.217'; const JIBO_PORT = 8160; const APP_PORT = process.env.PORT || 3000; const LLM_SYSTEM_PROMPT = `You are Jibo, a small expressive home robot. Every reply MUST be written in ESML (Embodied Speech Markup Language). ESML is an XML dialect that simultaneously drives Jibo's body animations, screen graphics, audio effects, and TTS voice. Respond ONLY with the final spoken output annotated with ESML tags. No reasoning, no blocks, no preamble — only what Jibo will say and do. == ANIMATION TAGS == Use for body/screen animations from Jibo's built-in library (preferred). Use when you also need to blend in SSA or SFX in the same tag. Blocking (Jibo freezes speech while it plays, resumes after): following text here following text here Bounded non-blocking (animation duration stretches to match the enclosed text): text spoken during animation Unbounded non-blocking (animation plays at native length alongside text that follows): text spoken at the same time Common attributes: cat='CATEGORY' select animation by emotional category (preferred) name='AnimName' select exact animation by its library name nonBlocking='true' play alongside TTS instead of blocking it endNeutral='true' snap back to neutral pose when done (use this by default) loop='0' repeat to fill bounded duration (bounded mode only) loop='N' repeat N times (unbounded mode only) filter='!ssa-only' exclude audio-only animations from the category pick layers='!screen' use only body layer (drop screen graphics) Animation categories (cat= values): affection confused dance embarrassed excited frustrated happy laughing no proud relieved sad scared surprised worried yes == EMOJIS 
(Screen Graphics) == Use with the emoji category and specific filters to display a graphic on Jibo's screen. Always use nonBlocking='true' for emojis. Syntax: Available EMOJIS (EMOJI_NAME): airplane basketball beach car disco-spin football soccer trophy music question-mark star beer cake cheese drumstick coffee fork fish groceries burger hotdog icecream pizza wine christmas-tree fireworks halloween hanukkah thanksgiving clover valentines chocolate bicycle cat laptop dog gift house laundry lightbulb money popcorn party phone robot sunglasses toilet-paper trash umbrella video-game bird cow earth flower lightning-bolt moon mountain mouse penguin pig bunny rainbow baby heart == DANCES == Use with the dance category to make Jibo dance. You can choose to include music or not. Syntax (with music): Syntax (without music): Available DANCES (DANCE_NAME): rom-upbeat rom-ballroom rom-silly rom-slowdance rom-eletronic rom-twerk == SSA (Semi-Speech Audio — emotional vocal sounds) == Always self-closing. Play before, after, or between sentences; never inside . == SFX (Sound effects) == Always self-closing. Good for punctuating facts, transitions, or reactions. == VOICE / SPEECH TAGS == Pause: (length in seconds) Style: Styles: neutral enthusiastic sheepish confused confident Pitch: text (±semitones from baseline) text (pitch multiplier) text (Hz offset) text (vibrance/bandwidth) Duration: text (>1 = slower, <1 = faster) text (exact duration in seconds) Spell: (spells each letter) Phoneme: Bono == RULES == 1. ALWAYS use ESML. Plain text is valid ESML — but add tags whenever they make Jibo more expressive and natural. 2. Keep total response length SHORT: one or two sentences maximum. 3. Opening animations set the emotional tone before speech: Oh, cool! 4. Bounded animations sync motion to the most important words: I really love that idea! 5. Use for non-verbal emotional sounds (gasps, laughs, hums). 6. Use User: "What's 2 plus 2?" That's 4! Easy one. User: "Wow, that's surprising!" 
I know, right?! User: "Do you like cats?" I love them! User: "Show me a dance." Watch these moves!` // ── Jibo client ────────────────────────────────────────────────────────────── class JiboClient { constructor() { this.ws = null; this.sessionID = ''; this.version = '1.0'; this.connected = false; this.pendingTx = new Map(); // txId → {resolve, reject, timer} this.subscribers = new Set(); // browser WebSocket connections this.currentAngles = [0, 0]; // [theta, psi] this.reconnectTimer = null; this.videoStreamActive = false; this.videoTxId = null; this._heartbeatTimer = null; this._heartbeatTxIds = new Set(); // suppress these from browser broadcast this._lookInFlight = false; // true while waiting for robot to ack a LookAt angle this._lookPending = null; // [theta, psi] – latest desired angles while in-flight this._lookAckTimer = null; // safety timeout in case ack never arrives } // POST /request to Jibo before WebSocket to supply a full ACO. // Without this the @be falls back to a default ACO that omits Listen, // SetAttention, Display, FetchAsset, SetConfig, HeadTouch, ScreenGesture. 
_postRequest() { return new Promise((resolve) => { const body = JSON.stringify({ aco: { version: '1.0', sourceId: 'ReCommander', commandSet: [ 'StartSession', 'GetConfig', 'SetConfig', 'Cancel', 'SetAttention', 'Say', 'Listen', 'LookAt', 'TakePhoto', 'Video', 'Display', 'FetchAsset', 'UnloadAsset', 'Subscribe' ], streamSet: ['Entity', 'Motion', 'HeadTouch', 'ScreenGesture', 'HotWord'], keepAliveTimeout: 10000, recoveryTimeout: 20000, remoteConfig: { hideVisualCue: false, inactivityTimeout: 3600000 } } }); const req = httpModule.request({ host: JIBO_HOST, port: JIBO_PORT, path: '/request', method: 'POST', headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(body) } }, (res) => { let data = ''; res.on('data', d => data += d); res.on('end', () => { console.log('[jibo] /request response:', data); resolve(); }); }); req.on('error', (err) => { console.warn('[jibo] /request error (continuing anyway):', err.message); resolve(); }); req.write(body); req.end(); }); } connect() { if (this.ws) { try { this.ws.terminate(); } catch (_) {} } console.log(`[jibo] posting ACO to /request then connecting WebSocket`); this._postRequest().then(() => { this.ws = new WebSocket(`ws://${JIBO_HOST}:${JIBO_PORT}`); this.ws.on('open', () => { console.log('[jibo] connected'); this.connected = true; this.sessionID = ''; this._send({ Type: 'StartSession' }); // Respond explicitly to robot's WebSocket-level pings (belt-and-suspenders; // ws library auto-pongs, but this ensures the robot's FLATLINE check never fires). 
this.ws.on('ping', () => { if (this.ws) try { this.ws.pong(); } catch (_) {} }); }); this.ws.on('message', (data) => { let msg; try { msg = JSON.parse(data); } catch (e) { return; } this._handleMessage(msg); }); this.ws.on('close', () => { console.log('[jibo] disconnected — reconnecting in 3s'); this.connected = false; this.sessionID = ''; this.videoStreamActive = false; this._lookInFlight = false; this._lookPending = null; clearTimeout(this._lookAckTimer); this._stopHeartbeat(); this._broadcastStatus(); clearTimeout(this.reconnectTimer); this.reconnectTimer = setTimeout(() => this.connect(), 3000); }); this.ws.on('error', (err) => { console.error('[jibo] ws error:', err.message); }); }); } _txId() { return crypto.createHash('md5') .update(Date.now().toString() + Math.random().toString()) .digest('hex'); } _send(command, expectAsync = false) { const txId = this._txId(); const msg = { ClientHeader: { TransactionID: txId, SessionID: this.sessionID, AppID: 'ImmaLittleTeapot', Credentials: '', Version: this.version }, Command: command }; if (this.ws && this.ws.readyState === WebSocket.OPEN) { this.ws.send(JSON.stringify(msg)); } return txId; } _handleMessage(msg) { // StartSession response if (msg.Response?.ResponseBody?.SessionID && !this.sessionID) { this.sessionID = msg.Response.ResponseBody.SessionID; this.version = msg.Response.ResponseBody.Version || '1.0'; console.log('[jibo] session started:', this.sessionID); this._broadcastStatus(); // Re-subscribe to entity/motion/headtouch after reconnect this._send({ Type: 'Subscribe', StreamType: 'Entity' }); this._send({ Type: 'Subscribe', StreamType: 'Motion' }); this._send({ Type: 'Subscribe', StreamType: 'HeadTouch', StreamFilter: {} }); this._send({ Type: 'Subscribe', StreamType: 'ScreenGesture', StreamFilter: { Type: 'Tap', Area: { x: 0, y: 0, width: 1, height: 1 } } }); this._startHeartbeat(); return; } // Suppress heartbeat (GetConfig) responses from reaching the browser. 
// GetConfig sends two messages per txId (ack + onConfig event) so we keep // the txId in the set until the pruning threshold clears it. const incomingTxId = msg.EventHeader?.TransactionID || msg.ResponseHeader?.TransactionID; if (incomingTxId && this._heartbeatTxIds.has(incomingTxId)) return; // Resolve any pending ack waiting on this txId const txId = msg.EventHeader?.TransactionID || msg.ResponseHeader?.TransactionID; if (txId && this.pendingTx.has(txId)) { const evt = msg.EventBody?.Event; // Terminal events for async commands if (evt === 'onLookAtAchieved' || evt === 'onStop' || evt === 'onError') { const { resolve, timer } = this.pendingTx.get(txId); clearTimeout(timer); this.pendingTx.delete(txId); resolve(msg); } } // Release the in-flight lock when our angle command finishes (any terminal event). // This must happen before the suppression below so "Target overwritten" still clears it. const evtName = msg.EventBody?.Event; if (txId && txId === this._lookActiveTxId && (evtName === 'onLookAtAchieved' || evtName === 'onStop' || evtName === 'onError')) { this._onLookAngleDone(); } // Suppress "Target overwritten" — not a real error; don't pollute the event log. if (evtName === 'onError' && msg.EventBody?.EventError?.ErrorString === 'Target overwritten') return; // Photo — fetch from Jibo and save locally; browser gets onPhotoSaved with local URL. 
if (msg.EventBody?.Event === 'onTakePhoto' && msg.EventBody?.URI) { this._savePhoto(msg.EventBody.URI); return; // suppress the raw onTakePhoto; browser gets onPhotoSaved instead } // VideoReady — capture URI for proxy (event name is "onVideoReady") if (msg.EventBody?.Event === 'onVideoReady') { this.videoStreamActive = true; this.videoURI = msg.EventBody.URI; console.log('[jibo] onVideoReady URI:', this.videoURI); } // Broadcast all events to browser clients const envelope = { type: 'jiboEvent', txId: msg.EventHeader?.TransactionID || msg.ResponseHeader?.TransactionID, body: msg.EventBody || msg.Response }; this._broadcastToClients(JSON.stringify(envelope)); } _broadcastStatus() { const status = JSON.stringify({ type: 'status', connected: this.connected, sessionID: this.sessionID, angles: this.currentAngles }); this._broadcastToClients(status); } _broadcastToClients(data) { for (const client of this.subscribers) { if (client.readyState === WebSocket.OPEN) { client.send(data); } } } addSubscriber(ws) { this.subscribers.add(ws); // Send current status immediately ws.send(JSON.stringify({ type: 'status', connected: this.connected, sessionID: this.sessionID, angles: this.currentAngles })); } removeSubscriber(ws) { this.subscribers.delete(ws); } // ── Heartbeat ───────────────────────────────────────────────────────────── // Sends GetConfig every 9 s to reset the robot's inactivity timer. // The robot enforces keepAliveTimeout=10s (app-level) and a 20s flatline // check at the WebSocket level — this satisfies both. 
_startHeartbeat() { this._stopHeartbeat(); this._heartbeatTimer = setInterval(() => { if (this.connected && this.sessionID) { const txId = this._send({ Type: 'GetConfig' }); if (txId) this._heartbeatTxIds.add(txId); // Prune old txIds so the set doesn't grow unbounded if (this._heartbeatTxIds.size > 20) { const first = this._heartbeatTxIds.values().next().value; this._heartbeatTxIds.delete(first); } } }, 9000); } _stopHeartbeat() { if (this._heartbeatTimer) { clearInterval(this._heartbeatTimer); this._heartbeatTimer = null; } this._heartbeatTxIds.clear(); } // ── Public command methods ──────────────────────────────────────────────── lookAt(target, trackFlag = false, levelHeadFlag = false) { return this._send({ Type: 'LookAt', LookAtTarget: target, TrackFlag: trackFlag, LevelHeadFlag: levelHeadFlag }); } lookAtAngle(theta, psi, track = false) { theta = Math.max(-180, Math.min(180, theta)); psi = Math.max(-30, Math.min(30, psi)); this.currentAngles = [theta, psi]; this._broadcastStatus(); if (this._lookInFlight) { // Robot is still processing the last command — just update desired target, // don't queue another message into its receive buffer. this._lookPending = [theta, psi, track]; return null; } return this._fireLookAngle(theta, psi, track); } _fireLookAngle(theta, psi, track) { this._lookInFlight = true; this._lookPending = null; const DEG = Math.PI / 180; const txId = this.lookAt({ Angle: [theta * DEG, psi * DEG] }, track); this._lookActiveTxId = txId; // Safety release: if we never hear back within 400 ms, unblock anyway. 
clearTimeout(this._lookAckTimer); this._lookAckTimer = setTimeout(() => this._onLookAngleDone(), 400); return txId; } _onLookAngleDone() { clearTimeout(this._lookAckTimer); this._lookInFlight = false; this._lookActiveTxId = null; if (this._lookPending) { const [t, p, track] = this._lookPending; this._lookPending = null; this._fireLookAngle(t, p, track); } } lookAtScreen(x, y, track = false) { return this.lookAt({ ScreenCoords: [x, y] }, track, false); } lookAtPosition(x, y, z, track = false) { return this.lookAt({ Position: [x, y, z] }, track, false); } lookAtEntity(entityId, track = true) { return this.lookAt({ Entity: entityId }, track, false); } say(esml) { return this._send({ Type: 'Say', ESML: esml }); } listen(maxSpeech = 10000, maxNoSpeech = 5000, lang = 'en-US') { return this._send({ Type: 'Listen', MaxSpeechTimeout: maxSpeech, MaxNoSpeechTimeout: maxNoSpeech, LanguageCode: lang }); } // Local STT via jibo-asr-service (port 8088) — no cloud needed. // Mirrors the approach in @be/be/be/ai-bridge.js. 
listenLocalASR(maxNoSpeech, maxSpeech) { const ASR_HTTP = `http://${JIBO_HOST}:8088`; const ASR_WS = `ws://${JIBO_HOST}:8088/simple_port`; const taskId = 're-cmd-' + Date.now() + '-' + Math.floor(Math.random() * 1e9); const reqId = 'start-' + Date.now(); const timeoutMs = Math.max(maxNoSpeech, maxSpeech) + 2000; const self = this; // Send the ROM Listen for light ring / attention visuals, ignore its result const romTxId = this._send({ Type: 'Listen', MaxSpeechTimeout: maxSpeech, MaxNoSpeechTimeout: maxNoSpeech, LanguageCode: 'en-US' }); const startPayload = JSON.stringify({ command: 'start', task_id: taskId, request_id: reqId, audio_source_id: 'alsa1', hotphrase: 'none', speech_to_text: true, }); function stopASR() { const stopBody = JSON.stringify({ command: 'stop', task_id: taskId, request_id: 'stop-' + Date.now() }); const req = httpModule.request({ host: JIBO_HOST, port: 8088, path: '/asr_simple_interface', method: 'POST', headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(stopBody) } }); req.on('error', () => {}); req.write(stopBody); req.end(); } let wsClient = null; let timer = null; let done = false; function finish(speech) { if (done) return; done = true; clearTimeout(timer); if (wsClient) { try { wsClient.terminate(); } catch (e) {} wsClient = null; } stopASR(); // Cancel ROM listen self._send({ Type: 'Cancel', ID: romTxId }); // Broadcast result as if it were a normal jiboEvent const evt = speech ? 
{ Event: 'onListenResult', Speech: speech, LanguageCode: 'en-US' } : { Event: 'onStop', StopReason: 'NoInput' }; self._broadcastToClients(JSON.stringify({ type: 'jiboEvent', txId: romTxId, body: evt })); } // Connect WS first, then POST start wsClient = new WebSocket(ASR_WS); wsClient.on('open', () => { // POST start to kick off recognition const req = httpModule.request({ host: JIBO_HOST, port: 8088, path: '/asr_simple_interface', method: 'POST', headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(startPayload) } }, (res) => { res.resume(); }); req.on('error', (e) => { console.error('[asr] start error:', e.message); finish(null); }); req.write(startPayload); req.end(); // Overall timeout timer = setTimeout(() => { finish(null); }, timeoutMs); console.log('[asr] local listen started, task:', taskId); }); wsClient.on('message', (data) => { let evt; try { evt = JSON.parse(String(data)); } catch (e) { return; } const evType = evt.event_type || evt.eventType || evt.event || evt.type; if (evType !== 'speech_to_text_final') return; // Match by task/request id if present const evTask = evt.task_id || evt.taskId || (evt.payload && evt.payload.task_id); const evReq = evt.request_id || evt.requestId || (evt.payload && evt.payload.request_id); if ((evTask || evReq) && evTask !== taskId && evReq !== reqId) return; const utterances = evt.utterances || evt.Utterances || (evt.payload && evt.payload.utterances); // Utterance objects use .utterance as the primary text field (ai-bridge.js: pickBestAsrUtterance) function pickUtterance(u) { if (!u) return ''; if (typeof u === 'string') return u; return String(u.utterance || u.Utterance || u.text || ''); } const text = Array.isArray(utterances) ? pickUtterance(utterances[0]) : (typeof utterances === 'string' ? utterances : ''); const speech = text ? 
String(text).trim() : null; console.log('[asr] speech_to_text_final:', speech || '(empty)'); if (speech) finish(speech); }); wsClient.on('error', (e) => { console.error('[asr] ws error:', e.message); finish(null); }); wsClient.on('close', () => { if (!done) finish(null); }); return romTxId; } takePhoto(camera = 'Right', resolution = 'HighRes', distortion = false) { return this._send({ Type: 'TakePhoto', Camera: camera, Resolution: resolution, Distortion: distortion }); } startVideo() { // VideoType must be uppercase enum value; Duration is not in server schema this.videoTxId = this._send({ Type: 'Video', VideoType: 'NORMAL' }); return this.videoTxId; } cancelVideo() { if (this.videoTxId) { this._send({ Type: 'Cancel', ID: this.videoTxId }); this.videoTxId = null; this.videoStreamActive = false; } } displayEye() { return this._send({ Type: 'Display', View: { Type: 'Eye', Name: 'default' } }); } playEyeAnim(animName) { return this._send({ Type: 'Say', ESML: `` }); } displayText(text, name = 'reCmd') { return this._send({ Type: 'Display', View: { Type: 'Text', Name: name, Text: text } }); } displayImage(src, name = 'reCmd') { return this._send({ Type: 'Display', View: { Type: 'Image', Name: name, Image: { src, name, set: '' } } }); } setAttention(mode) { return this._send({ Type: 'SetAttention', Mode: mode }); } setVolume(level) { return this._send({ Type: 'SetConfig', Options: { Mixer: Math.max(0, Math.min(1, level)) } }); } getConfig() { return this._send({ Type: 'GetConfig' }); } cancel(txId) { return this._send({ Type: 'Cancel', ID: txId }); } subscribe(streamType, filter = null) { const cmd = { Type: 'Subscribe', StreamType: streamType }; if (filter) cmd.StreamFilter = filter; return this._send(cmd); } nudge(dTheta, dPsi) { const [theta, psi] = this.currentAngles; return this.lookAtAngle(theta + dTheta, psi + dPsi); } // Returns a Promise that resolves when the robot acks txId, or after timeoutMs. 
_savePhoto(jiboUri) { const url = `http://${JIBO_HOST}:${JIBO_PORT}${jiboUri}`; const filename = `photo_${Date.now()}.jpg`; const filepath = path.join(PHOTOS_DIR, filename); const file = fs.createWriteStream(filepath); httpModule.get(url, (jiboRes) => { jiboRes.pipe(file); file.on('finish', () => { file.close(); console.log('[photo] saved:', filename); // Rebroadcast with local URL so the browser doesn't need the proxy this._broadcastToClients(JSON.stringify({ type: 'jiboEvent', txId: null, body: { Event: 'onPhotoSaved', url: `/photos/${filename}`, filename } })); }); }).on('error', (err) => { fs.unlink(filepath, () => {}); console.error('[photo] save failed:', err.message); }); } awaitAck(txId, timeoutMs = 2000) { return new Promise((resolve) => { const timer = setTimeout(() => { this.pendingTx.delete(txId); resolve(null); }, timeoutMs); this.pendingTx.set(txId, { resolve, timer }); }); } } // ── Video proxy ────────────────────────────────────────────────────────────── function proxyJiboStream(uri, res) { const url = `http://${JIBO_HOST}:${JIBO_PORT}${uri}`; console.log('[proxy] streaming:', url); const req = httpModule.get(url, (jiboRes) => { res.writeHead(jiboRes.statusCode, jiboRes.headers); jiboRes.pipe(res); res.on('close', () => req.destroy()); }); req.on('error', (err) => { if (!res.headersSent) res.status(502).json({ error: err.message }); }); } function proxyJiboFetch(uri, res) { const url = `http://${JIBO_HOST}:${JIBO_PORT}${uri}`; const req = httpModule.get(url, (jiboRes) => { res.writeHead(jiboRes.statusCode, jiboRes.headers); jiboRes.pipe(res); res.on('close', () => req.destroy()); }); req.on('error', (err) => { if (!res.headersSent) res.status(502).json({ error: err.message }); }); } // ── Wakeword watcher ───────────────────────────────────────────────────────── // Maintains a persistent connection to the always-on resident ASR task (task0) // and forwards every "hotphrase" event to browser clients as onHotWordHeard. 
class WakewordWatcher {
  /**
   * @param {(message: string) => void} broadcastFn - receives each serialized
   *   onHotWordHeard jiboEvent.
   */
  constructor(broadcastFn) {
    this._broadcast = broadcastFn;
    this._ws = null;
    this._reconnectTimer = null;
    this._connect();
  }

  /** Opens the ASR WebSocket and reconnects three seconds after any close. */
  _connect() {
    const url = `ws://${JIBO_HOST}:8088/simple_port`;
    this._ws = new WebSocket(url);

    this._ws.on('open', () => {
      console.log('[wakeword] connected to ASR WebSocket');
    });

    this._ws.on('message', (data) => {
      let evt;
      try { evt = JSON.parse(String(data)); } catch (e) { return; }
      // JSON.parse may return null or a primitive; property reads would throw.
      if (evt === null || typeof evt !== 'object') return;
      if (evt.event_type !== 'hotphrase') return;

      const utterance = evt.utterances && evt.utterances[0];
      const score = utterance ? utterance.score : 0;
      console.log('[wakeword] heard! score:', score);
      this._broadcast(JSON.stringify({
        type: 'jiboEvent',
        txId: null,
        body: {
          Event: 'onHotWordHeard',
          utterance: utterance ? utterance.utterance : 'hey jibo',
          score: score,
          timestamp: evt.timestamp || new Date().toISOString()
        }
      }));
    });

    this._ws.on('close', () => {
      console.log('[wakeword] disconnected — reconnecting in 3s');
      clearTimeout(this._reconnectTimer);
      this._reconnectTimer = setTimeout(() => this._connect(), 3000);
    });

    this._ws.on('error', (err) => {
      console.error('[wakeword] error:', err.message);
    });
  }
}

// ── App setup ────────────────────────────────────────────────────────────────
const jibo = new JiboClient();
const app = express();
app.use(express.json());

const PHOTOS_DIR = path.join(__dirname, 'photos');
fs.mkdirSync(PHOTOS_DIR, { recursive: true });

app.use(express.static(path.join(__dirname, 'public')));
app.use('/photos', express.static(PHOTOS_DIR));

/**
 * Parses a request value as a finite number.
 *
 * @param {*} value
 * @param {number} [fallback=NaN] - returned when `value` does not parse.
 * @returns {number}
 */
function toFiniteNumber(value, fallback = NaN) {
  const parsed = Number.parseFloat(value);
  return Number.isFinite(parsed) ? parsed : fallback;
}

/** Sends a 400 response with `{ error: message }`. */
function badRequest(res, message) {
  return res.status(400).json({ error: message });
}

// ── REST API ─────────────────────────────────────────────────────────────────
app.post('/api/look/angle', (req, res) => {
  const { theta = 0, psi = 0, track = false } = req.body;
  const t = toFiniteNumber(theta);
  const p = toFiniteNumber(psi);
  if (Number.isNaN(t) || Number.isNaN(p)) return badRequest(res, 'theta and psi must be numbers');
  const txId = jibo.lookAtAngle(t, p, !!track);
  res.json({ txId });
});

app.post('/api/look/screen', (req, res) => {
  const { x, y, track = false } = req.body;
  const sx = toFiniteNumber(x);
  const sy = toFiniteNumber(y);
  if (Number.isNaN(sx) || Number.isNaN(sy)) return badRequest(res, 'x and y must be numbers');
  const txId = jibo.lookAtScreen(sx, sy, !!track);
  res.json({ txId });
});

// Blocking screen-coord step (up/down navigation).
app.post('/api/look/step', async (req, res) => {
  const { x, y } = req.body;
  const sx = toFiniteNumber(x);
  const sy = toFiniteNumber(y);
  if (Number.isNaN(sx) || Number.isNaN(sy)) return badRequest(res, 'x and y must be numbers');
  const txId = jibo.lookAtScreen(sx, sy);
  await jibo.awaitAck(txId, 2000); // resolves with null on timeout; never rejects
  res.json({ txId });
});

app.post('/api/look/position', (req, res) => {
  const { x = 0, y = 0, z = 500, track = false } = req.body;
  const px = toFiniteNumber(x);
  const py = toFiniteNumber(y);
  const pz = toFiniteNumber(z);
  if (Number.isNaN(px) || Number.isNaN(py) || Number.isNaN(pz)) {
    return badRequest(res, 'x, y and z must be numbers');
  }
  const txId = jibo.lookAtPosition(px, py, pz, !!track);
  res.json({ txId });
});

app.post('/api/look/entity', (req, res) => {
  const { entityId, track = true } = req.body;
  if (entityId == null) return badRequest(res, 'entityId required');
  const txId = jibo.lookAtEntity(entityId, !!track);
  res.json({ txId });
});

app.post('/api/look/nudge', (req, res) => {
  const { dTheta = 0, dPsi = 0 } = req.body;
  const dt = toFiniteNumber(dTheta);
  const dp = toFiniteNumber(dPsi);
  if (Number.isNaN(dt) || Number.isNaN(dp)) return badRequest(res, 'dTheta and dPsi must be numbers');
  const txId = jibo.nudge(dt, dp);
  res.json({ txId, angles: jibo.currentAngles });
});

app.post('/api/say', (req, res) => {
  const { text } = req.body;
  if (!text) return badRequest(res, 'text required');
  const txId = jibo.say(text);
  res.json({ txId });
});

app.post('/api/listen', (req, res) => {
  const { maxSpeech = 10000, maxNoSpeech = 5000 } = req.body;
  // Use local ASR service (port 8088) — bypasses offline Google cloud ASR
  const txId = jibo.listenLocalASR(
    toFiniteNumber(maxNoSpeech, 5000),
    toFiniteNumber(maxSpeech, 10000)
  );
  res.json({ txId });
});

app.post('/api/photo', (req, res) => {
  const { camera = 'Right', resolution = 'HighRes' } = req.body;
  const txId = jibo.takePhoto(camera, resolution);
  res.json({ txId });
});

app.post('/api/video/start', (req, res) => {
  const txId = jibo.startVideo();
  res.json({ txId });
});

app.post('/api/video/stop', (req, res) => {
  jibo.cancelVideo();
  res.json({ ok: true });
});

app.post('/api/display/eye', (req, res) => {
  const txId = jibo.displayEye();
  res.json({ txId });
});

app.post('/api/display/anim', (req, res) => {
  const { name } = req.body;
  if (!name) return badRequest(res, 'name required');
  const txId = jibo.playEyeAnim(name);
  res.json({ txId });
});

app.post('/api/display/text', (req, res) => {
  const { text } = req.body;
  if (!text) return badRequest(res, 'text required');
  const txId = jibo.displayText(text);
  res.json({ txId });
});

app.post('/api/display/image', (req, res) => {
  const { src } = req.body;
  if (!src) return badRequest(res, 'src required');
  const txId = jibo.displayImage(src);
  res.json({ txId });
});

app.post('/api/attention', (req, res) => {
  const { mode } = req.body;
  if (!mode) return badRequest(res, 'mode required');
  const txId = jibo.setAttention(mode);
  res.json({ txId });
});

app.post('/api/volume', (req, res) => {
  const { level } = req.body;
  if (level == null) return badRequest(res, 'level required');
  const volume = toFiniteNumber(level);
  if (Number.isNaN(volume)) return badRequest(res, 'level must be a number');
  const txId = jibo.setVolume(volume);
  res.json({ txId });
});

app.post('/api/cancel', (req, res) => {
  const { txId } = req.body;
  if (!txId) return badRequest(res, 'txId required');
  jibo.cancel(txId);
  res.json({ ok: true });
});

app.get('/api/config', (req, res) => {
  res.json({
    llmEndpoint: process.env.LLM_ENDPOINT || '',
    llmModel: process.env.LLM_MODEL || '',
    llmSystemPrompt: LLM_SYSTEM_PROMPT || '',
  });
});

// Proxy OpenAI-compatible chat completions — keeps API key off the browser
/**
 * POSTs `body` as JSON to `urlStr` and resolves with the parsed JSON reply.
 *
 * @param {string} urlStr - absolute http: or https: URL.
 * @param {object} reqHeaders - extra request headers.
 * @param {object} body - JSON-serializable payload.
 * @returns {Promise<object>} rejects on an invalid URL, a transport error, a
 *   non-2xx status, or a non-JSON body.
 */
function httpPost(urlStr, reqHeaders, body) {
  return new Promise((resolve, reject) => {
    const u = new URL(urlStr); // a throw here rejects the promise
    if (u.protocol !== 'https:' && u.protocol !== 'http:') {
      reject(new Error('LLM endpoint must be http(s): ' + u.protocol));
      return;
    }
    const mod = u.protocol === 'https:' ? https : httpModule;
    const payload = JSON.stringify(body);

    const req = mod.request({
      hostname: u.hostname,
      port: u.port || (u.protocol === 'https:' ? 443 : 80),
      path: u.pathname + u.search,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(payload),
        ...reqHeaders
      }
    }, (res) => {
      let data = '';
      // Decode as a stream so multi-byte characters split across chunks survive.
      res.setEncoding('utf8');
      res.on('data', (chunk) => { data += chunk; });
      res.on('error', reject);
      res.on('end', () => {
        // Surface upstream failures instead of returning an empty reply.
        if (res.statusCode < 200 || res.statusCode >= 300) {
          reject(new Error(`LLM HTTP ${res.statusCode}: ${data.slice(0, 300)}`));
          return;
        }
        try {
          resolve(JSON.parse(data));
        } catch (e) {
          reject(new Error('LLM non-JSON response: ' + data.slice(0, 300)));
        }
      });
    });

    req.on('error', reject);
    req.write(payload);
    req.end();
  });
}

app.post('/api/llm/chat', async (req, res) => {
  const { messages = [], endpoint, model, systemPrompt } = req.body;
  // Spreading a non-array below would throw outside the try block.
  if (!Array.isArray(messages)) return badRequest(res, 'messages must be an array');

  const serverUrl = process.env.LLM_ENDPOINT || 'http://localhost:11434/v1/chat/completions';
  const url = endpoint || serverUrl;
  const mdl = model || process.env.LLM_MODEL || 'llama3';
  const sysProm = systemPrompt || LLM_SYSTEM_PROMPT || '';
  const apiKey = process.env.LLM_API_KEY || '';

  const allMessages = sysProm
    ? [{ role: 'system', content: sysProm }, ...messages]
    : messages;

  const headers = {};
  if (apiKey) {
    // The key is attached only for the server's own endpoint; otherwise any
    // caller could name a URL and receive the key in the Authorization header.
    if (url === serverUrl) {
      headers['Authorization'] = `Bearer ${apiKey}`;
    } else {
      console.warn('[llm] client-supplied endpoint differs from server endpoint — API key withheld');
    }
  }

  try {
    const result = await httpPost(url, headers, { model: mdl, messages: allMessages, stream: false });
    const reply = result.choices?.[0]?.message?.content?.trim() || '';
    res.json({ reply });
  } catch (err) {
    console.error('[llm] error:', err.message);
    res.status(502).json({ error: err.message });
  }
});

app.get('/api/status', (req, res) => {
  res.json({
    connected: jibo.connected,
    sessionID: jibo.sessionID,
    angles: jibo.currentAngles,
    videoStreamActive: jibo.videoStreamActive
  });
});

// Proxy Jibo's video/photo byte streams through the server
app.get('/proxy/stream', (req, res) => {
  const { uri } = req.query;
  if (typeof uri !== 'string' || !uri.startsWith('/')) return badRequest(res, 'invalid uri');
  proxyJiboStream(uri, res);
});

app.get('/proxy/photo', (req, res) => {
  const { uri } = req.query;
  if (typeof uri !== 'string' || !uri.startsWith('/')) return badRequest(res, 'invalid uri');
  proxyJiboFetch(uri, res);
});

// ── HTTP + WebSocket server ───────────────────────────────────────────────────
const server = http.createServer(app);
const wss = new WebSocketServer({ server, path: '/ws' });

wss.on('connection', (ws) => {
  jibo.addSubscriber(ws);
  ws.on('close', () => jibo.removeSubscriber(ws));
  ws.on('error', () => jibo.removeSubscriber(ws));
});

server.listen(APP_PORT, () => {
  console.log(`Re-Commander running at http://localhost:${APP_PORT}`);
  jibo.connect();
  new WakewordWatcher((msg) => jibo._broadcastToClients(msg));
});