Files
Re-Commander/server.js
2026-04-19 15:18:29 +03:00

974 lines
35 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
'use strict';
const express = require('express');
const http = require('http');
const https = require('https');
const { WebSocketServer, WebSocket } = require('ws');
const crypto = require('crypto');
const httpModule = require('http');
const path = require('path');
const fs = require('fs');
require('dotenv').config();
// Network location of the robot and the port this app listens on.
// Each value can be overridden from the environment (dotenv has already been
// loaded above); the fallbacks are the values that were previously hard-coded.
const JIBO_HOST = process.env.JIBO_HOST || '192.168.1.10';
// parseInt yields NaN for a missing or garbage value, which falls through to the default.
const JIBO_PORT = Number.parseInt(process.env.JIBO_PORT, 10) || 8160;
const APP_PORT = process.env.PORT || 3000;
// Default system prompt for the LLM chat proxy. It is served to the browser by
// GET /api/config and used by POST /api/llm/chat whenever the request does not
// supply its own systemPrompt. The text is sent to the model verbatim, so any
// edit here (including whitespace and tag spelling) changes runtime behavior.
const LLM_SYSTEM_PROMPT = `You are Jibo, a small expressive home robot. Every reply MUST be written in ESML
(Embodied Speech Markup Language). ESML is an XML dialect that simultaneously
drives Jibo's body animations, screen graphics, audio effects, and TTS voice.
Respond ONLY with the final spoken output annotated with ESML tags.
No reasoning, no <think> blocks, no preamble — only what Jibo will say and do.
== ANIMATION TAGS ==
Use <anim> for body/screen animations from Jibo's built-in library (preferred).
Use <es> when you also need to blend in SSA or SFX in the same tag.
Blocking (Jibo freezes speech while it plays, resumes after):
<anim cat='CATEGORY'/> following text here
<anim name='AnimName'/> following text here
Bounded non-blocking (animation duration stretches to match the enclosed text):
<anim cat='CATEGORY'>text spoken during animation</anim>
Unbounded non-blocking (animation plays at native length alongside text that follows):
<anim cat='CATEGORY' nonBlocking='true'/> text spoken at the same time
Common attributes:
cat='CATEGORY' select animation by emotional category (preferred)
name='AnimName' select exact animation by its library name
nonBlocking='true' play alongside TTS instead of blocking it
endNeutral='true' snap back to neutral pose when done (use this by default)
loop='0' repeat to fill bounded duration (bounded mode only)
loop='N' repeat N times (unbounded mode only)
filter='!ssa-only' exclude audio-only animations from the category pick
layers='!screen' use only body layer (drop screen graphics)
Animation categories (cat= values):
affection confused dance embarrassed excited frustrated
happy laughing no proud relieved sad scared surprised worried yes
== EMOJIS (Screen Graphics) ==
Use <anim> with the emoji category and specific filters to display a graphic on Jibo's screen.
Always use nonBlocking='true' for emojis.
Syntax: <anim cat='emoji' filter='!(hf), &(EMOJI_NAME)' nonBlocking='true' />
Available EMOJIS (EMOJI_NAME):
airplane basketball beach car disco-spin football soccer trophy
music question-mark star beer cake cheese drumstick coffee fork
fish groceries burger hotdog icecream pizza wine christmas-tree
fireworks halloween hanukkah thanksgiving clover valentines chocolate
bicycle cat laptop dog gift house laundry lightbulb money popcorn
party phone robot sunglasses toilet-paper trash umbrella video-game
bird cow earth flower lightning-bolt moon mountain mouse penguin
pig bunny rainbow baby heart
== DANCES ==
Use <anim> with the dance category to make Jibo dance. You can choose to include music or not.
Syntax (with music): <anim cat='dance' filter='music, DANCE_NAME'/>
Syntax (without music): <anim cat='dance' filter='!(music), &(DANCE_NAME)'/>
Available DANCES (DANCE_NAME):
rom-upbeat rom-ballroom rom-silly rom-slowdance rom-eletronic rom-twerk
== SSA (Semi-Speech Audio — emotional vocal sounds) ==
Always self-closing. Play before, after, or between sentences; never inside <anim>.
<ssa cat='happy'/> <ssa cat='laughing'/> <ssa cat='surprised'/>
<ssa cat='confused'/> <ssa cat='sad'/> <ssa cat='scared'/>
<ssa cat='affection'/> <ssa cat='proud'/> <ssa cat='embarrassed'/>
<ssa cat='frustrated'/> <ssa cat='worried'/> <ssa cat='thinking'/>
<ssa cat='dontknow'/> <ssa cat='oops'/> <ssa cat='question'/>
<ssa cat='yawn'/> <ssa cat='hello'/> <ssa cat='goodbye'/>
<ssa cat='disgusted'/> <ssa cat='no'/> <ssa cat='confirm'/>
== SFX (Sound effects) ==
Always self-closing. Good for punctuating facts, transitions, or reactions.
<sfx cat='blip'/> <sfx cat='sparkles'/> <sfx cat='whoosh'/>
<sfx cat='heart'/> <sfx cat='party'/> <sfx cat='lightbulb'/>
<sfx cat='bird'/> <sfx cat='dog'/> <sfx cat='drumroll'/>
<sfx cat='sunshine'/> <sfx cat='scanner'/> <sfx cat='egg'/>
<sfx cat='frying'/>
== VOICE / SPEECH TAGS ==
Pause: <break size='0.5'/> (length in seconds)
Style: <style set='enthusiastic'>text</style>
Styles: neutral enthusiastic sheepish confused confident
Pitch: <pitch halftone='-5'>text</pitch> (±semitones from baseline)
<pitch mult='1.2'>text</pitch> (pitch multiplier)
<pitch add='200'>text</pitch> (Hz offset)
<pitch band='1.2'>text</pitch> (vibrance/bandwidth)
Duration: <duration stretch='1.5'>text</duration> (>1 = slower, <1 = faster)
<duration set='1.0'>text</duration> (exact duration in seconds)
Spell: <say-as spell='NASA'/> (spells each letter)
Phoneme: <phoneme ph='b aa1 n ou0'>Bono</phoneme>
== RULES ==
1. ALWAYS use ESML. Plain text is valid ESML — but add tags whenever they make
Jibo more expressive and natural.
2. Keep total response length SHORT: one or two sentences maximum.
3. Opening animations set the emotional tone before speech:
<anim cat='excited' nonBlocking='true' endNeutral='true'/> Oh, cool!
4. Bounded animations sync motion to the most important words:
I <anim cat='affection'>really love that idea!</anim>
5. Use <ssa> for non-verbal emotional sounds (gasps, laughs, hums).
6. Use <style> to match register to emotion without changing the words.
7. Self-closing tags MUST end with /> Paired tags MUST have a matching </tag>.
8. Do NOT nest anim/ssa/sfx inside each other.
9. Do NOT emit <think> blocks, chain-of-thought, or any non-spoken content.
== EXAMPLES ==
User: "Tell me a joke."
<anim cat='excited' nonBlocking='true' endNeutral='true'/> Why don't scientists trust atoms? <break size='0.6'/> <ssa cat='laughing'/> Because they make up everything!
User: "I'm feeling sad today."
<anim cat='affection'>I'm really sorry to hear that.</anim> <break size='0.3'/> <style set='sheepish'>Do you want to talk about it?</style>
User: "What's 2 plus 2?"
<sfx cat='blip'/> That's 4! <anim cat='proud' nonBlocking='true' endNeutral='true'/> Easy one.
User: "Wow, that's surprising!"
<ssa cat='surprised'/> <anim cat='surprised'>I know, right?!</anim>
User: "Do you like cats?"
<anim cat='emoji' filter='!(hf), &(cat)' nonBlocking='true' /> <anim cat='excited' nonBlocking='true' endNeutral='true'/> I love them!
User: "Show me a dance."
<anim cat='dance' filter='music, rom-upbeat'/> Watch these moves!`
// ── Jibo client ──────────────────────────────────────────────────────────────
/**
 * Client for the robot's command WebSocket, plus fan-out of robot events to
 * browser WebSocket subscribers.
 *
 * Lifecycle: connect() POSTs an ACO to /request, opens the WebSocket, sends
 * StartSession, and (once a SessionID arrives) subscribes to event streams and
 * starts a heartbeat. On close it resets state and reconnects after 3 s.
 *
 * Every command method returns the transaction id generated for the command.
 * The id is returned even when the socket is not open and nothing was sent.
 */
class JiboClient {
  constructor() {
    this.ws = null;
    this.sessionID = '';
    this.version = '1.0';
    this.connected = false;
    this.pendingTx = new Map(); // txId → {resolve, timer}
    this.subscribers = new Set(); // browser WebSocket connections
    this.currentAngles = [0, 0]; // [theta, psi] in degrees, last requested
    this.reconnectTimer = null;
    this.videoStreamActive = false;
    this.videoTxId = null;
    this.videoURI = null; // URI from the latest onVideoReady event
    this._heartbeatTimer = null;
    this._heartbeatTxIds = new Set(); // suppress these from browser broadcast
    this._lookInFlight = false; // true while waiting for robot to ack a LookAt angle
    this._lookPending = null; // [theta, psi, track] latest desired target while in-flight
    this._lookAckTimer = null; // safety timeout in case ack never arrives
    this._lookActiveTxId = null; // txId of the angle command currently in flight
  }

  // POST /request to Jibo before WebSocket to supply a full ACO.
  // Without this the @be falls back to a default ACO that omits Listen,
  // SetAttention, Display, FetchAsset, SetConfig, HeadTouch, ScreenGesture.
  // The returned promise always resolves (never rejects): failures are logged
  // and the caller proceeds to open the WebSocket anyway.
  _postRequest() {
    return new Promise((resolve) => {
      const body = JSON.stringify({
        aco: {
          version: '1.0',
          sourceId: 'ReCommander',
          commandSet: [
            'StartSession', 'GetConfig', 'SetConfig', 'Cancel',
            'SetAttention', 'Say', 'Listen', 'LookAt',
            'TakePhoto', 'Video', 'Display', 'FetchAsset', 'UnloadAsset', 'Subscribe'
          ],
          streamSet: ['Entity', 'Motion', 'HeadTouch', 'ScreenGesture', 'HotWord'],
          keepAliveTimeout: 10000,
          recoveryTimeout: 20000,
          remoteConfig: { hideVisualCue: false, inactivityTimeout: 3600000 }
        }
      });
      const req = httpModule.request({
        host: JIBO_HOST, port: JIBO_PORT,
        path: '/request', method: 'POST',
        // Bound the wait so an unreachable robot cannot stall the reconnect loop.
        timeout: 5000,
        headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(body) }
      }, (res) => {
        let data = '';
        // Decode as a stream so multi-byte characters split across chunks survive.
        res.setEncoding('utf8');
        res.on('data', (chunk) => { data += chunk; });
        res.on('end', () => {
          console.log('[jibo] /request response:', data);
          resolve();
        });
        // An aborted response never emits 'end'; resolve on close so connect() cannot hang.
        res.on('close', resolve);
      });
      // 'timeout' only signals idleness; destroying the request routes it to 'error'.
      req.on('timeout', () => req.destroy(new Error('request timed out')));
      req.on('error', (err) => {
        console.warn('[jibo] /request error (continuing anyway):', err.message);
        resolve();
      });
      req.write(body);
      req.end();
    });
  }

  // Resets all per-connection state and tells subscribers the robot is gone.
  _onDisconnected() {
    this.connected = false;
    this.sessionID = '';
    this.videoStreamActive = false;
    this._lookInFlight = false;
    this._lookPending = null;
    this._lookActiveTxId = null;
    clearTimeout(this._lookAckTimer);
    this._stopHeartbeat();
    this._broadcastStatus();
  }

  // Tears down any existing socket, posts the ACO, then opens a fresh
  // WebSocket. Events from a socket that has since been replaced are ignored,
  // so terminating the old socket cannot trigger a reconnect that kills the new one.
  connect() {
    clearTimeout(this.reconnectTimer);
    const previous = this.ws;
    this.ws = null;
    if (previous) {
      try { previous.terminate(); } catch (_) {}
      if (this.connected) this._onDisconnected();
    }
    console.log(`[jibo] posting ACO to /request then connecting WebSocket`);
    this._postRequest().then(() => {
      // Another connect() may have raced us while the POST was pending.
      if (this.ws) {
        try { this.ws.terminate(); } catch (_) {}
      }
      const ws = new WebSocket(`ws://${JIBO_HOST}:${JIBO_PORT}`);
      this.ws = ws;
      ws.on('open', () => {
        if (this.ws !== ws) return;
        console.log('[jibo] connected');
        this.connected = true;
        this.sessionID = '';
        this._send({ Type: 'StartSession' });
        // Respond explicitly to robot's WebSocket-level pings (belt-and-suspenders;
        // ws library auto-pongs, but this ensures the robot's FLATLINE check never fires).
        ws.on('ping', () => {
          try { ws.pong(); } catch (_) {}
        });
      });
      ws.on('message', (data) => {
        let msg;
        try { msg = JSON.parse(data); } catch (e) { return; }
        this._handleMessage(msg);
      });
      ws.on('close', () => {
        if (this.ws !== ws) return; // stale socket replaced by a newer connect()
        console.log('[jibo] disconnected — reconnecting in 3s');
        this._onDisconnected();
        clearTimeout(this.reconnectTimer);
        this.reconnectTimer = setTimeout(() => this.connect(), 3000);
      });
      ws.on('error', (err) => {
        console.error('[jibo] ws error:', err.message);
      });
    });
  }

  // Generates a 32-hex-character transaction id.
  _txId() {
    return crypto.createHash('md5')
      .update(Date.now().toString() + Math.random().toString())
      .digest('hex');
  }

  // Wraps `command` in a client header and sends it if the socket is open.
  // Returns the generated transaction id whether or not anything was sent.
  // `expectAsync` is accepted for compatibility but is not used.
  _send(command, expectAsync = false) {
    const txId = this._txId();
    const msg = {
      ClientHeader: {
        TransactionID: txId,
        SessionID: this.sessionID,
        AppID: 'ImmaLittleTeapot',
        Credentials: '',
        Version: this.version
      },
      Command: command
    };
    if (this.ws && this.ws.readyState === WebSocket.OPEN) {
      this.ws.send(JSON.stringify(msg));
    }
    return txId;
  }

  // Dispatches one parsed robot message: session bootstrap, heartbeat
  // suppression, pending-ack resolution, look-angle lock release, photo/video
  // bookkeeping, and finally broadcast to browser clients.
  _handleMessage(msg) {
    // JSON.parse can legitimately yield null or a primitive; nothing to dispatch then.
    if (msg === null || typeof msg !== 'object') return;
    // StartSession response
    if (msg.Response?.ResponseBody?.SessionID && !this.sessionID) {
      this.sessionID = msg.Response.ResponseBody.SessionID;
      this.version = msg.Response.ResponseBody.Version || '1.0';
      console.log('[jibo] session started:', this.sessionID);
      this._broadcastStatus();
      // Re-subscribe to entity/motion/headtouch after reconnect
      this._send({ Type: 'Subscribe', StreamType: 'Entity' });
      this._send({ Type: 'Subscribe', StreamType: 'Motion' });
      this._send({ Type: 'Subscribe', StreamType: 'HeadTouch', StreamFilter: {} });
      this._send({
        Type: 'Subscribe', StreamType: 'ScreenGesture',
        StreamFilter: { Type: 'Tap', Area: { x: 0, y: 0, width: 1, height: 1 } }
      });
      this._startHeartbeat();
      return;
    }
    const txId = msg.EventHeader?.TransactionID || msg.ResponseHeader?.TransactionID;
    const evtName = msg.EventBody?.Event;
    const isTerminal = evtName === 'onLookAtAchieved' || evtName === 'onStop' || evtName === 'onError';
    // Suppress heartbeat (GetConfig) responses from reaching the browser.
    // GetConfig sends two messages per txId (ack + onConfig event) so we keep
    // the txId in the set until the pruning threshold clears it.
    if (txId && this._heartbeatTxIds.has(txId)) return;
    // Resolve any pending ack waiting on this txId (terminal events only).
    if (txId && isTerminal && this.pendingTx.has(txId)) {
      const { resolve, timer } = this.pendingTx.get(txId);
      clearTimeout(timer);
      this.pendingTx.delete(txId);
      resolve(msg);
    }
    // Release the in-flight lock when our angle command finishes (any terminal event).
    // This must happen before the suppression below so "Target overwritten" still clears it.
    if (txId && txId === this._lookActiveTxId && isTerminal) {
      this._onLookAngleDone();
    }
    // Suppress "Target overwritten" — not a real error; don't pollute the event log.
    if (evtName === 'onError' &&
        msg.EventBody?.EventError?.ErrorString === 'Target overwritten') return;
    // Photo — fetch from Jibo and save locally; browser gets onPhotoSaved with local URL.
    if (evtName === 'onTakePhoto' && msg.EventBody?.URI) {
      this._savePhoto(msg.EventBody.URI);
      return; // suppress the raw onTakePhoto; browser gets onPhotoSaved instead
    }
    // VideoReady — capture URI for proxy (event name is "onVideoReady")
    if (evtName === 'onVideoReady') {
      this.videoStreamActive = true;
      this.videoURI = msg.EventBody.URI;
      console.log('[jibo] onVideoReady URI:', this.videoURI);
    }
    // Broadcast all remaining events to browser clients
    this._broadcastToClients(JSON.stringify({
      type: 'jiboEvent',
      txId,
      body: msg.EventBody || msg.Response
    }));
  }

  // Serialized connection status, shared by broadcast and new-subscriber greeting.
  _statusPayload() {
    return JSON.stringify({
      type: 'status',
      connected: this.connected,
      sessionID: this.sessionID,
      angles: this.currentAngles
    });
  }

  _broadcastStatus() {
    this._broadcastToClients(this._statusPayload());
  }

  // Sends an already-serialized message to every subscriber whose socket is open.
  _broadcastToClients(data) {
    for (const client of this.subscribers) {
      if (client.readyState === WebSocket.OPEN) {
        client.send(data);
      }
    }
  }

  addSubscriber(ws) {
    this.subscribers.add(ws);
    // Send current status immediately
    ws.send(this._statusPayload());
  }

  removeSubscriber(ws) {
    this.subscribers.delete(ws);
  }

  // ── Heartbeat ─────────────────────────────────────────────────────────────
  // Sends GetConfig every 9 s to reset the robot's inactivity timer.
  // The robot enforces keepAliveTimeout=10s (app-level) and a 20s flatline
  // check at the WebSocket level — this satisfies both.
  _startHeartbeat() {
    this._stopHeartbeat();
    this._heartbeatTimer = setInterval(() => {
      if (!this.connected || !this.sessionID) return;
      const txId = this._send({ Type: 'GetConfig' });
      if (txId) this._heartbeatTxIds.add(txId);
      // Prune the oldest txId so the set doesn't grow unbounded
      if (this._heartbeatTxIds.size > 20) {
        const first = this._heartbeatTxIds.values().next().value;
        this._heartbeatTxIds.delete(first);
      }
    }, 9000);
  }

  _stopHeartbeat() {
    if (this._heartbeatTimer) {
      clearInterval(this._heartbeatTimer);
      this._heartbeatTimer = null;
    }
    this._heartbeatTxIds.clear();
  }

  // ── Public command methods ────────────────────────────────────────────────
  lookAt(target, trackFlag = false, levelHeadFlag = false) {
    return this._send({ Type: 'LookAt', LookAtTarget: target, TrackFlag: trackFlag, LevelHeadFlag: levelHeadFlag });
  }

  // Looks at an absolute angle given in degrees. theta is clamped to ±180 and
  // psi to ±30. A component that is not a number keeps its current value, so a
  // bad request cannot poison currentAngles (and every later nudge) with NaN.
  // Returns the txId of the command sent, or null when the target was only
  // queued behind a command that is still in flight.
  lookAtAngle(theta, psi, track = false) {
    const [curTheta, curPsi] = this.currentAngles;
    const wantTheta = Number(theta);
    const wantPsi = Number(psi);
    const safeTheta = Number.isNaN(wantTheta) ? curTheta : wantTheta;
    const safePsi = Number.isNaN(wantPsi) ? curPsi : wantPsi;
    const clampedTheta = Math.max(-180, Math.min(180, safeTheta));
    const clampedPsi = Math.max(-30, Math.min(30, safePsi));
    this.currentAngles = [clampedTheta, clampedPsi];
    this._broadcastStatus();
    if (this._lookInFlight) {
      // Robot is still processing the last command — just update desired target,
      // don't queue another message into its receive buffer.
      this._lookPending = [clampedTheta, clampedPsi, track];
      return null;
    }
    return this._fireLookAngle(clampedTheta, clampedPsi, track);
  }

  // Sends the angle command (converted to radians) and takes the in-flight lock.
  _fireLookAngle(theta, psi, track) {
    this._lookInFlight = true;
    this._lookPending = null;
    const DEG = Math.PI / 180;
    const txId = this.lookAt({ Angle: [theta * DEG, psi * DEG] }, track);
    this._lookActiveTxId = txId;
    // Safety release: if we never hear back within 400 ms, unblock anyway.
    clearTimeout(this._lookAckTimer);
    this._lookAckTimer = setTimeout(() => this._onLookAngleDone(), 400);
    return txId;
  }

  // Releases the in-flight lock and, if a newer target was queued, sends it.
  _onLookAngleDone() {
    clearTimeout(this._lookAckTimer);
    this._lookInFlight = false;
    this._lookActiveTxId = null;
    if (this._lookPending) {
      const [t, p, track] = this._lookPending;
      this._lookPending = null;
      this._fireLookAngle(t, p, track);
    }
  }

  lookAtScreen(x, y, track = false) {
    return this.lookAt({ ScreenCoords: [x, y] }, track, false);
  }

  lookAtPosition(x, y, z, track = false) {
    return this.lookAt({ Position: [x, y, z] }, track, false);
  }

  lookAtEntity(entityId, track = true) {
    return this.lookAt({ Entity: entityId }, track, false);
  }

  say(esml) {
    return this._send({ Type: 'Say', ESML: esml });
  }

  listen(maxSpeech = 10000, maxNoSpeech = 5000, lang = 'en-US') {
    return this._send({ Type: 'Listen', MaxSpeechTimeout: maxSpeech, MaxNoSpeechTimeout: maxNoSpeech, LanguageCode: lang });
  }

  // Local STT via jibo-asr-service (port 8088) — no cloud needed.
  // Mirrors the approach in @be/be/be/ai-bridge.js.
  // Note the parameter order (maxNoSpeech first) differs from listen().
  // Returns the txId of the accompanying ROM Listen command; the recognition
  // result is broadcast to subscribers under that txId as onListenResult, or
  // as onStop/NoInput on timeout or failure.
  listenLocalASR(maxNoSpeech = 5000, maxSpeech = 10000) {
    const ASR_WS = `ws://${JIBO_HOST}:8088/simple_port`;
    const taskId = 're-cmd-' + Date.now() + '-' + Math.floor(Math.random() * 1e9);
    const reqId = 'start-' + Date.now();
    // A non-numeric limit would give setTimeout NaN, which fires almost immediately.
    const longest = Math.max(Number(maxNoSpeech), Number(maxSpeech));
    const timeoutMs = (Number.isFinite(longest) ? longest : 10000) + 2000;
    // Send the ROM Listen for light ring / attention visuals, ignore its result
    const romTxId = this._send({ Type: 'Listen', MaxSpeechTimeout: maxSpeech, MaxNoSpeechTimeout: maxNoSpeech, LanguageCode: 'en-US' });
    const startPayload = JSON.stringify({
      command: 'start',
      task_id: taskId,
      request_id: reqId,
      audio_source_id: 'alsa1',
      hotphrase: 'none',
      speech_to_text: true,
    });
    const asrRequestOptions = (payload) => ({
      host: JIBO_HOST, port: 8088, path: '/asr_simple_interface', method: 'POST',
      headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) }
    });
    // Best-effort stop: the task is being abandoned, so a failure here is not actionable.
    const stopASR = () => {
      const stopBody = JSON.stringify({ command: 'stop', task_id: taskId, request_id: 'stop-' + Date.now() });
      const req = httpModule.request(asrRequestOptions(stopBody));
      req.on('error', () => {});
      req.write(stopBody);
      req.end();
    };
    let wsClient = null;
    let timer = null;
    let done = false;
    // Single exit point; runs at most once however many events race to call it.
    const finish = (speech) => {
      if (done) return;
      done = true;
      clearTimeout(timer);
      if (wsClient) { try { wsClient.terminate(); } catch (e) {} wsClient = null; }
      stopASR();
      // Cancel ROM listen
      this._send({ Type: 'Cancel', ID: romTxId });
      // Broadcast result as if it were a normal jiboEvent
      const evt = speech
        ? { Event: 'onListenResult', Speech: speech, LanguageCode: 'en-US' }
        : { Event: 'onStop', StopReason: 'NoInput' };
      this._broadcastToClients(JSON.stringify({ type: 'jiboEvent', txId: romTxId, body: evt }));
    };
    // Utterance objects use .utterance as the primary text field (ai-bridge.js: pickBestAsrUtterance)
    const pickUtterance = (u) => {
      if (!u) return '';
      if (typeof u === 'string') return u;
      return String(u.utterance || u.Utterance || u.text || '');
    };
    // Connect WS first, then POST start
    wsClient = new WebSocket(ASR_WS);
    wsClient.on('open', () => {
      // POST start to kick off recognition
      const req = httpModule.request(asrRequestOptions(startPayload), (res) => { res.resume(); });
      req.on('error', (e) => { console.error('[asr] start error:', e.message); finish(null); });
      req.write(startPayload);
      req.end();
      // Overall timeout
      timer = setTimeout(() => { finish(null); }, timeoutMs);
      console.log('[asr] local listen started, task:', taskId);
    });
    wsClient.on('message', (data) => {
      let evt;
      try { evt = JSON.parse(String(data)); } catch (e) { return; }
      if (evt === null || typeof evt !== 'object') return;
      const evType = evt.event_type || evt.eventType || evt.event || evt.type;
      if (evType !== 'speech_to_text_final') return;
      // Match by task/request id if present
      const evTask = evt.task_id || evt.taskId || (evt.payload && evt.payload.task_id);
      const evReq = evt.request_id || evt.requestId || (evt.payload && evt.payload.request_id);
      if ((evTask || evReq) && evTask !== taskId && evReq !== reqId) return;
      const utterances = evt.utterances || evt.Utterances || (evt.payload && evt.payload.utterances);
      const text = Array.isArray(utterances)
        ? pickUtterance(utterances[0])
        : (typeof utterances === 'string' ? utterances : '');
      const speech = text ? String(text).trim() : null;
      console.log('[asr] speech_to_text_final:', speech || '(empty)');
      // An empty final result keeps listening until the overall timeout.
      if (speech) finish(speech);
    });
    wsClient.on('error', (e) => {
      console.error('[asr] ws error:', e.message);
      finish(null);
    });
    wsClient.on('close', () => { if (!done) finish(null); });
    return romTxId;
  }

  takePhoto(camera = 'Right', resolution = 'HighRes', distortion = false) {
    return this._send({ Type: 'TakePhoto', Camera: camera, Resolution: resolution, Distortion: distortion });
  }

  startVideo() {
    // VideoType must be uppercase enum value; Duration is not in server schema
    this.videoTxId = this._send({ Type: 'Video', VideoType: 'NORMAL' });
    return this.videoTxId;
  }

  cancelVideo() {
    if (this.videoTxId) {
      this._send({ Type: 'Cancel', ID: this.videoTxId });
      this.videoTxId = null;
      this.videoStreamActive = false;
    }
  }

  displayEye() {
    return this._send({ Type: 'Display', View: { Type: 'Eye', Name: 'default' } });
  }

  // Plays a named animation via a Say command. The name arrives from the HTTP
  // API, so XML-special characters are escaped to keep the attribute well-formed.
  // NOTE(review): assumes the robot's ESML parser decodes standard XML entities — confirm.
  playEyeAnim(animName) {
    const safeName = String(animName)
      .replaceAll('&', '&amp;')
      .replaceAll('"', '&quot;')
      .replaceAll('<', '&lt;')
      .replaceAll('>', '&gt;');
    return this._send({ Type: 'Say', ESML: `<anim name="${safeName}"></anim>` });
  }

  displayText(text, name = 'reCmd') {
    return this._send({ Type: 'Display', View: { Type: 'Text', Name: name, Text: text } });
  }

  displayImage(src, name = 'reCmd') {
    return this._send({ Type: 'Display', View: { Type: 'Image', Name: name, Image: { src, name, set: '' } } });
  }

  setAttention(mode) {
    return this._send({ Type: 'SetAttention', Mode: mode });
  }

  // Sets the mixer level, clamped to the range 0..1.
  setVolume(level) {
    return this._send({ Type: 'SetConfig', Options: { Mixer: Math.max(0, Math.min(1, level)) } });
  }

  getConfig() {
    return this._send({ Type: 'GetConfig' });
  }

  cancel(txId) {
    return this._send({ Type: 'Cancel', ID: txId });
  }

  subscribe(streamType, filter = null) {
    const cmd = { Type: 'Subscribe', StreamType: streamType };
    if (filter) cmd.StreamFilter = filter;
    return this._send(cmd);
  }

  // Moves the look target by a delta (degrees) relative to the last requested angles.
  nudge(dTheta, dPsi) {
    const [theta, psi] = this.currentAngles;
    return this.lookAtAngle(theta + dTheta, psi + dPsi);
  }

  // Downloads a photo from the robot into PHOTOS_DIR and, once fully written,
  // broadcasts onPhotoSaved with the local URL. The file is only created after
  // the robot answers with a 2xx status, and a partial file is removed on any
  // stream failure.
  _savePhoto(jiboUri) {
    const url = `http://${JIBO_HOST}:${JIBO_PORT}${jiboUri}`;
    const filename = `photo_${Date.now()}.jpg`;
    const filepath = path.join(PHOTOS_DIR, filename);
    let failed = false;
    const fail = (err) => {
      if (failed) return;
      failed = true;
      fs.unlink(filepath, () => {});
      console.error('[photo] save failed:', err.message);
    };
    httpModule.get(url, (jiboRes) => {
      if (jiboRes.statusCode < 200 || jiboRes.statusCode >= 300) {
        jiboRes.resume(); // drain so the socket is released
        console.error('[photo] save failed:', `HTTP ${jiboRes.statusCode}`);
        return;
      }
      const file = fs.createWriteStream(filepath);
      // Without these handlers a disk or network error would be an unhandled 'error' event.
      file.on('error', (err) => { jiboRes.destroy(); fail(err); });
      jiboRes.on('error', (err) => { file.destroy(); fail(err); });
      file.on('finish', () => {
        console.log('[photo] saved:', filename);
        // Rebroadcast with local URL so the browser doesn't need the proxy
        this._broadcastToClients(JSON.stringify({
          type: 'jiboEvent',
          txId: null,
          body: { Event: 'onPhotoSaved', url: `/photos/${filename}`, filename }
        }));
      });
      jiboRes.pipe(file);
    }).on('error', (err) => {
      console.error('[photo] save failed:', err.message);
    });
  }

  // Returns a Promise that resolves with the robot's terminal event for txId
  // (onLookAtAchieved, onStop or onError), or with null after timeoutMs.
  awaitAck(txId, timeoutMs = 2000) {
    return new Promise((resolve) => {
      // Replacing an existing waiter must not leave its timer behind.
      const existing = this.pendingTx.get(txId);
      if (existing) clearTimeout(existing.timer);
      const timer = setTimeout(() => {
        this.pendingTx.delete(txId);
        resolve(null);
      }, timeoutMs);
      this.pendingTx.set(txId, { resolve, timer });
    });
  }
}
// ── Video proxy ──────────────────────────────────────────────────────────────
/**
 * Pipes a robot-hosted byte stream (video) to an Express response.
 *
 * @param {string} uri - Absolute path on the robot, appended to its base URL.
 * @param {object} res - Express response that receives the robot's status,
 *   headers and body. Replies 502 with a JSON error if the robot cannot be
 *   reached before any headers were sent.
 */
function proxyJiboStream(uri, res) {
  const url = `http://${JIBO_HOST}:${JIBO_PORT}${uri}`;
  console.log('[proxy] streaming:', url);
  const upstream = httpModule.get(url, (jiboRes) => {
    res.writeHead(jiboRes.statusCode, jiboRes.headers);
    jiboRes.pipe(res);
  });
  // Attach immediately: a client that disconnects before the robot answers
  // must still cancel the upstream request.
  res.on('close', () => upstream.destroy());
  upstream.on('error', (err) => {
    // Destroying the request after the client left also lands here; there is
    // nobody to answer in that case.
    if (res.destroyed || res.writableEnded) return;
    if (!res.headersSent) {
      res.status(502).json({ error: err.message });
    } else {
      // Headers already went out, so the only honest signal left is to cut the body short.
      res.destroy();
    }
  });
}
/**
 * Relays a single robot-hosted resource (photo) to an Express response.
 *
 * @param {string} uri - Absolute path on the robot, appended to its base URL.
 * @param {object} res - Express response that receives the robot's status,
 *   headers and body. Replies 502 with a JSON error if the robot cannot be
 *   reached before any headers were sent.
 */
function proxyJiboFetch(uri, res) {
  const url = `http://${JIBO_HOST}:${JIBO_PORT}${uri}`;
  const upstream = httpModule.get(url, (jiboRes) => {
    res.writeHead(jiboRes.statusCode, jiboRes.headers);
    jiboRes.pipe(res);
  });
  // Attach immediately so an early client disconnect cancels the upstream request.
  res.on('close', () => upstream.destroy());
  upstream.on('error', (err) => {
    // The client may already be gone (our own destroy() also ends up here).
    if (res.destroyed || res.writableEnded) return;
    if (!res.headersSent) {
      res.status(502).json({ error: err.message });
    } else {
      // Too late for a status code; terminate the truncated body instead.
      res.destroy();
    }
  });
}
// ── Wakeword watcher ─────────────────────────────────────────────────────────
// Keeps a WebSocket open to the robot's ASR service on port 8088 and relays
// each "hotphrase" event through the supplied broadcast function as an
// onHotWordHeard jiboEvent. Reconnects 3 s after the socket closes.
class WakewordWatcher {
  // broadcastFn receives one already-serialized JSON string per wakeword.
  // Construction connects immediately.
  constructor(broadcastFn) {
    this._broadcast = broadcastFn;
    this._ws = null;
    this._reconnectTimer = null;
    this._connect();
  }

  // Opens the ASR socket and wires up its four event handlers.
  _connect() {
    const socket = new WebSocket(`ws://${JIBO_HOST}:8088/simple_port`);
    this._ws = socket;

    socket.on('open', () => {
      console.log('[wakeword] connected to ASR WebSocket');
    });

    socket.on('message', (data) => this._relayHotphrase(data));

    socket.on('close', () => {
      console.log('[wakeword] disconnected — reconnecting in 3s');
      clearTimeout(this._reconnectTimer);
      this._reconnectTimer = setTimeout(() => this._connect(), 3000);
    });

    socket.on('error', (err) => {
      console.error('[wakeword] error:', err.message);
    });
  }

  // Parses one ASR frame; anything that is not valid JSON or not a hotphrase
  // event is dropped. Missing utterance data falls back to 'hey jibo' / score 0.
  _relayHotphrase(data) {
    let evt;
    try {
      evt = JSON.parse(String(data));
    } catch (e) {
      return;
    }
    if (evt.event_type !== 'hotphrase') return;

    const first = evt.utterances && evt.utterances[0];
    let score = 0;
    let heard = 'hey jibo';
    if (first) {
      score = first.score;
      heard = first.utterance;
    }
    console.log('[wakeword] heard! score:', score);

    const body = {
      Event: 'onHotWordHeard',
      utterance: heard,
      score: score,
      timestamp: evt.timestamp || new Date().toISOString()
    };
    this._broadcast(JSON.stringify({ type: 'jiboEvent', txId: null, body }));
  }
}
// ── App setup ────────────────────────────────────────────────────────────────
// Directory where photos fetched from the robot are stored; created on startup if missing.
const PHOTOS_DIR = path.join(__dirname, 'photos');
fs.mkdirSync(PHOTOS_DIR, { recursive: true });

// Single shared robot client used by every route below.
const jibo = new JiboClient();

// Middleware order: JSON bodies, then the static front-end, then saved photos.
const app = express();
const publicDir = path.join(__dirname, 'public');
app.use(express.json());
app.use(express.static(publicDir));
app.use('/photos', express.static(PHOTOS_DIR));
// ── REST API ─────────────────────────────────────────────────────────────────
// Parses a request-supplied value as a float; yields undefined unless the
// result is a finite number, so callers can reject bad input with a 400.
function finiteNumber(value) {
  const parsed = Number.parseFloat(value);
  return Number.isFinite(parsed) ? parsed : undefined;
}

// Look at an absolute angle in degrees. Body: { theta = 0, psi = 0, track = false }.
app.post('/api/look/angle', (req, res) => {
  // req.body can be undefined when no JSON body was parsed.
  const { theta = 0, psi = 0, track = false } = req.body ?? {};
  const t = finiteNumber(theta);
  const p = finiteNumber(psi);
  if (t === undefined || p === undefined) {
    return res.status(400).json({ error: 'theta and psi must be numbers' });
  }
  const txId = jibo.lookAtAngle(t, p, !!track);
  res.json({ txId });
});

// Look at screen coordinates. Body: { x, y, track = false }.
app.post('/api/look/screen', (req, res) => {
  const { x, y, track = false } = req.body ?? {};
  const sx = finiteNumber(x);
  const sy = finiteNumber(y);
  if (sx === undefined || sy === undefined) {
    return res.status(400).json({ error: 'x and y must be numbers' });
  }
  const txId = jibo.lookAtScreen(sx, sy, !!track);
  res.json({ txId });
});

// Blocking screen-coord step (up/down navigation): responds once the robot
// reports a terminal event for the command, or after 2 s.
app.post('/api/look/step', async (req, res) => {
  // Validate before awaiting: a throw in an async handler is an unhandled rejection.
  const { x, y } = req.body ?? {};
  const sx = finiteNumber(x);
  const sy = finiteNumber(y);
  if (sx === undefined || sy === undefined) {
    return res.status(400).json({ error: 'x and y must be numbers' });
  }
  const txId = jibo.lookAtScreen(sx, sy);
  await jibo.awaitAck(txId, 2000);
  res.json({ txId });
});

// Look at a 3-D position. Body: { x = 0, y = 0, z = 500, track = false }.
app.post('/api/look/position', (req, res) => {
  const { x = 0, y = 0, z = 500, track = false } = req.body ?? {};
  const px = finiteNumber(x);
  const py = finiteNumber(y);
  const pz = finiteNumber(z);
  if (px === undefined || py === undefined || pz === undefined) {
    return res.status(400).json({ error: 'x, y and z must be numbers' });
  }
  const txId = jibo.lookAtPosition(px, py, pz, !!track);
  res.json({ txId });
});

// Look at a tracked entity. Body: { entityId, track = true }.
app.post('/api/look/entity', (req, res) => {
  const { entityId, track = true } = req.body ?? {};
  if (entityId == null) return res.status(400).json({ error: 'entityId required' });
  const txId = jibo.lookAtEntity(entityId, !!track);
  res.json({ txId });
});

// Move the look target by a delta in degrees. Body: { dTheta = 0, dPsi = 0 }.
// txId is null when the move was queued behind a command still in flight.
app.post('/api/look/nudge', (req, res) => {
  const { dTheta = 0, dPsi = 0 } = req.body ?? {};
  const dt = finiteNumber(dTheta);
  const dp = finiteNumber(dPsi);
  if (dt === undefined || dp === undefined) {
    return res.status(400).json({ error: 'dTheta and dPsi must be numbers' });
  }
  const txId = jibo.nudge(dt, dp);
  res.json({ txId, angles: jibo.currentAngles });
});
// Speak the given text/ESML. Body: { text }.
app.post('/api/say', (req, res) => {
  // req.body can be undefined when no JSON body was parsed.
  const { text } = req.body ?? {};
  if (!text) return res.status(400).json({ error: 'text required' });
  const txId = jibo.say(text);
  res.json({ txId });
});

// Start a listen. Body: { maxSpeech = 10000, maxNoSpeech = 5000 } (milliseconds).
app.post('/api/listen', (req, res) => {
  const { maxSpeech = 10000, maxNoSpeech = 5000 } = req.body ?? {};
  const speechMs = Number(maxSpeech);
  const noSpeechMs = Number(maxNoSpeech);
  // A non-numeric limit would turn the listen timeout into NaN downstream.
  if (!Number.isFinite(speechMs) || !Number.isFinite(noSpeechMs)) {
    return res.status(400).json({ error: 'maxSpeech and maxNoSpeech must be numbers' });
  }
  // Use local ASR service (port 8088) — bypasses offline Google cloud ASR
  const txId = jibo.listenLocalASR(noSpeechMs, speechMs);
  res.json({ txId });
});
// Take a photo. Body: { camera = 'Right', resolution = 'HighRes' }.
app.post('/api/photo', ({ body }, res) => {
  const { camera = 'Right', resolution = 'HighRes' } = body;
  res.json({ txId: jibo.takePhoto(camera, resolution) });
});

// Ask the robot to start a video stream; responds with the command's txId.
app.post('/api/video/start', (_req, res) => {
  res.json({ txId: jibo.startVideo() });
});

// Cancel the current video stream, if any.
app.post('/api/video/stop', (_req, res) => {
  jibo.cancelVideo();
  res.json({ ok: true });
});
// Show the default eye on the robot's screen.
app.post('/api/display/eye', (_req, res) => {
  res.json({ txId: jibo.displayEye() });
});

// Play a named animation. Body: { name } (required).
app.post('/api/display/anim', ({ body }, res) => {
  const { name } = body;
  if (!name) {
    res.status(400).json({ error: 'name required' });
    return;
  }
  res.json({ txId: jibo.playEyeAnim(name) });
});

// Show text on the robot's screen. Body: { text } (required).
app.post('/api/display/text', ({ body }, res) => {
  const { text } = body;
  if (!text) {
    res.status(400).json({ error: 'text required' });
    return;
  }
  res.json({ txId: jibo.displayText(text) });
});

// Show an image on the robot's screen. Body: { src } (required).
app.post('/api/display/image', ({ body }, res) => {
  const { src } = body;
  if (!src) {
    res.status(400).json({ error: 'src required' });
    return;
  }
  res.json({ txId: jibo.displayImage(src) });
});
// Set the robot's attention mode. Body: { mode } (required).
app.post('/api/attention', (req, res) => {
  // req.body can be undefined when no JSON body was parsed.
  const { mode } = req.body ?? {};
  if (!mode) return res.status(400).json({ error: 'mode required' });
  const txId = jibo.setAttention(mode);
  res.json({ txId });
});

// Set the volume. Body: { level } (required, numeric; the client clamps it to 0..1).
app.post('/api/volume', (req, res) => {
  const { level } = req.body ?? {};
  if (level == null) return res.status(400).json({ error: 'level required' });
  const parsed = Number.parseFloat(level);
  // NaN would serialize as null in the command sent to the robot.
  if (!Number.isFinite(parsed)) return res.status(400).json({ error: 'level must be a number' });
  const txId = jibo.setVolume(parsed);
  res.json({ txId });
});

// Cancel a previously issued command. Body: { txId } (required).
app.post('/api/cancel', (req, res) => {
  const { txId } = req.body ?? {};
  if (!txId) return res.status(400).json({ error: 'txId required' });
  jibo.cancel(txId);
  res.json({ ok: true });
});
// Exposes the server-side LLM defaults to the browser (never the API key).
app.get('/api/config', (_req, res) => {
  const { LLM_ENDPOINT, LLM_MODEL } = process.env;
  const config = {
    llmEndpoint: LLM_ENDPOINT || '',
    llmModel: LLM_MODEL || '',
    llmSystemPrompt: LLM_SYSTEM_PROMPT || '',
  };
  res.json(config);
});
// Proxy OpenAI-compatible chat completions — keeps API key off the browser
/**
 * POSTs a JSON body and resolves with the parsed JSON response.
 *
 * @param {string} urlStr - Absolute http: or https: URL.
 * @param {object} reqHeaders - Extra request headers; they override the
 *   default Content-Type / Content-Length on key collision.
 * @param {*} body - Value serialized with JSON.stringify as the request body.
 * @returns {Promise<*>} Parsed response body. The HTTP status is not
 *   inspected: any response whose body is valid JSON resolves.
 *   Rejects on an invalid URL, a network error, or a non-JSON body (the error
 *   message then carries the first 300 characters of that body).
 */
function httpPost(urlStr, reqHeaders, body) {
  return new Promise((resolve, reject) => {
    // A throw here (invalid URL) becomes a rejection of the surrounding promise.
    const u = new URL(urlStr);
    const mod = u.protocol === 'https:' ? https : httpModule;
    const payload = JSON.stringify(body);
    const req = mod.request({
      hostname: u.hostname,
      port: u.port || (u.protocol === 'https:' ? 443 : 80),
      path: u.pathname + u.search,
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload), ...reqHeaders }
    }, (res) => {
      // Collect raw bytes and decode once: decoding chunk by chunk corrupts any
      // multi-byte UTF-8 character that straddles a chunk boundary.
      const chunks = [];
      res.on('data', (chunk) => chunks.push(chunk));
      res.on('error', reject);
      res.on('end', () => {
        const text = Buffer.concat(chunks).toString('utf8');
        try {
          resolve(JSON.parse(text));
        } catch (e) {
          reject(new Error('LLM non-JSON response: ' + text.slice(0, 300)));
        }
      });
    });
    req.on('error', reject);
    req.write(payload);
    req.end();
  });
}
// Chat-completions proxy. Body: { messages = [], endpoint, model, systemPrompt }.
// Request values override the environment defaults. Responds { reply } on
// success, 400 on a malformed body, 502 when the upstream call fails.
app.post('/api/llm/chat', async (req, res) => {
  // req.body can be undefined when no JSON body was parsed.
  const { messages = [], endpoint, model, systemPrompt } = req.body ?? {};
  // Reject before spreading: a throw here would escape the try below and
  // surface as an unhandled rejection from this async handler.
  if (!Array.isArray(messages)) {
    return res.status(400).json({ error: 'messages must be an array' });
  }
  const configuredUrl = process.env.LLM_ENDPOINT || 'http://localhost:11434/v1/chat/completions';
  const url = endpoint || configuredUrl;
  const mdl = model || process.env.LLM_MODEL || 'llama3';
  const sysProm = systemPrompt || LLM_SYSTEM_PROMPT || '';
  const apiKey = process.env.LLM_API_KEY || '';
  const allMessages = sysProm
    ? [{ role: 'system', content: sysProm }, ...messages]
    : messages;
  const headers = {};
  // The key is a server secret. Attach it only for the server-configured
  // endpoint; otherwise any client could name its own endpoint and read the
  // key from the Authorization header.
  if (apiKey && url === configuredUrl) headers['Authorization'] = `Bearer ${apiKey}`;
  try {
    const result = await httpPost(url, headers, { model: mdl, messages: allMessages, stream: false });
    const reply = result?.choices?.[0]?.message?.content?.trim() || '';
    res.json({ reply });
  } catch (err) {
    console.error('[llm] error:', err.message);
    res.status(502).json({ error: err.message });
  }
});
// Snapshot of the robot connection for clients that poll instead of using the WebSocket.
app.get('/api/status', (_req, res) => {
  const { connected, sessionID, currentAngles, videoStreamActive } = jibo;
  res.json({
    connected,
    sessionID,
    angles: currentAngles,
    videoStreamActive
  });
});
// Proxy Jibo's video/photo byte streams through the server
// Relay a robot video stream. Query: ?uri=/absolute/path/on/robot
app.get('/proxy/stream', (req, res) => {
  const { uri } = req.query;
  // A repeated ?uri= parameter arrives as an array, which has no startsWith();
  // reject anything that is not a single string.
  if (typeof uri !== 'string' || !uri.startsWith('/')) {
    return res.status(400).json({ error: 'invalid uri' });
  }
  proxyJiboStream(uri, res);
});

// Relay a robot photo. Query: ?uri=/absolute/path/on/robot
app.get('/proxy/photo', (req, res) => {
  const { uri } = req.query;
  if (typeof uri !== 'string' || !uri.startsWith('/')) {
    return res.status(400).json({ error: 'invalid uri' });
  }
  proxyJiboFetch(uri, res);
});
// ── HTTP + WebSocket server ───────────────────────────────────────────────────
// One HTTP server carries both the Express app and the browser WebSocket at /ws.
const server = http.createServer(app);
const wss = new WebSocketServer({ server, path: '/ws' });

// Each browser socket is a subscriber until it closes or errors.
wss.on('connection', (ws) => {
  const unsubscribe = () => jibo.removeSubscriber(ws);
  jibo.addSubscriber(ws);
  ws.on('close', unsubscribe);
  ws.on('error', unsubscribe);
});

// The robot connection and the wakeword watcher start only once the port is bound.
server.listen(APP_PORT, () => {
  console.log(`Re-Commander running at http://localhost:${APP_PORT}`);
  jibo.connect();
  const relayToBrowsers = (msg) => jibo._broadcastToClients(msg);
  new WakewordWatcher(relayToBrowsers);
});