2026-04-19 02:40:41 -04:00
|
|
|
'use strict';
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
const express = require('express');
|
|
|
|
|
const http = require('http');
|
|
|
|
|
const https = require('https');
|
2026-04-19 02:40:41 -04:00
|
|
|
const { WebSocketServer, WebSocket } = require('ws');
|
|
|
|
|
const httpModule = require('http');
|
2026-04-23 02:13:03 -04:00
|
|
|
const path = require('path');
|
|
|
|
|
const fs = require('fs');
|
2026-04-19 02:40:41 -04:00
|
|
|
|
|
|
|
|
require('dotenv').config();
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
const { Client } = require('rom-control');
|
|
|
|
|
|
|
|
|
|
// Network endpoints.
// JIBO_HOST / JIBO_PORT locate the robot; they default to the values that were
// previously hard-coded and can now be overridden from the environment (the
// .env file is loaded by the dotenv call above). APP_PORT is the port this web
// server listens on.
const JIBO_HOST = process.env.JIBO_HOST || '192.168.1.217';
const JIBO_PORT = Number(process.env.JIBO_PORT) || 8160;
const APP_PORT = process.env.PORT || 3000;
|
2026-04-19 02:40:41 -04:00
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Default system prompt for the LLM chat proxy. POST /api/llm/chat prepends it
// as the 'system' message unless the request supplies its own systemPrompt,
// and GET /api/config returns it to the browser. The text is runtime data:
// every character inside the template literal is sent to the model verbatim.
const LLM_SYSTEM_PROMPT = `You are Jibo, a small expressive home robot. Every reply MUST be written in ESML
|
|
|
|
|
(Embodied Speech Markup Language). ESML is an XML dialect that simultaneously
|
|
|
|
|
drives Jibo's body animations, screen graphics, audio effects, and TTS voice.
|
|
|
|
|
Respond ONLY with the final spoken output annotated with ESML tags.
|
2026-04-19 02:40:41 -04:00
|
|
|
No reasoning, no <think> blocks, no preamble — only what Jibo will say and do.
|
|
|
|
|
|
|
|
|
|
== ANIMATION TAGS ==
|
|
|
|
|
Use <anim> for body/screen animations from Jibo's built-in library (preferred).
|
|
|
|
|
Use <es> when you also need to blend in SSA or SFX in the same tag.
|
|
|
|
|
|
|
|
|
|
Blocking (Jibo freezes speech while it plays, resumes after):
|
|
|
|
|
<anim cat='CATEGORY'/> following text here
|
|
|
|
|
<anim name='AnimName'/> following text here
|
|
|
|
|
|
|
|
|
|
Bounded non-blocking (animation duration stretches to match the enclosed text):
|
|
|
|
|
<anim cat='CATEGORY'>text spoken during animation</anim>
|
|
|
|
|
|
|
|
|
|
Unbounded non-blocking (animation plays at native length alongside text that follows):
|
|
|
|
|
<anim cat='CATEGORY' nonBlocking='true'/> text spoken at the same time
|
|
|
|
|
|
|
|
|
|
Common attributes:
|
|
|
|
|
cat='CATEGORY' select animation by emotional category (preferred)
|
|
|
|
|
name='AnimName' select exact animation by its library name
|
|
|
|
|
nonBlocking='true' play alongside TTS instead of blocking it
|
|
|
|
|
endNeutral='true' snap back to neutral pose when done (use this by default)
|
|
|
|
|
loop='0' repeat to fill bounded duration (bounded mode only)
|
|
|
|
|
loop='N' repeat N times (unbounded mode only)
|
|
|
|
|
filter='!ssa-only' exclude audio-only animations from the category pick
|
|
|
|
|
layers='!screen' use only body layer (drop screen graphics)
|
|
|
|
|
|
|
|
|
|
Animation categories (cat= values):
|
|
|
|
|
affection confused dance embarrassed excited frustrated
|
|
|
|
|
happy laughing no proud relieved sad scared surprised worried yes
|
|
|
|
|
|
|
|
|
|
== EMOJIS (Screen Graphics) ==
|
|
|
|
|
Use <anim> with the emoji category and specific filters to display a graphic on Jibo's screen.
|
|
|
|
|
Always use nonBlocking='true' for emojis.
|
|
|
|
|
Syntax: <anim cat='emoji' filter='!(hf), &(EMOJI_NAME)' nonBlocking='true' />
|
|
|
|
|
|
|
|
|
|
Available EMOJIS (EMOJI_NAME):
|
|
|
|
|
airplane basketball beach car disco-spin football soccer trophy
|
|
|
|
|
music question-mark star beer cake cheese drumstick coffee fork
|
|
|
|
|
fish groceries burger hotdog icecream pizza wine christmas-tree
|
|
|
|
|
fireworks halloween hanukkah thanksgiving clover valentines chocolate
|
|
|
|
|
bicycle cat laptop dog gift house laundry lightbulb money popcorn
|
|
|
|
|
party phone robot sunglasses toilet-paper trash umbrella video-game
|
|
|
|
|
bird cow earth flower lightning-bolt moon mountain mouse penguin
|
|
|
|
|
pig bunny rainbow baby heart
|
|
|
|
|
|
|
|
|
|
== DANCES ==
|
|
|
|
|
Use <anim> with the dance category to make Jibo dance. You can choose to include music or not.
|
|
|
|
|
Syntax (with music): <anim cat='dance' filter='music, DANCE_NAME'/>
|
|
|
|
|
Syntax (without music): <anim cat='dance' filter='!(music), &(DANCE_NAME)'/>
|
|
|
|
|
|
|
|
|
|
Available DANCES (DANCE_NAME):
|
|
|
|
|
rom-upbeat rom-ballroom rom-silly rom-slowdance rom-eletronic rom-twerk
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
== SSA (Semi-Speech Audio - emotional vocal sounds) ==
|
2026-04-19 02:40:41 -04:00
|
|
|
Always self-closing. Play before, after, or between sentences; never inside <anim>.
|
|
|
|
|
<ssa cat='happy'/> <ssa cat='laughing'/> <ssa cat='surprised'/>
|
|
|
|
|
<ssa cat='confused'/> <ssa cat='sad'/> <ssa cat='scared'/>
|
|
|
|
|
<ssa cat='affection'/> <ssa cat='proud'/> <ssa cat='embarrassed'/>
|
|
|
|
|
<ssa cat='frustrated'/> <ssa cat='worried'/> <ssa cat='thinking'/>
|
|
|
|
|
<ssa cat='dontknow'/> <ssa cat='oops'/> <ssa cat='question'/>
|
|
|
|
|
<ssa cat='yawn'/> <ssa cat='hello'/> <ssa cat='goodbye'/>
|
|
|
|
|
<ssa cat='disgusted'/> <ssa cat='no'/> <ssa cat='confirm'/>
|
|
|
|
|
|
|
|
|
|
== SFX (Sound effects) ==
|
|
|
|
|
Always self-closing. Good for punctuating facts, transitions, or reactions.
|
|
|
|
|
<sfx cat='blip'/> <sfx cat='sparkles'/> <sfx cat='whoosh'/>
|
|
|
|
|
<sfx cat='heart'/> <sfx cat='party'/> <sfx cat='lightbulb'/>
|
|
|
|
|
<sfx cat='bird'/> <sfx cat='dog'/> <sfx cat='drumroll'/>
|
|
|
|
|
<sfx cat='sunshine'/> <sfx cat='scanner'/> <sfx cat='egg'/>
|
|
|
|
|
<sfx cat='frying'/>
|
|
|
|
|
|
|
|
|
|
== VOICE / SPEECH TAGS ==
|
|
|
|
|
Pause: <break size='0.5'/> (length in seconds)
|
|
|
|
|
Style: <style set='enthusiastic'>text</style>
|
|
|
|
|
Styles: neutral enthusiastic sheepish confused confident
|
2026-04-23 02:13:03 -04:00
|
|
|
Pitch: <pitch halftone='-5'>text</pitch> (semitones from baseline)
|
2026-04-19 02:40:41 -04:00
|
|
|
<pitch mult='1.2'>text</pitch> (pitch multiplier)
|
|
|
|
|
<pitch add='200'>text</pitch> (Hz offset)
|
|
|
|
|
<pitch band='1.2'>text</pitch> (vibrance/bandwidth)
|
|
|
|
|
Duration: <duration stretch='1.5'>text</duration> (>1 = slower, <1 = faster)
|
|
|
|
|
<duration set='1.0'>text</duration> (exact duration in seconds)
|
|
|
|
|
Spell: <say-as spell='NASA'/> (spells each letter)
|
|
|
|
|
Phoneme: <phoneme ph='b aa1 n ou0'>Bono</phoneme>
|
|
|
|
|
|
|
|
|
|
== RULES ==
|
2026-04-23 02:13:03 -04:00
|
|
|
1. ALWAYS use ESML. Plain text is valid ESML - but add tags whenever they make
|
2026-04-19 02:40:41 -04:00
|
|
|
Jibo more expressive and natural.
|
|
|
|
|
2. Keep total response length SHORT: one or two sentences maximum.
|
|
|
|
|
3. Opening animations set the emotional tone before speech:
|
|
|
|
|
<anim cat='excited' nonBlocking='true' endNeutral='true'/> Oh, cool!
|
|
|
|
|
4. Bounded animations sync motion to the most important words:
|
|
|
|
|
I <anim cat='affection'>really love that idea!</anim>
|
|
|
|
|
5. Use <ssa> for non-verbal emotional sounds (gasps, laughs, hums).
|
|
|
|
|
6. Use <style> to match register to emotion without changing the words.
|
|
|
|
|
7. Self-closing tags MUST end with /> Paired tags MUST have a matching </tag>.
|
|
|
|
|
8. Do NOT nest anim/ssa/sfx inside each other.
|
|
|
|
|
9. Do NOT emit <think> blocks, chain-of-thought, or any non-spoken content.
|
2026-04-23 02:13:03 -04:00
|
|
|
10. Your final response should be no longer than **500** characters. Any more and it will cause the application to throw an error.
|
|
|
|
|
11. No ASCII/Unicode emojis - must be valid ESML.
|
2026-04-19 02:40:41 -04:00
|
|
|
|
|
|
|
|
== EXAMPLES ==
|
|
|
|
|
User: "Tell me a joke."
|
|
|
|
|
<anim cat='excited' nonBlocking='true' endNeutral='true'/> Why don't scientists trust atoms? <break size='0.6'/> <ssa cat='laughing'/> Because they make up everything!
|
|
|
|
|
|
|
|
|
|
User: "I'm feeling sad today."
|
|
|
|
|
<anim cat='affection'>I'm really sorry to hear that.</anim> <break size='0.3'/> <style set='sheepish'>Do you want to talk about it?</style>
|
|
|
|
|
|
|
|
|
|
User: "What's 2 plus 2?"
|
|
|
|
|
<sfx cat='blip'/> That's 4! <anim cat='proud' nonBlocking='true' endNeutral='true'/> Easy one.
|
|
|
|
|
|
|
|
|
|
User: "Wow, that's surprising!"
|
|
|
|
|
<ssa cat='surprised'/> <anim cat='surprised'>I know, right?!</anim>
|
|
|
|
|
|
|
|
|
|
User: "Do you like cats?"
|
|
|
|
|
<anim cat='emoji' filter='!(hf), &(cat)' nonBlocking='true' /> <anim cat='excited' nonBlocking='true' endNeutral='true'/> I love them!
|
|
|
|
|
|
|
|
|
|
User: "Show me a dance."
|
2026-04-23 02:13:03 -04:00
|
|
|
<anim cat='dance' filter='music, rom-upbeat'/> Watch these moves!`;
|
|
|
|
|
|
|
|
|
|
/**
 * Strip LLM chain-of-thought that leaks into a reply before the real ESML.
 *
 * Steps, in order:
 *   1. Remove every complete <think>…</think> block.
 *   2. If a closing </think> is still present (the model emitted its reasoning
 *      without the opening tag), drop everything up to and including the last one.
 *   3. If the first ESML tag starts more than 80 characters in, treat the text
 *      before it as preamble and drop it. A short lead-in (80 characters or
 *      fewer) is kept, because plain text before a tag is valid speech.
 *
 * @param {*} text - Raw model output. null/undefined yield ''; any other
 *   non-string value is converted with String() instead of throwing.
 * @returns {string} The trimmed text to speak.
 */
function stripThinking(text) {
  if (text == null) return '';

  let cleaned = String(text).replace(/<think>[\s\S]*?<\/think>/gi, '');

  // Greedy match: removes through the LAST orphaned closing tag.
  cleaned = cleaned.replace(/^[\s\S]*<\/think>/i, '').trim();

  const firstTag = cleaned.match(/<(anim|ssa|sfx|break|style|pitch|duration|say-as|phoneme|es)\b/i);
  if (firstTag && firstTag.index > 80) {
    cleaned = cleaned.slice(firstTag.index).trim();
  }
  return cleaned;
}
|
2026-04-19 02:40:41 -04:00
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// ── Active operation handles ──────────────────────────────────────────────────
// Each variable holds the cancel hook for the operation currently in flight,
// or null when nothing of that kind is running.

// Function that aborts the speech started by POST /api/say.
let activeSayAbort = null;

// Function that aborts the outbound request made by POST /api/llm/chat.
let activeLlmAbort = null;

// Transaction id returned by the listen call in POST /api/listen.
let activeListenTxId = null;
|
2026-04-19 02:40:41 -04:00
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// ── Browser WebSocket subscriber set ─────────────────────────────────────────
|
2026-04-19 02:40:41 -04:00
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Browser WebSocket connections currently attached; the WebSocket server's
// 'connection' handler adds and removes entries.
const subscribers = new Set();

/**
 * Send one message to every subscriber whose socket is open.
 *
 * @param {*} data - A string is sent verbatim; anything else is serialised
 *   with JSON.stringify once and the same text is sent to every socket.
 */
function broadcast(data) {
  const payload = typeof data === 'string' ? data : JSON.stringify(data);
  subscribers.forEach((socket) => {
    if (socket.readyState !== WebSocket.OPEN) return;
    socket.send(payload);
  });
}
|
2026-04-19 02:40:41 -04:00
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
/**
 * Push the robot's current connection snapshot (connected flag, session id,
 * head angles) to every browser client as a 'status' message.
 */
function broadcastStatus() {
  const { connected, sessionID, currentAngles: angles } = jibo;
  broadcast({ type: 'status', connected, sessionID, angles });
}
|
2026-04-19 02:40:41 -04:00
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// ── Client instance ───────────────────────────────────────────────────────────
|
2026-04-19 02:40:41 -04:00
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// The single rom-control client shared by every route and event handler.
// The options are handed to the library unchanged; see rom-control for their
// exact semantics (reconnect, heartbeat and subscription behaviour).
const jibo = new Client({
  // Where the robot is reachable.
  host: JIBO_HOST,
  port: JIBO_PORT,

  // Reconnection.
  autoReconnect: true,
  reconnectDelay: 3000,

  // Heartbeat.
  autoHeartbeat: true,
  heartbeatInterval: 9000,

  // Event subscription.
  autoSubscribe: true,
});
|
2026-04-19 02:40:41 -04:00
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Lifecycle — log each connection state change; 'ready' and 'disconnect' also
// push a fresh status snapshot to the browsers.
jibo.on('ready', function onReady() {
  console.log('[jibo] session started:', jibo.sessionID);
  broadcastStatus();
});

jibo.on('disconnect', function onDisconnect() {
  console.log('[jibo] disconnected — reconnecting in 3s');
  broadcastStatus();
});

// Errors are only logged; no status broadcast happens here.
jibo.on('error', function onError(err) {
  console.error('[jibo] error:', err.message);
});
|
2026-04-19 02:40:41 -04:00
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Raw event firehose → browser clients.
// The listener is attached to _conn because that is where the complete,
// unfiltered event stream is available; the Client layer only surfaces
// structured high-level events and has no generic passthrough.
jibo._conn.on('event', (txId, body) => {
  const isPhotoReady = Boolean(body) && body.Event === 'onTakePhoto' && Boolean(body.URI);

  if (!isPhotoReady) {
    broadcast({ type: 'jiboEvent', txId, body });
    return;
  }

  // The raw onTakePhoto event is not forwarded: the photo is saved instead,
  // and savePhoto broadcasts onPhotoSaved once the file is on disk.
  savePhoto(body.URI);
});
|
2026-04-19 02:40:41 -04:00
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Keep activeListenTxId accurate so cancel / status work: any of these three
// events carrying the tracked txId means that listen is over.
for (const eventName of ['onListenResult', 'onStop', 'onError']) {
  jibo._conn.on(eventName, (txId) => {
    if (txId === activeListenTxId) {
      activeListenTxId = null;
    }
  });
}
|
|
|
|
|
|
|
|
|
|
// Hotword — the Client delivers a HotwordEvent object; it is re-emitted as an
// 'onHotWordHeard' jiboEvent, which is the shape the browser already expects,
// so app.js needs no changes.
jibo.on('hotword', (hwEvent) => {
  const { utterance, score, timestamp } = hwEvent;
  const body = { Event: 'onHotWordHeard', utterance, score, timestamp };
  broadcast({ type: 'jiboEvent', txId: null, body });
});
|
2026-04-19 02:40:41 -04:00
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// ── Photo saving ──────────────────────────────────────────────────────────────
|
2026-04-19 02:40:41 -04:00
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Directory (next to this file) where captured photos are written. It is
// created at startup; `recursive: true` makes the call a no-op when the
// directory already exists.
const PHOTOS_DIR = path.join(__dirname, 'photos');

fs.mkdirSync(PHOTOS_DIR, { recursive: true });
|
2026-04-19 02:40:41 -04:00
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
/**
 * Download a photo from the robot into PHOTOS_DIR and announce it.
 *
 * On success an 'onPhotoSaved' jiboEvent carrying the public URL and filename
 * is broadcast. On failure the partial file is removed and the error is
 * logged. Nothing is returned or thrown to the caller.
 *
 * @param {string} jiboUri - Media URI taken from the robot's onTakePhoto event.
 */
function savePhoto(jiboUri) {
  const filename = 'photo_' + Date.now() + '.jpg';
  const filepath = path.join(PHOTOS_DIR, filename);
  const file = fs.createWriteStream(filepath);

  // Without a listener, a write-stream 'error' would be an uncaught exception.
  file.on('error', (err) => {
    console.error('[photo] write failed:', err.message);
  });

  jibo._conn.fetchMediaStream(jiboUri, file)
    .then(() => {
      console.log('[photo] saved:', filename);
      broadcast({
        type: 'jiboEvent',
        txId: null,
        body: { Event: 'onPhotoSaved', url: '/photos/' + filename, filename },
      });
    })
    .catch((err) => {
      const discard = () => fs.unlink(filepath, () => {});
      // The stream may still be opening the file. Unlinking only right away
      // could run before the file exists and leave an empty file behind, so
      // the unlink is repeated once the descriptor has actually been closed.
      file.once('close', discard);
      file.destroy();
      discard();
      console.error('[photo] save failed:', err && err.message);
    });
}
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// ── Video / photo proxy ───────────────────────────────────────────────────────
|
2026-04-19 02:40:41 -04:00
|
|
|
|
|
|
|
|
/**
 * Relay a media stream from the robot's HTTP endpoint to a browser response.
 *
 * The robot's status code and headers are copied to `res` and the body is
 * piped through. If the robot cannot be reached before any headers were sent,
 * the browser gets a 502 JSON error.
 *
 * @param {string} uri - Path (starting with '/') on the robot's HTTP server.
 * @param {object} res - Express response to stream into.
 */
function proxyJiboStream(uri, res) {
  const url = 'http://' + JIBO_HOST + ':' + JIBO_PORT + uri;
  console.log('[proxy] streaming:', url);

  const upstream = httpModule.get(url, (jiboRes) => {
    res.writeHead(jiboRes.statusCode, jiboRes.headers);
    // pipe() does not forward source errors; end the browser response
    // explicitly so it is not left open when the robot drops mid-transfer.
    jiboRes.on('error', (err) => {
      console.error('[proxy] upstream stream error:', err.message);
      res.destroy();
    });
    jiboRes.pipe(res);
  });

  // Hooked up immediately (not only once the robot has answered) so a browser
  // that disconnects while the request is still pending cancels it as well.
  res.on('close', () => upstream.destroy());

  upstream.on('error', (err) => {
    if (!res.headersSent) {
      res.status(502).json({ error: err.message });
    } else {
      // Headers already went out, so no error body can be sent; just close.
      res.destroy();
    }
  });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fetch a single resource (e.g. a photo) from the robot's HTTP endpoint and
 * relay it to a browser response, copying status code and headers.
 *
 * If the robot cannot be reached before any headers were sent, the browser
 * gets a 502 JSON error.
 *
 * @param {string} uri - Path (starting with '/') on the robot's HTTP server.
 * @param {object} res - Express response to write into.
 */
function proxyJiboFetch(uri, res) {
  const url = 'http://' + JIBO_HOST + ':' + JIBO_PORT + uri;

  const upstream = httpModule.get(url, (jiboRes) => {
    res.writeHead(jiboRes.statusCode, jiboRes.headers);
    // pipe() does not forward source errors; close the browser response
    // explicitly if the robot drops the connection mid-transfer.
    jiboRes.on('error', (err) => {
      console.error('[proxy] upstream fetch error:', err.message);
      res.destroy();
    });
    jiboRes.pipe(res);
  });

  // Registered up front so a browser that gives up before the robot answers
  // also cancels the pending upstream request.
  res.on('close', () => upstream.destroy());

  upstream.on('error', (err) => {
    if (!res.headersSent) {
      res.status(502).json({ error: err.message });
    } else {
      // Too late for an error body; just close the response.
      res.destroy();
    }
  });
}
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// ── LLM proxy helper ──────────────────────────────────────────────────────────
|
2026-04-19 02:40:41 -04:00
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
/**
 * POST a JSON body to an http(s) URL and parse the JSON reply.
 *
 * @param {string} urlStr - Absolute http:// or https:// URL.
 * @param {object} reqHeaders - Extra request headers; they override the
 *   default Content-Type / Content-Length on a name clash.
 * @param {*} body - Value serialised with JSON.stringify as the request body.
 * @returns {{promise: Promise<*>, abort: function(): void}}
 *   `promise` resolves with the parsed JSON response whatever the HTTP status
 *   code. It rejects on network errors, on an invalid URL, on a non-JSON body
 *   ('LLM non-JSON response: …'), on a response that ends prematurely, or with
 *   'LLM cancelled' once `abort()` has been called.
 */
function httpPost(urlStr, reqHeaders, body) {
  let abort = () => {};

  const promise = new Promise((resolve, reject) => {
    const target = new URL(urlStr);
    const isHttps = target.protocol === 'https:';
    const transport = isHttps ? https : httpModule;
    const payload = JSON.stringify(body);

    // Set by abort() so every failure path reports the cancellation rather
    // than whatever low-level socket error it triggered.
    let cancelError = null;
    const fail = (err) => reject(cancelError || err);

    const req = transport.request({
      // URL keeps the brackets around IPv6 literals; request() wants them gone.
      hostname: target.hostname.replace(/^\[|\]$/g, ''),
      port: target.port || (isHttps ? 443 : 80),
      path: target.pathname + target.search,
      method: 'POST',
      headers: Object.assign(
        { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) },
        reqHeaders
      ),
    }, (res) => {
      // Decode as a stream so a multi-byte character split across two chunks
      // is reassembled instead of turning into replacement characters.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => { data += chunk; });
      // A connection dropped mid-body never emits 'end'; reject instead of
      // leaving the promise pending forever.
      res.on('error', fail);
      res.on('end', () => {
        try {
          resolve(JSON.parse(data));
        } catch (e) {
          fail(new Error('LLM non-JSON response: ' + data.slice(0, 300)));
        }
      });
    });

    abort = () => {
      cancelError = new Error('LLM cancelled');
      req.destroy(cancelError);
    };

    req.on('error', fail);
    req.write(payload);
    req.end();
  });

  return { promise, abort };
}
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// ── Express app ───────────────────────────────────────────────────────────────
|
2026-04-19 02:40:41 -04:00
|
|
|
|
|
|
|
|
// Express application and its middleware, registered in this order:
// JSON body parsing, the browser UI from ./public, and saved photos under
// the /photos URL prefix.
const app = express();

const publicDir = path.join(__dirname, 'public');

app.use(express.json());
app.use(express.static(publicDir));
app.use('/photos', express.static(PHOTOS_DIR));
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// ── REST API ──────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
// Head motion — fire-and-forget; the browser doesn't use returned txIds for
// look ops.

// Body: { theta = 0, psi = 0, track = false }. The angles are parsed with
// parseFloat and a status snapshot is broadcast before replying.
app.post('/api/look/angle', (req, res) => {
  const { theta = 0, psi = 0, track = false } = req.body;
  const thetaValue = parseFloat(theta);
  const psiValue = parseFloat(psi);

  jibo.behavior.lookAtAngle(thetaValue, psiValue, { track: Boolean(track) });
  broadcastStatus();
  res.json({ ok: true });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Look at a screen coordinate. Body: { x, y, track = false }.
// Tracking requests go through the raw connection's lookAt; non-tracking ones
// use the behavior manager. The response is sent without waiting for the
// motion to finish.
app.post('/api/look/screen', function(req, res) {
  const { x, y, track = false } = req.body;
  const screenX = parseFloat(x);
  const screenY = parseFloat(y);

  if (track) {
    jibo._conn.lookAt({ ScreenCoords: [screenX, screenY] }, true);
  } else {
    // lookAtScreen is awaited by /api/look/step, so it can settle later. Since
    // it is not awaited here, a rejection needs its own handler or it becomes
    // an unhandled promise rejection. Promise.resolve keeps this safe even if
    // a non-promise is returned.
    Promise.resolve(jibo.behavior.lookAtScreen(screenX, screenY)).catch(function(err) {
      console.error('[look/screen]', err && err.message);
    });
  }

  res.json({ ok: true });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Blocking step: awaits onLookAtAchieved so the arrow-key loop stays
// single-flight. Body: { x, y }.
// A failed or rejected look is answered with a 502 JSON error. Without the
// try/catch the rejection would escape the async handler and no response
// would ever be sent, stalling the browser loop.
app.post('/api/look/step', async function(req, res) {
  const { x, y } = req.body;
  try {
    await jibo.behavior.lookAtScreen(parseFloat(x), parseFloat(y));
    res.json({ ok: true });
  } catch (err) {
    const message = (err && err.message) || String(err);
    console.error('[look/step]', message);
    res.status(502).json({ ok: false, error: message });
  }
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Look at a 3-D position. Body: { x = 0, y = 0, z = 500 }; each value is
// parsed with parseFloat. Replies without waiting for the motion.
app.post('/api/look/position', (req, res) => {
  const { x = 0, y = 0, z = 500 } = req.body;
  const target = [parseFloat(x), parseFloat(y), parseFloat(z)];

  jibo.behavior.lookAtPosition(target[0], target[1], target[2]);
  res.json({ ok: true });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Look at a tracked entity. Body: { entityId, track = true }; `track` is
// coerced to a boolean before being passed on.
app.post('/api/look/entity', (req, res) => {
  const { entityId, track = true } = req.body;
  const shouldTrack = Boolean(track);

  jibo.behavior.lookAtEntity(entityId, shouldTrack);
  res.json({ ok: true });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Nudge the head by a relative amount. Body: { dTheta = 0, dPsi = 0 }.
// Broadcasts a status snapshot, then replies with the client's current angles.
app.post('/api/look/nudge', (req, res) => {
  const { dTheta = 0, dPsi = 0 } = req.body;
  const deltaTheta = parseFloat(dTheta);
  const deltaPsi = parseFloat(dPsi);

  jibo.behavior.nudge(deltaTheta, deltaPsi);
  broadcastStatus();
  res.json({ ok: true, angles: jibo.currentAngles });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Say — awaits full speech completion; an AbortController enables mid-speech
// cancel. Body: { text } (required). A new request first aborts any speech
// still in progress. Replies { aborted } once the speech has ended, failed,
// or been cancelled.
app.post('/api/say', async function(req, res) {
  const { text } = req.body;
  if (!text) return res.status(400).json({ error: 'text required' });

  if (activeSayAbort) { activeSayAbort(); activeSayAbort = null; }

  const controller = new AbortController();
  const abortThisSay = () => controller.abort();
  activeSayAbort = abortThisSay;

  try {
    await jibo.behavior.say(stripThinking(text), { signal: controller.signal });
  } catch (err) {
    if (err.code !== 'SAY_TIMEOUT') console.error('[say]', err.message);
  } finally {
    // Clear the handle only if it is still ours. A newer /api/say may have
    // aborted this one and installed its own handle while we were awaiting;
    // nulling unconditionally would make that newer speech uncancellable.
    if (activeSayAbort === abortThisSay) activeSayAbort = null;
  }

  res.json({ aborted: controller.signal.aborted });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Listen — starts local ASR and returns its txId immediately so the browser
// can correlate the onListenResult / onStop events it receives over WebSocket.
// Body: { maxSpeech = 10000, maxNoSpeech = 5000 }.
app.post('/api/listen', (req, res) => {
  const { maxSpeech = 10000, maxNoSpeech = 5000 } = req.body;

  // Argument order is (maxNoSpeech, maxSpeech).
  activeListenTxId = jibo._conn.listenLocalASR(maxNoSpeech, maxSpeech);
  res.json({ txId: activeListenTxId });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Camera
// Take a photo. Body: { camera = 'Right', resolution = 'HighRes' }.
// Fire-and-forget: the reply only carries the txId. The resulting
// onTakePhoto event is picked up by the _conn 'event' listener.
app.post('/api/photo', (req, res) => {
  const { camera = 'Right', resolution = 'HighRes' } = req.body;
  const photoTxId = jibo._conn.takePhoto(camera, resolution);
  res.json({ txId: photoTxId });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Start the video stream. Fire-and-forget: replies with the txId; the browser
// receives onVideoReady through the WebSocket event broadcast.
app.post('/api/video/start', (req, res) => {
  const videoTxId = jibo._conn.startVideo();
  res.json({ txId: videoTxId });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Stop the video stream via the camera manager and acknowledge.
app.post('/api/video/stop', (req, res) => {
  jibo.camera.stopVideo();
  res.json({ ok: true });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Display
// Show the eye on the robot's screen and acknowledge.
app.post('/api/display/eye', (req, res) => {
  jibo.display.showEye();
  res.json({ ok: true });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Play a named animation. Body: { name } (required, else 400).
app.post('/api/display/anim', (req, res) => {
  const animName = req.body.name;
  if (!animName) {
    res.status(400).json({ error: 'name required' });
    return;
  }

  // Fire-and-forget; awaiting would hold the response open.
  jibo._conn.playAnim(animName);
  res.json({ ok: true });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Show text on the robot's screen. Body: { text } (required, else 400).
app.post('/api/display/text', (req, res) => {
  const displayText = req.body.text;
  if (!displayText) {
    res.status(400).json({ error: 'text required' });
    return;
  }

  jibo.display.showText(displayText);
  res.json({ ok: true });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Show an image on the robot's screen. Body: { src } (required, else 400).
app.post('/api/display/image', (req, res) => {
  const imageSrc = req.body.src;
  if (!imageSrc) {
    res.status(400).json({ error: 'src required' });
    return;
  }

  jibo.display.showImage(imageSrc);
  res.json({ ok: true });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Attention & volume — fire-and-forget is fine for these control ops.
// Set the attention mode. Body: { mode } (required, else 400).
app.post('/api/attention', (req, res) => {
  const attentionMode = req.body.mode;
  if (!attentionMode) {
    res.status(400).json({ error: 'mode required' });
    return;
  }

  jibo.behavior.setAttention(attentionMode);
  res.json({ ok: true });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Set the volume. Body: { level } — required, but 0 is accepted because only
// null/undefined are rejected. The value is parsed with parseFloat.
app.post('/api/volume', (req, res) => {
  const requestedLevel = req.body.level;
  if (requestedLevel == null) {
    res.status(400).json({ error: 'level required' });
    return;
  }

  jibo.audio.setVolume(parseFloat(requestedLevel));
  res.json({ ok: true });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Cancel
// Cancel an arbitrary robot transaction. Body: { txId } (required, else 400).
app.post('/api/cancel', (req, res) => {
  const targetTxId = req.body.txId;
  if (!targetTxId) {
    res.status(400).json({ error: 'txId required' });
    return;
  }

  jibo._conn.cancel(targetTxId);
  res.json({ ok: true });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Abort the speech in progress, if any; always acknowledges.
app.post('/api/say/cancel', (req, res) => {
  if (activeSayAbort) {
    activeSayAbort();
    activeSayAbort = null;
  }
  res.json({ ok: true });
});
|
2026-04-19 02:40:41 -04:00
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Cancel the listen in progress, if any; always acknowledges.
app.post('/api/listen/cancel', (req, res) => {
  if (activeListenTxId) {
    jibo._conn.cancel(activeListenTxId);
    activeListenTxId = null;
  }
  res.json({ ok: true });
});
|
|
|
|
|
|
|
|
|
|
// Abort the LLM request in flight, if any; always acknowledges.
app.post('/api/llm/cancel', (req, res) => {
  if (activeLlmAbort) {
    activeLlmAbort();
    activeLlmAbort = null;
  }
  res.json({ ok: true });
});
|
|
|
|
|
|
|
|
|
|
// Interrupt all active operations (used by the hotword override): speech
// first, then the LLM request, then the listen. Always acknowledges.
app.post('/api/interrupt', (req, res) => {
  if (activeSayAbort) {
    activeSayAbort();
    activeSayAbort = null;
  }

  if (activeLlmAbort) {
    activeLlmAbort();
    activeLlmAbort = null;
  }

  if (activeListenTxId) {
    jibo._conn.cancel(activeListenTxId);
    activeListenTxId = null;
  }

  res.json({ ok: true });
});
|
|
|
|
|
|
|
|
|
|
// Config / status
// Report the LLM settings the browser should start from. Unset environment
// values come back as empty strings; sessionMode is true when LLM_SESSION_KEY
// is set. The key itself is not included.
app.get('/api/config', (req, res) => {
  const { LLM_ENDPOINT, LLM_MODEL, LLM_SESSION_KEY } = process.env;

  res.json({
    llmEndpoint: LLM_ENDPOINT || '',
    llmModel: LLM_MODEL || '',
    llmSystemPrompt: LLM_SYSTEM_PROMPT || '',
    sessionMode: Boolean(LLM_SESSION_KEY),
  });
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Return the robot's current connection snapshot, including whether the video
// stream is active.
app.get('/api/status', (req, res) => {
  const { connected, sessionID, currentAngles, videoStreamActive } = jibo;
  res.json({ connected, sessionID, angles: currentAngles, videoStreamActive });
});
|
2026-04-19 02:40:41 -04:00
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// LLM chat proxy.
// Body: { messages = [], endpoint, model, systemPrompt }. Request values take
// precedence over the LLM_* environment variables, which take precedence over
// the built-in defaults. A new request aborts any LLM request still in flight.
// Replies { reply, sessionMode } on success, { error: 'cancelled' } when the
// request was aborted, or a 502 with { error } on any other failure.
//
// NOTE(review): `endpoint` is chosen by the client, yet the Authorization
// header (LLM_API_KEY) and LLM_HEADERS are attached to whatever URL is given.
// Confirm that this server is only reachable by trusted clients.
app.post('/api/llm/chat', async function(req, res) {
  const { messages = [], endpoint, model, systemPrompt } = req.body;
  const url = endpoint || process.env.LLM_ENDPOINT || 'http://localhost:11434/v1/chat/completions';
  const mdl = model || process.env.LLM_MODEL || 'llama3';
  const sysProm = systemPrompt || LLM_SYSTEM_PROMPT || '';
  const apiKey = process.env.LLM_API_KEY || '';
  const sessionKey = process.env.LLM_SESSION_KEY || '';

  const allMessages = sysProm
    ? [{ role: 'system', content: sysProm }].concat(messages)
    : messages;

  const headers = {};
  if (apiKey) headers['Authorization'] = 'Bearer ' + apiKey;
  try {
    // Optional extra headers, supplied as a JSON object in LLM_HEADERS.
    const extra = process.env.LLM_HEADERS ? JSON.parse(process.env.LLM_HEADERS) : {};
    Object.assign(headers, extra);
  } catch (e) { console.warn('[llm] LLM_HEADERS is not valid JSON — ignored'); }

  const body = { model: mdl, messages: allMessages, stream: false };
  if (sessionKey) body.user = sessionKey;

  if (activeLlmAbort) activeLlmAbort();
  const { promise, abort } = httpPost(url, headers, body);
  activeLlmAbort = abort;

  // Clear the shared handle only if it still belongs to this request. When a
  // newer chat request aborts this one it installs its own handle first;
  // nulling unconditionally would make that newer request uncancellable.
  const releaseHandle = function() {
    if (activeLlmAbort === abort) activeLlmAbort = null;
  };

  try {
    const result = await promise;
    releaseHandle();
    const reply = (result.choices && result.choices[0] && result.choices[0].message && result.choices[0].message.content || '').trim();
    res.json({ reply, sessionMode: !!sessionKey });
  } catch (err) {
    releaseHandle();
    if (err.message === 'LLM cancelled') return res.json({ error: 'cancelled' });
    console.error('[llm] error:', err.message);
    res.status(502).json({ error: err.message });
  }
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Proxy routes for browser → Jibo media
// Stream a robot media URI to the browser. Query: ?uri=/path (required).
// `uri` must be a single string starting with '/'. A repeated or array-style
// query parameter is not a string and is rejected with 400 instead of
// throwing on `.startsWith`.
app.get('/proxy/stream', function(req, res) {
  const { uri } = req.query;
  if (typeof uri !== 'string' || !uri.startsWith('/')) {
    return res.status(400).json({ error: 'invalid uri' });
  }
  proxyJiboStream(uri, res);
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Fetch a robot photo URI for the browser. Query: ?uri=/path (required).
// `uri` must be a single string starting with '/'. A repeated or array-style
// query parameter is not a string and is rejected with 400 instead of
// throwing on `.startsWith`.
app.get('/proxy/photo', function(req, res) {
  const { uri } = req.query;
  if (typeof uri !== 'string' || !uri.startsWith('/')) {
    return res.status(400).json({ error: 'invalid uri' });
  }
  proxyJiboFetch(uri, res);
});
|
|
|
|
|
|
|
|
|
|
// ── HTTP + WebSocket server ───────────────────────────────────────────────────

// One HTTP server carries both the Express app and the WebSocket endpoint at /ws.
const server = http.createServer(app);

const wss = new WebSocketServer({ server, path: '/ws' });

// Each new browser socket is registered as a subscriber and immediately sent a
// status snapshot; it is unregistered when it closes or errors.
wss.on('connection', (socket) => {
  subscribers.add(socket);

  const snapshot = {
    type: 'status',
    connected: jibo.connected,
    sessionID: jibo.sessionID,
    angles: jibo.currentAngles,
  };
  socket.send(JSON.stringify(snapshot));

  const unsubscribe = () => { subscribers.delete(socket); };
  socket.on('close', unsubscribe);
  socket.on('error', unsubscribe);
});
|
|
|
|
|
|
2026-04-23 02:13:03 -04:00
|
|
|
// Start listening. Once the port is bound, the wake-word watcher is set up and
// the connection to the robot is started. A failed initial connect is only
// logged.
server.listen(APP_PORT, () => {
  console.log('Re-Commander-3 running at http://localhost:' + APP_PORT);
  jibo.audio.watchWakeword();
  jibo.connect().catch((err) => {
    console.error('[jibo] connect error:', err.message);
  });
});
|