- Track.lookAt: was calling client.user.lookAtEntity (undefined), now correctly routes to client.behavior.lookAtEntity — fixes unhandled promise rejections on every face-detection event - connection: httpGet/httpGetStream had no socket timeout; added 15 s req.setTimeout so fetchBuffer/pipe reject instead of hanging forever - connection: _txSend silently dropped commands when session not yet ready and returned a dead txId, causing callers to hang for the full timeout; now throws immediately with code NOT_READY Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
18 KiB
rom-control
Robust client for the Jibo ROM WebSocket API (port 8160).
Requires: Node.js ≥ 16, ws ^8.14.2
// CommonJS
const { Client, AttentionMode } = require('rom-control');
// ESM
import { Client, AttentionMode } from 'rom-control';
Quick Start
const { Client, AttentionMode } = require('rom-control');
const client = new Client({ host: '192.168.1.217' });
client.once('ready', () => {
console.log('Connected, session:', client.sessionID);
});
client.on('trackCreate', async (track) => {
await track.lookAt();
});
await client.connect();
await client.behavior.setAttention(AttentionMode.Engaged);
await client.behavior.say("Hello! I'm Jibo.");
try {
const speech = await client.audio.awaitSpeech({ mode: 'local', time: 15000 });
await client.behavior.say(`You said: ${speech.content}`);
} catch {
// SPEECH_TIMEOUT — no input detected
}
client.destroy();
new Client(options?)
| Option | Type | Default | Description |
|---|---|---|---|
host |
string |
'192.168.1.217' |
Robot IP address |
port |
number |
8160 |
ROM WebSocket port |
appId |
string |
'ImmaLittleTeapot' |
ACO app identifier |
autoReconnect |
boolean |
true |
Reconnect on disconnect |
reconnectDelay |
number |
3000 |
ms between reconnect attempts |
heartbeatInterval |
number |
9000 |
ms between GetConfig keepalives |
autoHeartbeat |
boolean |
true |
Send GetConfig keepalives automatically |
autoSubscribe |
boolean |
true |
Subscribe Entity/Motion/HeadTouch/ScreenGesture on connect |
Instance properties
| Property | Type | Description |
|---|---|---|
connected |
boolean |
WebSocket is open and session established |
sessionID |
string |
Current ROM session ID ('' when disconnected) |
currentAngles |
[number, number] |
Last [theta_deg, psi_deg] sent via lookAtAngle |
videoStreamActive |
boolean |
A video stream command is currently active |
tracks |
Map<number, Track> |
Live map of tracked entities keyed by EntityID |
behavior |
BehaviorManager |
Attention, speech, head motion, animations |
audio |
AudioManager |
Microphone input, volume, wakeword |
camera |
CameraManager |
Still photos and video streaming |
display |
DisplayManager |
Screen output |
assets |
AssetManager |
Remote asset caching |
Lifecycle
client.connect() → Promise<void>
Posts the ACO /request to unlock ROM commands, opens the WebSocket, and waits for the session to be established. Emits 'ready' on success.
await client.connect();
client.disconnect()
Closes the WebSocket and stops auto-reconnect. The instance can be reconnected with connect() again.
client.destroy()
Disconnects, stops the wakeword listener, and removes all event listeners. The instance cannot be reused.
Events
All events are emitted on the Client instance (extends EventEmitter).
| Event | Args | Description |
|---|---|---|
'ready' |
— | Connected and session established |
'disconnect' |
— | WebSocket closed; client.tracks is cleared |
'error' |
err: Error |
Connection or protocol error |
'trackCreate' |
track: Track |
Entity first detected |
'trackUpdate' |
oldTrack: Track, newTrack: Track |
Entity position updated |
'trackDelete' |
track: Track |
Entity lost |
'motionDetected' |
motion: Motion |
Motion detected |
'headTouch' |
event: HeadTouchEvent |
Head pad touched |
'gesture' |
event: GestureEvent |
Screen tapped or swiped |
'hotword' |
event: HotwordEvent |
"Hey Jibo" detected |
client.on('trackCreate', async (track) => {
console.log('Saw entity', track.id, 'at', track.screenCoords);
await track.lookAt();
});
client.on('headTouch', (event) => {
console.log('Touched pads:', event.activePads);
});
client.on('gesture', (event) => {
if (event.isTap) console.log('Tapped at', event.coordinate);
if (event.isSwipe) console.log('Swiped', event.direction);
});
client.behavior — BehaviorManager
Controls Jibo's persona: attention, speech, head motion, and animations. All methods return Promises that resolve when the action physically completes.
client.behavior.setAttention(mode) → Promise<void>
Set Jibo's engagement mode.
await client.behavior.setAttention(AttentionMode.Engaged);
See AttentionMode for all values.
client.behavior.say(text, options?) → Promise<void>
Speak text or ESML. Automatically sanitizes input and chunks long text. Resolves when speech finishes physically.
| Option | Type | Default | Description |
|---|---|---|---|
maxChunkLen |
number |
450 |
Max chars per ROM Say command |
maxTotal |
number |
3000 |
Max total chars; excess trimmed with … |
chunkDelay |
number |
600 |
ms pause between chunks |
signal |
AbortSignal |
null |
Cancel mid-speech |
// Simple
await client.behavior.say("Hello! I'm Jibo.");
// With ESML tags
await client.behavior.say("<anim cat='excited' nonBlocking='true'/> Great to meet you!");
// Cancellable
const controller = new AbortController();
setTimeout(() => controller.abort(), 3000);
await client.behavior.say(longText, { signal: controller.signal });
Throws { code: 'SAY_TIMEOUT' } if the robot stops responding mid-speech and it wasn't cancelled via signal.
client.behavior.lookAtAngle(theta, psi, options?) → Promise<void>
Look at an angle in degrees. Resolves when onLookAtAchieved fires. If a look is already in-flight, the new angle is queued and the promise resolves immediately.
- theta — yaw, degrees. Positive = right. Clamped ±180°.
- psi — pitch, degrees. Positive = up. Clamped ±30°.
| Option | Type | Default |
|---|---|---|
track |
boolean |
false |
timeout |
number |
5000 ms |
await client.behavior.lookAtAngle(30, 10); // look right and slightly up
await client.behavior.lookAtAngle(0, 0); // center
client.behavior.nudge(dTheta, dPsi) → Promise<void>
Nudge head by a relative delta from the current position.
await client.behavior.nudge(15, 0); // rotate 15° right
client.behavior.lookAtScreen(x, y) → Promise<void>
Look at a pixel coordinate on the camera image (640×480).
client.behavior.lookAtPosition(x, y, z) → Promise<void>
Look at a world-relative 3D position in millimetres.
client.behavior.lookAtEntity(entityId, track?) → Promise<void>
Look at a tracked entity by ID. track defaults to true. When tracking, the promise resolves after the first onLookAtAchieved rather than waiting for tracking to end.
client.on('trackCreate', async (track) => {
await track.lookAt(); // shorthand — calls this internally
});
client.behavior.lookAt(target, options?) → Promise<void>
Raw LookAt for advanced use. target is the ROM LookAtTarget shape:
| Shape | Description |
|---|---|
{ Angle: [theta_rad, psi_rad] } |
Radians |
{ ScreenCoords: [x, y] } |
Camera pixels |
{ Position: [x, y, z] } |
World mm |
{ Entity: id } |
Entity ID |
Options: track (boolean, default false), levelHead (boolean, default false), timeout (ms, default 5000).
client.behavior.playAnim(animName) → Promise<void>
Play a named animation. Resolves when the animation finishes.
await client.behavior.playAnim('pleased_01');
client.behavior.playAnimCat(cat, options?) → Promise<void>
Play an animation by emotional category.
| Option | Type | Default | Description |
|---|---|---|---|
filter |
string|null |
null |
e.g. 'music, rom-upbeat' |
nonBlocking |
boolean |
false |
If true, resolves immediately |
await client.behavior.playAnimCat('excited');
await client.behavior.playAnimCat('dance', { filter: 'music, rom-upbeat' });
client.behavior.playAnimCat('happy', { nonBlocking: true }); // fire and forget
client.audio — AudioManager
client.audio.awaitSpeech(options?) → Promise<SpeechResult>
Listen for speech and resolve with the transcript. Rejects with { code: 'SPEECH_TIMEOUT' } if no speech is detected within time.
| Option | Type | Default | Description |
|---|---|---|---|
mode |
'local' | 'cloud' |
'local' |
'local' uses on-robot ASR (no cloud required) |
time |
number |
15000 |
Max ms to wait for speech |
noSpeechTime |
number |
5000 |
Max ms of silence before giving up (cloud mode) |
languageCode |
string |
'en-US' |
try {
const speech = await client.audio.awaitSpeech({ mode: 'local', time: 10000 });
console.log(speech.content); // transcript string
console.log(speech.languageCode); // 'en-US'
} catch (err) {
if (err.code === 'SPEECH_TIMEOUT') console.log('Nothing heard.');
}
client.audio.setVolume(level) → Promise<void>
Set audio mixer volume. level is 0.0–1.0.
client.audio.watchWakeword(asrPort?)
Connect to the always-on resident ASR wakeword stream (port 8088). Fires 'hotword' events on the Client with a HotwordEvent payload. No-op if already watching.
client.audio.watchWakeword();
client.on('hotword', (event) => {
console.log(`"${event.utterance}" — score ${event.score}`);
});
client.audio.stopWakeword()
Stop the wakeword listener.
client.camera — CameraManager
client.camera.takePhoto(options?) → Promise<Photo>
Take a still photo. Resolves with a Photo object once the robot signals ready.
| Option | Type | Default | Options |
|---|---|---|---|
camera |
string |
'Right' |
Camera.Left, Camera.Right |
resolution |
string |
'HighRes' |
Resolution.* values |
distortion |
boolean |
false |
|
timeout |
number |
15000 ms |
const photo = await client.camera.takePhoto({ resolution: Resolution.MedRes });
const buffer = await photo.fetchBuffer();
fs.writeFileSync('shot.jpg', buffer);
client.camera.startVideo(options?) → Promise<VideoStream>
Start a video stream. Resolves with a VideoStream once the robot signals ready.
| Option | Type | Default |
|---|---|---|
type |
string |
VideoType.Normal ('NORMAL') |
timeout |
number |
10000 ms |
const stream = await client.camera.startVideo();
console.log(stream.uri); // Jibo URI for the MJPEG stream
await stream.pipe(fs.createWriteStream('out.mjpeg'));
stream.stop();
client.camera.stopVideo()
Stop the active video stream.
client.display — DisplayManager
All display methods are fire-and-forget (no await needed).
client.display.showEye(name?)
Show Jibo's eye animation. Default: 'default'.
client.display.showText(text, name?)
Display text on Jibo's screen. name is the view slot name (default 'view').
client.display.showImage(src, name?)
Display an image on Jibo's screen. src is a URL.
client.assets — AssetManager
client.assets.fetch(uri, name, timeout?) → Promise<void>
Download a remote file and cache it on the robot under name. Rejects with { code: 'ASSET_FAILED' } on error or { code: 'ASSET_TIMEOUT' } after timeout ms (default 30000).
await client.assets.fetch('https://example.com/sound.mp3', 'mysound');
client.assets.unload(name)
Remove a cached asset from the robot by name.
Structures
Rich objects emitted by events or returned from manager methods. All have a _client back-reference for calling methods.
Track
Emitted by 'trackCreate', 'trackUpdate', 'trackDelete' and stored in client.tracks.
| Property | Type | Description |
|---|---|---|
id |
number |
ROM EntityID |
screenCoords |
{ x, y } | null |
Position on camera image |
worldCoords |
{ x, y, z } | null |
3D world position in mm |
track.lookAt(track = true) // → Promise<void>
SpeechResult
Resolved by client.audio.awaitSpeech().
| Property | Type |
|---|---|
content |
string — transcript |
languageCode |
string — e.g. 'en-US' |
Photo
Resolved by client.camera.takePhoto().
| Property | Type |
|---|---|
uri |
string — Jibo-internal URI |
name |
string |
angleTarget |
object | null |
positionTarget |
object | null |
photo.fetchBuffer() // → Promise<Buffer>
photo.pipe(writableStream) // → Promise<void>
VideoStream
Resolved by client.camera.startVideo().
| Property | Type |
|---|---|
uri |
string — Jibo-internal URI for the MJPEG stream |
active |
boolean |
stream.pipe(writableStream) // → Promise<void>
stream.stop()
Motion
Emitted by 'motionDetected'.
| Property | Type |
|---|---|
zones |
MotionZone[] |
Each MotionZone: { screenCoords: {x,y}|null, worldCoords: {x,y,z}|null, intensity: number|null }.
HeadTouchEvent
Emitted by 'headTouch'.
| Property | Type |
|---|---|
pads |
boolean[] — all 6 pads in order |
activePads |
string[] — names of currently-pressed pads |
Pad order / names: frontLeft, middleLeft, backLeft, frontRight, middleRight, backRight.
event.isTouched('frontLeft') // → boolean
GestureEvent
Emitted by 'gesture'.
| Property | Type |
|---|---|
type |
'Tap' | 'Swipe' |
coordinate |
{ x, y } | null — tap position |
direction |
'Up' | 'Down' | 'Left' | 'Right' | null — swipe direction |
isTap |
boolean |
isSwipe |
boolean |
HotwordEvent
Emitted by 'hotword'.
| Property | Type |
|---|---|
utterance |
string — e.g. 'hey jibo' |
score |
number |
timestamp |
string — ISO 8601 |
Constants
const {
AttentionMode, // Off, Idle, Disengage, Engaged, Speaking, Fixated, Attractable, Menu, Command
Camera, // Left, Right
Resolution, // HighRes, MedRes, LowRes, MicroRes
VideoType, // Normal ('NORMAL'), Debug ('DEBUG')
GestureType, // Tap, SwipeDown, SwipeUp, SwipeRight, SwipeLeft
HEAD_TOUCH_PADS // ['frontLeft','middleLeft','backLeft','frontRight','middleRight','backRight']
} = require('rom-control');
AttentionMode
| Value | Description |
|---|---|
Off |
Disengage all attention systems |
Idle |
Low-power idle |
Disengage |
Stop tracking, return to neutral |
Engaged |
Actively track and engage |
Speaking |
Speaking mode |
Fixated |
Lock gaze on current target |
Attractable |
Look toward movement and sound |
Menu |
Menu interaction mode |
Command |
Command input mode |
ESML Utilities
Exported for callers that build raw ESML strings. Used internally by client.behavior.say().
sanitizeEsml(text) → string
Strip characters rejected by Jibo's ROM parser: emoji, bare &, non-ASCII, markdown formatting, <ssa> tags, and newlines. Preserves valid ESML tags (<anim>, <break>, <style>, etc.).
chunkEsml(text, maxLen?) → string[]
Split sanitized ESML into chunks of at most maxLen chars (default 450), cutting at sentence boundaries then word boundaries. Every chunk is guaranteed to contain at least one XML tag (required by Jibo's TTS parser).
Using client._conn (advanced)
client._conn is the internal RomConnection instance — the raw txId-based layer. It is intentionally not part of the public API, but is accessible when you need capabilities the managers don't cover:
- Raw event firehose —
client._conn.on('event', (txId, body) => ...) receives every robot message unfiltered, useful for forwarding events to a UI. - Cancel by txId —
client._conn.cancel(txId) when you hold a txId from a fire-and-forget call. - Fire-and-forget with txId —
client._conn.listenLocalASR(), client._conn.takePhoto(), etc. when you need the txId to correlate async events arriving through a separate channel (e.g. a WebSocket broadcast to a browser). - Low-level LookAt —
client._conn.lookAt(target, trackFlag) for tracking screen coordinates.
// Example: forward all raw events to connected browser clients
client._conn.on('event', (txId, body) => {
broadcast({ type: 'jiboEvent', txId, body });
});
// Example: fire listen and return txId to a REST caller for WebSocket correlation
const txId = client._conn.listenLocalASR(5000, 10000);
res.json({ txId });
Complete Example
const { Client, AttentionMode, Resolution } = require('rom-control');
async function main() {
const client = new Client({ host: '192.168.1.217' });
client.once('ready', () => {
console.log('Connected, session:', client.sessionID);
});
// Track entities in client.tracks automatically
client.on('trackCreate', async (track) => {
console.log('Saw person at', track.screenCoords);
await track.lookAt();
});
client.on('headTouch', (event) => {
if (event.isTouched('frontLeft')) {
client.behavior.say('Ouch, that tickles!');
}
});
client.on('hotword', () => {
client.behavior.playAnimCat('excited', { nonBlocking: true });
});
await client.connect();
// Greet
await client.behavior.setAttention(AttentionMode.Engaged);
await client.behavior.say("<anim cat='happy' nonBlocking='true'/> Hello, I'm Jibo!");
// Take a photo
const photo = await client.camera.takePhoto({ resolution: Resolution.HighRes });
const buf = await photo.fetchBuffer();
require('fs').writeFileSync('jibo-shot.jpg', buf);
// Listen for a response
try {
const speech = await client.audio.awaitSpeech({ mode: 'local', time: 12000 });
await client.behavior.say(`You said: ${speech.content}`);
} catch {
await client.behavior.say("I didn't catch that.");
}
// Watch for wakeword in the background
client.audio.watchWakeword();
// Clean up after 60 seconds
setTimeout(() => client.destroy(), 60_000);
}
main().catch(console.error);