Initial release: v2.0.0 discord.js-style OOP client for Jibo ROM

This commit is contained in:
Paskooter
2026-04-23 01:15:20 -04:00
commit cede3d3641
23 changed files with 2205 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
node_modules/
npm-debug.log*
.env

606
API.md Normal file
View File

@@ -0,0 +1,606 @@
# rom-control
Discord.js-style OOP client for the Jibo ROM WebSocket API (port 8160).
**Requires:** Node.js ≥ 16, `ws` ^8.14.2
```js
// CommonJS
const { Client, AttentionMode } = require('rom-control');
// ESM
import { Client, AttentionMode } from 'rom-control';
```
---
## Quick Start
```js
const { Client, AttentionMode } = require('rom-control');

(async () => {
  const client = new Client({ host: '192.168.1.217' });
  client.once('ready', () => {
    console.log('Connected, session:', client.sessionID);
  });
  client.on('trackCreate', async (track) => {
    await track.lookAt();
  });
  await client.connect();
  await client.behavior.setAttention(AttentionMode.Engaged);
  await client.behavior.say("Hello! I'm Jibo.");
  try {
    const speech = await client.audio.awaitSpeech({ mode: 'local', time: 15000 });
    await client.behavior.say(`You said: ${speech.content}`);
  } catch {
    // SPEECH_TIMEOUT — no input detected
  }
  client.destroy();
})();
```
---
## `new Client(options?)`
| Option | Type | Default | Description |
|---|---|---|---|
| `host` | `string` | `'192.168.1.217'` | Robot IP address |
| `port` | `number` | `8160` | ROM WebSocket port |
| `appId` | `string` | `'ImmaLittleTeapot'` | ACO app identifier |
| `autoReconnect` | `boolean` | `true` | Reconnect on disconnect |
| `reconnectDelay` | `number` | `3000` | ms between reconnect attempts |
| `heartbeatInterval` | `number` | `9000` | ms between GetConfig keepalives |
| `autoHeartbeat` | `boolean` | `true` | Send GetConfig keepalives automatically |
| `autoSubscribe` | `boolean` | `true` | Subscribe Entity/Motion/HeadTouch/ScreenGesture on connect |
### Instance properties
| Property | Type | Description |
|---|---|---|
| `connected` | `boolean` | WebSocket is open and session established |
| `sessionID` | `string` | Current ROM session ID (`''` when disconnected) |
| `currentAngles` | `[number, number]` | Last `[theta_deg, psi_deg]` sent via `lookAtAngle` |
| `videoStreamActive` | `boolean` | A video stream command is currently active |
| `tracks` | `Map<number, Track>` | Live map of tracked entities keyed by EntityID |
| `behavior` | `BehaviorManager` | Attention, speech, head motion, animations |
| `audio` | `AudioManager` | Microphone input, volume, wakeword |
| `camera` | `CameraManager` | Still photos and video streaming |
| `display` | `DisplayManager` | Screen output |
| `assets` | `AssetManager` | Remote asset caching |
---
## Lifecycle
### `client.connect()` → `Promise<void>`
Posts the ACO `/request` to unlock ROM commands, opens the WebSocket, and waits for the session to be established. Emits `'ready'` on success.
```js
await client.connect();
```
### `client.disconnect()`
Closes the WebSocket and stops auto-reconnect. The instance can be reconnected with `connect()` again.
### `client.destroy()`
Disconnects, stops the wakeword listener, and removes all event listeners. The instance cannot be reused.
---
## Events
All events are emitted on the `Client` instance (extends `EventEmitter`).
| Event | Args | Description |
|---|---|---|
| `'ready'` | — | Connected and session established |
| `'disconnect'` | — | WebSocket closed; `client.tracks` is cleared |
| `'error'` | `err: Error` | Connection or protocol error |
| `'trackCreate'` | `track: Track` | Entity first detected |
| `'trackUpdate'` | `oldTrack: Track, newTrack: Track` | Entity position updated |
| `'trackDelete'` | `track: Track` | Entity lost |
| `'motionDetected'` | `motion: Motion` | Motion detected |
| `'headTouch'` | `event: HeadTouchEvent` | Head pad touched |
| `'gesture'` | `event: GestureEvent` | Screen tapped or swiped |
| `'hotword'` | `event: HotwordEvent` | "Hey Jibo" detected |
```js
client.on('trackCreate', async (track) => {
console.log('Saw entity', track.id, 'at', track.screenCoords);
await track.lookAt();
});
client.on('headTouch', (event) => {
console.log('Touched pads:', event.activePads);
});
client.on('gesture', (event) => {
if (event.isTap) console.log('Tapped at', event.coordinate);
if (event.isSwipe) console.log('Swiped', event.direction);
});
```
---
## `client.behavior` — BehaviorManager
Controls Jibo's persona: attention, speech, head motion, and animations. All methods return Promises that resolve when the action physically completes.
### `client.behavior.setAttention(mode)` → `Promise<void>`
Set Jibo's engagement mode.
```js
await client.behavior.setAttention(AttentionMode.Engaged);
```
See [AttentionMode](#attentionmode) for all values.
---
### `client.behavior.say(text, options?)` → `Promise<void>`
Speak text or ESML. Automatically sanitizes input and chunks long text. Resolves when speech finishes physically.
| Option | Type | Default | Description |
|---|---|---|---|
| `maxChunkLen` | `number` | `450` | Max chars per ROM `Say` command |
| `maxTotal` | `number` | `3000` | Max total chars; excess trimmed with `…` |
| `chunkDelay` | `number` | `600` | ms pause between chunks |
| `signal` | `AbortSignal` | `null` | Cancel mid-speech |
```js
// Simple
await client.behavior.say("Hello! I'm Jibo.");
// With ESML tags
await client.behavior.say("<anim cat='excited' nonBlocking='true'/> Great to meet you!");
// Cancellable
const controller = new AbortController();
setTimeout(() => controller.abort(), 3000);
await client.behavior.say(longText, { signal: controller.signal });
```
Throws `{ code: 'SAY_TIMEOUT' }` if the robot stops responding mid-speech and it wasn't cancelled via signal.
---
### `client.behavior.lookAtAngle(theta, psi, options?)` → `Promise<void>`
Look at an angle in degrees. Resolves when `onLookAtAchieved` fires. If a look is already in-flight, the new angle is queued and the promise resolves immediately.
- **theta** — yaw, degrees. Positive = right. Clamped ±180°.
- **psi** — pitch, degrees. Positive = up. Clamped ±30°.
| Option | Type | Default |
|---|---|---|
| `track` | `boolean` | `false` |
| `timeout` | `number` | `5000` ms |
```js
await client.behavior.lookAtAngle(30, 10); // look right and slightly up
await client.behavior.lookAtAngle(0, 0); // center
```
### `client.behavior.nudge(dTheta, dPsi)` → `Promise<void>`
Nudge head by a relative delta from the current position.
```js
await client.behavior.nudge(15, 0); // rotate 15° right
```
### `client.behavior.lookAtScreen(x, y)` → `Promise<void>`
Look at a pixel coordinate on the camera image (640×480).
### `client.behavior.lookAtPosition(x, y, z)` → `Promise<void>`
Look at a world-relative 3D position in millimetres.
### `client.behavior.lookAtEntity(entityId, track?)` → `Promise<void>`
Look at a tracked entity by ID. `track` defaults to `true`. When tracking, the promise resolves after the first `onLookAtAchieved` rather than waiting for tracking to end.
```js
client.on('trackCreate', async (track) => {
await track.lookAt(); // shorthand — calls this internally
});
```
### `client.behavior.lookAt(target, options?)` → `Promise<void>`
Raw LookAt for advanced use. `target` is the ROM `LookAtTarget` shape:
| Shape | Description |
|---|---|
| `{ Angle: [theta_rad, psi_rad] }` | Radians |
| `{ ScreenCoords: [x, y] }` | Camera pixels |
| `{ Position: [x, y, z] }` | World mm |
| `{ Entity: id }` | Entity ID |
Options: `track` (boolean, default `false`), `levelHead` (boolean, default `false`), `timeout` (ms, default `5000`).
---
### `client.behavior.playAnim(animName)` → `Promise<void>`
Play a named animation. Resolves when the animation finishes.
```js
await client.behavior.playAnim('pleased_01');
```
### `client.behavior.playAnimCat(cat, options?)` → `Promise<void>`
Play an animation by emotional category.
| Option | Type | Default | Description |
|---|---|---|---|
| `filter` | `string\|null` | `null` | e.g. `'music, rom-upbeat'` |
| `nonBlocking` | `boolean` | `false` | If true, resolves immediately |
```js
await client.behavior.playAnimCat('excited');
await client.behavior.playAnimCat('dance', { filter: 'music, rom-upbeat' });
client.behavior.playAnimCat('happy', { nonBlocking: true }); // fire and forget
```
---
## `client.audio` — AudioManager
### `client.audio.awaitSpeech(options?)` → `Promise<SpeechResult>`
Listen for speech and resolve with the transcript. Rejects with `{ code: 'SPEECH_TIMEOUT' }` if no speech is detected within `time`.
| Option | Type | Default | Description |
|---|---|---|---|
| `mode` | `'local' \| 'cloud'` | `'local'` | `'local'` uses on-robot ASR (no cloud required) |
| `time` | `number` | `15000` | Max ms to wait for speech |
| `noSpeechTime` | `number` | `5000` | Max ms of silence before giving up (cloud mode) |
| `languageCode` | `string` | `'en-US'` | |
```js
try {
const speech = await client.audio.awaitSpeech({ mode: 'local', time: 10000 });
console.log(speech.content); // transcript string
console.log(speech.languageCode); // 'en-US'
} catch (err) {
if (err.code === 'SPEECH_TIMEOUT') console.log('Nothing heard.');
}
```
### `client.audio.setVolume(level)` → `Promise<void>`
Set audio mixer volume. `level` is `0.0`–`1.0`.
### `client.audio.watchWakeword(asrPort?)`
Connect to the always-on resident ASR wakeword stream (port 8088). Fires `'hotword'` events on the `Client` with a `HotwordEvent` payload. No-op if already watching.
```js
client.audio.watchWakeword();
client.on('hotword', (event) => {
console.log(`"${event.utterance}" — score ${event.score}`);
});
```
### `client.audio.stopWakeword()`
Stop the wakeword listener.
---
## `client.camera` — CameraManager
### `client.camera.takePhoto(options?)` → `Promise<Photo>`
Take a still photo. Resolves with a `Photo` object once the robot signals ready.
| Option | Type | Default | Options |
|---|---|---|---|
| `camera` | `string` | `'Right'` | `Camera.Left`, `Camera.Right` |
| `resolution` | `string` | `'HighRes'` | `Resolution.*` values |
| `distortion` | `boolean` | `false` | |
| `timeout` | `number` | `15000` ms | |
```js
const photo = await client.camera.takePhoto({ resolution: Resolution.MedRes });
const buffer = await photo.fetchBuffer();
fs.writeFileSync('shot.jpg', buffer);
```
### `client.camera.startVideo(options?)` → `Promise<VideoStream>`
Start a video stream. Resolves with a `VideoStream` once the robot signals ready.
| Option | Type | Default |
|---|---|---|
| `type` | `string` | `VideoType.Normal` (`'NORMAL'`) |
| `timeout` | `number` | `10000` ms |
```js
const stream = await client.camera.startVideo();
console.log(stream.uri); // Jibo URI for the MJPEG stream
await stream.pipe(fs.createWriteStream('out.mjpeg'));
stream.stop();
```
### `client.camera.stopVideo()`
Stop the active video stream.
---
## `client.display` — DisplayManager
All display methods are fire-and-forget (no await needed).
### `client.display.showEye(name?)`
Show Jibo's eye animation. Default: `'default'`.
### `client.display.showText(text, name?)`
Display text on Jibo's screen. `name` is the view slot name (default `'view'`).
### `client.display.showImage(src, name?)`
Display an image on Jibo's screen. `src` is a URL.
---
## `client.assets` — AssetManager
### `client.assets.fetch(uri, name, timeout?)` → `Promise<void>`
Download a remote file and cache it on the robot under `name`. Rejects with `{ code: 'ASSET_FAILED' }` on error or `{ code: 'ASSET_TIMEOUT' }` after `timeout` ms (default 30000).
```js
await client.assets.fetch('https://example.com/sound.mp3', 'mysound');
```
### `client.assets.unload(name)`
Remove a cached asset from the robot by name.
---
## Structures
Rich objects emitted by events or returned from manager methods. All have a `_client` back-reference for calling methods.
### `Track`
Emitted by `'trackCreate'`, `'trackUpdate'`, `'trackDelete'` and stored in `client.tracks`.
| Property | Type | Description |
|---|---|---|
| `id` | `number` | ROM EntityID |
| `screenCoords` | `{ x, y } \| null` | Position on camera image |
| `worldCoords` | `{ x, y, z } \| null` | 3D world position in mm |
```js
track.lookAt(track = true) // → Promise<void>
```
### `SpeechResult`
Resolved by `client.audio.awaitSpeech()`.
| Property | Type |
|---|---|
| `content` | `string` — transcript |
| `languageCode` | `string` — e.g. `'en-US'` |
### `Photo`
Resolved by `client.camera.takePhoto()`.
| Property | Type |
|---|---|
| `uri` | `string` — Jibo-internal URI |
| `name` | `string` |
| `angleTarget` | `object \| null` |
| `positionTarget` | `object \| null` |
```js
photo.fetchBuffer() // → Promise<Buffer>
photo.pipe(writableStream) // → Promise<void>
```
### `VideoStream`
Resolved by `client.camera.startVideo()`.
| Property | Type |
|---|---|
| `uri` | `string` — Jibo-internal URI for the MJPEG stream |
| `active` | `boolean` |
```js
stream.pipe(writableStream) // → Promise<void>
stream.stop()
```
### `Motion`
Emitted by `'motionDetected'`.
| Property | Type |
|---|---|
| `zones` | `MotionZone[]` |
Each `MotionZone`: `{ screenCoords: {x,y}|null, worldCoords: {x,y,z}|null, intensity: number|null }`.
### `HeadTouchEvent`
Emitted by `'headTouch'`.
| Property | Type |
|---|---|
| `pads` | `boolean[]` — all 6 pads in order |
| `activePads` | `string[]` — names of currently-pressed pads |
Pad order / names: `frontLeft`, `middleLeft`, `backLeft`, `frontRight`, `middleRight`, `backRight`.
```js
event.isTouched('frontLeft') // → boolean
```
### `GestureEvent`
Emitted by `'gesture'`.
| Property | Type |
|---|---|
| `type` | `'Tap' \| 'Swipe'` |
| `coordinate` | `{ x, y } \| null` — tap position |
| `direction` | `'Up' \| 'Down' \| 'Left' \| 'Right' \| null` — swipe direction |
| `isTap` | `boolean` |
| `isSwipe` | `boolean` |
### `HotwordEvent`
Emitted by `'hotword'`.
| Property | Type |
|---|---|
| `utterance` | `string` — e.g. `'hey jibo'` |
| `score` | `number` |
| `timestamp` | `string` — ISO 8601 |
---
## Constants
```js
const {
AttentionMode, // Off, Idle, Disengage, Engaged, Speaking, Fixated, Attractable, Menu, Command
Camera, // Left, Right
Resolution, // HighRes, MedRes, LowRes, MicroRes
VideoType, // Normal ('NORMAL'), Debug ('DEBUG')
GestureType, // Tap, SwipeDown, SwipeUp, SwipeRight, SwipeLeft
HEAD_TOUCH_PADS // ['frontLeft','middleLeft','backLeft','frontRight','middleRight','backRight']
} = require('rom-control');
```
### AttentionMode
| Value | Description |
|---|---|
| `Off` | Disengage all attention systems |
| `Idle` | Low-power idle |
| `Disengage` | Stop tracking, return to neutral |
| `Engaged` | Actively track and engage |
| `Speaking` | Speaking mode |
| `Fixated` | Lock gaze on current target |
| `Attractable` | Look toward movement and sound |
| `Menu` | Menu interaction mode |
| `Command` | Command input mode |
---
## ESML Utilities
Exported for callers that build raw ESML strings. Used internally by `client.behavior.say()`.
### `sanitizeEsml(text)` → `string`
Strip characters rejected by Jibo's ROM parser: emoji, bare `&`, non-ASCII, markdown formatting, `<ssa>` tags, and newlines. Preserves valid ESML tags (`<anim>`, `<break>`, `<style>`, etc.).
### `chunkEsml(text, maxLen?)` → `string[]`
Split sanitized ESML into chunks of at most `maxLen` chars (default 450), cutting at sentence boundaries then word boundaries. Every chunk is guaranteed to contain at least one XML tag (required by Jibo's TTS parser).
---
## Using `client._conn` (advanced)
`client._conn` is the internal `RomConnection` instance — the raw txId-based layer. It is intentionally not part of the public API, but is accessible when you need capabilities the managers don't cover:
- **Raw event firehose** — `client._conn.on('event', (txId, body) => ...)` receives every robot message unfiltered, useful for forwarding events to a UI.
- **Cancel by txId** — `client._conn.cancel(txId)` when you hold a txId from a fire-and-forget call.
- **Fire-and-forget with txId** — `client._conn.listenLocalASR()`, `client._conn.takePhoto()`, etc. when you need the txId to correlate async events arriving through a separate channel (e.g. a WebSocket broadcast to a browser).
- **Low-level LookAt** — `client._conn.lookAt(target, trackFlag)` for tracking screen coordinates.
```js
// Example: forward all raw events to connected browser clients
client._conn.on('event', (txId, body) => {
broadcast({ type: 'jiboEvent', txId, body });
});
// Example: fire listen and return txId to a REST caller for WebSocket correlation
const txId = client._conn.listenLocalASR(5000, 10000);
res.json({ txId });
```
---
## Complete Example
```js
const { Client, AttentionMode, Resolution } = require('rom-control');
async function main() {
const client = new Client({ host: '192.168.1.217' });
client.once('ready', () => {
console.log('Connected, session:', client.sessionID);
});
// Track entities in client.tracks automatically
client.on('trackCreate', async (track) => {
console.log('Saw person at', track.screenCoords);
await track.lookAt();
});
client.on('headTouch', (event) => {
if (event.isTouched('frontLeft')) {
client.behavior.say('Ouch, that tickles!');
}
});
client.on('hotword', () => {
client.behavior.playAnimCat('excited', { nonBlocking: true });
});
await client.connect();
// Greet
await client.behavior.setAttention(AttentionMode.Engaged);
await client.behavior.say("<anim cat='happy' nonBlocking='true'/> Hello, I'm Jibo!");
// Take a photo
const photo = await client.camera.takePhoto({ resolution: Resolution.HighRes });
const buf = await photo.fetchBuffer();
require('fs').writeFileSync('jibo-shot.jpg', buf);
// Listen for a response
try {
const speech = await client.audio.awaitSpeech({ mode: 'local', time: 12000 });
await client.behavior.say(`You said: ${speech.content}`);
} catch {
await client.behavior.say("I didn't catch that.");
}
// Watch for wakeword in the background
client.audio.watchWakeword();
// Clean up after 60 seconds
setTimeout(() => client.destroy(), 60_000);
}
main().catch(console.error);
```

49
index.js Normal file
View File

@@ -0,0 +1,49 @@
'use strict';
const Client = require('./src/Client');
const {
AttentionMode,
Camera,
Resolution,
VideoType,
GestureType,
HEAD_TOUCH_PADS,
} = require('./src/constants');
const Track = require('./src/structures/Track');
const Photo = require('./src/structures/Photo');
const VideoStream = require('./src/structures/VideoStream');
const SpeechResult = require('./src/structures/SpeechResult');
const Motion = require('./src/structures/Motion');
const HeadTouchEvent = require('./src/structures/HeadTouchEvent');
const GestureEvent = require('./src/structures/GestureEvent');
const HotwordEvent = require('./src/structures/HotwordEvent');
const { sanitizeEsml, chunkEsml } = require('./src/util/esml');
module.exports = {
// Main class
Client,
// Constants / enums
AttentionMode,
Camera,
Resolution,
VideoType,
GestureType,
HEAD_TOUCH_PADS,
// Structures (for instanceof checks / JSDoc typing)
Track,
Photo,
VideoStream,
SpeechResult,
Motion,
HeadTouchEvent,
GestureEvent,
HotwordEvent,
// ESML utilities (still useful for callers who build raw ESML strings)
sanitizeEsml,
chunkEsml,
};

26
index.mjs Normal file
View File

@@ -0,0 +1,26 @@
// ESM wrapper — loads the CJS build once and re-exports its surface
// as named exports plus a default export.
import { createRequire } from 'module';

const cjs = createRequire(import.meta.url)('./index.js');

export const {
  Client,
  AttentionMode,
  Camera,
  Resolution,
  VideoType,
  GestureType,
  HEAD_TOUCH_PADS,
  Track,
  Photo,
  VideoStream,
  SpeechResult,
  Motion,
  HeadTouchEvent,
  GestureEvent,
  HotwordEvent,
  sanitizeEsml,
  chunkEsml,
} = cjs;
export default cjs;

13
package-lock.json generated Normal file
View File

@@ -0,0 +1,13 @@
{
"name": "rom-control",
"version": "1.0.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
"ws": {
"version": "8.20.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz",
"integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA=="
}
}
}

40
package.json Normal file
View File

@@ -0,0 +1,40 @@
{
"name": "rom-control",
"version": "2.0.0",
"description": "Discord.js-style OOP client for the Jibo ROM WebSocket API",
"main": "./index.js",
"exports": {
"require": "./index.js",
"import": "./index.mjs",
"default": "./index.js"
},
"files": [
"src/",
"index.js",
"index.mjs",
"API.md"
],
"keywords": [
"jibo",
"rom",
"robot",
"websocket",
"sdk"
],
"author": "Paskooter",
"license": "MIT",
"repository": {
"type": "git",
"url": "git+ssh://git@github.com/Paskooter/rom-control.git"
},
"homepage": "https://github.com/Paskooter/rom-control#readme",
"bugs": {
"url": "https://github.com/Paskooter/rom-control/issues"
},
"dependencies": {
"ws": "^8.14.2"
},
"engines": {
"node": ">=16"
}
}

172
src/Client.js Normal file
View File

@@ -0,0 +1,172 @@
'use strict';
const EventEmitter = require('events');
const { RomConnection } = require('./connection');
const BehaviorManager = require('./managers/BehaviorManager');
const AudioManager = require('./managers/AudioManager');
const CameraManager = require('./managers/CameraManager');
const DisplayManager = require('./managers/DisplayManager');
const AssetManager = require('./managers/AssetManager');
const Track = require('./structures/Track');
const Motion = require('./structures/Motion');
const HeadTouchEvent = require('./structures/HeadTouchEvent');
const GestureEvent = require('./structures/GestureEvent');
const HotwordEvent = require('./structures/HotwordEvent');
/**
* Client — the main entry point for controlling Jibo.
* Modelled after discord.js Client.
*
* Events:
* 'ready' — Connected and session established
* 'disconnect' — WebSocket closed
* 'error' — Error from the connection layer
* 'trackCreate' — (track: Track) Entity first detected
* 'trackUpdate' — (oldTrack: Track, newTrack: Track) Entity moved
* 'trackDelete' — (track: Track) Entity lost
* 'motionDetected' — (motion: Motion) Motion detected
* 'headTouch' — (event: HeadTouchEvent) Head pad touched
* 'gesture' — (event: GestureEvent) Screen tapped or swiped
* 'hotword' — (event: HotwordEvent) "Hey Jibo" detected
*/
class Client extends EventEmitter {
  /**
   * @param {object} [options] Forwarded verbatim to the connection layer.
   * @param {string} [options.host='192.168.1.217'] Robot IP address.
   * @param {number} [options.port=8160] ROM WebSocket port.
   * @param {string} [options.appId='ImmaLittleTeapot'] ACO app identifier.
   * @param {boolean} [options.autoReconnect=true]
   * @param {number} [options.reconnectDelay=3000]
   * @param {number} [options.heartbeatInterval=9000]
   * @param {boolean} [options.autoHeartbeat=true]
   * @param {boolean} [options.autoSubscribe=true]
   */
  constructor(options = {}) {
    super();

    // Low-level txId-based connection layer (intentionally non-public).
    this._conn = new RomConnection(options);

    /** @type {BehaviorManager} Attention, speech, head motion, animations. */
    this.behavior = new BehaviorManager(this);
    /** @type {AudioManager} Microphone input, volume, wakeword. */
    this.audio = new AudioManager(this);
    /** @type {CameraManager} Still photos and video streaming. */
    this.camera = new CameraManager(this);
    /** @type {DisplayManager} Screen output. */
    this.display = new DisplayManager(this);
    /** @type {AssetManager} Remote asset caching. */
    this.assets = new AssetManager(this);

    /**
     * Live map of tracked entities keyed by EntityID.
     * @type {Map<number, Track>}
     */
    this.tracks = new Map();

    this._wireEvents();
  }

  // ── Public properties ───────────────────────────────────────────────────

  /** Current ROM session ID. Empty string when not connected. */
  get sessionID() {
    return this._conn.sessionID;
  }

  /** Whether the WebSocket session is active. */
  get connected() {
    return this._conn.connected;
  }

  /** Last known head angles [theta_deg, psi_deg]. */
  get currentAngles() {
    return this._conn.currentAngles;
  }

  /** True while a video stream command is active. */
  get videoStreamActive() {
    return this._conn.videoStreamActive;
  }

  // ── Lifecycle ───────────────────────────────────────────────────────────

  /**
   * Connect to the robot. Resolves once the session is ready (analogous
   * to client.login() in discord.js). Emits 'ready' on success.
   * @returns {Promise<void>}
   */
  connect() {
    return this._conn.connect();
  }

  /**
   * Disconnect and stop auto-reconnect. The Client can be reconnected via
   * connect() again.
   */
  disconnect() {
    this._conn.disconnect();
  }

  /** Disconnect and release all resources. The instance cannot be reused. */
  destroy() {
    this._conn.destroy();
    this.removeAllListeners();
  }

  // ── Event wiring ────────────────────────────────────────────────────────

  /** Translate raw connection events into rich, typed Client events. */
  _wireEvents() {
    const conn = this._conn;

    conn.on('connected', () => this.emit('ready'));
    conn.on('disconnected', () => {
      // Stale tracks are meaningless once the session is gone.
      this.tracks.clear();
      this.emit('disconnect');
    });
    conn.on('error', (err) => this.emit('error', err));

    // Entity tracking — each ROM message may carry several tracks.
    conn.on('onTrackGained', (_txId, body) => {
      for (const raw of body.Tracks || []) {
        const created = new Track(raw, this);
        this.tracks.set(created.id, created);
        this.emit('trackCreate', created);
      }
    });

    conn.on('onTrackUpdate', (_txId, body) => {
      for (const raw of body.Tracks || []) {
        const previous = this.tracks.get(raw.EntityID);
        // Shallow-clone the old track so listeners get a frozen snapshot
        const oldTrack = previous
          ? Object.assign(Object.create(Track.prototype), previous)
          : null;
        const newTrack = new Track(raw, this);
        this.tracks.set(newTrack.id, newTrack);
        this.emit('trackUpdate', oldTrack, newTrack);
      }
    });

    conn.on('onTrackLost', (_txId, body) => {
      for (const raw of body.Tracks || []) {
        const lost = this.tracks.get(raw.EntityID) || new Track(raw, this);
        this.tracks.delete(lost.id);
        this.emit('trackDelete', lost);
      }
    });

    // Motion detection.
    conn.on('onMotionDetected', (_txId, body) => {
      this.emit('motionDetected', new Motion(body));
    });

    // Head touch pads.
    conn.on('onHeadTouched', (_txId, body) => {
      this.emit('headTouch', new HeadTouchEvent(body));
    });

    // Screen gestures — tag the raw body with its source event name.
    conn.on('onTap', (_txId, body) => {
      this.emit('gesture', new GestureEvent({ ...body, Event: 'onTap' }));
    });
    conn.on('onSwipe', (_txId, body) => {
      this.emit('gesture', new GestureEvent({ ...body, Event: 'onSwipe' }));
    });

    // Wakeword (from watchWakeword / audio.watchWakeword)
    conn.on('hotword', (data) => this.emit('hotword', new HotwordEvent(data)));
  }
}

module.exports = Client;

628
src/connection.js Normal file
View File

@@ -0,0 +1,628 @@
'use strict';
// Internal low-level ROM WebSocket connection.
// Not part of the public API — use Client instead.
const EventEmitter = require('events');
const WebSocket = require('ws');
const crypto = require('crypto');
const http = require('http');
const { sanitizeEsml, chunkEsml } = require('./util/esml');
// Command types advertised in the ACO /request payload (see _postAco()).
const DEFAULT_COMMAND_SET = Object.freeze([
  'StartSession', 'GetConfig', 'SetConfig', 'Cancel',
  'SetAttention', 'Say', 'Listen', 'LookAt',
  'TakePhoto', 'Video', 'Display', 'FetchAsset', 'UnloadAsset', 'Subscribe',
]);
// Event stream types advertised in the ACO /request payload; the streams
// themselves are subscribed after StartSession when autoSubscribe is on.
const DEFAULT_STREAM_SET = Object.freeze([
  'Entity', 'Motion', 'HeadTouch', 'ScreenGesture', 'HotWord',
]);
// ── HTTP helpers ─────────────────────────────────────────────────────────────
/**
 * POST `body` as JSON to http://host:port/path.
 * Resolves with the parsed JSON response, or the raw response text when
 * it is not valid JSON. Rejects only on a transport-level request error.
 * @param {string} host
 * @param {number} port
 * @param {string} path
 * @param {object|string} body Pre-serialized strings are sent as-is.
 * @returns {Promise<object|string>}
 */
function httpPost(host, port, path, body) {
  const payload = typeof body === 'string' ? body : JSON.stringify(body);
  const headers = {
    'Content-Type': 'application/json',
    'Content-Length': Buffer.byteLength(payload),
  };
  return new Promise((resolve, reject) => {
    const req = http.request(
      { host, port, path, method: 'POST', headers },
      (res) => {
        let raw = '';
        res.on('data', (chunk) => { raw += chunk; });
        res.on('end', () => {
          try {
            resolve(JSON.parse(raw));
          } catch {
            resolve(raw); // non-JSON body — hand back the raw text
          }
        });
      },
    );
    req.on('error', reject);
    req.end(payload); // write the payload and finish in one step
  });
}
/**
 * GET http://host:port/path and resolve with the entire response body as
 * a single Buffer. Rejects on a transport-level request error.
 * @param {string} host
 * @param {number} port
 * @param {string} path
 * @returns {Promise<Buffer>}
 */
function httpGet(host, port, path) {
  return new Promise((resolve, reject) => {
    http
      .get({ host, port, path }, (res) => {
        const parts = [];
        res.on('data', (part) => parts.push(part));
        res.on('end', () => resolve(Buffer.concat(parts)));
      })
      .on('error', reject);
  });
}
/**
 * GET http://host:port/path and pipe the response into `dest`.
 * Resolves when the response stream ends; rejects on a transport-level
 * request error. If `dest` closes early the request is destroyed.
 * @param {string} host
 * @param {number} port
 * @param {string} path
 * @param {import('stream').Writable} dest
 * @returns {Promise<void>}
 */
function httpGetStream(host, port, path, dest) {
  return new Promise((resolve, reject) => {
    const req = http
      .get({ host, port, path }, (res) => {
        // Tear the request down if the destination goes away first.
        dest.on('close', () => req.destroy());
        res.pipe(dest);
        res.on('end', resolve);
      })
      .on('error', reject);
  });
}
// ── WakewordWatcher ──────────────────────────────────────────────────────────
/**
 * Watches the robot's always-on resident-ASR wakeword WebSocket
 * (default port 8088, path /simple_port) and re-emits 'hotword' events.
 * Automatically redials every 3 s while running.
 *
 * Events: 'connected', 'disconnected', 'error', 'hotword'.
 */
class WakewordWatcher extends EventEmitter {
  constructor(host, port = 8088) {
    super();
    this.host = host;
    this.port = port;
    this._ws = null;
    this._reconnectTimer = null;
    this._running = false;
  }

  /** Begin watching (and reconnecting) until stop() is called. */
  start() {
    this._running = true;
    this._connect();
  }

  /** Stop watching and tear down any open socket. */
  stop() {
    this._running = false;
    clearTimeout(this._reconnectTimer);
    const ws = this._ws;
    this._ws = null;
    if (ws) {
      try { ws.terminate(); } catch (_) {}
    }
  }

  _connect() {
    if (!this._running) return;
    const ws = new WebSocket(`ws://${this.host}:${this.port}/simple_port`);
    this._ws = ws;
    ws.on('open', () => this.emit('connected'));
    ws.on('message', (data) => this._onMessage(data));
    ws.on('close', () => {
      this.emit('disconnected');
      // Keep redialing until the caller stops us.
      if (this._running) {
        this._reconnectTimer = setTimeout(() => this._connect(), 3000);
      }
    });
    ws.on('error', (err) => this.emit('error', err));
  }

  /** Parse one raw frame and emit 'hotword' for hotphrase events. */
  _onMessage(data) {
    let evt;
    try {
      evt = JSON.parse(String(data));
    } catch {
      return; // ignore non-JSON frames
    }
    if (evt.event_type !== 'hotphrase') return;
    const hit = evt.utterances?.[0];
    this.emit('hotword', {
      utterance: hit ? hit.utterance : 'hey jibo',
      score: hit ? hit.score : 0,
      timestamp: evt.timestamp || new Date().toISOString(),
      raw: evt,
    });
  }
}
// ── RomConnection ─────────────────────────────────────────────────────────────
class RomConnection extends EventEmitter {
/**
 * @param {object} [options] Same option bag the public Client accepts;
 *     unknown keys are ignored. Falsy host/port/appId values fall back
 *     to the defaults below.
 */
constructor(options = {}) {
  super();
  // Connection target and identity.
  this.host = options.host || '192.168.1.217';
  this.port = options.port || 8160;
  this.appId = options.appId || 'ImmaLittleTeapot';
  // Behaviour flags — `!== false` keeps these enabled by default.
  this.autoReconnect = options.autoReconnect !== false;
  this.reconnectDelay = options.reconnectDelay || 3000;
  this.heartbeatInterval = options.heartbeatInterval || 9000;
  this.autoHeartbeat = options.autoHeartbeat !== false;
  this.autoSubscribe = options.autoSubscribe !== false;
  // ACO command/stream sets; defaults are copied so callers can't
  // mutate the frozen module-level arrays through this instance.
  this.commandSet = options.commandSet || [...DEFAULT_COMMAND_SET];
  this.streamSet = options.streamSet || [...DEFAULT_STREAM_SET];
  // Live session state.
  this.ws = null;
  this.sessionID = '';
  this.version = '1.0';
  this.connected = false;
  // Video stream state (flipped by the onVideoReady event).
  this.videoStreamActive = false;
  this.videoURI = null;
  this.videoTxId = null;
  // Last [theta_deg, psi_deg] head angles — presumably updated by
  // lookAtAngle (not visible in this chunk); TODO confirm.
  this.currentAngles = [0, 0];
  // txId → { resolve, events, timer } for in-flight transactions.
  this._pendingTx = new Map();
  // txId → command type, kept for diagnostics (e.g. the 403 log line).
  this._txCommands = new Map();
  this._reconnectTimer = null;
  this._heartbeatTimer = null;
  // txIds of keepalive requests whose replies are silently dropped.
  this._heartbeatTxIds = new Set();
  // LookAt queueing state: one look in flight, at most one pending.
  this._lookInFlight = false;
  this._lookPending = null;
  this._lookAckTimer = null;
  this._lookActiveTxId = null;
  this._destroyed = false;
  this._wakewordWatcher = null;
}
// ── Connection ──────────────────────────────────────────────────────────────
connect() {
return new Promise((resolve, reject) => {
if (this._destroyed) return reject(new Error('RomConnection has been destroyed'));
const onceConnected = () => { this.removeListener('_connectErr', onceErr); resolve(); };
const onceErr = (e) => { this.removeListener('connected', onceConnected); reject(e); };
this.once('connected', onceConnected);
this.once('_connectErr', onceErr);
this._connect();
});
}
/**
 * Internal (re)connect: POST the ACO unlock request, then open the ROM
 * WebSocket and start a session. Schedules its own retry on close when
 * autoReconnect is enabled.
 */
_connect() {
  // Drop any previous socket before dialing again.
  if (this.ws) { try { this.ws.terminate(); } catch (_) {} this.ws = null; }
  this._postAco()
    // Best-effort: proceed even if the ACO POST fails (it may already
    // have been granted on a previous attempt).
    .catch(() => {})
    .then(() => {
      if (this._destroyed) return;
      this.ws = new WebSocket(`ws://${this.host}:${this.port}`);
      this.ws.on('open', () => {
        this.connected = true;
        this.sessionID = '';
        this._txSend({ Type: 'StartSession' });
        // Answer server pings so the robot keeps the link alive.
        this.ws.on('ping', () => { try { this.ws.pong(); } catch (_) {} });
      });
      this.ws.on('message', (data) => {
        let msg;
        // Ignore frames that are not valid JSON.
        try { msg = JSON.parse(data); } catch { return; }
        this._handleMessage(msg);
      });
      this.ws.on('close', () => {
        // Reset all per-session state…
        this.connected = false;
        this.sessionID = '';
        this.videoStreamActive = false;
        this._lookInFlight = false;
        this._lookPending = null;
        clearTimeout(this._lookAckTimer);
        this._stopHeartbeat();
        this._txCommands.clear();
        this.emit('disconnected');
        // …then schedule a redial unless we were deliberately shut down.
        if (!this._destroyed && this.autoReconnect) {
          clearTimeout(this._reconnectTimer);
          this._reconnectTimer = setTimeout(() => this._connect(), this.reconnectDelay);
        }
      });
      this.ws.on('error', (err) => {
        // '_connectErr' lets connect() reject; 'error' is the public event.
        this.emit('error', err);
        this.emit('_connectErr', err);
      });
    });
}
disconnect() {
clearTimeout(this._reconnectTimer);
this._stopHeartbeat();
if (this.ws) { this.ws.terminate(); this.ws = null; }
this.connected = false;
this.sessionID = '';
this._txCommands.clear();
}
/**
 * Permanent teardown: disconnect, stop the wakeword watcher, and drop
 * every listener. The _destroyed flag makes connect() reject and stops
 * any in-flight _connect() from opening a new socket afterwards.
 */
destroy() {
  this._destroyed = true;
  this.disconnect();
  this.stopWakeword();
  this.removeAllListeners();
}
_postAco() {
return httpPost(this.host, this.port, '/request', {
aco: {
version: '1.0',
sourceId: this.appId,
commandSet: this.commandSet,
streamSet: this.streamSet,
keepAliveTimeout: 10000,
recoveryTimeout: 20000,
remoteConfig: { hideVisualCue: false, inactivityTimeout: 3600000 },
},
});
}
// ── Message handling ────────────────────────────────────────────────────────
// Route one incoming ROM message, in priority order: session handshake →
// 403 recovery → heartbeat filtering → video/look bookkeeping →
// pending-transaction resolution → event emission.
_handleMessage(msg) {
// First StartSession response carries the SessionID: finish the handshake.
if (msg.Response?.ResponseBody?.SessionID && !this.sessionID) {
this.sessionID = msg.Response.ResponseBody.SessionID;
this.version = msg.Response.ResponseBody.Version || '1.0';
if (this.autoSubscribe) {
this._txSend({ Type: 'Subscribe', StreamType: 'Entity' });
this._txSend({ Type: 'Subscribe', StreamType: 'Motion' });
this._txSend({ Type: 'Subscribe', StreamType: 'HeadTouch', StreamFilter: {} });
this._txSend({ Type: 'Subscribe', StreamType: 'ScreenGesture',
StreamFilter: { Type: 'Tap', Area: { x: 0, y: 0, width: 1, height: 1 } } });
}
if (this.autoHeartbeat) this._startHeartbeat();
this.emit('connected');
return;
}
const txId = msg.EventHeader?.TransactionID || msg.ResponseHeader?.TransactionID;
const evtName = msg.EventBody?.Event;
const body = msg.EventBody || msg.Response;
// 403 Forbidden = ACO expired or session invalid. Log it and reconnect.
if (body?.ResponseCode === 403) {
const cmdType = (txId && this._txCommands.get(txId)) || 'unknown';
console.warn(`[ROM] 403 Forbidden on ${cmdType} command (txId ${txId?.slice(0, 8) || '?'}) — reconnecting`);
this._txCommands.delete(txId);
// Terminating the socket triggers the 'close' handler, which reconnects.
if (this.ws) { try { this.ws.terminate(); } catch (_) {} }
return;
}
// Responses to our own GetConfig keepalives are noise — drop them.
if (txId && this._heartbeatTxIds.has(txId)) return;
if (evtName === 'onVideoReady') {
this.videoStreamActive = true;
this.videoURI = msg.EventBody.URI;
}
// 'Target overwritten' only means a newer LookAt superseded this one.
if (evtName === 'onError' &&
msg.EventBody?.EventError?.ErrorString === 'Target overwritten') return;
// A terminal event for the in-flight look move releases the coalescing gate.
if (txId && txId === this._lookActiveTxId &&
(evtName === 'onLookAtAchieved' || evtName === 'onStop' || evtName === 'onError')) {
this._onLookAngleDone();
}
// Resolve a matching awaitEvent() waiter (empty event set = first message wins).
if (txId && this._pendingTx.has(txId)) {
const entry = this._pendingTx.get(txId);
if (entry.events.size === 0 || (evtName && entry.events.has(evtName))) {
clearTimeout(entry.timer);
this._pendingTx.delete(txId);
this._txCommands.delete(txId);
entry.resolve({ txId, event: evtName, body, msg });
}
}
// Fan out to listeners: generic 'event' plus the named ROM event.
if (evtName) {
this.emit('event', txId, body);
this.emit(evtName, txId, body);
} else if (body) {
this.emit('event', txId, body);
}
}
// ── Low-level send ──────────────────────────────────────────────────────────
_txId() {
return crypto.createHash('md5')
.update(Date.now().toString() + Math.random().toString())
.digest('hex');
}
// Serialize and send one ROM command, returning its TransactionID.
// A txId is returned even when nothing was actually sent (socket closed or
// session not yet established), so callers can still await it — the await
// simply times out.
_txSend(command) {
const txId = this._txId();
if (this.ws && this.ws.readyState === WebSocket.OPEN) {
// Don't send any command except StartSession before the session ID arrives.
// Commands sent with an empty SessionID are rejected by ROM with 403 Forbidden.
if (!this.sessionID && command.Type !== 'StartSession') return txId;
// Remember the command type (bounded to 60 entries) for 403 diagnostics.
this._txCommands.set(txId, command.Type);
if (this._txCommands.size > 60) {
this._txCommands.delete(this._txCommands.keys().next().value);
}
this.ws.send(JSON.stringify({
ClientHeader: {
TransactionID: txId,
SessionID: this.sessionID,
AppID: this.appId,
Credentials: '',
Version: this.version,
},
Command: command,
}));
}
return txId;
}
// ── Heartbeat ───────────────────────────────────────────────────────────────
_startHeartbeat() {
this._stopHeartbeat();
this._heartbeatTimer = setInterval(() => {
if (this.connected && this.sessionID) {
const txId = this._txSend({ Type: 'GetConfig' });
if (txId) {
this._heartbeatTxIds.add(txId);
if (this._heartbeatTxIds.size > 20) {
this._heartbeatTxIds.delete(this._heartbeatTxIds.values().next().value);
}
}
}
}, this.heartbeatInterval);
}
_stopHeartbeat() {
if (this._heartbeatTimer) { clearInterval(this._heartbeatTimer); this._heartbeatTimer = null; }
this._heartbeatTxIds.clear();
}
// ── Await helpers ───────────────────────────────────────────────────────────
awaitEvent(txId, events, timeoutMs = 10000) {
const eventSet = new Set(Array.isArray(events) ? events : events ? [events] : []);
return new Promise((resolve) => {
const timer = setTimeout(() => { this._pendingTx.delete(txId); resolve(null); }, timeoutMs);
this._pendingTx.set(txId, { resolve, events: eventSet, timer });
});
}
// Resolve on the first event/response for txId (any type), or null on timeout.
awaitAck(txId, timeoutMs = 5000) {
return this.awaitEvent(txId, [], timeoutMs);
}
// Resolve when the command finishes (onStop) or fails (onError); null on timeout.
awaitDone(txId, timeoutMs = 30000) {
return this.awaitEvent(txId, ['onStop', 'onError'], timeoutMs);
}
// ── Commands ────────────────────────────────────────────────────────────────
// Request the robot's current config (also used as the keepalive ping).
getConfig() { return this._txSend({ Type: 'GetConfig' }); }
// Set the audio mixer level; value is clamped to [0, 1].
setConfig(level) { return this._txSend({ Type: 'SetConfig', Options: { Mixer: Math.max(0, Math.min(1, level)) } }); }
// Cancel a previously issued command by its transaction ID.
cancel(txId) { return this._txSend({ Type: 'Cancel', ID: txId }); }
// Switch the attention system mode (see AttentionMode constants).
setAttention(mode) { return this._txSend({ Type: 'SetAttention', Mode: mode }); }
// Speak raw ESML with no sanitizing/chunking — see sayChunked for that.
say(esml) { return this._txSend({ Type: 'Say', ESML: esml }); }
/**
 * Speak long text by splitting it into ESML-safe chunks and sending one
 * Say command per chunk, waiting for each to finish before the next.
 *
 * @param {string} text Raw text/ESML; sanitized, truncated to maxTotal
 * @param {object} [options]
 * @param {number} [options.maxChunkLen=450] Soft per-chunk character cap
 * @param {number} [options.maxTotal=3000] Hard cap; overflow trimmed + ellipsized
 * @param {number} [options.chunkDelay=600] Pause (ms) between chunks
 * @param {number} [options.chunkTimeout=60000] Max ms to wait per chunk
 * @param {AbortSignal} [options.signal] Abort between or during chunks
 * @returns {Promise<{chunks:number, aborted:boolean, lastTxId:?string}>}
 */
async sayChunked(text, options = {}) {
const {
maxChunkLen = 450,
maxTotal = 3000,
chunkDelay = 600,
chunkTimeout = 60000,
signal = null,
} = options;
let clean = sanitizeEsml(text);
if (clean.length > maxTotal) {
// Cut at the cap, drop any partial trailing tag/word, then ellipsize.
clean = clean.slice(0, maxTotal).replace(/<[^>]*$/, '').replace(/\s+\S*$/, '').trimEnd() + '\u2026';
}
const chunks = chunkEsml(clean, maxChunkLen);
let lastTxId = null;
for (let i = 0; i < chunks.length; i++) {
if (signal?.aborted) return { chunks: i, aborted: true, lastTxId };
if (i > 0) await new Promise(r => setTimeout(r, chunkDelay));
if (signal?.aborted) return { chunks: i, aborted: true, lastTxId };
lastTxId = this._txSend({ Type: 'Say', ESML: chunks[i] });
// NOTE(review): one 'abort' listener is added to the signal per chunk and
// only removed when abort actually fires — harmless for typical chunk
// counts, but worth confirming for very long texts.
const abortPromise = signal
? new Promise(r => signal.addEventListener('abort', () => r(null), { once: true }))
: null;
const waitPromise = this.awaitDone(lastTxId, chunkTimeout);
const result = abortPromise
? await Promise.race([waitPromise, abortPromise])
: await waitPromise;
// null result = this chunk timed out (or was aborted): cancel it and bail.
if (result === null || signal?.aborted) {
if (lastTxId) this.cancel(lastTxId);
return { chunks: i + 1, aborted: true, lastTxId };
}
}
return { chunks: chunks.length, aborted: false, lastTxId };
}
// Start speech recognition via ROM's Listen command. The transcript (or a
// stop reason) arrives via onListenResult / onStop on the returned txId.
listen(maxSpeechTimeout = 10000, maxNoSpeechTimeout = 5000, languageCode = 'en-US') {
return this._txSend({
Type: 'Listen',
MaxSpeechTimeout: maxSpeechTimeout,
MaxNoSpeechTimeout: maxNoSpeechTimeout,
LanguageCode: languageCode,
});
}
// On-robot (offline) speech recognition.
// Issues a normal ROM Listen (so the robot shows its listening cues) while
// also driving the local ASR service on asrPort via its own WebSocket +
// HTTP interface. The first transcript (or timeout/error) wins; the
// outcome is re-injected as a synthetic onListenResult/onStop event on the
// ROM transaction ID, so callers consume it exactly like listen().
listenLocalASR(maxNoSpeech = 5000, maxSpeech = 10000, asrPort = 8088) {
const romTxId = this._txSend({ Type: 'Listen', MaxSpeechTimeout: maxSpeech, MaxNoSpeechTimeout: maxNoSpeech, LanguageCode: 'en-US' });
const taskId = 'rom-ctrl-' + Date.now() + '-' + Math.floor(Math.random() * 1e9);
const reqId = 'start-' + Date.now();
const timeoutMs = Math.max(maxNoSpeech, maxSpeech) + 2000;
const self = this;
const startPayload = JSON.stringify({
command: 'start', task_id: taskId, request_id: reqId,
audio_source_id: 'alsa1', hotphrase: 'none', speech_to_text: true,
});
// Best-effort stop of the ASR task; failures are ignored.
function stopASR() {
httpPost(self.host, asrPort, '/asr_simple_interface',
JSON.stringify({ command: 'stop', task_id: taskId, request_id: 'stop-' + Date.now() })).catch(() => {});
}
let asrWs = null, timer = null, done = false;
// Single exit point: tears everything down and synthesizes the result
// event exactly once, whichever path (result/timeout/error/close) fires.
function finish(speech) {
if (done) return;
done = true;
clearTimeout(timer);
if (asrWs) { try { asrWs.terminate(); } catch (_) {} asrWs = null; }
stopASR();
self.cancel(romTxId);
const evtBody = speech
? { Event: 'onListenResult', Speech: speech, LanguageCode: 'en-US' }
: { Event: 'onStop', StopReason: 'NoInput' };
self._handleMessage({ EventHeader: { TransactionID: romTxId }, EventBody: evtBody });
}
asrWs = new WebSocket(`ws://${this.host}:${asrPort}/simple_port`);
asrWs.on('open', () => {
// Start the ASR task only once the event socket is up, so a result
// cannot be emitted before we are listening for it.
httpPost(self.host, asrPort, '/asr_simple_interface', startPayload).catch(() => finish(null));
timer = setTimeout(() => finish(null), timeoutMs);
});
asrWs.on('message', (data) => {
let evt;
try { evt = JSON.parse(String(data)); } catch { return; }
// Field names vary between ASR builds — accept every known spelling.
const evType = evt.event_type || evt.eventType || evt.event || evt.type;
if (evType !== 'speech_to_text_final') return;
const evTask = evt.task_id || evt.taskId || evt.payload?.task_id;
const evReq = evt.request_id || evt.requestId || evt.payload?.request_id;
// Ignore results that belong to some other task/request.
if ((evTask || evReq) && evTask !== taskId && evReq !== reqId) return;
const utterances = evt.utterances || evt.Utterances || evt.payload?.utterances;
function pickUtterance(u) {
if (!u) return '';
if (typeof u === 'string') return u;
return String(u.utterance || u.Utterance || u.text || '');
}
const text = Array.isArray(utterances)
? pickUtterance(utterances[0])
: (typeof utterances === 'string' ? utterances : '');
if (text.trim()) finish(text.trim());
});
asrWs.on('error', () => finish(null));
asrWs.on('close', () => { if (!done) finish(null); });
return romTxId;
}
// LookAt
// Low-level LookAt command. Target is one of { Angle: [rad, rad] },
// { ScreenCoords: [x, y] }, { Position: [x, y, z] } or { Entity: id }.
lookAt(target, trackFlag = false, levelHeadFlag = false) {
return this._txSend({ Type: 'LookAt', LookAtTarget: target, TrackFlag: trackFlag, LevelHeadFlag: levelHeadFlag });
}
// Look at yaw/pitch in degrees (clamped to ±180 / ±30). Rapid calls are
// coalesced: while a move is in flight only the latest target is queued,
// and null is returned instead of a transaction ID.
lookAtAngle(theta, psi, track = false) {
theta = Math.max(-180, Math.min(180, theta));
psi = Math.max(-30, Math.min(30, psi));
// Remember the commanded target so nudge() can offset from it.
this.currentAngles = [theta, psi];
if (this._lookInFlight) { this._lookPending = [theta, psi, track]; return null; }
return this._fireLookAngle(theta, psi, track);
}
// Send the LookAt command for a (theta, psi) degree target. Converts to
// radians and arms a 400 ms fallback timer so the in-flight gate clears
// even if the robot never acknowledges the move.
_fireLookAngle(theta, psi, track) {
this._lookInFlight = true;
this._lookPending = null;
const DEG = Math.PI / 180;
const txId = this.lookAt({ Angle: [theta * DEG, psi * DEG] }, track);
this._lookActiveTxId = txId;
clearTimeout(this._lookAckTimer);
this._lookAckTimer = setTimeout(() => this._onLookAngleDone(), 400);
return txId;
}
_onLookAngleDone() {
clearTimeout(this._lookAckTimer);
this._lookInFlight = false;
this._lookActiveTxId = null;
if (this._lookPending) {
const [t, p, track] = this._lookPending;
this._lookPending = null;
this._fireLookAngle(t, p, track);
}
}
nudge(dTheta, dPsi) {
const [theta, psi] = this.currentAngles;
return this.lookAtAngle(theta + dTheta, psi + dPsi);
}
// Look at a screen pixel coordinate [x, y].
lookAtScreen(x, y, track = false) { return this.lookAt({ ScreenCoords: [x, y] }, track, false); }
// Look at a 3D position [x, y, z].
lookAtPosition(x, y, z, track = false) { return this.lookAt({ Position: [x, y, z] }, track, false); }
// Look at (and by default keep tracking) a perceived entity by ID.
lookAtEntity(entityId, track = true) { return this.lookAt({ Entity: entityId }, track, false); }
// Camera
// Take a still photo; the media URI arrives via the onTakePhoto event.
takePhoto(camera = 'Right', resolution = 'HighRes', distortion = false) {
return this._txSend({ Type: 'TakePhoto', Camera: camera, Resolution: resolution, Distortion: distortion });
}
// Start a video stream; the stream URI arrives via onVideoReady. The
// transaction stays open until stopVideo() cancels it.
startVideo(videoType = 'NORMAL') {
this.videoTxId = this._txSend({ Type: 'Video', VideoType: videoType });
return this.videoTxId;
}
stopVideo() {
if (this.videoTxId) {
this.cancel(this.videoTxId);
this.videoTxId = null;
this.videoStreamActive = false;
}
}
// Download a robot-hosted media file (photo/video URI) as a Buffer.
fetchMedia(jiboUri) { return httpGet(this.host, this.port, jiboUri); }
// Stream a robot-hosted media file into a writable stream.
fetchMediaStream(jiboUri, dest) { return httpGetStream(this.host, this.port, jiboUri, dest); }
// Display
// Show the eye animation on screen.
displayEye(name = 'default') {
return this._txSend({ Type: 'Display', View: { Type: 'Eye', Name: name } });
}
// Show a text view on screen.
displayText(text, name = 'view') {
return this._txSend({ Type: 'Display', View: { Type: 'Text', Name: name, Text: text } });
}
// Show an image view on screen (src: image source URL).
displayImage(src, name = 'view') {
return this._txSend({ Type: 'Display', View: { Type: 'Image', Name: name, Image: { src, name, set: '' } } });
}
playAnim(animName) {
return this._txSend({ Type: 'Say', ESML: `<anim name="${animName}"></anim>` });
}
playAnimCat(cat, filter = null, nonBlocking = false) {
let tag = `<anim cat='${cat}'`;
if (nonBlocking) tag += ` nonBlocking='true'`;
if (filter) tag += ` filter='${filter}'`;
tag += '/>';
return this._txSend({ Type: 'Say', ESML: tag });
}
// Assets
// Download a remote file to the robot's cache; completion is signalled via
// onAssetReady / onAssetFailed events.
fetchAsset(uri, name) { return this._txSend({ Type: 'FetchAsset', URI: uri, Name: name }); }
// Remove a cached asset by name.
unloadAsset(name) { return this._txSend({ Type: 'UnloadAsset', Name: name }); }
// Subscriptions
subscribe(streamType, filter = null) {
const cmd = { Type: 'Subscribe', StreamType: streamType };
if (filter != null) cmd.StreamFilter = filter;
return this._txSend(cmd);
}
// Wakeword
// Lazily create and start a WakewordWatcher on the local ASR port,
// re-emitting its 'hotword' and 'error' events on this connection.
// Idempotent: repeated calls return the existing watcher.
watchWakeword(asrPort = 8088) {
if (this._wakewordWatcher) return this._wakewordWatcher;
this._wakewordWatcher = new WakewordWatcher(this.host, asrPort);
this._wakewordWatcher.on('hotword', (data) => this.emit('hotword', data));
this._wakewordWatcher.on('error', (err) => this.emit('error', err));
this._wakewordWatcher.start();
return this._wakewordWatcher;
}
stopWakeword() {
if (this._wakewordWatcher) {
this._wakewordWatcher.stop();
this._wakewordWatcher = null;
}
}
}
module.exports = { RomConnection, WakewordWatcher, DEFAULT_COMMAND_SET, DEFAULT_STREAM_SET };

45
src/constants.js Normal file
View File

@@ -0,0 +1,45 @@
'use strict';
// Values for SetAttention's Mode field.
const AttentionMode = Object.freeze({
Off: 'Off',
Idle: 'Idle',
Disengage: 'Disengage',
Engaged: 'Engaged',
Speaking: 'Speaking',
Fixated: 'Fixated',
Attractable: 'Attractable',
Menu: 'Menu',
Command: 'Command',
});
// Physical camera selector for TakePhoto.
const Camera = Object.freeze({
Left: 'Left',
Right: 'Right',
});
// Still-photo resolutions accepted by TakePhoto.
const Resolution = Object.freeze({
HighRes: 'HighRes',
MedRes: 'MedRes',
LowRes: 'LowRes',
MicroRes: 'MicroRes',
});
// Video stream variants for the Video command.
const VideoType = Object.freeze({
Normal: 'NORMAL',
Debug: 'DEBUG',
});
// Gesture types reported on the ScreenGesture stream.
const GestureType = Object.freeze({
Tap: 'Tap',
SwipeDown: 'SwipeDown',
SwipeUp: 'SwipeUp',
SwipeRight: 'SwipeRight',
SwipeLeft: 'SwipeLeft',
});
// Head touch pad names, in the index order of HeadTouch events' Pads array.
const HEAD_TOUCH_PADS = Object.freeze([
'frontLeft', 'middleLeft', 'backLeft',
'frontRight', 'middleRight', 'backRight',
]);
module.exports = { AttentionMode, Camera, Resolution, VideoType, GestureType, HEAD_TOUCH_PADS };

View File

@@ -0,0 +1,47 @@
'use strict';
/**
* AssetManager — download and cache remote files on the robot.
* Access via client.assets.
*/
class AssetManager {
  constructor(client) {
    this._client = client;
  }
  get _conn() { return this._client._conn; }
  /**
   * Download a remote file and cache it on the robot under `name`.
   * Resolves once the robot reports the asset ready; rejects with
   * code ASSET_TIMEOUT on timeout, or ASSET_FAILED on fetch failure.
   *
   * @param {string} uri Remote URL to fetch
   * @param {string} name Cache key / reference name
   * @param {number} [timeout=30000] ms to wait for onAssetReady
   * @returns {Promise<void>}
   */
  async fetch(uri, name, timeout = 30000) {
    const txId = this._conn.fetchAsset(uri, name);
    const outcome = await this._conn.awaitEvent(txId, ['onAssetReady', 'onAssetFailed'], timeout);
    if (outcome == null) {
      const err = new Error(`fetchAsset('${name}') timed out`);
      err.code = 'ASSET_TIMEOUT';
      throw err;
    }
    if (outcome.event === 'onAssetFailed') {
      const detail = outcome.body?.Detail ?? 'unknown error';
      const err = new Error(`Asset fetch failed: ${detail}`);
      err.code = 'ASSET_FAILED';
      throw err;
    }
  }
  /**
   * Remove a cached asset from the robot.
   * @param {string} name The cache key used in fetch()
   */
  unload(name) {
    this._conn.unloadAsset(name);
  }
}
module.exports = AssetManager;

View File

@@ -0,0 +1,85 @@
'use strict';
const SpeechResult = require('../structures/SpeechResult');
/**
* AudioManager — controls Jibo's microphone and audio output.
* Access via client.audio.
*/
class AudioManager {
  constructor(client) {
    this._client = client;
  }
  get _conn() { return this._client._conn; }

  // ── Volume ──────────────────────────────────────────────────────────────
  /**
   * Set the audio mixer volume.
   * @param {number} level 0.0–1.0
   * @returns {Promise<void>}
   */
  async setVolume(level) {
    await this._conn.awaitAck(this._conn.setConfig(level), 5000);
  }

  // ── Speech input ────────────────────────────────────────────────────────
  /**
   * Listen for speech and resolve with the transcript.
   * Rejects with code SPEECH_TIMEOUT when nothing was heard in time.
   *
   * @param {object} [options]
   * @param {'local'|'cloud'} [options.mode='local'] 'local' = on-robot ASR (no cloud)
   * @param {number} [options.time=15000] Max ms to wait for speech
   * @param {number} [options.noSpeechTime=5000] Max ms of silence (cloud mode only)
   * @param {string} [options.languageCode='en-US']
   * @returns {Promise<SpeechResult>}
   */
  async awaitSpeech(options = {}) {
    const {
      mode = 'local',
      time = 15000,
      noSpeechTime = 5000,
      languageCode = 'en-US',
    } = options;
    const txId = mode === 'local'
      ? this._conn.listenLocalASR(Math.min(time, noSpeechTime), time)
      : this._conn.listen(time, noSpeechTime, languageCode);
    const outcome = await this._conn.awaitEvent(txId, ['onListenResult', 'onStop'], time + 3000);
    const heard = outcome && outcome.event === 'onListenResult';
    if (!heard) {
      const err = new Error('No speech detected within the time limit.');
      err.code = 'SPEECH_TIMEOUT';
      throw err;
    }
    return new SpeechResult(outcome.body);
  }

  // ── Wakeword ────────────────────────────────────────────────────────────
  /**
   * Start listening for the "Hey Jibo" wakeword. Fires 'hotword' events on
   * the Client with a HotwordEvent payload.
   * @param {number} [asrPort=8088]
   */
  watchWakeword(asrPort = 8088) { this._conn.watchWakeword(asrPort); }

  /** Stop the wakeword listener. */
  stopWakeword() { this._conn.stopWakeword(); }
}
module.exports = AudioManager;

View File

@@ -0,0 +1,162 @@
'use strict';
/**
* BehaviorManager — controls Jibo's active behaviors: attention, speech,
* head motion, and animations. Access via client.behavior.
*/
class BehaviorManager {
  constructor(client) {
    this._client = client;
  }
  get _conn() { return this._client._conn; }

  /**
   * Wait for a LookAt-style transaction to settle (achieved/stopped/error).
   * A null txId means the move was only queued — nothing to await.
   * @param {?string} txId
   * @param {number} timeout ms
   * @returns {Promise<void>}
   */
  async _awaitLook(txId, timeout) {
    if (!txId) return;
    await this._conn.awaitEvent(txId, ['onLookAtAchieved', 'onStop', 'onError'], timeout);
  }

  // ── Attention ───────────────────────────────────────────────────────────
  /**
   * Set Jibo's engagement/attention mode.
   * @param {string} mode AttentionMode constant
   * @returns {Promise<void>}
   */
  async setAttention(mode) {
    await this._conn.awaitAck(this._conn.setAttention(mode), 5000);
  }

  // ── Speech ──────────────────────────────────────────────────────────────
  /**
   * Speak text or ESML. Automatically sanitizes and chunks long input.
   * Resolves when speech is physically finished.
   *
   * @param {string} text
   * @param {object} [options] Passed through to sayChunked
   * @param {number} [options.maxChunkLen=450]
   * @param {number} [options.maxTotal=3000]
   * @param {number} [options.chunkDelay=600]
   * @param {AbortSignal} [options.signal]
   * @returns {Promise<void>}
   * @throws {Error} code SAY_TIMEOUT when a chunk never finished and the
   *   caller did not abort
   */
  async say(text, options = {}) {
    const outcome = await this._conn.sayChunked(text, options);
    const callerAborted = options.signal?.aborted ?? false;
    if (outcome.aborted && !callerAborted) {
      const err = new Error('say() timed out');
      err.code = 'SAY_TIMEOUT';
      throw err;
    }
  }

  // ── Head motion ─────────────────────────────────────────────────────────
  /**
   * Raw LookAt. Resolves when onLookAtAchieved fires (or on timeout).
   * @param {object} target { Angle } | { ScreenCoords } | { Position } | { Entity }
   * @param {object} [options]
   * @param {boolean} [options.track=false]
   * @param {boolean} [options.levelHead=false]
   * @param {number} [options.timeout=5000]
   * @returns {Promise<void>}
   */
  async lookAt(target, options = {}) {
    const { track = false, levelHead = false, timeout = 5000 } = options;
    await this._awaitLook(this._conn.lookAt(target, track, levelHead), timeout);
  }

  /**
   * Look at an angle in degrees. Theta = yaw (±180), psi = pitch (±30).
   * Resolves when the head reaches the target, or immediately when the move
   * was only queued behind an in-flight one.
   *
   * @param {number} theta Yaw degrees, positive = right
   * @param {number} psi Pitch degrees, positive = up
   * @param {object} [options]
   * @param {boolean} [options.track=false]
   * @param {number} [options.timeout=5000]
   * @returns {Promise<void>}
   */
  async lookAtAngle(theta, psi, options = {}) {
    const { track = false, timeout = 5000 } = options;
    await this._awaitLook(this._conn.lookAtAngle(theta, psi, track), timeout);
  }

  /**
   * Nudge the head by a relative angle delta from the current position.
   * @param {number} dTheta Delta yaw degrees
   * @param {number} dPsi Delta pitch degrees
   * @returns {Promise<void>}
   */
  async nudge(dTheta, dPsi) {
    await this._awaitLook(this._conn.nudge(dTheta, dPsi), 5000);
  }

  /**
   * Look at a screen pixel coordinate.
   * @param {number} x
   * @param {number} y
   * @returns {Promise<void>}
   */
  async lookAtScreen(x, y) {
    await this._awaitLook(this._conn.lookAtScreen(x, y), 5000);
  }

  /**
   * Look at a world-relative 3D position.
   * @param {number} x
   * @param {number} y
   * @param {number} z
   * @returns {Promise<void>}
   */
  async lookAtPosition(x, y, z) {
    await this._awaitLook(this._conn.lookAtPosition(x, y, z), 5000);
  }

  /**
   * Look at (and optionally track) an entity by ID. When track=true the
   * robot keeps following the entity; the promise resolves after the first
   * onLookAtAchieved rather than waiting for tracking to end.
   *
   * @param {number} entityId
   * @param {boolean} [track=true]
   * @returns {Promise<void>}
   */
  async lookAtEntity(entityId, track = true) {
    await this._awaitLook(this._conn.lookAtEntity(entityId, track), 5000);
  }

  // ── Animations ──────────────────────────────────────────────────────────
  /**
   * Play a named animation. Resolves when the animation finishes.
   * @param {string} animName e.g. 'pleased_01'
   * @returns {Promise<void>}
   * @throws {Error} code ANIM_TIMEOUT
   */
  async playAnim(animName) {
    const done = await this._conn.awaitDone(this._conn.playAnim(animName), 30000);
    if (!done) {
      const err = new Error(`playAnim('${animName}') timed out`);
      err.code = 'ANIM_TIMEOUT';
      throw err;
    }
  }

  /**
   * Play an animation by emotional category.
   * @param {string} cat e.g. 'happy', 'excited', 'sad', 'dance', 'emoji'
   * @param {object} [options]
   * @param {string} [options.filter] e.g. 'music, rom-upbeat'
   * @param {boolean} [options.nonBlocking=false] Fire-and-forget when true
   * @returns {Promise<void>}
   * @throws {Error} code ANIM_TIMEOUT
   */
  async playAnimCat(cat, options = {}) {
    const { filter = null, nonBlocking = false } = options;
    const txId = this._conn.playAnimCat(cat, filter, nonBlocking);
    if (nonBlocking) return;
    const done = await this._conn.awaitDone(txId, 30000);
    if (!done) {
      const err = new Error(`playAnimCat('${cat}') timed out`);
      err.code = 'ANIM_TIMEOUT';
      throw err;
    }
  }
}
module.exports = BehaviorManager;

View File

@@ -0,0 +1,78 @@
'use strict';
const Photo = require('../structures/Photo');
const VideoStream = require('../structures/VideoStream');
/**
* CameraManager — controls Jibo's cameras.
* Access via client.camera.
*/
class CameraManager {
  constructor(client) {
    this._client = client;
    this._activeStream = null; // most recent VideoStream handle, if any
  }
  get _conn() { return this._client._conn; }
  /**
   * Take a still photo. Resolves with a Photo object.
   *
   * @param {object} [options]
   * @param {string} [options.camera='Right'] Camera.Left | Camera.Right
   * @param {string} [options.resolution='HighRes'] Resolution constant
   * @param {boolean} [options.distortion=false]
   * @param {number} [options.timeout=15000]
   * @returns {Promise<Photo>}
   * @throws {Error} code PHOTO_TIMEOUT
   */
  async takePhoto(options = {}) {
    const {
      camera = 'Right',
      resolution = 'HighRes',
      distortion = false,
      timeout = 15000,
    } = options;
    const txId = this._conn.takePhoto(camera, resolution, distortion);
    const result = await this._conn.awaitEvent(txId, 'onTakePhoto', timeout);
    if (!result) {
      // Fix: cancel the outstanding TakePhoto command so the robot does not
      // keep working on a request whose result nobody will consume.
      this._conn.cancel(txId);
      throw Object.assign(new Error('takePhoto() timed out'), { code: 'PHOTO_TIMEOUT' });
    }
    return new Photo(result.body, this._client);
  }
  /**
   * Start a video stream. Resolves with a VideoStream once the robot is ready.
   *
   * @param {object} [options]
   * @param {string} [options.type='NORMAL'] VideoType.Normal | VideoType.Debug
   * @param {number} [options.timeout=10000]
   * @returns {Promise<VideoStream>}
   * @throws {Error} code VIDEO_TIMEOUT
   */
  async startVideo(options = {}) {
    const { type = 'NORMAL', timeout = 10000 } = options;
    const txId = this._conn.startVideo(type);
    const result = await this._conn.awaitEvent(txId, 'onVideoReady', timeout);
    if (!result) {
      // Fix: tear the pending Video command down on timeout; previously
      // videoTxId stayed set on the connection and the command was never
      // cancelled, leaking an open video transaction.
      this._conn.stopVideo();
      throw Object.assign(new Error('startVideo() timed out waiting for onVideoReady'), { code: 'VIDEO_TIMEOUT' });
    }
    this._activeStream = new VideoStream(result.body.URI, txId, this._client);
    return this._activeStream;
  }
  /** Stop the active video stream (safe to call when none is active). */
  stopVideo() {
    if (this._activeStream) {
      this._activeStream.active = false;
      this._activeStream = null;
    }
    this._conn.stopVideo();
  }
}
module.exports = CameraManager;

View File

@@ -0,0 +1,41 @@
'use strict';
/**
* DisplayManager — controls what's shown on Jibo's screen.
* Access via client.display.
*/
class DisplayManager {
  constructor(client) { this._client = client; }
  get _conn() { return this._client._conn; }

  /**
   * Show Jibo's eye animation.
   * @param {string} [name='default'] View name
   */
  showEye(name = 'default') { this._conn.displayEye(name); }

  /**
   * Display text on Jibo's screen.
   * @param {string} text
   * @param {string} [name='view'] View name
   */
  showText(text, name = 'view') { this._conn.displayText(text, name); }

  /**
   * Display an image on Jibo's screen.
   * @param {string} src Image URL
   * @param {string} [name='view'] View name
   */
  showImage(src, name = 'view') { this._conn.displayImage(src, name); }
}
module.exports = DisplayManager;

View File

@@ -0,0 +1,20 @@
'use strict';
/** GestureEvent — a tap or swipe on Jibo's touchscreen. */
class GestureEvent {
  constructor(raw) {
    // ROM reports 'onTap' for taps; everything else here is a swipe.
    this.type = raw.Event === 'onTap' ? 'Tap' : 'Swipe';
    const coord = raw.Coordinate;
    this.coordinate = coord ? { x: coord[0], y: coord[1] } : null;
    // 'Up' | 'Down' | 'Left' | 'Right' — only present on swipes.
    this.direction = raw.Direction || null;
  }
  /** True when this gesture is a tap. */
  get isTap() { return this.type === 'Tap'; }
  /** True when this gesture is a swipe. */
  get isSwipe() { return this.type === 'Swipe'; }
}
module.exports = GestureEvent;

View File

@@ -0,0 +1,18 @@
'use strict';
const { HEAD_TOUCH_PADS } = require('../constants');
/** HeadTouchEvent — snapshot of Jibo's six head touch pads. */
class HeadTouchEvent {
  constructor(raw) {
    // raw.Pads is a bool[6] in HEAD_TOUCH_PADS order: frontLeft…backRight.
    this.pads = raw.Pads || [];
    this.activePads = HEAD_TOUCH_PADS.filter((_, idx) => this.pads[idx]);
  }
  /** Whether the named pad (e.g. 'frontLeft') is currently touched. */
  isTouched(padName) {
    const position = HEAD_TOUCH_PADS.indexOf(padName);
    if (position === -1) return false;
    return Boolean(this.pads[position]);
  }
}
module.exports = HeadTouchEvent;

View File

@@ -0,0 +1,11 @@
'use strict';
/** HotwordEvent — payload for a detected wakeword. */
class HotwordEvent {
  constructor(raw) {
    const { utterance, score, timestamp } = raw;
    this.utterance = utterance || 'hey jibo';
    this.score = score ?? 0; // ?? (not ||) keeps a legitimate score of 0
    this.timestamp = timestamp || new Date().toISOString();
  }
}
module.exports = HotwordEvent;

21
src/structures/Motion.js Normal file
View File

@@ -0,0 +1,21 @@
'use strict';
/** One region of detected motion in the camera frame. */
class MotionZone {
  constructor(raw) {
    const sc = raw.ScreenCoords;
    const wc = raw.WorldCoords;
    this.screenCoords = sc ? { x: sc[0], y: sc[1] } : null;
    this.worldCoords = wc ? { x: wc[0], y: wc[1], z: wc[2] } : null;
    this.intensity = raw.Intensity ?? null; // ?? keeps a legitimate 0
  }
}
/** Motion — one batch of motion zones from a Motion stream event. */
class Motion {
  constructor(raw) {
    const zones = raw.Motions || [];
    this.zones = zones.map((zone) => new MotionZone(zone));
  }
}
module.exports = Motion;

23
src/structures/Photo.js Normal file
View File

@@ -0,0 +1,23 @@
'use strict';
/** Photo — metadata for a still photo taken by the robot. */
class Photo {
  constructor(raw, client) {
    const { URI, Name, AngleTarget, PositionTarget } = raw;
    this.uri = URI;
    this.name = Name;
    this.angleTarget = AngleTarget || null;
    this.positionTarget = PositionTarget || null;
    this._client = client;
  }
  /** Download and return the image as a Buffer. */
  fetchBuffer() {
    return this._client._conn.fetchMedia(this.uri);
  }
  /** Pipe the image into a writable stream (e.g. fs.createWriteStream). */
  pipe(dest) {
    return this._client._conn.fetchMediaStream(this.uri, dest);
  }
}
module.exports = Photo;

View File

@@ -0,0 +1,10 @@
'use strict';
/** SpeechResult — transcript returned from a Listen command. */
class SpeechResult {
  constructor({ Speech, LanguageCode }) {
    this.content = Speech;                       // transcribed text
    this.languageCode = LanguageCode || 'en-US'; // reported language
  }
}
module.exports = SpeechResult;

21
src/structures/Track.js Normal file
View File

@@ -0,0 +1,21 @@
'use strict';
/** Track — a perceived entity from the Entity stream. */
class Track {
  constructor(raw, client) {
    this.id = raw.EntityID;
    this.screenCoords = raw.ScreenCoords
      ? { x: raw.ScreenCoords[0], y: raw.ScreenCoords[1] }
      : null;
    this.worldCoords = raw.WorldCoords
      ? { x: raw.WorldCoords[0], y: raw.WorldCoords[1], z: raw.WorldCoords[2] }
      : null;
    this._client = client;
  }
  /**
   * Look at this entity. Resolves when the head reaches the target.
   * Fix: head motion lives on client.behavior (BehaviorManager); there is
   * no `client.user` manager, so the previous `this._client.user.lookAtEntity`
   * call always threw.
   * @param {boolean} [track=true] Keep following the entity
   */
  async lookAt(track = true) {
    return this._client.behavior.lookAtEntity(this.id, track);
  }
}
module.exports = Track;

View File

@@ -0,0 +1,24 @@
'use strict';
/** VideoStream — handle to a live MJPEG stream from the robot. */
class VideoStream {
  constructor(uri, txId, client) {
    this.uri = uri;
    this._txId = txId;
    this._client = client;
    this.active = true; // flips to false once stop() runs
  }
  /** Pipe the MJPEG stream into a writable stream. */
  pipe(dest) {
    return this._client._conn.fetchMediaStream(this.uri, dest);
  }
  /** Stop the video stream (idempotent). */
  stop() {
    if (!this.active) return;
    this.active = false;
    this._client.camera.stopVideo();
  }
}
module.exports = VideoStream;

62
src/util/esml.js Normal file
View File

@@ -0,0 +1,62 @@
'use strict';
/**
 * Strip emoji, markdown markup and non-ASCII junk from text before it is
 * sent as ESML. Existing ESML tags are kept; bare '&' becomes 'and' while
 * entities like '&amp;' are preserved.
 *
 * @param {string} text
 * @returns {string} Cleaned, trimmed text
 */
function sanitizeEsml(text) {
  const passes = [
    [/\p{Extended_Pictographic}/gu, ''],            // emoji
    [/[\u{FE00}-\u{FE0F}\u{20D0}-\u{20FF}]/gu, ''], // variation selectors / combining marks
    [/<ssa\b[^>]*\/>/gi, ''],                       // stray <ssa/> tags
    [/^#{1,6}\s+/gm, ''],                           // markdown headings
    [/\*\*([^*\n]*)\*\*/g, '$1'],                   // **bold**
    [/__([^_\n]*)__/g, '$1'],                       // __bold__
    [/\*([^*\n]+)\*/g, '$1'],                       // *italic*
    [/_([^_\n]+)_/g, '$1'],                         // _italic_
    [/^[ \t]*[*\-]\s+/gm, ''],                      // bullet markers
    [/\n+/g, ' '],                                  // collapse newlines
    [/&(?![a-zA-Z#]\w*;)/g, 'and'],                 // bare ampersands (keep entities)
    [/[^\x20-\x7E<>'"/\-]/g, ''],                   // any remaining non-printable-ASCII
  ];
  return passes
    .reduce((acc, [pattern, replacement]) => acc.replace(pattern, replacement), text)
    .trim();
}
/**
 * Split sanitized text into ESML-safe chunks of at most ~maxLen characters.
 * Cuts land at sentence ends when possible, then word boundaries, and never
 * inside an ESML tag. A chunk containing no tag is prefixed with a tiny
 * <break/> so ROM treats it as ESML rather than plain text.
 *
 * @param {string} text Sanitized input
 * @param {number} [maxLen=450] Soft maximum chunk length
 * @returns {string[]} Non-empty chunks in order
 */
function chunkEsml(text, maxLen = 450) {
  if (text.length <= maxLen) {
    return [/<[a-zA-Z]/.test(text) ? text : `<break size='0.1'/> ${text}`];
  }
  const chunks = [];
  let remaining = text;
  while (remaining.length > maxLen) {
    // Scan the window for the last sentence end / word break outside a tag.
    let inTag = false, lastSentEnd = -1, lastWordEnd = -1;
    for (let i = 0; i < maxLen && i < remaining.length; i++) {
      const c = remaining[i];
      if (c === '<') { inTag = true; continue; }
      if (c === '>') { inTag = false; continue; }
      if (inTag) continue;
      if (c === ' ') {
        const prev = remaining[i - 1];
        if (prev === '.' || prev === '!' || prev === '?') lastSentEnd = i;
        else lastWordEnd = i;
      }
    }
    const cutAt = lastSentEnd !== -1 ? lastSentEnd : lastWordEnd !== -1 ? lastWordEnd : -1;
    let chunk;
    if (cutAt !== -1) {
      chunk = remaining.slice(0, cutAt).trimEnd();
      remaining = remaining.slice(cutAt + 1).trimStart();
    } else {
      // No safe break point: hard-cut, dropping any partial trailing tag.
      chunk = remaining.slice(0, maxLen).replace(/<[^>]*$/, '').trimEnd();
      if (!chunk) {
        // Fix: a single unclosed tag wider than maxLen used to reduce the
        // chunk to '' so `remaining` never advanced and this loop spun
        // forever. Hard-cut to guarantee progress.
        chunk = remaining.slice(0, maxLen);
      }
      remaining = remaining.slice(chunk.length).trimStart();
    }
    if (chunk) chunks.push(chunk);
  }
  if (remaining.trim()) chunks.push(remaining.trim());
  return chunks
    .filter(c => c.length > 0)
    .map(c => /<[a-zA-Z]/.test(c) ? c : `<break size='0.1'/> ${c}`);
}
module.exports = { sanitizeEsml, chunkEsml };