From cede3d3641b2d77e9593a9be54f6074915724716 Mon Sep 17 00:00:00 2001 From: Paskooter Date: Thu, 23 Apr 2026 01:15:20 -0400 Subject: [PATCH] Initial release: v2.0.0 discord.js-style OOP client for Jibo ROM --- .gitignore | 3 + API.md | 606 +++++++++++++++++++++++++++++ index.js | 49 +++ index.mjs | 26 ++ package-lock.json | 13 + package.json | 40 ++ src/Client.js | 172 +++++++++ src/connection.js | 628 +++++++++++++++++++++++++++++++ src/constants.js | 45 +++ src/managers/AssetManager.js | 47 +++ src/managers/AudioManager.js | 85 +++++ src/managers/BehaviorManager.js | 162 ++++++++ src/managers/CameraManager.js | 78 ++++ src/managers/DisplayManager.js | 41 ++ src/structures/GestureEvent.js | 20 + src/structures/HeadTouchEvent.js | 18 + src/structures/HotwordEvent.js | 11 + src/structures/Motion.js | 21 ++ src/structures/Photo.js | 23 ++ src/structures/SpeechResult.js | 10 + src/structures/Track.js | 21 ++ src/structures/VideoStream.js | 24 ++ src/util/esml.js | 62 +++ 23 files changed, 2205 insertions(+) create mode 100644 .gitignore create mode 100644 API.md create mode 100644 index.js create mode 100644 index.mjs create mode 100644 package-lock.json create mode 100644 package.json create mode 100644 src/Client.js create mode 100644 src/connection.js create mode 100644 src/constants.js create mode 100644 src/managers/AssetManager.js create mode 100644 src/managers/AudioManager.js create mode 100644 src/managers/BehaviorManager.js create mode 100644 src/managers/CameraManager.js create mode 100644 src/managers/DisplayManager.js create mode 100644 src/structures/GestureEvent.js create mode 100644 src/structures/HeadTouchEvent.js create mode 100644 src/structures/HotwordEvent.js create mode 100644 src/structures/Motion.js create mode 100644 src/structures/Photo.js create mode 100644 src/structures/SpeechResult.js create mode 100644 src/structures/Track.js create mode 100644 src/structures/VideoStream.js create mode 100644 src/util/esml.js diff --git a/.gitignore 
b/.gitignore new file mode 100644 index 0000000..ee7c078 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +node_modules/ +npm-debug.log* +.env diff --git a/API.md b/API.md new file mode 100644 index 0000000..5e2126d --- /dev/null +++ b/API.md @@ -0,0 +1,606 @@ +# rom-control + +Discord.js-style OOP client for the Jibo ROM WebSocket API (port 8160). + +**Requires:** Node.js ≥ 16, `ws` ^8.14.2 + +```js +// CommonJS +const { Client, AttentionMode } = require('rom-control'); + +// ESM +import { Client, AttentionMode } from 'rom-control'; +``` + +--- + +## Quick Start + +```js +import { Client, AttentionMode } from 'rom-control'; + +const client = new Client({ host: '192.168.1.217' }); + +client.once('ready', () => { + console.log('Connected, session:', client.sessionID); +}); + +client.on('trackCreate', async (track) => { + await track.lookAt(); +}); + +await client.connect(); + +await client.behavior.setAttention(AttentionMode.Engaged); +await client.behavior.say("Hello! I'm Jibo."); + +try { + const speech = await client.audio.awaitSpeech({ mode: 'local', time: 15000 }); + await client.behavior.say(`You said: ${speech.content}`); +} catch { + // SPEECH_TIMEOUT — no input detected +} + +client.destroy(); +``` + +--- + +## `new Client(options?)` + +| Option | Type | Default | Description | +|---|---|---|---| +| `host` | `string` | `'192.168.1.217'` | Robot IP address | +| `port` | `number` | `8160` | ROM WebSocket port | +| `appId` | `string` | `'ImmaLittleTeapot'` | ACO app identifier | +| `autoReconnect` | `boolean` | `true` | Reconnect on disconnect | +| `reconnectDelay` | `number` | `3000` | ms between reconnect attempts | +| `heartbeatInterval` | `number` | `9000` | ms between GetConfig keepalives | +| `autoHeartbeat` | `boolean` | `true` | Send GetConfig keepalives automatically | +| `autoSubscribe` | `boolean` | `true` | Subscribe Entity/Motion/HeadTouch/ScreenGesture on connect | + +### Instance properties + +| Property | Type | Description | +|---|---|---|
+| `connected` | `boolean` | WebSocket is open and session established | +| `sessionID` | `string` | Current ROM session ID (`''` when disconnected) | +| `currentAngles` | `[number, number]` | Last `[theta_deg, psi_deg]` sent via `lookAtAngle` | +| `videoStreamActive` | `boolean` | A video stream command is currently active | +| `tracks` | `Map<number, Track>` | Live map of tracked entities keyed by EntityID | +| `behavior` | `BehaviorManager` | Attention, speech, head motion, animations | +| `audio` | `AudioManager` | Microphone input, volume, wakeword | +| `camera` | `CameraManager` | Still photos and video streaming | +| `display` | `DisplayManager` | Screen output | +| `assets` | `AssetManager` | Remote asset caching | + +--- + +## Lifecycle + +### `client.connect()` → `Promise` + +Posts the ACO `/request` to unlock ROM commands, opens the WebSocket, and waits for the session to be established. Emits `'ready'` on success. + +```js +await client.connect(); +``` + +### `client.disconnect()` + +Closes the WebSocket and stops auto-reconnect. The instance can be reconnected by calling `connect()` again. + +### `client.destroy()` + +Disconnects, stops the wakeword listener, and removes all event listeners. The instance cannot be reused. + +--- + +## Events + +All events are emitted on the `Client` instance (extends `EventEmitter`).
+ +| Event | Args | Description | +|---|---|---| +| `'ready'` | — | Connected and session established | +| `'disconnect'` | — | WebSocket closed; `client.tracks` is cleared | +| `'error'` | `err: Error` | Connection or protocol error | +| `'trackCreate'` | `track: Track` | Entity first detected | +| `'trackUpdate'` | `oldTrack: Track, newTrack: Track` | Entity position updated | +| `'trackDelete'` | `track: Track` | Entity lost | +| `'motionDetected'` | `motion: Motion` | Motion detected | +| `'headTouch'` | `event: HeadTouchEvent` | Head pad touched | +| `'gesture'` | `event: GestureEvent` | Screen tapped or swiped | +| `'hotword'` | `event: HotwordEvent` | "Hey Jibo" detected | + +```js +client.on('trackCreate', async (track) => { + console.log('Saw entity', track.id, 'at', track.screenCoords); + await track.lookAt(); +}); + +client.on('headTouch', (event) => { + console.log('Touched pads:', event.activePads); +}); + +client.on('gesture', (event) => { + if (event.isTap) console.log('Tapped at', event.coordinate); + if (event.isSwipe) console.log('Swiped', event.direction); +}); +``` + +--- + +## `client.behavior` — BehaviorManager + +Controls Jibo's persona: attention, speech, head motion, and animations. All methods return Promises that resolve when the action physically completes. + +### `client.behavior.setAttention(mode)` → `Promise` + +Set Jibo's engagement mode. + +```js +await client.behavior.setAttention(AttentionMode.Engaged); +``` + +See [AttentionMode](#attentionmode) for all values. + +--- + +### `client.behavior.say(text, options?)` → `Promise` + +Speak text or ESML. Automatically sanitizes input and chunks long text. Resolves when speech finishes physically. 
+| Option | Type | Default | Description | +|---|---|---|---| +| `maxChunkLen` | `number` | `450` | Max chars per ROM `Say` command | +| `maxTotal` | `number` | `3000` | Max total chars; excess trimmed with `…` | +| `chunkDelay` | `number` | `600` | ms pause between chunks | +| `signal` | `AbortSignal` | `null` | Cancel mid-speech | + +```js +// Simple +await client.behavior.say("Hello! I'm Jibo."); + +// With ESML tags +await client.behavior.say("<anim cat='happy'/> Great to meet you!"); + +// Cancellable +const controller = new AbortController(); +setTimeout(() => controller.abort(), 3000); +await client.behavior.say(longText, { signal: controller.signal }); +``` + +Rejects with `{ code: 'SAY_TIMEOUT' }` if the robot stops responding mid-speech and the call was not cancelled via `signal`. + +--- + +### `client.behavior.lookAtAngle(theta, psi, options?)` → `Promise` + +Look at an angle in degrees. Resolves when `onLookAtAchieved` fires. If a look is already in-flight, the new angle is queued and the promise resolves immediately. + +- **theta** — yaw, degrees. Positive = right. Clamped ±180°. +- **psi** — pitch, degrees. Positive = up. Clamped ±30°. + +| Option | Type | Default | +|---|---|---| +| `track` | `boolean` | `false` | +| `timeout` | `number` | `5000` ms | + +```js +await client.behavior.lookAtAngle(30, 10); // look right and slightly up +await client.behavior.lookAtAngle(0, 0); // center +``` + +### `client.behavior.nudge(dTheta, dPsi)` → `Promise` + +Nudge head by a relative delta from the current position. + +```js +await client.behavior.nudge(15, 0); // rotate 15° right +``` + +### `client.behavior.lookAtScreen(x, y)` → `Promise` + +Look at a pixel coordinate on the camera image (640×480). + +### `client.behavior.lookAtPosition(x, y, z)` → `Promise` + +Look at a world-relative 3D position in millimetres. + +### `client.behavior.lookAtEntity(entityId, track?)` → `Promise` + +Look at a tracked entity by ID. `track` defaults to `true`.
When tracking, the promise resolves after the first `onLookAtAchieved` rather than waiting for tracking to end. + +```js +client.on('trackCreate', async (track) => { + await track.lookAt(); // shorthand — calls this internally +}); +``` + +### `client.behavior.lookAt(target, options?)` → `Promise` + +Raw LookAt for advanced use. `target` is the ROM `LookAtTarget` shape: + +| Shape | Description | +|---|---| +| `{ Angle: [theta_rad, psi_rad] }` | Radians | +| `{ ScreenCoords: [x, y] }` | Camera pixels | +| `{ Position: [x, y, z] }` | World mm | +| `{ Entity: id }` | Entity ID | + +Options: `track` (boolean, default `false`), `levelHead` (boolean, default `false`), `timeout` (ms, default `5000`). + +--- + +### `client.behavior.playAnim(animName)` → `Promise` + +Play a named animation. Resolves when the animation finishes. + +```js +await client.behavior.playAnim('pleased_01'); +``` + +### `client.behavior.playAnimCat(cat, options?)` → `Promise` + +Play an animation by emotional category. + +| Option | Type | Default | Description | +|---|---|---|---| +| `filter` | `string\|null` | `null` | e.g. `'music, rom-upbeat'` | +| `nonBlocking` | `boolean` | `false` | If true, resolves immediately | + +```js +await client.behavior.playAnimCat('excited'); +await client.behavior.playAnimCat('dance', { filter: 'music, rom-upbeat' }); +client.behavior.playAnimCat('happy', { nonBlocking: true }); // fire and forget +``` + +--- + +## `client.audio` — AudioManager + +### `client.audio.awaitSpeech(options?)` → `Promise` + +Listen for speech and resolve with the transcript. Rejects with `{ code: 'SPEECH_TIMEOUT' }` if no speech is detected within `time`. 
+ +| Option | Type | Default | Description | +|---|---|---|---| +| `mode` | `'local' \| 'cloud'` | `'local'` | `'local'` uses on-robot ASR (no cloud required) | +| `time` | `number` | `15000` | Max ms to wait for speech | +| `noSpeechTime` | `number` | `5000` | Max ms of silence before giving up (cloud mode) | +| `languageCode` | `string` | `'en-US'` | | + +```js +try { + const speech = await client.audio.awaitSpeech({ mode: 'local', time: 10000 }); + console.log(speech.content); // transcript string + console.log(speech.languageCode); // 'en-US' +} catch (err) { + if (err.code === 'SPEECH_TIMEOUT') console.log('Nothing heard.'); +} +``` + +### `client.audio.setVolume(level)` → `Promise` + +Set audio mixer volume. `level` is `0.0`–`1.0`. + +### `client.audio.watchWakeword(asrPort?)` + +Connect to the always-on resident ASR wakeword stream (port 8088). Fires `'hotword'` events on the `Client` with a `HotwordEvent` payload. No-op if already watching. + +```js +client.audio.watchWakeword(); +client.on('hotword', (event) => { + console.log(`"${event.utterance}" — score ${event.score}`); +}); +``` + +### `client.audio.stopWakeword()` + +Stop the wakeword listener. + +--- + +## `client.camera` — CameraManager + +### `client.camera.takePhoto(options?)` → `Promise` + +Take a still photo. Resolves with a `Photo` object once the robot signals ready. + +| Option | Type | Default | Options | +|---|---|---|---| +| `camera` | `string` | `'Right'` | `Camera.Left`, `Camera.Right` | +| `resolution` | `string` | `'HighRes'` | `Resolution.*` values | +| `distortion` | `boolean` | `false` | | +| `timeout` | `number` | `15000` ms | | + +```js +const photo = await client.camera.takePhoto({ resolution: Resolution.MedRes }); +const buffer = await photo.fetchBuffer(); +fs.writeFileSync('shot.jpg', buffer); +``` + +### `client.camera.startVideo(options?)` → `Promise` + +Start a video stream. Resolves with a `VideoStream` once the robot signals ready. 
+ +| Option | Type | Default | +|---|---|---| +| `type` | `string` | `VideoType.Normal` (`'NORMAL'`) | +| `timeout` | `number` | `10000` ms | + +```js +const stream = await client.camera.startVideo(); +console.log(stream.uri); // Jibo URI for the MJPEG stream +await stream.pipe(fs.createWriteStream('out.mjpeg')); +stream.stop(); +``` + +### `client.camera.stopVideo()` + +Stop the active video stream. + +--- + +## `client.display` — DisplayManager + +All display methods are fire-and-forget (no await needed). + +### `client.display.showEye(name?)` + +Show Jibo's eye animation. Default: `'default'`. + +### `client.display.showText(text, name?)` + +Display text on Jibo's screen. `name` is the view slot name (default `'view'`). + +### `client.display.showImage(src, name?)` + +Display an image on Jibo's screen. `src` is a URL. + +--- + +## `client.assets` — AssetManager + +### `client.assets.fetch(uri, name, timeout?)` → `Promise` + +Download a remote file and cache it on the robot under `name`. Rejects with `{ code: 'ASSET_FAILED' }` on error or `{ code: 'ASSET_TIMEOUT' }` after `timeout` ms (default 30000). + +```js +await client.assets.fetch('https://example.com/sound.mp3', 'mysound'); +``` + +### `client.assets.unload(name)` + +Remove a cached asset from the robot by name. + +--- + +## Structures + +Rich objects emitted by events or returned from manager methods. All have a `_client` back-reference for calling methods. + +### `Track` + +Emitted by `'trackCreate'`, `'trackUpdate'`, `'trackDelete'` and stored in `client.tracks`. + +| Property | Type | Description | +|---|---|---| +| `id` | `number` | ROM EntityID | +| `screenCoords` | `{ x, y } \| null` | Position on camera image | +| `worldCoords` | `{ x, y, z } \| null` | 3D world position in mm | + +```js +track.lookAt(track = true) // → Promise +``` + +### `SpeechResult` + +Resolved by `client.audio.awaitSpeech()`. 
+ +| Property | Type | +|---|---| +| `content` | `string` — transcript | +| `languageCode` | `string` — e.g. `'en-US'` | + +### `Photo` + +Resolved by `client.camera.takePhoto()`. + +| Property | Type | +|---|---| +| `uri` | `string` — Jibo-internal URI | +| `name` | `string` | +| `angleTarget` | `object \| null` | +| `positionTarget` | `object \| null` | + +```js +photo.fetchBuffer() // → Promise +photo.pipe(writableStream) // → Promise +``` + +### `VideoStream` + +Resolved by `client.camera.startVideo()`. + +| Property | Type | +|---|---| +| `uri` | `string` — Jibo-internal URI for the MJPEG stream | +| `active` | `boolean` | + +```js +stream.pipe(writableStream) // → Promise +stream.stop() +``` + +### `Motion` + +Emitted by `'motionDetected'`. + +| Property | Type | +|---|---| +| `zones` | `MotionZone[]` | + +Each `MotionZone`: `{ screenCoords: {x,y}|null, worldCoords: {x,y,z}|null, intensity: number|null }`. + +### `HeadTouchEvent` + +Emitted by `'headTouch'`. + +| Property | Type | +|---|---| +| `pads` | `boolean[]` — all 6 pads in order | +| `activePads` | `string[]` — names of currently-pressed pads | + +Pad order / names: `frontLeft`, `middleLeft`, `backLeft`, `frontRight`, `middleRight`, `backRight`. + +```js +event.isTouched('frontLeft') // → boolean +``` + +### `GestureEvent` + +Emitted by `'gesture'`. + +| Property | Type | +|---|---| +| `type` | `'Tap' \| 'Swipe'` | +| `coordinate` | `{ x, y } \| null` — tap position | +| `direction` | `'Up' \| 'Down' \| 'Left' \| 'Right' \| null` — swipe direction | +| `isTap` | `boolean` | +| `isSwipe` | `boolean` | + +### `HotwordEvent` + +Emitted by `'hotword'`. + +| Property | Type | +|---|---| +| `utterance` | `string` — e.g. 
`'hey jibo'` | +| `score` | `number` | +| `timestamp` | `string` — ISO 8601 | + +--- + +## Constants + +```js +const { + AttentionMode, // Off, Idle, Disengage, Engaged, Speaking, Fixated, Attractable, Menu, Command + Camera, // Left, Right + Resolution, // HighRes, MedRes, LowRes, MicroRes + VideoType, // Normal ('NORMAL'), Debug ('DEBUG') + GestureType, // Tap, SwipeDown, SwipeUp, SwipeRight, SwipeLeft + HEAD_TOUCH_PADS // ['frontLeft','middleLeft','backLeft','frontRight','middleRight','backRight'] +} = require('rom-control'); +``` + +### AttentionMode + +| Value | Description | +|---|---| +| `Off` | Disengage all attention systems | +| `Idle` | Low-power idle | +| `Disengage` | Stop tracking, return to neutral | +| `Engaged` | Actively track and engage | +| `Speaking` | Speaking mode | +| `Fixated` | Lock gaze on current target | +| `Attractable` | Look toward movement and sound | +| `Menu` | Menu interaction mode | +| `Command` | Command input mode | + +--- + +## ESML Utilities + +Exported for callers that build raw ESML strings. Used internally by `client.behavior.say()`. + +### `sanitizeEsml(text)` → `string` + +Strip characters rejected by Jibo's ROM parser: emoji, bare `&`, non-ASCII, markdown formatting, `` tags, and newlines. Preserves valid ESML tags (``, ``, `