diff --git a/package.json b/package.json index 24cd784..c3aa077 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "rom-control", - "version": "2.0.1", + "version": "2.0.2", "description": "Discord.js-style OOP client for the Jibo ROM WebSocket API", "main": "./index.js", "exports": { diff --git a/src/Client.js b/src/Client.js index e98e295..f74bd9b 100644 --- a/src/Client.js +++ b/src/Client.js @@ -122,8 +122,9 @@ class Client extends EventEmitter { conn.on('disconnected', () => { this.tracks.clear(); this.emit('disconnect'); }); conn.on('error', (err) => this.emit('error', err)); - // Entity tracking - conn.on('onTrackGained', (txId, body) => { + // Entity tracking — wire event names per APK Command$EventType @SerializedName: + // "onEntityGained" / "onEntityUpdate" / "onEntityLost" (NOT onTrackGained/Update/Lost). + conn.on('onEntityGained', (txId, body) => { for (const raw of (body.Tracks || [])) { const track = new Track(raw, this); this.tracks.set(track.id, track); @@ -131,7 +132,7 @@ class Client extends EventEmitter { } }); - conn.on('onTrackUpdate', (txId, body) => { + conn.on('onEntityUpdate', (txId, body) => { for (const raw of (body.Tracks || [])) { const existing = this.tracks.get(raw.EntityID); // Shallow-clone the old track so listeners get a frozen snapshot @@ -142,7 +143,7 @@ class Client extends EventEmitter { } }); - conn.on('onTrackLost', (txId, body) => { + conn.on('onEntityLost', (txId, body) => { for (const raw of (body.Tracks || [])) { const track = this.tracks.get(raw.EntityID) || new Track(raw, this); this.tracks.delete(track.id); @@ -155,8 +156,8 @@ class Client extends EventEmitter { this.emit('motionDetected', new Motion(body)); }); - // Head touch - conn.on('onHeadTouched', (txId, body) => { + // Head touch — wire event is "onHeadTouch" (singular), not "onHeadTouched". 
+ conn.on('onHeadTouch', (txId, body) => { this.emit('headTouch', new HeadTouchEvent(body)); }); diff --git a/src/connection.js b/src/connection.js index 637f347..ed3a176 100644 --- a/src/connection.js +++ b/src/connection.js @@ -9,14 +9,21 @@ const crypto = require('crypto'); const http = require('http'); const { sanitizeEsml, chunkEsml } = require('./util/esml'); +// Command Type values match com.jibo.atk.model.Command$CommandType (PascalCase per Gson @SerializedName). +// All stream subscriptions (Entity / Motion / HeadTouch / ScreenGesture / Speech) go through Type:'Subscribe' +// with a different StreamType value — even though the APK enum lists ScreenGesture as its own top-level Type, +// the live firmware's JSON schema only validates the Subscribe sub-schema and rejects Type:'ScreenGesture'. +// UnloadAsset is intentionally absent — it's not in the firmware's CommandType enum either. const DEFAULT_COMMAND_SET = Object.freeze([ 'StartSession', 'GetConfig', 'SetConfig', 'Cancel', 'SetAttention', 'Say', 'Listen', 'LookAt', - 'TakePhoto', 'Video', 'Display', 'FetchAsset', 'UnloadAsset', 'Subscribe', + 'TakePhoto', 'Video', 'Display', 'FetchAsset', + 'Subscribe', ]); +// StreamType values match com.jibo.atk.model.Command$StreamTypes (PascalCase per Gson @SerializedName). 
const DEFAULT_STREAM_SET = Object.freeze([ - 'Entity', 'Motion', 'HeadTouch', 'ScreenGesture', 'HotWord', + 'Entity', 'Motion', 'HeadTouch', 'ScreenGesture', 'Speech', 'HotWord', ]); // ── HTTP helpers ───────────────────────────────────────────────────────────── @@ -258,11 +265,10 @@ class RomConnection extends EventEmitter { this.version = msg.Response.ResponseBody.Version || '1.0'; if (this.autoSubscribe) { - this._txSend({ Type: 'Subscribe', StreamType: 'Entity' }); - this._txSend({ Type: 'Subscribe', StreamType: 'Motion' }); - this._txSend({ Type: 'Subscribe', StreamType: 'HeadTouch', StreamFilter: {} }); - this._txSend({ Type: 'Subscribe', StreamType: 'ScreenGesture', - StreamFilter: { Type: 'Tap', Area: { x: 0, y: 0, width: 1, height: 1 } } }); + //this.subscribeEntity(); + //this.subscribeMotion(); + this.subscribeHeadTouch(); + this.subscribeScreenGesture(); } if (this.autoHeartbeat) this._startHeartbeat(); @@ -308,6 +314,11 @@ class RomConnection extends EventEmitter { } } + if (process.env.ROM_DEBUG) { + const cmdType = txId ? this._txCommands.get(txId) : null; + const sliceLen = body && body.ResponseCode >= 400 ? 4000 : 200; + console.log('[ROM<<]', evtName || '(response)', 'tx=' + (txId ? txId.slice(0,8) : 'none'), cmdType ? 'forCmd=' + cmdType : '', body ? JSON.stringify(body).slice(0, sliceLen) : ''); + } if (evtName) { this.emit('event', txId, body); this.emit(evtName, txId, body); @@ -326,6 +337,7 @@ class RomConnection extends EventEmitter { _txSend(command) { const txId = this._txId(); + const wsState = this.ws ? this.ws.readyState : 'no-ws'; if (this.ws && this.ws.readyState === WebSocket.OPEN) { // Don't send any command except StartSession before the session ID arrives. // Commands sent with an empty SessionID are rejected by ROM with 403 Forbidden. 
@@ -338,7 +350,7 @@ class RomConnection extends EventEmitter { this._txCommands.delete(this._txCommands.keys().next().value); } - this.ws.send(JSON.stringify({ + const frame = JSON.stringify({ ClientHeader: { TransactionID: txId, SessionID: this.sessionID, @@ -347,7 +359,11 @@ class RomConnection extends EventEmitter { Version: this.version, }, Command: command, - })); + }); + if (process.env.ROM_DEBUG) console.log('[ROM>>]', command.Type, 'tx=' + txId.slice(0,8), frame); + this.ws.send(frame); + } else { + if (process.env.ROM_DEBUG) console.warn('[ROM>>] DROPPED', command.Type, 'wsState=' + wsState); } return txId; } @@ -562,7 +578,7 @@ class RomConnection extends EventEmitter { lookAtEntity(entityId, track = true) { return this.lookAt({ Entity: entityId }, track, false); } // Camera - takePhoto(camera = 'Right', resolution = 'HighRes', distortion = false) { + takePhoto(camera = 'right', resolution = 'highRes', distortion = false) { return this._txSend({ Type: 'TakePhoto', Camera: camera, Resolution: resolution, Distortion: distortion }); } @@ -606,13 +622,29 @@ class RomConnection extends EventEmitter { // Assets fetchAsset(uri, name) { return this._txSend({ Type: 'FetchAsset', URI: uri, Name: name }); } - unloadAsset(name) { return this._txSend({ Type: 'UnloadAsset', Name: name }); } // Subscriptions - subscribe(streamType, filter = null) { - const cmd = { Type: 'Subscribe', StreamType: streamType }; - if (filter != null) cmd.StreamFilter = filter; - return this._txSend(cmd); + // + // Wire shape derived from the APK's Command$BaseSubscribeCommand: + // Entity / Motion / HeadTouch / Speech → Type:'Subscribe', StreamType:<...>, StreamFilter:'' + // ScreenGesture → the APK uses Type:'ScreenGesture', but the firmware rejects that, so it is sent as Type:'Subscribe', StreamType:'ScreenGesture', + // StreamFilter:{ Type:gestureType, Area:{x,y,width,height|radius} } + // Speech adds a top-level Listen:bool. 
+ // + // The firmware's JSON schema requires StreamFilter on every subscribe variant — empty string + // satisfies it for the non-ScreenGesture streams (the field is typed `String` in the APK). + subscribeEntity() { return this._txSend({ Type: 'Subscribe', StreamType: 'Entity', StreamFilter: '' }); } + subscribeMotion() { return this._txSend({ Type: 'Subscribe', StreamType: 'Motion', StreamFilter: '' }); } + subscribeHeadTouch() { return this._txSend({ Type: 'Subscribe', StreamType: 'HeadTouch', StreamFilter: '' }); } + subscribeSpeech(listen = true) { + return this._txSend({ Type: 'Subscribe', StreamType: 'Speech', StreamFilter: '', Listen: !!listen }); + } + subscribeScreenGesture(filter = { Type: 'Tap', Area: { x: 0, y: 0, width: 1, height: 1 } }) { + return this._txSend({ Type: 'Subscribe', StreamType: 'ScreenGesture', StreamFilter: filter }); + } + // Generic escape hatch — caller must supply the correct shape. + subscribe(streamType, filter = '') { + return this._txSend({ Type: 'Subscribe', StreamType: streamType, StreamFilter: filter }); } // Wakeword diff --git a/src/constants.js b/src/constants.js index b81f781..9cde334 100644 --- a/src/constants.js +++ b/src/constants.js @@ -1,27 +1,27 @@ 'use strict'; const AttentionMode = Object.freeze({ - Off: 'Off', - Idle: 'Idle', - Disengage: 'Disengage', - Engaged: 'Engaged', - Speaking: 'Speaking', - Fixated: 'Fixated', - Attractable: 'Attractable', - Menu: 'Menu', - Command: 'Command', + Off: 'OFF', + Idle: 'IDLE', + Disengage: 'DISENGAGE', + Engaged: 'ENGAGED', + Speaking: 'SPEAKING', + Fixated: 'FIXATED', + Attractable: 'ATTRACTABLE', + Menu: 'MENU', + Command: 'COMMAND', }); const Camera = Object.freeze({ - Left: 'Left', - Right: 'Right', + Left: 'left', + Right: 'right', }); const Resolution = Object.freeze({ - HighRes: 'HighRes', - MedRes: 'MedRes', - LowRes: 'LowRes', - MicroRes: 'MicroRes', + HighRes: 'highRes', + MedRes: 'medRes', + LowRes: 'lowRes', + MicroRes: 'microRes', }); const VideoType = 
Object.freeze({ diff --git a/src/managers/AssetManager.js b/src/managers/AssetManager.js index 7abd891..d1ef731 100644 --- a/src/managers/AssetManager.js +++ b/src/managers/AssetManager.js @@ -35,13 +35,6 @@ class AssetManager { } } - /** - * Remove a cached asset from the robot. - * @param {string} name The cache key used in fetch() - */ - unload(name) { - this._conn.unloadAsset(name); - } } module.exports = AssetManager; diff --git a/src/managers/BehaviorManager.js b/src/managers/BehaviorManager.js index ca3e10b..8e0ebde 100644 --- a/src/managers/BehaviorManager.js +++ b/src/managers/BehaviorManager.js @@ -144,6 +144,13 @@ class BehaviorManager { /** * Play an animation by emotional category. + * + * In nonBlocking mode the call resolves once the firmware has acknowledged + * the command — *not* once the animation has finished. Awaiting the ACK + * (rather than returning synchronously) prevents a follow-up Say from + * racing the anim's Say frame on the wire, which corrupts the firmware's + * ESML parser state and yields an "Unexpected token ] in JSON" error. + * * @param {string} cat e.g. 'happy', 'excited', 'sad', 'dance', 'emoji' * @param {object} [options] * @param {string} [options.filter] e.g. 'music, rom-upbeat' @@ -153,7 +160,12 @@ class BehaviorManager { async playAnimCat(cat, options = {}) { const { filter = null, nonBlocking = false } = options; const txId = this._conn.playAnimCat(cat, filter, nonBlocking); - if (nonBlocking) return; + if (nonBlocking) { + // Wait for the 202 Accepted (or any txId-matching message) so the next + // Say is not in-flight at the same time as this one. 
+ await this._conn.awaitAck(txId, 5000); + return; + } const result = await this._conn.awaitDone(txId, 30000); if (!result) throw Object.assign(new Error(`playAnimCat('${cat}') timed out`), { code: 'ANIM_TIMEOUT' }); } diff --git a/src/managers/CameraManager.js b/src/managers/CameraManager.js index 82d4e5f..4caaab8 100644 --- a/src/managers/CameraManager.js +++ b/src/managers/CameraManager.js @@ -27,8 +27,8 @@ class CameraManager { */ async takePhoto(options = {}) { const { - camera = 'Right', - resolution = 'HighRes', + camera = 'right', + resolution = 'highRes', distortion = false, timeout = 15000, } = options; diff --git a/src/structures/Motion.js b/src/structures/Motion.js index 7d0131a..08f715a 100644 --- a/src/structures/Motion.js +++ b/src/structures/Motion.js @@ -2,8 +2,9 @@ class MotionZone { constructor(raw) { + // ScreenCoords arrives as a 4-element bounding box [x, y, width, height]. this.screenCoords = raw.ScreenCoords - ? { x: raw.ScreenCoords[0], y: raw.ScreenCoords[1] } + ? { x: raw.ScreenCoords[0], y: raw.ScreenCoords[1], width: raw.ScreenCoords[2], height: raw.ScreenCoords[3] } : null; this.worldCoords = raw.WorldCoords ? { x: raw.WorldCoords[0], y: raw.WorldCoords[1], z: raw.WorldCoords[2] } diff --git a/src/structures/Track.js b/src/structures/Track.js index 7070e47..9202bd7 100644 --- a/src/structures/Track.js +++ b/src/structures/Track.js @@ -2,9 +2,13 @@ class Track { constructor(raw, client) { - this.id = raw.EntityID; + this.id = raw.EntityID; + // EntityType per APK @SerializedName: lowercase 'person' / 'unknown'. + this.type = raw.Type ?? null; + this.confidence = raw.Confidence ?? null; + // ScreenCoords arrives as a 4-element bounding box [x, y, width, height]. this.screenCoords = raw.ScreenCoords - ? { x: raw.ScreenCoords[0], y: raw.ScreenCoords[1] } + ? { x: raw.ScreenCoords[0], y: raw.ScreenCoords[1], width: raw.ScreenCoords[2], height: raw.ScreenCoords[3] } : null; this.worldCoords = raw.WorldCoords ? 
{ x: raw.WorldCoords[0], y: raw.WorldCoords[1], z: raw.WorldCoords[2] } diff --git a/src/util/esml.js b/src/util/esml.js index 32bd940..a631ebd 100644 --- a/src/util/esml.js +++ b/src/util/esml.js @@ -18,9 +18,7 @@ function sanitizeEsml(text) { } function chunkEsml(text, maxLen = 450) { - if (text.length <= maxLen) { - return [/<[a-zA-Z]/.test(text) ? text : ` ${text}`]; - } + if (text.length <= maxLen) return [text]; const chunks = []; let remaining = text; @@ -54,9 +52,7 @@ function chunkEsml(text, maxLen = 450) { if (remaining.trim()) chunks.push(remaining.trim()); - return chunks - .filter(c => c.length > 0) - .map(c => /<[a-zA-Z]/.test(c) ? c : ` ${c}`); + return chunks.filter(c => c.length > 0); } module.exports = { sanitizeEsml, chunkEsml };