2026-03-20 03:32:49 +02:00
/* eslint-disable no-console */
const fs = require ( 'fs' ) ;
const http = require ( 'http' ) ;
const https = require ( 'https' ) ;
const crypto = require ( 'crypto' ) ;
/**
 * Loads the `ws` WebSocket module.
 *
 * Standard Node resolution is attempted first (server install or a local
 * node_modules). If that throws for any reason, the copy located under the
 * on-robot skills runtime directory is required instead.
 *
 * @returns {*} The exports of whichever `ws` module was resolved.
 * @throws Whatever the fallback `require` throws when it also fails.
 */
function requireWs() {
  let wsModule;
  try {
    wsModule = require('ws');
  } catch (_) {
    // On-robot usage: `ws` already exists under the skills runtime.
    wsModule = require('/opt/jibo/Jibo/Skills/@be/be/node_modules/ws');
  }
  return wsModule;
}

// Resolved once at module load; used for both the ASR client and the hub server.
const WebSocket = requireWs();
/**
 * Current wall-clock time.
 *
 * @returns {number} Milliseconds since the Unix epoch, as reported by `Date.now()`.
 */
function nowMs() {
  const epochMs = Date.now();
  return epochMs;
}
/**
 * Generates a random identifier in 8-4-4-4-12 hex layout.
 *
 * Uses `crypto.randomUUID()` when the runtime provides it; otherwise builds
 * the same layout from random bytes (the fallback does not set the RFC 4122
 * version/variant bits).
 *
 * @returns {string} A hyphen-separated lowercase hex identifier.
 */
function uuid() {
  if (typeof crypto.randomUUID !== 'function') {
    // Byte counts per group; each byte becomes two hex characters.
    const groupByteLengths = [4, 2, 2, 2, 6];
    const groups = [];
    for (const byteLength of groupByteLengths) {
      groups.push(crypto.randomBytes(byteLength).toString('hex'));
    }
    return groups.join('-');
  }
  return crypto.randomUUID();
}
/**
 * Synchronously reads a UTF-8 file and parses it as JSON.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {*} The parsed JSON value.
 * @throws If the file cannot be read or its contents are not valid JSON.
 */
function loadJson(filePath) {
  return JSON.parse(fs.readFileSync(filePath, 'utf8'));
}
/**
 * Promise-based delay.
 *
 * @param {number} ms - Delay passed to `setTimeout`, in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Builds a console logger that prefixes every line with `[hub-shim]` and
 * drops messages more verbose than the configured level.
 *
 * @param {string} level - One of `error`, `warn`, `info`, `debug`. Any other
 *   value (including names of inherited object properties such as
 *   `toString`) falls back to `info`.
 * @returns {{error: Function, warn: Function, info: Function, debug: Function}}
 *   Logging functions; each forwards its arguments to the matching console
 *   method (`info` and `debug` both use `console.log`).
 */
function logFactory(level) {
  const order = { error: 0, warn: 1, info: 2, debug: 3 };
  // Only own keys count: `order['toString']` would otherwise yield a function
  // and every `threshold >= n` comparison would be false, silencing errors too.
  const isKnownLevel = Object.prototype.hasOwnProperty.call(order, level);
  const threshold = isKnownLevel ? order[level] : order.info;
  return {
    error: (...args) => threshold >= order.error && console.error('[hub-shim]', ...args),
    warn: (...args) => threshold >= order.warn && console.warn('[hub-shim]', ...args),
    info: (...args) => threshold >= order.info && console.log('[hub-shim]', ...args),
    debug: (...args) => threshold >= order.debug && console.log('[hub-shim]', ...args),
  };
}
/**
 * Coerces a value to a trimmed string.
 *
 * @param {*} text - Any value; falsy values (including `0`) become `''`.
 * @returns {string} The string form with surrounding whitespace removed.
 */
function normalizeText(text) {
  const value = text || '';
  return String(value).trim();
}
/**
 * Keyword-based yes/no classifier for a spoken utterance.
 *
 * Negative phrases are tested before affirmative ones, so an utterance that
 * contains both kinds of phrase is reported as `'no'`.
 *
 * @param {*} text - Utterance; normalized and lower-cased before matching.
 * @returns {'yes'|'no'|''} The detected polarity, or `''` when neither
 *   pattern matches.
 */
function classifyYesNo(text) {
  const lowered = normalizeText(text).toLowerCase();
  const negative = /\b(no|nope|nah|not really|don't|do not|didn't|did not)\b/;
  const affirmative = /\b(yes|yep|yeah|ya|sure|ok|okay|i did|i do|i liked it|liked it|that was good|that was great|great|good|awesome|amazing)\b/;
  if (negative.test(lowered)) {
    return 'no';
  }
  return affirmative.test(lowered) ? 'yes' : '';
}
/**
 * Wraps recognized text in the ASR result object sent back to the client.
 *
 * @param {string} text - Recognized utterance (stored as given).
 * @returns {{text: string, confidence: number, alternates: Array}} Result
 *   with a fixed confidence of 1 and an empty alternates list.
 */
function buildAsrResult(text) {
  const asrResult = { text };
  asrResult.confidence = 1;
  // Optional field kept minimal: no alternates are ever produced here.
  asrResult.alternates = [];
  return asrResult;
}
/**
 * Assembles an NLU result object.
 *
 * Slot actions come from `slotActionsOverride` when it is a non-empty array
 * (entries are stringified and empty strings removed). Otherwise, when an
 * intent is present, the intent itself is the single slot action and is
 * mirrored into `entities[intent]` unless that key already holds a non-null
 * value.
 *
 * @param {string} intent - Intent name; falsy values become `''`.
 * @param {Array} [rules=[]] - Rules echoed back; a non-array becomes `[]`.
 * @param {Object} [entities={}] - Entities; shallow-copied, never mutated.
 * @param {Array} [slotActionsOverride] - Explicit slot action names.
 * @returns {{rules: Array, intent: string, entities: Object,
 *   slotActions: string[], source: string, confidence: number}} The result;
 *   confidence is 1 when an intent is present and 0 otherwise.
 */
function buildNluResult(intent, rules = [], entities = {}, slotActionsOverride) {
  const resolvedIntent = intent || '';

  let entityMap = {};
  if (entities && typeof entities === 'object') {
    entityMap = { ...entities };
  }

  const hasOverride = Array.isArray(slotActionsOverride) && slotActionsOverride.length > 0;
  let slotActions = [];
  if (hasOverride) {
    slotActions = slotActionsOverride.map((name) => String(name)).filter(Boolean);
  } else if (resolvedIntent) {
    slotActions = [resolvedIntent];
    // Flows that read entities[slotActions[0]] need the intent mirrored there.
    if (entityMap[resolvedIntent] == null) {
      entityMap[resolvedIntent] = resolvedIntent;
    }
  }

  const echoedRules = Array.isArray(rules) ? rules : [];
  return {
    rules: echoedRules,
    intent: resolvedIntent,
    entities: entityMap,
    slotActions,
    source: 'hub-shim',
    confidence: resolvedIntent ? 1 : 0,
  };
}
/**
 * Case-insensitive check for whether a rule list contains a wanted rule.
 *
 * A rule matches when it equals `want`, ends with `/want`, or hits one of
 * the built-in aliases for `launch`, `dance`, `take_photo` and `photo`.
 *
 * @param {Array} rules - Rule names; a non-array yields `false`.
 * @param {string} want - Rule being looked for; falsy yields `false`.
 * @returns {boolean} `true` when at least one rule matches.
 */
function hasRule(rules, want) {
  if (!want || !Array.isArray(rules)) {
    return false;
  }
  const wanted = String(want).toLowerCase();

  return rules.some((rule) => {
    const candidate = String(rule || '').toLowerCase();
    if (!candidate) {
      return false;
    }
    if (candidate === wanted || candidate.endsWith(`/${wanted}`)) {
      return true;
    }
    // Common aliasing patterns.
    switch (wanted) {
      case 'launch':
        return candidate.includes('global_commands_launch') || candidate.includes('commands_launch');
      case 'dance':
        return candidate.includes('tutorial/dance');
      case 'take_photo':
      case 'photo':
        return candidate.includes('tutorial/take_photo') || candidate.includes('take_photo');
      default:
        return false;
    }
  });
}
/**
 * Keyword-based stand-in for cloud NLU: maps recognized text plus the rules
 * of the active listen request to an NLU result.
 *
 * Skill-specific rules are evaluated before the generic patterns. An empty
 * intent is returned when nothing matches, leaving re-prompt / no-match
 * handling to the caller.
 *
 * @param {*} text - Recognized utterance.
 * @param {Array} rules - Rule names from the listen request.
 * @returns {Object} Result produced by `buildNluResult`.
 */
function inferNluFromText(text, rules) {
  const t = normalizeText(text).toLowerCase();
  const result = (intent, entities = {}, slotActions) => buildNluResult(intent, rules, entities, slotActions);
  // Returns the value paired with the first pattern that matches the text, or ''.
  const firstMatch = (table) => {
    const hit = table.find(([pattern]) => pattern.test(t));
    return hit ? hit[1] : '';
  };

  // No text -> empty intent (caller re-prompts or times out).
  if (!t) return result('');

  // Yes/No detection is shared by many rule types; `yn` is 'yes', 'no' or ''.
  const yn = classifyYesNo(text);
  if (!Array.isArray(rules) || !rules.length) return result(yn);

  // -- Skill-specific rule matching (checked BEFORE the generic patterns) --

  // introductions/recognition_type_menu: expects face, name, voice, all.
  if (hasRule(rules, 'recognition_type_menu')) {
    const choice = firstMatch([
      [/\bface\b/, 'face'],
      [/\bname\b/, 'name'],
      [/\bvoice\b/, 'voice'],
      [/\b(all|everything|everyone)\b/, 'all'],
    ]);
    return result(choice || yn);
  }

  // introductions/voice_face_training_menu: similar recognition type choices.
  if (hasRule(rules, 'voice_face_training_menu')) {
    const choice = firstMatch([
      [/\bface\b/, 'face'],
      [/\bname\b/, 'name'],
      [/\bvoice\b/, 'voice'],
      [/\b(all|everything)\b/, 'all'],
    ]);
    return result(choice || yn);
  }

  // introductions yes/no questions: face_capture_ready, did_i_hear_name,
  // did_i_pronounce_name, any_more_intros, recognition_any_more.
  // These rules accept only yes/no, so anything else is a no-match.
  const yesNoRule = /face_capture|did_i_hear|did_i_pronounce|any_more|recognition_any_more/;
  if (rules.some((r) => yesNoRule.test(r))) return result(yn);

  // introductions/intro_looper: expects a loop-member entity that cannot be
  // resolved without real NLU, so only yes/no is honoured here.
  if (hasRule(rules, 'intro_looper')) return result(yn);

  // main-menu/execute_main_menu: map spoken words to menu items.
  if (hasRule(rules, 'execute_main_menu')) {
    const menu = firstMatch([
      [/\bintroduc/, 'introductions'],
      [/\bsurprise/, 'surprise-me'],
      [/\b(time|clock)\b/, 'clock'],
      [/\bphoto\s*booth\b/, 'photobooth'],
      [/\bgallery\b/, 'gallery'],
      [/\b(exercise|workout)\b/, 'exercise'],
      [/\b(radio|music)\b/, 'radio'],
      [/\bsettings?\b/, 'settings'],
      [/\btips?\b/, 'tips-tricks'],
      [/\bfun\b/, 'fun-stuff'],
      [/\bcreate\b/, 'create'],
      [/\b(report|personal)\b/, 'personal-report'],
    ]);
    if (menu) return result('loadMenu', { loadMenu: menu });
    // No menu word: fall through to the generic patterns below.
  }

  // -- Generic patterns --

  // Yes/No (applies to any rule type that accepts it).
  if (yn) return result(yn);

  // Dance / Photo (tutorial rules).
  if (hasRule(rules, 'dance') && /\bdance\b/.test(t)) return result('dance');
  if (hasRule(rules, 'take_photo') && /\b(photo|picture|take a photo|take a picture)\b/.test(t)) {
    return result('take_photo');
  }

  // Global launch commands: only specific well-known commands are mapped.
  // There is deliberately no catch-all, which would break in-skill NLU.
  if (hasRule(rules, 'launch')) {
    const query = normalizeText(text);
    if (/\bdance\b/.test(t)) return result('launch', { skill: 'dance', query }, ['skill']);
    if (/\b(photo|picture|take a photo|take a picture|selfie)\b/.test(t)) {
      return result('launch', { skill: 'photobooth', query }, ['skill']);
    }
  }

  // Default: no match.
  return result('');
}
/**
 * POSTs a JSON payload and resolves with the raw (unparsed) response.
 *
 * @param {string} urlString - Absolute http(s) URL. An invalid URL makes
 *   this function throw synchronously (from `new URL`).
 * @param {*} payload - Value to serialize as the body; falsy becomes `{}`.
 * @param {number} [timeoutMs=6000] - Socket timeout in milliseconds;
 *   non-number values fall back to 6000.
 * @returns {Promise<{statusCode: number, headers: Object, body: string}>}
 *   Resolves for any HTTP status once the response body has ended. Rejects
 *   on request errors, on timeout (`POST timeout`) and on response stream
 *   errors.
 */
function httpJsonPostRaw(urlString, payload, timeoutMs) {
  const effectiveTimeoutMs = typeof timeoutMs === 'number' ? timeoutMs : 6000;
  const u = new URL(urlString);
  const body = Buffer.from(JSON.stringify(payload || {}), 'utf8');
  const isHttps = u.protocol === 'https:';
  const requestOptions = {
    protocol: u.protocol,
    hostname: u.hostname,
    port: u.port || (isHttps ? 443 : 80),
    path: u.pathname + (u.search || ''),
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      // Byte length of the encoded body, not the character count.
      'Content-Length': body.length,
    },
    timeout: effectiveTimeoutMs,
  };

  return new Promise((resolve, reject) => {
    const transport = isHttps ? https : http;
    const req = transport.request(requestOptions, (res) => {
      const chunks = [];
      res.on('data', (d) => chunks.push(d));
      // If the connection drops mid-body, 'end' never fires; without this
      // listener the promise would stay pending forever.
      res.on('error', reject);
      res.on('end', () => {
        resolve({
          statusCode: res.statusCode,
          headers: res.headers || {},
          body: Buffer.concat(chunks).toString('utf8'),
        });
      });
    });
    req.on('error', reject);
    // The 'timeout' event only signals idleness; the request must be
    // destroyed explicitly to abort it.
    req.on('timeout', () => {
      req.destroy(new Error('POST timeout'));
    });
    req.write(body);
    req.end();
  });
}
/**
 * Collects a request body and parses it as JSON.
 *
 * @param {Object} req - Stream-like object emitting `data`, `end` and `error`.
 * @param {number} [maxBytes=262144] - Maximum accepted body size in bytes.
 * @returns {Promise<*>} Resolves with the parsed value, or `{}` for an empty
 *   body. Rejects with `request body too large` (and attempts to destroy the
 *   request) when the limit is exceeded, with `invalid json body` when
 *   parsing fails, or with the stream's own error.
 */
function readJsonBody(req, maxBytes = 256 * 1024) {
  return new Promise((resolve, reject) => {
    const parts = [];
    let received = 0;

    const onData = (chunk) => {
      received += chunk.length || 0;
      if (received > maxBytes) {
        reject(new Error('request body too large'));
        try {
          req.destroy();
        } catch (_) {
          /* ignore */
        }
        return;
      }
      parts.push(chunk);
    };

    const onEnd = () => {
      const raw = Buffer.concat(parts).toString('utf8');
      if (!raw) {
        resolve({});
        return;
      }
      let parsed;
      try {
        parsed = JSON.parse(raw);
      } catch (e) {
        reject(new Error('invalid json body'));
        return;
      }
      resolve(parsed);
    };

    req.on('data', onData);
    req.on('end', onEnd);
    req.on('error', reject);
  });
}
/**
 * Asks an Ollama server for a single, non-streamed answer.
 *
 * `/api/generate` is tried first. If that request throws, its body cannot be
 * parsed, or it returns a non-2xx status, `/api/chat` is used instead.
 *
 * @param {Object} options
 * @param {string} [options.baseUrl='http://127.0.0.1:11434'] - Server root;
 *   trailing slashes are removed.
 * @param {string} [options.model='llama3'] - Model name.
 * @param {string} [options.system] - System prompt; a non-string value falls
 *   back to a generic default.
 * @param {string} options.prompt - User prompt; trimmed before use.
 * @param {number} [options.timeoutMs] - Per-request timeout forwarded to
 *   `httpJsonPostRaw`.
 * @returns {Promise<string>} The trimmed answer; `''` when the prompt is
 *   empty (no request is made) or the server returned no text.
 * @throws {Error} `ollama http <status>` when the chat fallback responds
 *   with a non-2xx status; transport and parse errors of the fallback also
 *   propagate.
 */
async function ollamaAnswer({ baseUrl, model, system, prompt, timeoutMs }) {
  const root = String(baseUrl || 'http://127.0.0.1:11434').replace(/\/+$/, '');
  const modelName = String(model || 'llama3');
  const systemPrompt = typeof system === 'string' ? system : 'Answer conversationally and concisely.';
  const userPrompt = String(prompt || '').trim();
  if (!userPrompt) return '';

  const isSuccess = (statusCode) => statusCode >= 200 && statusCode < 300;

  // Prefer /api/generate; fall back to /api/chat.
  try {
    const generated = await httpJsonPostRaw(`${root}/api/generate`, {
      model: modelName,
      system: systemPrompt,
      prompt: userPrompt,
      stream: false,
    }, timeoutMs);
    if (isSuccess(generated.statusCode)) {
      const parsed = JSON.parse(generated.body || '{}');
      return String(parsed.response || '').trim();
    }
  } catch (_) {
    // Deliberate best-effort: any failure here is retried through /api/chat.
  }

  const chatted = await httpJsonPostRaw(`${root}/api/chat`, {
    model: modelName,
    stream: false,
    messages: [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: userPrompt },
    ],
  }, timeoutMs);
  if (!isSuccess(chatted.statusCode)) {
    throw new Error(`ollama http ${chatted.statusCode}`);
  }
  const chatBody = JSON.parse(chatted.body || '{}');
  return String(chatBody.message && chatBody.message.content ? chatBody.message.content : '').trim();
}
/**
 * Creates the optional general-question-answering HTTP shim, which answers
 * `Question` operations through Ollama.
 *
 * The shim is enabled by `config.gqaShim.enabled` or by the environment
 * variable `JIBO_GQA_SHIM_ENABLE` set to `1` or `true` (case-insensitive).
 * Settings are read from `config.gqaShim` first, then from `JIBO_GQA_SHIM_*`
 * / `OLLAMA_*` environment variables, then from built-in defaults.
 *
 * The operation is taken from the part of the `x-amz-target` header after
 * its last `.`: `Question` and `ListAttribution` are handled, any other POST
 * gets a 400, any GET a plain-text banner, and other methods a 405.
 *
 * @param {Object} config - Parsed shim configuration (may be null).
 * @param {Object} logger - Logger exposing `info` and `warn`.
 * @returns {?{start: function(): Promise<void>, stop: function(): Promise<void>}}
 *   Controller for the HTTP server, or `null` when the shim is disabled.
 *   `start()` rejects if the server fails to start listening.
 */
function createGqaShim(config, logger) {
  const gqa = (config && config.gqaShim) || {};
  const envEnable = String(process.env.JIBO_GQA_SHIM_ENABLE || '').toLowerCase();
  const enabled = !!gqa.enabled || envEnable === '1' || envEnable === 'true';
  if (!enabled) return null;

  const bindHost = gqa.bindHost || process.env.JIBO_GQA_SHIM_BIND_HOST || '0.0.0.0';
  const port = Number(gqa.port || process.env.JIBO_GQA_SHIM_PORT || 8080);
  const timeoutMs = Number(gqa.timeoutMs || process.env.JIBO_GQA_SHIM_TIMEOUT_MS || 20000);
  const ollama = gqa.ollama || {};
  const ollamaBaseUrl = ollama.baseUrl || process.env.OLLAMA_BASE_URL || 'http://127.0.0.1:11434';
  const ollamaModel = ollama.model || process.env.OLLAMA_MODEL || 'llama3';
  const systemPrompt = ollama.systemPrompt || process.env.OLLAMA_SYSTEM_PROMPT || 'You are Jibo, a friendly social robot. Reply in 1-2 short spoken sentences.';

  const AMZ_JSON = { 'content-type': 'application/x-amz-json-1.0' };

  const server = http.createServer(async (req, res) => {
    try {
      if (req.method === 'GET') {
        res.writeHead(200, { 'content-type': 'text/plain' });
        res.end('jibo-gqa-shim\n');
        return;
      }
      if (req.method !== 'POST') {
        res.writeHead(405, { 'content-type': 'application/json' });
        res.end(JSON.stringify({ message: 'method not allowed' }));
        return;
      }

      const target = String((req.headers && (req.headers['x-amz-target'] || req.headers['X-Amz-Target'])) || '').trim();
      // The operation name is whatever follows the last '.' in the target.
      const op = target.includes('.') ? target.split('.').pop() : '';
      const body = await readJsonBody(req);

      if (op === 'Question') {
        const input = (body && (body.Input || body.input)) ? String(body.Input || body.input) : '';
        logger.info('gqa Question', { input: input.slice(0, 120) });
        let answer = '';
        try {
          answer = await ollamaAnswer({
            baseUrl: ollamaBaseUrl,
            model: ollamaModel,
            system: systemPrompt,
            prompt: input,
            timeoutMs,
          });
        } catch (e) {
          // An Ollama failure is reported to the client as "no answer", not as an HTTP error.
          logger.warn('ollama failed', { err: String(e && (e.stack || e.message || e)) });
          answer = '';
        }
        const ok = !!answer;
        const resp = {
          success: ok,
          source: ok ? 'MOCK' : 'No Match',
          message: ok ? undefined : 'No response',
          type: ok ? 'answer' : 'error',
          timestamps: {},
          response: {
            type: 'string',
            payload: answer,
          },
          version: '0.0.1',
        };
        res.writeHead(200, AMZ_JSON);
        res.end(JSON.stringify(resp));
        return;
      }

      if (op === 'ListAttribution') {
        const resp = { success: true, attributions: [], timestamps: {}, version: '0.0.1' };
        res.writeHead(200, AMZ_JSON);
        res.end(JSON.stringify(resp));
        return;
      }

      res.writeHead(400, AMZ_JSON);
      res.end(JSON.stringify({ message: 'unknown operation', target }));
    } catch (e) {
      // writeHead throws once headers are out, which would turn this handler
      // into an unhandled rejection; only set the status when still possible.
      if (!res.headersSent) res.writeHead(500, AMZ_JSON);
      res.end(JSON.stringify({ message: String(e && (e.message || e)) }));
    }
  });

  logger.info('gqa http', { bindHost, port, ollamaBaseUrl, ollamaModel });

  return {
    start: () => new Promise((resolve, reject) => {
      // listen() reports failures (e.g. port in use) through the 'error'
      // event; its callback is only a 'listening' listener and never
      // receives an error argument.
      const onStartError = (err) => reject(err);
      server.once('error', onStartError);
      server.listen(port, bindHost, () => {
        server.removeListener('error', onStartError);
        resolve();
      });
    }),
    stop: () => new Promise((resolve) => server.close(() => resolve())),
  };
}
/**
 * Chooses the highest-scoring non-empty utterance from an ASR result list.
 *
 * Text is read from `utterance`, `Utterance` or `text` and trimmed. When the
 * first two characters of the trimmed text are equal ignoring case, the
 * first character is dropped. The score is read from `score` or `Score`
 * (default 0); on ties the earlier entry wins.
 *
 * @param {Array<Object>} utterances - Candidate utterances.
 * @returns {string} The selected text, or `''` when there is none or the
 *   input cannot be processed.
 */
function pickBestAsrUtterance(utterances) {
  try {
    if (!utterances || !utterances.length) {
      return '';
    }

    let winner = '';
    let winnerScore = -Infinity;
    for (const entry of utterances) {
      const rawText = entry && (entry.utterance || entry.Utterance || entry.text);
      let candidate = rawText ? String(rawText).trim() : '';
      if (!candidate) {
        continue;
      }

      // Drop a doubled leading character such as "Hhello".
      const startsDoubled = candidate.length >= 2
        && candidate[0].toLowerCase() === candidate[1].toLowerCase();
      if (startsDoubled) {
        candidate = candidate.slice(1);
      }

      let score = 0;
      if (typeof entry.score === 'number') {
        score = entry.score;
      } else if (typeof entry.Score === 'number') {
        score = entry.Score;
      }

      // The first usable candidate always wins; later ones need a strictly higher score.
      if (!winner || score > winnerScore) {
        winner = candidate;
        winnerScore = score;
      }
    }
    return winner;
  } catch (_) {
    return '';
  }
}
/**
 * Pulls the final transcript out of an ASR event.
 *
 * The event shape varies between ASR service builds, so utterance lists
 * (top-level or under `payload`) are preferred and plain text fields are
 * accepted as fallbacks.
 *
 * @param {Object} evt - Parsed ASR event.
 * @returns {string} Trimmed transcript, or `''` when nothing usable is found
 *   or the event cannot be read.
 */
function extractFinalTextFromAsrEvent(evt) {
  try {
    const utteranceList = evt.utterances
      || evt.Utterances
      || (evt.payload && (evt.payload.utterances || evt.payload.Utterances));
    const best = pickBestAsrUtterance(utteranceList);
    const bestTrimmed = best && String(best).trim();
    if (bestTrimmed) {
      return bestTrimmed;
    }

    // Fallback fields, in priority order.
    const fallbacks = [
      evt.text,
      evt.transcript,
      evt.utterance,
      evt.Utterance,
      evt.payload && (evt.payload.text || evt.payload.transcript || evt.payload.utterance || evt.payload.Utterance),
    ];
    const usable = fallbacks.find((value) => typeof value === 'string' && value.trim());
    return usable ? usable.trim() : '';
  } catch (_) {
    return '';
  }
}
/**
 * Runs one speech-to-text task against the ASR service and resolves with the
 * final transcript.
 *
 * Protocol used here:
 *   1) connect to the event WebSocket at `wsPath`;
 *   2) HTTP POST `/asr_simple_interface` with `command: 'start'`;
 *   3) wait for a final event (`speech_to_text_final`, or a simple
 *      `{type: 'final', text}` message) and extract the text;
 *   4) HTTP POST `/asr_simple_interface` with `command: 'stop'`.
 *
 * The stop request is best-effort and is attempted before settling in every
 * path except an early WebSocket close.
 *
 * @param {string} asrBaseUrl - http(s) base URL of the ASR service.
 * @param {string} wsPath - Path of the event WebSocket on that host.
 * @param {number} timeoutMs - Overall time limit for receiving a final result.
 * @param {string} audioSourceId - Audio source id; falsy becomes `'alsa1'`.
 * @param {Object} logger - Logger exposing `debug` and `info`.
 * @returns {Promise<string>} The normalized final transcript.
 * @throws Rejects on an invalid base URL, a failed start request, a
 *   WebSocket error or early close, or when `timeoutMs` elapses.
 */
async function asrServiceSttOnce(asrBaseUrl, wsPath, timeoutMs, audioSourceId, logger) {
  const url = new URL(asrBaseUrl);
  const wsUrl = `${url.protocol === 'https:' ? 'wss' : 'ws'}://${url.host}${wsPath}`;
  const baseHttp = `${url.protocol}//${url.host}`;
  const taskId = `hub-shim-${Date.now()}-${Math.floor(Math.random() * 1e9)}`;
  const requestId = `stt_start_${Date.now()}_${Math.floor(Math.random() * 1e9)}`;
  logger.debug('asr connect', { wsUrl, baseHttp, taskId });

  return await new Promise((resolve, reject) => {
    const ws = new WebSocket(wsUrl);
    let done = false;
    const t0 = nowMs();
    let timer;

    // Settles the promise exactly once, cancelling the timeout and closing the socket.
    function finish(err, value) {
      if (done) return;
      done = true;
      if (timer) clearTimeout(timer);
      try {
        ws.close();
      } catch (_) {
        // ignore
      }
      if (err) reject(err);
      else resolve(value);
    }

    // Best-effort stop of the ASR task; failures are ignored on purpose.
    async function stopAlways() {
      const stopPayload = {
        command: 'stop',
        task_id: taskId,
        request_id: `stt_stop_${Date.now()}_${Math.floor(Math.random() * 1e9)}`,
      };
      try {
        await httpJsonPostRaw(`${baseHttp}/asr_simple_interface`, stopPayload, 6000);
      } catch (_) {
        // ignore
      }
    }

    timer = setTimeout(() => {
      stopAlways().finally(() => {
        finish(new Error(`asr timeout after ${timeoutMs}ms`));
      });
    }, timeoutMs);

    ws.on('open', async () => {
      const startPayload = {
        command: 'start',
        task_id: taskId,
        audio_source_id: String(audioSourceId || 'alsa1'),
        hotphrase: 'none',
        speech_to_text: true,
        request_id: requestId,
      };
      try {
        const resp = await httpJsonPostRaw(`${baseHttp}/asr_simple_interface`, startPayload, 8000);
        if (!resp || !resp.statusCode || resp.statusCode < 200 || resp.statusCode >= 300) {
          await stopAlways();
          finish(new Error(`asr start failed: HTTP ${resp && resp.statusCode} ${resp && resp.body ? resp.body.slice(0, 200) : ''}`));
          return;
        }
        logger.info('asr start', { ms: nowMs() - t0, status: resp.statusCode });
      } catch (e) {
        await stopAlways();
        finish(e);
      }
    });

    ws.on('message', (data) => {
      // Messages can still arrive after settling; skip them so no extra
      // stop requests are issued.
      if (done) return;

      let evt;
      try {
        evt = JSON.parse(data.toString('utf8'));
      } catch (_) {
        return;
      }
      if (!evt) return;

      // Legacy/simple protocols: {type:'final', text:'...'}
      const isFinal = evt.type === 'final' || evt.final === true || evt.isFinal === true;
      const simpleText = evt.text || evt.transcript;
      if (isFinal && typeof simpleText === 'string') {
        stopAlways().finally(() => {
          logger.info('asr final(simple)', { ms: nowMs() - t0, text: String(simpleText).slice(0, 80) });
          finish(null, normalizeText(simpleText));
        });
        return;
      }

      const eventType = evt.event_type || evt.eventType || evt.event || evt.type;
      if (eventType !== 'speech_to_text_final') return;

      // Correlate ids when possible, but do not hard-require a match: a
      // mismatch is only logged and the event is still accepted.
      const evTaskId = evt.task_id || evt.taskId || (evt.payload && (evt.payload.task_id || evt.payload.taskId));
      const evRequestId = evt.request_id || evt.requestId || (evt.payload && (evt.payload.request_id || evt.payload.requestId));
      const idMatches = (!evTaskId && !evRequestId)
        || (evTaskId && String(evTaskId) === String(taskId))
        || (evRequestId && String(evRequestId) === String(requestId));
      if (!idMatches) {
        logger.debug('asr final id mismatch (accepting)', {
          taskId,
          requestId,
          evTaskId,
          evRequestId,
        });
      }

      const finalText = extractFinalTextFromAsrEvent(evt);
      if (!finalText) {
        // Keep waiting: a later final event (or the timeout) settles the promise.
        logger.debug('asr final but no text extracted', {
          keys: Object.keys(evt || {}).slice(0, 30),
          payloadKeys: evt && evt.payload ? Object.keys(evt.payload).slice(0, 30) : undefined,
        });
        return;
      }

      stopAlways().finally(() => {
        logger.info('asr final', { ms: nowMs() - t0, text: String(finalText).slice(0, 80) });
        finish(null, normalizeText(finalText));
      });
    });

    ws.on('error', async (e) => {
      await stopAlways();
      finish(e);
    });

    ws.on('close', () => {
      if (!done) {
        finish(new Error('asr ws closed before final'));
      }
    });
  });
}
/**
 * Serializes a message as JSON and sends it over a WebSocket.
 *
 * @param {Object} ws - Object exposing `send(string)`.
 * @param {*} obj - Value to serialize with `JSON.stringify`.
 */
function send(ws, obj) {
  const frame = JSON.stringify(obj);
  ws.send(frame);
}
/**
 * Reads the transaction id header from an incoming request.
 *
 * Several capitalizations of `x-jibo-transid` are tried in a fixed order and
 * the first truthy value wins.
 *
 * @param {Object} req - Request-like object with an optional `headers` map.
 * @returns {*} The header value, or `''` when none of the spellings is set.
 */
function extractTransIdFromHeaders(req) {
  const headers = req.headers || {};
  const spellings = ['x-jibo-transid', 'X-Jibo-TransId', 'x-jibo-transId', 'x-jibo-transID'];
  for (const key of spellings) {
    const value = headers[key];
    if (value) {
      return value;
    }
  }
  return '';
}
/**
 * Builds the hub shim: a WebSocket server that answers listen requests with
 * locally produced ASR/NLU results, plus the optional GQA HTTP shim.
 *
 * Per connection, a turn is handled once both a CONTEXT and a LISTEN message
 * have been received (and, for CLIENT_ASR / CLIENT_NLU modes, once the
 * client-supplied data has arrived). The reply is a single TURN_RESULT
 * message.
 *
 * @param {string} configPath - Path of the JSON configuration file.
 * @returns {{start: function(): Promise<void>, stop: function(): Promise<void>, config: Object}}
 *   Controller for the servers together with the parsed configuration.
 *   `start()` rejects if either server fails to start listening.
 * @throws If the configuration file cannot be read or parsed.
 */
function createHubShim(configPath) {
  const config = loadJson(configPath);
  const logger = logFactory(config?.logging?.level || 'info');

  const listen = config.listen || {};
  const port = Number(listen.port || 9000);
  const bindHost = listen.bindHost || '0.0.0.0';
  const path = listen.path || '/v1/listen';

  const asr = config.asrService || {};
  const asrBaseUrl = asr.baseUrl || 'http://127.0.0.1:8088';
  const wsPath = asr.wsPath || '/simple_port';
  const timeoutMs = Number(asr.timeoutMs || 15000);
  const audioSourceId = asr.audioSourceId || 'alsa1';

  // The robot ASR service is effectively a shared hardware resource.
  // Multiple WS connections/listens may be open concurrently; STT runs are
  // serialized to avoid crosstalk and reduce timeouts.
  let asrQueue = Promise.resolve();
  function runAsrQueued(fn) {
    const queuedAt = nowMs();
    const run = async () => {
      const waited = nowMs() - queuedAt;
      if (waited > 50) logger.debug('asr queue wait', { waitedMs: waited });
      return await fn();
    };
    // Run after the previous task whether it succeeded or failed.
    const p = asrQueue.then(run, run);
    // The queue tail never rejects, so one failure cannot poison later runs.
    asrQueue = p.catch(() => undefined);
    return p;
  }

  const server = http.createServer((req, res) => {
    res.writeHead(200, { 'content-type': 'text/plain' });
    res.end('jibo-hub-shim\n');
  });
  const wss = new WebSocket.Server({ server, path });
  // NOTE(review): per the ws documentation the WebSocketServer emits 'error'
  // for errors of the underlying HTTP server. Without a listener here such an
  // error would throw from emit before start() gets a chance to reject.
  wss.on('error', (e) => {
    logger.warn('wss error', { err: String(e && (e.stack || e.message || e)) });
  });
  logger.info('listen ws', { bindHost, port, path, asrBaseUrl, audioSourceId });

  const gqaShim = createGqaShim(config, logger);

  wss.on('connection', (ws, req) => {
    const connId = uuid().slice(0, 8);
    const transID = extractTransIdFromHeaders(req);
    logger.info('ws connected', { connId, transID: transID || undefined });

    let binaryFrames = 0;
    let binaryBytes = 0;
    let lastContext = null;
    let lastContextMsg = null;
    let pendingListen = null;

    // Answers the pending LISTEN once all required inputs are available.
    async function maybeHandleListen() {
      if (!lastContext || !pendingListen) return;
      const listenMsg = pendingListen;
      const mode = listenMsg?.data?.mode;

      // If the client is responsible for supplying ASR or NLU, wait until we receive it.
      if (mode === 'CLIENT_ASR' && !listenMsg._clientAsrText) return;
      if (mode === 'CLIENT_NLU' && !listenMsg._clientNlu) return;

      // Claim the request before awaiting so it cannot be handled twice.
      pendingListen = null;

      // Echo back the transID that was sent to us. It may appear on the
      // LISTEN message, the CONTEXT message, or the connection headers.
      const msgTransID = listenMsg.transID || (lastContextMsg && lastContextMsg.transID) || transID || '';

      let text = '';
      try {
        if (mode === 'CLIENT_ASR') {
          text = normalizeText(listenMsg._clientAsrText);
        } else if (mode === 'CLIENT_NLU') {
          // No ASR needed for client-supplied NLU.
        } else {
          text = await runAsrQueued(() => asrServiceSttOnce(asrBaseUrl, wsPath, timeoutMs, audioSourceId, logger));
        }
      } catch (e) {
        // Still send an (empty) listen result below.
        logger.warn('asr failed', { connId, err: String(e && (e.stack || e.message || e)) });
      }

      const rules = Array.isArray(listenMsg?.data?.rules) ? listenMsg.data.rules : [];
      let nluRes;
      if (mode === 'CLIENT_NLU' && listenMsg._clientNlu) {
        nluRes = listenMsg._clientNlu;
      } else if (config?.nlu?.enabled === false) {
        nluRes = buildNluResult('', rules, {});
      } else {
        nluRes = inferNluFromText(text, rules);
      }
      const asrRes = buildAsrResult(text);

      // Build the match object; the skill id comes from the last CONTEXT,
      // where `skill` is either an object with `skillID` or a plain string.
      const skillID = (lastContext && lastContext.skill && lastContext.skill.skillID)
        ? lastContext.skill.skillID
        : (lastContext && typeof lastContext.skill === 'string' ? lastContext.skill : '');
      const matchObj = { onRobot: true };
      if (skillID) matchObj.skillID = skillID;

      logger.info('listen result', {
        connId,
        transID: msgTransID || undefined,
        text: String(text || '').slice(0, 120),
        intent: nluRes && nluRes.intent,
        slot0: nluRes && Array.isArray(nluRes.slotActions) ? nluRes.slotActions[0] : undefined,
        skill: nluRes && nluRes.entities ? nluRes.entities.skill : undefined,
        rule0: Array.isArray(rules) ? rules[0] : undefined,
        rulesCount: Array.isArray(rules) ? rules.length : 0,
        rules: Array.isArray(rules) ? rules.slice(0, 6) : [],
      });

      // Send a local TURN_RESULT (not just LISTEN) so the skill's local turn resolves.
      const turnResult = {
        type: 'TURN_RESULT',
        msgID: listenMsg.msgID || uuid(),
        transID: msgTransID,
        ts: nowMs(),
        requestID: msgTransID, // local turn uses transID as requestID
        data: {
          status: 'SUCCEEDED',
          global: false,
          result: {
            asr: asrRes,
            nlu: nluRes,
            match: matchObj,
          },
        },
        final: true,
      };
      send(ws, turnResult);
    }

    ws.on('message', async (data, isBinary) => {
      if (isBinary) {
        binaryFrames += 1;
        try {
          binaryBytes += (data && (data.length || data.byteLength)) || 0;
        } catch (_) {
          // ignore
        }
        // Log the first few frames, then only every 50th, to limit noise.
        if (binaryFrames <= 3 || (binaryFrames % 50) === 0) {
          logger.debug('rx binary', { connId, frames: binaryFrames, bytes: binaryBytes });
        }
        // Ignore audio/binary frames; this shim does not decode audio.
        return;
      }

      let msg;
      try {
        msg = JSON.parse(data.toString('utf8'));
      } catch (e) {
        logger.warn('bad json', { connId, sample: String(data || '').slice(0, 120) });
        return;
      }
      if (!msg || typeof msg.type !== 'string') return;

      logger.debug('rx', { connId, type: msg.type });

      switch (msg.type) {
        case 'CONTEXT':
          lastContext = msg.data;
          lastContextMsg = msg;
          logger.debug('context', {
            connId,
            transID: msg.transID || undefined,
            hasSkill: !!(msg.data && msg.data.skill),
            hasRuntime: !!(msg.data && msg.data.runtime),
          });
          break;
        case 'LISTEN':
          pendingListen = msg;
          logger.debug('listen req', {
            connId,
            transID: msg.transID || undefined,
            hotphrase: !!(msg.data && msg.data.hotphrase),
            mode: msg.data && msg.data.mode,
            rules: Array.isArray(msg.data && msg.data.rules) ? msg.data.rules : [],
          });
          break;
        case 'CLIENT_ASR':
          // Accept client-provided text; synthesize a LISTEN if none is pending.
          if (!pendingListen) {
            pendingListen = { type: 'LISTEN', msgID: uuid(), transID: msg.transID, ts: nowMs(), data: { rules: [], mode: 'CLIENT_ASR' } };
          }
          pendingListen._clientAsrText = msg.data?.text;
          break;
        case 'CLIENT_NLU':
          // Accept client-provided NLU; synthesize a LISTEN if none is pending.
          if (!pendingListen) {
            pendingListen = { type: 'LISTEN', msgID: uuid(), transID: msg.transID, ts: nowMs(), data: { rules: [], mode: 'CLIENT_NLU' } };
          }
          pendingListen._clientNlu = msg.data;
          break;
        default:
          // TRIGGER/proactive not implemented yet.
          break;
      }

      // Normal path: wait for both CONTEXT and LISTEN then handle.
      try {
        await maybeHandleListen();
      } catch (e) {
        logger.warn('listen handler error', { connId, err: String(e && (e.stack || e.message || e)) });
      }
    });

    ws.on('close', () => {
      logger.info('ws closed', { connId });
    });
    ws.on('error', (e) => {
      logger.warn('ws error', { connId, err: String(e && (e.stack || e.message || e)) });
    });
  });

  return {
    start: () => new Promise((resolve, reject) => {
      // listen() reports failures (e.g. port in use) through the 'error'
      // event; its callback is only a 'listening' listener and never
      // receives an error argument.
      const onStartError = (err) => reject(err);
      server.once('error', onStartError);
      server.listen(port, bindHost, async () => {
        server.removeListener('error', onStartError);
        try {
          if (gqaShim) await gqaShim.start();
          resolve();
        } catch (e) {
          reject(e);
        }
      });
    }),
    stop: () => new Promise((resolve) => {
      const done = async () => {
        try {
          if (gqaShim) await gqaShim.stop();
        } finally {
          resolve();
        }
      };
      server.close(() => { done().catch(() => resolve()); });
    }),
    config,
  };
}
/**
 * CLI entry point: loads the configuration, starts the shim and then keeps
 * the process alive indefinitely.
 *
 * The config path is taken from the first CLI argument, then from the
 * `JIBO_HUB_SHIM_CONFIG` environment variable, then defaults to
 * `./config.json`. The process exits with code 2 when that file is missing.
 *
 * @returns {Promise<void>} Never resolves in normal operation; rejects when
 *   the shim cannot be created or started.
 */
async function main() {
  const configPath = process.argv[2] || process.env.JIBO_HUB_SHIM_CONFIG || './config.json';
  if (!fs.existsSync(configPath)) {
    console.error('Config file not found:', configPath);
    process.exit(2);
  }
  const shim = createHubShim(configPath);
  await shim.start();
  // Keep process alive.
  for (;;) await sleep(60_000);
}
// Start the shim only when this file is executed directly (not when required
// as a module). Any startup failure is printed and the process exits with 1.
if (require.main === module) {
  const reportFatal = (e) => {
    console.error('[hub-shim] fatal:', e && (e.stack || e.message || e));
    process.exit(1);
  };
  main().catch(reportFatal);
}