AI Server & jibo AI Server Bridge

This commit is contained in:
2026-03-19 15:39:32 +02:00
parent 216bb1586e
commit 268cee305e
4 changed files with 367 additions and 6 deletions

View File

@@ -0,0 +1,31 @@
{
"enabled": true,
"mode": "TEXT",
"serverBaseUrl": "http://192.168.1.28:8020",
"recordSeconds": 5,
"useDumpStateAudio": true,
"useAsrServiceStt": true,
"asrServiceHost": "127.0.0.1",
"asrServicePort": 8088,
"asrAudioSourceId": "alsa1",
"asrTimeoutMs": 15000,
"asrServiceDebugWs": false,
"asrAutoStart": true,
"wakeupChitchatPhrases": [
"hello",
"howdy",
"hi",
"hey",
"look what i found",
"nice to see you",
"good morning",
"good afternoon",
"good evening"
],
"followupEnabled": true,
"followupDelayMs": 250
}

View File

@@ -72,6 +72,61 @@ try {
WsLib = null; WsLib = null;
} }
/**
 * Escape regular-expression metacharacters so the input can be embedded in a
 * `new RegExp(...)` pattern and matched literally.
 *
 * @param {*} s Value to escape; any falsy value is treated as the empty string.
 * @returns {string} The string form of `s` with each of . * + ? ^ $ { } ( ) | [ ] \
 *   preceded by a backslash.
 */
function escapeRegExp(s) {
  var literal = s ? String(s) : "";
  return literal.replace(/[.*+?^${}()|[\]\\]/g, function (metaChar) {
    return "\\" + metaChar;
  });
}
/**
 * Remove known "Jibo wake chitchat" phrases from the start and end of a
 * transcript. This helps avoid TTS bleed-through becoming the user's prompt.
 *
 * A phrase is only removed when it stands on its own at the edge: it must be
 * followed (at the start) or preceded (at the end) by whitespace/punctuation
 * or by the edge of the string. This keeps "hi" from eating the front of
 * "history" and "hey" from eating the tail of "they".
 *
 * @param {*} text Transcript to clean; falsy values yield "".
 * @param {string[]} phrases Phrases to strip (matched case-insensitively).
 *   Anything that is not an array is treated as an empty list.
 * @returns {string} The trimmed transcript with edge phrases removed. If
 *   anything throws, the string form of the original `text` is returned.
 */
function stripKnownEdgePhrases(text, phrases) {
  // Characters that may separate an edge phrase from the rest of the sentence.
  var SEPARATOR = "[\\s,!.?;:]";
  try {
    var t = String(text || "");
    if (!t) return "";
    // Only real arrays are iterated; a bare string would otherwise be walked
    // one character at a time and strip single letters.
    var list = Array.isArray(phrases) ? phrases : [];
    // Second pass gives every phrase another chance once an earlier removal
    // has exposed it at the edge (e.g. "hi hello, what's up").
    for (var pass = 0; pass < 2; pass += 1) {
      for (var i = 0; i < list.length; i += 1) {
        var p = String(list[i] || "").trim();
        if (!p) continue;
        var escaped = escapeRegExp(p);
        // Leading phrase: must be followed by at least one separator or the end.
        var startRe = new RegExp("^\\s*" + escaped + "(?:" + SEPARATOR + "+|$)", "i");
        // Trailing phrase: must be preceded by at least one separator or the start.
        var endRe = new RegExp("(?:^|" + SEPARATOR + "+)" + escaped + "\\s*$", "i");
        t = t.replace(startRe, "").replace(endRe, "");
      }
      t = t.replace(/^\s+|\s+$/g, "");
    }
    return t;
  } catch (e) {
    // Best effort: never let sanitising break the capture path.
    return String(text || "");
  }
}
/**
 * Decide whether a transcript consists of nothing but a known chitchat phrase.
 *
 * Comparison is exact after trimming and lower-casing both sides; an empty
 * transcript counts as chitchat.
 *
 * @param {*} text Transcript to classify.
 * @param {string[]} phrases Known chitchat phrases; blank entries are skipped.
 * @returns {boolean} true when `text` is empty or equals one of the phrases,
 *   false otherwise (including when anything throws).
 */
function isChitchatOnly(text, phrases) {
  var normalize = function (value) {
    return String(value || "").trim().toLowerCase();
  };
  try {
    var heard = normalize(text);
    if (heard === "") return true;
    var known = phrases && phrases.length ? phrases : [];
    return Array.prototype.some.call(known, function (entry) {
      var candidate = normalize(entry);
      return candidate !== "" && candidate === heard;
    });
  } catch (e) {
    return false;
  }
}
/**
 * Report whether a piece of text finishes with a question mark.
 *
 * Whitespace runs are collapsed and the text is trimmed first. A single
 * closing quote (" or ') after the "?" is tolerated.
 *
 * @param {*} text Text to inspect; falsy values are treated as empty.
 * @returns {boolean} true when the text ends with "?" (optionally followed by
 *   whitespace and one quote character); false for empty text or on error.
 */
function endsWithQuestion(text) {
  var QUESTION_TAIL = /\?\s*["']?$/;
  try {
    var collapsed = String(text || "").replace(/\s+/g, " ").trim();
    return collapsed.length > 0 && QUESTION_TAIL.test(collapsed);
  } catch (e) {
    return false;
  }
}
function httpJsonPost(urlString, payload, timeoutMs) { function httpJsonPost(urlString, payload, timeoutMs) {
timeoutMs = typeof timeoutMs === "number" ? timeoutMs : 15000; timeoutMs = typeof timeoutMs === "number" ? timeoutMs : 15000;
@@ -994,6 +1049,8 @@ function AIBridge(skill, jibo) {
this.skill = skill; this.skill = skill;
this.jibo = jibo; this.jibo = jibo;
this.configPath = "/opt/jibo/Jibo/Skills/@be/be/be/ai-bridge-config.json";
this.enabled = true; this.enabled = true;
// Default to TEXT because the built-in ASR pipeline already owns the mic // Default to TEXT because the built-in ASR pipeline already owns the mic
// on many builds; AUDIO/arecord often fails with "Device or resource busy". // on many builds; AUDIO/arecord often fails with "Device or resource busy".
@@ -1016,6 +1073,17 @@ function AIBridge(skill, jibo) {
this.asrTimeoutMs = 15000; this.asrTimeoutMs = 15000;
this.asrServiceDebugWs = false; this.asrServiceDebugWs = false;
this._asrServiceInFlight = false; this._asrServiceInFlight = false;
this.asrExecutable = "/usr/local/bin/jibo-asr-service";
this.asrLocalConfigPath = "/opt/jibo/Jibo/Skills/@be/be/be/jibo-asr-service.local.json";
this.asrSystemConfigPath = "/usr/local/etc/jibo-asr-service.json";
this.asrAutoStart = true;
this._asrSpawnAttempted = false;
// Filter for common wake-up phrases Jibo says (to avoid self-prompting).
this.wakeupChitchatPhrases = ["hello", "howdy", "hi", "hey", "look what i found", "nice to see you", "good morning", "good afternoon", "good evening"];
// Follow-up mode: if our AI reply ends with '?', listen once more automatically.
this.followupEnabled = true;
this.followupDelayMs = 250; // after TTS completes
this._followupTurnsLeft = 0;
this._inFlight = false; this._inFlight = false;
this._lastHJAt = 0; this._lastHJAt = 0;
@@ -1040,6 +1108,128 @@ function AIBridge(skill, jibo) {
}; };
} }
/**
 * Overlay settings from the JSON file at `this.configPath` onto this bridge.
 *
 * Each key is applied only when it is present with the expected type;
 * anything else leaves the current value untouched. Numeric settings are
 * floored and clamped (recordSeconds 1-15, asrTimeoutMs 2000-60000,
 * followupDelayMs 0-5000). `asrServicePort` must be an integer in 1-65535.
 * `wakeupChitchatPhrases` must be an array; only its non-empty string entries
 * (trimmed) are kept, and an empty result leaves the existing list in place.
 *
 * Never throws: a missing/unreadable/invalid file is logged through `rlog`
 * (when available) and the existing settings are kept.
 */
AIBridge.prototype._loadConfig = function () {
  var self = this;

  // Drop the fractional part, then clamp into [lo, hi].
  var clampInt = function (value, lo, hi) {
    return Math.max(lo, Math.min(hi, Math.floor(value)));
  };
  // Copy each listed key onto the bridge when the config value has `type`.
  var copyTyped = function (cfg, keys, type) {
    keys.forEach(function (key) {
      if (typeof cfg[key] === type) self[key] = cfg[key];
    });
  };

  try {
    var p = String(self.configPath || "").trim();
    if (!p) return;
    var raw = fs.readFileSync(p, "utf8");
    if (!raw) return;
    var cfg = JSON.parse(raw);
    if (!cfg || typeof cfg !== "object") return;

    copyTyped(
      cfg,
      ["enabled", "useDumpStateAudio", "useAsrServiceStt", "asrServiceDebugWs", "asrAutoStart", "followupEnabled"],
      "boolean"
    );
    copyTyped(cfg, ["asrServiceHost", "asrAudioSourceId"], "string");

    if (typeof cfg.mode === "string" && (cfg.mode === InputMode.TEXT || cfg.mode === InputMode.AUDIO)) self.mode = cfg.mode;
    if (typeof cfg.serverBaseUrl === "string") {
      var normalized = normalizeServerBaseUrl(cfg.serverBaseUrl);
      if (normalized) self.serverBaseUrl = normalized;
    }

    if (typeof cfg.recordSeconds === "number") self.recordSeconds = clampInt(cfg.recordSeconds, 1, 15);
    if (typeof cfg.asrTimeoutMs === "number") self.asrTimeoutMs = clampInt(cfg.asrTimeoutMs, 2000, 60000);
    if (typeof cfg.followupDelayMs === "number") self.followupDelayMs = clampInt(cfg.followupDelayMs, 0, 5000);

    // Reject fractional or out-of-range ports instead of passing them on to the socket layer.
    var cfgPort = cfg.asrServicePort;
    if (typeof cfgPort === "number" && Math.floor(cfgPort) === cfgPort && cfgPort >= 1 && cfgPort <= 65535) {
      self.asrServicePort = cfgPort;
    }

    // Require a real array: a bare string also has a truthy `length` and
    // would later be walked one character at a time by the phrase filters.
    if (Array.isArray(cfg.wakeupChitchatPhrases)) {
      var phrases = cfg.wakeupChitchatPhrases
        .filter(function (entry) {
          return typeof entry === "string";
        })
        .map(function (entry) {
          return entry.trim();
        })
        .filter(function (entry) {
          return entry !== "";
        });
      if (phrases.length) self.wakeupChitchatPhrases = phrases;
    }

    if (rlog) {
      rlog.info("ai-bridge", "loaded config", {
        path: p,
        enabled: self.enabled,
        mode: self.mode,
        url: self.serverBaseUrl,
        useAsrServiceStt: self.useAsrServiceStt,
      });
    }
  } catch (e) {
    if (rlog) {
      rlog.warn("ai-bridge", "config load failed", { err: String(e && (e.stack || e.message || e)), path: String(self.configPath || "") });
    }
  }
};
/**
 * Best-effort check that the local ASR service is accepting TCP connections,
 * starting it once if it is not.
 *
 * Flow: probe `asrServiceHost:asrServicePort`; if the probe fails and no spawn
 * has been attempted yet on this instance, launch `asrExecutable -c <config>`
 * detached (preferring `asrLocalConfigPath` when that file exists, otherwise
 * `asrSystemConfigPath`), wait briefly, and probe once more.
 *
 * @returns {Promise<undefined>} Always resolves and never rejects; failures
 *   are only logged through `rlog` when it is available. Resolves immediately
 *   when `asrAutoStart` is off.
 */
AIBridge.prototype._ensureAsrServiceRunning = function () {
  var self = this;
  if (!self.asrAutoStart) return Promise.resolve();
  var host = String(self.asrServiceHost || "127.0.0.1");
  var port = typeof self.asrServicePort === "number" ? self.asrServicePort : Number(self.asrServicePort) || 8088;

  var warn = function (message, data) {
    if (rlog) {
      rlog.warn("ai-bridge", message, data);
    }
  };

  // Resolve when a TCP connection to host:port opens within `ms`, reject otherwise.
  var tryConnect = function (ms) {
    ms = typeof ms === "number" ? ms : 350;
    return new Promise(function (resolve, reject) {
      var sock = net.connect({ host: host, port: port });
      var done = false;
      var timer = setTimeout(function () {
        if (done) return;
        done = true;
        try {
          sock.destroy();
        } catch (e) {
          // ignore
        }
        reject(new Error("asr port timeout"));
      }, ms);
      sock.on("connect", function () {
        if (done) return;
        done = true;
        clearTimeout(timer);
        try {
          sock.end();
        } catch (e) {
          // ignore
        }
        resolve();
      });
      sock.on("error", function (e) {
        if (done) return;
        done = true;
        clearTimeout(timer);
        reject(e);
      });
    });
  };

  // Launch the service detached. Returns true when a spawn was issued.
  var spawnService = function () {
    var exe = String(self.asrExecutable || "");
    if (!exe) return false;
    var cfgPath = self.asrSystemConfigPath;
    try {
      var st = fs.statSync(self.asrLocalConfigPath);
      if (st && st.isFile && st.isFile()) cfgPath = self.asrLocalConfigPath;
    } catch (e) {
      // ignore: no local config, keep the system one
    }
    try {
      var child = spawn(exe, ["-c", String(cfgPath)], { stdio: ["ignore", "ignore", "ignore"], detached: true });
      // Spawn failures such as a missing executable are reported asynchronously
      // through the child's 'error' event, which try/catch cannot see. Without a
      // listener that event is thrown as an uncaught exception.
      if (child && typeof child.on === "function") {
        child.on("error", function (spawnErr) {
          warn("failed to spawn asr-service", { err: String(spawnErr && (spawnErr.stack || spawnErr.message || spawnErr)) });
        });
      }
      child.unref && child.unref();
      warn("attempted to start asr-service", { exe: exe, cfg: String(cfgPath) });
      return true;
    } catch (e2) {
      warn("failed to spawn asr-service", { err: String(e2 && (e2.stack || e2.message || e2)) });
      return false;
    }
  };

  return tryConnect(250)
    .catch(function () {
      // Only one spawn attempt per bridge instance.
      if (self._asrSpawnAttempted) return;
      self._asrSpawnAttempted = true;
      if (!spawnService()) return;
      // Give the service a moment to bind its port, then probe again.
      return new Promise(function (resolve) {
        setTimeout(resolve, 450);
      }).then(function () {
        return tryConnect(350);
      });
    })
    .catch(function () {
      // ignore; normal if system-manager manages the service.
    });
};
AIBridge.prototype._captureTextViaAsrService = function (source) { AIBridge.prototype._captureTextViaAsrService = function (source) {
var self = this; var self = this;
if (!self.enabled) return Promise.resolve(null); if (!self.enabled) return Promise.resolve(null);
@@ -1123,15 +1313,35 @@ AIBridge.prototype._captureTextViaAsrService = function (source) {
var utterances = evt.utterances || evt.Utterances || (evt.payload && (evt.payload.utterances || evt.payload.Utterances)); var utterances = evt.utterances || evt.Utterances || (evt.payload && (evt.payload.utterances || evt.payload.Utterances));
var best = pickBestAsrUtterance(utterances); var best = pickBestAsrUtterance(utterances);
if (!best || !String(best).trim()) return; if (!best || !String(best).trim()) return;
var rawText = String(best);
var cleaned = stripKnownEdgePhrases(rawText, self.wakeupChitchatPhrases);
if (cleaned !== rawText && rlog) {
rlog.info("ai-bridge", "asr-service sanitized text", {
raw: rawText.slice(0, 160),
clean: cleaned.slice(0, 160),
source: source || "asr-service",
});
}
cleaned = String(cleaned || "").trim();
if (!cleaned) return;
if (isChitchatOnly(cleaned, self.wakeupChitchatPhrases)) {
// Keep listening; this is probably Jibo's own wake chatter.
if (rlog) {
rlog.warn("ai-bridge", "ignored wake chitchat transcript", { text: cleaned.slice(0, 80), source: source || "asr-service" });
}
return;
}
resolved = true; resolved = true;
clearTimeout(timer); clearTimeout(timer);
resolve(String(best)); resolve(String(cleaned));
}); });
}); });
}; };
var t0 = Date.now(); var t0 = Date.now();
return connectWebSocketText(wsUrl, 4000) return self._ensureAsrServiceRunning().then(function () {
return connectWebSocketText(wsUrl, 4000);
})
.then(function (ws) { .then(function (ws) {
wsClient = ws; wsClient = ws;
return httpJsonPostRaw(baseHttp + "/asr_simple_interface", startPayload, 6000); return httpJsonPostRaw(baseHttp + "/asr_simple_interface", startPayload, 6000);
@@ -2052,6 +2262,11 @@ AIBridge.prototype.setupTunables = function () {
}; };
AIBridge.prototype.start = function () { AIBridge.prototype.start = function () {
try {
this._loadConfig();
} catch (e0) {
// ignore
}
try { try {
this.setupTunables(); this.setupTunables();
} catch (e) { } catch (e) {
@@ -2279,18 +2494,41 @@ AIBridge.prototype._sendText = function (text, source) {
if (rlog) { if (rlog) {
rlog.info("ai-bridge", "text request complete", { ms: Date.now() - t0, ok: !!(resp && resp.reply) }); rlog.info("ai-bridge", "text request complete", { ms: Date.now() - t0, ok: !!(resp && resp.reply) });
} }
if (resp && resp.reply) { var reply = resp && resp.reply ? String(resp.reply) : "";
return self._speak(resp.reply); if (!reply) return { reply: "" };
} return self._speak(reply).then(function () {
return { reply: reply };
});
}) })
.catch(function (e) { .catch(function (e) {
self.skill.log && self.skill.log.warn && self.skill.log.warn("AI Bridge text mode error", e && (e.stack || e.message || e)); self.skill.log && self.skill.log.warn && self.skill.log.warn("AI Bridge text mode error", e && (e.stack || e.message || e));
if (rlog) { if (rlog) {
rlog.warn("ai-bridge", "text mode error", { err: String(e && (e.stack || e.message || e)) }); rlog.warn("ai-bridge", "text mode error", { err: String(e && (e.stack || e.message || e)) });
} }
return { reply: "" };
}) })
.then(function () { .then(function (res) {
// Release in-flight before any follow-up listening.
self._inFlight = false; self._inFlight = false;
var reply = res && res.reply ? String(res.reply) : "";
if (!reply) return;
if (!self.followupEnabled) return;
if (!self.useAsrServiceStt) return;
if (!endsWithQuestion(reply)) return;
if (self._followupTurnsLeft > 0) return;
self._followupTurnsLeft = 1;
var delay = typeof self.followupDelayMs === "number" ? self.followupDelayMs : 250;
setTimeout(function () {
// Start one follow-up listen. If it fails, just drop it.
self
._captureTextViaAsrService("followup")
.then(function () {
self._followupTurnsLeft = 0;
})
.catch(function () {
self._followupTurnsLeft = 0;
});
}, Math.max(0, delay));
}); });
}; };

View File

@@ -0,0 +1,86 @@
{
"webCore" : {
"serverPort": 8088,
"fileRoot": "/usr/local/var/www/asrservice",
"requestLogging": false
},
"AsrService" : {
"cloud_establish_http_timeout": 5000,
"language": "en-US",
"post_to_performance_service": true,
"log_audio": true,
"log_text" : true,
"log_path" : "/var/log/asr",
"log_level" : "INFO",
"log_server_url" : "https://speech-logging.jibo.com/logdrop/logdrop.py",
"speaker_id_resource_path" : "/var/jibo/asr/",
"name_learning_resource_path" : "/usr/local/share/asr/namelearning",
"name_learning_temp_path" : "/var/jibo/asr/namelearning_temp/",
"name_learning_nbest" : 70,
"active_sleep_duration" : 5000,
"idle_sleep_duration" : 50000,
"block_duration" : 50,
"audio_loop_sleep_us": 10000,
"use_nuance_upload_voc": false,
"dictation_type" : "dictation",
"nuance_uId" : "b8fb02f2c5794963aaafb8c716ef384c",
"contacts_checksum": "",
"loop_checksum": "1",
"customs_checksum": "",
"upload_voc_url": "ws.nuancemobility.net",
"cloud_url": "https://jibo-ncs-engusa-http.nuancemobility.net/NmspServlet/",
"cloud_appid": "HTTP_NMDPPRODUCTION_Jibo_Jibo_Robot_20151231124503",
"cloud_appkey": "a8c18159a8e3ca49471c56d867552bc77693ccdcc041375ee97b7c867160ae1a212f73c9123d1359596931c0be5c8734ef5310af95470d7ec3890434e9b24e0b",
"upload_voc_rootcert": "",
"google_credential": "/usr/local/share/asr/google_asr/credentials-key.json",
"fadeout_duration": 5000000,
"max_logfile_size": 1000,
"log_upload_time_interval": 60000,
"min_available_log_partition_space": 5000,
"max_asr_log_dir_size_before_upload_trigger": 10000,
"max_asr_log_dir_size": 12000,
"size_to_free_up_when_dir_overflowing": 1000,
"asr_resource_path" : "/usr/local/share/asr/",
"max_memory": 150000,
"wipable_files": [
"/var/log/asr/*.pcm",
"/var/log/asr/*.wav",
"/var/log/asr/*.log",
"/var/jibo/asr/sensory_data_td/client_model.bin",
"/var/jibo/asr/sensory_data_td/audio/*",
"/var/jibo/asr/namelearning_temp/*"
],
"resident_task" : "{\"command\":\"start\",\"task_id\":\"task0\",\"audio_source_id\":\"alsa1\",\"hotphrase\":\"hey_jibo\",\"request_id\":\"resident_hey_jibo_self_start\",\"residency\":true}",
"resident_audio_channel" : "{\"action\":\"start\", \"audio_source_id\":\"alsa1\", \"wav_files\":[], \"audio_source\":\"alsa\", \"request_id\":\"self_start_audio_source_request_id\"}",
"task_templates" : {
"hey_jibo_resident": {
"input_template": "{\"name\":\"hey jibo\",\"path\":\"/usr/local/share/asr/hey_jibo\",\"timeout\":0} * {\"name\":\"Speaker ID TD\",\"path\":\"/usr/local/share/asr/sensory_spkr_id_td\",\"audio_tail_length\":1}",
"emitting_recogs": ["hey jibo", "Speaker ID TD"]
},
"hey_jibo": {
"input_template": "{\"name\":\"hey jibo\",\"path\":\"/usr/local/share/asr/hey_jibo\",\"timeout\":0} * ({\"name\":\"pcmwriter\",\"path\":\"/usr/local/share/asr/pcm_writer\",\"timeout\":0,\"audio_tail_length\":0, \"audio_overshoot_duration\":0} | {\"name\":\"Speaker ID TD\",\"path\":\"/usr/local/share/asr/sensory_spkr_id_td\",\"audio_tail_length\":1})",
"emitting_recogs": ["hey jibo", "Speaker ID TD"]
},
"cloud": {
"input_template":"({\"name\":\"google_asr\",\"path\":\"/usr/local/share/asr/google_asr\",\"timeout\":14000,\"bargein\":false,\"nbest\":1,\"speaker_name\":\"\",\"incremental\":false,\"audio_tail_length\":300}| {\"name\":\"sensory_sdet\",\"path\":\"/usr/local/share/asr/jibo_energy_fake_eos\",\"timeout\":50000,\"bargein\":false,\"nbest\":1,\"speaker_name\":\"\",\"incremental\":false})",
"emitting_recogs": ["google_asr","sensory_sdet"]
},
"hey_jibo_cloud": {
"input_template":"{\"name\":\"hey jibo\",\"path\":\"/usr/local/share/asr/hey_jibo\",\"timeout\":0,\"bargein\":true,\"nbest\":1,\"speaker_name\":\"\",\"incremental\":false,\"speaker_id\":true} * ({\"name\":\"Speaker ID TD\",\"path\":\"/usr/local/share/asr/sensory_spkr_id_td\",\"audio_tail_length\":1} & ({\"name\":\"pcmwriter\",\"path\":\"/usr/local/share/asr/pcm_writer\",\"timeout\":0,\"audio_tail_length\":400, \"audio_overshoot_duration\":0, \"prebuffer\":true} | {\"name\":\"google_asr\",\"path\":\"/usr/local/share/asr/google_asr\",\"timeout\":14000,\"bargein\":false,\"nbest\":1,\"speaker_name\":\"\",\"incremental\":false,\"trim_audio_tail\":true,\"wakeup_phrase_detection\":false,\"audio_tail_length\":0}|{\"name\":\"sensory_sdet\",\"path\":\"/usr/local/share/asr/jibo_energy_fake_eos\",\"timeout\":50000,\"bargein\":false,\"nbest\":1,\"speaker_name\":\"\",\"incremental\":false}))",
"emitting_recogs": ["hey jibo", "Speaker ID TD", "google_asr","sensory_sdet"]
}
},
"rewrite_rules" : {
"log_audio_no_trigger" : "^(?!.*?hey_jibo)(.*)->{\"name\":\"pcmwriter\",\"path\":\"/usr/local/share/asr/pcm_writer\",\"timeout\":0,\"audio_tail_length\":300, \"audio_overshoot_duration\":0,\"prebuffer\":false} | ($1)",
"namelearning_EOS" : "^(.*?\"name\":\\s*name_learning.*)->{\"name\":\"jibo_energy_eos\",\"path\":\"/usr/local/share/asr/jibo_energy_eos\",\"timeout\":10000,\"bargein\":false,\"nbest\":1,\"speaker_name\":\"\",\"incremental\":false} | ($1)"
}
},
"logging" : {
"jibo_message_prefix": "C",
"loggers" : {
"root": {"level": "information"},
"l1" : {"name" : "ASRService", "level" : "information"},
"l2" : {"name" : "Application", "level" : "information"}
}
}
}

View File

@@ -0,0 +1,6 @@
#!/bin/sh
# Launch jibo-asr-service with the config kept under the *writable* Skills
# tree, so startup does not depend on /usr/local/etc (often read-only on
# device). `exec` replaces this shell with the service process.
ASR_BIN=/usr/local/bin/jibo-asr-service
ASR_CONFIG=/opt/jibo/Jibo/Skills/@be/be/be/jibo-asr-service.local.json
exec "$ASR_BIN" -c "$ASR_CONFIG"