/**
 * Escape every regular-expression metacharacter in `s` so the result can be
 * embedded in a RegExp source and match the input literally.
 *
 * @param {*} s Value to escape; falsy values are treated as "".
 * @returns {string} The escaped string.
 */
function escapeRegExp(s) {
  return String(s || "").replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Turn a caller-supplied phrase list into an array of trimmed, non-empty
 * strings. A bare string is treated as ONE phrase; indexing it directly would
 * otherwise walk it character by character.
 *
 * @param {*} phrases Array (or array-like) of phrases, a single string, or a falsy value.
 * @returns {string[]} Trimmed, non-empty phrases in their original order.
 */
function normalizeChitchatPhrases(phrases) {
  if (typeof phrases === "string") phrases = [phrases];
  if (!phrases || !phrases.length) return [];
  var out = [];
  for (var i = 0; i < phrases.length; i += 1) {
    var p = String(phrases[i] || "").trim();
    if (p) out.push(p);
  }
  return out;
}

/**
 * Canonical form used when comparing a transcript with a phrase: lower-cased,
 * edge whitespace/punctuation removed, inner whitespace runs collapsed.
 *
 * @param {*} value Text to normalize; falsy values are treated as "".
 * @returns {string} The normalized text.
 */
function normalizeForPhraseCompare(value) {
  return String(value || "")
    .toLowerCase()
    .replace(/^[\s,!.?;:]+|[\s,!.?;:]+$/g, "")
    .replace(/\s+/g, " ");
}

/**
 * Remove known "Jibo wake chitchat" phrases from the start/end of `text`.
 * This helps avoid TTS bleed-through becoming the user's prompt.
 *
 * A phrase is only removed when it stands as whole words: it must be followed
 * (at the start) or preceded (at the end) by whitespace, one of `,!.?;:`, or
 * the string boundary. Without that check "hi" would be cut out of "history"
 * and "delhi". Matching is case-insensitive, and any whitespace run inside a
 * phrase matches any whitespace run in the text.
 *
 * @param {*} text Transcript to clean; falsy values yield "".
 * @param {*} phrases Phrase list (array, array-like, or a single string).
 * @returns {string} The trimmed text with edge phrases removed. If anything
 *   throws, the original text is returned as a string.
 */
function stripKnownEdgePhrases(text, phrases) {
  try {
    var t = String(text || "");
    if (!t) return "";
    var list = normalizeChitchatPhrases(phrases);

    // Compile each phrase once instead of once per pass.
    var matchers = [];
    for (var i = 0; i < list.length; i += 1) {
      var body = escapeRegExp(list[i]).replace(/\s+/g, "\\s+");
      matchers.push({
        start: new RegExp("^\\s*" + body + "(?:[\\s,!.?;:]+|$)", "i"),
        end: new RegExp("(?:^|[\\s,!.?;:]+)" + body + "\\s*$", "i"),
      });
    }

    // Two passes so a phrase uncovered by an earlier removal is stripped too
    // (e.g. "hey hello ..." -> "hello ..." -> "...").
    for (var pass = 0; pass < 2; pass += 1) {
      for (var j = 0; j < matchers.length; j += 1) {
        t = t.replace(matchers[j].start, "");
        t = t.replace(matchers[j].end, "");
      }
      t = t.replace(/^\s+|\s+$/g, "");
    }
    return t;
  } catch (e) {
    return String(text || "");
  }
}

/**
 * Report whether `text` consists of nothing but one known chitchat phrase.
 * Comparison ignores case, surrounding whitespace/punctuation and the amount
 * of whitespace between words, so "Hello!" matches the phrase "hello".
 *
 * @param {*} text Transcript to test.
 * @param {*} phrases Phrase list (array, array-like, or a single string).
 * @returns {boolean} true when the text is empty (or only punctuation) or
 *   equals one of the phrases; false otherwise, and false if anything throws.
 */
function isChitchatOnly(text, phrases) {
  try {
    var t = normalizeForPhraseCompare(text);
    if (!t) return true;
    var list = normalizeChitchatPhrases(phrases);
    for (var i = 0; i < list.length; i += 1) {
      if (t === normalizeForPhraseCompare(list[i])) return true;
    }
    return false;
  } catch (e) {
    return false;
  }
}

/**
 * Report whether `text` ends with a question mark.
 * Trailing "?"/"!" runs, whitespace, closing quotes (straight or curly) and
 * closing brackets after the question mark are tolerated.
 *
 * @param {*} text Text to inspect.
 * @returns {boolean} true when the text ends in a question; false for empty
 *   input or if anything throws.
 */
function endsWithQuestion(text) {
  try {
    var t = String(text || "").replace(/\s+/g, " ").trim();
    if (!t) return false;
    return /\?[?!]*\s*["'\u201D\u2019)\]]*$/.test(t);
  } catch (e) {
    return false;
  }
}
/**
 * Load optional overrides from the JSON file at `this.configPath` and apply
 * every recognised, well-typed key onto this instance. Keys that are missing,
 * of the wrong type, or out of range leave the current value untouched.
 *
 * Never throws: a read or parse failure is logged through `rlog` (when it is
 * set) and the instance keeps its current settings.
 *
 * @returns {void}
 */
AIBridge.prototype._loadConfig = function () {
  var self = this;

  // Floor a number and clamp it into [lo, hi].
  var clampInt = function (value, lo, hi) {
    return Math.max(lo, Math.min(hi, Math.floor(value)));
  };

  try {
    var p = String(self.configPath || "").trim();
    if (!p) return;
    var raw = fs.readFileSync(p, "utf8");
    if (!raw) return;
    var cfg = JSON.parse(raw);
    // A top-level JSON array is also typeof "object" but is not a config map.
    if (!cfg || typeof cfg !== "object" || Array.isArray(cfg)) return;

    if (typeof cfg.enabled === "boolean") self.enabled = cfg.enabled;
    if (typeof cfg.mode === "string" && (cfg.mode === InputMode.TEXT || cfg.mode === InputMode.AUDIO)) self.mode = cfg.mode;
    if (typeof cfg.serverBaseUrl === "string") {
      var normalized = normalizeServerBaseUrl(cfg.serverBaseUrl);
      if (normalized) self.serverBaseUrl = normalized;
    }
    if (typeof cfg.recordSeconds === "number") self.recordSeconds = clampInt(cfg.recordSeconds, 1, 15);
    if (typeof cfg.useDumpStateAudio === "boolean") self.useDumpStateAudio = cfg.useDumpStateAudio;
    if (typeof cfg.useAsrServiceStt === "boolean") self.useAsrServiceStt = cfg.useAsrServiceStt;
    // An empty host would produce an unusable service address; ignore it.
    if (typeof cfg.asrServiceHost === "string" && cfg.asrServiceHost.trim()) self.asrServiceHost = cfg.asrServiceHost.trim();
    // Only accept a valid TCP port number.
    if (
      typeof cfg.asrServicePort === "number" &&
      Math.floor(cfg.asrServicePort) === cfg.asrServicePort &&
      cfg.asrServicePort >= 1 &&
      cfg.asrServicePort <= 65535
    ) {
      self.asrServicePort = cfg.asrServicePort;
    }
    if (typeof cfg.asrAudioSourceId === "string") self.asrAudioSourceId = cfg.asrAudioSourceId;
    if (typeof cfg.asrTimeoutMs === "number") self.asrTimeoutMs = clampInt(cfg.asrTimeoutMs, 2000, 60000);
    if (typeof cfg.asrServiceDebugWs === "boolean") self.asrServiceDebugWs = cfg.asrServiceDebugWs;
    if (typeof cfg.asrAutoStart === "boolean") self.asrAutoStart = cfg.asrAutoStart;
    if (typeof cfg.followupEnabled === "boolean") self.followupEnabled = cfg.followupEnabled;
    if (typeof cfg.followupDelayMs === "number") self.followupDelayMs = clampInt(cfg.followupDelayMs, 0, 5000);

    // Must be a real array: a plain string also has a truthy .length and would
    // later be consumed one character at a time as "phrases".
    if (Array.isArray(cfg.wakeupChitchatPhrases)) {
      var phrases = [];
      for (var i = 0; i < cfg.wakeupChitchatPhrases.length; i += 1) {
        var item = cfg.wakeupChitchatPhrases[i];
        if (typeof item === "string" && item.trim()) phrases.push(item.trim());
      }
      if (phrases.length) self.wakeupChitchatPhrases = phrases;
    }

    if (rlog) {
      rlog.info("ai-bridge", "loaded config", {
        path: p,
        enabled: self.enabled,
        mode: self.mode,
        url: self.serverBaseUrl,
        useAsrServiceStt: self.useAsrServiceStt,
      });
    }
  } catch (e) {
    if (rlog) {
      rlog.warn("ai-bridge", "config load failed", { err: String(e && (e.stack || e.message || e)), path: String(self.configPath || "") });
    }
  }
};

/**
 * Best-effort check that something accepts TCP connections on the configured
 * ASR service host/port, spawning the service executable once if nothing does.
 *
 * Flow: probe the port (250 ms timeout). If the probe fails and no spawn has
 * been attempted yet, spawn `this.asrExecutable -c <config>` detached
 * (preferring `this.asrLocalConfigPath` when it is a regular file, otherwise
 * `this.asrSystemConfigPath`), wait 450 ms, then probe again (350 ms timeout).
 * Every failure is swallowed, so the returned promise never rejects.
 *
 * @returns {Promise<void>} Resolves once the checks are done, whether or not
 *   the service is reachable. Resolves immediately when `asrAutoStart` is off.
 */
AIBridge.prototype._ensureAsrServiceRunning = function () {
  var self = this;
  if (!self.asrAutoStart) return Promise.resolve();
  var host = String(self.asrServiceHost || "127.0.0.1");
  var port = typeof self.asrServicePort === "number" ? self.asrServicePort : Number(self.asrServicePort) || 8088;

  // Resolve if a TCP connection opens within `ms`, reject on error or timeout.
  var tryConnect = function (ms) {
    ms = typeof ms === "number" ? ms : 350;
    return new Promise(function (resolve, reject) {
      var sock = net.connect({ host: host, port: port });
      var done = false;
      var timer = setTimeout(function () {
        if (done) return;
        done = true;
        try {
          sock.destroy();
        } catch (e) {
          // ignore
        }
        reject(new Error("asr port timeout"));
      }, ms);
      sock.on("connect", function () {
        if (done) return;
        done = true;
        clearTimeout(timer);
        try {
          sock.end();
        } catch (e) {
          // ignore
        }
        resolve();
      });
      sock.on("error", function (e) {
        if (done) return;
        done = true;
        clearTimeout(timer);
        reject(e);
      });
    });
  };

  var logSpawnFailure = function (err) {
    if (rlog) {
      rlog.warn("ai-bridge", "failed to spawn asr-service", { err: String(err && (err.stack || err.message || err)) });
    }
  };

  return tryConnect(250)
    .catch(function () {
      // Only ever try to start the service once per bridge instance.
      if (self._asrSpawnAttempted) return;
      self._asrSpawnAttempted = true;
      var exe = String(self.asrExecutable || "");
      if (!exe) return;
      var cfgPath = self.asrSystemConfigPath;
      try {
        var st = fs.statSync(self.asrLocalConfigPath);
        if (st && st.isFile && st.isFile()) cfgPath = self.asrLocalConfigPath;
      } catch (e) {
        // ignore; fall back to the system config path
      }
      try {
        var child = spawn(exe, ["-c", String(cfgPath)], { stdio: ["ignore", "ignore", "ignore"], detached: true });
        // A failed spawn (e.g. missing executable) is reported asynchronously
        // through the child's 'error' event, which the try/catch cannot see.
        // Without a listener that event becomes an uncaught exception.
        if (child && typeof child.on === "function") child.on("error", logSpawnFailure);
        child.unref && child.unref();
        if (rlog) {
          rlog.warn("ai-bridge", "attempted to start asr-service", { exe: exe, cfg: String(cfgPath) });
        }
      } catch (e2) {
        logSpawnFailure(e2);
      }
      // Give the freshly spawned process a moment to start listening.
      return new Promise(function (resolve) {
        setTimeout(resolve, 450);
      });
    })
    .then(function () {
      return tryConnect(350);
    })
    .catch(function () {
      // ignore; normal if system-manager manages the service.
    });
};
resp.reply ? String(resp.reply) : ""; + if (!reply) return { reply: "" }; + return self._speak(reply).then(function () { + return { reply: reply }; + }); }) .catch(function (e) { self.skill.log && self.skill.log.warn && self.skill.log.warn("AI Bridge text mode error", e && (e.stack || e.message || e)); if (rlog) { rlog.warn("ai-bridge", "text mode error", { err: String(e && (e.stack || e.message || e)) }); } + return { reply: "" }; }) - .then(function () { + .then(function (res) { + // Release in-flight before any follow-up listening. self._inFlight = false; + var reply = res && res.reply ? String(res.reply) : ""; + if (!reply) return; + if (!self.followupEnabled) return; + if (!self.useAsrServiceStt) return; + if (!endsWithQuestion(reply)) return; + if (self._followupTurnsLeft > 0) return; + self._followupTurnsLeft = 1; + var delay = typeof self.followupDelayMs === "number" ? self.followupDelayMs : 250; + setTimeout(function () { + // Start one follow-up listen. If it fails, just drop it. 
+ self + ._captureTextViaAsrService("followup") + .then(function () { + self._followupTurnsLeft = 0; + }) + .catch(function () { + self._followupTurnsLeft = 0; + }); + }, Math.max(0, delay)); }); }; diff --git a/V3.1/build/opt/jibo/Jibo/Skills/@be/be/be/jibo-asr-service.local.json b/V3.1/build/opt/jibo/Jibo/Skills/@be/be/be/jibo-asr-service.local.json new file mode 100644 index 00000000..6b2eea2d --- /dev/null +++ b/V3.1/build/opt/jibo/Jibo/Skills/@be/be/be/jibo-asr-service.local.json @@ -0,0 +1,86 @@ +{ + "webCore" : { + "serverPort": 8088, + "fileRoot": "/usr/local/var/www/asrservice", + "requestLogging": false + }, + "AsrService" : { + "cloud_establish_http_timeout": 5000, + "language": "en-US", + "post_to_performance_service": true, + "log_audio": true, + "log_text" : true, + "log_path" : "/var/log/asr", + "log_level" : "INFO", + "log_server_url" : "https://speech-logging.jibo.com/logdrop/logdrop.py", + "speaker_id_resource_path" : "/var/jibo/asr/", + "name_learning_resource_path" : "/usr/local/share/asr/namelearning", + "name_learning_temp_path" : "/var/jibo/asr/namelearning_temp/", + "name_learning_nbest" : 70, + "active_sleep_duration" : 5000, + "idle_sleep_duration" : 50000, + "block_duration" : 50, + "audio_loop_sleep_us": 10000, + "use_nuance_upload_voc": false, + "dictation_type" : "dictation", + "nuance_uId" : "b8fb02f2c5794963aaafb8c716ef384c", + "contacts_checksum": "", + "loop_checksum": "1", + "customs_checksum": "", + "upload_voc_url": "ws.nuancemobility.net", + "cloud_url": "https://jibo-ncs-engusa-http.nuancemobility.net/NmspServlet/", + "cloud_appid": "HTTP_NMDPPRODUCTION_Jibo_Jibo_Robot_20151231124503", + "cloud_appkey": "a8c18159a8e3ca49471c56d867552bc77693ccdcc041375ee97b7c867160ae1a212f73c9123d1359596931c0be5c8734ef5310af95470d7ec3890434e9b24e0b", + "upload_voc_rootcert": "", + "google_credential": "/usr/local/share/asr/google_asr/credentials-key.json", + "fadeout_duration": 5000000, + "max_logfile_size": 1000, + "log_upload_time_interval": 
60000, + "min_available_log_partition_space": 5000, + "max_asr_log_dir_size_before_upload_trigger": 10000, + "max_asr_log_dir_size": 12000, + "size_to_free_up_when_dir_overflowing": 1000, + "asr_resource_path" : "/usr/local/share/asr/", + "max_memory": 150000, + "wipable_files": [ + "/var/log/asr/*.pcm", + "/var/log/asr/*.wav", + "/var/log/asr/*.log", + "/var/jibo/asr/sensory_data_td/client_model.bin", + "/var/jibo/asr/sensory_data_td/audio/*", + "/var/jibo/asr/namelearning_temp/*" + ], + "resident_task" : "{\"command\":\"start\",\"task_id\":\"task0\",\"audio_source_id\":\"alsa1\",\"hotphrase\":\"hey_jibo\",\"request_id\":\"resident_hey_jibo_self_start\",\"residency\":true}", + "resident_audio_channel" : "{\"action\":\"start\", \"audio_source_id\":\"alsa1\", \"wav_files\":[], \"audio_source\":\"alsa\", \"request_id\":\"self_start_audio_source_request_id\"}", + "task_templates" : { + "hey_jibo_resident": { + "input_template": "{\"name\":\"hey jibo\",\"path\":\"/usr/local/share/asr/hey_jibo\",\"timeout\":0} * {\"name\":\"Speaker ID TD\",\"path\":\"/usr/local/share/asr/sensory_spkr_id_td\",\"audio_tail_length\":1}", + "emitting_recogs": ["hey jibo", "Speaker ID TD"] + }, + "hey_jibo": { + "input_template": "{\"name\":\"hey jibo\",\"path\":\"/usr/local/share/asr/hey_jibo\",\"timeout\":0} * ({\"name\":\"pcmwriter\",\"path\":\"/usr/local/share/asr/pcm_writer\",\"timeout\":0,\"audio_tail_length\":0, \"audio_overshoot_duration\":0} | {\"name\":\"Speaker ID TD\",\"path\":\"/usr/local/share/asr/sensory_spkr_id_td\",\"audio_tail_length\":1})", + "emitting_recogs": ["hey jibo", "Speaker ID TD"] + }, + "cloud": { + "input_template":"({\"name\":\"google_asr\",\"path\":\"/usr/local/share/asr/google_asr\",\"timeout\":14000,\"bargein\":false,\"nbest\":1,\"speaker_name\":\"\",\"incremental\":false,\"audio_tail_length\":300}| 
{\"name\":\"sensory_sdet\",\"path\":\"/usr/local/share/asr/jibo_energy_fake_eos\",\"timeout\":50000,\"bargein\":false,\"nbest\":1,\"speaker_name\":\"\",\"incremental\":false})", + "emitting_recogs": ["google_asr","sensory_sdet"] + }, + "hey_jibo_cloud": { + "input_template":"{\"name\":\"hey jibo\",\"path\":\"/usr/local/share/asr/hey_jibo\",\"timeout\":0,\"bargein\":true,\"nbest\":1,\"speaker_name\":\"\",\"incremental\":false,\"speaker_id\":true} * ({\"name\":\"Speaker ID TD\",\"path\":\"/usr/local/share/asr/sensory_spkr_id_td\",\"audio_tail_length\":1} & ({\"name\":\"pcmwriter\",\"path\":\"/usr/local/share/asr/pcm_writer\",\"timeout\":0,\"audio_tail_length\":400, \"audio_overshoot_duration\":0, \"prebuffer\":true} | {\"name\":\"google_asr\",\"path\":\"/usr/local/share/asr/google_asr\",\"timeout\":14000,\"bargein\":false,\"nbest\":1,\"speaker_name\":\"\",\"incremental\":false,\"trim_audio_tail\":true,\"wakeup_phrase_detection\":false,\"audio_tail_length\":0}|{\"name\":\"sensory_sdet\",\"path\":\"/usr/local/share/asr/jibo_energy_fake_eos\",\"timeout\":50000,\"bargein\":false,\"nbest\":1,\"speaker_name\":\"\",\"incremental\":false}))", + "emitting_recogs": ["hey jibo", "Speaker ID TD", "google_asr","sensory_sdet"] + } + }, + "rewrite_rules" : { + "log_audio_no_trigger" : "^(?!.*?hey_jibo)(.*)->{\"name\":\"pcmwriter\",\"path\":\"/usr/local/share/asr/pcm_writer\",\"timeout\":0,\"audio_tail_length\":300, \"audio_overshoot_duration\":0,\"prebuffer\":false} | ($1)", + "namelearning_EOS" : "^(.*?\"name\":\\s*name_learning.*)->{\"name\":\"jibo_energy_eos\",\"path\":\"/usr/local/share/asr/jibo_energy_eos\",\"timeout\":10000,\"bargein\":false,\"nbest\":1,\"speaker_name\":\"\",\"incremental\":false} | ($1)" + } + }, + "logging" : { + "jibo_message_prefix": "C", + "loggers" : { + "root": {"level": "information"}, + "l1" : {"name" : "ASRService", "level" : "information"}, + "l2" : {"name" : "Application", "level" : "information"} + } + } +} diff --git 
a/V3.1/build/opt/jibo/Jibo/Skills/@be/be/be/run-asr-service-local.sh b/V3.1/build/opt/jibo/Jibo/Skills/@be/be/be/run-asr-service-local.sh new file mode 100644 index 00000000..4349a5a0 --- /dev/null +++ b/V3.1/build/opt/jibo/Jibo/Skills/@be/be/be/run-asr-service-local.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +# Run jibo-asr-service using the *writable* config under Skills. +# This avoids relying on /usr/local/etc (often read-only on device). + +exec /usr/local/bin/jibo-asr-service -c /opt/jibo/Jibo/Skills/@be/be/be/jibo-asr-service.local.json