diff --git a/V3.1/build/ai_bridge_server/server.py b/V3.1/build/ai_bridge_server/server.py index 421d5809..0086d65a 100644 --- a/V3.1/build/ai_bridge_server/server.py +++ b/V3.1/build/ai_bridge_server/server.py @@ -33,6 +33,7 @@ import time import traceback import wave from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +from urllib.error import URLError from urllib.request import Request, urlopen @@ -81,8 +82,13 @@ def _ollama_chat(user_text: str) -> str: method="POST", ) - with urlopen(req, timeout=60) as resp: - data = json.loads(resp.read().decode("utf-8")) + try: + with urlopen(req, timeout=60) as resp: + data = json.loads(resp.read().decode("utf-8")) + except Exception as e: + # Let caller decide how to respond; include a useful hint in logs. + _log(f"Ollama request failed url={ollama_url!r} err={e!r}") + raise msg = data.get("message") or {} content = msg.get("content") @@ -91,6 +97,19 @@ def _ollama_chat(user_text: str) -> str: return content.strip() +def _short_err(e: BaseException) -> str: + s = str(e) or e.__class__.__name__ + s = " ".join(s.split()) + if len(s) > 240: + s = s[:240] + "..." + return s + + +def _ollama_down_reply() -> str: + # Keep it short and speakable. + return "My AI server isn't reachable right now. Please start Ollama on the computer, then try again." + + def _wav_diagnostics(wav_bytes: bytes) -> dict: """Best-effort WAV parsing + signal stats for debugging mic capture.""" info: dict = {"bytes": len(wav_bytes)} @@ -245,9 +264,16 @@ class Handler(BaseHTTPRequestHandler): _json_response(self, 400, {"error": "Missing 'text'"}) return _log(f"{client} /text prompt_chars={len(text)} prompt={text[:200]!r}") - reply = _ollama_chat(text) - _log(f"{client} /text ok reply_chars={len(reply)}") - _json_response(self, 200, {"reply": reply}) + try: + reply = _ollama_chat(text) + _log(f"{client} /text ok reply_chars={len(reply)}") + _json_response(self, 200, {"reply": reply}) + except URLError as e: + _log(f"{client} /text ollama_unreachable err={_short_err(e)!r}") + _json_response(self, 200, {"reply": _ollama_down_reply(), "ollama_ok": False, "ollama_error": _short_err(e)}) + except ConnectionRefusedError as e: + _log(f"{client} /text ollama_refused err={_short_err(e)!r}") + _json_response(self, 200, {"reply": _ollama_down_reply(), "ollama_ok": False, "ollama_error": _short_err(e)}) return if self.path == "/v1/chat/audio": @@ -295,9 +321,24 @@ class Handler(BaseHTTPRequestHandler): _json_response(self, 200, {"reply": "I didn't catch that. Could you say it again?", "text": ""}) return _log(f"{client} /audio transcript_chars={len(transcript)} transcript={transcript[:200]!r}") - reply = _ollama_chat(transcript) - _log(f"{client} /audio ok text_chars={len(transcript)} reply_chars={len(reply)}") - _json_response(self, 200, {"reply": reply, "text": transcript}) + try: + reply = _ollama_chat(transcript) + _log(f"{client} /audio ok text_chars={len(transcript)} reply_chars={len(reply)}") + _json_response(self, 200, {"reply": reply, "text": transcript}) + except URLError as e: + _log(f"{client} /audio ollama_unreachable err={_short_err(e)!r}") + _json_response( + self, + 200, + {"reply": _ollama_down_reply(), "text": transcript, "ollama_ok": False, "ollama_error": _short_err(e)}, + ) + except ConnectionRefusedError as e: + _log(f"{client} /audio ollama_refused err={_short_err(e)!r}") + _json_response( + self, + 200, + {"reply": _ollama_down_reply(), "text": transcript, "ollama_ok": False, "ollama_error": _short_err(e)}, + ) return _json_response(self, 404, {"error": "Not found"}) @@ -332,6 +373,7 @@ def main(): + " model=" + os.environ.get("OLLAMA_MODEL", "phi3.5") ) + _log("Ollama health check: curl -s http://127.0.0.1:11434/api/tags | head") if not _whisper.available(): _log("STT: faster-whisper not installed; /v1/chat/audio will return 503") server.serve_forever() diff --git a/V3.1/build/opt/jibo/Jibo/Skills/@be/be/be/ai-bridge.js b/V3.1/build/opt/jibo/Jibo/Skills/@be/be/be/ai-bridge.js index 5be1243a..3c07602d 100644 --- a/V3.1/build/opt/jibo/Jibo/Skills/@be/be/be/ai-bridge.js +++ b/V3.1/build/opt/jibo/Jibo/Skills/@be/be/be/ai-bridge.js @@ -9,6 +9,13 @@ var http = require("http"); var https = require("https"); var urlLib = require("url"); var fs = require("fs"); +var net = require("net"); +var crypto = null; +try { + crypto = require("crypto"); +} catch (e) { + crypto = null; +} var childProcess = require("child_process"); var spawn = childProcess.spawn; @@ -49,6 +56,8 @@ var LABEL = { audioFormat: "AI Bridge: Audio format (arecord -f)", debugAudio: "AI Bridge: Debug audio capture", useDumpStateAudio: "AI Bridge: Use audio-service dump-state (fix mic busy)", + useAsrServiceStt: "AI Bridge: Use ASR-service STT (8088)", + asrServiceDebugWs: "AI Bridge: Debug ASR-service WS", }; var InputMode = { @@ -56,6 +65,13 @@ var InputMode = { AUDIO: "AUDIO", }; +var WsLib = null; +try { + WsLib = require("ws"); +} catch (e) { + WsLib = null; +} + function httpJsonPost(urlString, payload, timeoutMs) { timeoutMs = typeof timeoutMs === "number" ? timeoutMs : 15000; @@ -108,6 +124,399 @@ function httpJsonPost(urlString, payload, timeoutMs) { }); } +function httpJsonPostRaw(urlString, payload, timeoutMs) { + timeoutMs = typeof timeoutMs === "number" ? timeoutMs : 6000; + + var parsed = urlLib.parse(urlString); + var isHttps = parsed.protocol === "https:"; + var bodyStr = JSON.stringify(payload || {}); + var body = new Buffer(bodyStr, "utf8"); + var requestOptions = { + protocol: parsed.protocol, + hostname: parsed.hostname, + port: parsed.port || (isHttps ? 443 : 80), + path: parsed.path || "/", + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": body.length, + }, + timeout: timeoutMs, + }; + + return new Promise(function (resolve, reject) { + var req = (isHttps ? https : http).request(requestOptions, function (res) { + var chunks = []; + res.on("data", function (d) { + chunks.push(d); + }); + res.on("end", function () { + var raw = Buffer.concat(chunks).toString("utf8"); + resolve({ statusCode: res.statusCode, headers: res.headers || {}, body: raw }); + }); + }); + req.on("error", reject); + req.on("timeout", function () { + req.destroy(new Error("POST timeout")); + }); + req.write(body); + req.end(); + }); +} + +function parseWsUrl(wsUrl) { + // Very small ws:// URL parser for embedded Node. + // Returns { host, port, path }. + if (typeof wsUrl !== "string") return null; + var s = wsUrl.trim(); + var m = s.match(/^wss?:\/\/([^\/]+)(\/.*)?$/i); + if (!m) return null; + var hostPort = m[1]; + var path = m[2] || "/"; + var hp = hostPort.split(":"); + var host = hp[0]; + var port = hp.length > 1 ? Number(hp[1]) : s.toLowerCase().indexOf("wss://") === 0 ? 443 : 80; + if (!host) return null; + if (!port || Number.isNaN(port)) port = 80; + return { host: host, port: port, path: path }; +} + +function connectWebSocketText(wsUrl, timeoutMs) { + // Returns a Promise resolving to { onMessage(fn), close() }. + // Uses `ws` module if available; otherwise uses a minimal RFC6455 client. + timeoutMs = typeof timeoutMs === "number" ? timeoutMs : 4000; + + if (WsLib) { + return new Promise(function (resolve, reject) { + var ws; + try { + ws = new WsLib(wsUrl); + } catch (e) { + reject(e); + return; + } + var timer = setTimeout(function () { + try { + ws.terminate && ws.terminate(); + } catch (e) { + // ignore + } + reject(new Error("ws connect timeout")); + }, timeoutMs); + ws.on("open", function () { + clearTimeout(timer); + resolve({ + onMessage: function (fn) { + ws.on("message", function (msg) { + fn(typeof msg === "string" ? msg : msg && msg.toString ? msg.toString("utf8") : String(msg)); + }); + }, + close: function () { + try { + ws.close(); + } catch (e) { + // ignore + } + }, + }); + }); + ws.on("error", function (e) { + clearTimeout(timer); + reject(e); + }); + }); + } + + // Minimal client without external deps. + return new Promise(function (resolve, reject) { + var findHeaderEnd = function (buf) { + // Find \r\n\r\n in a Buffer. + if (!buf || !buf.length) return -1; + for (var i = 0; i + 3 < buf.length; i += 1) { + if (buf[i] === 13 && buf[i + 1] === 10 && buf[i + 2] === 13 && buf[i + 3] === 10) return i; + } + return -1; + }; + + var info = parseWsUrl(wsUrl); + if (!info) { + reject(new Error("bad ws url")); + return; + } + var key = null; + try { + key = crypto && crypto.randomBytes ? crypto.randomBytes(16).toString("base64") : new Buffer(String(Date.now()) + ":" + String(Math.random())).toString("base64"); + } catch (e) { + key = new Buffer(String(Date.now()) + ":" + String(Math.random())).toString("base64"); + } + + var socket = net.connect({ host: info.host, port: info.port }); + var timer = setTimeout(function () { + try { + socket.destroy(); + } catch (e) { + // ignore + } + reject(new Error("ws connect timeout")); + }, timeoutMs); + + var state = { handshook: false, buf: new Buffer(0) }; + var msgHandlers = []; + var closed = false; + var fragType = null; // 0x1 text, 0x2 binary + var fragChunks = []; + + var emitMessage = function (s) { + for (var i = 0; i < msgHandlers.length; i += 1) { + try { + msgHandlers[i](s); + } catch (e) { + // ignore + } + } + }; + + var sendPong = function (payload) { + try { + // server->client frames are unmasked; client->server must be masked. + var mask = crypto && crypto.randomBytes ? crypto.randomBytes(4) : new Buffer([1, 2, 3, 4]); + var len = payload ? payload.length : 0; + var header = null; + if (len < 126) { + header = new Buffer(2); + header[0] = 0x8a; // FIN + pong + header[1] = 0x80 | len; + } else { + header = new Buffer(4); + header[0] = 0x8a; + header[1] = 0x80 | 126; + header.writeUInt16BE(len, 2); + } + var out = new Buffer(header.length + 4 + len); + header.copy(out, 0); + mask.copy(out, header.length); + for (var i = 0; i < len; i += 1) { + out[header.length + 4 + i] = payload[i] ^ mask[i % 4]; + } + socket.write(out); + } catch (e) { + // ignore + } + }; + + var parseFrames = function () { + // parse server->client frames (unmasked) + while (state.buf.length >= 2) { + var b0 = state.buf[0]; + var b1 = state.buf[1]; + var fin = (b0 & 0x80) !== 0; + var opcode = b0 & 0x0f; + var masked = (b1 & 0x80) !== 0; + var len = b1 & 0x7f; + var off = 2; + if (len === 126) { + if (state.buf.length < off + 2) return; + len = state.buf.readUInt16BE(off); + off += 2; + } else if (len === 127) { + // Not expected; avoid overflow. + if (state.buf.length < off + 8) return; + // Only support up to 2^31-1 + var hi = state.buf.readUInt32BE(off); + var lo = state.buf.readUInt32BE(off + 4); + off += 8; + if (hi !== 0) { + // too large + try { + socket.destroy(); + } catch (e) { + // ignore + } + return; + } + len = lo; + } + var maskKey = null; + if (masked) { + if (state.buf.length < off + 4) return; + maskKey = state.buf.slice(off, off + 4); + off += 4; + } + if (state.buf.length < off + len) return; + var payload = state.buf.slice(off, off + len); + state.buf = state.buf.slice(off + len); + + if (masked && maskKey) { + for (var i = 0; i < payload.length; i += 1) { + payload[i] = payload[i] ^ maskKey[i % 4]; + } + } + + if (opcode === 0x8) { + try { + socket.end(); + } catch (e) { + // ignore + } + return; + } + if (opcode === 0x9) { + // ping + sendPong(payload); + continue; + } + // Continuation frames for fragmented messages. + if (opcode === 0x0) { + if (!fragType) { + continue; + } + fragChunks.push(payload); + if (fin) { + var whole = Buffer.concat(fragChunks); + fragType = null; + fragChunks = []; + try { + emitMessage(whole.toString("utf8")); + } catch (e) { + // ignore + } + } + continue; + } + + // Text or binary payloads (ASR may send either). + if (opcode === 0x1 || opcode === 0x2) { + if (!fin) { + fragType = opcode; + fragChunks = [payload]; + continue; + } + try { + emitMessage(payload.toString("utf8")); + } catch (e) { + // ignore + } + continue; + } + } + }; + + socket.on("connect", function () { + var req = ""; + req += "GET " + info.path + " HTTP/1.1\r\n"; + req += "Host: " + info.host + ":" + String(info.port) + "\r\n"; + req += "Upgrade: websocket\r\n"; + req += "Connection: Upgrade\r\n"; + req += "Sec-WebSocket-Key: " + key + "\r\n"; + req += "Sec-WebSocket-Version: 13\r\n"; + req += "\r\n"; + socket.write(req); + }); + + socket.on("data", function (d) { + if (closed) return; + state.buf = Buffer.concat([state.buf, d]); + if (!state.handshook) { + var endIdx = findHeaderEnd(state.buf); + if (endIdx < 0) return; + var headerText = state.buf.slice(0, endIdx).toString("utf8"); + state.buf = state.buf.slice(endIdx + 4); + if (!/^HTTP\/1\.1 101/i.test(headerText)) { + closed = true; + clearTimeout(timer); + reject(new Error("ws handshake failed: " + headerText.split("\r\n")[0])); + try { + socket.destroy(); + } catch (e) { + // ignore + } + return; + } + state.handshook = true; + clearTimeout(timer); + resolve({ + onMessage: function (fn) { + msgHandlers.push(fn); + }, + close: function () { + if (closed) return; + closed = true; + try { + socket.end(); + } catch (e) { + // ignore + } + }, + }); + } + // Parse any remaining frames. + if (state.handshook) parseFrames(); + }); + socket.on("error", function (e) { + if (closed) return; + closed = true; + clearTimeout(timer); + reject(e); + }); + socket.on("close", function () { + if (closed) return; + closed = true; + clearTimeout(timer); + }); + }); +} + +function pickBestAsrUtterance(utterances) { + // utterances: [{utterance, score}] possibly. + try { + if (!utterances || !utterances.length) return ""; + var bestText = ""; + var bestScore = -1e99; + for (var i = 0; i < utterances.length; i += 1) { + var u = utterances[i]; + var text = u && (u.utterance || u.Utterance || u.text) ? String(u.utterance || u.Utterance || u.text) : ""; + text = text.replace(/^\s+|\s+$/g, ""); + if (!text) continue; + // Light cleanup: sometimes a leading duplicated char occurs. + if (text.length >= 2 && text[0].toLowerCase && text[1].toLowerCase && text[0].toLowerCase() === text[1].toLowerCase()) { + text = text.slice(1); + } + var score = typeof u.score === "number" ? u.score : typeof u.Score === "number" ? u.Score : 0; + if (!bestText) { + bestText = text; + bestScore = score; + continue; + } + if (score > bestScore) { + bestText = text; + bestScore = score; + } + } + // Fallback: choose the shortest distinct utterance. + if (!bestText) { + var uniq = {}; + var arr = []; + for (var j = 0; j < utterances.length; j += 1) { + var u2 = utterances[j]; + var t2 = u2 && (u2.utterance || u2.Utterance || u2.text) ? String(u2.utterance || u2.Utterance || u2.text) : ""; + t2 = t2.replace(/^\s+|\s+$/g, ""); + if (!t2) continue; + var k = t2.toLowerCase ? t2.toLowerCase() : t2; + if (uniq[k]) continue; + uniq[k] = true; + arr.push(t2); + } + arr.sort(function (a, b) { + return a.length - b.length; + }); + bestText = arr[0] || ""; + } + return bestText; + } catch (e) { + return ""; + } +} + function recordWavWithArecord(durationSec, sampleRate, channels, device, format, debug) { durationSec = typeof durationSec === "number" ? durationSec : 5; sampleRate = typeof sampleRate === "number" ? sampleRate : 16000; @@ -599,6 +1008,14 @@ function AIBridge(skill, jibo) { // Prefer grabbing audio from the system audio service (avoids ALSA mic lock). this.useDumpStateAudio = true; this.dumpStateDir = "/tmp"; + // Offline STT via jibo-asr-service (HTTP + WebSocket) when available. + this.useAsrServiceStt = false; + this.asrServiceHost = "127.0.0.1"; + this.asrServicePort = 8088; + this.asrAudioSourceId = "alsa1"; + this.asrTimeoutMs = 15000; + this.asrServiceDebugWs = false; + this._asrServiceInFlight = false; this._inFlight = false; this._lastHJAt = 0; @@ -623,6 +1040,156 @@ function AIBridge(skill, jibo) { }; } +AIBridge.prototype._captureTextViaAsrService = function (source) { + var self = this; + if (!self.enabled) return Promise.resolve(null); + if (self._inFlight || self._asrServiceInFlight) return Promise.resolve(null); + self._asrServiceInFlight = true; + + var host = String(self.asrServiceHost || "127.0.0.1"); + var port = typeof self.asrServicePort === "number" ? self.asrServicePort : Number(self.asrServicePort) || 8088; + var baseHttp = "http://" + host + ":" + String(port); + var wsUrl = "ws://" + host + ":" + String(port) + "/simple_port"; + var taskId = "DEBUG:ai-bridge-" + String(Date.now()) + "-" + String(Math.floor(Math.random() * 1e9)); + var requestId = "stt_start_" + String(Date.now()) + "_" + String(Math.floor(Math.random() * 1e9)); + var timeoutMs = typeof self.asrTimeoutMs === "number" ? self.asrTimeoutMs : 15000; + var debugWs = !!self.asrServiceDebugWs; + + var wsClient = null; + var stopAlways = function () { + var stopPayload = { command: "stop", task_id: taskId, request_id: "stt_stop_" + String(Date.now()) + "_" + String(Math.floor(Math.random() * 1e9)) }; + return httpJsonPostRaw(baseHttp + "/asr_simple_interface", stopPayload, 5000) + .then(function () { + // ignore + }) + .catch(function () { + // ignore + }); + }; + + var done = function () { + self._asrServiceInFlight = false; + }; + + var fail = function (e) { + if (rlog) { + rlog.warn("ai-bridge", "asr-service stt failed", { err: String(e && (e.stack || e.message || e)), source: source || "asr-service" }); + } + self._asrServiceInFlight = false; + }; + + var startPayload = { + command: "start", + task_id: taskId, + audio_source_id: String(self.asrAudioSourceId || "alsa1"), + hotphrase: "none", + speech_to_text: true, + request_id: requestId, + }; + + var waitForFinal = function () { + return new Promise(function (resolve, reject) { + var timer = setTimeout(function () { + reject(new Error("asr-service stt timeout")); + }, timeoutMs); + var resolved = false; + var seen = 0; + wsClient.onMessage(function (msg) { + if (resolved) return; + seen += 1; + if (debugWs && rlog && seen <= 6) { + rlog.debug("ai-bridge", "asr-service ws msg", { idx: seen, raw: String(msg || "").slice(0, 500) }); + } + var evt = null; + try { + evt = JSON.parse(String(msg || "")); + } catch (e) { + return; + } + if (!evt || typeof evt !== "object") return; + var eventType = evt.event_type || evt.eventType || evt.event || evt.type; + if (eventType !== "speech_to_text_final") return; + + // Correlate to our request when possible, but don't hard-fail on schema drift. + var evTaskId = evt.task_id || evt.taskId || (evt.payload && (evt.payload.task_id || evt.payload.taskId)); + var evRequestId = evt.request_id || evt.requestId || (evt.payload && (evt.payload.request_id || evt.payload.requestId)); + var idMatches = false; + if (evTaskId && String(evTaskId) === String(taskId)) idMatches = true; + if (evRequestId && String(evRequestId) === String(requestId)) idMatches = true; + // If the event carries neither id, accept it (single in-flight capture). + if (!evTaskId && !evRequestId) idMatches = true; + if (!idMatches) return; + + var utterances = evt.utterances || evt.Utterances || (evt.payload && (evt.payload.utterances || evt.payload.Utterances)); + var best = pickBestAsrUtterance(utterances); + if (!best || !String(best).trim()) return; + resolved = true; + clearTimeout(timer); + resolve(String(best)); + }); + }); + }; + + var t0 = Date.now(); + return connectWebSocketText(wsUrl, 4000) + .then(function (ws) { + wsClient = ws; + return httpJsonPostRaw(baseHttp + "/asr_simple_interface", startPayload, 6000); + }) + .then(function (resp) { + if (rlog) { + rlog.info("ai-bridge", "asr-service stt start", { + status: resp && resp.statusCode, + ms: Date.now() - t0, + ws: wsUrl, + http: baseHttp, + source: source || "asr-service", + body: resp && resp.body ? String(resp.body).slice(0, 500) : "", + }); + } + if (!resp || !resp.statusCode || resp.statusCode < 200 || resp.statusCode >= 300) { + throw new Error("asr-service start failed: " + String(resp && resp.statusCode)); + } + return waitForFinal(); + }) + .then(function (text) { + return stopAlways().then(function () { + return text; + }); + }) + .then(function (text) { + try { + wsClient && wsClient.close && wsClient.close(); + } catch (e) { + // ignore + } + if (rlog) { + rlog.info("ai-bridge", "asr-service text", { chars: String(text).length, text: String(text).slice(0, 160), ms: Date.now() - t0, source: source || "asr-service" }); + } + return self._sendText(text, "asr-service:" + String(source || "")); + }) + .then(function () { + done(); + }) + .catch(function (e) { + try { + wsClient && wsClient.close && wsClient.close(); + } catch (e2) { + // ignore + } + return stopAlways() + .then(function () { + throw e; + }) + .catch(function () { + throw e; + }); + }) + .catch(function (e) { + fail(e); + }); +}; + AIBridge.prototype._scheduleOfflineAudioFallback = function (reason, delayMs) { var self = this; if (!self.enabled) return; @@ -630,7 +1197,7 @@ AIBridge.prototype._scheduleOfflineAudioFallback = function (reason, delayMs) { if (!self._allowAnyTextNoHJ && Date.now() - self._lastHJAt > self._hjWindowMs) return; if (self._inFlight) return; - if (self._audioFallbackTimer) { + if (self._audioFallbackTimer) { clearTimeout(self._audioFallbackTimer); self._audioFallbackTimer = null; } @@ -675,7 +1242,7 @@ AIBridge.prototype._captureAndSendAudio = function (source) { if (["S16_LE", "S32_LE"].indexOf(fmt) < 0) fmt = "S16_LE"; var dbg = !!self.debugAudioCapture; - var dumpDir = typeof self.dumpStateDir === "string" && self.dumpStateDir ? self.dumpStateDir : "/tmp"; + var dumpDir = typeof self.dumpStateDir === "string" && self.dumpStateDir ? self.dumpStateDir : "/tmp"; var captureViaDumpState = function () { var tDump0 = Date.now(); @@ -812,7 +1379,7 @@ AIBridge.prototype._installOpenHook = function () { } // If cloud ASR is down, Hey-Jibo intents often have empty asr.text. // Try a one-shot offline audio fallback after the pipeline releases the mic. - if (looksLikeHeyJiboIntent(results)) { + if (looksLikeHeyJiboIntent(results) && !self.useAsrServiceStt) { self._scheduleOfflineAudioFallback("openHookNoText", 1400); } return resolve(); @@ -823,7 +1390,9 @@ AIBridge.prototype._installOpenHook = function () { if (rlog) { rlog.warn("ai-bridge", "ignoring cloud ASR error text (open hook)", { text: text.slice(0, 200) }); } - self._scheduleOfflineAudioFallback("cloudAsrErrorText", 1400); + if (!self.useAsrServiceStt) { + self._scheduleOfflineAudioFallback("cloudAsrErrorText", 1400); + } return resolve(); } @@ -1423,6 +1992,38 @@ AIBridge.prototype.setupTunables = function () { }); } + // Offline STT via jibo-asr-service (requires service running). + safe(function () { + return Tunable.getCheckboxField(LABEL.useAsrServiceStt, self.useAsrServiceStt, WIN).events.change.on(function (v) { + self.useAsrServiceStt = !!v; + if (rlog) { + rlog.info("ai-bridge", "use asr-service stt changed", { enabled: self.useAsrServiceStt }); + } + }); + }); + + safe(function () { + return Tunable.getCheckboxField(LABEL.asrServiceDebugWs, self.asrServiceDebugWs, WIN).events.change.on(function (v) { + self.asrServiceDebugWs = !!v; + if (rlog) { + rlog.info("ai-bridge", "asr-service debug ws changed", { enabled: self.asrServiceDebugWs }); + } + }); + }); + if (typeof Tunable.getButtonField === "function") { + safe(function () { + return Tunable.getButtonField("AI Bridge: Run ASR-service STT now", WIN).events.change.on(function () { + try { + self._captureTextViaAsrService("manual"); + } catch (e) { + if (rlog) { + rlog.warn("ai-bridge", "manual asr-service stt crashed", { err: String(e && (e.stack || e.message || e)) }); + } + } + }); + }); + } + if (typeof Tunable.getNumberField === "function") { safe(function () { return Tunable.getNumberField(LABEL.recordSeconds, self.recordSeconds, 1, 15, 1, WIN).events.change.on(function (v) { @@ -1550,6 +2151,10 @@ AIBridge.prototype._handleHJHeard = function () { if (!self.enabled) { return; } + if (self.useAsrServiceStt) { + if (self._inFlight) return; + return self._captureTextViaAsrService("hjHeard"); + } if (self.mode !== InputMode.AUDIO) { return; } @@ -1576,6 +2181,11 @@ AIBridge.prototype._handleTurnResult = function (data, eventName) { if (!self.enabled) { return; } + // If we're using the local ASR service STT pipeline, ignore cloud turn-results + // to avoid duplicate sends. + if (self.useAsrServiceStt) { + return; + } if (rlog) { try { rlog.debug("ai-bridge", "turn result", { event: eventName }); @@ -1614,7 +2224,9 @@ AIBridge.prototype._handleTurnResult = function (data, eventName) { if (rlog) { rlog.warn("ai-bridge", "ignoring cloud ASR error text", { event: eventName, text: text.slice(0, 220) }); } - self._scheduleOfflineAudioFallback("cloudAsrErrorText:" + String(eventName || "turnResult"), 1400); + if (!self.useAsrServiceStt) { + self._scheduleOfflineAudioFallback("cloudAsrErrorText:" + String(eventName || "turnResult"), 1400); + } return; } diff --git a/V3.1/build/usr/local/etc/jibo-system-manager.json b/V3.1/build/usr/local/etc/jibo-system-manager.json index d497e4c1..2407c4ec 100644 --- a/V3.1/build/usr/local/etc/jibo-system-manager.json +++ b/V3.1/build/usr/local/etc/jibo-system-manager.json @@ -95,7 +95,7 @@ "arguments": ["-c","/usr/local/etc/jibo-asr-service.json"], "environment": {}, "directory": "", - "enabled":false + "enabled":true }, "certification": { "enabled": false