HE SPEAKS?!??!?!?!?!

This commit is contained in:
2026-03-19 14:11:26 +02:00
parent 9eb2681de6
commit 216bb1586e
3 changed files with 668 additions and 14 deletions

View File

@@ -33,6 +33,7 @@ import time
import traceback import traceback
import wave import wave
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from urllib.error import URLError
from urllib.request import Request, urlopen from urllib.request import Request, urlopen
@@ -81,8 +82,13 @@ def _ollama_chat(user_text: str) -> str:
method="POST", method="POST",
) )
with urlopen(req, timeout=60) as resp: try:
data = json.loads(resp.read().decode("utf-8")) with urlopen(req, timeout=60) as resp:
data = json.loads(resp.read().decode("utf-8"))
except Exception as e:
# Let caller decide how to respond; include a useful hint in logs.
_log(f"Ollama request failed url={ollama_url!r} err={e!r}")
raise
msg = data.get("message") or {} msg = data.get("message") or {}
content = msg.get("content") content = msg.get("content")
@@ -91,6 +97,19 @@ def _ollama_chat(user_text: str) -> str:
return content.strip() return content.strip()
def _short_err(e: BaseException) -> str:
s = str(e) or e.__class__.__name__
s = " ".join(s.split())
if len(s) > 240:
s = s[:240] + "..."
return s
def _ollama_down_reply() -> str:
# Keep it short and speakable.
return "My AI server isn't reachable right now. Please start Ollama on the computer, then try again."
def _wav_diagnostics(wav_bytes: bytes) -> dict: def _wav_diagnostics(wav_bytes: bytes) -> dict:
"""Best-effort WAV parsing + signal stats for debugging mic capture.""" """Best-effort WAV parsing + signal stats for debugging mic capture."""
info: dict = {"bytes": len(wav_bytes)} info: dict = {"bytes": len(wav_bytes)}
@@ -245,9 +264,16 @@ class Handler(BaseHTTPRequestHandler):
_json_response(self, 400, {"error": "Missing 'text'"}) _json_response(self, 400, {"error": "Missing 'text'"})
return return
_log(f"{client} /text prompt_chars={len(text)} prompt={text[:200]!r}") _log(f"{client} /text prompt_chars={len(text)} prompt={text[:200]!r}")
reply = _ollama_chat(text) try:
_log(f"{client} /text ok reply_chars={len(reply)}") reply = _ollama_chat(text)
_json_response(self, 200, {"reply": reply}) _log(f"{client} /text ok reply_chars={len(reply)}")
_json_response(self, 200, {"reply": reply})
except URLError as e:
_log(f"{client} /text ollama_unreachable err={_short_err(e)!r}")
_json_response(self, 200, {"reply": _ollama_down_reply(), "ollama_ok": False, "ollama_error": _short_err(e)})
except ConnectionRefusedError as e:
_log(f"{client} /text ollama_refused err={_short_err(e)!r}")
_json_response(self, 200, {"reply": _ollama_down_reply(), "ollama_ok": False, "ollama_error": _short_err(e)})
return return
if self.path == "/v1/chat/audio": if self.path == "/v1/chat/audio":
@@ -295,9 +321,24 @@ class Handler(BaseHTTPRequestHandler):
_json_response(self, 200, {"reply": "I didn't catch that. Could you say it again?", "text": ""}) _json_response(self, 200, {"reply": "I didn't catch that. Could you say it again?", "text": ""})
return return
_log(f"{client} /audio transcript_chars={len(transcript)} transcript={transcript[:200]!r}") _log(f"{client} /audio transcript_chars={len(transcript)} transcript={transcript[:200]!r}")
reply = _ollama_chat(transcript) try:
_log(f"{client} /audio ok text_chars={len(transcript)} reply_chars={len(reply)}") reply = _ollama_chat(transcript)
_json_response(self, 200, {"reply": reply, "text": transcript}) _log(f"{client} /audio ok text_chars={len(transcript)} reply_chars={len(reply)}")
_json_response(self, 200, {"reply": reply, "text": transcript})
except URLError as e:
_log(f"{client} /audio ollama_unreachable err={_short_err(e)!r}")
_json_response(
self,
200,
{"reply": _ollama_down_reply(), "text": transcript, "ollama_ok": False, "ollama_error": _short_err(e)},
)
except ConnectionRefusedError as e:
_log(f"{client} /audio ollama_refused err={_short_err(e)!r}")
_json_response(
self,
200,
{"reply": _ollama_down_reply(), "text": transcript, "ollama_ok": False, "ollama_error": _short_err(e)},
)
return return
_json_response(self, 404, {"error": "Not found"}) _json_response(self, 404, {"error": "Not found"})
@@ -332,6 +373,7 @@ def main():
+ " model=" + " model="
+ os.environ.get("OLLAMA_MODEL", "phi3.5") + os.environ.get("OLLAMA_MODEL", "phi3.5")
) )
_log("Ollama health check: curl -s http://127.0.0.1:11434/api/tags | head")
if not _whisper.available(): if not _whisper.available():
_log("STT: faster-whisper not installed; /v1/chat/audio will return 503") _log("STT: faster-whisper not installed; /v1/chat/audio will return 503")
server.serve_forever() server.serve_forever()

View File

@@ -9,6 +9,13 @@ var http = require("http");
var https = require("https"); var https = require("https");
var urlLib = require("url"); var urlLib = require("url");
var fs = require("fs"); var fs = require("fs");
var net = require("net");
var crypto = null;
try {
crypto = require("crypto");
} catch (e) {
crypto = null;
}
var childProcess = require("child_process"); var childProcess = require("child_process");
var spawn = childProcess.spawn; var spawn = childProcess.spawn;
@@ -49,6 +56,8 @@ var LABEL = {
audioFormat: "AI Bridge: Audio format (arecord -f)", audioFormat: "AI Bridge: Audio format (arecord -f)",
debugAudio: "AI Bridge: Debug audio capture", debugAudio: "AI Bridge: Debug audio capture",
useDumpStateAudio: "AI Bridge: Use audio-service dump-state (fix mic busy)", useDumpStateAudio: "AI Bridge: Use audio-service dump-state (fix mic busy)",
useAsrServiceStt: "AI Bridge: Use ASR-service STT (8088)",
asrServiceDebugWs: "AI Bridge: Debug ASR-service WS",
}; };
var InputMode = { var InputMode = {
@@ -56,6 +65,13 @@ var InputMode = {
AUDIO: "AUDIO", AUDIO: "AUDIO",
}; };
var WsLib = null;
try {
WsLib = require("ws");
} catch (e) {
WsLib = null;
}
function httpJsonPost(urlString, payload, timeoutMs) { function httpJsonPost(urlString, payload, timeoutMs) {
timeoutMs = typeof timeoutMs === "number" ? timeoutMs : 15000; timeoutMs = typeof timeoutMs === "number" ? timeoutMs : 15000;
@@ -108,6 +124,399 @@ function httpJsonPost(urlString, payload, timeoutMs) {
}); });
} }
function httpJsonPostRaw(urlString, payload, timeoutMs) {
timeoutMs = typeof timeoutMs === "number" ? timeoutMs : 6000;
var parsed = urlLib.parse(urlString);
var isHttps = parsed.protocol === "https:";
var bodyStr = JSON.stringify(payload || {});
var body = new Buffer(bodyStr, "utf8");
var requestOptions = {
protocol: parsed.protocol,
hostname: parsed.hostname,
port: parsed.port || (isHttps ? 443 : 80),
path: parsed.path || "/",
method: "POST",
headers: {
"Content-Type": "application/json",
"Content-Length": body.length,
},
timeout: timeoutMs,
};
return new Promise(function (resolve, reject) {
var req = (isHttps ? https : http).request(requestOptions, function (res) {
var chunks = [];
res.on("data", function (d) {
chunks.push(d);
});
res.on("end", function () {
var raw = Buffer.concat(chunks).toString("utf8");
resolve({ statusCode: res.statusCode, headers: res.headers || {}, body: raw });
});
});
req.on("error", reject);
req.on("timeout", function () {
req.destroy(new Error("POST timeout"));
});
req.write(body);
req.end();
});
}
function parseWsUrl(wsUrl) {
// Very small ws:// URL parser for embedded Node.
// Returns { host, port, path }.
if (typeof wsUrl !== "string") return null;
var s = wsUrl.trim();
var m = s.match(/^wss?:\/\/([^\/]+)(\/.*)?$/i);
if (!m) return null;
var hostPort = m[1];
var path = m[2] || "/";
var hp = hostPort.split(":");
var host = hp[0];
var port = hp.length > 1 ? Number(hp[1]) : s.toLowerCase().indexOf("wss://") === 0 ? 443 : 80;
if (!host) return null;
if (!port || Number.isNaN(port)) port = 80;
return { host: host, port: port, path: path };
}
function connectWebSocketText(wsUrl, timeoutMs) {
// Returns a Promise resolving to { onMessage(fn), close() }.
// Uses `ws` module if available; otherwise uses a minimal RFC6455 client.
timeoutMs = typeof timeoutMs === "number" ? timeoutMs : 4000;
if (WsLib) {
return new Promise(function (resolve, reject) {
var ws;
try {
ws = new WsLib(wsUrl);
} catch (e) {
reject(e);
return;
}
var timer = setTimeout(function () {
try {
ws.terminate && ws.terminate();
} catch (e) {
// ignore
}
reject(new Error("ws connect timeout"));
}, timeoutMs);
ws.on("open", function () {
clearTimeout(timer);
resolve({
onMessage: function (fn) {
ws.on("message", function (msg) {
fn(typeof msg === "string" ? msg : msg && msg.toString ? msg.toString("utf8") : String(msg));
});
},
close: function () {
try {
ws.close();
} catch (e) {
// ignore
}
},
});
});
ws.on("error", function (e) {
clearTimeout(timer);
reject(e);
});
});
}
// Minimal client without external deps.
return new Promise(function (resolve, reject) {
var findHeaderEnd = function (buf) {
// Find \r\n\r\n in a Buffer.
if (!buf || !buf.length) return -1;
for (var i = 0; i + 3 < buf.length; i += 1) {
if (buf[i] === 13 && buf[i + 1] === 10 && buf[i + 2] === 13 && buf[i + 3] === 10) return i;
}
return -1;
};
var info = parseWsUrl(wsUrl);
if (!info) {
reject(new Error("bad ws url"));
return;
}
var key = null;
try {
key = crypto && crypto.randomBytes ? crypto.randomBytes(16).toString("base64") : new Buffer(String(Date.now()) + ":" + String(Math.random())).toString("base64");
} catch (e) {
key = new Buffer(String(Date.now()) + ":" + String(Math.random())).toString("base64");
}
var socket = net.connect({ host: info.host, port: info.port });
var timer = setTimeout(function () {
try {
socket.destroy();
} catch (e) {
// ignore
}
reject(new Error("ws connect timeout"));
}, timeoutMs);
var state = { handshook: false, buf: new Buffer(0) };
var msgHandlers = [];
var closed = false;
var fragType = null; // 0x1 text, 0x2 binary
var fragChunks = [];
var emitMessage = function (s) {
for (var i = 0; i < msgHandlers.length; i += 1) {
try {
msgHandlers[i](s);
} catch (e) {
// ignore
}
}
};
var sendPong = function (payload) {
try {
// server->client frames are unmasked; client->server must be masked.
var mask = crypto && crypto.randomBytes ? crypto.randomBytes(4) : new Buffer([1, 2, 3, 4]);
var len = payload ? payload.length : 0;
var header = null;
if (len < 126) {
header = new Buffer(2);
header[0] = 0x8a; // FIN + pong
header[1] = 0x80 | len;
} else {
header = new Buffer(4);
header[0] = 0x8a;
header[1] = 0x80 | 126;
header.writeUInt16BE(len, 2);
}
var out = new Buffer(header.length + 4 + len);
header.copy(out, 0);
mask.copy(out, header.length);
for (var i = 0; i < len; i += 1) {
out[header.length + 4 + i] = payload[i] ^ mask[i % 4];
}
socket.write(out);
} catch (e) {
// ignore
}
};
var parseFrames = function () {
// parse server->client frames (unmasked)
while (state.buf.length >= 2) {
var b0 = state.buf[0];
var b1 = state.buf[1];
var fin = (b0 & 0x80) !== 0;
var opcode = b0 & 0x0f;
var masked = (b1 & 0x80) !== 0;
var len = b1 & 0x7f;
var off = 2;
if (len === 126) {
if (state.buf.length < off + 2) return;
len = state.buf.readUInt16BE(off);
off += 2;
} else if (len === 127) {
// Not expected; avoid overflow.
if (state.buf.length < off + 8) return;
// Only support up to 2^31-1
var hi = state.buf.readUInt32BE(off);
var lo = state.buf.readUInt32BE(off + 4);
off += 8;
if (hi !== 0) {
// too large
try {
socket.destroy();
} catch (e) {
// ignore
}
return;
}
len = lo;
}
var maskKey = null;
if (masked) {
if (state.buf.length < off + 4) return;
maskKey = state.buf.slice(off, off + 4);
off += 4;
}
if (state.buf.length < off + len) return;
var payload = state.buf.slice(off, off + len);
state.buf = state.buf.slice(off + len);
if (masked && maskKey) {
for (var i = 0; i < payload.length; i += 1) {
payload[i] = payload[i] ^ maskKey[i % 4];
}
}
if (opcode === 0x8) {
try {
socket.end();
} catch (e) {
// ignore
}
return;
}
if (opcode === 0x9) {
// ping
sendPong(payload);
continue;
}
// Continuation frames for fragmented messages.
if (opcode === 0x0) {
if (!fragType) {
continue;
}
fragChunks.push(payload);
if (fin) {
var whole = Buffer.concat(fragChunks);
fragType = null;
fragChunks = [];
try {
emitMessage(whole.toString("utf8"));
} catch (e) {
// ignore
}
}
continue;
}
// Text or binary payloads (ASR may send either).
if (opcode === 0x1 || opcode === 0x2) {
if (!fin) {
fragType = opcode;
fragChunks = [payload];
continue;
}
try {
emitMessage(payload.toString("utf8"));
} catch (e) {
// ignore
}
continue;
}
}
};
socket.on("connect", function () {
var req = "";
req += "GET " + info.path + " HTTP/1.1\r\n";
req += "Host: " + info.host + ":" + String(info.port) + "\r\n";
req += "Upgrade: websocket\r\n";
req += "Connection: Upgrade\r\n";
req += "Sec-WebSocket-Key: " + key + "\r\n";
req += "Sec-WebSocket-Version: 13\r\n";
req += "\r\n";
socket.write(req);
});
socket.on("data", function (d) {
if (closed) return;
state.buf = Buffer.concat([state.buf, d]);
if (!state.handshook) {
var endIdx = findHeaderEnd(state.buf);
if (endIdx < 0) return;
var headerText = state.buf.slice(0, endIdx).toString("utf8");
state.buf = state.buf.slice(endIdx + 4);
if (!/^HTTP\/1\.1 101/i.test(headerText)) {
closed = true;
clearTimeout(timer);
reject(new Error("ws handshake failed: " + headerText.split("\r\n")[0]));
try {
socket.destroy();
} catch (e) {
// ignore
}
return;
}
state.handshook = true;
clearTimeout(timer);
resolve({
onMessage: function (fn) {
msgHandlers.push(fn);
},
close: function () {
if (closed) return;
closed = true;
try {
socket.end();
} catch (e) {
// ignore
}
},
});
}
// Parse any remaining frames.
if (state.handshook) parseFrames();
});
socket.on("error", function (e) {
if (closed) return;
closed = true;
clearTimeout(timer);
reject(e);
});
socket.on("close", function () {
if (closed) return;
closed = true;
clearTimeout(timer);
});
});
}
function pickBestAsrUtterance(utterances) {
// utterances: [{utterance, score}] possibly.
try {
if (!utterances || !utterances.length) return "";
var bestText = "";
var bestScore = -1e99;
for (var i = 0; i < utterances.length; i += 1) {
var u = utterances[i];
var text = u && (u.utterance || u.Utterance || u.text) ? String(u.utterance || u.Utterance || u.text) : "";
text = text.replace(/^\s+|\s+$/g, "");
if (!text) continue;
// Light cleanup: sometimes a leading duplicated char occurs.
if (text.length >= 2 && text[0].toLowerCase && text[1].toLowerCase && text[0].toLowerCase() === text[1].toLowerCase()) {
text = text.slice(1);
}
var score = typeof u.score === "number" ? u.score : typeof u.Score === "number" ? u.Score : 0;
if (!bestText) {
bestText = text;
bestScore = score;
continue;
}
if (score > bestScore) {
bestText = text;
bestScore = score;
}
}
// Fallback: choose the shortest distinct utterance.
if (!bestText) {
var uniq = {};
var arr = [];
for (var j = 0; j < utterances.length; j += 1) {
var u2 = utterances[j];
var t2 = u2 && (u2.utterance || u2.Utterance || u2.text) ? String(u2.utterance || u2.Utterance || u2.text) : "";
t2 = t2.replace(/^\s+|\s+$/g, "");
if (!t2) continue;
var k = t2.toLowerCase ? t2.toLowerCase() : t2;
if (uniq[k]) continue;
uniq[k] = true;
arr.push(t2);
}
arr.sort(function (a, b) {
return a.length - b.length;
});
bestText = arr[0] || "";
}
return bestText;
} catch (e) {
return "";
}
}
function recordWavWithArecord(durationSec, sampleRate, channels, device, format, debug) { function recordWavWithArecord(durationSec, sampleRate, channels, device, format, debug) {
durationSec = typeof durationSec === "number" ? durationSec : 5; durationSec = typeof durationSec === "number" ? durationSec : 5;
sampleRate = typeof sampleRate === "number" ? sampleRate : 16000; sampleRate = typeof sampleRate === "number" ? sampleRate : 16000;
@@ -599,6 +1008,14 @@ function AIBridge(skill, jibo) {
// Prefer grabbing audio from the system audio service (avoids ALSA mic lock). // Prefer grabbing audio from the system audio service (avoids ALSA mic lock).
this.useDumpStateAudio = true; this.useDumpStateAudio = true;
this.dumpStateDir = "/tmp"; this.dumpStateDir = "/tmp";
// Offline STT via jibo-asr-service (HTTP + WebSocket) when available.
this.useAsrServiceStt = false;
this.asrServiceHost = "127.0.0.1";
this.asrServicePort = 8088;
this.asrAudioSourceId = "alsa1";
this.asrTimeoutMs = 15000;
this.asrServiceDebugWs = false;
this._asrServiceInFlight = false;
this._inFlight = false; this._inFlight = false;
this._lastHJAt = 0; this._lastHJAt = 0;
@@ -623,6 +1040,156 @@ function AIBridge(skill, jibo) {
}; };
} }
AIBridge.prototype._captureTextViaAsrService = function (source) {
var self = this;
if (!self.enabled) return Promise.resolve(null);
if (self._inFlight || self._asrServiceInFlight) return Promise.resolve(null);
self._asrServiceInFlight = true;
var host = String(self.asrServiceHost || "127.0.0.1");
var port = typeof self.asrServicePort === "number" ? self.asrServicePort : Number(self.asrServicePort) || 8088;
var baseHttp = "http://" + host + ":" + String(port);
var wsUrl = "ws://" + host + ":" + String(port) + "/simple_port";
var taskId = "DEBUG:ai-bridge-" + String(Date.now()) + "-" + String(Math.floor(Math.random() * 1e9));
var requestId = "stt_start_" + String(Date.now()) + "_" + String(Math.floor(Math.random() * 1e9));
var timeoutMs = typeof self.asrTimeoutMs === "number" ? self.asrTimeoutMs : 15000;
var debugWs = !!self.asrServiceDebugWs;
var wsClient = null;
var stopAlways = function () {
var stopPayload = { command: "stop", task_id: taskId, request_id: "stt_stop_" + String(Date.now()) + "_" + String(Math.floor(Math.random() * 1e9)) };
return httpJsonPostRaw(baseHttp + "/asr_simple_interface", stopPayload, 5000)
.then(function () {
// ignore
})
.catch(function () {
// ignore
});
};
var done = function () {
self._asrServiceInFlight = false;
};
var fail = function (e) {
if (rlog) {
rlog.warn("ai-bridge", "asr-service stt failed", { err: String(e && (e.stack || e.message || e)), source: source || "asr-service" });
}
self._asrServiceInFlight = false;
};
var startPayload = {
command: "start",
task_id: taskId,
audio_source_id: String(self.asrAudioSourceId || "alsa1"),
hotphrase: "none",
speech_to_text: true,
request_id: requestId,
};
var waitForFinal = function () {
return new Promise(function (resolve, reject) {
var timer = setTimeout(function () {
reject(new Error("asr-service stt timeout"));
}, timeoutMs);
var resolved = false;
var seen = 0;
wsClient.onMessage(function (msg) {
if (resolved) return;
seen += 1;
if (debugWs && rlog && seen <= 6) {
rlog.debug("ai-bridge", "asr-service ws msg", { idx: seen, raw: String(msg || "").slice(0, 500) });
}
var evt = null;
try {
evt = JSON.parse(String(msg || ""));
} catch (e) {
return;
}
if (!evt || typeof evt !== "object") return;
var eventType = evt.event_type || evt.eventType || evt.event || evt.type;
if (eventType !== "speech_to_text_final") return;
// Correlate to our request when possible, but don't hard-fail on schema drift.
var evTaskId = evt.task_id || evt.taskId || (evt.payload && (evt.payload.task_id || evt.payload.taskId));
var evRequestId = evt.request_id || evt.requestId || (evt.payload && (evt.payload.request_id || evt.payload.requestId));
var idMatches = false;
if (evTaskId && String(evTaskId) === String(taskId)) idMatches = true;
if (evRequestId && String(evRequestId) === String(requestId)) idMatches = true;
// If the event carries neither id, accept it (single in-flight capture).
if (!evTaskId && !evRequestId) idMatches = true;
if (!idMatches) return;
var utterances = evt.utterances || evt.Utterances || (evt.payload && (evt.payload.utterances || evt.payload.Utterances));
var best = pickBestAsrUtterance(utterances);
if (!best || !String(best).trim()) return;
resolved = true;
clearTimeout(timer);
resolve(String(best));
});
});
};
var t0 = Date.now();
return connectWebSocketText(wsUrl, 4000)
.then(function (ws) {
wsClient = ws;
return httpJsonPostRaw(baseHttp + "/asr_simple_interface", startPayload, 6000);
})
.then(function (resp) {
if (rlog) {
rlog.info("ai-bridge", "asr-service stt start", {
status: resp && resp.statusCode,
ms: Date.now() - t0,
ws: wsUrl,
http: baseHttp,
source: source || "asr-service",
body: resp && resp.body ? String(resp.body).slice(0, 500) : "",
});
}
if (!resp || !resp.statusCode || resp.statusCode < 200 || resp.statusCode >= 300) {
throw new Error("asr-service start failed: " + String(resp && resp.statusCode));
}
return waitForFinal();
})
.then(function (text) {
return stopAlways().then(function () {
return text;
});
})
.then(function (text) {
try {
wsClient && wsClient.close && wsClient.close();
} catch (e) {
// ignore
}
if (rlog) {
rlog.info("ai-bridge", "asr-service text", { chars: String(text).length, text: String(text).slice(0, 160), ms: Date.now() - t0, source: source || "asr-service" });
}
return self._sendText(text, "asr-service:" + String(source || ""));
})
.then(function () {
done();
})
.catch(function (e) {
try {
wsClient && wsClient.close && wsClient.close();
} catch (e2) {
// ignore
}
return stopAlways()
.then(function () {
throw e;
})
.catch(function () {
throw e;
});
})
.catch(function (e) {
fail(e);
});
};
AIBridge.prototype._scheduleOfflineAudioFallback = function (reason, delayMs) { AIBridge.prototype._scheduleOfflineAudioFallback = function (reason, delayMs) {
var self = this; var self = this;
if (!self.enabled) return; if (!self.enabled) return;
@@ -630,7 +1197,7 @@ AIBridge.prototype._scheduleOfflineAudioFallback = function (reason, delayMs) {
if (!self._allowAnyTextNoHJ && Date.now() - self._lastHJAt > self._hjWindowMs) return; if (!self._allowAnyTextNoHJ && Date.now() - self._lastHJAt > self._hjWindowMs) return;
if (self._inFlight) return; if (self._inFlight) return;
if (self._audioFallbackTimer) { if (self._audioFallbackTimer) {
clearTimeout(self._audioFallbackTimer); clearTimeout(self._audioFallbackTimer);
self._audioFallbackTimer = null; self._audioFallbackTimer = null;
} }
@@ -675,7 +1242,7 @@ AIBridge.prototype._captureAndSendAudio = function (source) {
if (["S16_LE", "S32_LE"].indexOf(fmt) < 0) fmt = "S16_LE"; if (["S16_LE", "S32_LE"].indexOf(fmt) < 0) fmt = "S16_LE";
var dbg = !!self.debugAudioCapture; var dbg = !!self.debugAudioCapture;
var dumpDir = typeof self.dumpStateDir === "string" && self.dumpStateDir ? self.dumpStateDir : "/tmp"; var dumpDir = typeof self.dumpStateDir === "string" && self.dumpStateDir ? self.dumpStateDir : "/tmp";
var captureViaDumpState = function () { var captureViaDumpState = function () {
var tDump0 = Date.now(); var tDump0 = Date.now();
@@ -812,7 +1379,7 @@ AIBridge.prototype._installOpenHook = function () {
} }
// If cloud ASR is down, Hey-Jibo intents often have empty asr.text. // If cloud ASR is down, Hey-Jibo intents often have empty asr.text.
// Try a one-shot offline audio fallback after the pipeline releases the mic. // Try a one-shot offline audio fallback after the pipeline releases the mic.
if (looksLikeHeyJiboIntent(results)) { if (looksLikeHeyJiboIntent(results) && !self.useAsrServiceStt) {
self._scheduleOfflineAudioFallback("openHookNoText", 1400); self._scheduleOfflineAudioFallback("openHookNoText", 1400);
} }
return resolve(); return resolve();
@@ -823,7 +1390,9 @@ AIBridge.prototype._installOpenHook = function () {
if (rlog) { if (rlog) {
rlog.warn("ai-bridge", "ignoring cloud ASR error text (open hook)", { text: text.slice(0, 200) }); rlog.warn("ai-bridge", "ignoring cloud ASR error text (open hook)", { text: text.slice(0, 200) });
} }
self._scheduleOfflineAudioFallback("cloudAsrErrorText", 1400); if (!self.useAsrServiceStt) {
self._scheduleOfflineAudioFallback("cloudAsrErrorText", 1400);
}
return resolve(); return resolve();
} }
@@ -1423,6 +1992,38 @@ AIBridge.prototype.setupTunables = function () {
}); });
} }
// Offline STT via jibo-asr-service (requires service running).
safe(function () {
return Tunable.getCheckboxField(LABEL.useAsrServiceStt, self.useAsrServiceStt, WIN).events.change.on(function (v) {
self.useAsrServiceStt = !!v;
if (rlog) {
rlog.info("ai-bridge", "use asr-service stt changed", { enabled: self.useAsrServiceStt });
}
});
});
safe(function () {
return Tunable.getCheckboxField(LABEL.asrServiceDebugWs, self.asrServiceDebugWs, WIN).events.change.on(function (v) {
self.asrServiceDebugWs = !!v;
if (rlog) {
rlog.info("ai-bridge", "asr-service debug ws changed", { enabled: self.asrServiceDebugWs });
}
});
});
if (typeof Tunable.getButtonField === "function") {
safe(function () {
return Tunable.getButtonField("AI Bridge: Run ASR-service STT now", WIN).events.change.on(function () {
try {
self._captureTextViaAsrService("manual");
} catch (e) {
if (rlog) {
rlog.warn("ai-bridge", "manual asr-service stt crashed", { err: String(e && (e.stack || e.message || e)) });
}
}
});
});
}
if (typeof Tunable.getNumberField === "function") { if (typeof Tunable.getNumberField === "function") {
safe(function () { safe(function () {
return Tunable.getNumberField(LABEL.recordSeconds, self.recordSeconds, 1, 15, 1, WIN).events.change.on(function (v) { return Tunable.getNumberField(LABEL.recordSeconds, self.recordSeconds, 1, 15, 1, WIN).events.change.on(function (v) {
@@ -1550,6 +2151,10 @@ AIBridge.prototype._handleHJHeard = function () {
if (!self.enabled) { if (!self.enabled) {
return; return;
} }
if (self.useAsrServiceStt) {
if (self._inFlight) return;
return self._captureTextViaAsrService("hjHeard");
}
if (self.mode !== InputMode.AUDIO) { if (self.mode !== InputMode.AUDIO) {
return; return;
} }
@@ -1576,6 +2181,11 @@ AIBridge.prototype._handleTurnResult = function (data, eventName) {
if (!self.enabled) { if (!self.enabled) {
return; return;
} }
// If we're using the local ASR service STT pipeline, ignore cloud turn-results
// to avoid duplicate sends.
if (self.useAsrServiceStt) {
return;
}
if (rlog) { if (rlog) {
try { try {
rlog.debug("ai-bridge", "turn result", { event: eventName }); rlog.debug("ai-bridge", "turn result", { event: eventName });
@@ -1614,7 +2224,9 @@ AIBridge.prototype._handleTurnResult = function (data, eventName) {
if (rlog) { if (rlog) {
rlog.warn("ai-bridge", "ignoring cloud ASR error text", { event: eventName, text: text.slice(0, 220) }); rlog.warn("ai-bridge", "ignoring cloud ASR error text", { event: eventName, text: text.slice(0, 220) });
} }
self._scheduleOfflineAudioFallback("cloudAsrErrorText:" + String(eventName || "turnResult"), 1400); if (!self.useAsrServiceStt) {
self._scheduleOfflineAudioFallback("cloudAsrErrorText:" + String(eventName || "turnResult"), 1400);
}
return; return;
} }

View File

@@ -95,7 +95,7 @@
"arguments": ["-c","/usr/local/etc/jibo-asr-service.json"], "arguments": ["-c","/usr/local/etc/jibo-asr-service.json"],
"environment": {}, "environment": {},
"directory": "", "directory": "",
"enabled":false "enabled":true
}, },
"certification": { "certification": {
"enabled": false "enabled": false