HE SPEAKS?!??!?!?!?!
This commit is contained in:
@@ -33,6 +33,7 @@ import time
|
||||
import traceback
|
||||
import wave
|
||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||
from urllib.error import URLError
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
|
||||
@@ -81,8 +82,13 @@ def _ollama_chat(user_text: str) -> str:
|
||||
method="POST",
|
||||
)
|
||||
|
||||
with urlopen(req, timeout=60) as resp:
|
||||
data = json.loads(resp.read().decode("utf-8"))
|
||||
try:
|
||||
with urlopen(req, timeout=60) as resp:
|
||||
data = json.loads(resp.read().decode("utf-8"))
|
||||
except Exception as e:
|
||||
# Let caller decide how to respond; include a useful hint in logs.
|
||||
_log(f"Ollama request failed url={ollama_url!r} err={e!r}")
|
||||
raise
|
||||
|
||||
msg = data.get("message") or {}
|
||||
content = msg.get("content")
|
||||
@@ -91,6 +97,19 @@ def _ollama_chat(user_text: str) -> str:
|
||||
return content.strip()
|
||||
|
||||
|
||||
def _short_err(e: BaseException) -> str:
|
||||
s = str(e) or e.__class__.__name__
|
||||
s = " ".join(s.split())
|
||||
if len(s) > 240:
|
||||
s = s[:240] + "..."
|
||||
return s
|
||||
|
||||
|
||||
def _ollama_down_reply() -> str:
|
||||
# Keep it short and speakable.
|
||||
return "My AI server isn't reachable right now. Please start Ollama on the computer, then try again."
|
||||
|
||||
|
||||
def _wav_diagnostics(wav_bytes: bytes) -> dict:
|
||||
"""Best-effort WAV parsing + signal stats for debugging mic capture."""
|
||||
info: dict = {"bytes": len(wav_bytes)}
|
||||
@@ -245,9 +264,16 @@ class Handler(BaseHTTPRequestHandler):
|
||||
_json_response(self, 400, {"error": "Missing 'text'"})
|
||||
return
|
||||
_log(f"{client} /text prompt_chars={len(text)} prompt={text[:200]!r}")
|
||||
reply = _ollama_chat(text)
|
||||
_log(f"{client} /text ok reply_chars={len(reply)}")
|
||||
_json_response(self, 200, {"reply": reply})
|
||||
try:
|
||||
reply = _ollama_chat(text)
|
||||
_log(f"{client} /text ok reply_chars={len(reply)}")
|
||||
_json_response(self, 200, {"reply": reply})
|
||||
except URLError as e:
|
||||
_log(f"{client} /text ollama_unreachable err={_short_err(e)!r}")
|
||||
_json_response(self, 200, {"reply": _ollama_down_reply(), "ollama_ok": False, "ollama_error": _short_err(e)})
|
||||
except ConnectionRefusedError as e:
|
||||
_log(f"{client} /text ollama_refused err={_short_err(e)!r}")
|
||||
_json_response(self, 200, {"reply": _ollama_down_reply(), "ollama_ok": False, "ollama_error": _short_err(e)})
|
||||
return
|
||||
|
||||
if self.path == "/v1/chat/audio":
|
||||
@@ -295,9 +321,24 @@ class Handler(BaseHTTPRequestHandler):
|
||||
_json_response(self, 200, {"reply": "I didn't catch that. Could you say it again?", "text": ""})
|
||||
return
|
||||
_log(f"{client} /audio transcript_chars={len(transcript)} transcript={transcript[:200]!r}")
|
||||
reply = _ollama_chat(transcript)
|
||||
_log(f"{client} /audio ok text_chars={len(transcript)} reply_chars={len(reply)}")
|
||||
_json_response(self, 200, {"reply": reply, "text": transcript})
|
||||
try:
|
||||
reply = _ollama_chat(transcript)
|
||||
_log(f"{client} /audio ok text_chars={len(transcript)} reply_chars={len(reply)}")
|
||||
_json_response(self, 200, {"reply": reply, "text": transcript})
|
||||
except URLError as e:
|
||||
_log(f"{client} /audio ollama_unreachable err={_short_err(e)!r}")
|
||||
_json_response(
|
||||
self,
|
||||
200,
|
||||
{"reply": _ollama_down_reply(), "text": transcript, "ollama_ok": False, "ollama_error": _short_err(e)},
|
||||
)
|
||||
except ConnectionRefusedError as e:
|
||||
_log(f"{client} /audio ollama_refused err={_short_err(e)!r}")
|
||||
_json_response(
|
||||
self,
|
||||
200,
|
||||
{"reply": _ollama_down_reply(), "text": transcript, "ollama_ok": False, "ollama_error": _short_err(e)},
|
||||
)
|
||||
return
|
||||
|
||||
_json_response(self, 404, {"error": "Not found"})
|
||||
@@ -332,6 +373,7 @@ def main():
|
||||
+ " model="
|
||||
+ os.environ.get("OLLAMA_MODEL", "phi3.5")
|
||||
)
|
||||
_log("Ollama health check: curl -s http://127.0.0.1:11434/api/tags | head")
|
||||
if not _whisper.available():
|
||||
_log("STT: faster-whisper not installed; /v1/chat/audio will return 503")
|
||||
server.serve_forever()
|
||||
|
||||
@@ -9,6 +9,13 @@ var http = require("http");
|
||||
var https = require("https");
|
||||
var urlLib = require("url");
|
||||
var fs = require("fs");
|
||||
var net = require("net");
|
||||
var crypto = null;
|
||||
try {
|
||||
crypto = require("crypto");
|
||||
} catch (e) {
|
||||
crypto = null;
|
||||
}
|
||||
var childProcess = require("child_process");
|
||||
var spawn = childProcess.spawn;
|
||||
|
||||
@@ -49,6 +56,8 @@ var LABEL = {
|
||||
audioFormat: "AI Bridge: Audio format (arecord -f)",
|
||||
debugAudio: "AI Bridge: Debug audio capture",
|
||||
useDumpStateAudio: "AI Bridge: Use audio-service dump-state (fix mic busy)",
|
||||
useAsrServiceStt: "AI Bridge: Use ASR-service STT (8088)",
|
||||
asrServiceDebugWs: "AI Bridge: Debug ASR-service WS",
|
||||
};
|
||||
|
||||
var InputMode = {
|
||||
@@ -56,6 +65,13 @@ var InputMode = {
|
||||
AUDIO: "AUDIO",
|
||||
};
|
||||
|
||||
var WsLib = null;
|
||||
try {
|
||||
WsLib = require("ws");
|
||||
} catch (e) {
|
||||
WsLib = null;
|
||||
}
|
||||
|
||||
function httpJsonPost(urlString, payload, timeoutMs) {
|
||||
timeoutMs = typeof timeoutMs === "number" ? timeoutMs : 15000;
|
||||
|
||||
@@ -108,6 +124,399 @@ function httpJsonPost(urlString, payload, timeoutMs) {
|
||||
});
|
||||
}
|
||||
|
||||
function httpJsonPostRaw(urlString, payload, timeoutMs) {
|
||||
timeoutMs = typeof timeoutMs === "number" ? timeoutMs : 6000;
|
||||
|
||||
var parsed = urlLib.parse(urlString);
|
||||
var isHttps = parsed.protocol === "https:";
|
||||
var bodyStr = JSON.stringify(payload || {});
|
||||
var body = new Buffer(bodyStr, "utf8");
|
||||
var requestOptions = {
|
||||
protocol: parsed.protocol,
|
||||
hostname: parsed.hostname,
|
||||
port: parsed.port || (isHttps ? 443 : 80),
|
||||
path: parsed.path || "/",
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"Content-Length": body.length,
|
||||
},
|
||||
timeout: timeoutMs,
|
||||
};
|
||||
|
||||
return new Promise(function (resolve, reject) {
|
||||
var req = (isHttps ? https : http).request(requestOptions, function (res) {
|
||||
var chunks = [];
|
||||
res.on("data", function (d) {
|
||||
chunks.push(d);
|
||||
});
|
||||
res.on("end", function () {
|
||||
var raw = Buffer.concat(chunks).toString("utf8");
|
||||
resolve({ statusCode: res.statusCode, headers: res.headers || {}, body: raw });
|
||||
});
|
||||
});
|
||||
req.on("error", reject);
|
||||
req.on("timeout", function () {
|
||||
req.destroy(new Error("POST timeout"));
|
||||
});
|
||||
req.write(body);
|
||||
req.end();
|
||||
});
|
||||
}
|
||||
|
||||
function parseWsUrl(wsUrl) {
|
||||
// Very small ws:// URL parser for embedded Node.
|
||||
// Returns { host, port, path }.
|
||||
if (typeof wsUrl !== "string") return null;
|
||||
var s = wsUrl.trim();
|
||||
var m = s.match(/^wss?:\/\/([^\/]+)(\/.*)?$/i);
|
||||
if (!m) return null;
|
||||
var hostPort = m[1];
|
||||
var path = m[2] || "/";
|
||||
var hp = hostPort.split(":");
|
||||
var host = hp[0];
|
||||
var port = hp.length > 1 ? Number(hp[1]) : s.toLowerCase().indexOf("wss://") === 0 ? 443 : 80;
|
||||
if (!host) return null;
|
||||
if (!port || Number.isNaN(port)) port = 80;
|
||||
return { host: host, port: port, path: path };
|
||||
}
|
||||
|
||||
function connectWebSocketText(wsUrl, timeoutMs) {
|
||||
// Returns a Promise resolving to { onMessage(fn), close() }.
|
||||
// Uses `ws` module if available; otherwise uses a minimal RFC6455 client.
|
||||
timeoutMs = typeof timeoutMs === "number" ? timeoutMs : 4000;
|
||||
|
||||
if (WsLib) {
|
||||
return new Promise(function (resolve, reject) {
|
||||
var ws;
|
||||
try {
|
||||
ws = new WsLib(wsUrl);
|
||||
} catch (e) {
|
||||
reject(e);
|
||||
return;
|
||||
}
|
||||
var timer = setTimeout(function () {
|
||||
try {
|
||||
ws.terminate && ws.terminate();
|
||||
} catch (e) {
|
||||
// ignore
|
||||
}
|
||||
reject(new Error("ws connect timeout"));
|
||||
}, timeoutMs);
|
||||
ws.on("open", function () {
|
||||
clearTimeout(timer);
|
||||
resolve({
|
||||
onMessage: function (fn) {
|
||||
ws.on("message", function (msg) {
|
||||
fn(typeof msg === "string" ? msg : msg && msg.toString ? msg.toString("utf8") : String(msg));
|
||||
});
|
||||
},
|
||||
close: function () {
|
||||
try {
|
||||
ws.close();
|
||||
} catch (e) {
|
||||
// ignore
|
||||
}
|
||||
},
|
||||
});
|
||||
});
|
||||
ws.on("error", function (e) {
|
||||
clearTimeout(timer);
|
||||
reject(e);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Minimal client without external deps.
|
||||
return new Promise(function (resolve, reject) {
|
||||
var findHeaderEnd = function (buf) {
|
||||
// Find \r\n\r\n in a Buffer.
|
||||
if (!buf || !buf.length) return -1;
|
||||
for (var i = 0; i + 3 < buf.length; i += 1) {
|
||||
if (buf[i] === 13 && buf[i + 1] === 10 && buf[i + 2] === 13 && buf[i + 3] === 10) return i;
|
||||
}
|
||||
return -1;
|
||||
};
|
||||
|
||||
var info = parseWsUrl(wsUrl);
|
||||
if (!info) {
|
||||
reject(new Error("bad ws url"));
|
||||
return;
|
||||
}
|
||||
var key = null;
|
||||
try {
|
||||
key = crypto && crypto.randomBytes ? crypto.randomBytes(16).toString("base64") : new Buffer(String(Date.now()) + ":" + String(Math.random())).toString("base64");
|
||||
} catch (e) {
|
||||
key = new Buffer(String(Date.now()) + ":" + String(Math.random())).toString("base64");
|
||||
}
|
||||
|
||||
var socket = net.connect({ host: info.host, port: info.port });
|
||||
var timer = setTimeout(function () {
|
||||
try {
|
||||
socket.destroy();
|
||||
} catch (e) {
|
||||
// ignore
|
||||
}
|
||||
reject(new Error("ws connect timeout"));
|
||||
}, timeoutMs);
|
||||
|
||||
var state = { handshook: false, buf: new Buffer(0) };
|
||||
var msgHandlers = [];
|
||||
var closed = false;
|
||||
var fragType = null; // 0x1 text, 0x2 binary
|
||||
var fragChunks = [];
|
||||
|
||||
var emitMessage = function (s) {
|
||||
for (var i = 0; i < msgHandlers.length; i += 1) {
|
||||
try {
|
||||
msgHandlers[i](s);
|
||||
} catch (e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
var sendPong = function (payload) {
|
||||
try {
|
||||
// server->client frames are unmasked; client->server must be masked.
|
||||
var mask = crypto && crypto.randomBytes ? crypto.randomBytes(4) : new Buffer([1, 2, 3, 4]);
|
||||
var len = payload ? payload.length : 0;
|
||||
var header = null;
|
||||
if (len < 126) {
|
||||
header = new Buffer(2);
|
||||
header[0] = 0x8a; // FIN + pong
|
||||
header[1] = 0x80 | len;
|
||||
} else {
|
||||
header = new Buffer(4);
|
||||
header[0] = 0x8a;
|
||||
header[1] = 0x80 | 126;
|
||||
header.writeUInt16BE(len, 2);
|
||||
}
|
||||
var out = new Buffer(header.length + 4 + len);
|
||||
header.copy(out, 0);
|
||||
mask.copy(out, header.length);
|
||||
for (var i = 0; i < len; i += 1) {
|
||||
out[header.length + 4 + i] = payload[i] ^ mask[i % 4];
|
||||
}
|
||||
socket.write(out);
|
||||
} catch (e) {
|
||||
// ignore
|
||||
}
|
||||
};
|
||||
|
||||
var parseFrames = function () {
|
||||
// parse server->client frames (unmasked)
|
||||
while (state.buf.length >= 2) {
|
||||
var b0 = state.buf[0];
|
||||
var b1 = state.buf[1];
|
||||
var fin = (b0 & 0x80) !== 0;
|
||||
var opcode = b0 & 0x0f;
|
||||
var masked = (b1 & 0x80) !== 0;
|
||||
var len = b1 & 0x7f;
|
||||
var off = 2;
|
||||
if (len === 126) {
|
||||
if (state.buf.length < off + 2) return;
|
||||
len = state.buf.readUInt16BE(off);
|
||||
off += 2;
|
||||
} else if (len === 127) {
|
||||
// Not expected; avoid overflow.
|
||||
if (state.buf.length < off + 8) return;
|
||||
// Only support up to 2^31-1
|
||||
var hi = state.buf.readUInt32BE(off);
|
||||
var lo = state.buf.readUInt32BE(off + 4);
|
||||
off += 8;
|
||||
if (hi !== 0) {
|
||||
// too large
|
||||
try {
|
||||
socket.destroy();
|
||||
} catch (e) {
|
||||
// ignore
|
||||
}
|
||||
return;
|
||||
}
|
||||
len = lo;
|
||||
}
|
||||
var maskKey = null;
|
||||
if (masked) {
|
||||
if (state.buf.length < off + 4) return;
|
||||
maskKey = state.buf.slice(off, off + 4);
|
||||
off += 4;
|
||||
}
|
||||
if (state.buf.length < off + len) return;
|
||||
var payload = state.buf.slice(off, off + len);
|
||||
state.buf = state.buf.slice(off + len);
|
||||
|
||||
if (masked && maskKey) {
|
||||
for (var i = 0; i < payload.length; i += 1) {
|
||||
payload[i] = payload[i] ^ maskKey[i % 4];
|
||||
}
|
||||
}
|
||||
|
||||
if (opcode === 0x8) {
|
||||
try {
|
||||
socket.end();
|
||||
} catch (e) {
|
||||
// ignore
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (opcode === 0x9) {
|
||||
// ping
|
||||
sendPong(payload);
|
||||
continue;
|
||||
}
|
||||
// Continuation frames for fragmented messages.
|
||||
if (opcode === 0x0) {
|
||||
if (!fragType) {
|
||||
continue;
|
||||
}
|
||||
fragChunks.push(payload);
|
||||
if (fin) {
|
||||
var whole = Buffer.concat(fragChunks);
|
||||
fragType = null;
|
||||
fragChunks = [];
|
||||
try {
|
||||
emitMessage(whole.toString("utf8"));
|
||||
} catch (e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Text or binary payloads (ASR may send either).
|
||||
if (opcode === 0x1 || opcode === 0x2) {
|
||||
if (!fin) {
|
||||
fragType = opcode;
|
||||
fragChunks = [payload];
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
emitMessage(payload.toString("utf8"));
|
||||
} catch (e) {
|
||||
// ignore
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
socket.on("connect", function () {
|
||||
var req = "";
|
||||
req += "GET " + info.path + " HTTP/1.1\r\n";
|
||||
req += "Host: " + info.host + ":" + String(info.port) + "\r\n";
|
||||
req += "Upgrade: websocket\r\n";
|
||||
req += "Connection: Upgrade\r\n";
|
||||
req += "Sec-WebSocket-Key: " + key + "\r\n";
|
||||
req += "Sec-WebSocket-Version: 13\r\n";
|
||||
req += "\r\n";
|
||||
socket.write(req);
|
||||
});
|
||||
|
||||
socket.on("data", function (d) {
|
||||
if (closed) return;
|
||||
state.buf = Buffer.concat([state.buf, d]);
|
||||
if (!state.handshook) {
|
||||
var endIdx = findHeaderEnd(state.buf);
|
||||
if (endIdx < 0) return;
|
||||
var headerText = state.buf.slice(0, endIdx).toString("utf8");
|
||||
state.buf = state.buf.slice(endIdx + 4);
|
||||
if (!/^HTTP\/1\.1 101/i.test(headerText)) {
|
||||
closed = true;
|
||||
clearTimeout(timer);
|
||||
reject(new Error("ws handshake failed: " + headerText.split("\r\n")[0]));
|
||||
try {
|
||||
socket.destroy();
|
||||
} catch (e) {
|
||||
// ignore
|
||||
}
|
||||
return;
|
||||
}
|
||||
state.handshook = true;
|
||||
clearTimeout(timer);
|
||||
resolve({
|
||||
onMessage: function (fn) {
|
||||
msgHandlers.push(fn);
|
||||
},
|
||||
close: function () {
|
||||
if (closed) return;
|
||||
closed = true;
|
||||
try {
|
||||
socket.end();
|
||||
} catch (e) {
|
||||
// ignore
|
||||
}
|
||||
},
|
||||
});
|
||||
}
|
||||
// Parse any remaining frames.
|
||||
if (state.handshook) parseFrames();
|
||||
});
|
||||
socket.on("error", function (e) {
|
||||
if (closed) return;
|
||||
closed = true;
|
||||
clearTimeout(timer);
|
||||
reject(e);
|
||||
});
|
||||
socket.on("close", function () {
|
||||
if (closed) return;
|
||||
closed = true;
|
||||
clearTimeout(timer);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function pickBestAsrUtterance(utterances) {
|
||||
// utterances: [{utterance, score}] possibly.
|
||||
try {
|
||||
if (!utterances || !utterances.length) return "";
|
||||
var bestText = "";
|
||||
var bestScore = -1e99;
|
||||
for (var i = 0; i < utterances.length; i += 1) {
|
||||
var u = utterances[i];
|
||||
var text = u && (u.utterance || u.Utterance || u.text) ? String(u.utterance || u.Utterance || u.text) : "";
|
||||
text = text.replace(/^\s+|\s+$/g, "");
|
||||
if (!text) continue;
|
||||
// Light cleanup: sometimes a leading duplicated char occurs.
|
||||
if (text.length >= 2 && text[0].toLowerCase && text[1].toLowerCase && text[0].toLowerCase() === text[1].toLowerCase()) {
|
||||
text = text.slice(1);
|
||||
}
|
||||
var score = typeof u.score === "number" ? u.score : typeof u.Score === "number" ? u.Score : 0;
|
||||
if (!bestText) {
|
||||
bestText = text;
|
||||
bestScore = score;
|
||||
continue;
|
||||
}
|
||||
if (score > bestScore) {
|
||||
bestText = text;
|
||||
bestScore = score;
|
||||
}
|
||||
}
|
||||
// Fallback: choose the shortest distinct utterance.
|
||||
if (!bestText) {
|
||||
var uniq = {};
|
||||
var arr = [];
|
||||
for (var j = 0; j < utterances.length; j += 1) {
|
||||
var u2 = utterances[j];
|
||||
var t2 = u2 && (u2.utterance || u2.Utterance || u2.text) ? String(u2.utterance || u2.Utterance || u2.text) : "";
|
||||
t2 = t2.replace(/^\s+|\s+$/g, "");
|
||||
if (!t2) continue;
|
||||
var k = t2.toLowerCase ? t2.toLowerCase() : t2;
|
||||
if (uniq[k]) continue;
|
||||
uniq[k] = true;
|
||||
arr.push(t2);
|
||||
}
|
||||
arr.sort(function (a, b) {
|
||||
return a.length - b.length;
|
||||
});
|
||||
bestText = arr[0] || "";
|
||||
}
|
||||
return bestText;
|
||||
} catch (e) {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
function recordWavWithArecord(durationSec, sampleRate, channels, device, format, debug) {
|
||||
durationSec = typeof durationSec === "number" ? durationSec : 5;
|
||||
sampleRate = typeof sampleRate === "number" ? sampleRate : 16000;
|
||||
@@ -599,6 +1008,14 @@ function AIBridge(skill, jibo) {
|
||||
// Prefer grabbing audio from the system audio service (avoids ALSA mic lock).
|
||||
this.useDumpStateAudio = true;
|
||||
this.dumpStateDir = "/tmp";
|
||||
// Offline STT via jibo-asr-service (HTTP + WebSocket) when available.
|
||||
this.useAsrServiceStt = false;
|
||||
this.asrServiceHost = "127.0.0.1";
|
||||
this.asrServicePort = 8088;
|
||||
this.asrAudioSourceId = "alsa1";
|
||||
this.asrTimeoutMs = 15000;
|
||||
this.asrServiceDebugWs = false;
|
||||
this._asrServiceInFlight = false;
|
||||
|
||||
this._inFlight = false;
|
||||
this._lastHJAt = 0;
|
||||
@@ -623,6 +1040,156 @@ function AIBridge(skill, jibo) {
|
||||
};
|
||||
}
|
||||
|
||||
AIBridge.prototype._captureTextViaAsrService = function (source) {
|
||||
var self = this;
|
||||
if (!self.enabled) return Promise.resolve(null);
|
||||
if (self._inFlight || self._asrServiceInFlight) return Promise.resolve(null);
|
||||
self._asrServiceInFlight = true;
|
||||
|
||||
var host = String(self.asrServiceHost || "127.0.0.1");
|
||||
var port = typeof self.asrServicePort === "number" ? self.asrServicePort : Number(self.asrServicePort) || 8088;
|
||||
var baseHttp = "http://" + host + ":" + String(port);
|
||||
var wsUrl = "ws://" + host + ":" + String(port) + "/simple_port";
|
||||
var taskId = "DEBUG:ai-bridge-" + String(Date.now()) + "-" + String(Math.floor(Math.random() * 1e9));
|
||||
var requestId = "stt_start_" + String(Date.now()) + "_" + String(Math.floor(Math.random() * 1e9));
|
||||
var timeoutMs = typeof self.asrTimeoutMs === "number" ? self.asrTimeoutMs : 15000;
|
||||
var debugWs = !!self.asrServiceDebugWs;
|
||||
|
||||
var wsClient = null;
|
||||
var stopAlways = function () {
|
||||
var stopPayload = { command: "stop", task_id: taskId, request_id: "stt_stop_" + String(Date.now()) + "_" + String(Math.floor(Math.random() * 1e9)) };
|
||||
return httpJsonPostRaw(baseHttp + "/asr_simple_interface", stopPayload, 5000)
|
||||
.then(function () {
|
||||
// ignore
|
||||
})
|
||||
.catch(function () {
|
||||
// ignore
|
||||
});
|
||||
};
|
||||
|
||||
var done = function () {
|
||||
self._asrServiceInFlight = false;
|
||||
};
|
||||
|
||||
var fail = function (e) {
|
||||
if (rlog) {
|
||||
rlog.warn("ai-bridge", "asr-service stt failed", { err: String(e && (e.stack || e.message || e)), source: source || "asr-service" });
|
||||
}
|
||||
self._asrServiceInFlight = false;
|
||||
};
|
||||
|
||||
var startPayload = {
|
||||
command: "start",
|
||||
task_id: taskId,
|
||||
audio_source_id: String(self.asrAudioSourceId || "alsa1"),
|
||||
hotphrase: "none",
|
||||
speech_to_text: true,
|
||||
request_id: requestId,
|
||||
};
|
||||
|
||||
var waitForFinal = function () {
|
||||
return new Promise(function (resolve, reject) {
|
||||
var timer = setTimeout(function () {
|
||||
reject(new Error("asr-service stt timeout"));
|
||||
}, timeoutMs);
|
||||
var resolved = false;
|
||||
var seen = 0;
|
||||
wsClient.onMessage(function (msg) {
|
||||
if (resolved) return;
|
||||
seen += 1;
|
||||
if (debugWs && rlog && seen <= 6) {
|
||||
rlog.debug("ai-bridge", "asr-service ws msg", { idx: seen, raw: String(msg || "").slice(0, 500) });
|
||||
}
|
||||
var evt = null;
|
||||
try {
|
||||
evt = JSON.parse(String(msg || ""));
|
||||
} catch (e) {
|
||||
return;
|
||||
}
|
||||
if (!evt || typeof evt !== "object") return;
|
||||
var eventType = evt.event_type || evt.eventType || evt.event || evt.type;
|
||||
if (eventType !== "speech_to_text_final") return;
|
||||
|
||||
// Correlate to our request when possible, but don't hard-fail on schema drift.
|
||||
var evTaskId = evt.task_id || evt.taskId || (evt.payload && (evt.payload.task_id || evt.payload.taskId));
|
||||
var evRequestId = evt.request_id || evt.requestId || (evt.payload && (evt.payload.request_id || evt.payload.requestId));
|
||||
var idMatches = false;
|
||||
if (evTaskId && String(evTaskId) === String(taskId)) idMatches = true;
|
||||
if (evRequestId && String(evRequestId) === String(requestId)) idMatches = true;
|
||||
// If the event carries neither id, accept it (single in-flight capture).
|
||||
if (!evTaskId && !evRequestId) idMatches = true;
|
||||
if (!idMatches) return;
|
||||
|
||||
var utterances = evt.utterances || evt.Utterances || (evt.payload && (evt.payload.utterances || evt.payload.Utterances));
|
||||
var best = pickBestAsrUtterance(utterances);
|
||||
if (!best || !String(best).trim()) return;
|
||||
resolved = true;
|
||||
clearTimeout(timer);
|
||||
resolve(String(best));
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
var t0 = Date.now();
|
||||
return connectWebSocketText(wsUrl, 4000)
|
||||
.then(function (ws) {
|
||||
wsClient = ws;
|
||||
return httpJsonPostRaw(baseHttp + "/asr_simple_interface", startPayload, 6000);
|
||||
})
|
||||
.then(function (resp) {
|
||||
if (rlog) {
|
||||
rlog.info("ai-bridge", "asr-service stt start", {
|
||||
status: resp && resp.statusCode,
|
||||
ms: Date.now() - t0,
|
||||
ws: wsUrl,
|
||||
http: baseHttp,
|
||||
source: source || "asr-service",
|
||||
body: resp && resp.body ? String(resp.body).slice(0, 500) : "",
|
||||
});
|
||||
}
|
||||
if (!resp || !resp.statusCode || resp.statusCode < 200 || resp.statusCode >= 300) {
|
||||
throw new Error("asr-service start failed: " + String(resp && resp.statusCode));
|
||||
}
|
||||
return waitForFinal();
|
||||
})
|
||||
.then(function (text) {
|
||||
return stopAlways().then(function () {
|
||||
return text;
|
||||
});
|
||||
})
|
||||
.then(function (text) {
|
||||
try {
|
||||
wsClient && wsClient.close && wsClient.close();
|
||||
} catch (e) {
|
||||
// ignore
|
||||
}
|
||||
if (rlog) {
|
||||
rlog.info("ai-bridge", "asr-service text", { chars: String(text).length, text: String(text).slice(0, 160), ms: Date.now() - t0, source: source || "asr-service" });
|
||||
}
|
||||
return self._sendText(text, "asr-service:" + String(source || ""));
|
||||
})
|
||||
.then(function () {
|
||||
done();
|
||||
})
|
||||
.catch(function (e) {
|
||||
try {
|
||||
wsClient && wsClient.close && wsClient.close();
|
||||
} catch (e2) {
|
||||
// ignore
|
||||
}
|
||||
return stopAlways()
|
||||
.then(function () {
|
||||
throw e;
|
||||
})
|
||||
.catch(function () {
|
||||
throw e;
|
||||
});
|
||||
})
|
||||
.catch(function (e) {
|
||||
fail(e);
|
||||
});
|
||||
};
|
||||
|
||||
AIBridge.prototype._scheduleOfflineAudioFallback = function (reason, delayMs) {
|
||||
var self = this;
|
||||
if (!self.enabled) return;
|
||||
@@ -630,7 +1197,7 @@ AIBridge.prototype._scheduleOfflineAudioFallback = function (reason, delayMs) {
|
||||
if (!self._allowAnyTextNoHJ && Date.now() - self._lastHJAt > self._hjWindowMs) return;
|
||||
if (self._inFlight) return;
|
||||
|
||||
if (self._audioFallbackTimer) {
|
||||
if (self._audioFallbackTimer) {
|
||||
clearTimeout(self._audioFallbackTimer);
|
||||
self._audioFallbackTimer = null;
|
||||
}
|
||||
@@ -675,7 +1242,7 @@ AIBridge.prototype._captureAndSendAudio = function (source) {
|
||||
if (["S16_LE", "S32_LE"].indexOf(fmt) < 0) fmt = "S16_LE";
|
||||
var dbg = !!self.debugAudioCapture;
|
||||
|
||||
var dumpDir = typeof self.dumpStateDir === "string" && self.dumpStateDir ? self.dumpStateDir : "/tmp";
|
||||
var dumpDir = typeof self.dumpStateDir === "string" && self.dumpStateDir ? self.dumpStateDir : "/tmp";
|
||||
|
||||
var captureViaDumpState = function () {
|
||||
var tDump0 = Date.now();
|
||||
@@ -812,7 +1379,7 @@ AIBridge.prototype._installOpenHook = function () {
|
||||
}
|
||||
// If cloud ASR is down, Hey-Jibo intents often have empty asr.text.
|
||||
// Try a one-shot offline audio fallback after the pipeline releases the mic.
|
||||
if (looksLikeHeyJiboIntent(results)) {
|
||||
if (looksLikeHeyJiboIntent(results) && !self.useAsrServiceStt) {
|
||||
self._scheduleOfflineAudioFallback("openHookNoText", 1400);
|
||||
}
|
||||
return resolve();
|
||||
@@ -823,7 +1390,9 @@ AIBridge.prototype._installOpenHook = function () {
|
||||
if (rlog) {
|
||||
rlog.warn("ai-bridge", "ignoring cloud ASR error text (open hook)", { text: text.slice(0, 200) });
|
||||
}
|
||||
self._scheduleOfflineAudioFallback("cloudAsrErrorText", 1400);
|
||||
if (!self.useAsrServiceStt) {
|
||||
self._scheduleOfflineAudioFallback("cloudAsrErrorText", 1400);
|
||||
}
|
||||
return resolve();
|
||||
}
|
||||
|
||||
@@ -1423,6 +1992,38 @@ AIBridge.prototype.setupTunables = function () {
|
||||
});
|
||||
}
|
||||
|
||||
// Offline STT via jibo-asr-service (requires service running).
|
||||
safe(function () {
|
||||
return Tunable.getCheckboxField(LABEL.useAsrServiceStt, self.useAsrServiceStt, WIN).events.change.on(function (v) {
|
||||
self.useAsrServiceStt = !!v;
|
||||
if (rlog) {
|
||||
rlog.info("ai-bridge", "use asr-service stt changed", { enabled: self.useAsrServiceStt });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
safe(function () {
|
||||
return Tunable.getCheckboxField(LABEL.asrServiceDebugWs, self.asrServiceDebugWs, WIN).events.change.on(function (v) {
|
||||
self.asrServiceDebugWs = !!v;
|
||||
if (rlog) {
|
||||
rlog.info("ai-bridge", "asr-service debug ws changed", { enabled: self.asrServiceDebugWs });
|
||||
}
|
||||
});
|
||||
});
|
||||
if (typeof Tunable.getButtonField === "function") {
|
||||
safe(function () {
|
||||
return Tunable.getButtonField("AI Bridge: Run ASR-service STT now", WIN).events.change.on(function () {
|
||||
try {
|
||||
self._captureTextViaAsrService("manual");
|
||||
} catch (e) {
|
||||
if (rlog) {
|
||||
rlog.warn("ai-bridge", "manual asr-service stt crashed", { err: String(e && (e.stack || e.message || e)) });
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
if (typeof Tunable.getNumberField === "function") {
|
||||
safe(function () {
|
||||
return Tunable.getNumberField(LABEL.recordSeconds, self.recordSeconds, 1, 15, 1, WIN).events.change.on(function (v) {
|
||||
@@ -1550,6 +2151,10 @@ AIBridge.prototype._handleHJHeard = function () {
|
||||
if (!self.enabled) {
|
||||
return;
|
||||
}
|
||||
if (self.useAsrServiceStt) {
|
||||
if (self._inFlight) return;
|
||||
return self._captureTextViaAsrService("hjHeard");
|
||||
}
|
||||
if (self.mode !== InputMode.AUDIO) {
|
||||
return;
|
||||
}
|
||||
@@ -1576,6 +2181,11 @@ AIBridge.prototype._handleTurnResult = function (data, eventName) {
|
||||
if (!self.enabled) {
|
||||
return;
|
||||
}
|
||||
// If we're using the local ASR service STT pipeline, ignore cloud turn-results
|
||||
// to avoid duplicate sends.
|
||||
if (self.useAsrServiceStt) {
|
||||
return;
|
||||
}
|
||||
if (rlog) {
|
||||
try {
|
||||
rlog.debug("ai-bridge", "turn result", { event: eventName });
|
||||
@@ -1614,7 +2224,9 @@ AIBridge.prototype._handleTurnResult = function (data, eventName) {
|
||||
if (rlog) {
|
||||
rlog.warn("ai-bridge", "ignoring cloud ASR error text", { event: eventName, text: text.slice(0, 220) });
|
||||
}
|
||||
self._scheduleOfflineAudioFallback("cloudAsrErrorText:" + String(eventName || "turnResult"), 1400);
|
||||
if (!self.useAsrServiceStt) {
|
||||
self._scheduleOfflineAudioFallback("cloudAsrErrorText:" + String(eventName || "turnResult"), 1400);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -95,7 +95,7 @@
|
||||
"arguments": ["-c","/usr/local/etc/jibo-asr-service.json"],
|
||||
"environment": {},
|
||||
"directory": "",
|
||||
"enabled":false
|
||||
"enabled":true
|
||||
},
|
||||
"certification": {
|
||||
"enabled": false
|
||||
|
||||
Reference in New Issue
Block a user