HE SPEAKS?!??!?!?!?!
This commit is contained in:
@@ -33,6 +33,7 @@ import time
|
|||||||
import traceback
|
import traceback
|
||||||
import wave
|
import wave
|
||||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||||
|
from urllib.error import URLError
|
||||||
from urllib.request import Request, urlopen
|
from urllib.request import Request, urlopen
|
||||||
|
|
||||||
|
|
||||||
@@ -81,8 +82,13 @@ def _ollama_chat(user_text: str) -> str:
|
|||||||
method="POST",
|
method="POST",
|
||||||
)
|
)
|
||||||
|
|
||||||
with urlopen(req, timeout=60) as resp:
|
try:
|
||||||
data = json.loads(resp.read().decode("utf-8"))
|
with urlopen(req, timeout=60) as resp:
|
||||||
|
data = json.loads(resp.read().decode("utf-8"))
|
||||||
|
except Exception as e:
|
||||||
|
# Let caller decide how to respond; include a useful hint in logs.
|
||||||
|
_log(f"Ollama request failed url={ollama_url!r} err={e!r}")
|
||||||
|
raise
|
||||||
|
|
||||||
msg = data.get("message") or {}
|
msg = data.get("message") or {}
|
||||||
content = msg.get("content")
|
content = msg.get("content")
|
||||||
@@ -91,6 +97,19 @@ def _ollama_chat(user_text: str) -> str:
|
|||||||
return content.strip()
|
return content.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _short_err(e: BaseException) -> str:
|
||||||
|
s = str(e) or e.__class__.__name__
|
||||||
|
s = " ".join(s.split())
|
||||||
|
if len(s) > 240:
|
||||||
|
s = s[:240] + "..."
|
||||||
|
return s
|
||||||
|
|
||||||
|
|
||||||
|
def _ollama_down_reply() -> str:
|
||||||
|
# Keep it short and speakable.
|
||||||
|
return "My AI server isn't reachable right now. Please start Ollama on the computer, then try again."
|
||||||
|
|
||||||
|
|
||||||
def _wav_diagnostics(wav_bytes: bytes) -> dict:
|
def _wav_diagnostics(wav_bytes: bytes) -> dict:
|
||||||
"""Best-effort WAV parsing + signal stats for debugging mic capture."""
|
"""Best-effort WAV parsing + signal stats for debugging mic capture."""
|
||||||
info: dict = {"bytes": len(wav_bytes)}
|
info: dict = {"bytes": len(wav_bytes)}
|
||||||
@@ -245,9 +264,16 @@ class Handler(BaseHTTPRequestHandler):
|
|||||||
_json_response(self, 400, {"error": "Missing 'text'"})
|
_json_response(self, 400, {"error": "Missing 'text'"})
|
||||||
return
|
return
|
||||||
_log(f"{client} /text prompt_chars={len(text)} prompt={text[:200]!r}")
|
_log(f"{client} /text prompt_chars={len(text)} prompt={text[:200]!r}")
|
||||||
reply = _ollama_chat(text)
|
try:
|
||||||
_log(f"{client} /text ok reply_chars={len(reply)}")
|
reply = _ollama_chat(text)
|
||||||
_json_response(self, 200, {"reply": reply})
|
_log(f"{client} /text ok reply_chars={len(reply)}")
|
||||||
|
_json_response(self, 200, {"reply": reply})
|
||||||
|
except URLError as e:
|
||||||
|
_log(f"{client} /text ollama_unreachable err={_short_err(e)!r}")
|
||||||
|
_json_response(self, 200, {"reply": _ollama_down_reply(), "ollama_ok": False, "ollama_error": _short_err(e)})
|
||||||
|
except ConnectionRefusedError as e:
|
||||||
|
_log(f"{client} /text ollama_refused err={_short_err(e)!r}")
|
||||||
|
_json_response(self, 200, {"reply": _ollama_down_reply(), "ollama_ok": False, "ollama_error": _short_err(e)})
|
||||||
return
|
return
|
||||||
|
|
||||||
if self.path == "/v1/chat/audio":
|
if self.path == "/v1/chat/audio":
|
||||||
@@ -295,9 +321,24 @@ class Handler(BaseHTTPRequestHandler):
|
|||||||
_json_response(self, 200, {"reply": "I didn't catch that. Could you say it again?", "text": ""})
|
_json_response(self, 200, {"reply": "I didn't catch that. Could you say it again?", "text": ""})
|
||||||
return
|
return
|
||||||
_log(f"{client} /audio transcript_chars={len(transcript)} transcript={transcript[:200]!r}")
|
_log(f"{client} /audio transcript_chars={len(transcript)} transcript={transcript[:200]!r}")
|
||||||
reply = _ollama_chat(transcript)
|
try:
|
||||||
_log(f"{client} /audio ok text_chars={len(transcript)} reply_chars={len(reply)}")
|
reply = _ollama_chat(transcript)
|
||||||
_json_response(self, 200, {"reply": reply, "text": transcript})
|
_log(f"{client} /audio ok text_chars={len(transcript)} reply_chars={len(reply)}")
|
||||||
|
_json_response(self, 200, {"reply": reply, "text": transcript})
|
||||||
|
except URLError as e:
|
||||||
|
_log(f"{client} /audio ollama_unreachable err={_short_err(e)!r}")
|
||||||
|
_json_response(
|
||||||
|
self,
|
||||||
|
200,
|
||||||
|
{"reply": _ollama_down_reply(), "text": transcript, "ollama_ok": False, "ollama_error": _short_err(e)},
|
||||||
|
)
|
||||||
|
except ConnectionRefusedError as e:
|
||||||
|
_log(f"{client} /audio ollama_refused err={_short_err(e)!r}")
|
||||||
|
_json_response(
|
||||||
|
self,
|
||||||
|
200,
|
||||||
|
{"reply": _ollama_down_reply(), "text": transcript, "ollama_ok": False, "ollama_error": _short_err(e)},
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
_json_response(self, 404, {"error": "Not found"})
|
_json_response(self, 404, {"error": "Not found"})
|
||||||
@@ -332,6 +373,7 @@ def main():
|
|||||||
+ " model="
|
+ " model="
|
||||||
+ os.environ.get("OLLAMA_MODEL", "phi3.5")
|
+ os.environ.get("OLLAMA_MODEL", "phi3.5")
|
||||||
)
|
)
|
||||||
|
_log("Ollama health check: curl -s http://127.0.0.1:11434/api/tags | head")
|
||||||
if not _whisper.available():
|
if not _whisper.available():
|
||||||
_log("STT: faster-whisper not installed; /v1/chat/audio will return 503")
|
_log("STT: faster-whisper not installed; /v1/chat/audio will return 503")
|
||||||
server.serve_forever()
|
server.serve_forever()
|
||||||
|
|||||||
@@ -9,6 +9,13 @@ var http = require("http");
|
|||||||
var https = require("https");
|
var https = require("https");
|
||||||
var urlLib = require("url");
|
var urlLib = require("url");
|
||||||
var fs = require("fs");
|
var fs = require("fs");
|
||||||
|
var net = require("net");
|
||||||
|
var crypto = null;
|
||||||
|
try {
|
||||||
|
crypto = require("crypto");
|
||||||
|
} catch (e) {
|
||||||
|
crypto = null;
|
||||||
|
}
|
||||||
var childProcess = require("child_process");
|
var childProcess = require("child_process");
|
||||||
var spawn = childProcess.spawn;
|
var spawn = childProcess.spawn;
|
||||||
|
|
||||||
@@ -49,6 +56,8 @@ var LABEL = {
|
|||||||
audioFormat: "AI Bridge: Audio format (arecord -f)",
|
audioFormat: "AI Bridge: Audio format (arecord -f)",
|
||||||
debugAudio: "AI Bridge: Debug audio capture",
|
debugAudio: "AI Bridge: Debug audio capture",
|
||||||
useDumpStateAudio: "AI Bridge: Use audio-service dump-state (fix mic busy)",
|
useDumpStateAudio: "AI Bridge: Use audio-service dump-state (fix mic busy)",
|
||||||
|
useAsrServiceStt: "AI Bridge: Use ASR-service STT (8088)",
|
||||||
|
asrServiceDebugWs: "AI Bridge: Debug ASR-service WS",
|
||||||
};
|
};
|
||||||
|
|
||||||
var InputMode = {
|
var InputMode = {
|
||||||
@@ -56,6 +65,13 @@ var InputMode = {
|
|||||||
AUDIO: "AUDIO",
|
AUDIO: "AUDIO",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
var WsLib = null;
|
||||||
|
try {
|
||||||
|
WsLib = require("ws");
|
||||||
|
} catch (e) {
|
||||||
|
WsLib = null;
|
||||||
|
}
|
||||||
|
|
||||||
function httpJsonPost(urlString, payload, timeoutMs) {
|
function httpJsonPost(urlString, payload, timeoutMs) {
|
||||||
timeoutMs = typeof timeoutMs === "number" ? timeoutMs : 15000;
|
timeoutMs = typeof timeoutMs === "number" ? timeoutMs : 15000;
|
||||||
|
|
||||||
@@ -108,6 +124,399 @@ function httpJsonPost(urlString, payload, timeoutMs) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function httpJsonPostRaw(urlString, payload, timeoutMs) {
|
||||||
|
timeoutMs = typeof timeoutMs === "number" ? timeoutMs : 6000;
|
||||||
|
|
||||||
|
var parsed = urlLib.parse(urlString);
|
||||||
|
var isHttps = parsed.protocol === "https:";
|
||||||
|
var bodyStr = JSON.stringify(payload || {});
|
||||||
|
var body = new Buffer(bodyStr, "utf8");
|
||||||
|
var requestOptions = {
|
||||||
|
protocol: parsed.protocol,
|
||||||
|
hostname: parsed.hostname,
|
||||||
|
port: parsed.port || (isHttps ? 443 : 80),
|
||||||
|
path: parsed.path || "/",
|
||||||
|
method: "POST",
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"Content-Length": body.length,
|
||||||
|
},
|
||||||
|
timeout: timeoutMs,
|
||||||
|
};
|
||||||
|
|
||||||
|
return new Promise(function (resolve, reject) {
|
||||||
|
var req = (isHttps ? https : http).request(requestOptions, function (res) {
|
||||||
|
var chunks = [];
|
||||||
|
res.on("data", function (d) {
|
||||||
|
chunks.push(d);
|
||||||
|
});
|
||||||
|
res.on("end", function () {
|
||||||
|
var raw = Buffer.concat(chunks).toString("utf8");
|
||||||
|
resolve({ statusCode: res.statusCode, headers: res.headers || {}, body: raw });
|
||||||
|
});
|
||||||
|
});
|
||||||
|
req.on("error", reject);
|
||||||
|
req.on("timeout", function () {
|
||||||
|
req.destroy(new Error("POST timeout"));
|
||||||
|
});
|
||||||
|
req.write(body);
|
||||||
|
req.end();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function parseWsUrl(wsUrl) {
|
||||||
|
// Very small ws:// URL parser for embedded Node.
|
||||||
|
// Returns { host, port, path }.
|
||||||
|
if (typeof wsUrl !== "string") return null;
|
||||||
|
var s = wsUrl.trim();
|
||||||
|
var m = s.match(/^wss?:\/\/([^\/]+)(\/.*)?$/i);
|
||||||
|
if (!m) return null;
|
||||||
|
var hostPort = m[1];
|
||||||
|
var path = m[2] || "/";
|
||||||
|
var hp = hostPort.split(":");
|
||||||
|
var host = hp[0];
|
||||||
|
var port = hp.length > 1 ? Number(hp[1]) : s.toLowerCase().indexOf("wss://") === 0 ? 443 : 80;
|
||||||
|
if (!host) return null;
|
||||||
|
if (!port || Number.isNaN(port)) port = 80;
|
||||||
|
return { host: host, port: port, path: path };
|
||||||
|
}
|
||||||
|
|
||||||
|
function connectWebSocketText(wsUrl, timeoutMs) {
|
||||||
|
// Returns a Promise resolving to { onMessage(fn), close() }.
|
||||||
|
// Uses `ws` module if available; otherwise uses a minimal RFC6455 client.
|
||||||
|
timeoutMs = typeof timeoutMs === "number" ? timeoutMs : 4000;
|
||||||
|
|
||||||
|
if (WsLib) {
|
||||||
|
return new Promise(function (resolve, reject) {
|
||||||
|
var ws;
|
||||||
|
try {
|
||||||
|
ws = new WsLib(wsUrl);
|
||||||
|
} catch (e) {
|
||||||
|
reject(e);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
var timer = setTimeout(function () {
|
||||||
|
try {
|
||||||
|
ws.terminate && ws.terminate();
|
||||||
|
} catch (e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
reject(new Error("ws connect timeout"));
|
||||||
|
}, timeoutMs);
|
||||||
|
ws.on("open", function () {
|
||||||
|
clearTimeout(timer);
|
||||||
|
resolve({
|
||||||
|
onMessage: function (fn) {
|
||||||
|
ws.on("message", function (msg) {
|
||||||
|
fn(typeof msg === "string" ? msg : msg && msg.toString ? msg.toString("utf8") : String(msg));
|
||||||
|
});
|
||||||
|
},
|
||||||
|
close: function () {
|
||||||
|
try {
|
||||||
|
ws.close();
|
||||||
|
} catch (e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
},
|
||||||
|
});
|
||||||
|
});
|
||||||
|
ws.on("error", function (e) {
|
||||||
|
clearTimeout(timer);
|
||||||
|
reject(e);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Minimal client without external deps.
|
||||||
|
return new Promise(function (resolve, reject) {
|
||||||
|
var findHeaderEnd = function (buf) {
|
||||||
|
// Find \r\n\r\n in a Buffer.
|
||||||
|
if (!buf || !buf.length) return -1;
|
||||||
|
for (var i = 0; i + 3 < buf.length; i += 1) {
|
||||||
|
if (buf[i] === 13 && buf[i + 1] === 10 && buf[i + 2] === 13 && buf[i + 3] === 10) return i;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
};
|
||||||
|
|
||||||
|
var info = parseWsUrl(wsUrl);
|
||||||
|
if (!info) {
|
||||||
|
reject(new Error("bad ws url"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
var key = null;
|
||||||
|
try {
|
||||||
|
key = crypto && crypto.randomBytes ? crypto.randomBytes(16).toString("base64") : new Buffer(String(Date.now()) + ":" + String(Math.random())).toString("base64");
|
||||||
|
} catch (e) {
|
||||||
|
key = new Buffer(String(Date.now()) + ":" + String(Math.random())).toString("base64");
|
||||||
|
}
|
||||||
|
|
||||||
|
var socket = net.connect({ host: info.host, port: info.port });
|
||||||
|
var timer = setTimeout(function () {
|
||||||
|
try {
|
||||||
|
socket.destroy();
|
||||||
|
} catch (e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
reject(new Error("ws connect timeout"));
|
||||||
|
}, timeoutMs);
|
||||||
|
|
||||||
|
var state = { handshook: false, buf: new Buffer(0) };
|
||||||
|
var msgHandlers = [];
|
||||||
|
var closed = false;
|
||||||
|
var fragType = null; // 0x1 text, 0x2 binary
|
||||||
|
var fragChunks = [];
|
||||||
|
|
||||||
|
var emitMessage = function (s) {
|
||||||
|
for (var i = 0; i < msgHandlers.length; i += 1) {
|
||||||
|
try {
|
||||||
|
msgHandlers[i](s);
|
||||||
|
} catch (e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
var sendPong = function (payload) {
|
||||||
|
try {
|
||||||
|
// server->client frames are unmasked; client->server must be masked.
|
||||||
|
var mask = crypto && crypto.randomBytes ? crypto.randomBytes(4) : new Buffer([1, 2, 3, 4]);
|
||||||
|
var len = payload ? payload.length : 0;
|
||||||
|
var header = null;
|
||||||
|
if (len < 126) {
|
||||||
|
header = new Buffer(2);
|
||||||
|
header[0] = 0x8a; // FIN + pong
|
||||||
|
header[1] = 0x80 | len;
|
||||||
|
} else {
|
||||||
|
header = new Buffer(4);
|
||||||
|
header[0] = 0x8a;
|
||||||
|
header[1] = 0x80 | 126;
|
||||||
|
header.writeUInt16BE(len, 2);
|
||||||
|
}
|
||||||
|
var out = new Buffer(header.length + 4 + len);
|
||||||
|
header.copy(out, 0);
|
||||||
|
mask.copy(out, header.length);
|
||||||
|
for (var i = 0; i < len; i += 1) {
|
||||||
|
out[header.length + 4 + i] = payload[i] ^ mask[i % 4];
|
||||||
|
}
|
||||||
|
socket.write(out);
|
||||||
|
} catch (e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
var parseFrames = function () {
|
||||||
|
// parse server->client frames (unmasked)
|
||||||
|
while (state.buf.length >= 2) {
|
||||||
|
var b0 = state.buf[0];
|
||||||
|
var b1 = state.buf[1];
|
||||||
|
var fin = (b0 & 0x80) !== 0;
|
||||||
|
var opcode = b0 & 0x0f;
|
||||||
|
var masked = (b1 & 0x80) !== 0;
|
||||||
|
var len = b1 & 0x7f;
|
||||||
|
var off = 2;
|
||||||
|
if (len === 126) {
|
||||||
|
if (state.buf.length < off + 2) return;
|
||||||
|
len = state.buf.readUInt16BE(off);
|
||||||
|
off += 2;
|
||||||
|
} else if (len === 127) {
|
||||||
|
// Not expected; avoid overflow.
|
||||||
|
if (state.buf.length < off + 8) return;
|
||||||
|
// Only support up to 2^31-1
|
||||||
|
var hi = state.buf.readUInt32BE(off);
|
||||||
|
var lo = state.buf.readUInt32BE(off + 4);
|
||||||
|
off += 8;
|
||||||
|
if (hi !== 0) {
|
||||||
|
// too large
|
||||||
|
try {
|
||||||
|
socket.destroy();
|
||||||
|
} catch (e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
len = lo;
|
||||||
|
}
|
||||||
|
var maskKey = null;
|
||||||
|
if (masked) {
|
||||||
|
if (state.buf.length < off + 4) return;
|
||||||
|
maskKey = state.buf.slice(off, off + 4);
|
||||||
|
off += 4;
|
||||||
|
}
|
||||||
|
if (state.buf.length < off + len) return;
|
||||||
|
var payload = state.buf.slice(off, off + len);
|
||||||
|
state.buf = state.buf.slice(off + len);
|
||||||
|
|
||||||
|
if (masked && maskKey) {
|
||||||
|
for (var i = 0; i < payload.length; i += 1) {
|
||||||
|
payload[i] = payload[i] ^ maskKey[i % 4];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (opcode === 0x8) {
|
||||||
|
try {
|
||||||
|
socket.end();
|
||||||
|
} catch (e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (opcode === 0x9) {
|
||||||
|
// ping
|
||||||
|
sendPong(payload);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Continuation frames for fragmented messages.
|
||||||
|
if (opcode === 0x0) {
|
||||||
|
if (!fragType) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
fragChunks.push(payload);
|
||||||
|
if (fin) {
|
||||||
|
var whole = Buffer.concat(fragChunks);
|
||||||
|
fragType = null;
|
||||||
|
fragChunks = [];
|
||||||
|
try {
|
||||||
|
emitMessage(whole.toString("utf8"));
|
||||||
|
} catch (e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Text or binary payloads (ASR may send either).
|
||||||
|
if (opcode === 0x1 || opcode === 0x2) {
|
||||||
|
if (!fin) {
|
||||||
|
fragType = opcode;
|
||||||
|
fragChunks = [payload];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
emitMessage(payload.toString("utf8"));
|
||||||
|
} catch (e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
socket.on("connect", function () {
|
||||||
|
var req = "";
|
||||||
|
req += "GET " + info.path + " HTTP/1.1\r\n";
|
||||||
|
req += "Host: " + info.host + ":" + String(info.port) + "\r\n";
|
||||||
|
req += "Upgrade: websocket\r\n";
|
||||||
|
req += "Connection: Upgrade\r\n";
|
||||||
|
req += "Sec-WebSocket-Key: " + key + "\r\n";
|
||||||
|
req += "Sec-WebSocket-Version: 13\r\n";
|
||||||
|
req += "\r\n";
|
||||||
|
socket.write(req);
|
||||||
|
});
|
||||||
|
|
||||||
|
socket.on("data", function (d) {
|
||||||
|
if (closed) return;
|
||||||
|
state.buf = Buffer.concat([state.buf, d]);
|
||||||
|
if (!state.handshook) {
|
||||||
|
var endIdx = findHeaderEnd(state.buf);
|
||||||
|
if (endIdx < 0) return;
|
||||||
|
var headerText = state.buf.slice(0, endIdx).toString("utf8");
|
||||||
|
state.buf = state.buf.slice(endIdx + 4);
|
||||||
|
if (!/^HTTP\/1\.1 101/i.test(headerText)) {
|
||||||
|
closed = true;
|
||||||
|
clearTimeout(timer);
|
||||||
|
reject(new Error("ws handshake failed: " + headerText.split("\r\n")[0]));
|
||||||
|
try {
|
||||||
|
socket.destroy();
|
||||||
|
} catch (e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
state.handshook = true;
|
||||||
|
clearTimeout(timer);
|
||||||
|
resolve({
|
||||||
|
onMessage: function (fn) {
|
||||||
|
msgHandlers.push(fn);
|
||||||
|
},
|
||||||
|
close: function () {
|
||||||
|
if (closed) return;
|
||||||
|
closed = true;
|
||||||
|
try {
|
||||||
|
socket.end();
|
||||||
|
} catch (e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
// Parse any remaining frames.
|
||||||
|
if (state.handshook) parseFrames();
|
||||||
|
});
|
||||||
|
socket.on("error", function (e) {
|
||||||
|
if (closed) return;
|
||||||
|
closed = true;
|
||||||
|
clearTimeout(timer);
|
||||||
|
reject(e);
|
||||||
|
});
|
||||||
|
socket.on("close", function () {
|
||||||
|
if (closed) return;
|
||||||
|
closed = true;
|
||||||
|
clearTimeout(timer);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function pickBestAsrUtterance(utterances) {
|
||||||
|
// utterances: [{utterance, score}] possibly.
|
||||||
|
try {
|
||||||
|
if (!utterances || !utterances.length) return "";
|
||||||
|
var bestText = "";
|
||||||
|
var bestScore = -1e99;
|
||||||
|
for (var i = 0; i < utterances.length; i += 1) {
|
||||||
|
var u = utterances[i];
|
||||||
|
var text = u && (u.utterance || u.Utterance || u.text) ? String(u.utterance || u.Utterance || u.text) : "";
|
||||||
|
text = text.replace(/^\s+|\s+$/g, "");
|
||||||
|
if (!text) continue;
|
||||||
|
// Light cleanup: sometimes a leading duplicated char occurs.
|
||||||
|
if (text.length >= 2 && text[0].toLowerCase && text[1].toLowerCase && text[0].toLowerCase() === text[1].toLowerCase()) {
|
||||||
|
text = text.slice(1);
|
||||||
|
}
|
||||||
|
var score = typeof u.score === "number" ? u.score : typeof u.Score === "number" ? u.Score : 0;
|
||||||
|
if (!bestText) {
|
||||||
|
bestText = text;
|
||||||
|
bestScore = score;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (score > bestScore) {
|
||||||
|
bestText = text;
|
||||||
|
bestScore = score;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Fallback: choose the shortest distinct utterance.
|
||||||
|
if (!bestText) {
|
||||||
|
var uniq = {};
|
||||||
|
var arr = [];
|
||||||
|
for (var j = 0; j < utterances.length; j += 1) {
|
||||||
|
var u2 = utterances[j];
|
||||||
|
var t2 = u2 && (u2.utterance || u2.Utterance || u2.text) ? String(u2.utterance || u2.Utterance || u2.text) : "";
|
||||||
|
t2 = t2.replace(/^\s+|\s+$/g, "");
|
||||||
|
if (!t2) continue;
|
||||||
|
var k = t2.toLowerCase ? t2.toLowerCase() : t2;
|
||||||
|
if (uniq[k]) continue;
|
||||||
|
uniq[k] = true;
|
||||||
|
arr.push(t2);
|
||||||
|
}
|
||||||
|
arr.sort(function (a, b) {
|
||||||
|
return a.length - b.length;
|
||||||
|
});
|
||||||
|
bestText = arr[0] || "";
|
||||||
|
}
|
||||||
|
return bestText;
|
||||||
|
} catch (e) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
function recordWavWithArecord(durationSec, sampleRate, channels, device, format, debug) {
|
function recordWavWithArecord(durationSec, sampleRate, channels, device, format, debug) {
|
||||||
durationSec = typeof durationSec === "number" ? durationSec : 5;
|
durationSec = typeof durationSec === "number" ? durationSec : 5;
|
||||||
sampleRate = typeof sampleRate === "number" ? sampleRate : 16000;
|
sampleRate = typeof sampleRate === "number" ? sampleRate : 16000;
|
||||||
@@ -599,6 +1008,14 @@ function AIBridge(skill, jibo) {
|
|||||||
// Prefer grabbing audio from the system audio service (avoids ALSA mic lock).
|
// Prefer grabbing audio from the system audio service (avoids ALSA mic lock).
|
||||||
this.useDumpStateAudio = true;
|
this.useDumpStateAudio = true;
|
||||||
this.dumpStateDir = "/tmp";
|
this.dumpStateDir = "/tmp";
|
||||||
|
// Offline STT via jibo-asr-service (HTTP + WebSocket) when available.
|
||||||
|
this.useAsrServiceStt = false;
|
||||||
|
this.asrServiceHost = "127.0.0.1";
|
||||||
|
this.asrServicePort = 8088;
|
||||||
|
this.asrAudioSourceId = "alsa1";
|
||||||
|
this.asrTimeoutMs = 15000;
|
||||||
|
this.asrServiceDebugWs = false;
|
||||||
|
this._asrServiceInFlight = false;
|
||||||
|
|
||||||
this._inFlight = false;
|
this._inFlight = false;
|
||||||
this._lastHJAt = 0;
|
this._lastHJAt = 0;
|
||||||
@@ -623,6 +1040,156 @@ function AIBridge(skill, jibo) {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AIBridge.prototype._captureTextViaAsrService = function (source) {
|
||||||
|
var self = this;
|
||||||
|
if (!self.enabled) return Promise.resolve(null);
|
||||||
|
if (self._inFlight || self._asrServiceInFlight) return Promise.resolve(null);
|
||||||
|
self._asrServiceInFlight = true;
|
||||||
|
|
||||||
|
var host = String(self.asrServiceHost || "127.0.0.1");
|
||||||
|
var port = typeof self.asrServicePort === "number" ? self.asrServicePort : Number(self.asrServicePort) || 8088;
|
||||||
|
var baseHttp = "http://" + host + ":" + String(port);
|
||||||
|
var wsUrl = "ws://" + host + ":" + String(port) + "/simple_port";
|
||||||
|
var taskId = "DEBUG:ai-bridge-" + String(Date.now()) + "-" + String(Math.floor(Math.random() * 1e9));
|
||||||
|
var requestId = "stt_start_" + String(Date.now()) + "_" + String(Math.floor(Math.random() * 1e9));
|
||||||
|
var timeoutMs = typeof self.asrTimeoutMs === "number" ? self.asrTimeoutMs : 15000;
|
||||||
|
var debugWs = !!self.asrServiceDebugWs;
|
||||||
|
|
||||||
|
var wsClient = null;
|
||||||
|
var stopAlways = function () {
|
||||||
|
var stopPayload = { command: "stop", task_id: taskId, request_id: "stt_stop_" + String(Date.now()) + "_" + String(Math.floor(Math.random() * 1e9)) };
|
||||||
|
return httpJsonPostRaw(baseHttp + "/asr_simple_interface", stopPayload, 5000)
|
||||||
|
.then(function () {
|
||||||
|
// ignore
|
||||||
|
})
|
||||||
|
.catch(function () {
|
||||||
|
// ignore
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
var done = function () {
|
||||||
|
self._asrServiceInFlight = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
var fail = function (e) {
|
||||||
|
if (rlog) {
|
||||||
|
rlog.warn("ai-bridge", "asr-service stt failed", { err: String(e && (e.stack || e.message || e)), source: source || "asr-service" });
|
||||||
|
}
|
||||||
|
self._asrServiceInFlight = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
var startPayload = {
|
||||||
|
command: "start",
|
||||||
|
task_id: taskId,
|
||||||
|
audio_source_id: String(self.asrAudioSourceId || "alsa1"),
|
||||||
|
hotphrase: "none",
|
||||||
|
speech_to_text: true,
|
||||||
|
request_id: requestId,
|
||||||
|
};
|
||||||
|
|
||||||
|
var waitForFinal = function () {
|
||||||
|
return new Promise(function (resolve, reject) {
|
||||||
|
var timer = setTimeout(function () {
|
||||||
|
reject(new Error("asr-service stt timeout"));
|
||||||
|
}, timeoutMs);
|
||||||
|
var resolved = false;
|
||||||
|
var seen = 0;
|
||||||
|
wsClient.onMessage(function (msg) {
|
||||||
|
if (resolved) return;
|
||||||
|
seen += 1;
|
||||||
|
if (debugWs && rlog && seen <= 6) {
|
||||||
|
rlog.debug("ai-bridge", "asr-service ws msg", { idx: seen, raw: String(msg || "").slice(0, 500) });
|
||||||
|
}
|
||||||
|
var evt = null;
|
||||||
|
try {
|
||||||
|
evt = JSON.parse(String(msg || ""));
|
||||||
|
} catch (e) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!evt || typeof evt !== "object") return;
|
||||||
|
var eventType = evt.event_type || evt.eventType || evt.event || evt.type;
|
||||||
|
if (eventType !== "speech_to_text_final") return;
|
||||||
|
|
||||||
|
// Correlate to our request when possible, but don't hard-fail on schema drift.
|
||||||
|
var evTaskId = evt.task_id || evt.taskId || (evt.payload && (evt.payload.task_id || evt.payload.taskId));
|
||||||
|
var evRequestId = evt.request_id || evt.requestId || (evt.payload && (evt.payload.request_id || evt.payload.requestId));
|
||||||
|
var idMatches = false;
|
||||||
|
if (evTaskId && String(evTaskId) === String(taskId)) idMatches = true;
|
||||||
|
if (evRequestId && String(evRequestId) === String(requestId)) idMatches = true;
|
||||||
|
// If the event carries neither id, accept it (single in-flight capture).
|
||||||
|
if (!evTaskId && !evRequestId) idMatches = true;
|
||||||
|
if (!idMatches) return;
|
||||||
|
|
||||||
|
var utterances = evt.utterances || evt.Utterances || (evt.payload && (evt.payload.utterances || evt.payload.Utterances));
|
||||||
|
var best = pickBestAsrUtterance(utterances);
|
||||||
|
if (!best || !String(best).trim()) return;
|
||||||
|
resolved = true;
|
||||||
|
clearTimeout(timer);
|
||||||
|
resolve(String(best));
|
||||||
|
});
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
var t0 = Date.now();
|
||||||
|
return connectWebSocketText(wsUrl, 4000)
|
||||||
|
.then(function (ws) {
|
||||||
|
wsClient = ws;
|
||||||
|
return httpJsonPostRaw(baseHttp + "/asr_simple_interface", startPayload, 6000);
|
||||||
|
})
|
||||||
|
.then(function (resp) {
|
||||||
|
if (rlog) {
|
||||||
|
rlog.info("ai-bridge", "asr-service stt start", {
|
||||||
|
status: resp && resp.statusCode,
|
||||||
|
ms: Date.now() - t0,
|
||||||
|
ws: wsUrl,
|
||||||
|
http: baseHttp,
|
||||||
|
source: source || "asr-service",
|
||||||
|
body: resp && resp.body ? String(resp.body).slice(0, 500) : "",
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if (!resp || !resp.statusCode || resp.statusCode < 200 || resp.statusCode >= 300) {
|
||||||
|
throw new Error("asr-service start failed: " + String(resp && resp.statusCode));
|
||||||
|
}
|
||||||
|
return waitForFinal();
|
||||||
|
})
|
||||||
|
.then(function (text) {
|
||||||
|
return stopAlways().then(function () {
|
||||||
|
return text;
|
||||||
|
});
|
||||||
|
})
|
||||||
|
.then(function (text) {
|
||||||
|
try {
|
||||||
|
wsClient && wsClient.close && wsClient.close();
|
||||||
|
} catch (e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
if (rlog) {
|
||||||
|
rlog.info("ai-bridge", "asr-service text", { chars: String(text).length, text: String(text).slice(0, 160), ms: Date.now() - t0, source: source || "asr-service" });
|
||||||
|
}
|
||||||
|
return self._sendText(text, "asr-service:" + String(source || ""));
|
||||||
|
})
|
||||||
|
.then(function () {
|
||||||
|
done();
|
||||||
|
})
|
||||||
|
.catch(function (e) {
|
||||||
|
try {
|
||||||
|
wsClient && wsClient.close && wsClient.close();
|
||||||
|
} catch (e2) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
return stopAlways()
|
||||||
|
.then(function () {
|
||||||
|
throw e;
|
||||||
|
})
|
||||||
|
.catch(function () {
|
||||||
|
throw e;
|
||||||
|
});
|
||||||
|
})
|
||||||
|
.catch(function (e) {
|
||||||
|
fail(e);
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
AIBridge.prototype._scheduleOfflineAudioFallback = function (reason, delayMs) {
|
AIBridge.prototype._scheduleOfflineAudioFallback = function (reason, delayMs) {
|
||||||
var self = this;
|
var self = this;
|
||||||
if (!self.enabled) return;
|
if (!self.enabled) return;
|
||||||
@@ -630,7 +1197,7 @@ AIBridge.prototype._scheduleOfflineAudioFallback = function (reason, delayMs) {
|
|||||||
if (!self._allowAnyTextNoHJ && Date.now() - self._lastHJAt > self._hjWindowMs) return;
|
if (!self._allowAnyTextNoHJ && Date.now() - self._lastHJAt > self._hjWindowMs) return;
|
||||||
if (self._inFlight) return;
|
if (self._inFlight) return;
|
||||||
|
|
||||||
if (self._audioFallbackTimer) {
|
if (self._audioFallbackTimer) {
|
||||||
clearTimeout(self._audioFallbackTimer);
|
clearTimeout(self._audioFallbackTimer);
|
||||||
self._audioFallbackTimer = null;
|
self._audioFallbackTimer = null;
|
||||||
}
|
}
|
||||||
@@ -675,7 +1242,7 @@ AIBridge.prototype._captureAndSendAudio = function (source) {
|
|||||||
if (["S16_LE", "S32_LE"].indexOf(fmt) < 0) fmt = "S16_LE";
|
if (["S16_LE", "S32_LE"].indexOf(fmt) < 0) fmt = "S16_LE";
|
||||||
var dbg = !!self.debugAudioCapture;
|
var dbg = !!self.debugAudioCapture;
|
||||||
|
|
||||||
var dumpDir = typeof self.dumpStateDir === "string" && self.dumpStateDir ? self.dumpStateDir : "/tmp";
|
var dumpDir = typeof self.dumpStateDir === "string" && self.dumpStateDir ? self.dumpStateDir : "/tmp";
|
||||||
|
|
||||||
var captureViaDumpState = function () {
|
var captureViaDumpState = function () {
|
||||||
var tDump0 = Date.now();
|
var tDump0 = Date.now();
|
||||||
@@ -812,7 +1379,7 @@ AIBridge.prototype._installOpenHook = function () {
|
|||||||
}
|
}
|
||||||
// If cloud ASR is down, Hey-Jibo intents often have empty asr.text.
|
// If cloud ASR is down, Hey-Jibo intents often have empty asr.text.
|
||||||
// Try a one-shot offline audio fallback after the pipeline releases the mic.
|
// Try a one-shot offline audio fallback after the pipeline releases the mic.
|
||||||
if (looksLikeHeyJiboIntent(results)) {
|
if (looksLikeHeyJiboIntent(results) && !self.useAsrServiceStt) {
|
||||||
self._scheduleOfflineAudioFallback("openHookNoText", 1400);
|
self._scheduleOfflineAudioFallback("openHookNoText", 1400);
|
||||||
}
|
}
|
||||||
return resolve();
|
return resolve();
|
||||||
@@ -823,7 +1390,9 @@ AIBridge.prototype._installOpenHook = function () {
|
|||||||
if (rlog) {
|
if (rlog) {
|
||||||
rlog.warn("ai-bridge", "ignoring cloud ASR error text (open hook)", { text: text.slice(0, 200) });
|
rlog.warn("ai-bridge", "ignoring cloud ASR error text (open hook)", { text: text.slice(0, 200) });
|
||||||
}
|
}
|
||||||
self._scheduleOfflineAudioFallback("cloudAsrErrorText", 1400);
|
if (!self.useAsrServiceStt) {
|
||||||
|
self._scheduleOfflineAudioFallback("cloudAsrErrorText", 1400);
|
||||||
|
}
|
||||||
return resolve();
|
return resolve();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1423,6 +1992,38 @@ AIBridge.prototype.setupTunables = function () {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Offline STT via jibo-asr-service (requires service running).
|
||||||
|
safe(function () {
|
||||||
|
return Tunable.getCheckboxField(LABEL.useAsrServiceStt, self.useAsrServiceStt, WIN).events.change.on(function (v) {
|
||||||
|
self.useAsrServiceStt = !!v;
|
||||||
|
if (rlog) {
|
||||||
|
rlog.info("ai-bridge", "use asr-service stt changed", { enabled: self.useAsrServiceStt });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
safe(function () {
|
||||||
|
return Tunable.getCheckboxField(LABEL.asrServiceDebugWs, self.asrServiceDebugWs, WIN).events.change.on(function (v) {
|
||||||
|
self.asrServiceDebugWs = !!v;
|
||||||
|
if (rlog) {
|
||||||
|
rlog.info("ai-bridge", "asr-service debug ws changed", { enabled: self.asrServiceDebugWs });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
if (typeof Tunable.getButtonField === "function") {
|
||||||
|
safe(function () {
|
||||||
|
return Tunable.getButtonField("AI Bridge: Run ASR-service STT now", WIN).events.change.on(function () {
|
||||||
|
try {
|
||||||
|
self._captureTextViaAsrService("manual");
|
||||||
|
} catch (e) {
|
||||||
|
if (rlog) {
|
||||||
|
rlog.warn("ai-bridge", "manual asr-service stt crashed", { err: String(e && (e.stack || e.message || e)) });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
if (typeof Tunable.getNumberField === "function") {
|
if (typeof Tunable.getNumberField === "function") {
|
||||||
safe(function () {
|
safe(function () {
|
||||||
return Tunable.getNumberField(LABEL.recordSeconds, self.recordSeconds, 1, 15, 1, WIN).events.change.on(function (v) {
|
return Tunable.getNumberField(LABEL.recordSeconds, self.recordSeconds, 1, 15, 1, WIN).events.change.on(function (v) {
|
||||||
@@ -1550,6 +2151,10 @@ AIBridge.prototype._handleHJHeard = function () {
|
|||||||
if (!self.enabled) {
|
if (!self.enabled) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (self.useAsrServiceStt) {
|
||||||
|
if (self._inFlight) return;
|
||||||
|
return self._captureTextViaAsrService("hjHeard");
|
||||||
|
}
|
||||||
if (self.mode !== InputMode.AUDIO) {
|
if (self.mode !== InputMode.AUDIO) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -1576,6 +2181,11 @@ AIBridge.prototype._handleTurnResult = function (data, eventName) {
|
|||||||
if (!self.enabled) {
|
if (!self.enabled) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// If we're using the local ASR service STT pipeline, ignore cloud turn-results
|
||||||
|
// to avoid duplicate sends.
|
||||||
|
if (self.useAsrServiceStt) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (rlog) {
|
if (rlog) {
|
||||||
try {
|
try {
|
||||||
rlog.debug("ai-bridge", "turn result", { event: eventName });
|
rlog.debug("ai-bridge", "turn result", { event: eventName });
|
||||||
@@ -1614,7 +2224,9 @@ AIBridge.prototype._handleTurnResult = function (data, eventName) {
|
|||||||
if (rlog) {
|
if (rlog) {
|
||||||
rlog.warn("ai-bridge", "ignoring cloud ASR error text", { event: eventName, text: text.slice(0, 220) });
|
rlog.warn("ai-bridge", "ignoring cloud ASR error text", { event: eventName, text: text.slice(0, 220) });
|
||||||
}
|
}
|
||||||
self._scheduleOfflineAudioFallback("cloudAsrErrorText:" + String(eventName || "turnResult"), 1400);
|
if (!self.useAsrServiceStt) {
|
||||||
|
self._scheduleOfflineAudioFallback("cloudAsrErrorText:" + String(eventName || "turnResult"), 1400);
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -95,7 +95,7 @@
|
|||||||
"arguments": ["-c","/usr/local/etc/jibo-asr-service.json"],
|
"arguments": ["-c","/usr/local/etc/jibo-asr-service.json"],
|
||||||
"environment": {},
|
"environment": {},
|
||||||
"directory": "",
|
"directory": "",
|
||||||
"enabled":false
|
"enabled":true
|
||||||
},
|
},
|
||||||
"certification": {
|
"certification": {
|
||||||
"enabled": false
|
"enabled": false
|
||||||
|
|||||||
Reference in New Issue
Block a user