Fixed the AI server and disabled the old ai-bridge

BACKFLIP BACKFLIP BACKFLIP BACKFLIP BACKFLIP BACKFLIP BACKFLIP BACKFLIP BACKFLIP BACKFLIP BACKFLIP BACKFLIP BACKFLIP
This commit is contained in:
2026-03-24 02:56:27 +02:00
parent 93bd8db6bb
commit 5617886ebe
7 changed files with 143 additions and 61 deletions

View File

@@ -111,42 +111,88 @@ function hasRule(rules, want) {
}
/**
 * Infer an NLU result from recognized text by keyword/regex matching against
 * the grammar rule names supplied with the listen request.
 *
 * NOTE(review): this text looks like a rendered diff with the +/- markers
 * stripped, so pre-change and post-change statements appear interleaved:
 * `yn` and `t` are each declared twice, and the `if` opened on the
 * yes[_-]?no rule check has no visible closing brace. Confirm the statement
 * order against the real file before relying on it.
 *
 * @param {string} text - Recognized utterance. It is passed through
 *   normalizeText() and lower-cased before the regex checks.
 * @param {Array} rules - Rule names from the listen request (presumably
 *   strings, since regexes are tested against the elements — confirm).
 *   Passed through unchanged to buildNluResult().
 * @returns {*} Whatever buildNluResult() returns. An empty intent string ('')
 *   is used for the "no input / no match" outcome.
 */
function inferNluFromText(text, rules) {
// Minimal, rule-ish: only try to satisfy common skills.
const yn = classifyYesNo(text);
if (yn) {
// Many flows use the intent string as grammar tag.
return buildNluResult(yn, rules, {});
}
// If Jetstream provided tutorial/global rules, prefer returning the rule itself as intent.
// This matches flows that transition on a specific grammar tag.
if (Array.isArray(rules)) {
const t = normalizeText(text).toLowerCase();
// Many builds pass short rule names (e.g., 'launch', 'dance') instead of full paths.
if (hasRule(rules, 'dance') && /\bdance\b/.test(t)) return buildNluResult('dance', rules, {});
if (hasRule(rules, 'take_photo') && /\b(photo|picture|take a photo|take a picture)\b/.test(t)) return buildNluResult('take_photo', rules, {});
// Launch/global command grammar: some builds only provide 'launch' + globals/global_commands_launch
// for many commands. When the utterance clearly matches a known command, emit that intent so flows
// do not treat it as a generic launch request.
if (hasRule(rules, 'launch')) {
// Many global command handlers expect a launch intent with a target skill in entities.skill.
// NOTE(review): the fourth argument ['skill'] presumably lists the entity names to
// surface as slots — confirm against buildNluResult().
if (/\bdance\b/.test(t)) return buildNluResult('launch', rules, { skill: 'dance', query: normalizeText(text) }, ['skill']);
if (/\b(photo|picture|take a photo|take a picture|selfie)\b/.test(t)) return buildNluResult('launch', rules, { skill: 'photobooth', query: normalizeText(text) }, ['skill']);
// Otherwise: route to chitchat so the robot actually speaks an answer.
// We keep intent='launch' but provide a concrete skill target.
// NOTE(review): this unconditional return means every utterance that reaches here with a
// 'launch' rule present goes to chitchat; a later comment in this function says NOT to
// catch-all to chitchat, so this is likely a pre-change line — confirm.
return buildNluResult('launch', rules, { skill: '@be/chitchat', domain: 'chitchat', query: normalizeText(text) }, ['skill']);
}
}
// Simple tutorial-ish intents.
const t = normalizeText(text).toLowerCase();
if (/\b(dance|do a dance)\b/.test(t)) return buildNluResult('dance', rules, {});
if (/\b(photo|picture|take a photo|take a picture)\b/.test(t)) return buildNluResult('take_photo', rules, {});
// If rules indicate yes/no, but we couldn't classify, mark noMatch.
// NOTE(review): no closing brace for this `if` is visible in this view.
if (Array.isArray(rules) && rules.some((r) => /yes[_-]?no/i.test(r))) {
// No text → empty intent (ListenResultState.noInput → Mim re-prompts or times out).
if (!t) return buildNluResult('', rules, {});
// Yes/No detection — common across many MIM types.
// NOTE(review): second declaration of `yn` (the first is at the top of the function).
const yn = classifyYesNo(text);
// Without a non-empty rules array only yes/no can be answered; anything else is an empty intent.
if (!Array.isArray(rules) || !rules.length) {
return yn ? buildNluResult(yn, rules, {}) : buildNluResult('', rules, {});
}
// ── Skill-specific rule matching (checked BEFORE global catch-all) ──
// introductions/recognition_type_menu: expects face, name, voice, all
if (hasRule(rules, 'recognition_type_menu')) {
if (/\bface\b/.test(t)) return buildNluResult('face', rules, {});
if (/\bname\b/.test(t)) return buildNluResult('name', rules, {});
if (/\bvoice\b/.test(t)) return buildNluResult('voice', rules, {});
if (/\b(all|everything|everyone)\b/.test(t)) return buildNluResult('all', rules, {});
if (yn) return buildNluResult(yn, rules, {});
return buildNluResult('', rules, {});
}
// introductions/voice_face_training_menu: similar recognition type choices
// (unlike recognition_type_menu above, 'everyone' is not accepted as a synonym for 'all' here).
if (hasRule(rules, 'voice_face_training_menu')) {
if (/\bface\b/.test(t)) return buildNluResult('face', rules, {});
if (/\bname\b/.test(t)) return buildNluResult('name', rules, {});
if (/\bvoice\b/.test(t)) return buildNluResult('voice', rules, {});
if (/\b(all|everything)\b/.test(t)) return buildNluResult('all', rules, {});
if (yn) return buildNluResult(yn, rules, {});
return buildNluResult('', rules, {});
}
// introductions yes/no questions: face_capture_ready, did_i_hear_name,
// did_i_pronounce_name, any_more_intros, recognition_any_more
// The regex is an unanchored substring test on each rule name, so 'any_more' alone
// already covers 'recognition_any_more'.
if (rules.some((r) => /face_capture|did_i_hear|did_i_pronounce|any_more|recognition_any_more/.test(r))) {
if (yn) return buildNluResult(yn, rules, {});
return buildNluResult('', rules, {});
}
// introductions/intro_looper: expects loopmember intent with loopMemberReferent entity.
// Without real NLU we cannot resolve the entity → return noMatch so the MIM re-prompts.
if (hasRule(rules, 'intro_looper')) {
if (yn) return buildNluResult(yn, rules, {});
return buildNluResult('', rules, {});
}
// main-menu/execute_main_menu: map spoken words to menu items.
// Checks run top to bottom and the first matching pattern wins.
if (hasRule(rules, 'execute_main_menu')) {
// 'introduc' and 'surprise' have no trailing \b, so they match as word prefixes
// (e.g. "introduce", "introductions").
if (/\bintroduc/.test(t)) return buildNluResult('loadMenu', rules, { loadMenu: 'introductions' });
if (/\bsurprise/.test(t)) return buildNluResult('loadMenu', rules, { loadMenu: 'surprise-me' });
if (/\b(time|clock)\b/.test(t)) return buildNluResult('loadMenu', rules, { loadMenu: 'clock' });
if (/\bphoto\s*booth\b/.test(t)) return buildNluResult('loadMenu', rules, { loadMenu: 'photobooth' });
if (/\bgallery\b/.test(t)) return buildNluResult('loadMenu', rules, { loadMenu: 'gallery' });
if (/\b(exercise|workout)\b/.test(t)) return buildNluResult('loadMenu', rules, { loadMenu: 'exercise' });
if (/\b(radio|music)\b/.test(t)) return buildNluResult('loadMenu', rules, { loadMenu: 'radio' });
if (/\bsettings?\b/.test(t)) return buildNluResult('loadMenu', rules, { loadMenu: 'settings' });
if (/\btips?\b/.test(t)) return buildNluResult('loadMenu', rules, { loadMenu: 'tips-tricks' });
if (/\bfun\b/.test(t)) return buildNluResult('loadMenu', rules, { loadMenu: 'fun-stuff' });
if (/\bcreate\b/.test(t)) return buildNluResult('loadMenu', rules, { loadMenu: 'create' });
if (/\b(report|personal)\b/.test(t)) return buildNluResult('loadMenu', rules, { loadMenu: 'personal-report' });
// Fall through to generic patterns below.
}
// ── Generic patterns ──
// Yes/No (applies to any MIM type that accepts it)
if (yn) return buildNluResult(yn, rules, {});
// Dance / Photo (tutorial rules)
if (hasRule(rules, 'dance') && /\bdance\b/.test(t)) return buildNluResult('dance', rules, {});
if (hasRule(rules, 'take_photo') && /\b(photo|picture|take a photo|take a picture)\b/.test(t)) return buildNluResult('take_photo', rules, {});
// Global launch commands — only for specific well-known commands.
// Do NOT catch-all to chitchat; that breaks in-skill NLU.
if (hasRule(rules, 'launch')) {
if (/\bdance\b/.test(t)) return buildNluResult('launch', rules, { skill: 'dance', query: normalizeText(text) }, ['skill']);
if (/\b(photo|picture|take a photo|take a picture|selfie)\b/.test(t)) return buildNluResult('launch', rules, { skill: 'photobooth', query: normalizeText(text) }, ['skill']);
}
// Default: no match — lets the Mim framework re-prompt or handle noMatch.
return buildNluResult('', rules, {});
}
@@ -588,6 +634,7 @@ function createHubShim(configPath) {
let binaryBytes = 0;
let lastContext = null;
let lastContextMsg = null;
let pendingListen = null;
async function maybeHandleListen() {
@@ -600,25 +647,24 @@ function createHubShim(configPath) {
if (mode === 'CLIENT_NLU' && !listenMsg._clientNlu) return;
pendingListen = null;
const t0 = nowMs();
const base = {
msgID: uuid(),
ts: nowMs(),
};
// Derive transID: echo back the transID the jetstream-service sent.
// It may appear on either the LISTEN or CONTEXT message.
const msgTransID = listenMsg.transID || (lastContextMsg && lastContextMsg.transID) || transID || '';
// Emit SOS immediately so the robot transitions into listening.
send(ws, { ...base, type: 'SOS', data: null, final: false });
const t0 = nowMs();
let text = '';
try {
if (mode === 'CLIENT_ASR') {
text = normalizeText(listenMsg._clientAsrText);
} else if (mode === 'CLIENT_NLU') {
// No ASR needed for client-supplied NLU.
} else {
text = await runAsrQueued(() => asrServiceSttOnce(asrBaseUrl, wsPath, timeoutMs, audioSourceId, logger));
}
} catch (e) {
logger.warn('asr failed', { connId, err: String(e && (e.stack || e.message || e)) });
// Still emit EOS and an empty listen result.
// Still send an empty listen result.
}
const rules = Array.isArray(listenMsg?.data?.rules) ? listenMsg.data.rules : [];
@@ -628,8 +674,16 @@ function createHubShim(configPath) {
: ((config?.nlu?.enabled === false) ? buildNluResult('', rules, {}) : inferNluFromText(text, rules));
const asrRes = buildAsrResult(text);
// Build the match object (mirrors what the cloud hub returns).
const skillID = (lastContext && lastContext.skill && lastContext.skill.skillID)
? lastContext.skill.skillID
: (lastContext && typeof lastContext.skill === 'string' ? lastContext.skill : '');
const matchObj = { onRobot: true };
if (skillID) matchObj.skillID = skillID;
logger.info('listen result', {
connId,
transID: msgTransID || undefined,
text: String(text || '').slice(0, 120),
intent: nluRes && nluRes.intent,
slot0: nluRes && Array.isArray(nluRes.slotActions) ? nluRes.slotActions[0] : undefined,
@@ -639,26 +693,25 @@ function createHubShim(configPath) {
rules: Array.isArray(rules) ? rules.slice(0, 6) : [],
});
// Optionally provide incremental ASR/NLU; Jetstream consumers often listen for these.
send(ws, { ...base, type: 'ASR', data: asrRes, final: false });
send(ws, { ...base, type: 'NLU', data: nluRes, final: false });
// Final listen response.
const listenResp = {
type: 'LISTEN',
msgID: uuid(),
// Send a local TURN_RESULT (not just LISTEN) so the skill's local turn resolves.
const turnResult = {
type: 'TURN_RESULT',
msgID: listenMsg.msgID || uuid(),
transID: msgTransID,
ts: nowMs(),
requestID: msgTransID, // local turn uses transID as requestID
data: {
asr: asrRes,
nlu: nluRes,
status: 'SUCCEEDED',
global: false,
result: {
asr: asrRes,
nlu: nluRes,
match: matchObj,
},
},
final: true,
timings: { total: nowMs() - t0 },
};
send(ws, listenResp);
// Emit EOS to complete the listen lifecycle.
send(ws, { ...base, type: 'EOS', data: null, final: true });
send(ws, turnResult);
}
ws.on('message', async (data, isBinary) => {
@@ -690,8 +743,10 @@ function createHubShim(configPath) {
switch (msg.type) {
case 'CONTEXT':
lastContext = msg.data;
lastContextMsg = msg;
logger.debug('context', {
connId,
transID: msg.transID || undefined,
hasSkill: !!(msg.data && msg.data.skill),
hasRuntime: !!(msg.data && msg.data.runtime),
});
@@ -700,6 +755,7 @@ function createHubShim(configPath) {
pendingListen = msg;
logger.debug('listen req', {
connId,
transID: msg.transID || undefined,
hotphrase: !!(msg.data && msg.data.hotphrase),
mode: msg.data && msg.data.mode,
rules: Array.isArray(msg.data && msg.data.rules) ? msg.data.rules : [],
@@ -707,11 +763,15 @@ function createHubShim(configPath) {
break;
case 'CLIENT_ASR':
// Accept client-provided text (requires a LISTEN message too).
pendingListen = pendingListen || { type: 'LISTEN', msgID: uuid(), ts: nowMs(), data: { rules: [], mode: 'CLIENT_ASR' } };
if (!pendingListen) {
pendingListen = { type: 'LISTEN', msgID: uuid(), transID: msg.transID, ts: nowMs(), data: { rules: [], mode: 'CLIENT_ASR' } };
}
pendingListen._clientAsrText = msg.data?.text;
break;
case 'CLIENT_NLU':
pendingListen = pendingListen || { type: 'LISTEN', msgID: uuid(), ts: nowMs(), data: { rules: [], mode: 'CLIENT_NLU' } };
if (!pendingListen) {
pendingListen = { type: 'LISTEN', msgID: uuid(), transID: msg.transID, ts: nowMs(), data: { rules: [], mode: 'CLIENT_NLU' } };
}
pendingListen._clientNlu = msg.data;
break;
default: