Try to make skill launch mimic the source code more closely

This commit is contained in:
Jacob Dubin
2026-04-19 08:17:28 -05:00
parent 17583e3cdc
commit bacaa6f2ca
16 changed files with 6071 additions and 81 deletions

View File

@@ -108,6 +108,9 @@ Evidence from the smaller `2026-04-18/19` hotphrase and word-of-the-day verifica
- the same bundle also shows that `word-of-the-day/right_word` cleanup turns need a short ignore window for trailing audio; otherwise the robot can stay stuck in a blue-ring listening state
- the `jibo test 4` bundle exposed a broader websocket issue: inbound robot `LISTEN` setup packets were still being routed through turn finalization instead of just priming pending state, which can corrupt menu and word-of-the-day flows by treating setup turns like resolved intents
- the `jibo test 5` bundle suggests the remaining WOD launch and post-win cleanup bugs share the same root cause: we were leaving the robot-side `cloudSkillResponse` promise unresolved on `word_of_the_day`, `word_of_the_day_guess`, and `word-of-the-day/right_word`, so the latest .NET pass now emits a completion-only silent `SKILL_ACTION` for those paths instead of stopping at `LISTEN` + `EOS` or going fully silent
- the `jibo test 6` bundle plus the attached `@be` source snapshot refine that diagnosis: Nimbus does accept the silent completion response, but treats it as a normal `SLIM/RUNTIME_PROMPT` instead of a skill redirect, while the successful on-robot path is built around `menu + domain=word-of-the-day` skill switching through `SkillSwitchScheduler`
- the attached `be-framework.js` adds one more strong clue: the Be relaunch hook reads `skillData.nlu.skill`, so synthetic cloud launch turns for word-of-the-day should carry the explicit target skill name in the outbound NLU payload instead of expecting the robot to infer it from `intent/domain` alone
- the same `jibo test 6` capture also shows the blue-ring cleanup loop was partly self-inflicted on the .NET side: after `word-of-the-day/right_word` we stopped the active turn, but later stray binary audio on the same `transID` could still re-arm buffering even without a fresh `LISTEN`, so the next pass now requires a real listen phase before post-turn audio can reopen buffered completion
- the local buffered-audio seam is still producing repeated `whisper.cpp returned no transcript` and `ffmpeg ... Codec not found` failures, so lightweight waveform or energy screening is worth considering once the core launch flow is stable
Near-term interaction work should now prioritize:

View File

@@ -230,9 +230,8 @@ public sealed class JiboInteractionService(
"@be/word-of-the-day",
SkillPayload: new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase)
{
["destination"] = "word-of-the-day",
["skillId"] = "@be/word-of-the-day",
["cloudResponseMode"] = "completion_only"
["domain"] = "word-of-the-day",
["skillId"] = "@be/word-of-the-day"
});
}

View File

@@ -26,7 +26,7 @@ public sealed class ResponsePlanToSocketMessagesMapper
var nluGuess = ReadClientEntity(turn, "guess");
var wordOfDayGuess = ResolveWordOfDayGuess(turn, transcript, nluGuess);
var outboundIntent = isWordOfDayLaunch
? "loadMenu"
? "menu"
: isWordOfDayGuess
? "guess"
: string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(clientIntent)
@@ -34,6 +34,8 @@ public sealed class ResponsePlanToSocketMessagesMapper
: plan.IntentName ?? "unknown";
var outboundAsrText = isWordOfDayGuess && !string.IsNullOrWhiteSpace(wordOfDayGuess)
? wordOfDayGuess
: isWordOfDayLaunch
? string.Empty
: string.Equals(clientIntent, "guess", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(nluGuess)
? nluGuess
: isYesNoTurn && isYesNoIntent
@@ -42,72 +44,30 @@ public sealed class ResponsePlanToSocketMessagesMapper
? clientIntent
: transcript;
var outboundRules = isWordOfDayLaunch
? ["main-menu/execute_fun_stuff"]
? ["word-of-the-day/menu"]
: isYesNoTurn && isYesNoIntent ? [yesNoCreateRule!] : rules;
var entities = ReadEntities(turn, messageType, isYesNoTurn && isYesNoIntent, isWordOfDayLaunch, isWordOfDayGuess, wordOfDayGuess);
object listenMessage;
if (isWordOfDayLaunch)
var listenMessage = new
{
listenMessage = new
type = "LISTEN",
transID = transId,
data = new
{
type = "LISTEN",
transID = transId,
skillID = "@be/word-of-the-day",
onRobot = true,
data = new
asr = new
{
asr = new
{
confidence = 0.95,
final = true,
text = outboundAsrText
},
nlu = new
{
confidence = 0.95,
intent = outboundIntent,
rules = outboundRules,
entities
},
match = new
{
intent = outboundIntent,
rule = outboundRules.FirstOrDefault() ?? string.Empty,
score = 0.95
}
}
};
}
else
{
listenMessage = new
{
type = "LISTEN",
transID = transId,
data = new
confidence = 0.95,
final = true,
text = outboundAsrText
},
nlu = BuildNluPayload(outboundIntent, outboundRules, entities, isWordOfDayLaunch ? "@be/word-of-the-day" : null),
match = new
{
asr = new
{
confidence = 0.95,
final = true,
text = outboundAsrText
},
nlu = new
{
confidence = 0.95,
intent = outboundIntent,
rules = outboundRules,
entities
},
match = new
{
intent = outboundIntent,
rule = outboundRules.FirstOrDefault() ?? string.Empty,
score = 0.95
}
intent = outboundIntent,
rule = outboundRules.FirstOrDefault() ?? string.Empty,
score = 0.95
}
};
}
}
};
var messages = new List<SocketReplyPlan>
{
@@ -222,7 +182,7 @@ public sealed class ResponsePlanToSocketMessagesMapper
{
return new Dictionary<string, object?>
{
["destination"] = "word-of-the-day"
["domain"] = "word-of-the-day"
};
}
@@ -410,6 +370,28 @@ public sealed class ResponsePlanToSocketMessagesMapper
};
}
/// <summary>
/// Assembles the <c>nlu</c> section of an outbound LISTEN message.
/// </summary>
/// <param name="outboundIntent">Value stored under the <c>intent</c> key.</param>
/// <param name="outboundRules">Value stored under the <c>rules</c> key.</param>
/// <param name="entities">Value stored under the <c>entities</c> key.</param>
/// <param name="skillId">
/// Optional skill name. It is stored under the <c>skill</c> key only when it is
/// not null, empty, or whitespace; otherwise the key is omitted entirely.
/// </param>
/// <returns>
/// A dictionary with case-insensitive keys holding a fixed <c>confidence</c> of 0.95,
/// the supplied intent, rules and entities, and (optionally) the skill name.
/// </returns>
private static IReadOnlyDictionary<string, object?> BuildNluPayload(
    string outboundIntent,
    IReadOnlyList<string> outboundRules,
    object entities,
    string? skillId)
{
    var nlu = new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase);

    // Keys are added in a fixed order: confidence, intent, rules, entities.
    nlu.Add("confidence", 0.95);
    nlu.Add("intent", outboundIntent);
    nlu.Add("rules", outboundRules);
    nlu.Add("entities", entities);

    // Without a usable skill name the payload carries no "skill" key at all.
    if (string.IsNullOrWhiteSpace(skillId))
    {
        return nlu;
    }

    nlu.Add("skill", skillId);
    return nlu;
}
private static object BuildGenericFallbackSkillPayload(string transId)
{
return new

View File

@@ -44,6 +44,14 @@ public sealed class WebSocketTurnFinalizationService(
return [];
}
if (!turnState.AwaitingTurnCompletion &&
!session.FollowUpOpen &&
!turnState.SawListen &&
!string.IsNullOrWhiteSpace(turnState.TransId))
{
return [];
}
session.LastMessageType = "BINARY_AUDIO";
turnState.FirstAudioReceivedUtc ??= DateTimeOffset.UtcNow;
turnState.BufferedAudioChunkCount += 1;
@@ -140,6 +148,8 @@ public sealed class WebSocketTurnFinalizationService(
session.TurnState.IgnoreAdditionalAudioUntilUtc = DateTimeOffset.UtcNow.Add(WebSocketTurnState.DefaultLateAudioIgnoreWindow);
session.FollowUpExpiresUtc = null;
ResetBufferedAudio(session);
session.TurnState.SawListen = false;
session.TurnState.SawContext = false;
return ResponsePlanToSocketMessagesMapper.MapCompletionOnly(
session.TurnState.TransId ?? session.LastTransId ?? string.Empty,
"@be/word-of-the-day")
@@ -421,6 +431,8 @@ public sealed class WebSocketTurnFinalizationService(
turnState.IgnoreAdditionalAudioUntilUtc = DateTimeOffset.UtcNow.Add(WebSocketTurnState.DefaultLateAudioIgnoreWindow);
session.FollowUpExpiresUtc = null;
ResetBufferedAudio(session);
turnState.SawListen = false;
turnState.SawContext = false;
return [];
}
@@ -521,9 +533,9 @@ public sealed class WebSocketTurnFinalizationService(
? null
: DateTimeOffset.UtcNow.Add(WebSocketTurnState.DefaultLateAudioIgnoreWindow);
var emitSkillActions = messageType != "CLIENT_NLU" ||
string.Equals(plan.IntentName, "word_of_the_day", StringComparison.OrdinalIgnoreCase) ||
string.Equals(plan.IntentName, "word_of_the_day_guess", StringComparison.OrdinalIgnoreCase);
var emitSkillActions = !string.Equals(plan.IntentName, "word_of_the_day", StringComparison.OrdinalIgnoreCase) &&
(messageType != "CLIENT_NLU" ||
string.Equals(plan.IntentName, "word_of_the_day_guess", StringComparison.OrdinalIgnoreCase));
var replies = ResponsePlanToSocketMessagesMapper.Map(plan, finalizedTurn, session, emitSkillActions).Select(map => new WebSocketReply
{
Text = map.Text,
@@ -531,6 +543,8 @@ public sealed class WebSocketTurnFinalizationService(
}).ToArray();
ResetBufferedAudio(session);
turnState.SawListen = false;
turnState.SawContext = false;
return replies;
}

View File

@@ -144,9 +144,8 @@ public sealed class JiboInteractionServiceTests
Assert.Equal("word_of_the_day", decision.IntentName);
Assert.Equal("Starting word of the day.", decision.ReplyText);
Assert.Equal("@be/word-of-the-day", decision.SkillName);
Assert.Equal("word-of-the-day", decision.SkillPayload!["destination"]);
Assert.Equal("word-of-the-day", decision.SkillPayload!["domain"]);
Assert.Equal("@be/word-of-the-day", decision.SkillPayload["skillId"]);
Assert.Equal("completion_only", decision.SkillPayload["cloudResponseMode"]);
}
[Fact]

View File

@@ -489,15 +489,13 @@ public sealed class JiboWebSocketServiceTests
Text = """{"type":"CLIENT_ASR","transID":"trans-wod-launch","data":{"text":"Play word of the day."}}"""
});
Assert.Equal(3, replies.Count);
Assert.Equal(2, replies.Count);
using var listenPayload = JsonDocument.Parse(replies[0].Text!);
Assert.Equal("loadMenu", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
Assert.Equal("Play word of the day.", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString());
Assert.Equal("word-of-the-day", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("entities").GetProperty("destination").GetString());
Assert.Equal("main-menu/execute_fun_stuff", listenPayload.RootElement.GetProperty("data").GetProperty("match").GetProperty("rule").GetString());
Assert.Equal("@be/word-of-the-day", listenPayload.RootElement.GetProperty("skillID").GetString());
Assert.True(listenPayload.RootElement.GetProperty("onRobot").GetBoolean());
Assert.Equal("SKILL_ACTION", ReadReplyType(replies[2]));
Assert.Equal("menu", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
Assert.Equal(string.Empty, listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString());
Assert.Equal("word-of-the-day", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("entities").GetProperty("domain").GetString());
Assert.Equal("@be/word-of-the-day", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("skill").GetString());
Assert.Equal("word-of-the-day/menu", listenPayload.RootElement.GetProperty("data").GetProperty("match").GetProperty("rule").GetString());
var session = _store.FindSessionByToken("hub-wod-launch-token");
Assert.NotNull(session);
@@ -553,14 +551,14 @@ public sealed class JiboWebSocketServiceTests
Binary = new byte[3000]
});
Assert.Equal(3, replies.Count);
Assert.Equal(2, replies.Count);
Assert.Equal("LISTEN", ReadReplyType(replies[0]));
Assert.Equal("EOS", ReadReplyType(replies[1]));
Assert.Equal("SKILL_ACTION", ReadReplyType(replies[2]));
using var listenPayload = JsonDocument.Parse(replies[0].Text!);
Assert.Equal("@be/word-of-the-day", listenPayload.RootElement.GetProperty("skillID").GetString());
Assert.True(listenPayload.RootElement.GetProperty("onRobot").GetBoolean());
Assert.Equal("menu", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
Assert.Equal("word-of-the-day", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("entities").GetProperty("domain").GetString());
Assert.Equal("@be/word-of-the-day", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("skill").GetString());
var lateReplies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope
{