Try to make skill launch mimic the source code more closely
This commit is contained in:
@@ -108,6 +108,9 @@ Evidence from the smaller `2026-04-18/19` hotphrase and word-of-the-day verifica
|
||||
- the same bundle also shows `word-of-the-day/right_word` cleanup turns need a short ignore window for trailing audio or the robot can stay stuck in a blue-ring listening state
|
||||
- the `jibo test 4` bundle exposed a broader websocket issue: inbound robot `LISTEN` setup packets were still being routed through turn finalization instead of just priming pending state, which can corrupt menu and word-of-the-day flows by treating setup turns like resolved intents
|
||||
- the `jibo test 5` bundle suggests the remaining WOD launch and post-win cleanup bugs share the same root cause: we were leaving the robot-side `cloudSkillResponse` promise unresolved on `word_of_the_day`, `word_of_the_day_guess`, and `word-of-the-day/right_word`, so the latest .NET pass now emits a completion-only silent `SKILL_ACTION` for those paths instead of stopping at `LISTEN` + `EOS` or going fully silent
|
||||
- the `jibo test 6` bundle plus the attached `@be` source snapshot refine that diagnosis: Nimbus does accept the silent completion response, but treats it as a normal `SLIM/RUNTIME_PROMPT` instead of a skill redirect, while the successful on-robot path is built around `menu + domain=word-of-the-day` skill switching through `SkillSwitchScheduler`
|
||||
- the attached `be-framework.js` adds one more strong clue: the Be relaunch hook reads `skillData.nlu.skill`, so synthetic cloud launch turns for word-of-the-day should carry the explicit target skill name in the outbound NLU payload instead of expecting the robot to infer it from `intent/domain` alone
|
||||
- the same `jibo test 6` capture also shows the blue-ring cleanup loop was partly self-inflicted on the .NET side: after `word-of-the-day/right_word` we stopped the active turn, but stray binary audio arriving later on the same transID could still re-arm buffering even without a fresh `LISTEN`, so this pass now requires a real `LISTEN` phase before post-turn audio can reopen buffered completion
|
||||
- the local buffered-audio seam is still producing repeated `whisper.cpp returned no transcript` and `ffmpeg ... Codec not found` failures, so lightweight waveform or energy screening is worth considering once the core launch flow is stable
|
||||
|
||||
Near-term interaction work should now prioritize:
|
||||
|
||||
@@ -230,9 +230,8 @@ public sealed class JiboInteractionService(
|
||||
"@be/word-of-the-day",
|
||||
SkillPayload: new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
["destination"] = "word-of-the-day",
|
||||
["skillId"] = "@be/word-of-the-day",
|
||||
["cloudResponseMode"] = "completion_only"
|
||||
["domain"] = "word-of-the-day",
|
||||
["skillId"] = "@be/word-of-the-day"
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
var nluGuess = ReadClientEntity(turn, "guess");
|
||||
var wordOfDayGuess = ResolveWordOfDayGuess(turn, transcript, nluGuess);
|
||||
var outboundIntent = isWordOfDayLaunch
|
||||
? "loadMenu"
|
||||
? "menu"
|
||||
: isWordOfDayGuess
|
||||
? "guess"
|
||||
: string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(clientIntent)
|
||||
@@ -34,6 +34,8 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
: plan.IntentName ?? "unknown";
|
||||
var outboundAsrText = isWordOfDayGuess && !string.IsNullOrWhiteSpace(wordOfDayGuess)
|
||||
? wordOfDayGuess
|
||||
: isWordOfDayLaunch
|
||||
? string.Empty
|
||||
: string.Equals(clientIntent, "guess", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(nluGuess)
|
||||
? nluGuess
|
||||
: isYesNoTurn && isYesNoIntent
|
||||
@@ -42,72 +44,30 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
? clientIntent
|
||||
: transcript;
|
||||
var outboundRules = isWordOfDayLaunch
|
||||
? ["main-menu/execute_fun_stuff"]
|
||||
? ["word-of-the-day/menu"]
|
||||
: isYesNoTurn && isYesNoIntent ? [yesNoCreateRule!] : rules;
|
||||
var entities = ReadEntities(turn, messageType, isYesNoTurn && isYesNoIntent, isWordOfDayLaunch, isWordOfDayGuess, wordOfDayGuess);
|
||||
object listenMessage;
|
||||
if (isWordOfDayLaunch)
|
||||
var listenMessage = new
|
||||
{
|
||||
listenMessage = new
|
||||
type = "LISTEN",
|
||||
transID = transId,
|
||||
data = new
|
||||
{
|
||||
type = "LISTEN",
|
||||
transID = transId,
|
||||
skillID = "@be/word-of-the-day",
|
||||
onRobot = true,
|
||||
data = new
|
||||
asr = new
|
||||
{
|
||||
asr = new
|
||||
{
|
||||
confidence = 0.95,
|
||||
final = true,
|
||||
text = outboundAsrText
|
||||
},
|
||||
nlu = new
|
||||
{
|
||||
confidence = 0.95,
|
||||
intent = outboundIntent,
|
||||
rules = outboundRules,
|
||||
entities
|
||||
},
|
||||
match = new
|
||||
{
|
||||
intent = outboundIntent,
|
||||
rule = outboundRules.FirstOrDefault() ?? string.Empty,
|
||||
score = 0.95
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
listenMessage = new
|
||||
{
|
||||
type = "LISTEN",
|
||||
transID = transId,
|
||||
data = new
|
||||
confidence = 0.95,
|
||||
final = true,
|
||||
text = outboundAsrText
|
||||
},
|
||||
nlu = BuildNluPayload(outboundIntent, outboundRules, entities, isWordOfDayLaunch ? "@be/word-of-the-day" : null),
|
||||
match = new
|
||||
{
|
||||
asr = new
|
||||
{
|
||||
confidence = 0.95,
|
||||
final = true,
|
||||
text = outboundAsrText
|
||||
},
|
||||
nlu = new
|
||||
{
|
||||
confidence = 0.95,
|
||||
intent = outboundIntent,
|
||||
rules = outboundRules,
|
||||
entities
|
||||
},
|
||||
match = new
|
||||
{
|
||||
intent = outboundIntent,
|
||||
rule = outboundRules.FirstOrDefault() ?? string.Empty,
|
||||
score = 0.95
|
||||
}
|
||||
intent = outboundIntent,
|
||||
rule = outboundRules.FirstOrDefault() ?? string.Empty,
|
||||
score = 0.95
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
var messages = new List<SocketReplyPlan>
|
||||
{
|
||||
@@ -222,7 +182,7 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
{
|
||||
return new Dictionary<string, object?>
|
||||
{
|
||||
["destination"] = "word-of-the-day"
|
||||
["domain"] = "word-of-the-day"
|
||||
};
|
||||
}
|
||||
|
||||
@@ -410,6 +370,28 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds the <c>nlu</c> section of an outbound message as a dictionary so that the
/// optional <c>skill</c> entry can be left out entirely when no skill id is supplied.
/// </summary>
/// <param name="outboundIntent">Value stored under the <c>intent</c> key.</param>
/// <param name="outboundRules">Value stored under the <c>rules</c> key.</param>
/// <param name="entities">Value stored under the <c>entities</c> key; stored as-is.</param>
/// <param name="skillId">
/// Optional skill identifier. When it is null, empty, or whitespace, no <c>skill</c> key is added.
/// </param>
/// <returns>
/// A dictionary with <c>confidence</c> (fixed at 0.95), <c>intent</c>, <c>rules</c>, and
/// <c>entities</c>, plus <c>skill</c> when <paramref name="skillId"/> is non-blank.
/// Key lookups on the returned dictionary ignore case (ordinal).
/// </returns>
private static IReadOnlyDictionary<string, object?> BuildNluPayload(
|
||||
string outboundIntent,
|
||||
IReadOnlyList<string> outboundRules,
|
||||
object entities,
|
||||
string? skillId)
|
||||
{
|
||||
var payload = new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
["confidence"] = 0.95,
|
||||
["intent"] = outboundIntent,
|
||||
["rules"] = outboundRules,
|
||||
["entities"] = entities
|
||||
};
|
||||
|
||||
// Only add the "skill" key when a skill id was actually provided; otherwise the key is absent rather than null.
if (!string.IsNullOrWhiteSpace(skillId))
|
||||
{
|
||||
payload["skill"] = skillId;
|
||||
}
|
||||
|
||||
return payload;
|
||||
}
|
||||
|
||||
private static object BuildGenericFallbackSkillPayload(string transId)
|
||||
{
|
||||
return new
|
||||
|
||||
@@ -44,6 +44,14 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
return [];
|
||||
}
|
||||
|
||||
if (!turnState.AwaitingTurnCompletion &&
|
||||
!session.FollowUpOpen &&
|
||||
!turnState.SawListen &&
|
||||
!string.IsNullOrWhiteSpace(turnState.TransId))
|
||||
{
|
||||
return [];
|
||||
}
|
||||
|
||||
session.LastMessageType = "BINARY_AUDIO";
|
||||
turnState.FirstAudioReceivedUtc ??= DateTimeOffset.UtcNow;
|
||||
turnState.BufferedAudioChunkCount += 1;
|
||||
@@ -140,6 +148,8 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
session.TurnState.IgnoreAdditionalAudioUntilUtc = DateTimeOffset.UtcNow.Add(WebSocketTurnState.DefaultLateAudioIgnoreWindow);
|
||||
session.FollowUpExpiresUtc = null;
|
||||
ResetBufferedAudio(session);
|
||||
session.TurnState.SawListen = false;
|
||||
session.TurnState.SawContext = false;
|
||||
return ResponsePlanToSocketMessagesMapper.MapCompletionOnly(
|
||||
session.TurnState.TransId ?? session.LastTransId ?? string.Empty,
|
||||
"@be/word-of-the-day")
|
||||
@@ -421,6 +431,8 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
turnState.IgnoreAdditionalAudioUntilUtc = DateTimeOffset.UtcNow.Add(WebSocketTurnState.DefaultLateAudioIgnoreWindow);
|
||||
session.FollowUpExpiresUtc = null;
|
||||
ResetBufferedAudio(session);
|
||||
turnState.SawListen = false;
|
||||
turnState.SawContext = false;
|
||||
return [];
|
||||
}
|
||||
|
||||
@@ -521,9 +533,9 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
? null
|
||||
: DateTimeOffset.UtcNow.Add(WebSocketTurnState.DefaultLateAudioIgnoreWindow);
|
||||
|
||||
var emitSkillActions = messageType != "CLIENT_NLU" ||
|
||||
string.Equals(plan.IntentName, "word_of_the_day", StringComparison.OrdinalIgnoreCase) ||
|
||||
string.Equals(plan.IntentName, "word_of_the_day_guess", StringComparison.OrdinalIgnoreCase);
|
||||
var emitSkillActions = !string.Equals(plan.IntentName, "word_of_the_day", StringComparison.OrdinalIgnoreCase) &&
|
||||
(messageType != "CLIENT_NLU" ||
|
||||
string.Equals(plan.IntentName, "word_of_the_day_guess", StringComparison.OrdinalIgnoreCase));
|
||||
var replies = ResponsePlanToSocketMessagesMapper.Map(plan, finalizedTurn, session, emitSkillActions).Select(map => new WebSocketReply
|
||||
{
|
||||
Text = map.Text,
|
||||
@@ -531,6 +543,8 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
}).ToArray();
|
||||
|
||||
ResetBufferedAudio(session);
|
||||
turnState.SawListen = false;
|
||||
turnState.SawContext = false;
|
||||
return replies;
|
||||
}
|
||||
|
||||
|
||||
@@ -144,9 +144,8 @@ public sealed class JiboInteractionServiceTests
|
||||
Assert.Equal("word_of_the_day", decision.IntentName);
|
||||
Assert.Equal("Starting word of the day.", decision.ReplyText);
|
||||
Assert.Equal("@be/word-of-the-day", decision.SkillName);
|
||||
Assert.Equal("word-of-the-day", decision.SkillPayload!["destination"]);
|
||||
Assert.Equal("word-of-the-day", decision.SkillPayload!["domain"]);
|
||||
Assert.Equal("@be/word-of-the-day", decision.SkillPayload["skillId"]);
|
||||
Assert.Equal("completion_only", decision.SkillPayload["cloudResponseMode"]);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
|
||||
@@ -489,15 +489,13 @@ public sealed class JiboWebSocketServiceTests
|
||||
Text = """{"type":"CLIENT_ASR","transID":"trans-wod-launch","data":{"text":"Play word of the day."}}"""
|
||||
});
|
||||
|
||||
Assert.Equal(3, replies.Count);
|
||||
Assert.Equal(2, replies.Count);
|
||||
using var listenPayload = JsonDocument.Parse(replies[0].Text!);
|
||||
Assert.Equal("loadMenu", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
|
||||
Assert.Equal("Play word of the day.", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString());
|
||||
Assert.Equal("word-of-the-day", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("entities").GetProperty("destination").GetString());
|
||||
Assert.Equal("main-menu/execute_fun_stuff", listenPayload.RootElement.GetProperty("data").GetProperty("match").GetProperty("rule").GetString());
|
||||
Assert.Equal("@be/word-of-the-day", listenPayload.RootElement.GetProperty("skillID").GetString());
|
||||
Assert.True(listenPayload.RootElement.GetProperty("onRobot").GetBoolean());
|
||||
Assert.Equal("SKILL_ACTION", ReadReplyType(replies[2]));
|
||||
Assert.Equal("menu", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
|
||||
Assert.Equal(string.Empty, listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString());
|
||||
Assert.Equal("word-of-the-day", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("entities").GetProperty("domain").GetString());
|
||||
Assert.Equal("@be/word-of-the-day", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("skill").GetString());
|
||||
Assert.Equal("word-of-the-day/menu", listenPayload.RootElement.GetProperty("data").GetProperty("match").GetProperty("rule").GetString());
|
||||
|
||||
var session = _store.FindSessionByToken("hub-wod-launch-token");
|
||||
Assert.NotNull(session);
|
||||
@@ -553,14 +551,14 @@ public sealed class JiboWebSocketServiceTests
|
||||
Binary = new byte[3000]
|
||||
});
|
||||
|
||||
Assert.Equal(3, replies.Count);
|
||||
Assert.Equal(2, replies.Count);
|
||||
Assert.Equal("LISTEN", ReadReplyType(replies[0]));
|
||||
Assert.Equal("EOS", ReadReplyType(replies[1]));
|
||||
Assert.Equal("SKILL_ACTION", ReadReplyType(replies[2]));
|
||||
|
||||
using var listenPayload = JsonDocument.Parse(replies[0].Text!);
|
||||
Assert.Equal("@be/word-of-the-day", listenPayload.RootElement.GetProperty("skillID").GetString());
|
||||
Assert.True(listenPayload.RootElement.GetProperty("onRobot").GetBoolean());
|
||||
Assert.Equal("menu", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
|
||||
Assert.Equal("word-of-the-day", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("entities").GetProperty("domain").GetString());
|
||||
Assert.Equal("@be/word-of-the-day", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("skill").GetString());
|
||||
|
||||
var lateReplies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user