More word-of-the-day fixes

This commit is contained in:
Jacob Dubin
2026-04-18 17:15:49 -05:00
parent 2dac05462b
commit 93bb4ac3a5
13 changed files with 1505 additions and 24 deletions

View File

@@ -112,8 +112,9 @@ Current raw-audio behavior is still a compatibility bridge:
- create-flow yes/no turns now preserve `create/is_it_a_keeper` and `domain=create` in the outbound synthetic `LISTEN` payload
- structured word-of-the-day guesses now complete as `CLIENT_NLU` turns instead of falling back to pending/blank-audio behavior
- spoken word-of-the-day launch phrases now route into the same cloud intent as the on-screen menu path
- spoken word-of-the-day puzzle answers now complete against `word-of-the-day/puzzle` listen rules instead of degrading into generic chat
- late empty same-turn `CLIENT_ASR` follow-ons are ignored after a completed turn so word-of-the-day wins do not get tailed by stale blank-audio comments
- spoken word-of-the-day puzzle answers now emit menu-compatible `guess` turns, including line-number picks resolved through the observed hint order
- voice-triggered word-of-the-day launches now emit the same `loadMenu + destination=word-of-the-day` shape the robot already uses successfully from the menu
- hotphrase `[BLANK_AUDIO]` cleanup turns are ignored instead of reopening the cloud into a stale blank-audio comment path after word-of-the-day completion
- phrase matching has been widened slightly for known test prompts such as joke, dance, surprise, weather, calendar, commute, and news variants
- time replies now use the natural hour format without a leading zero

View File

@@ -18,6 +18,7 @@ public sealed class JiboInteractionService(
: null;
var clientRules = ReadRules(turn, "clientRules").ToArray();
var listenRules = ReadRules(turn, "listenRules").ToArray();
var listenAsrHints = ReadRules(turn, "listenAsrHints").ToArray();
var clientEntities = ReadEntities(turn);
var isYesNoTurn = IsYesNoTurn(turn);
@@ -32,8 +33,8 @@ public sealed class JiboInteractionService(
"how_are_you" => new JiboInteractionDecision("how_are_you", randomizer.Choose(catalog.HowAreYouReplies)),
"yes" => new JiboInteractionDecision("yes", "Yes."),
"no" => new JiboInteractionDecision("no", "No."),
"word_of_the_day" => new JiboInteractionDecision("word_of_the_day", "Word of the day is ready."),
"word_of_the_day_guess" => BuildWordOfTheDayGuessDecision(clientEntities, transcript),
"word_of_the_day" => BuildWordOfTheDayLaunchDecision(),
"word_of_the_day_guess" => BuildWordOfTheDayGuessDecision(clientEntities, transcript, listenAsrHints),
"surprise" => new JiboInteractionDecision("surprise", randomizer.Choose(catalog.SurpriseReplies)),
"personal_report" => new JiboInteractionDecision("personal_report", randomizer.Choose(catalog.PersonalReportReplies)),
"weather" => new JiboInteractionDecision("weather", randomizer.Choose(catalog.WeatherReplies)),
@@ -221,19 +222,65 @@ public sealed class JiboInteractionService(
return "chat";
}
/// <summary>
/// Creates the decision returned for the <c>word_of_the_day</c> intent: a fixed reply
/// string plus a case-insensitive payload whose <c>destination</c> entry is
/// <c>word-of-the-day</c>.
/// </summary>
/// <returns>The launch decision; its third constructor argument is passed as <c>null</c>.</returns>
private static JiboInteractionDecision BuildWordOfTheDayLaunchDecision()
{
    // Keys are compared with OrdinalIgnoreCase, so lookups of "Destination" also hit.
    var launchPayload = new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase);
    launchPayload["destination"] = "word-of-the-day";

    return new JiboInteractionDecision("word_of_the_day", "Starting word of the day.", null, launchPayload);
}
/// <summary>
/// Builds the decision for the <c>word_of_the_day_guess</c> intent: resolves the guess text,
/// composes the "I heard ..." reply, and attaches the guess under the <c>guess</c> key of a
/// case-insensitive dictionary passed as the fourth constructor argument.
/// </summary>
// NOTE(review): this span comes from a diff rendering that has lost its +/- markers, so
// superseded and current lines appear side by side (two `string transcript` parameter lines,
// two `guess` declarations, two `return` statements). Confirm which lines are live before
// treating this text as compilable code.
private static JiboInteractionDecision BuildWordOfTheDayGuessDecision(
IReadOnlyDictionary<string, string> clientEntities,
// Presumably the pre-change end of the parameter list (two-parameter form) - TODO confirm.
string transcript)
string transcript,
IReadOnlyList<string> listenAsrHints)
{
// Presumably the pre-change resolution: takes the "guess" entity when the key exists, else the transcript.
var guess = clientEntities.TryGetValue("guess", out var guessValue)
? guessValue
: transcript;
// Resolution delegated to ResolveWordOfTheDayGuess, which also receives the ASR hints.
var guess = ResolveWordOfTheDayGuess(clientEntities, transcript, listenAsrHints);
// A blank guess gets the generic acknowledgement; otherwise the guess is echoed back.
var reply = string.IsNullOrWhiteSpace(guess)
? "I heard your word of the day guess."
: $"I heard {guess}.";
// Presumably the pre-change return (no payload dictionary) - TODO confirm.
return new JiboInteractionDecision("word_of_the_day_guess", reply);
// Return that also carries the resolved guess; the third argument is passed as null.
return new JiboInteractionDecision(
"word_of_the_day_guess",
reply,
null,
new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase)
{
["guess"] = guess
});
}
/// <summary>
/// Picks the text to treat as the word-of-the-day guess.
/// </summary>
/// <param name="clientEntities">Entities for the turn; a non-blank <c>guess</c> entry takes priority.</param>
/// <param name="transcript">Spoken text, used both for line-number matching and as the fallback result.</param>
/// <param name="listenAsrHints">Ordered hints; a spoken "1"/"one"/"first" (through three) selects by position.</param>
/// <returns>
/// The <c>guess</c> entity when present and non-blank; otherwise the hint at the spoken
/// position when the transcript names one that exists; otherwise the unmodified transcript.
/// </returns>
private static string ResolveWordOfTheDayGuess(
    IReadOnlyDictionary<string, string> clientEntities,
    string transcript,
    IReadOnlyList<string> listenAsrHints)
{
    if (clientEntities.TryGetValue("guess", out var entityGuess) &&
        !string.IsNullOrWhiteSpace(entityGuess))
    {
        return entityGuess;
    }

    // Compare on a trimmed, lower-cased copy with trailing punctuation removed;
    // the original transcript is what gets returned on fallback.
    var spoken = transcript.Trim().TrimEnd('.', '!', '?', ',').ToLowerInvariant();

    int pickedLine;
    switch (spoken)
    {
        case "1":
        case "one":
        case "first":
            pickedLine = 0;
            break;
        case "2":
        case "two":
        case "second":
            pickedLine = 1;
            break;
        case "3":
        case "three":
        case "third":
            pickedLine = 2;
            break;
        default:
            // Not a line-number pick: the transcript itself is the guess.
            return transcript;
    }

    // A pick beyond the available hints falls back to the transcript as well.
    return pickedLine < listenAsrHints.Count
        ? listenAsrHints[pickedLine]
        : transcript;
}
private static bool IsYesNoTurn(TurnContext turn)

View File

@@ -31,6 +31,11 @@ public sealed class ProtocolToTurnContextMapper
attributes["listenRules"] = turnState.ListenRules;
}
if (turnState.ListenAsrHints.Count > 0)
{
attributes["listenAsrHints"] = turnState.ListenAsrHints;
}
if (turnState.BufferedAudioBytes > 0)
{
attributes["bufferedAudioBytes"] = turnState.BufferedAudioBytes;

View File

@@ -21,19 +21,30 @@ public sealed class ResponsePlanToSocketMessagesMapper
var isYesNoTurn = !string.IsNullOrWhiteSpace(yesNoCreateRule);
var isYesNoIntent = string.Equals(plan.IntentName, "yes", StringComparison.OrdinalIgnoreCase) ||
string.Equals(plan.IntentName, "no", StringComparison.OrdinalIgnoreCase);
var outboundIntent = string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(clientIntent)
var isWordOfDayLaunch = string.Equals(plan.IntentName, "word_of_the_day", StringComparison.OrdinalIgnoreCase);
var isWordOfDayGuess = string.Equals(plan.IntentName, "word_of_the_day_guess", StringComparison.OrdinalIgnoreCase);
var nluGuess = ReadClientEntity(turn, "guess");
var wordOfDayGuess = ResolveWordOfDayGuess(turn, transcript, nluGuess);
var outboundIntent = isWordOfDayLaunch
? "loadMenu"
: isWordOfDayGuess
? "guess"
: string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(clientIntent)
? clientIntent
: plan.IntentName ?? "unknown";
var nluGuess = ReadClientEntity(turn, "guess");
var outboundAsrText = string.Equals(clientIntent, "guess", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(nluGuess)
var outboundAsrText = isWordOfDayGuess && !string.IsNullOrWhiteSpace(wordOfDayGuess)
? wordOfDayGuess
: string.Equals(clientIntent, "guess", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(nluGuess)
? nluGuess
: isYesNoTurn && isYesNoIntent
? transcript
: string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(clientIntent)
? clientIntent
: transcript;
var outboundRules = isYesNoTurn && isYesNoIntent ? [yesNoCreateRule!] : rules;
var entities = ReadEntities(turn, messageType, isYesNoTurn && isYesNoIntent);
var outboundRules = isWordOfDayLaunch
? ["main-menu/execute_fun_stuff"]
: isYesNoTurn && isYesNoIntent ? [yesNoCreateRule!] : rules;
var entities = ReadEntities(turn, messageType, isYesNoTurn && isYesNoIntent, isWordOfDayLaunch, isWordOfDayGuess, wordOfDayGuess);
var messages = new List<SocketReplyPlan>
{
new(JsonSerializer.Serialize(new
@@ -73,7 +84,7 @@ public sealed class ResponsePlanToSocketMessagesMapper
}))
};
if (emitSkillActions && speak is not null)
if (emitSkillActions && speak is not null && !isWordOfDayLaunch && !isWordOfDayGuess)
{
messages.Add(new SocketReplyPlan(
JsonSerializer.Serialize(BuildSkillPayload(plan, turn, transId, speak, skill)),
@@ -145,7 +156,13 @@ public sealed class ResponsePlanToSocketMessagesMapper
};
}
private static object ReadEntities(TurnContext turn, string? messageType, bool yesNoCreateTurn)
private static object ReadEntities(
TurnContext turn,
string? messageType,
bool yesNoCreateTurn,
bool wordOfDayLaunch,
bool wordOfDayGuess,
string? guess)
{
if (yesNoCreateTurn)
{
@@ -155,6 +172,22 @@ public sealed class ResponsePlanToSocketMessagesMapper
};
}
if (wordOfDayLaunch)
{
return new Dictionary<string, object?>
{
["destination"] = "word-of-the-day"
};
}
if (wordOfDayGuess)
{
return new Dictionary<string, object?>
{
["guess"] = guess ?? string.Empty
};
}
if (!string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase))
{
return new Dictionary<string, object?>();
@@ -229,6 +262,43 @@ public sealed class ResponsePlanToSocketMessagesMapper
};
}
/// <summary>
/// Reads one entry from a skill action's payload and renders it with <c>ToString()</c>.
/// </summary>
/// <param name="skill">The skill action to read from; may be <c>null</c>.</param>
/// <param name="key">The payload key to look up.</param>
/// <returns>
/// The entry's string form, or <c>null</c> when the skill, its payload, the key, or the
/// stored value is missing.
/// </returns>
private static string? ReadSkillPayloadString(InvokeNativeSkillAction? skill, string key)
{
    var payload = skill?.Payload;
    if (payload is not null && payload.TryGetValue(key, out var raw))
    {
        return raw?.ToString();
    }

    return null;
}
/// <summary>
/// Determines the guess text to send for a word-of-the-day guess turn.
/// </summary>
/// <param name="turn">The turn whose <c>listenAsrHints</c> values are read when the transcript is a line-number pick.</param>
/// <param name="transcript">Spoken text; matched against "1"/"one"/"first" through "3"/"three"/"third".</param>
/// <param name="nluGuess">A guess already supplied by the client; returned as-is when non-blank.</param>
/// <returns>
/// <paramref name="nluGuess"/> when non-blank; otherwise the hint at the spoken position when
/// one exists; otherwise the unmodified transcript.
/// </returns>
private static string? ResolveWordOfDayGuess(TurnContext turn, string transcript, string? nluGuess)
{
    if (string.IsNullOrWhiteSpace(nluGuess))
    {
        // Match on a trimmed, lower-cased copy with trailing punctuation removed.
        var spoken = transcript.Trim().TrimEnd('.', '!', '?', ',').ToLowerInvariant();
        int? pickedLine = spoken switch
        {
            "1" or "one" or "first" => 0,
            "2" or "two" or "second" => 1,
            "3" or "three" or "third" => 2,
            _ => (int?)null
        };

        // Hints are only read once the transcript is known to be a line-number pick.
        if (pickedLine is int line)
        {
            var hintOrder = ReadRuleValues(turn, "listenAsrHints").ToArray();
            if (line < hintOrder.Length)
            {
                return hintOrder[line];
            }
        }

        return transcript;
    }

    return nluGuess;
}
private static object BuildSkillPayload(ResponsePlan plan, TurnContext turn, string transId, SpeakAction speak, InvokeNativeSkillAction? skill)
{
var skillPayload = skill?.Payload;

View File

@@ -235,15 +235,27 @@ public sealed class WebSocketTurnFinalizationService(
if (root.TryGetProperty("data", out var data) && data.ValueKind == JsonValueKind.Object)
{
if (data.TryGetProperty("rules", out var rules) && rules.ValueKind == JsonValueKind.Array)
{
turnState.ListenRules = rules.EnumerateArray()
.Select(item => item.ValueKind == JsonValueKind.String ? item.GetString() ?? string.Empty : item.ToString())
.Where(rule => !string.IsNullOrWhiteSpace(rule))
if (data.TryGetProperty("rules", out var rules) && rules.ValueKind == JsonValueKind.Array)
{
turnState.ListenRules = rules.EnumerateArray()
.Select(item => item.ValueKind == JsonValueKind.String ? item.GetString() ?? string.Empty : item.ToString())
.Where(rule => !string.IsNullOrWhiteSpace(rule))
.ToArray();
session.Metadata["listenRules"] = turnState.ListenRules;
}
if (data.TryGetProperty("asr", out var asr) &&
asr.ValueKind == JsonValueKind.Object &&
asr.TryGetProperty("hints", out var hints) &&
hints.ValueKind == JsonValueKind.Array)
{
turnState.ListenAsrHints = hints.EnumerateArray()
.Where(static item => item.ValueKind == JsonValueKind.String)
.Select(static item => item.GetString() ?? string.Empty)
.Where(static hint => !string.IsNullOrWhiteSpace(hint))
.ToArray();
}
if (data.TryGetProperty("intent", out var intent) && intent.ValueKind == JsonValueKind.String)
{
session.LastIntent = intent.GetString();
@@ -292,6 +304,7 @@ public sealed class WebSocketTurnFinalizationService(
turnState.SawListen = false;
turnState.SawContext = false;
turnState.ListenRules = [];
turnState.ListenAsrHints = [];
}
private async Task<IReadOnlyList<WebSocketReply>> FinalizeTurnAsync(
@@ -302,6 +315,13 @@ public sealed class WebSocketTurnFinalizationService(
CancellationToken cancellationToken)
{
var turn = ProtocolToTurnContextMapper.MapListenMessage(envelope, session, messageType);
if (ShouldIgnoreBlankAudioHotphraseTurn(turn))
{
session.TurnState.AwaitingTurnCompletion = false;
ResetBufferedAudio(session);
return [];
}
var finalizedTurn = await ResolveTranscriptAsync(turn, session, cancellationToken);
if (!IsTranscriptUsable(finalizedTurn))
{
@@ -513,6 +533,11 @@ public sealed class WebSocketTurnFinalizationService(
return false;
}
if (transcript is "blank_audio" or "blank audio")
{
return false;
}
if (transcript.Length >= 6)
{
return true;
@@ -581,6 +606,18 @@ public sealed class WebSocketTurnFinalizationService(
: null;
}
/// <summary>
/// Reports whether a turn should be dropped because its normalized transcript is
/// <c>blank_audio</c> / <c>blank audio</c> and its <c>listenRules</c> contain <c>launch</c>
/// (compared case-insensitively).
/// </summary>
/// <param name="turn">The turn to inspect; the normalized transcript is preferred over the raw one.</param>
/// <returns><c>true</c> only when both conditions hold.</returns>
private static bool ShouldIgnoreBlankAudioHotphraseTurn(TurnContext turn)
{
    var spoken = NormalizeTranscript(turn.NormalizedTranscript ?? turn.RawTranscript);
    var isBlankAudio = spoken is "blank_audio" or "blank audio";
    if (!isBlankAudio)
    {
        return false;
    }

    foreach (var rule in ReadRules(turn, "listenRules"))
    {
        if (string.Equals(rule, "launch", StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
private static bool ShouldIgnoreLateEmptyTurn(TurnContext turn, CloudSession session, string messageType)
{
if (messageType is not ("CLIENT_ASR" or "CLIENT_NLU"))

View File

@@ -17,4 +17,5 @@ public sealed class WebSocketTurnState
public bool SawListen { get; set; }
public bool SawContext { get; set; }
public IReadOnlyList<string> ListenRules { get; set; } = [];
public IReadOnlyList<string> ListenAsrHints { get; set; } = [];
}