try to fix word of the day

This commit is contained in:
Jacob Dubin
2026-04-18 16:43:38 -05:00
parent 83a9350a9d
commit 6d457fe1c0
11 changed files with 825 additions and 3 deletions

View File

@@ -110,6 +110,7 @@ Current raw-audio behavior is still a compatibility bridge:
- this is intentionally not a claim of real ASR parity
- follow-up turns now preserve enough constraint state to distinguish yes/no-style replies from ordinary free-form chat
- create-flow yes/no turns now preserve `create/is_it_a_keeper` and `domain=create` in the outbound synthetic `LISTEN` payload
- structured word-of-the-day guesses now complete as `CLIENT_NLU` turns instead of falling back to pending/blank-audio behavior
- phrase matching has been widened slightly for known test prompts such as joke, dance, surprise, weather, calendar, commute, and news variants
## Buffered Audio STT
@@ -148,6 +149,12 @@ Latest live-capture guidance after the `2026-04-18` round:
- treat `ffmpeg` decode failures on normalized Ogg captures as evidence that the local audio path still needs more hardening before it can be the default live-test expectation
- keep the Node implementation as the oracle for yes/no turn semantics and audio preprocessing details until the `.NET` port catches up
Capture-storage guidance while moving toward hosted group testing:
- repo-local file captures remain the default for laptop-based reverse engineering
- hosted deployments should keep runtime request handling decoupled from long-term capture retention
- sanitized fixtures remain the preferred durable artifact for parity work and bug reproduction
## Current Interaction Paths
The working cloud model currently looks like three main paths:

View File

@@ -16,9 +16,11 @@ public sealed class JiboInteractionService(
var clientIntent = turn.Attributes.TryGetValue("clientIntent", out var rawClientIntent)
? rawClientIntent?.ToString()
: null;
var clientRules = ReadRules(turn, "clientRules").ToArray();
var clientEntities = ReadEntities(turn);
var isYesNoTurn = IsYesNoTurn(turn);
var semanticIntent = ResolveSemanticIntent(lowered, clientIntent, isYesNoTurn);
var semanticIntent = ResolveSemanticIntent(lowered, clientIntent, clientRules, clientEntities, isYesNoTurn);
return semanticIntent switch
{
"joke" => BuildJokeDecision(catalog),
@@ -29,6 +31,8 @@ public sealed class JiboInteractionService(
"how_are_you" => new JiboInteractionDecision("how_are_you", randomizer.Choose(catalog.HowAreYouReplies)),
"yes" => new JiboInteractionDecision("yes", "Yes."),
"no" => new JiboInteractionDecision("no", "No."),
"word_of_the_day" => new JiboInteractionDecision("word_of_the_day", "Word of the day is ready."),
"word_of_the_day_guess" => BuildWordOfTheDayGuessDecision(clientEntities),
"surprise" => new JiboInteractionDecision("surprise", randomizer.Choose(catalog.SurpriseReplies)),
"personal_report" => new JiboInteractionDecision("personal_report", randomizer.Choose(catalog.PersonalReportReplies)),
"weather" => new JiboInteractionDecision("weather", randomizer.Choose(catalog.WeatherReplies)),
@@ -90,8 +94,26 @@ public sealed class JiboInteractionService(
.Replace("{transcript}", transcript, StringComparison.Ordinal);
}
private static string ResolveSemanticIntent(string loweredTranscript, string? clientIntent, bool isYesNoTurn)
private static string ResolveSemanticIntent(
string loweredTranscript,
string? clientIntent,
IReadOnlyList<string> clientRules,
IReadOnlyDictionary<string, string> clientEntities,
bool isYesNoTurn)
{
if (string.Equals(clientIntent, "guess", StringComparison.OrdinalIgnoreCase) &&
clientRules.Any(rule => string.Equals(rule, "word-of-the-day/puzzle", StringComparison.OrdinalIgnoreCase)))
{
return "word_of_the_day_guess";
}
if (string.Equals(clientIntent, "loadMenu", StringComparison.OrdinalIgnoreCase) &&
clientEntities.TryGetValue("destination", out var destination) &&
string.Equals(destination, "word-of-the-day", StringComparison.OrdinalIgnoreCase))
{
return "word_of_the_day";
}
if (string.Equals(clientIntent, "askForTime", StringComparison.OrdinalIgnoreCase))
{
return "time";
@@ -178,6 +200,19 @@ public sealed class JiboInteractionService(
return "chat";
}
/// <summary>
/// Builds the decision returned for a word-of-the-day guess turn.
/// </summary>
/// <param name="clientEntities">Entities supplied by the client; only the "guess" entry is read.</param>
/// <returns>
/// A "word_of_the_day_guess" decision whose reply echoes the guessed word, or a generic
/// acknowledgement when the guess is missing, empty, or whitespace.
/// </returns>
private static JiboInteractionDecision BuildWordOfTheDayGuessDecision(IReadOnlyDictionary<string, string> clientEntities)
{
    const string intentName = "word_of_the_day_guess";

    // Echo the guess only when the entity exists and carries visible text.
    if (clientEntities.TryGetValue("guess", out var heardWord) && !string.IsNullOrWhiteSpace(heardWord))
    {
        return new JiboInteractionDecision(intentName, $"I heard {heardWord}.");
    }

    return new JiboInteractionDecision(intentName, "I heard your word of the day guess.");
}
private static bool IsYesNoTurn(TurnContext turn)
{
return ReadRules(turn, "listenRules").Concat(ReadRules(turn, "clientRules"))
@@ -204,6 +239,26 @@ public sealed class JiboInteractionService(
};
}
/// <summary>
/// Reads the "clientEntities" turn attribute as a name-to-value map of string entities.
/// </summary>
/// <param name="turn">Turn whose attributes may carry a "clientEntities" value.</param>
/// <returns>
/// An empty map when the attribute is missing, null, or of an unsupported shape.
/// For a JSON object, only string-valued properties are kept. For an
/// <c>IDictionary&lt;string, object?&gt;</c>, non-null values are converted with <c>ToString()</c>.
/// Maps built here compare names with <see cref="StringComparer.OrdinalIgnoreCase"/>; an
/// already-typed <c>IReadOnlyDictionary&lt;string, string&gt;</c> is returned as-is with whatever
/// comparer it was created with.
/// </returns>
/// <remarks>
/// Entries are written with indexer assignment rather than <c>ToDictionary</c>, so a payload that
/// repeats an entity name (or uses names differing only by case) no longer throws
/// <see cref="ArgumentException"/>; the last occurrence wins.
/// </remarks>
private static IReadOnlyDictionary<string, string> ReadEntities(TurnContext turn)
{
    var entities = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

    if (!turn.Attributes.TryGetValue("clientEntities", out var value) || value is null)
    {
        return entities;
    }

    switch (value)
    {
        case JsonElement { ValueKind: JsonValueKind.Object } json:
            foreach (var property in json.EnumerateObject())
            {
                // Non-string properties (numbers, objects, arrays, null) are ignored.
                if (property.Value.ValueKind == JsonValueKind.String)
                {
                    entities[property.Name] = property.Value.GetString() ?? string.Empty;
                }
            }

            break;

        case IReadOnlyDictionary<string, string> typed:
            return typed;

        case IDictionary<string, object?> dictionary:
            foreach (var pair in dictionary)
            {
                if (pair.Value is not null)
                {
                    entities[pair.Key] = pair.Value.ToString() ?? string.Empty;
                }
            }

            break;
    }

    return entities;
}
private static bool MatchesAny(string loweredTranscript, params string[] candidates)
{
return candidates.Any(candidate => loweredTranscript.Contains(candidate, StringComparison.Ordinal));

View File

@@ -24,7 +24,10 @@ public sealed class ResponsePlanToSocketMessagesMapper
var outboundIntent = string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(clientIntent)
? clientIntent
: plan.IntentName ?? "unknown";
var outboundAsrText = isYesNoTurn && isYesNoIntent
var nluGuess = ReadClientEntity(turn, "guess");
var outboundAsrText = string.Equals(clientIntent, "guess", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(nluGuess)
? nluGuess
: isYesNoTurn && isYesNoIntent
? transcript
: string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(clientIntent)
? clientIntent
@@ -206,6 +209,26 @@ public sealed class ResponsePlanToSocketMessagesMapper
: null;
}
/// <summary>
/// Looks up a single entity by name in the "clientEntities" turn attribute.
/// </summary>
/// <param name="turn">Turn whose attributes may carry a "clientEntities" value.</param>
/// <param name="entityName">Name of the entity to read.</param>
/// <returns>
/// The entity value when the attribute is a JSON object with a string property of that name,
/// a string dictionary containing the name, or an object dictionary containing the name
/// (converted with <c>ToString()</c>); otherwise <c>null</c>.
/// </returns>
private static string? ReadClientEntity(TurnContext turn, string entityName)
{
    var found = turn.Attributes.TryGetValue("clientEntities", out var value);
    if (!found || value is null)
    {
        return null;
    }

    // Each shape is tried in turn; a shape that matches but lacks the entity
    // falls through to the next check.
    if (value is JsonElement { ValueKind: JsonValueKind.Object } jsonElement
        && jsonElement.TryGetProperty(entityName, out var property)
        && property.ValueKind == JsonValueKind.String)
    {
        return property.GetString();
    }

    if (value is IReadOnlyDictionary<string, string> typed
        && typed.TryGetValue(entityName, out var typedValue))
    {
        return typedValue;
    }

    if (value is IDictionary<string, object?> dictionary
        && dictionary.TryGetValue(entityName, out var boxedValue))
    {
        return boxedValue?.ToString();
    }

    return null;
}
private static object BuildSkillPayload(ResponsePlan plan, TurnContext turn, string transId, SpeakAction speak, InvokeNativeSkillAction? skill)
{
var skillPayload = skill?.Payload;

View File

@@ -490,7 +490,16 @@ public sealed class WebSocketTurnFinalizationService(
private static bool IsTranscriptUsable(TurnContext turn)
{
var messageType = ReadMessageType(turn);
var clientIntent = ReadAttribute(turn, "clientIntent");
var transcript = NormalizeTranscript(turn.NormalizedTranscript ?? turn.RawTranscript);
if (string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) &&
!string.IsNullOrWhiteSpace(clientIntent))
{
return true;
}
if (string.IsNullOrWhiteSpace(transcript))
{
return false;
@@ -546,4 +555,16 @@ public sealed class WebSocketTurnFinalizationService(
.Replace(" ", " ", StringComparison.Ordinal)
.Trim();
}
/// <summary>
/// Reads the "messageType" turn attribute as a string.
/// </summary>
/// <param name="turn">Turn whose attributes are inspected.</param>
/// <returns>Whatever <c>ReadAttribute</c> yields for the "messageType" key.</returns>
private static string? ReadMessageType(TurnContext turn) => ReadAttribute(turn, "messageType");
/// <summary>
/// Reads a turn attribute and converts it to its string form.
/// </summary>
/// <param name="turn">Turn whose attributes are inspected.</param>
/// <param name="key">Attribute key to look up.</param>
/// <returns>
/// The attribute value's <c>ToString()</c> result, or <c>null</c> when the key is absent
/// or the stored value is null.
/// </returns>
private static string? ReadAttribute(TurnContext turn, string key)
{
    if (!turn.Attributes.TryGetValue(key, out var raw))
    {
        return null;
    }

    return raw?.ToString();
}
}