wod and time improvements

This commit is contained in:
Jacob Dubin
2026-04-18 16:57:18 -05:00
parent 6d457fe1c0
commit 2dac05462b
10 changed files with 1051 additions and 6 deletions

View File

@@ -90,12 +90,20 @@ Evidence from the latest word-of-the-day capture round:
- word-of-the-day guesses can arrive as structured `CLIENT_NLU` turns with `intent=guess`, `rules=["word-of-the-day/puzzle"]`, and `entities.guess=<word>`
- those structured turns should be treated as first-class cloud inputs even when no free-form transcript is present
Evidence from the continued `2026-04-18` word-of-the-day and time captures:
- spoken "start word of the day" style requests should route into the same word-of-the-day launch path as the menu destination
- spoken puzzle answers like `pastoral` should be treated as valid guesses whenever the active listen rules show `word-of-the-day/puzzle`
- after a successful word-of-the-day completion, late empty same-turn audio should be ignored instead of generating a stale blank-audio follow-up
- clock replies should use the user-facing hour format without a leading zero
Near-term interaction work should now prioritize:
1. preserve and interpret yes/no turn constraints from observed listen rules
2. broaden phrase-to-intent matching for the small set of known working skills before moving to larger NLU ambitions
3. keep synthetic transcript hints as the most reliable parity path when captures already provide them
4. continue evaluating whether local preprocessing is worth further investment or whether managed STT should replace it for the next serious testing phase
5. start separating laptop-local capture storage from the eventual hosted retention/export path so group testing does not depend on repo-local zip handling
## Capture Storage Direction

View File

@@ -111,7 +111,11 @@ Current raw-audio behavior is still a compatibility bridge:
- follow-up turns now preserve enough constraint state to distinguish yes/no-style replies from ordinary free-form chat
- create-flow yes/no turns now preserve `create/is_it_a_keeper` and `domain=create` in the outbound synthetic `LISTEN` payload
- structured word-of-the-day guesses now complete as `CLIENT_NLU` turns instead of falling back to pending/blank-audio behavior
- spoken word-of-the-day launch phrases now route into the same cloud intent as the on-screen menu path
- spoken word-of-the-day puzzle answers now complete against `word-of-the-day/puzzle` listen rules instead of degrading into generic chat
- late empty same-turn `CLIENT_ASR` follow-ons are ignored after a completed turn so word-of-the-day wins do not get tailed by stale blank-audio comments
- phrase matching has been widened slightly for known test prompts such as joke, dance, surprise, weather, calendar, commute, and news variants
- time replies now use the natural hour format without a leading zero
## Buffered Audio STT

View File

@@ -17,22 +17,23 @@ public sealed class JiboInteractionService(
? rawClientIntent?.ToString()
: null;
var clientRules = ReadRules(turn, "clientRules").ToArray();
var listenRules = ReadRules(turn, "listenRules").ToArray();
var clientEntities = ReadEntities(turn);
var isYesNoTurn = IsYesNoTurn(turn);
var semanticIntent = ResolveSemanticIntent(lowered, clientIntent, clientRules, clientEntities, isYesNoTurn);
var semanticIntent = ResolveSemanticIntent(lowered, clientIntent, clientRules, listenRules, clientEntities, isYesNoTurn);
return semanticIntent switch
{
"joke" => BuildJokeDecision(catalog),
"dance" => BuildDanceDecision(catalog),
"time" => new JiboInteractionDecision("time", $"It is {DateTime.Now:hh:mm tt}."),
"time" => new JiboInteractionDecision("time", $"It is {DateTime.Now:h:mm tt}."),
"date" => new JiboInteractionDecision("date", $"Today is {DateTime.Now:dddd, MMMM d}."),
"hello" => new JiboInteractionDecision("hello", randomizer.Choose(catalog.GreetingReplies)),
"how_are_you" => new JiboInteractionDecision("how_are_you", randomizer.Choose(catalog.HowAreYouReplies)),
"yes" => new JiboInteractionDecision("yes", "Yes."),
"no" => new JiboInteractionDecision("no", "No."),
"word_of_the_day" => new JiboInteractionDecision("word_of_the_day", "Word of the day is ready."),
"word_of_the_day_guess" => BuildWordOfTheDayGuessDecision(clientEntities),
"word_of_the_day_guess" => BuildWordOfTheDayGuessDecision(clientEntities, transcript),
"surprise" => new JiboInteractionDecision("surprise", randomizer.Choose(catalog.SurpriseReplies)),
"personal_report" => new JiboInteractionDecision("personal_report", randomizer.Choose(catalog.PersonalReportReplies)),
"weather" => new JiboInteractionDecision("weather", randomizer.Choose(catalog.WeatherReplies)),
@@ -98,11 +99,15 @@ public sealed class JiboInteractionService(
string loweredTranscript,
string? clientIntent,
IReadOnlyList<string> clientRules,
IReadOnlyList<string> listenRules,
IReadOnlyDictionary<string, string> clientEntities,
bool isYesNoTurn)
{
var wordOfDayPuzzleTurn = clientRules.Concat(listenRules)
.Any(rule => string.Equals(rule, "word-of-the-day/puzzle", StringComparison.OrdinalIgnoreCase));
if (string.Equals(clientIntent, "guess", StringComparison.OrdinalIgnoreCase) &&
clientRules.Any(rule => string.Equals(rule, "word-of-the-day/puzzle", StringComparison.OrdinalIgnoreCase)))
wordOfDayPuzzleTurn)
{
return "word_of_the_day_guess";
}
@@ -124,6 +129,22 @@ public sealed class JiboInteractionService(
return "date";
}
if (MatchesAny(
loweredTranscript,
"word of the day",
"start word of the day",
"play word of the day",
"do word of the day",
"open word of the day"))
{
return "word_of_the_day";
}
if (wordOfDayPuzzleTurn && !string.IsNullOrWhiteSpace(loweredTranscript))
{
return "word_of_the_day_guess";
}
if (MatchesAny(loweredTranscript, "joke", "funny", "make me laugh"))
{
return "joke";
@@ -200,11 +221,13 @@ public sealed class JiboInteractionService(
return "chat";
}
private static JiboInteractionDecision BuildWordOfTheDayGuessDecision(IReadOnlyDictionary<string, string> clientEntities)
private static JiboInteractionDecision BuildWordOfTheDayGuessDecision(
IReadOnlyDictionary<string, string> clientEntities,
string transcript)
{
var guess = clientEntities.TryGetValue("guess", out var guessValue)
? guessValue
: string.Empty;
: transcript;
var reply = string.IsNullOrWhiteSpace(guess)
? "I heard your word of the day guess."

View File

@@ -330,6 +330,13 @@ public sealed class WebSocketTurnFinalizationService(
}
var turnState = session.TurnState;
if (ShouldIgnoreLateEmptyTurn(finalizedTurn, session, messageType))
{
turnState.AwaitingTurnCompletion = false;
ResetBufferedAudio(session);
return [];
}
if (string.IsNullOrWhiteSpace(finalizedTurn.NormalizedTranscript) &&
string.IsNullOrWhiteSpace(finalizedTurn.RawTranscript))
{
@@ -493,6 +500,7 @@ public sealed class WebSocketTurnFinalizationService(
var messageType = ReadMessageType(turn);
var clientIntent = ReadAttribute(turn, "clientIntent");
var transcript = NormalizeTranscript(turn.NormalizedTranscript ?? turn.RawTranscript);
var listenRules = ReadRules(turn, "listenRules").Concat(ReadRules(turn, "clientRules")).ToArray();
if (string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) &&
!string.IsNullOrWhiteSpace(clientIntent))
@@ -515,6 +523,11 @@ public sealed class WebSocketTurnFinalizationService(
return true;
}
if (listenRules.Any(rule => string.Equals(rule, "word-of-the-day/puzzle", StringComparison.OrdinalIgnoreCase)))
{
return true;
}
return transcript is "joke" or "dance" or "time" or "date" or "today" or "day" or "hello" or "hi" or "hey";
}
@@ -567,4 +580,27 @@ public sealed class WebSocketTurnFinalizationService(
? value?.ToString()
: null;
}
/// <summary>
/// Decides whether a finalized turn is an empty follow-on for a transaction the
/// session has already answered, so the caller can drop it without replying.
/// </summary>
/// <param name="turn">The finalized turn whose transcripts and <c>transID</c> attribute are inspected.</param>
/// <param name="session">The session supplying turn state, <c>LastTransId</c> and <c>LastIntent</c>.</param>
/// <param name="messageType">The inbound message type; only <c>CLIENT_ASR</c> and <c>CLIENT_NLU</c> qualify (exact-case match).</param>
/// <returns>
/// <c>true</c> only when every condition holds: the message type qualifies, the session is
/// neither awaiting turn completion nor holding buffered audio, both transcripts are blank,
/// and the turn's <c>transID</c> is non-blank, equals <c>session.LastTransId</c> (ordinal),
/// and <c>session.LastIntent</c> is non-blank.
/// </returns>
private static bool ShouldIgnoreLateEmptyTurn(TurnContext turn, CloudSession session, string messageType)
{
    var isClientResultMessage = messageType is "CLIENT_ASR" or "CLIENT_NLU";
    if (!isClientResultMessage)
    {
        return false;
    }

    // A turn that is still in flight (pending completion or buffered audio) is never "late".
    var turnStillInFlight = session.TurnState.AwaitingTurnCompletion || session.TurnState.BufferedAudioBytes > 0;
    if (turnStillInFlight)
    {
        return false;
    }

    var transcriptsAreBlank = string.IsNullOrWhiteSpace(turn.NormalizedTranscript) &&
                              string.IsNullOrWhiteSpace(turn.RawTranscript);
    if (!transcriptsAreBlank)
    {
        return false;
    }

    // Only suppress when the empty turn belongs to the transaction that already produced an intent.
    var incomingTransId = ReadAttribute(turn, "transID");
    if (string.IsNullOrWhiteSpace(incomingTransId))
    {
        return false;
    }

    if (!string.Equals(incomingTransId, session.LastTransId, StringComparison.Ordinal))
    {
        return false;
    }

    return !string.IsNullOrWhiteSpace(session.LastIntent);
}
}

View File

@@ -111,6 +111,40 @@ public sealed class JiboInteractionServiceTests
Assert.Equal("I heard pastoral.", decision.ReplyText);
}
// A spoken answer with no structured "guess" entity must still resolve to a
// word-of-the-day guess when the listen rules include "word-of-the-day/puzzle".
[Fact]
public async Task BuildDecisionAsync_WordOfDayGuess_UsesSpokenTranscriptDuringPuzzleTurn()
{
    // Arrange
    var sut = CreateService();
    var spokenGuessTurn = new TurnContext
    {
        RawTranscript = "pastoral",
        NormalizedTranscript = "pastoral",
        Attributes = new Dictionary<string, object?>
        {
            ["listenRules"] = new[] { "word-of-the-day/puzzle" }
        }
    };

    // Act
    var result = await sut.BuildDecisionAsync(spokenGuessTurn);

    // Assert
    Assert.Equal("word_of_the_day_guess", result.IntentName);
    Assert.Equal("I heard pastoral.", result.ReplyText);
}
// A spoken launch phrase with no client intent or rules must map to the
// "word_of_the_day" intent and its fixed reply text.
[Fact]
public async Task BuildDecisionAsync_WordOfDayStartPhrase_MapsToSkillIntent()
{
    // Arrange
    var sut = CreateService();
    var launchPhraseTurn = new TurnContext
    {
        RawTranscript = "start word of the day",
        NormalizedTranscript = "start word of the day"
    };

    // Act
    var result = await sut.BuildDecisionAsync(launchPhraseTurn);

    // Assert
    Assert.Equal("word_of_the_day", result.IntentName);
    Assert.Equal("Word of the day is ready.", result.ReplyText);
}
private static JiboInteractionService CreateService()
{
return new JiboInteractionService(

View File

@@ -405,6 +405,72 @@ public sealed class JiboWebSocketServiceTests
Assert.Equal("word-of-the-day/puzzle", listenPayload.RootElement.GetProperty("data").GetProperty("match").GetProperty("rule").GetString());
}
// After a LISTEN that carries the "word-of-the-day/puzzle" rule, a CLIENT_ASR
// transcript on the same transID should come back as a word-of-the-day guess.
[Fact]
public async Task ClientAsr_WordOfDayGuess_UsesSpokenTranscriptDuringPuzzleTurn()
{
    // Both messages share the same connection identity; only the payload differs.
    static WebSocketMessageEnvelope HubListenEnvelope(string payload) => new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-wod-spoken-guess-token",
        Text = payload
    };

    // Arrange: open the listen with the puzzle rule active.
    await _service.HandleMessageAsync(HubListenEnvelope(
        """{"type":"LISTEN","transID":"trans-wod-spoken-guess","data":{"rules":["word-of-the-day/puzzle"],"asr":{"hints":["pastoral","doodad","escarpment"]}}}"""));

    // Act: deliver the spoken answer as a plain ASR result.
    var outbound = await _service.HandleMessageAsync(HubListenEnvelope(
        """{"type":"CLIENT_ASR","transID":"trans-wod-spoken-guess","data":{"text":"pastoral"}}"""));

    // Assert: reply sequence shape.
    Assert.Equal(3, outbound.Count);
    Assert.Equal("LISTEN", ReadReplyType(outbound[0]));
    Assert.Equal("EOS", ReadReplyType(outbound[1]));

    // Assert: the synthetic LISTEN payload carries the transcript, intent and matched rule.
    using var listenDocument = JsonDocument.Parse(outbound[0].Text!);
    var data = listenDocument.RootElement.GetProperty("data");
    Assert.Equal("pastoral", data.GetProperty("asr").GetProperty("text").GetString());
    Assert.Equal("word_of_the_day_guess", data.GetProperty("nlu").GetProperty("intent").GetString());
    Assert.Equal("word-of-the-day/puzzle", data.GetProperty("match").GetProperty("rule").GetString());
}
// Once a structured CLIENT_NLU guess has completed the turn, an empty CLIENT_ASR
// arriving on the same transID must produce no replies at all.
[Fact]
public async Task EmptyClientAsr_AfterCompletedWordOfDayTurn_IsIgnored()
{
    // All three messages share the same connection identity; only the payload differs.
    static WebSocketMessageEnvelope HubListenEnvelope(string payload) => new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-wod-late-empty-token",
        Text = payload
    };

    // Arrange: open the listen with the puzzle rule active.
    await _service.HandleMessageAsync(HubListenEnvelope(
        """{"type":"LISTEN","transID":"trans-wod-late-empty","data":{"rules":["word-of-the-day/puzzle"]}}"""));

    // Act 1: complete the turn with a structured guess.
    var completedTurnReplies = await _service.HandleMessageAsync(HubListenEnvelope(
        """{"type":"CLIENT_NLU","transID":"trans-wod-late-empty","data":{"entities":{"guess":"pastoral"},"intent":"guess","rules":["word-of-the-day/puzzle"]}}"""));

    Assert.Equal(2, completedTurnReplies.Count);

    // Act 2: the late, empty same-transaction ASR result arrives.
    var trailingReplies = await _service.HandleMessageAsync(HubListenEnvelope(
        """{"type":"CLIENT_ASR","transID":"trans-wod-late-empty","data":{}}"""));

    // Assert: nothing is sent back for the stale empty turn.
    Assert.Empty(trailingReplies);
}
[Fact]
public async Task BufferedAudio_WithSyntheticTranscriptHint_FinalizesThroughSttSeam()
{