wod and time improvements
This commit is contained in:
@@ -90,12 +90,20 @@ Evidence from the latest word-of-the-day capture round:
|
||||
- word-of-the-day guesses can arrive as structured `CLIENT_NLU` turns with `intent=guess`, `rules=["word-of-the-day/puzzle"]`, and `entities.guess=<word>`
|
||||
- those structured turns should be treated as first-class cloud inputs even when no free-form transcript is present
|
||||
|
||||
Evidence from the continued `2026-04-18` word-of-the-day and time captures:
|
||||
|
||||
- spoken "start word of the day" style requests should route into the same word-of-the-day launch path as the menu destination
|
||||
- spoken puzzle answers like `pastoral` should be treated as valid guesses whenever the active listen rules show `word-of-the-day/puzzle`
|
||||
- after a successful word-of-the-day completion, late empty same-turn audio should be ignored instead of generating a stale blank-audio follow-up
|
||||
- clock replies should use the user-facing hour format without a leading zero
|
||||
|
||||
Near-term interaction work should now prioritize:
|
||||
|
||||
1. preserve and interpret yes/no turn constraints from observed listen rules
|
||||
2. broaden phrase-to-intent matching for the small set of known working skills before moving to larger NLU ambitions
|
||||
3. keep synthetic transcript hints as the most reliable parity path when captures already provide them
|
||||
4. continue evaluating whether local preprocessing is worth further investment or whether managed STT should replace it for the next serious testing phase
|
||||
5. start separating laptop-local capture storage from the eventual hosted retention/export path so group testing does not depend on repo-local zip handling
|
||||
|
||||
## Capture Storage Direction
|
||||
|
||||
|
||||
@@ -111,7 +111,11 @@ Current raw-audio behavior is still a compatibility bridge:
|
||||
- follow-up turns now preserve enough constraint state to distinguish yes/no-style replies from ordinary free-form chat
|
||||
- create-flow yes/no turns now preserve `create/is_it_a_keeper` and `domain=create` in the outbound synthetic `LISTEN` payload
|
||||
- structured word-of-the-day guesses now complete as `CLIENT_NLU` turns instead of falling back to pending/blank-audio behavior
|
||||
- spoken word-of-the-day launch phrases now route into the same cloud intent as the on-screen menu path
|
||||
- spoken word-of-the-day puzzle answers now complete against `word-of-the-day/puzzle` listen rules instead of degrading into generic chat
|
||||
- late empty same-turn `CLIENT_ASR` follow-ons are ignored after a completed turn so word-of-the-day wins do not get tailed by stale blank-audio comments
|
||||
- phrase matching has been widened slightly for known test prompts such as joke, dance, surprise, weather, calendar, commute, and news variants
|
||||
- time replies now use the natural hour format without a leading zero
|
||||
|
||||
## Buffered Audio STT
|
||||
|
||||
|
||||
@@ -17,22 +17,23 @@ public sealed class JiboInteractionService(
|
||||
? rawClientIntent?.ToString()
|
||||
: null;
|
||||
var clientRules = ReadRules(turn, "clientRules").ToArray();
|
||||
var listenRules = ReadRules(turn, "listenRules").ToArray();
|
||||
var clientEntities = ReadEntities(turn);
|
||||
var isYesNoTurn = IsYesNoTurn(turn);
|
||||
|
||||
var semanticIntent = ResolveSemanticIntent(lowered, clientIntent, clientRules, clientEntities, isYesNoTurn);
|
||||
var semanticIntent = ResolveSemanticIntent(lowered, clientIntent, clientRules, listenRules, clientEntities, isYesNoTurn);
|
||||
return semanticIntent switch
|
||||
{
|
||||
"joke" => BuildJokeDecision(catalog),
|
||||
"dance" => BuildDanceDecision(catalog),
|
||||
"time" => new JiboInteractionDecision("time", $"It is {DateTime.Now:hh:mm tt}."),
|
||||
"time" => new JiboInteractionDecision("time", $"It is {DateTime.Now:h:mm tt}."),
|
||||
"date" => new JiboInteractionDecision("date", $"Today is {DateTime.Now:dddd, MMMM d}."),
|
||||
"hello" => new JiboInteractionDecision("hello", randomizer.Choose(catalog.GreetingReplies)),
|
||||
"how_are_you" => new JiboInteractionDecision("how_are_you", randomizer.Choose(catalog.HowAreYouReplies)),
|
||||
"yes" => new JiboInteractionDecision("yes", "Yes."),
|
||||
"no" => new JiboInteractionDecision("no", "No."),
|
||||
"word_of_the_day" => new JiboInteractionDecision("word_of_the_day", "Word of the day is ready."),
|
||||
"word_of_the_day_guess" => BuildWordOfTheDayGuessDecision(clientEntities),
|
||||
"word_of_the_day_guess" => BuildWordOfTheDayGuessDecision(clientEntities, transcript),
|
||||
"surprise" => new JiboInteractionDecision("surprise", randomizer.Choose(catalog.SurpriseReplies)),
|
||||
"personal_report" => new JiboInteractionDecision("personal_report", randomizer.Choose(catalog.PersonalReportReplies)),
|
||||
"weather" => new JiboInteractionDecision("weather", randomizer.Choose(catalog.WeatherReplies)),
|
||||
@@ -98,11 +99,15 @@ public sealed class JiboInteractionService(
|
||||
string loweredTranscript,
|
||||
string? clientIntent,
|
||||
IReadOnlyList<string> clientRules,
|
||||
IReadOnlyList<string> listenRules,
|
||||
IReadOnlyDictionary<string, string> clientEntities,
|
||||
bool isYesNoTurn)
|
||||
{
|
||||
var wordOfDayPuzzleTurn = clientRules.Concat(listenRules)
|
||||
.Any(rule => string.Equals(rule, "word-of-the-day/puzzle", StringComparison.OrdinalIgnoreCase));
|
||||
|
||||
if (string.Equals(clientIntent, "guess", StringComparison.OrdinalIgnoreCase) &&
|
||||
clientRules.Any(rule => string.Equals(rule, "word-of-the-day/puzzle", StringComparison.OrdinalIgnoreCase)))
|
||||
wordOfDayPuzzleTurn)
|
||||
{
|
||||
return "word_of_the_day_guess";
|
||||
}
|
||||
@@ -124,6 +129,22 @@ public sealed class JiboInteractionService(
|
||||
return "date";
|
||||
}
|
||||
|
||||
if (MatchesAny(
|
||||
loweredTranscript,
|
||||
"word of the day",
|
||||
"start word of the day",
|
||||
"play word of the day",
|
||||
"do word of the day",
|
||||
"open word of the day"))
|
||||
{
|
||||
return "word_of_the_day";
|
||||
}
|
||||
|
||||
if (wordOfDayPuzzleTurn && !string.IsNullOrWhiteSpace(loweredTranscript))
|
||||
{
|
||||
return "word_of_the_day_guess";
|
||||
}
|
||||
|
||||
if (MatchesAny(loweredTranscript, "joke", "funny", "make me laugh"))
|
||||
{
|
||||
return "joke";
|
||||
@@ -200,11 +221,13 @@ public sealed class JiboInteractionService(
|
||||
return "chat";
|
||||
}
|
||||
|
||||
private static JiboInteractionDecision BuildWordOfTheDayGuessDecision(IReadOnlyDictionary<string, string> clientEntities)
|
||||
private static JiboInteractionDecision BuildWordOfTheDayGuessDecision(
|
||||
IReadOnlyDictionary<string, string> clientEntities,
|
||||
string transcript)
|
||||
{
|
||||
var guess = clientEntities.TryGetValue("guess", out var guessValue)
|
||||
? guessValue
|
||||
: string.Empty;
|
||||
: transcript;
|
||||
|
||||
var reply = string.IsNullOrWhiteSpace(guess)
|
||||
? "I heard your word of the day guess."
|
||||
|
||||
@@ -330,6 +330,13 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
}
|
||||
|
||||
var turnState = session.TurnState;
|
||||
if (ShouldIgnoreLateEmptyTurn(finalizedTurn, session, messageType))
|
||||
{
|
||||
turnState.AwaitingTurnCompletion = false;
|
||||
ResetBufferedAudio(session);
|
||||
return [];
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(finalizedTurn.NormalizedTranscript) &&
|
||||
string.IsNullOrWhiteSpace(finalizedTurn.RawTranscript))
|
||||
{
|
||||
@@ -493,6 +500,7 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
var messageType = ReadMessageType(turn);
|
||||
var clientIntent = ReadAttribute(turn, "clientIntent");
|
||||
var transcript = NormalizeTranscript(turn.NormalizedTranscript ?? turn.RawTranscript);
|
||||
var listenRules = ReadRules(turn, "listenRules").Concat(ReadRules(turn, "clientRules")).ToArray();
|
||||
|
||||
if (string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) &&
|
||||
!string.IsNullOrWhiteSpace(clientIntent))
|
||||
@@ -515,6 +523,11 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
return true;
|
||||
}
|
||||
|
||||
if (listenRules.Any(rule => string.Equals(rule, "word-of-the-day/puzzle", StringComparison.OrdinalIgnoreCase)))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
return transcript is "joke" or "dance" or "time" or "date" or "today" or "day" or "hello" or "hi" or "hey";
|
||||
}
|
||||
|
||||
@@ -567,4 +580,27 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
? value?.ToString()
|
||||
: null;
|
||||
}
|
||||
|
||||
/// <summary>
/// Decides whether a finalized turn is a late, empty follow-on to a turn that has
/// already produced an intent and can therefore be dropped.
/// </summary>
/// <param name="turn">The finalized turn; its transcripts and <c>transID</c> attribute are inspected.</param>
/// <param name="session">The session whose turn state, last transaction id, and last intent are compared against.</param>
/// <param name="messageType">The message type of the incoming turn; only <c>CLIENT_ASR</c> and <c>CLIENT_NLU</c> qualify.</param>
/// <returns>
/// <c>true</c> when the turn is a transcript-less <c>CLIENT_ASR</c>/<c>CLIENT_NLU</c> message that shares its
/// <c>transID</c> with the session's last transaction, the session already recorded an intent, and no turn
/// completion or buffered audio is pending; otherwise <c>false</c>.
/// </returns>
private static bool ShouldIgnoreLateEmptyTurn(TurnContext turn, CloudSession session, string messageType)
{
    // Message types are compared case-insensitively, matching the other message-type checks in this class.
    var isClientTurnMessage =
        string.Equals(messageType, "CLIENT_ASR", StringComparison.OrdinalIgnoreCase) ||
        string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase);
    if (!isClientTurnMessage)
    {
        return false;
    }

    // A turn that is still awaiting completion, or that has audio buffered, is live rather than late.
    if (session.TurnState.AwaitingTurnCompletion || session.TurnState.BufferedAudioBytes > 0)
    {
        return false;
    }

    // Any transcript content means the turn is not empty and must be processed normally.
    if (!string.IsNullOrWhiteSpace(turn.NormalizedTranscript) || !string.IsNullOrWhiteSpace(turn.RawTranscript))
    {
        return false;
    }

    // Only drop the turn when it belongs to the same transaction that already yielded an intent.
    var turnTransId = ReadAttribute(turn, "transID");
    return !string.IsNullOrWhiteSpace(turnTransId) &&
           string.Equals(turnTransId, session.LastTransId, StringComparison.Ordinal) &&
           !string.IsNullOrWhiteSpace(session.LastIntent);
}
|
||||
}
|
||||
|
||||
@@ -111,6 +111,40 @@ public sealed class JiboInteractionServiceTests
|
||||
Assert.Equal("I heard pastoral.", decision.ReplyText);
|
||||
}
|
||||
|
||||
[Fact]
public async Task BuildDecisionAsync_WordOfDayGuess_UsesSpokenTranscriptDuringPuzzleTurn()
{
    // Arrange: a spoken turn with no client intent or entities; the only puzzle
    // signal is the "word-of-the-day/puzzle" entry in the listenRules attribute.
    var sut = CreateService();
    var spokenGuessTurn = new TurnContext
    {
        RawTranscript = "pastoral",
        NormalizedTranscript = "pastoral",
        Attributes = new Dictionary<string, object?>
        {
            ["listenRules"] = new[] { "word-of-the-day/puzzle" }
        }
    };

    // Act
    var result = await sut.BuildDecisionAsync(spokenGuessTurn);

    // Assert: the transcript itself is treated as the guess.
    Assert.Equal("word_of_the_day_guess", result.IntentName);
    Assert.Equal("I heard pastoral.", result.ReplyText);
}
|
||||
|
||||
[Fact]
public async Task BuildDecisionAsync_WordOfDayStartPhrase_MapsToSkillIntent()
{
    // Arrange: a plain spoken launch phrase with no attributes attached.
    var sut = CreateService();
    var launchTurn = new TurnContext
    {
        RawTranscript = "start word of the day",
        NormalizedTranscript = "start word of the day"
    };

    // Act
    var result = await sut.BuildDecisionAsync(launchTurn);

    // Assert: the phrase resolves to the word-of-the-day intent and its reply.
    Assert.Equal("word_of_the_day", result.IntentName);
    Assert.Equal("Word of the day is ready.", result.ReplyText);
}
|
||||
|
||||
private static JiboInteractionService CreateService()
|
||||
{
|
||||
return new JiboInteractionService(
|
||||
|
||||
@@ -405,6 +405,72 @@ public sealed class JiboWebSocketServiceTests
|
||||
Assert.Equal("word-of-the-day/puzzle", listenPayload.RootElement.GetProperty("data").GetProperty("match").GetProperty("rule").GetString());
|
||||
}
|
||||
|
||||
[Fact]
public async Task ClientAsr_WordOfDayGuess_UsesSpokenTranscriptDuringPuzzleTurn()
{
    // Both messages share the same host, path, kind, and token; only the payload differs.
    static WebSocketMessageEnvelope ListenEnvelope(string payload) => new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-wod-spoken-guess-token",
        Text = payload
    };

    // Arrange: open a listen with the puzzle rule active.
    await _service.HandleMessageAsync(ListenEnvelope(
        """{"type":"LISTEN","transID":"trans-wod-spoken-guess","data":{"rules":["word-of-the-day/puzzle"],"asr":{"hints":["pastoral","doodad","escarpment"]}}}"""));

    // Act: send the spoken guess as a CLIENT_ASR message on the same transID.
    var responses = await _service.HandleMessageAsync(ListenEnvelope(
        """{"type":"CLIENT_ASR","transID":"trans-wod-spoken-guess","data":{"text":"pastoral"}}"""));

    // Assert: three replies, starting with LISTEN then EOS.
    Assert.Equal(3, responses.Count);
    Assert.Equal("LISTEN", ReadReplyType(responses[0]));
    Assert.Equal("EOS", ReadReplyType(responses[1]));

    // Assert: the LISTEN payload carries the transcript, the guess intent, and the matched rule.
    using var listenDocument = JsonDocument.Parse(responses[0].Text!);
    var data = listenDocument.RootElement.GetProperty("data");
    Assert.Equal("pastoral", data.GetProperty("asr").GetProperty("text").GetString());
    Assert.Equal("word_of_the_day_guess", data.GetProperty("nlu").GetProperty("intent").GetString());
    Assert.Equal("word-of-the-day/puzzle", data.GetProperty("match").GetProperty("rule").GetString());
}
|
||||
|
||||
[Fact]
public async Task EmptyClientAsr_AfterCompletedWordOfDayTurn_IsIgnored()
{
    // All three messages share the same host, path, kind, and token; only the payload differs.
    static WebSocketMessageEnvelope ListenEnvelope(string payload) => new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-wod-late-empty-token",
        Text = payload
    };

    // Arrange: open a listen with the puzzle rule active.
    await _service.HandleMessageAsync(ListenEnvelope(
        """{"type":"LISTEN","transID":"trans-wod-late-empty","data":{"rules":["word-of-the-day/puzzle"]}}"""));

    // Act 1: the structured guess arrives as CLIENT_NLU and yields two replies.
    var guessResponses = await _service.HandleMessageAsync(ListenEnvelope(
        """{"type":"CLIENT_NLU","transID":"trans-wod-late-empty","data":{"entities":{"guess":"pastoral"},"intent":"guess","rules":["word-of-the-day/puzzle"]}}"""));

    Assert.Equal(2, guessResponses.Count);

    // Act 2: an empty CLIENT_ASR then arrives on the same transID.
    var trailingResponses = await _service.HandleMessageAsync(ListenEnvelope(
        """{"type":"CLIENT_ASR","transID":"trans-wod-late-empty","data":{}}"""));

    // Assert: the trailing empty message produces no replies.
    Assert.Empty(trailingResponses);
}
|
||||
|
||||
[Fact]
|
||||
public async Task BufferedAudio_WithSyntheticTranscriptHint_FinalizesThroughSttSeam()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user