more hey jibo and word of the day changes

This commit is contained in:
Jacob Dubin
2026-04-18 21:41:21 -05:00
parent d4b33a1635
commit 16768a0c80
13 changed files with 5994 additions and 0 deletions

View File

@@ -39,6 +39,11 @@ public sealed class WebSocketTurnFinalizationService(
CancellationToken cancellationToken = default)
{
var turnState = session.TurnState;
if (ShouldIgnoreLateAudio(session))
{
return [];
}
session.LastMessageType = "BINARY_AUDIO";
turnState.FirstAudioReceivedUtc ??= DateTimeOffset.UtcNow;
turnState.BufferedAudioChunkCount += 1;
@@ -312,6 +317,7 @@ public sealed class WebSocketTurnFinalizationService(
turnState.SawContext = false;
turnState.ListenHotphrase = false;
turnState.HotphraseEmptyTurnCount = 0;
turnState.IgnoreAdditionalAudioUntilUtc = null;
turnState.ListenRules = [];
turnState.ListenAsrHints = [];
}
@@ -359,6 +365,11 @@ public sealed class WebSocketTurnFinalizationService(
}
var turnState = session.TurnState;
if (ShouldTreatBufferedHotphraseAsGreeting(finalizedTurn, turnState, allowFallbackOnMissingTranscript))
{
finalizedTurn = WithSyntheticTranscript(finalizedTurn, "hello");
}
if (ShouldIgnoreCompletedWordOfDayTurn(finalizedTurn))
{
turnState.AwaitingTurnCompletion = false;
@@ -459,6 +470,9 @@ public sealed class WebSocketTurnFinalizationService(
? DateTimeOffset.UtcNow.Add(plan.FollowUp.Timeout)
: null;
turnState.AwaitingTurnCompletion = false;
turnState.IgnoreAdditionalAudioUntilUtc = plan.FollowUp.KeepMicOpen
? null
: DateTimeOffset.UtcNow.Add(WebSocketTurnState.DefaultLateAudioIgnoreWindow);
var emitSkillActions = messageType != "CLIENT_NLU" &&
!string.Equals(plan.IntentName, "word_of_the_day", StringComparison.OrdinalIgnoreCase) &&
@@ -488,6 +502,15 @@ public sealed class WebSocketTurnFinalizationService(
turnAge >= AutoFinalizeMinTurnAge;
}
/// <summary>
/// Decides whether an incoming audio chunk should be dropped because it
/// arrived inside the session's late-audio ignore window.
/// </summary>
/// <param name="session">Session whose turn state and follow-up flag are inspected.</param>
/// <returns>
/// <c>true</c> only when the turn is not awaiting completion, no follow-up is
/// open, and <c>IgnoreAdditionalAudioUntilUtc</c> is set to an instant that is
/// still in the future; otherwise <c>false</c>.
/// </returns>
private static bool ShouldIgnoreLateAudio(CloudSession session)
{
    var state = session.TurnState;

    // An in-flight turn or an open follow-up always accepts audio.
    if (state.AwaitingTurnCompletion || session.FollowUpOpen)
    {
        return false;
    }

    // A null deadline means no ignore window is active.
    return state.IgnoreAdditionalAudioUntilUtc is { } deadline &&
           deadline > DateTimeOffset.UtcNow;
}
private static string? ExtractDataPayload(string? text)
{
if (string.IsNullOrWhiteSpace(text))
@@ -701,6 +724,31 @@ public sealed class WebSocketTurnFinalizationService(
.Any(static rule => string.Equals(rule, "word-of-the-day/right_word", StringComparison.OrdinalIgnoreCase));
}
/// <summary>
/// Determines whether a hotphrase turn that produced no transcript should be
/// handled as a greeting, based on the audio that was buffered for it.
/// </summary>
/// <param name="turn">Finalized turn whose attributes, rules and transcripts are checked.</param>
/// <param name="turnState">Turn state supplying buffered-audio size, finalize attempts and the last STT error.</param>
/// <param name="allowFallbackOnMissingTranscript">When <c>false</c>, the fallback is never applied.</param>
/// <returns>
/// <c>true</c> when the fallback is allowed, the turn's "listenHotphrase"
/// attribute is set, its "listenRules" contain "launch" (case-insensitive),
/// both transcripts are blank, at least <c>AutoFinalizeMinBufferedAudioBytes</c>
/// were buffered, and either a finalize attempt was made or an STT error was
/// recorded; otherwise <c>false</c>.
/// </returns>
private static bool ShouldTreatBufferedHotphraseAsGreeting(
    TurnContext turn,
    WebSocketTurnState turnState,
    bool allowFallbackOnMissingTranscript)
{
    if (!allowFallbackOnMissingTranscript)
    {
        return false;
    }

    if (!ReadBoolAttribute(turn, "listenHotphrase"))
    {
        return false;
    }

    var hasLaunchRule = ReadRules(turn, "listenRules")
        .Any(static rule => string.Equals(rule, "launch", StringComparison.OrdinalIgnoreCase));
    if (!hasLaunchRule)
    {
        return false;
    }

    // Any real transcript (normalized or raw) means there is nothing to synthesize.
    var transcriptMissing =
        string.IsNullOrWhiteSpace(turn.NormalizedTranscript) &&
        string.IsNullOrWhiteSpace(turn.RawTranscript);
    if (!transcriptMissing)
    {
        return false;
    }

    if (turnState.BufferedAudioBytes < AutoFinalizeMinBufferedAudioBytes)
    {
        return false;
    }

    // Require evidence that transcription was attempted or failed.
    var finalizeWasAttempted = turnState.FinalizeAttemptCount > 0;
    return finalizeWasAttempted || !string.IsNullOrWhiteSpace(turnState.LastSttError);
}
private static bool ShouldTreatEmptyHotphraseTurnAsGreeting(TurnContext turn)
{
if (!string.IsNullOrWhiteSpace(turn.NormalizedTranscript) || !string.IsNullOrWhiteSpace(turn.RawTranscript))

View File

@@ -2,10 +2,13 @@ namespace Jibo.Cloud.Domain.Models;
public sealed class WebSocketTurnState
{
/// <summary>
/// Default length (two seconds) of the window during which additional audio
/// is ignored; see <see cref="IgnoreAdditionalAudioUntilUtc"/>.
/// </summary>
public static readonly TimeSpan DefaultLateAudioIgnoreWindow = TimeSpan.FromSeconds(2);
public string? TransId { get; set; }
public string? ContextPayload { get; set; }
public bool ListenHotphrase { get; set; }
public int HotphraseEmptyTurnCount { get; set; }
/// <summary>
/// UTC instant until which additional incoming audio may be ignored, or
/// <c>null</c> when no ignore window is active.
/// </summary>
public DateTimeOffset? IgnoreAdditionalAudioUntilUtc { get; set; }
public string? AudioTranscriptHint { get; set; }
public string? LastSttError { get; set; }
public DateTimeOffset? LastSttErrorUtc { get; set; }

View File

@@ -498,6 +498,72 @@ public sealed class JiboWebSocketServiceTests
Assert.False(session.FollowUpOpen);
}
/// <summary>
/// After a hotphrase launch turn with a "play word of the day" transcript hint
/// finalizes on buffered audio (LISTEN + EOS), a further audio chunk on the
/// same session must produce no replies and must not reopen the turn.
/// </summary>
[Fact]
public async Task AutoFinalizedWordOfDayLaunch_IgnoresLateSameTurnAudio()
{
    const string token = "hub-wod-auto-token";

    // Text frame addressed to the listen endpoint for this test's session.
    WebSocketMessageEnvelope TextFrame(string json) => new()
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = token,
        Text = json
    };

    // Binary audio frame carrying a fresh 3000-byte buffer.
    WebSocketMessageEnvelope AudioFrame() => new()
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = token,
        Binary = new byte[3000]
    };

    await _service.HandleMessageAsync(TextFrame(
        """{"type":"LISTEN","transID":"trans-wod-auto","data":{"hotphrase":true,"rules":["launch","globals/global_commands_launch"]}}"""));
    await _service.HandleMessageAsync(TextFrame(
        """{"type":"CONTEXT","transID":"trans-wod-auto","data":{"audioTranscriptHint":"play word of the day"}}"""));

    // The first four chunks are only acknowledged.
    for (var chunk = 0; chunk < 4; chunk++)
    {
        var ackReplies = await _service.HandleMessageAsync(AudioFrame());
        Assert.Single(ackReplies);
        Assert.Equal("OPENJIBO_AUDIO_RECEIVED", ReadReplyType(ackReplies[0]));
    }

    var session = _store.FindSessionByToken(token);
    Assert.NotNull(session);

    // Backdate the first-audio timestamp by two seconds — presumably so the
    // turn is old enough for the next chunk to finalize it.
    session.TurnState.FirstAudioReceivedUtc = DateTimeOffset.UtcNow - TimeSpan.FromSeconds(2);

    var finalizingReplies = await _service.HandleMessageAsync(AudioFrame());
    Assert.Equal(2, finalizingReplies.Count);
    Assert.Equal("LISTEN", ReadReplyType(finalizingReplies[0]));
    Assert.Equal("EOS", ReadReplyType(finalizingReplies[1]));

    // Audio arriving right after finalization must be dropped silently.
    var lateReplies = await _service.HandleMessageAsync(AudioFrame());
    Assert.Empty(lateReplies);
    Assert.False(session.TurnState.AwaitingTurnCompletion);
}
[Fact]
public async Task EmptyClientAsr_AfterCompletedWordOfDayTurn_IsIgnored()
{
@@ -791,6 +857,68 @@ public sealed class JiboWebSocketServiceTests
Assert.Equal("chitchat-skill", skillPayload.RootElement.GetProperty("data").GetProperty("skill").GetProperty("id").GetString());
}
/// <summary>
/// A hotphrase launch turn with buffered audio and a recorded STT error is
/// expected to finalize as a synthetic "hello" greeting: replies are LISTEN,
/// EOS and SKILL_ACTION, and the session's follow-up stays open.
/// </summary>
[Fact]
public async Task BufferedHotphraseAudio_WithSttFailure_BecomesGreetingAndKeepsFollowUpOpen()
{
    const string token = "hub-hotphrase-greeting-token";

    // Text frame addressed to the listen endpoint for this test's session.
    WebSocketMessageEnvelope TextFrame(string json) => new()
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = token,
        Text = json
    };

    // Binary audio frame carrying a fresh 3000-byte buffer.
    WebSocketMessageEnvelope AudioFrame() => new()
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = token,
        Binary = new byte[3000]
    };

    await _service.HandleMessageAsync(TextFrame(
        """{"type":"LISTEN","transID":"trans-hotphrase-greeting","data":{"hotphrase":true,"rules":["launch","globals/global_commands_launch"]}}"""));
    await _service.HandleMessageAsync(TextFrame(
        """{"type":"CONTEXT","transID":"trans-hotphrase-greeting","data":{"topic":"conversation"}}"""));

    // The first four chunks are only acknowledged.
    for (var chunk = 0; chunk < 4; chunk++)
    {
        var ackReplies = await _service.HandleMessageAsync(AudioFrame());
        Assert.Single(ackReplies);
        Assert.Equal("OPENJIBO_AUDIO_RECEIVED", ReadReplyType(ackReplies[0]));
    }

    var session = _store.FindSessionByToken(token);
    Assert.NotNull(session);

    // Backdate the first-audio timestamp and record an STT failure before the
    // chunk that is expected to finalize the turn.
    session.TurnState.FirstAudioReceivedUtc = DateTimeOffset.UtcNow - TimeSpan.FromSeconds(2);
    session.TurnState.LastSttError = "ffmpeg decode failed";

    var finalizingReplies = await _service.HandleMessageAsync(AudioFrame());
    Assert.Equal(3, finalizingReplies.Count);
    Assert.Equal("LISTEN", ReadReplyType(finalizingReplies[0]));
    Assert.Equal("EOS", ReadReplyType(finalizingReplies[1]));
    Assert.Equal("SKILL_ACTION", ReadReplyType(finalizingReplies[2]));

    // The LISTEN reply must carry the synthetic greeting as both ASR text and NLU intent.
    using var listenPayload = JsonDocument.Parse(finalizingReplies[0].Text!);
    var listenData = listenPayload.RootElement.GetProperty("data");
    Assert.Equal("hello", listenData.GetProperty("asr").GetProperty("text").GetString());
    Assert.Equal("hello", listenData.GetProperty("nlu").GetProperty("intent").GetString());
    Assert.True(session.FollowUpOpen);
}
[Fact]
public async Task ClientAsrJokeFlow_MatchesNodePayloadShapeForEosAndSkillAction()
{