More "Hey Jibo" and word-of-the-day changes
This commit is contained in:
@@ -39,6 +39,11 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
var turnState = session.TurnState;
|
||||
if (ShouldIgnoreLateAudio(session))
|
||||
{
|
||||
return [];
|
||||
}
|
||||
|
||||
session.LastMessageType = "BINARY_AUDIO";
|
||||
turnState.FirstAudioReceivedUtc ??= DateTimeOffset.UtcNow;
|
||||
turnState.BufferedAudioChunkCount += 1;
|
||||
@@ -312,6 +317,7 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
turnState.SawContext = false;
|
||||
turnState.ListenHotphrase = false;
|
||||
turnState.HotphraseEmptyTurnCount = 0;
|
||||
turnState.IgnoreAdditionalAudioUntilUtc = null;
|
||||
turnState.ListenRules = [];
|
||||
turnState.ListenAsrHints = [];
|
||||
}
|
||||
@@ -359,6 +365,11 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
}
|
||||
|
||||
var turnState = session.TurnState;
|
||||
if (ShouldTreatBufferedHotphraseAsGreeting(finalizedTurn, turnState, allowFallbackOnMissingTranscript))
|
||||
{
|
||||
finalizedTurn = WithSyntheticTranscript(finalizedTurn, "hello");
|
||||
}
|
||||
|
||||
if (ShouldIgnoreCompletedWordOfDayTurn(finalizedTurn))
|
||||
{
|
||||
turnState.AwaitingTurnCompletion = false;
|
||||
@@ -459,6 +470,9 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
? DateTimeOffset.UtcNow.Add(plan.FollowUp.Timeout)
|
||||
: null;
|
||||
turnState.AwaitingTurnCompletion = false;
|
||||
turnState.IgnoreAdditionalAudioUntilUtc = plan.FollowUp.KeepMicOpen
|
||||
? null
|
||||
: DateTimeOffset.UtcNow.Add(WebSocketTurnState.DefaultLateAudioIgnoreWindow);
|
||||
|
||||
var emitSkillActions = messageType != "CLIENT_NLU" &&
|
||||
!string.Equals(plan.IntentName, "word_of_the_day", StringComparison.OrdinalIgnoreCase) &&
|
||||
@@ -488,6 +502,15 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
turnAge >= AutoFinalizeMinTurnAge;
|
||||
}
|
||||
|
||||
/// <summary>
/// Decides whether an incoming audio chunk falls inside the session's late-audio ignore window.
/// </summary>
/// <param name="session">Session whose turn state and follow-up flag are inspected.</param>
/// <returns>
/// <c>true</c> only when the turn is not awaiting completion, no follow-up is open, and
/// <c>IgnoreAdditionalAudioUntilUtc</c> holds an instant that is still in the future;
/// otherwise <c>false</c>.
/// </returns>
private static bool ShouldIgnoreLateAudio(CloudSession session)
{
    var deadline = session.TurnState.IgnoreAdditionalAudioUntilUtc;

    // Audio is never dropped while a turn is still being completed.
    if (session.TurnState.AwaitingTurnCompletion)
    {
        return false;
    }

    // An open follow-up also means audio must not be dropped.
    if (session.FollowUpOpen)
    {
        return false;
    }

    // No deadline recorded, or a deadline that has already passed, means nothing to ignore.
    return deadline is { } ignoreUntil && ignoreUntil > DateTimeOffset.UtcNow;
}
|
||||
|
||||
private static string? ExtractDataPayload(string? text)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(text))
|
||||
@@ -701,6 +724,31 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
.Any(static rule => string.Equals(rule, "word-of-the-day/right_word", StringComparison.OrdinalIgnoreCase));
|
||||
}
|
||||
|
||||
/// <summary>
/// Decides whether a hotphrase turn that produced no transcript should be handled as a greeting.
/// </summary>
/// <param name="turn">Finalized turn whose attributes, listen rules, and transcripts are checked.</param>
/// <param name="turnState">Turn state supplying buffered audio size, finalize attempts, and the last STT error.</param>
/// <param name="allowFallbackOnMissingTranscript">When <c>false</c> the method returns <c>false</c> immediately.</param>
/// <returns>
/// <c>true</c> when the fallback is allowed, the turn has the <c>listenHotphrase</c> attribute and a
/// <c>launch</c> listen rule, both transcripts are blank, at least
/// <c>AutoFinalizeMinBufferedAudioBytes</c> bytes are buffered, and either a finalize attempt was made
/// or an STT error was recorded.
/// </returns>
private static bool ShouldTreatBufferedHotphraseAsGreeting(
    TurnContext turn,
    WebSocketTurnState turnState,
    bool allowFallbackOnMissingTranscript)
{
    // The attribute is only read when the fallback is permitted at all.
    var hotphraseFallbackEligible =
        allowFallbackOnMissingTranscript && ReadBoolAttribute(turn, "listenHotphrase");
    if (!hotphraseFallbackEligible)
    {
        return false;
    }

    var listensForLaunch = ReadRules(turn, "listenRules")
        .Any(static candidate => string.Equals(candidate, "launch", StringComparison.OrdinalIgnoreCase));
    if (!listensForLaunch)
    {
        return false;
    }

    // Any non-blank transcript (normalized or raw) rules the fallback out.
    var transcriptMissing =
        string.IsNullOrWhiteSpace(turn.NormalizedTranscript) &&
        string.IsNullOrWhiteSpace(turn.RawTranscript);
    if (!transcriptMissing)
    {
        return false;
    }

    var hasEnoughBufferedAudio = turnState.BufferedAudioBytes >= AutoFinalizeMinBufferedAudioBytes;

    return hasEnoughBufferedAudio &&
           (turnState.FinalizeAttemptCount > 0 || !string.IsNullOrWhiteSpace(turnState.LastSttError));
}
|
||||
|
||||
private static bool ShouldTreatEmptyHotphraseTurnAsGreeting(TurnContext turn)
|
||||
{
|
||||
if (!string.IsNullOrWhiteSpace(turn.NormalizedTranscript) || !string.IsNullOrWhiteSpace(turn.RawTranscript))
|
||||
|
||||
@@ -2,10 +2,13 @@ namespace Jibo.Cloud.Domain.Models;
|
||||
|
||||
public sealed class WebSocketTurnState
|
||||
{
|
||||
public static readonly TimeSpan DefaultLateAudioIgnoreWindow = TimeSpan.FromSeconds(2);
|
||||
|
||||
public string? TransId { get; set; }
|
||||
public string? ContextPayload { get; set; }
|
||||
public bool ListenHotphrase { get; set; }
|
||||
public int HotphraseEmptyTurnCount { get; set; }
|
||||
public DateTimeOffset? IgnoreAdditionalAudioUntilUtc { get; set; }
|
||||
public string? AudioTranscriptHint { get; set; }
|
||||
public string? LastSttError { get; set; }
|
||||
public DateTimeOffset? LastSttErrorUtc { get; set; }
|
||||
|
||||
@@ -498,6 +498,72 @@ public sealed class JiboWebSocketServiceTests
|
||||
Assert.False(session.FollowUpOpen);
|
||||
}
|
||||
|
||||
[Fact]
public async Task AutoFinalizedWordOfDayLaunch_IgnoresLateSameTurnAudio()
{
    const string token = "hub-wod-auto-token";

    // Every frame in this scenario goes to the same hub listen endpoint with the same token.
    WebSocketMessageEnvelope TextFrame(string json)
    {
        return new WebSocketMessageEnvelope
        {
            HostName = "neo-hub.jibo.com",
            Path = "/listen",
            Kind = "neo-hub-listen",
            Token = token,
            Text = json
        };
    }

    // Each audio frame carries a fresh 3000-byte zeroed buffer.
    WebSocketMessageEnvelope AudioFrame()
    {
        return new WebSocketMessageEnvelope
        {
            HostName = "neo-hub.jibo.com",
            Path = "/listen",
            Kind = "neo-hub-listen",
            Token = token,
            Binary = new byte[3000]
        };
    }

    // Open a hotphrase LISTEN turn, then supply a CONTEXT carrying the transcript hint.
    await _service.HandleMessageAsync(TextFrame(
        """{"type":"LISTEN","transID":"trans-wod-auto","data":{"hotphrase":true,"rules":["launch","globals/global_commands_launch"]}}"""));
    await _service.HandleMessageAsync(TextFrame(
        """{"type":"CONTEXT","transID":"trans-wod-auto","data":{"audioTranscriptHint":"play word of the day"}}"""));

    // The first four chunks are each acknowledged individually.
    for (var chunk = 0; chunk < 4; chunk++)
    {
        var interimReplies = await _service.HandleMessageAsync(AudioFrame());

        var acknowledgement = Assert.Single(interimReplies);
        Assert.Equal("OPENJIBO_AUDIO_RECEIVED", ReadReplyType(acknowledgement));
    }

    var session = _store.FindSessionByToken(token);
    Assert.NotNull(session);

    // Backdate the first-audio timestamp by two seconds
    // (presumably so the turn age passes the auto-finalize threshold - confirm against the service).
    session.TurnState.FirstAudioReceivedUtc = DateTimeOffset.UtcNow.Subtract(TimeSpan.FromSeconds(2));

    // The fifth chunk is expected to yield LISTEN followed by EOS.
    var replies = await _service.HandleMessageAsync(AudioFrame());

    Assert.Equal(2, replies.Count);
    Assert.Equal("LISTEN", ReadReplyType(replies[0]));
    Assert.Equal("EOS", ReadReplyType(replies[1]));

    // A further chunk right after finalization must produce no replies and must not reopen the turn.
    var lateReplies = await _service.HandleMessageAsync(AudioFrame());

    Assert.Empty(lateReplies);
    Assert.False(session.TurnState.AwaitingTurnCompletion);
}
|
||||
|
||||
[Fact]
|
||||
public async Task EmptyClientAsr_AfterCompletedWordOfDayTurn_IsIgnored()
|
||||
{
|
||||
@@ -791,6 +857,68 @@ public sealed class JiboWebSocketServiceTests
|
||||
Assert.Equal("chitchat-skill", skillPayload.RootElement.GetProperty("data").GetProperty("skill").GetProperty("id").GetString());
|
||||
}
|
||||
|
||||
[Fact]
public async Task BufferedHotphraseAudio_WithSttFailure_BecomesGreetingAndKeepsFollowUpOpen()
{
    const string token = "hub-hotphrase-greeting-token";

    // Every frame in this scenario goes to the same hub listen endpoint with the same token.
    WebSocketMessageEnvelope TextFrame(string json)
    {
        return new WebSocketMessageEnvelope
        {
            HostName = "neo-hub.jibo.com",
            Path = "/listen",
            Kind = "neo-hub-listen",
            Token = token,
            Text = json
        };
    }

    // Each audio frame carries a fresh 3000-byte zeroed buffer.
    WebSocketMessageEnvelope AudioFrame()
    {
        return new WebSocketMessageEnvelope
        {
            HostName = "neo-hub.jibo.com",
            Path = "/listen",
            Kind = "neo-hub-listen",
            Token = token,
            Binary = new byte[3000]
        };
    }

    // Open a hotphrase LISTEN turn, then send a CONTEXT that carries no transcript hint.
    await _service.HandleMessageAsync(TextFrame(
        """{"type":"LISTEN","transID":"trans-hotphrase-greeting","data":{"hotphrase":true,"rules":["launch","globals/global_commands_launch"]}}"""));
    await _service.HandleMessageAsync(TextFrame(
        """{"type":"CONTEXT","transID":"trans-hotphrase-greeting","data":{"topic":"conversation"}}"""));

    // The first four chunks are each acknowledged individually.
    for (var chunk = 0; chunk < 4; chunk++)
    {
        var interimReplies = await _service.HandleMessageAsync(AudioFrame());

        var acknowledgement = Assert.Single(interimReplies);
        Assert.Equal("OPENJIBO_AUDIO_RECEIVED", ReadReplyType(acknowledgement));
    }

    var session = _store.FindSessionByToken(token);
    Assert.NotNull(session);

    // Backdate the first-audio timestamp by two seconds and record an STT error on the turn.
    var turnState = session.TurnState;
    turnState.FirstAudioReceivedUtc = DateTimeOffset.UtcNow.Subtract(TimeSpan.FromSeconds(2));
    turnState.LastSttError = "ffmpeg decode failed";

    // The fifth chunk is expected to yield LISTEN, EOS, and a SKILL_ACTION.
    var replies = await _service.HandleMessageAsync(AudioFrame());

    Assert.Equal(3, replies.Count);
    Assert.Equal("LISTEN", ReadReplyType(replies[0]));
    Assert.Equal("EOS", ReadReplyType(replies[1]));
    Assert.Equal("SKILL_ACTION", ReadReplyType(replies[2]));

    // The LISTEN reply must carry "hello" as both the ASR text and the NLU intent.
    using var listenPayload = JsonDocument.Parse(replies[0].Text!);
    var listenData = listenPayload.RootElement.GetProperty("data");
    Assert.Equal("hello", listenData.GetProperty("asr").GetProperty("text").GetString());
    Assert.Equal("hello", listenData.GetProperty("nlu").GetProperty("intent").GetString());

    Assert.True(session.FollowUpOpen);
}
|
||||
|
||||
[Fact]
|
||||
public async Task ClientAsrJokeFlow_MatchesNodePayloadShapeForEosAndSkillAction()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user