From 874e5a1637621b605a7e5b14dc04540774f53be0 Mon Sep 17 00:00:00 2001 From: Jacob Dubin Date: Wed, 15 Apr 2026 14:33:43 -0500 Subject: [PATCH] fixes for next round of testing --- OpenJibo/README.md | 6 + OpenJibo/docs/protocol-inventory.md | 7 + OpenJibo/src/Jibo.Cloud/dotnet/README.md | 7 + .../Services/JiboWebSocketService.cs | 4 +- .../ResponsePlanToSocketMessagesMapper.cs | 90 +++++++++ .../WebSocketTurnFinalizationService.cs | 177 ++++++++++++------ .../Models/WebSocketTurnState.cs | 3 + .../WebSockets/JiboWebSocketServiceTests.cs | 112 +++++++++++ 8 files changed, 348 insertions(+), 58 deletions(-) diff --git a/OpenJibo/README.md b/OpenJibo/README.md index 9636fc8..b2d3715 100644 --- a/OpenJibo/README.md +++ b/OpenJibo/README.md @@ -99,6 +99,12 @@ What remains unresolved: - the next post-`api-socket` startup requests and timing seen in successful Node runs - broader live websocket behavior on a real robot beyond the current synthetic parity slice +The current websocket bridge now also includes server-driven raw-audio turn completion: + +- enough buffered audio plus `CONTEXT` can now trigger auto-finalize on the server side +- `EOS` is emitted on that auto-finalize path so turns do not remain open indefinitely +- transcript-less raw-audio turns still fall back to a synthetic compatibility response, not real ASR + ## Important Docs - [Cloud overview](/src/Jibo.Cloud/README.md) diff --git a/OpenJibo/docs/protocol-inventory.md b/OpenJibo/docs/protocol-inventory.md index ed3f418..284b99f 100644 --- a/OpenJibo/docs/protocol-inventory.md +++ b/OpenJibo/docs/protocol-inventory.md @@ -66,6 +66,7 @@ The current .NET pass covers only a narrow, explicitly synthetic subset of obser - token/session tracking across websocket turns - explicit per-turn state tracking for transID, rules, context, buffered audio, and finalize attempts - buffered audio accounting and turn-pending state +- auto-finalize triggering for raw audio once `LISTEN`, `CONTEXT`, and minimum buffered-audio thresholds are present - `LISTEN` message handling with synthetic `LISTEN` result payload shaping - `CONTEXT` capture for turn/session state - `CLIENT_NLU` turn completion using remembered listen/session metadata @@ -81,6 +82,12 @@ This does not yet mean parity for: - multi-step skill lifecycles beyond the current synthetic playback response - broader interaction, animation, or ESML command families +Current raw-audio fallback behavior remains explicitly synthetic: + +- when a buffered-audio turn can be resolved through the synthetic transcript-hint seam, `.NET` now auto-finalizes and emits `LISTEN` + `EOS` + `SKILL_ACTION` +- when the turn crosses the finalize threshold without a usable transcript, `.NET` now emits a fallback `LISTEN` + `EOS` + generic `SKILL_ACTION` rather than leaving the robot hanging on an unfinished turn +- that fallback is a compatibility measure inspired by the Node oracle, not a claim of real ASR understanding + ### Internal ASR Direction The current .NET websocket layer now separates: diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/README.md b/OpenJibo/src/Jibo.Cloud/dotnet/README.md index b408be9..175e53e 100644 --- a/OpenJibo/src/Jibo.Cloud/dotnet/README.md +++ b/OpenJibo/src/Jibo.Cloud/dotnet/README.md @@ -71,6 +71,7 @@ Current websocket scope is still intentionally narrow: - explicit websocket turn-state tracking separate from long-lived cloud session state - synthetic `LISTEN` result shaping for `LISTEN`, `CLIENT_NLU`, and `CLIENT_ASR` - buffered audio state tracking behind a dedicated turn-finalization layer +- raw audio auto-finalization once `LISTEN` + `CONTEXT` + minimum buffered audio thresholds are present - synthetic STT strategy selection for fixture-driven audio turn completion - structured websocket telemetry and live-run fixture export - `CONTEXT` capture and follow-up turn state @@ -100,3 +101,9 @@ It has not yet confirmed: - full startup parity with the successful Node run cadence - consistent eye-open / wake completion on the robot - the later health/log upload sequence currently seen in the working Node run + +Current raw-audio behavior is still a compatibility bridge: + +- if buffered audio has a synthetic transcript hint, the server now auto-finalizes the turn and emits `LISTEN` + `EOS` + `SKILL_ACTION` +- if buffered audio crosses the finalize threshold without a usable transcript, the server now emits a Node-style fallback completion with `EOS` instead of hanging the turn forever +- this is intentionally not a claim of real ASR parity diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboWebSocketService.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboWebSocketService.cs index 64eaf1b..d20d0fb 100644 --- a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboWebSocketService.cs +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboWebSocketService.cs @@ -23,7 +23,7 @@ public sealed class JiboWebSocketService( if (envelope.IsBinary) { - var replies = turnFinalizationService.HandleBinaryAudio(session, envelope); + var replies = await turnFinalizationService.HandleBinaryAudioAsync(session, envelope, cancellationToken); await telemetrySink.RecordTurnEventAsync(envelope, session, "binary_audio_received", new Dictionary { ["bytes"] = envelope.Binary?.Length ?? 0 @@ -42,7 +42,7 @@ public sealed class JiboWebSocketService( if (parsedType == "CONTEXT") { - var replies = turnFinalizationService.HandleContext(session, envelope.Text); + var replies = await turnFinalizationService.HandleContextAsync(session, envelope, cancellationToken); await telemetrySink.RecordTurnEventAsync(envelope, session, "context_received", new Dictionary { ["transID"] = session.TurnState.TransId diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ResponsePlanToSocketMessagesMapper.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ResponsePlanToSocketMessagesMapper.cs index f8e2668..a29dcfd 100644 --- a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ResponsePlanToSocketMessagesMapper.cs +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ResponsePlanToSocketMessagesMapper.cs @@ -63,6 +63,50 @@ public sealed class ResponsePlanToSocketMessagesMapper return messages; } + public IReadOnlyList MapFallback(CloudSession session, string transId, IReadOnlyList rules) + { + return + [ + JsonSerializer.Serialize(new + { + type = "LISTEN", + transID = transId, + data = new + { + asr = new + { + confidence = 0.95, + final = true, + text = string.Empty + }, + nlu = new + { + confidence = 0.95, + intent = "heyJibo", + rules, + entities = new Dictionary() + }, + match = new + { + intent = "heyJibo", + rule = rules.FirstOrDefault() ?? string.Empty, + score = 0.95 + } + } + }), + JsonSerializer.Serialize(new + { + type = "EOS", + data = new + { + sessionId = session.SessionId, + transID = transId + } + }), + JsonSerializer.Serialize(BuildGenericFallbackSkillPayload(transId)) + ]; + } + private static IReadOnlyList ReadRules(TurnContext turn) { if (!turn.Attributes.TryGetValue("listenRules", out var value)) @@ -132,6 +176,52 @@ public sealed class ResponsePlanToSocketMessagesMapper }; } + private static object BuildGenericFallbackSkillPayload(string transId) + { + return new + { + type = "SKILL_ACTION", + ts = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(), + msgID = $"msg-{Guid.NewGuid():N}", + transID = transId, + data = new + { + skill = new + { + id = "chitchat-skill" + }, + action = new + { + config = new + { + jcp = new + { + type = "SLIM", + config = new + { + play = new + { + esml = "I heard you.", + meta = new + { + prompt_id = "RUNTIME_PROMPT", + prompt_sub_category = "AN", + mim_id = "runtime-chat", + mim_type = "announcement", + intent = "unknown", + transcript = string.Empty + } + } + } + } + } + }, + analytics = new Dictionary(), + final = true + } + }; + } + private static string EscapeXml(string value) { return value diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/WebSocketTurnFinalizationService.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/WebSocketTurnFinalizationService.cs index fb38c22..6e84a2e 100644 --- a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/WebSocketTurnFinalizationService.cs +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/WebSocketTurnFinalizationService.cs @@ -10,16 +10,28 @@ public sealed class WebSocketTurnFinalizationService( ResponsePlanToSocketMessagesMapper replyMapper, ISttStrategySelector sttStrategySelector) { - public IReadOnlyList HandleBinaryAudio(CloudSession session, WebSocketMessageEnvelope envelope) + private const int AutoFinalizeMinBufferedAudioBytes = 12000; + private const int AutoFinalizeMinBufferedAudioChunks = 5; + + public async Task> HandleBinaryAudioAsync( + CloudSession session, + WebSocketMessageEnvelope envelope, + CancellationToken cancellationToken = default) { var turnState = session.TurnState; session.LastMessageType = "BINARY_AUDIO"; + turnState.FirstAudioReceivedUtc ??= DateTimeOffset.UtcNow; turnState.BufferedAudioChunkCount += 1; turnState.BufferedAudioBytes += envelope.Binary?.Length ?? 0; turnState.LastAudioReceivedUtc = DateTimeOffset.UtcNow; turnState.AwaitingTurnCompletion = true; session.Metadata["lastAudioBytes"] = envelope.Binary?.Length ?? 0; + if (ShouldAutoFinalize(session)) + { + return await FinalizeTurnAsync(session, envelope, "AUTO_FINALIZE", allowFallbackOnMissingTranscript: true, cancellationToken); + } + return [ new WebSocketReply @@ -39,19 +51,28 @@ public sealed class WebSocketTurnFinalizationService( ]; } - public IReadOnlyList HandleContext(CloudSession session, string? text) + public async Task> HandleContextAsync( + CloudSession session, + WebSocketMessageEnvelope envelope, + CancellationToken cancellationToken = default) { var turnState = session.TurnState; - turnState.ContextPayload = ExtractDataPayload(text); + turnState.SawContext = true; + turnState.ContextPayload = ExtractDataPayload(envelope.Text); session.Metadata["context"] = turnState.ContextPayload; - if (TryReadContextProperty(text, "audioTranscriptHint", out var transcriptHint) && + if (TryReadContextProperty(envelope.Text, "audioTranscriptHint", out var transcriptHint) && !string.IsNullOrWhiteSpace(transcriptHint)) { turnState.AudioTranscriptHint = transcriptHint; session.Metadata["audioTranscriptHint"] = transcriptHint; } + if (ShouldAutoFinalize(session)) + { + return await FinalizeTurnAsync(session, envelope, "AUTO_FINALIZE", allowFallbackOnMissingTranscript: true, cancellationToken); + } + return [ new WebSocketReply @@ -76,58 +97,7 @@ public sealed class WebSocketTurnFinalizationService( CancellationToken cancellationToken = default) { PersistTurnHints(session, envelope.Text); - - var turn = turnContextMapper.MapListenMessage(envelope, session, messageType); - var finalizedTurn = await ResolveTranscriptAsync(turn, session, cancellationToken); - var turnState = session.TurnState; - if (string.IsNullOrWhiteSpace(finalizedTurn.NormalizedTranscript) && - string.IsNullOrWhiteSpace(finalizedTurn.RawTranscript)) - { - turnState.AwaitingTurnCompletion = true; - if (turnState.BufferedAudioBytes > 0) - { - turnState.FinalizeAttemptCount += 1; - } - return - [ - new WebSocketReply - { - Text = JsonSerializer.Serialize(new - { - type = "OPENJIBO_TURN_PENDING", - data = new - { - sessionId = session.SessionId, - transID = session.LastTransId, - bufferedAudioBytes = turnState.BufferedAudioBytes, - bufferedAudioChunks = turnState.BufferedAudioChunkCount, - awaitingAudio = turnState.BufferedAudioBytes == 0, - awaitingTranscriptHint = turnState.BufferedAudioBytes > 0 && string.IsNullOrWhiteSpace(turnState.AudioTranscriptHint), - finalizeAttempts = turnState.FinalizeAttemptCount - } - }) - } - ]; - } - - var plan = await conversationBroker.HandleTurnAsync(finalizedTurn, cancellationToken); - var listenAction = plan.Actions.OfType().OrderBy(action => action.Sequence).LastOrDefault(); - session.LastTranscript = finalizedTurn.NormalizedTranscript ?? finalizedTurn.RawTranscript; - session.LastIntent = plan.IntentName; - session.LastListenType = listenAction?.Mode; - session.FollowUpExpiresUtc = plan.FollowUp.KeepMicOpen - ? DateTimeOffset.UtcNow.Add(plan.FollowUp.Timeout) - : null; - turnState.AwaitingTurnCompletion = false; - - var emitSkillActions = messageType != "CLIENT_NLU"; - var replies = replyMapper.Map(plan, finalizedTurn, session, emitSkillActions).Select(text => new WebSocketReply - { - Text = text - }).ToArray(); - - ResetBufferedAudio(session); - return replies; + return await FinalizeTurnAsync(session, envelope, messageType, allowFallbackOnMissingTranscript: false, cancellationToken); } private async Task ResolveTranscriptAsync(TurnContext turn, CloudSession session, CancellationToken cancellationToken) @@ -200,6 +170,13 @@ public sealed class WebSocketTurnFinalizationService( using var document = JsonDocument.Parse(text); var root = document.RootElement; + if (root.TryGetProperty("type", out var type) && + type.ValueKind == JsonValueKind.String && + string.Equals(type.GetString(), "LISTEN", StringComparison.OrdinalIgnoreCase)) + { + turnState.SawListen = true; + } + if (root.TryGetProperty("transID", out var transId) && transId.ValueKind == JsonValueKind.String) { var nextTransId = transId.GetString(); @@ -244,6 +221,7 @@ public sealed class WebSocketTurnFinalizationService( { session.TurnState.BufferedAudioBytes = 0; session.TurnState.BufferedAudioChunkCount = 0; + session.TurnState.FirstAudioReceivedUtc = null; session.TurnState.LastAudioReceivedUtc = null; session.TurnState.FinalizeAttemptCount = 0; session.Metadata.Remove("audioTranscriptHint"); @@ -254,14 +232,101 @@ public sealed class WebSocketTurnFinalizationService( turnState.TransId = transId; turnState.ContextPayload = null; turnState.AudioTranscriptHint = null; + turnState.FirstAudioReceivedUtc = null; turnState.LastAudioReceivedUtc = null; turnState.BufferedAudioChunkCount = 0; turnState.BufferedAudioBytes = 0; turnState.FinalizeAttemptCount = 0; turnState.AwaitingTurnCompletion = false; + turnState.SawListen = false; + turnState.SawContext = false; turnState.ListenRules = []; } + private async Task> FinalizeTurnAsync( + CloudSession session, + WebSocketMessageEnvelope envelope, + string messageType, + bool allowFallbackOnMissingTranscript, + CancellationToken cancellationToken) + { + var turn = turnContextMapper.MapListenMessage(envelope, session, messageType); + var finalizedTurn = await ResolveTranscriptAsync(turn, session, cancellationToken); + var turnState = session.TurnState; + if (string.IsNullOrWhiteSpace(finalizedTurn.NormalizedTranscript) && + string.IsNullOrWhiteSpace(finalizedTurn.RawTranscript)) + { + turnState.AwaitingTurnCompletion = true; + if (turnState.BufferedAudioBytes > 0) + { + turnState.FinalizeAttemptCount += 1; + } + + if (allowFallbackOnMissingTranscript && turnState.BufferedAudioBytes >= AutoFinalizeMinBufferedAudioBytes) + { + turnState.AwaitingTurnCompletion = false; + session.LastTranscript = string.Empty; + session.LastIntent = "heyJibo"; + session.LastListenType = "fallback"; + var fallbackReplies = replyMapper.MapFallback(session, turnState.TransId ?? session.LastTransId ?? string.Empty, turnState.ListenRules) + .Select(text => new WebSocketReply { Text = text }) + .ToArray(); + ResetBufferedAudio(session); + return fallbackReplies; + } + + return + [ + new WebSocketReply + { + Text = JsonSerializer.Serialize(new + { + type = "OPENJIBO_TURN_PENDING", + data = new + { + sessionId = session.SessionId, + transID = session.LastTransId, + bufferedAudioBytes = turnState.BufferedAudioBytes, + bufferedAudioChunks = turnState.BufferedAudioChunkCount, + awaitingAudio = turnState.BufferedAudioBytes == 0, + awaitingTranscriptHint = turnState.BufferedAudioBytes > 0 && string.IsNullOrWhiteSpace(turnState.AudioTranscriptHint), + finalizeAttempts = turnState.FinalizeAttemptCount + } + }) + } + ]; + } + + var plan = await conversationBroker.HandleTurnAsync(finalizedTurn, cancellationToken); + var listenAction = plan.Actions.OfType().OrderBy(action => action.Sequence).LastOrDefault(); + session.LastTranscript = finalizedTurn.NormalizedTranscript ?? finalizedTurn.RawTranscript; + session.LastIntent = plan.IntentName; + session.LastListenType = listenAction?.Mode; + session.FollowUpExpiresUtc = plan.FollowUp.KeepMicOpen + ? DateTimeOffset.UtcNow.Add(plan.FollowUp.Timeout) + : null; + turnState.AwaitingTurnCompletion = false; + + var emitSkillActions = messageType != "CLIENT_NLU"; + var replies = replyMapper.Map(plan, finalizedTurn, session, emitSkillActions).Select(text => new WebSocketReply + { + Text = text + }).ToArray(); + + ResetBufferedAudio(session); + return replies; + } + + private static bool ShouldAutoFinalize(CloudSession session) + { + var turnState = session.TurnState; + return turnState.AwaitingTurnCompletion && + turnState.SawListen && + turnState.SawContext && + turnState.BufferedAudioChunkCount >= AutoFinalizeMinBufferedAudioChunks && + turnState.BufferedAudioBytes >= AutoFinalizeMinBufferedAudioBytes; + } + private static string? ExtractDataPayload(string? text) { if (string.IsNullOrWhiteSpace(text)) diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Domain/Models/WebSocketTurnState.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Domain/Models/WebSocketTurnState.cs index 2390a78..5e410a1 100644 --- a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Domain/Models/WebSocketTurnState.cs +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Domain/Models/WebSocketTurnState.cs @@ -5,10 +5,13 @@ public sealed class WebSocketTurnState public string? TransId { get; set; } public string? ContextPayload { get; set; } public string? AudioTranscriptHint { get; set; } + public DateTimeOffset? FirstAudioReceivedUtc { get; set; } public DateTimeOffset? LastAudioReceivedUtc { get; set; } public int BufferedAudioChunkCount { get; set; } public int BufferedAudioBytes { get; set; } public int FinalizeAttemptCount { get; set; } public bool AwaitingTurnCompletion { get; set; } + public bool SawListen { get; set; } + public bool SawContext { get; set; } public IReadOnlyList ListenRules { get; set; } = []; } diff --git a/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs b/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs index 5c911d6..788ae55 100644 --- a/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs +++ b/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs @@ -74,6 +74,118 @@ public sealed class JiboWebSocketServiceTests Assert.Equal(1, payload.RootElement.GetProperty("data").GetProperty("bufferedChunks").GetInt32()); } + [Fact] + public async Task BufferedAudio_WithContextAndTranscriptHint_AutoFinalizesAfterThreshold() + { + await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-auto-finalize-token", + Text = """{"type":"LISTEN","transID":"trans-auto","data":{"rules":["launch"]}}""" + }); + + await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-auto-finalize-token", + Text = """{"type":"CONTEXT","transID":"trans-auto","data":{"audioTranscriptHint":"tell me a joke"}}""" + }); + + IReadOnlyList replies = []; + for (var index = 0; index < 4; index += 1) + { + replies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-auto-finalize-token", + Binary = new byte[3000] + }); + + Assert.Single(replies); + Assert.Equal("OPENJIBO_AUDIO_RECEIVED", ReadReplyType(replies[0])); + } + + replies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-auto-finalize-token", + Binary = new byte[3000] + }); + + Assert.Equal(3, replies.Count); + Assert.Equal("LISTEN", ReadReplyType(replies[0])); + Assert.Equal("EOS", ReadReplyType(replies[1])); + Assert.Equal("SKILL_ACTION", ReadReplyType(replies[2])); + + using var listenPayload = JsonDocument.Parse(replies[0].Text!); + Assert.Equal("tell me a joke", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString()); + Assert.Equal("joke", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString()); + } + + [Fact] + public async Task BufferedAudio_WithoutTranscriptHint_AutoFinalizesWithFallbackAndEos() + { + await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-auto-fallback-token", + Text = """{"type":"LISTEN","transID":"trans-auto-fallback","data":{"rules":["launch"]}}""" + }); + + await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-auto-fallback-token", + Text = """{"type":"CONTEXT","transID":"trans-auto-fallback","data":{"topic":"conversation"}}""" + }); + + IReadOnlyList replies = []; + for (var index = 0; index < 4; index += 1) + { + replies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-auto-fallback-token", + Binary = new byte[3000] + }); + + Assert.Single(replies); + Assert.Equal("OPENJIBO_AUDIO_RECEIVED", ReadReplyType(replies[0])); + } + + replies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-auto-fallback-token", + Binary = new byte[3000] + }); + + Assert.Equal(3, replies.Count); + Assert.Equal("LISTEN", ReadReplyType(replies[0])); + Assert.Equal("EOS", ReadReplyType(replies[1])); + Assert.Equal("SKILL_ACTION", ReadReplyType(replies[2])); + + using var listenPayload = JsonDocument.Parse(replies[0].Text!); + Assert.Equal("heyJibo", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString()); + Assert.Equal(string.Empty, listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString()); + } + [Fact] public async Task MultiChunkAudio_AccumulatesBufferedStateAcrossMessages() {