From 500b54a6b635d670ce66f43405f7e55eeaa59851 Mon Sep 17 00:00:00 2001 From: Jacob Dubin Date: Thu, 16 Apr 2026 07:18:33 -0500 Subject: [PATCH] more fixes for testing --- OpenJibo/README.md | 5 + OpenJibo/docs/development-plan.md | 21 +++ OpenJibo/docs/protocol-inventory.md | 25 ++++ .../ResponsePlanToSocketMessagesMapper.cs | 35 +++-- .../src/Jibo.Cloud/node/fixtures/README.md | 5 + .../neo-hub-client-asr-joke.flow.json | 64 ++++++++ .../Fixtures/WebSocketFixtureLoader.cs | 18 ++- .../WebSockets/JiboWebSocketServiceTests.cs | 138 ++++++++++++++++++ 8 files changed, 292 insertions(+), 19 deletions(-) diff --git a/OpenJibo/README.md b/OpenJibo/README.md index b2d3715..0ec0454 100644 --- a/OpenJibo/README.md +++ b/OpenJibo/README.md @@ -105,6 +105,11 @@ The current websocket bridge now also includes server-driven raw-audio turn comp - `EOS` is emitted on that auto-finalize path so turns do not remain open indefinitely - transcript-less raw-audio turns still fall back to a synthetic compatibility response, not real ASR +The current richer websocket parity slice is still intentionally narrow: + +- the successful joke path now has fixture-backed reply sequencing and partial payload-shape fidelity through `CLIENT_ASR -> LISTEN -> EOS -> delayed SKILL_ACTION` +- this is not a claim of broad skill parity or full Jibo websocket coverage + ## Important Docs - [Cloud overview](/src/Jibo.Cloud/README.md) diff --git a/OpenJibo/docs/development-plan.md b/OpenJibo/docs/development-plan.md index 866a770..2684847 100644 --- a/OpenJibo/docs/development-plan.md +++ b/OpenJibo/docs/development-plan.md @@ -19,6 +19,7 @@ It is intentionally broader than the current Node server. 
The Node server is a p - expand HTTP `X-Amz-Target` coverage from observed traffic and fixtures - grow WebSocket compatibility from stub acceptance into realistic turn orchestration +- keep websocket parity fixture-driven, starting with exact sequencing and payload-shape fidelity for the successful joke vertical slice before claiming broader skill coverage - replace in-memory state with Azure SQL-backed persistence - add structured fixture replay tests - harden region/bootstrap docs by software version @@ -34,6 +35,26 @@ We still need to map more than the current Node server expresses. Priority disco - upload, logging, backup, and key-sharing flows - per-version configuration differences and region handling +## Current WebSocket Discovery Focus + +The next fixture-driven websocket work should continue to separate three buckets: + +- discovered behavior + Grounded by the Node oracle, sanitized fixtures, and live captures +- implemented parity + Only the narrow slices currently replayed and tested in `.NET` +- future hypotheses + Ideas to investigate later, but not behaviors to silently bake into the hosted cloud + +Right now the strongest implemented vertical slice beyond basic listen completion is the successful joke turn: + +- `CLIENT_ASR` transcript-carrying turn completion +- synthetic `LISTEN` result shaping +- `EOS` +- delayed joke `SKILL_ACTION` + +That should remain the model for future websocket work: capture first, fixture second, parity third. + ## Speech, Animation, And ESML The current joke flow is only a small foothold into Jibo expressiveness. 
diff --git a/OpenJibo/docs/protocol-inventory.md b/OpenJibo/docs/protocol-inventory.md index 8e5dcd5..021b407 100644 --- a/OpenJibo/docs/protocol-inventory.md +++ b/OpenJibo/docs/protocol-inventory.md @@ -74,6 +74,7 @@ The current .NET pass covers only a narrow, explicitly synthetic subset of obser - `EOS` emission after completed turns - delayed `SKILL_ACTION` emission after `EOS` on completed turn flows to better match the Node oracle timing - first richer vertical slice for joke/chat `SKILL_ACTION` playback +- fixture-backed joke-turn payload fidelity for `CLIENT_ASR -> LISTEN -> EOS -> delayed SKILL_ACTION`, including Node-like `EOS` envelope fields and the currently observed joke `SKILL_ACTION` metadata shape This does not yet mean parity for: @@ -81,8 +82,32 @@ This does not yet mean parity for: - real STT provider integration and external ASR lifecycle timing - early-EOS behavior - multi-step skill lifecycles beyond the current synthetic playback response +- broad `SKILL_ACTION` payload coverage outside the currently observed joke/chat playback slice - broader interaction, animation, or ESML command families +### Successful Joke Turn: What Is Grounded Now + +The highest-confidence websocket vertical slice after the starter parity pass is now: + +- inbound `CLIENT_ASR` carrying `"tell me a joke"` +- outbound synthetic `LISTEN` result with joke intent and remembered rules +- outbound `EOS` carrying `ts`, `msgID`, `transID`, and an empty `data` object +- outbound `SKILL_ACTION` about 75 ms later +- joke `SKILL_ACTION` payload shape aligned with the Node oracle for: + - `data.skill.id = "@be/joke"` + - `data.action.config.jcp.type = "SLIM"` + - `data.action.config.jcp.config.play.meta.prompt_id = "RUNTIME_PROMPT"` + - `data.action.config.jcp.config.play.meta.prompt_sub_category = "AN"` + - `data.action.config.jcp.config.play.meta.mim_id = "runtime-joke"` + - `data.action.config.jcp.config.play.meta.mim_type = "announcement"` + +What remains intentionally 
unclaimed for that slice: + +- whether the joke payload is complete beyond those fields +- whether other successful skills use the same payload shape +- whether additional websocket messages appear in other successful skill paths +- whether any timing gaps besides the observed 75 ms `EOS -> SKILL_ACTION` delay matter + Current raw-audio fallback behavior remains explicitly synthetic: - when a buffered-audio turn can be resolved through the synthetic transcript-hint seam, `.NET` now auto-finalizes and emits `LISTEN` + `EOS` + `SKILL_ACTION` diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ResponsePlanToSocketMessagesMapper.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ResponsePlanToSocketMessagesMapper.cs index 17efb7e..3701fce 100644 --- a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ResponsePlanToSocketMessagesMapper.cs +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ResponsePlanToSocketMessagesMapper.cs @@ -48,11 +48,10 @@ public sealed class ResponsePlanToSocketMessagesMapper messages.Add(new SocketReplyPlan(JsonSerializer.Serialize(new { type = "EOS", - data = new - { - sessionId = plan.SessionId, - transID = transId - } + ts = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(), + msgID = CreateHubMessageId(), + transID = transId, + data = new { } }))); if (emitSkillActions && speak is not null) @@ -99,11 +98,10 @@ public sealed class ResponsePlanToSocketMessagesMapper new SocketReplyPlan(JsonSerializer.Serialize(new { type = "EOS", - data = new - { - sessionId = session.SessionId, - transID = transId - } + ts = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(), + msgID = CreateHubMessageId(), + transID = transId, + data = new { } })), new SocketReplyPlan(JsonSerializer.Serialize(BuildGenericFallbackSkillPayload(transId)), DelayMs: 75) ]; @@ -138,7 +136,7 @@ public sealed class ResponsePlanToSocketMessagesMapper { type = "SKILL_ACTION", ts = 
DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(), - msgID = $"msg-{Guid.NewGuid():N}", + msgID = CreateHubMessageId(), transID = transId, data = new { @@ -163,9 +161,7 @@ public sealed class ResponsePlanToSocketMessagesMapper prompt_id = "RUNTIME_PROMPT", prompt_sub_category = "AN", mim_id = mimId, - mim_type = "announcement", - intent = plan.IntentName ?? "unknown", - transcript = turn.NormalizedTranscript ?? turn.RawTranscript ?? string.Empty + mim_type = "announcement" } } } @@ -184,7 +180,7 @@ public sealed class ResponsePlanToSocketMessagesMapper { type = "SKILL_ACTION", ts = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(), - msgID = $"msg-{Guid.NewGuid():N}", + msgID = CreateHubMessageId(), transID = transId, data = new { @@ -209,9 +205,7 @@ public sealed class ResponsePlanToSocketMessagesMapper prompt_id = "RUNTIME_PROMPT", prompt_sub_category = "AN", mim_id = "runtime-chat", - mim_type = "announcement", - intent = "unknown", - transcript = string.Empty + mim_type = "announcement" } } } @@ -234,5 +228,10 @@ public sealed class ResponsePlanToSocketMessagesMapper .Replace("'", "'", StringComparison.Ordinal); } + private static string CreateHubMessageId() + { + return $"mid-{Guid.NewGuid()}"; + } + public sealed record SocketReplyPlan(string Text, int DelayMs = 0); } diff --git a/OpenJibo/src/Jibo.Cloud/node/fixtures/README.md b/OpenJibo/src/Jibo.Cloud/node/fixtures/README.md index a14f979..85e8c18 100644 --- a/OpenJibo/src/Jibo.Cloud/node/fixtures/README.md +++ b/OpenJibo/src/Jibo.Cloud/node/fixtures/README.md @@ -9,4 +9,9 @@ Current fixture groups: - `websocket/` Sanitized Neo-Hub turn-flow examples used to replay `LISTEN`, `CONTEXT`, `CLIENT_NLU`, `CLIENT_ASR`, buffered-audio accumulation, pending/finalize states, and synthetic `EOS` / `SKILL_ACTION` behavior against the .NET implementation. +Current websocket fixture depth is uneven on purpose: + +- `neo-hub-client-asr-joke.flow.json` now asserts a richer vertical slice than reply types alone. 
It captures the observed Node-oriented `CLIENT_ASR -> LISTEN -> EOS -> delayed SKILL_ACTION` joke turn with payload-shape expectations for `EOS` and joke `SKILL_ACTION`. +- The other websocket fixtures are still mainly sequencing fixtures. They are useful for replay and guardrails, but they should not be read as proof of broader payload parity. + Expand this folder whenever new robot traffic is captured and cleaned. diff --git a/OpenJibo/src/Jibo.Cloud/node/fixtures/websocket/neo-hub-client-asr-joke.flow.json b/OpenJibo/src/Jibo.Cloud/node/fixtures/websocket/neo-hub-client-asr-joke.flow.json index 41bf798..b06ab3b 100644 --- a/OpenJibo/src/Jibo.Cloud/node/fixtures/websocket/neo-hub-client-asr-joke.flow.json +++ b/OpenJibo/src/Jibo.Cloud/node/fixtures/websocket/neo-hub-client-asr-joke.flow.json @@ -36,6 +36,70 @@ "LISTEN", "EOS", "SKILL_ACTION" + ], + "expectedReplies": [ + { + "type": "LISTEN", + "jsonSubset": { + "type": "LISTEN", + "transID": "fixture-trans-joke", + "data": { + "asr": { + "text": "tell me a joke" + }, + "nlu": { + "intent": "joke", + "rules": [ + "wake-word" + ] + }, + "match": { + "intent": "joke", + "rule": "wake-word" + } + } + } + }, + { + "type": "EOS", + "jsonSubset": { + "type": "EOS", + "transID": "fixture-trans-joke", + "data": {} + } + }, + { + "type": "SKILL_ACTION", + "delayMs": 75, + "jsonSubset": { + "type": "SKILL_ACTION", + "transID": "fixture-trans-joke", + "data": { + "skill": { + "id": "@be/joke" + }, + "action": { + "config": { + "jcp": { + "type": "SLIM", + "config": { + "play": { + "meta": { + "prompt_id": "RUNTIME_PROMPT", + "prompt_sub_category": "AN", + "mim_id": "runtime-joke", + "mim_type": "announcement" + } + } + } + } + } + }, + "analytics": {}, + "final": true + } + } + } ] } ] diff --git a/OpenJibo/tests/Jibo.Cloud.Tests/Fixtures/WebSocketFixtureLoader.cs b/OpenJibo/tests/Jibo.Cloud.Tests/Fixtures/WebSocketFixtureLoader.cs index 3d7d2e6..cd00caa 100644 --- 
a/OpenJibo/tests/Jibo.Cloud.Tests/Fixtures/WebSocketFixtureLoader.cs +++ b/OpenJibo/tests/Jibo.Cloud.Tests/Fixtures/WebSocketFixtureLoader.cs @@ -5,6 +5,11 @@ namespace Jibo.Cloud.Tests.Fixtures; internal static class WebSocketFixtureLoader { + private static readonly JsonSerializerOptions SerializerOptions = new() + { + PropertyNameCaseInsensitive = true + }; + public static WebSocketFixture Load(string relativePath) { var fullPath = Path.Combine(AppContext.BaseDirectory, relativePath); @@ -32,7 +37,10 @@ internal static class WebSocketFixtureLoader .EnumerateArray() .Select(item => item.GetString() ?? string.Empty) .Where(item => !string.IsNullOrWhiteSpace(item)) - .ToArray() + .ToArray(), + ExpectedReplies = stepElement.TryGetProperty("expectedReplies", out var expectedReplies) && expectedReplies.ValueKind == JsonValueKind.Array + ? JsonSerializer.Deserialize<List<ExpectedWebSocketReply>>(expectedReplies.GetRawText(), SerializerOptions) ?? [] + : [] }); } @@ -54,4 +62,12 @@ internal sealed class WebSocketFixtureStep { public WebSocketMessageEnvelope Message { get; init; } = new(); public IReadOnlyList<string> ExpectedReplyTypes { get; init; } = []; + public IReadOnlyList<ExpectedWebSocketReply> ExpectedReplies { get; init; } = []; +} + +internal sealed class ExpectedWebSocketReply +{ + public string Type { get; init; } = string.Empty; + public int? DelayMs { get; init; } + public JsonElement?
JsonSubset { get; init; } } diff --git a/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs b/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs index bf285c9..1cd5399 100644 --- a/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs +++ b/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs @@ -54,6 +54,25 @@ public sealed class JiboWebSocketServiceTests using var listenPayload = JsonDocument.Parse(replies[0].Text!); Assert.Equal("hello jibo", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString()); Assert.Equal("chat", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString()); + + using var eosPayload = JsonDocument.Parse(replies[1].Text!); + Assert.True(eosPayload.RootElement.TryGetProperty("ts", out _)); + Assert.StartsWith("mid-", eosPayload.RootElement.GetProperty("msgID").GetString()); + Assert.Equal("trans-hello", eosPayload.RootElement.GetProperty("transID").GetString()); + Assert.Equal(JsonValueKind.Object, eosPayload.RootElement.GetProperty("data").ValueKind); + + using var skillPayload = JsonDocument.Parse(replies[2].Text!); + Assert.StartsWith("mid-", skillPayload.RootElement.GetProperty("msgID").GetString()); + var meta = skillPayload.RootElement + .GetProperty("data") + .GetProperty("action") + .GetProperty("config") + .GetProperty("jcp") + .GetProperty("config") + .GetProperty("play") + .GetProperty("meta"); + Assert.False(meta.TryGetProperty("intent", out _)); + Assert.False(meta.TryGetProperty("transcript", out _)); } [Fact] @@ -426,6 +445,60 @@ public sealed class JiboWebSocketServiceTests Assert.Equal("chitchat-skill", skillPayload.RootElement.GetProperty("data").GetProperty("skill").GetProperty("id").GetString()); } + [Fact] + public async Task ClientAsrJokeFlow_MatchesNodePayloadShapeForEosAndSkillAction() + { + await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + 
HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-client-asr-joke-token", + Text = """{"type":"LISTEN","transID":"trans-joke-shape","data":{"rules":["wake-word"]}}""" + }); + + var replies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-client-asr-joke-token", + Text = """{"type":"CLIENT_ASR","transID":"trans-joke-shape","data":{"text":"tell me a joke"}}""" + }); + + Assert.Equal(3, replies.Count); + Assert.Equal(75, replies[2].DelayMs); + + using var eosPayload = JsonDocument.Parse(replies[1].Text!); + Assert.Equal("EOS", eosPayload.RootElement.GetProperty("type").GetString()); + Assert.Equal("trans-joke-shape", eosPayload.RootElement.GetProperty("transID").GetString()); + Assert.True(eosPayload.RootElement.TryGetProperty("ts", out _)); + Assert.StartsWith("mid-", eosPayload.RootElement.GetProperty("msgID").GetString()); + Assert.Empty(eosPayload.RootElement.GetProperty("data").EnumerateObject()); + + using var skillPayload = JsonDocument.Parse(replies[2].Text!); + Assert.Equal("SKILL_ACTION", skillPayload.RootElement.GetProperty("type").GetString()); + Assert.Equal("trans-joke-shape", skillPayload.RootElement.GetProperty("transID").GetString()); + Assert.StartsWith("mid-", skillPayload.RootElement.GetProperty("msgID").GetString()); + Assert.Equal("@be/joke", skillPayload.RootElement.GetProperty("data").GetProperty("skill").GetProperty("id").GetString()); + + var meta = skillPayload.RootElement + .GetProperty("data") + .GetProperty("action") + .GetProperty("config") + .GetProperty("jcp") + .GetProperty("config") + .GetProperty("play") + .GetProperty("meta"); + + Assert.Equal("RUNTIME_PROMPT", meta.GetProperty("prompt_id").GetString()); + Assert.Equal("AN", meta.GetProperty("prompt_sub_category").GetString()); + Assert.Equal("runtime-joke", meta.GetProperty("mim_id").GetString()); + 
Assert.Equal("announcement", meta.GetProperty("mim_type").GetString()); + Assert.False(meta.TryGetProperty("intent", out _)); + Assert.False(meta.TryGetProperty("transcript", out _)); + } + [Fact] public async Task FollowUpTurn_UsesNewTurnStateWithoutLeakingBufferedAudio() { @@ -501,6 +574,71 @@ public sealed class JiboWebSocketServiceTests var replies = await _service.HandleMessageAsync(step.Message); var actualTypes = replies.Select(ReadReplyType).ToArray(); Assert.Equal(step.ExpectedReplyTypes, actualTypes); + + if (step.ExpectedReplies.Count > 0) + { + Assert.Equal(replies.Count, step.ExpectedReplies.Count); + + for (var index = 0; index < step.ExpectedReplies.Count; index += 1) + { + var expectedReply = step.ExpectedReplies[index]; + Assert.Equal(expectedReply.Type, actualTypes[index]); + + if (expectedReply.DelayMs.HasValue) + { + Assert.Equal(expectedReply.DelayMs.Value, replies[index].DelayMs); + } + + if (expectedReply.JsonSubset is { ValueKind: JsonValueKind.Object } jsonSubset) + { + using var actualPayload = JsonDocument.Parse(replies[index].Text!); + AssertJsonContains(jsonSubset, actualPayload.RootElement); + } + } + } + } + } + + private static void AssertJsonContains(JsonElement expected, JsonElement actual) + { + Assert.Equal(expected.ValueKind, actual.ValueKind); + + switch (expected.ValueKind) + { + case JsonValueKind.Object: + foreach (var property in expected.EnumerateObject()) + { + Assert.True(actual.TryGetProperty(property.Name, out var actualProperty), $"Expected property '{property.Name}' was not found."); + AssertJsonContains(property.Value, actualProperty); + } + break; + case JsonValueKind.Array: + { + var expectedItems = expected.EnumerateArray().ToArray(); + var actualItems = actual.EnumerateArray().ToArray(); + Assert.Equal(expectedItems.Length, actualItems.Length); + for (var index = 0; index < expectedItems.Length; index += 1) + { + AssertJsonContains(expectedItems[index], actualItems[index]); + } + break; + } + case 
JsonValueKind.String: + Assert.Equal(expected.GetString(), actual.GetString()); + break; + case JsonValueKind.Number: + Assert.Equal(expected.GetRawText(), actual.GetRawText()); + break; + case JsonValueKind.True: + case JsonValueKind.False: + Assert.Equal(expected.GetBoolean(), actual.GetBoolean()); + break; + case JsonValueKind.Null: + Assert.Equal(JsonValueKind.Null, actual.ValueKind); + break; + default: + Assert.Equal(expected.GetRawText(), actual.GetRawText()); + break; } }