more fixes for testing

This commit is contained in:
Jacob Dubin
2026-04-16 07:18:33 -05:00
parent b1069805bf
commit 500b54a6b6
8 changed files with 292 additions and 19 deletions

View File

@@ -105,6 +105,11 @@ The current websocket bridge now also includes server-driven raw-audio turn comp
- `EOS` is emitted on that auto-finalize path so turns do not remain open indefinitely
- transcript-less raw-audio turns still fall back to a synthetic compatibility response, not real ASR
The current richer websocket parity slice is still intentionally narrow:
- the successful joke path now has fixture-backed reply sequencing and partial payload-shape fidelity through `CLIENT_ASR -> LISTEN -> EOS -> delayed SKILL_ACTION`
- this is not a claim of broad skill parity or full Jibo websocket coverage
## Important Docs
- [Cloud overview](/src/Jibo.Cloud/README.md)

View File

@@ -19,6 +19,7 @@ It is intentionally broader than the current Node server. The Node server is a p
- expand HTTP `X-Amz-Target` coverage from observed traffic and fixtures
- grow WebSocket compatibility from stub acceptance into realistic turn orchestration
- keep websocket parity fixture-driven, starting with exact sequencing and payload-shape fidelity for the successful joke vertical slice before claiming broader skill coverage
- replace in-memory state with Azure SQL-backed persistence
- add structured fixture replay tests
- harden region/bootstrap docs by software version
@@ -34,6 +35,26 @@ We still need to map more than the current Node server expresses. Priority disco
- upload, logging, backup, and key-sharing flows
- per-version configuration differences and region handling
## Current WebSocket Discovery Focus
The next fixture-driven websocket work should continue to separate three buckets:
- discovered behavior
Grounded by the Node oracle, sanitized fixtures, and live captures
- implemented parity
Only the narrow slices currently replayed and tested in `.NET`
- future hypotheses
Ideas to investigate later, but not behaviors to silently bake into the hosted cloud
Right now the strongest implemented vertical slice beyond basic listen completion is the successful joke turn:
- `CLIENT_ASR` transcript-carrying turn completion
- synthetic `LISTEN` result shaping
- `EOS`
- delayed joke `SKILL_ACTION`
That should remain the model for future websocket work: capture first, fixture second, parity third.
## Speech, Animation, And ESML
The current joke flow is only a small foothold into Jibo expressiveness.

View File

@@ -74,6 +74,7 @@ The current .NET pass covers only a narrow, explicitly synthetic subset of obser
- `EOS` emission after completed turns
- delayed `SKILL_ACTION` emission after `EOS` on completed turn flows to better match the Node oracle timing
- first richer vertical slice for joke/chat `SKILL_ACTION` playback
- fixture-backed joke-turn payload fidelity for `CLIENT_ASR -> LISTEN -> EOS -> delayed SKILL_ACTION`, including Node-like `EOS` envelope fields and the currently observed joke `SKILL_ACTION` metadata shape
This does not yet mean parity for:
@@ -81,8 +82,32 @@ This does not yet mean parity for:
- real STT provider integration and external ASR lifecycle timing
- early-EOS behavior
- multi-step skill lifecycles beyond the current synthetic playback response
- broad `SKILL_ACTION` payload coverage outside the currently observed joke/chat playback slice
- broader interaction, animation, or ESML command families
### Successful Joke Turn: What Is Grounded Now
The highest-confidence websocket vertical slice after the starter parity pass is now:
- inbound `CLIENT_ASR` carrying `"tell me a joke"`
- outbound synthetic `LISTEN` result with joke intent and remembered rules
- outbound `EOS` carrying `ts`, `msgID`, `transID`, and an empty `data` object
- outbound `SKILL_ACTION` about 75 ms later
- joke `SKILL_ACTION` payload shape aligned with the Node oracle for:
- `data.skill.id = "@be/joke"`
- `data.action.config.jcp.type = "SLIM"`
- `data.action.config.jcp.config.play.meta.prompt_id = "RUNTIME_PROMPT"`
- `data.action.config.jcp.config.play.meta.prompt_sub_category = "AN"`
- `data.action.config.jcp.config.play.meta.mim_id = "runtime-joke"`
- `data.action.config.jcp.config.play.meta.mim_type = "announcement"`
What remains intentionally unclaimed for that slice:
- whether the joke payload is complete beyond those fields
- whether other successful skills use the same payload shape
- whether additional websocket messages appear in other successful skill paths
- whether any timing gaps besides the observed 75 ms `EOS -> SKILL_ACTION` delay matter
Current raw-audio fallback behavior remains explicitly synthetic:
- when a buffered-audio turn can be resolved through the synthetic transcript-hint seam, `.NET` now auto-finalizes and emits `LISTEN` + `EOS` + `SKILL_ACTION`

View File

@@ -48,11 +48,10 @@ public sealed class ResponsePlanToSocketMessagesMapper
messages.Add(new SocketReplyPlan(JsonSerializer.Serialize(new
{
type = "EOS",
data = new
{
sessionId = plan.SessionId,
transID = transId
}
ts = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
msgID = CreateHubMessageId(),
transID = transId,
data = new { }
})));
if (emitSkillActions && speak is not null)
@@ -99,11 +98,10 @@ public sealed class ResponsePlanToSocketMessagesMapper
new SocketReplyPlan(JsonSerializer.Serialize(new
{
type = "EOS",
data = new
{
sessionId = session.SessionId,
transID = transId
}
ts = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
msgID = CreateHubMessageId(),
transID = transId,
data = new { }
})),
new SocketReplyPlan(JsonSerializer.Serialize(BuildGenericFallbackSkillPayload(transId)), DelayMs: 75)
];
@@ -138,7 +136,7 @@ public sealed class ResponsePlanToSocketMessagesMapper
{
type = "SKILL_ACTION",
ts = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
msgID = $"msg-{Guid.NewGuid():N}",
msgID = CreateHubMessageId(),
transID = transId,
data = new
{
@@ -163,9 +161,7 @@ public sealed class ResponsePlanToSocketMessagesMapper
prompt_id = "RUNTIME_PROMPT",
prompt_sub_category = "AN",
mim_id = mimId,
mim_type = "announcement",
intent = plan.IntentName ?? "unknown",
transcript = turn.NormalizedTranscript ?? turn.RawTranscript ?? string.Empty
mim_type = "announcement"
}
}
}
@@ -184,7 +180,7 @@ public sealed class ResponsePlanToSocketMessagesMapper
{
type = "SKILL_ACTION",
ts = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
msgID = $"msg-{Guid.NewGuid():N}",
msgID = CreateHubMessageId(),
transID = transId,
data = new
{
@@ -209,9 +205,7 @@ public sealed class ResponsePlanToSocketMessagesMapper
prompt_id = "RUNTIME_PROMPT",
prompt_sub_category = "AN",
mim_id = "runtime-chat",
mim_type = "announcement",
intent = "unknown",
transcript = string.Empty
mim_type = "announcement"
}
}
}
@@ -234,5 +228,10 @@ public sealed class ResponsePlanToSocketMessagesMapper
.Replace("'", "'", StringComparison.Ordinal);
}
/// <summary>
/// Builds a fresh message identifier: the literal prefix <c>mid-</c> followed by a newly generated GUID
/// in the default hyphenated ("D") format.
/// </summary>
/// <returns>A string of the form <c>mid-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx</c>.</returns>
private static string CreateHubMessageId() => "mid-" + Guid.NewGuid().ToString("D");
/// <summary>
/// One planned outbound socket reply: the message text plus a delay in milliseconds.
/// </summary>
/// <param name="Text">The reply text; the mapper fills it with a serialized JSON payload.</param>
/// <param name="DelayMs">Delay in milliseconds attached to the reply; defaults to 0 (the delayed SKILL_ACTION reply uses 75).</param>
public sealed record SocketReplyPlan(string Text, int DelayMs = 0);
}

View File

@@ -9,4 +9,9 @@ Current fixture groups:
- `websocket/`
Sanitized Neo-Hub turn-flow examples used to replay `LISTEN`, `CONTEXT`, `CLIENT_NLU`, `CLIENT_ASR`, buffered-audio accumulation, pending/finalize states, and synthetic `EOS` / `SKILL_ACTION` behavior against the .NET implementation.
Current websocket fixture depth is uneven on purpose:
- `neo-hub-client-asr-joke.flow.json` now asserts a richer vertical slice than reply types alone. It captures the observed Node-oriented `CLIENT_ASR -> LISTEN -> EOS -> delayed SKILL_ACTION` joke turn with payload-shape expectations for `EOS` and joke `SKILL_ACTION`.
- The other websocket fixtures are still mainly sequencing fixtures. They are useful for replay and guardrails, but they should not be read as proof of broader payload parity.
Expand this folder whenever new robot traffic is captured and cleaned.

View File

@@ -36,6 +36,70 @@
"LISTEN",
"EOS",
"SKILL_ACTION"
],
"expectedReplies": [
{
"type": "LISTEN",
"jsonSubset": {
"type": "LISTEN",
"transID": "fixture-trans-joke",
"data": {
"asr": {
"text": "tell me a joke"
},
"nlu": {
"intent": "joke",
"rules": [
"wake-word"
]
},
"match": {
"intent": "joke",
"rule": "wake-word"
}
}
}
},
{
"type": "EOS",
"jsonSubset": {
"type": "EOS",
"transID": "fixture-trans-joke",
"data": {}
}
},
{
"type": "SKILL_ACTION",
"delayMs": 75,
"jsonSubset": {
"type": "SKILL_ACTION",
"transID": "fixture-trans-joke",
"data": {
"skill": {
"id": "@be/joke"
},
"action": {
"config": {
"jcp": {
"type": "SLIM",
"config": {
"play": {
"meta": {
"prompt_id": "RUNTIME_PROMPT",
"prompt_sub_category": "AN",
"mim_id": "runtime-joke",
"mim_type": "announcement"
}
}
}
}
}
},
"analytics": {},
"final": true
}
}
}
]
}
]

View File

@@ -5,6 +5,11 @@ namespace Jibo.Cloud.Tests.Fixtures;
internal static class WebSocketFixtureLoader
{
private static readonly JsonSerializerOptions SerializerOptions = new()
{
PropertyNameCaseInsensitive = true
};
public static WebSocketFixture Load(string relativePath)
{
var fullPath = Path.Combine(AppContext.BaseDirectory, relativePath);
@@ -32,7 +37,10 @@ internal static class WebSocketFixtureLoader
.EnumerateArray()
.Select(item => item.GetString() ?? string.Empty)
.Where(item => !string.IsNullOrWhiteSpace(item))
.ToArray()
.ToArray(),
ExpectedReplies = stepElement.TryGetProperty("expectedReplies", out var expectedReplies) && expectedReplies.ValueKind == JsonValueKind.Array
? JsonSerializer.Deserialize<List<ExpectedWebSocketReply>>(expectedReplies.GetRawText(), SerializerOptions) ?? []
: []
});
}
@@ -54,4 +62,12 @@ internal sealed class WebSocketFixtureStep
{
/// <summary>The message envelope this fixture step sends to the service under test.</summary>
public WebSocketMessageEnvelope Message { get; init; } = new();
/// <summary>Reply types expected for this step, in order; compared against the actual reply types.</summary>
public IReadOnlyList<string> ExpectedReplyTypes { get; init; } = [];
/// <summary>
/// Optional detailed expectations per reply (type, delay, JSON subset); empty when the fixture
/// step has no <c>expectedReplies</c> array.
/// </summary>
public IReadOnlyList<ExpectedWebSocketReply> ExpectedReplies { get; init; } = [];
}
/// <summary>
/// Detailed expectation for a single reply in a websocket fixture step.
/// </summary>
internal sealed class ExpectedWebSocketReply
{
    /// <summary>Expected reply type (for example <c>EOS</c>); an empty string when not supplied.</summary>
    public string Type { get; init; } = "";

    /// <summary>Expected reply delay in milliseconds, or <see langword="null"/> when the fixture does not pin one.</summary>
    public int? DelayMs { get; init; }

    /// <summary>
    /// JSON fragment the actual reply payload is expected to contain, or <see langword="null"/>
    /// when the fixture does not provide one.
    /// </summary>
    public JsonElement? JsonSubset { get; init; }
}

View File

@@ -54,6 +54,25 @@ public sealed class JiboWebSocketServiceTests
using var listenPayload = JsonDocument.Parse(replies[0].Text!);
Assert.Equal("hello jibo", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString());
Assert.Equal("chat", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
using var eosPayload = JsonDocument.Parse(replies[1].Text!);
Assert.True(eosPayload.RootElement.TryGetProperty("ts", out _));
Assert.StartsWith("mid-", eosPayload.RootElement.GetProperty("msgID").GetString());
Assert.Equal("trans-hello", eosPayload.RootElement.GetProperty("transID").GetString());
Assert.Equal(JsonValueKind.Object, eosPayload.RootElement.GetProperty("data").ValueKind);
using var skillPayload = JsonDocument.Parse(replies[2].Text!);
Assert.StartsWith("mid-", skillPayload.RootElement.GetProperty("msgID").GetString());
var meta = skillPayload.RootElement
.GetProperty("data")
.GetProperty("action")
.GetProperty("config")
.GetProperty("jcp")
.GetProperty("config")
.GetProperty("play")
.GetProperty("meta");
Assert.False(meta.TryGetProperty("intent", out _));
Assert.False(meta.TryGetProperty("transcript", out _));
}
[Fact]
@@ -426,6 +445,60 @@ public sealed class JiboWebSocketServiceTests
Assert.Equal("chitchat-skill", skillPayload.RootElement.GetProperty("data").GetProperty("skill").GetProperty("id").GetString());
}
/// <summary>
/// Sends a LISTEN followed by a CLIENT_ASR "tell me a joke" on the same hub token and checks the
/// envelope fields of the resulting EOS and the joke SKILL_ACTION payload, including its 75 ms delay.
/// </summary>
[Fact]
public async Task ClientAsrJokeFlow_MatchesNodePayloadShapeForEosAndSkillAction()
{
    // Both inbound messages share host, path, kind and token; only the JSON text differs.
    static WebSocketMessageEnvelope HubEnvelope(string text) => new()
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-client-asr-joke-token",
        Text = text
    };

    // Open the turn first; the replies to this LISTEN are not inspected by this test.
    await _service.HandleMessageAsync(
        HubEnvelope("""{"type":"LISTEN","transID":"trans-joke-shape","data":{"rules":["wake-word"]}}"""));

    var replies = await _service.HandleMessageAsync(
        HubEnvelope("""{"type":"CLIENT_ASR","transID":"trans-joke-shape","data":{"text":"tell me a joke"}}"""));

    Assert.Equal(3, replies.Count);
    Assert.Equal(75, replies[2].DelayMs);

    // Second reply: the EOS envelope.
    using var eosPayload = JsonDocument.Parse(replies[1].Text!);
    var eos = eosPayload.RootElement;
    Assert.Equal("EOS", eos.GetProperty("type").GetString());
    Assert.Equal("trans-joke-shape", eos.GetProperty("transID").GetString());
    Assert.True(eos.TryGetProperty("ts", out _));
    Assert.StartsWith("mid-", eos.GetProperty("msgID").GetString());
    Assert.Empty(eos.GetProperty("data").EnumerateObject());

    // Third reply: the joke SKILL_ACTION.
    using var skillPayload = JsonDocument.Parse(replies[2].Text!);
    var skill = skillPayload.RootElement;
    Assert.Equal("SKILL_ACTION", skill.GetProperty("type").GetString());
    Assert.Equal("trans-joke-shape", skill.GetProperty("transID").GetString());
    Assert.StartsWith("mid-", skill.GetProperty("msgID").GetString());
    Assert.Equal("@be/joke", skill.GetProperty("data").GetProperty("skill").GetProperty("id").GetString());

    // Walk data.action.config.jcp.config.play.meta one segment at a time.
    string[] metaPath = ["data", "action", "config", "jcp", "config", "play", "meta"];
    var meta = skill;
    foreach (var segment in metaPath)
    {
        meta = meta.GetProperty(segment);
    }

    Assert.Equal("RUNTIME_PROMPT", meta.GetProperty("prompt_id").GetString());
    Assert.Equal("AN", meta.GetProperty("prompt_sub_category").GetString());
    Assert.Equal("runtime-joke", meta.GetProperty("mim_id").GetString());
    Assert.Equal("announcement", meta.GetProperty("mim_type").GetString());

    // The meta object must not carry the intent/transcript fields.
    Assert.False(meta.TryGetProperty("intent", out _));
    Assert.False(meta.TryGetProperty("transcript", out _));
}
[Fact]
public async Task FollowUpTurn_UsesNewTurnStateWithoutLeakingBufferedAudio()
{
@@ -501,6 +574,71 @@ public sealed class JiboWebSocketServiceTests
var replies = await _service.HandleMessageAsync(step.Message);
var actualTypes = replies.Select(ReadReplyType).ToArray();
Assert.Equal(step.ExpectedReplyTypes, actualTypes);
// Steps that declare detailed expected replies are additionally checked reply by reply.
if (step.ExpectedReplies.Count > 0)
{
    // Assert.Equal takes (expected, actual): the fixture supplies the expected count,
    // so a mismatch is reported the right way round.
    Assert.Equal(step.ExpectedReplies.Count, replies.Count);
    for (var index = 0; index < step.ExpectedReplies.Count; index += 1)
    {
        var expectedReply = step.ExpectedReplies[index];
        Assert.Equal(expectedReply.Type, actualTypes[index]);

        // The delay is only checked when the fixture pins one.
        if (expectedReply.DelayMs.HasValue)
        {
            Assert.Equal(expectedReply.DelayMs.Value, replies[index].DelayMs);
        }

        // The payload is only checked when the fixture provides a JSON object to match against.
        if (expectedReply.JsonSubset is { ValueKind: JsonValueKind.Object } jsonSubset)
        {
            using var actualPayload = JsonDocument.Parse(replies[index].Text!);
            AssertJsonContains(jsonSubset, actualPayload.RootElement);
        }
    }
}
}
}
/// <summary>
/// Asserts that <paramref name="actual"/> contains everything described by <paramref name="expected"/>.
/// </summary>
/// <remarks>
/// Objects are matched as subsets: every expected property must exist on the actual object and match
/// recursively, while extra actual properties are ignored. Arrays must have the same length and match
/// element by element. Strings and booleans must be equal. Numbers are compared by value when both
/// sides fit in a <see cref="decimal"/> (so <c>75</c> matches <c>75.0</c>), and by raw JSON text otherwise.
/// </remarks>
/// <param name="expected">The JSON fragment that must be present.</param>
/// <param name="actual">The JSON element being checked.</param>
private static void AssertJsonContains(JsonElement expected, JsonElement actual)
{
    // The kinds must agree before any value comparison is attempted.
    Assert.Equal(expected.ValueKind, actual.ValueKind);

    switch (expected.ValueKind)
    {
        case JsonValueKind.Object:
            foreach (var property in expected.EnumerateObject())
            {
                Assert.True(actual.TryGetProperty(property.Name, out var actualProperty), $"Expected property '{property.Name}' was not found.");
                AssertJsonContains(property.Value, actualProperty);
            }

            break;
        case JsonValueKind.Array:
        {
            var expectedItems = expected.EnumerateArray().ToArray();
            var actualItems = actual.EnumerateArray().ToArray();
            Assert.Equal(expectedItems.Length, actualItems.Length);
            for (var index = 0; index < expectedItems.Length; index += 1)
            {
                AssertJsonContains(expectedItems[index], actualItems[index]);
            }

            break;
        }
        case JsonValueKind.String:
            Assert.Equal(expected.GetString(), actual.GetString());
            break;
        case JsonValueKind.Number:
        {
            // Compare numeric value rather than spelling, so equal numbers written differently still match.
            if (expected.TryGetDecimal(out var expectedNumber) && actual.TryGetDecimal(out var actualNumber))
            {
                Assert.Equal(expectedNumber, actualNumber);
            }
            else
            {
                // Values that do not fit in a decimal fall back to exact text comparison.
                Assert.Equal(expected.GetRawText(), actual.GetRawText());
            }

            break;
        }
        case JsonValueKind.True:
        case JsonValueKind.False:
            Assert.Equal(expected.GetBoolean(), actual.GetBoolean());
            break;
        case JsonValueKind.Null:
            Assert.Equal(JsonValueKind.Null, actual.ValueKind);
            break;
        default:
            Assert.Equal(expected.GetRawText(), actual.GetRawText());
            break;
    }
}