more fixes for testing
This commit is contained in:
@@ -105,6 +105,11 @@ The current websocket bridge now also includes server-driven raw-audio turn comp
|
||||
- `EOS` is emitted on that auto-finalize path so turns do not remain open indefinitely
|
||||
- transcript-less raw-audio turns still fall back to a synthetic compatibility response, not real ASR
|
||||
|
||||
The current richer websocket parity slice is still intentionally narrow:
|
||||
|
||||
- the successful joke path now has fixture-backed reply sequencing and partial payload-shape fidelity through `CLIENT_ASR -> LISTEN -> EOS -> delayed SKILL_ACTION`
|
||||
- this is not a claim of broad skill parity or full Jibo websocket coverage
|
||||
|
||||
## Important Docs
|
||||
|
||||
- [Cloud overview](/src/Jibo.Cloud/README.md)
|
||||
|
||||
@@ -19,6 +19,7 @@ It is intentionally broader than the current Node server. The Node server is a p
|
||||
|
||||
- expand HTTP `X-Amz-Target` coverage from observed traffic and fixtures
|
||||
- grow WebSocket compatibility from stub acceptance into realistic turn orchestration
|
||||
- keep websocket parity fixture-driven, starting with exact sequencing and payload-shape fidelity for the successful joke vertical slice before claiming broader skill coverage
|
||||
- replace in-memory state with Azure SQL-backed persistence
|
||||
- add structured fixture replay tests
|
||||
- harden region/bootstrap docs by software version
|
||||
@@ -34,6 +35,26 @@ We still need to map more than the current Node server expresses. Priority disco
|
||||
- upload, logging, backup, and key-sharing flows
|
||||
- per-version configuration differences and region handling
|
||||
|
||||
## Current WebSocket Discovery Focus
|
||||
|
||||
The next fixture-driven websocket work should continue to separate three buckets:
|
||||
|
||||
- discovered behavior
|
||||
Grounded by the Node oracle, sanitized fixtures, and live captures
|
||||
- implemented parity
|
||||
Only the narrow slices currently replayed and tested in `.NET`
|
||||
- future hypotheses
|
||||
Ideas to investigate later, but not behaviors to silently bake into the hosted cloud
|
||||
|
||||
Right now the strongest implemented vertical slice beyond basic listen completion is the successful joke turn:
|
||||
|
||||
- `CLIENT_ASR` transcript-carrying turn completion
|
||||
- synthetic `LISTEN` result shaping
|
||||
- `EOS`
|
||||
- delayed joke `SKILL_ACTION`
|
||||
|
||||
That should remain the model for future websocket work: capture first, fixture second, parity third.
|
||||
|
||||
## Speech, Animation, And ESML
|
||||
|
||||
The current joke flow is only a small foothold into Jibo expressiveness.
|
||||
|
||||
@@ -74,6 +74,7 @@ The current .NET pass covers only a narrow, explicitly synthetic subset of obser
|
||||
- `EOS` emission after completed turns
|
||||
- delayed `SKILL_ACTION` emission after `EOS` on completed turn flows to better match the Node oracle timing
|
||||
- first richer vertical slice for joke/chat `SKILL_ACTION` playback
|
||||
- fixture-backed joke-turn payload fidelity for `CLIENT_ASR -> LISTEN -> EOS -> delayed SKILL_ACTION`, including Node-like `EOS` envelope fields and the currently observed joke `SKILL_ACTION` metadata shape
|
||||
|
||||
This does not yet mean parity for:
|
||||
|
||||
@@ -81,8 +82,32 @@ This does not yet mean parity for:
|
||||
- real STT provider integration and external ASR lifecycle timing
|
||||
- early-EOS behavior
|
||||
- multi-step skill lifecycles beyond the current synthetic playback response
|
||||
- broad `SKILL_ACTION` payload coverage outside the currently observed joke/chat playback slice
|
||||
- broader interaction, animation, or ESML command families
|
||||
|
||||
### Successful Joke Turn: What Is Grounded Now
|
||||
|
||||
The highest-confidence websocket vertical slice after the starter parity pass is now:
|
||||
|
||||
- inbound `CLIENT_ASR` carrying `"tell me a joke"`
|
||||
- outbound synthetic `LISTEN` result with joke intent and remembered rules
|
||||
- outbound `EOS` carrying `ts`, `msgID`, `transID`, and an empty `data` object
|
||||
- outbound `SKILL_ACTION` about 75 ms later
|
||||
- joke `SKILL_ACTION` payload shape aligned with the Node oracle for:
|
||||
- `data.skill.id = "@be/joke"`
|
||||
- `data.action.config.jcp.type = "SLIM"`
|
||||
- `data.action.config.jcp.config.play.meta.prompt_id = "RUNTIME_PROMPT"`
|
||||
- `data.action.config.jcp.config.play.meta.prompt_sub_category = "AN"`
|
||||
- `data.action.config.jcp.config.play.meta.mim_id = "runtime-joke"`
|
||||
- `data.action.config.jcp.config.play.meta.mim_type = "announcement"`
|
||||
|
||||
What remains intentionally unclaimed for that slice:
|
||||
|
||||
- whether the joke payload is complete beyond those fields
|
||||
- whether other successful skills use the same payload shape
|
||||
- whether additional websocket messages appear in other successful skill paths
|
||||
- whether any timing gaps besides the observed 75 ms `EOS -> SKILL_ACTION` delay matter
|
||||
|
||||
Current raw-audio fallback behavior remains explicitly synthetic:
|
||||
|
||||
- when a buffered-audio turn can be resolved through the synthetic transcript-hint seam, `.NET` now auto-finalizes and emits `LISTEN` + `EOS` + `SKILL_ACTION`
|
||||
|
||||
@@ -48,11 +48,10 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
messages.Add(new SocketReplyPlan(JsonSerializer.Serialize(new
|
||||
{
|
||||
type = "EOS",
|
||||
data = new
|
||||
{
|
||||
sessionId = plan.SessionId,
|
||||
transID = transId
|
||||
}
|
||||
ts = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
|
||||
msgID = CreateHubMessageId(),
|
||||
transID = transId,
|
||||
data = new { }
|
||||
})));
|
||||
|
||||
if (emitSkillActions && speak is not null)
|
||||
@@ -99,11 +98,10 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
new SocketReplyPlan(JsonSerializer.Serialize(new
|
||||
{
|
||||
type = "EOS",
|
||||
data = new
|
||||
{
|
||||
sessionId = session.SessionId,
|
||||
transID = transId
|
||||
}
|
||||
ts = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
|
||||
msgID = CreateHubMessageId(),
|
||||
transID = transId,
|
||||
data = new { }
|
||||
})),
|
||||
new SocketReplyPlan(JsonSerializer.Serialize(BuildGenericFallbackSkillPayload(transId)), DelayMs: 75)
|
||||
];
|
||||
@@ -138,7 +136,7 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
{
|
||||
type = "SKILL_ACTION",
|
||||
ts = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
|
||||
msgID = $"msg-{Guid.NewGuid():N}",
|
||||
msgID = CreateHubMessageId(),
|
||||
transID = transId,
|
||||
data = new
|
||||
{
|
||||
@@ -163,9 +161,7 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
prompt_id = "RUNTIME_PROMPT",
|
||||
prompt_sub_category = "AN",
|
||||
mim_id = mimId,
|
||||
mim_type = "announcement",
|
||||
intent = plan.IntentName ?? "unknown",
|
||||
transcript = turn.NormalizedTranscript ?? turn.RawTranscript ?? string.Empty
|
||||
mim_type = "announcement"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -184,7 +180,7 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
{
|
||||
type = "SKILL_ACTION",
|
||||
ts = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
|
||||
msgID = $"msg-{Guid.NewGuid():N}",
|
||||
msgID = CreateHubMessageId(),
|
||||
transID = transId,
|
||||
data = new
|
||||
{
|
||||
@@ -209,9 +205,7 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
prompt_id = "RUNTIME_PROMPT",
|
||||
prompt_sub_category = "AN",
|
||||
mim_id = "runtime-chat",
|
||||
mim_type = "announcement",
|
||||
intent = "unknown",
|
||||
transcript = string.Empty
|
||||
mim_type = "announcement"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -234,5 +228,10 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
.Replace("'", "'", StringComparison.Ordinal);
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds a fresh hub message identifier: the literal prefix <c>mid-</c>
/// followed by a newly generated GUID in its default hyphenated form.
/// </summary>
/// <returns>A new identifier string on every call.</returns>
private static string CreateHubMessageId() => "mid-" + Guid.NewGuid().ToString();
|
||||
|
||||
public sealed record SocketReplyPlan(string Text, int DelayMs = 0);
|
||||
}
|
||||
|
||||
@@ -9,4 +9,9 @@ Current fixture groups:
|
||||
- `websocket/`
|
||||
Sanitized Neo-Hub turn-flow examples used to replay `LISTEN`, `CONTEXT`, `CLIENT_NLU`, `CLIENT_ASR`, buffered-audio accumulation, pending/finalize states, and synthetic `EOS` / `SKILL_ACTION` behavior against the .NET implementation.
|
||||
|
||||
Current websocket fixture depth is uneven on purpose:
|
||||
|
||||
- `neo-hub-client-asr-joke.flow.json` now asserts a richer vertical slice than reply types alone. It captures the observed Node-oriented `CLIENT_ASR -> LISTEN -> EOS -> delayed SKILL_ACTION` joke turn with payload-shape expectations for `EOS` and joke `SKILL_ACTION`.
|
||||
- The other websocket fixtures are still mainly sequencing fixtures. They are useful for replay and guardrails, but they should not be read as proof of broader payload parity.
|
||||
|
||||
Expand this folder whenever new robot traffic is captured and cleaned.
|
||||
|
||||
@@ -36,6 +36,70 @@
|
||||
"LISTEN",
|
||||
"EOS",
|
||||
"SKILL_ACTION"
|
||||
],
|
||||
"expectedReplies": [
|
||||
{
|
||||
"type": "LISTEN",
|
||||
"jsonSubset": {
|
||||
"type": "LISTEN",
|
||||
"transID": "fixture-trans-joke",
|
||||
"data": {
|
||||
"asr": {
|
||||
"text": "tell me a joke"
|
||||
},
|
||||
"nlu": {
|
||||
"intent": "joke",
|
||||
"rules": [
|
||||
"wake-word"
|
||||
]
|
||||
},
|
||||
"match": {
|
||||
"intent": "joke",
|
||||
"rule": "wake-word"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "EOS",
|
||||
"jsonSubset": {
|
||||
"type": "EOS",
|
||||
"transID": "fixture-trans-joke",
|
||||
"data": {}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "SKILL_ACTION",
|
||||
"delayMs": 75,
|
||||
"jsonSubset": {
|
||||
"type": "SKILL_ACTION",
|
||||
"transID": "fixture-trans-joke",
|
||||
"data": {
|
||||
"skill": {
|
||||
"id": "@be/joke"
|
||||
},
|
||||
"action": {
|
||||
"config": {
|
||||
"jcp": {
|
||||
"type": "SLIM",
|
||||
"config": {
|
||||
"play": {
|
||||
"meta": {
|
||||
"prompt_id": "RUNTIME_PROMPT",
|
||||
"prompt_sub_category": "AN",
|
||||
"mim_id": "runtime-joke",
|
||||
"mim_type": "announcement"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"analytics": {},
|
||||
"final": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
@@ -5,6 +5,11 @@ namespace Jibo.Cloud.Tests.Fixtures;
|
||||
|
||||
internal static class WebSocketFixtureLoader
|
||||
{
|
||||
private static readonly JsonSerializerOptions SerializerOptions = new()
|
||||
{
|
||||
PropertyNameCaseInsensitive = true
|
||||
};
|
||||
|
||||
public static WebSocketFixture Load(string relativePath)
|
||||
{
|
||||
var fullPath = Path.Combine(AppContext.BaseDirectory, relativePath);
|
||||
@@ -32,7 +37,10 @@ internal static class WebSocketFixtureLoader
|
||||
.EnumerateArray()
|
||||
.Select(item => item.GetString() ?? string.Empty)
|
||||
.Where(item => !string.IsNullOrWhiteSpace(item))
|
||||
.ToArray()
|
||||
.ToArray(),
|
||||
ExpectedReplies = stepElement.TryGetProperty("expectedReplies", out var expectedReplies) && expectedReplies.ValueKind == JsonValueKind.Array
|
||||
? JsonSerializer.Deserialize<List<ExpectedWebSocketReply>>(expectedReplies.GetRawText(), SerializerOptions) ?? []
|
||||
: []
|
||||
});
|
||||
}
|
||||
|
||||
@@ -54,4 +62,12 @@ internal sealed class WebSocketFixtureStep
|
||||
{
|
||||
public WebSocketMessageEnvelope Message { get; init; } = new();
|
||||
public IReadOnlyList<string> ExpectedReplyTypes { get; init; } = [];
|
||||
public IReadOnlyList<ExpectedWebSocketReply> ExpectedReplies { get; init; } = [];
|
||||
}
|
||||
|
||||
/// <summary>
/// One entry of a fixture step's <c>expectedReplies</c> array, describing
/// what a single reply is expected to look like.
/// </summary>
internal sealed class ExpectedWebSocketReply
{
    /// <summary>
    /// Expected reply type; empty when the fixture entry does not supply one.
    /// </summary>
    public string Type { get; init; } = "";

    /// <summary>
    /// Expected reply delay in milliseconds, or <c>null</c> when the fixture
    /// entry does not specify a delay.
    /// </summary>
    public int? DelayMs { get; init; }

    /// <summary>
    /// JSON fragment the reply payload is expected to contain, or <c>null</c>
    /// when the fixture entry does not specify one.
    /// </summary>
    public JsonElement? JsonSubset { get; init; }
}
|
||||
|
||||
@@ -54,6 +54,25 @@ public sealed class JiboWebSocketServiceTests
|
||||
using var listenPayload = JsonDocument.Parse(replies[0].Text!);
|
||||
Assert.Equal("hello jibo", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString());
|
||||
Assert.Equal("chat", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
|
||||
|
||||
using var eosPayload = JsonDocument.Parse(replies[1].Text!);
|
||||
Assert.True(eosPayload.RootElement.TryGetProperty("ts", out _));
|
||||
Assert.StartsWith("mid-", eosPayload.RootElement.GetProperty("msgID").GetString());
|
||||
Assert.Equal("trans-hello", eosPayload.RootElement.GetProperty("transID").GetString());
|
||||
Assert.Equal(JsonValueKind.Object, eosPayload.RootElement.GetProperty("data").ValueKind);
|
||||
|
||||
using var skillPayload = JsonDocument.Parse(replies[2].Text!);
|
||||
Assert.StartsWith("mid-", skillPayload.RootElement.GetProperty("msgID").GetString());
|
||||
var meta = skillPayload.RootElement
|
||||
.GetProperty("data")
|
||||
.GetProperty("action")
|
||||
.GetProperty("config")
|
||||
.GetProperty("jcp")
|
||||
.GetProperty("config")
|
||||
.GetProperty("play")
|
||||
.GetProperty("meta");
|
||||
Assert.False(meta.TryGetProperty("intent", out _));
|
||||
Assert.False(meta.TryGetProperty("transcript", out _));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -426,6 +445,60 @@ public sealed class JiboWebSocketServiceTests
|
||||
Assert.Equal("chitchat-skill", skillPayload.RootElement.GetProperty("data").GetProperty("skill").GetProperty("id").GetString());
|
||||
}
|
||||
|
||||
[Fact]
public async Task ClientAsrJokeFlow_MatchesNodePayloadShapeForEosAndSkillAction()
{
    // Both inbound frames share the same host, path, kind and token; only the JSON text differs.
    static WebSocketMessageEnvelope ListenSocketFrame(string json) => new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-client-asr-joke-token",
        Text = json
    };

    // Open the turn with LISTEN; its replies are not inspected by this test.
    await _service.HandleMessageAsync(
        ListenSocketFrame("""{"type":"LISTEN","transID":"trans-joke-shape","data":{"rules":["wake-word"]}}"""));

    // Complete the turn with a transcript-carrying CLIENT_ASR frame.
    var replies = await _service.HandleMessageAsync(
        ListenSocketFrame("""{"type":"CLIENT_ASR","transID":"trans-joke-shape","data":{"text":"tell me a joke"}}"""));

    // Three replies are expected, the third one delayed by 75 ms.
    Assert.Equal(3, replies.Count);
    Assert.Equal(75, replies[2].DelayMs);

    // Second reply: EOS envelope with ts, msgID, transID and an empty data object.
    using var eosDocument = JsonDocument.Parse(replies[1].Text!);
    var eos = eosDocument.RootElement;
    Assert.Equal("EOS", eos.GetProperty("type").GetString());
    Assert.Equal("trans-joke-shape", eos.GetProperty("transID").GetString());
    Assert.True(eos.TryGetProperty("ts", out _));
    Assert.StartsWith("mid-", eos.GetProperty("msgID").GetString());
    Assert.Empty(eos.GetProperty("data").EnumerateObject());

    // Third reply: joke SKILL_ACTION envelope.
    using var skillDocument = JsonDocument.Parse(replies[2].Text!);
    var skill = skillDocument.RootElement;
    Assert.Equal("SKILL_ACTION", skill.GetProperty("type").GetString());
    Assert.Equal("trans-joke-shape", skill.GetProperty("transID").GetString());
    Assert.StartsWith("mid-", skill.GetProperty("msgID").GetString());
    Assert.Equal("@be/joke", skill.GetProperty("data").GetProperty("skill").GetProperty("id").GetString());

    // Walk data.action.config.jcp.config.play.meta one segment at a time.
    var jcp = skill.GetProperty("data").GetProperty("action").GetProperty("config").GetProperty("jcp");
    var meta = jcp.GetProperty("config").GetProperty("play").GetProperty("meta");

    Assert.Equal("RUNTIME_PROMPT", meta.GetProperty("prompt_id").GetString());
    Assert.Equal("AN", meta.GetProperty("prompt_sub_category").GetString());
    Assert.Equal("runtime-joke", meta.GetProperty("mim_id").GetString());
    Assert.Equal("announcement", meta.GetProperty("mim_type").GetString());

    // The meta object must not carry intent or transcript fields.
    Assert.False(meta.TryGetProperty("intent", out _));
    Assert.False(meta.TryGetProperty("transcript", out _));
}
|
||||
|
||||
[Fact]
|
||||
public async Task FollowUpTurn_UsesNewTurnStateWithoutLeakingBufferedAudio()
|
||||
{
|
||||
@@ -501,6 +574,71 @@ public sealed class JiboWebSocketServiceTests
|
||||
var replies = await _service.HandleMessageAsync(step.Message);
|
||||
var actualTypes = replies.Select(ReadReplyType).ToArray();
|
||||
Assert.Equal(step.ExpectedReplyTypes, actualTypes);
|
||||
|
||||
if (step.ExpectedReplies.Count > 0)
|
||||
{
|
||||
Assert.Equal(replies.Count, step.ExpectedReplies.Count);
|
||||
|
||||
for (var index = 0; index < step.ExpectedReplies.Count; index += 1)
|
||||
{
|
||||
var expectedReply = step.ExpectedReplies[index];
|
||||
Assert.Equal(expectedReply.Type, actualTypes[index]);
|
||||
|
||||
if (expectedReply.DelayMs.HasValue)
|
||||
{
|
||||
Assert.Equal(expectedReply.DelayMs.Value, replies[index].DelayMs);
|
||||
}
|
||||
|
||||
if (expectedReply.JsonSubset is { ValueKind: JsonValueKind.Object } jsonSubset)
|
||||
{
|
||||
using var actualPayload = JsonDocument.Parse(replies[index].Text!);
|
||||
AssertJsonContains(jsonSubset, actualPayload.RootElement);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Recursively asserts that <paramref name="actual"/> contains everything
/// described by <paramref name="expected"/>.
/// </summary>
/// <remarks>
/// Both elements must have the same <see cref="JsonValueKind"/>. Beyond that:
/// objects are matched as a subset (every expected property must exist in
/// <paramref name="actual"/>; extra actual properties are ignored), arrays
/// must have the same length and match element by element, strings and
/// booleans must be equal, and numbers must be numerically equal.
/// </remarks>
/// <param name="expected">The JSON fragment describing the required content.</param>
/// <param name="actual">The JSON element being checked.</param>
private static void AssertJsonContains(JsonElement expected, JsonElement actual)
{
    Assert.Equal(expected.ValueKind, actual.ValueKind);

    switch (expected.ValueKind)
    {
        case JsonValueKind.Object:
            foreach (var property in expected.EnumerateObject())
            {
                Assert.True(actual.TryGetProperty(property.Name, out var actualProperty), $"Expected property '{property.Name}' was not found.");
                AssertJsonContains(property.Value, actualProperty);
            }

            break;
        case JsonValueKind.Array:
        {
            var expectedLength = expected.GetArrayLength();
            Assert.Equal(expectedLength, actual.GetArrayLength());

            for (var index = 0; index < expectedLength; index += 1)
            {
                AssertJsonContains(expected[index], actual[index]);
            }

            break;
        }
        case JsonValueKind.String:
            Assert.Equal(expected.GetString(), actual.GetString());
            break;
        case JsonValueKind.Number:
            // Compare by value, not by spelling: 75, 75.0 and 7.5e1 are the same JSON number,
            // and a fixture should not fail just because the serializer formats it differently.
            if (expected.TryGetDecimal(out var expectedDecimal) && actual.TryGetDecimal(out var actualDecimal))
            {
                Assert.Equal(expectedDecimal, actualDecimal);
            }
            else if (expected.TryGetDouble(out var expectedDouble) && actual.TryGetDouble(out var actualDouble))
            {
                Assert.Equal(expectedDouble, actualDouble);
            }
            else
            {
                // Neither numeric representation fits both values; fall back to the raw text.
                Assert.Equal(expected.GetRawText(), actual.GetRawText());
            }

            break;
        case JsonValueKind.True:
        case JsonValueKind.False:
            Assert.Equal(expected.GetBoolean(), actual.GetBoolean());
            break;
        case JsonValueKind.Null:
            Assert.Equal(JsonValueKind.Null, actual.ValueKind);
            break;
        default:
            Assert.Equal(expected.GetRawText(), actual.GetRawText());
            break;
    }
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user