open jibo architecture narrowing and streamlining

This commit is contained in:
Jacob Dubin
2026-04-17 17:49:43 -05:00
parent fe1e11653f
commit b030d6faeb
13 changed files with 511 additions and 74 deletions

View File

@@ -69,6 +69,19 @@ Near-term ASR work should stay staged:
That keeps Node as the reverse-engineering oracle while letting the long-term `.NET` cloud gain real STT seams without pretending they are finished.
## Working Cloud Framework
The current evidence in captures, fixtures, and Node behavior supports three main cloud interaction paths:
1. local Jibo behavior observed by the cloud
The robot or its local skill stack already interpreted the turn and the cloud mainly tracks, acknowledges, or lightly completes it.
2. local Jibo behavior overridden or redirected by the cloud
The robot reports the turn state, but the cloud chooses a different synthetic reply path.
3. raw audio interpreted by the cloud
The robot sends buffered audio and the cloud performs transcript resolution before sending back `LISTEN`, `EOS`, and ESML-driven playback.
Those are the right primary buckets for now. Additional side channels may still emerge later, especially around proactive traffic, direct skill/service sockets, or future on-device OS changes, but they should be treated as extensions to this model until captures prove otherwise.
## Speech, Animation, And ESML
The current joke flow is only a small foothold into Jibo expressiveness.

View File

@@ -120,6 +120,13 @@ That enables two distinct STT paths:
The local tool path is intentionally off by default. It exists to help map real robot audio behavior while the stable hosted cloud remains the primary goal.
The checked-in API host config overrides that default for local Ubuntu testing: it turns the local tool path on and points it at the current Node-aligned tool locations:
- `/usr/bin/ffmpeg`
- `/usr/bin/whisper.cpp/build/bin/whisper-cli`
- `/usr/bin/whisper.cpp/models/ggml-base.en.bin`
- temp audio under `/tmp/openjibo-stt`
Configuration lives under `OpenJibo:Stt`:
- `EnableLocalWhisperCpp`
@@ -130,3 +137,13 @@ Configuration lives under `OpenJibo:Stt`:
- `TempDirectory`
This is not yet a claim of production-ready onboard ASR. It is a `.NET` discovery seam that keeps us compatible with the Node oracle while we evaluate longer-term options such as Azure-hosted STT or a managed decode/transcribe stack.
## Current Interaction Paths
The working cloud model currently looks like three main paths:
1. Jibo reports what already happened locally and the cloud tracks or lightly completes the turn.
2. Jibo reports what happened locally and the cloud responds with a different synthetic completion path.
3. Jibo streams raw audio and the cloud interprets the turn before sending ESML back.
That framing matches the repo evidence so far and is a good operating model for current discovery. There may still be smaller side paths around proactive traffic, direct skill-to-service communication, or future on-robot extensions, but those are not the main cloud revive loop yet.

View File

@@ -8,6 +8,14 @@
"ProtocolTelemetry": {
"Enabled": true,
"DirectoryPath": "captures/http"
},
"Stt": {
"EnableLocalWhisperCpp": true,
"FfmpegPath": "/usr/bin/ffmpeg",
"WhisperCliPath": "/usr/bin/whisper.cpp/build/bin/whisper-cli",
"WhisperModelPath": "/usr/bin/whisper.cpp/models/ggml-base.en.bin",
"WhisperLanguage": "en",
"TempDirectory": "/tmp/openjibo-stt"
}
}
}

View File

@@ -0,0 +1,21 @@
namespace Jibo.Cloud.Application.Abstractions;
/// <summary>
/// Source of the canned experience content (jokes, dance animation names, reply phrases)
/// described by <see cref="JiboExperienceCatalog"/>.
/// </summary>
public interface IJiboExperienceContentRepository
{
/// <summary>
/// Asynchronously returns the experience catalog.
/// </summary>
/// <param name="cancellationToken">Token an implementation may observe to abandon the load.</param>
/// <returns>A task that yields the catalog.</returns>
Task<JiboExperienceCatalog> GetCatalogAsync(CancellationToken cancellationToken = default);
}
/// <summary>
/// Init-only bag of canned response content, one list per kind of turn.
/// Every list defaults to empty, so a catalog that omits a category exposes an empty list rather than null.
/// </summary>
public sealed class JiboExperienceCatalog
{
/// <summary>Joke texts.</summary>
public IReadOnlyList<string> Jokes { get; init; } = [];
/// <summary>Dance animation names; JiboInteractionService inserts the chosen name into the dance ESML filter.</summary>
public IReadOnlyList<string> DanceAnimations { get; init; } = [];
/// <summary>Replies for the "hello" intent.</summary>
public IReadOnlyList<string> GreetingReplies { get; init; } = [];
/// <summary>Replies for the "how_are_you" intent.</summary>
public IReadOnlyList<string> HowAreYouReplies { get; init; } = [];
/// <summary>Replies for the "surprise" intent.</summary>
public IReadOnlyList<string> SurpriseReplies { get; init; } = [];
/// <summary>Replies for the "personal_report" intent.</summary>
public IReadOnlyList<string> PersonalReportReplies { get; init; } = [];
/// <summary>Replies for the "weather" intent.</summary>
public IReadOnlyList<string> WeatherReplies { get; init; } = [];
/// <summary>Replies for the "calendar" intent.</summary>
public IReadOnlyList<string> CalendarReplies { get; init; } = [];
/// <summary>Replies for the "commute" intent.</summary>
public IReadOnlyList<string> CommuteReplies { get; init; } = [];
/// <summary>Replies for the "news" intent.</summary>
public IReadOnlyList<string> NewsReplies { get; init; } = [];
/// <summary>
/// Reply templates for the fallback "chat" intent; JiboInteractionService replaces the literal
/// <c>{transcript}</c> token with the trimmed transcript.
/// </summary>
public IReadOnlyList<string> GenericFallbackReplies { get; init; } = [];
}

View File

@@ -2,36 +2,17 @@ using Jibo.Runtime.Abstractions;
namespace Jibo.Cloud.Application.Services;
public sealed class DemoConversationBroker : IConversationBroker
public sealed class DemoConversationBroker(JiboInteractionService interactionService) : IConversationBroker
{
public Task<ResponsePlan> HandleTurnAsync(TurnContext turn, CancellationToken cancellationToken = default)
public async Task<ResponsePlan> HandleTurnAsync(TurnContext turn, CancellationToken cancellationToken = default)
{
var transcript = (turn.NormalizedTranscript ?? turn.RawTranscript ?? string.Empty).Trim();
var lowered = transcript.ToLowerInvariant();
var clientIntent = turn.Attributes.TryGetValue("clientIntent", out var rawClientIntent)
? rawClientIntent?.ToString()
: null;
var semanticIntent = ResolveSemanticIntent(lowered, clientIntent);
var reply = semanticIntent switch
{
"time" => $"It is {DateTime.Now:hh:mm tt}.",
"date" => $"Today is {DateTime.Now:dddd, MMMM d}.",
"dance" => "Okay. Watch this.",
_ => transcript.Length == 0
? "I am listening."
: lowered.Contains("hello") || lowered.Contains("hi")
? "Hello from the OpenJibo cloud."
: lowered.Contains("joke")
? "Why did the robot bring a ladder? Because it wanted to reach the cloud."
: $"I heard: {transcript}"
};
var decision = await interactionService.BuildDecisionAsync(turn, cancellationToken);
var plan = new ResponsePlan
{
SessionId = turn.SessionId,
Status = ResponseStatus.Succeeded,
IntentName = semanticIntent,
IntentName = decision.IntentName,
Topic = "conversation",
DeviceId = turn.DeviceId,
TargetHost = turn.HostName,
@@ -41,7 +22,7 @@ public sealed class DemoConversationBroker : IConversationBroker
new SpeakAction
{
Sequence = 0,
Text = reply,
Text = decision.ReplyText,
Voice = "griffin"
},
new ListenAction
@@ -65,54 +46,16 @@ public sealed class DemoConversationBroker : IConversationBroker
}
};
if (string.Equals(plan.IntentName, "joke", StringComparison.OrdinalIgnoreCase))
if (!string.IsNullOrWhiteSpace(decision.SkillName))
{
plan.Actions.Add(new InvokeNativeSkillAction
{
Sequence = 2,
SkillName = "@be/joke",
Payload = new Dictionary<string, object?>
{
["replyType"] = "joke"
}
SkillName = decision.SkillName,
Payload = decision.SkillPayload ?? new Dictionary<string, object?>()
});
}
return Task.FromResult(plan);
}
private static string ResolveSemanticIntent(string loweredTranscript, string? clientIntent)
{
if (string.Equals(clientIntent, "askForTime", StringComparison.OrdinalIgnoreCase))
{
return "time";
}
if (string.Equals(clientIntent, "askForDate", StringComparison.OrdinalIgnoreCase))
{
return "date";
}
if (loweredTranscript.Contains("joke", StringComparison.Ordinal))
{
return "joke";
}
if (loweredTranscript.Contains("dance", StringComparison.Ordinal))
{
return "dance";
}
if (loweredTranscript.Contains("time", StringComparison.Ordinal))
{
return "time";
}
if (loweredTranscript.Contains("date", StringComparison.Ordinal) || loweredTranscript.Contains("day", StringComparison.Ordinal))
{
return "date";
}
return "chat";
return plan;
}
}

View File

@@ -0,0 +1,19 @@
namespace Jibo.Cloud.Application.Services;
/// <summary>
/// Picks one element from a list. Kept behind an interface so callers (for example tests)
/// can substitute a deterministic choice.
/// </summary>
public interface IJiboRandomizer
{
    /// <summary>Returns one element of <paramref name="items"/>.</summary>
    /// <typeparam name="T">Element type of the list.</typeparam>
    /// <param name="items">Candidates to pick from.</param>
    /// <returns>One of the candidates.</returns>
    T Choose<T>(IReadOnlyList<T> items);
}

/// <summary>
/// <see cref="IJiboRandomizer"/> that picks an index with <see cref="Random.Shared"/>.
/// </summary>
public sealed class DefaultJiboRandomizer : IJiboRandomizer
{
    /// <summary>Returns a randomly selected element of <paramref name="items"/>.</summary>
    /// <typeparam name="T">Element type of the list.</typeparam>
    /// <param name="items">Candidates to pick from; must be non-null and non-empty.</param>
    /// <returns>The element at a randomly chosen index.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
    /// <exception cref="InvalidOperationException"><paramref name="items"/> is empty.</exception>
    public T Choose<T>(IReadOnlyList<T> items)
    {
        // Fail with a descriptive argument error instead of a bare NullReferenceException on items.Count.
        ArgumentNullException.ThrowIfNull(items);

        if (items.Count == 0)
        {
            throw new InvalidOperationException("Cannot choose from an empty list.");
        }

        return items[Random.Shared.Next(items.Count)];
    }
}

View File

@@ -0,0 +1,28 @@
using Jibo.Cloud.Application.Abstractions;
namespace Jibo.Cloud.Application.Services;
/// <summary>
/// Fetches the experience catalog from the repository the first time it is requested and
/// hands back the stored instance on later calls.
/// </summary>
public sealed class JiboExperienceContentCache(IJiboExperienceContentRepository repository)
{
    // Serializes the first load so concurrent callers do not each hit the repository.
    private readonly SemaphoreSlim _loadGate = new(1, 1);
    private JiboExperienceCatalog? _cached;

    /// <summary>Returns the catalog, loading it from the repository if it has not been stored yet.</summary>
    /// <param name="cancellationToken">Flows to the semaphore wait and to the repository call.</param>
    /// <returns>The stored catalog instance.</returns>
    public async Task<JiboExperienceCatalog> GetCatalogAsync(CancellationToken cancellationToken = default)
    {
        // Fast path: nothing to wait for once a catalog has been stored.
        if (_cached is { } alreadyLoaded)
        {
            return alreadyLoaded;
        }

        await _loadGate.WaitAsync(cancellationToken);
        try
        {
            // Re-check under the gate: another caller may have stored the catalog while we waited.
            if (_cached is null)
            {
                _cached = await repository.GetCatalogAsync(cancellationToken);
            }

            return _cached;
        }
        finally
        {
            _loadGate.Release();
        }
    }
}

View File

@@ -0,0 +1,175 @@
using Jibo.Cloud.Application.Abstractions;
using Jibo.Runtime.Abstractions;
namespace Jibo.Cloud.Application.Services;
public sealed class JiboInteractionService(
JiboExperienceContentCache contentCache,
IJiboRandomizer randomizer)
{
/// <summary>
/// Resolves the turn to a semantic intent and builds the matching decision from catalog content.
/// </summary>
/// <param name="turn">Turn whose transcript and "clientIntent" attribute drive the intent resolution.</param>
/// <param name="cancellationToken">Flows to the catalog load.</param>
/// <returns>The decision for the resolved intent; unknown intents fall back to "chat".</returns>
public async Task<JiboInteractionDecision> BuildDecisionAsync(TurnContext turn, CancellationToken cancellationToken = default)
{
    var catalog = await contentCache.GetCatalogAsync(cancellationToken);

    // Prefer the normalized transcript, then the raw one; never work with null.
    var spoken = turn.NormalizedTranscript ?? turn.RawTranscript ?? string.Empty;
    spoken = spoken.Trim();
    var spokenLower = spoken.ToLowerInvariant();

    string? clientIntent = null;
    if (turn.Attributes.TryGetValue("clientIntent", out var clientIntentValue))
    {
        clientIntent = clientIntentValue?.ToString();
    }

    var intent = ResolveSemanticIntent(spokenLower, clientIntent);

    // For the plain "pick a canned reply" intents the decision's intent name equals the resolved intent.
    JiboInteractionDecision Canned(IReadOnlyList<string> replies) => new(intent, randomizer.Choose(replies));

    switch (intent)
    {
        case "joke":
            return BuildJokeDecision(catalog);
        case "dance":
            return BuildDanceDecision(catalog);
        case "time":
            return new JiboInteractionDecision("time", $"It is {DateTime.Now:hh:mm tt}.");
        case "date":
            return new JiboInteractionDecision("date", $"Today is {DateTime.Now:dddd, MMMM d}.");
        case "hello":
            return Canned(catalog.GreetingReplies);
        case "how_are_you":
            return Canned(catalog.HowAreYouReplies);
        case "surprise":
            return Canned(catalog.SurpriseReplies);
        case "personal_report":
            return Canned(catalog.PersonalReportReplies);
        case "weather":
            return Canned(catalog.WeatherReplies);
        case "calendar":
            return Canned(catalog.CalendarReplies);
        case "commute":
            return Canned(catalog.CommuteReplies);
        case "news":
            return Canned(catalog.NewsReplies);
        default:
            return new JiboInteractionDecision("chat", BuildGenericReply(catalog, spoken, spokenLower));
    }
}
/// <summary>
/// Picks a joke from the catalog and pairs it with an "@be/joke" skill invocation.
/// </summary>
/// <param name="catalog">Catalog supplying the joke texts.</param>
/// <returns>A "joke" decision whose reply text is the chosen joke.</returns>
private JiboInteractionDecision BuildJokeDecision(JiboExperienceCatalog catalog)
{
    var chosenJoke = randomizer.Choose(catalog.Jokes);

    var payload = new Dictionary<string, object?>
    {
        ["replyType"] = "joke"
    };

    return new JiboInteractionDecision(
        IntentName: "joke",
        ReplyText: chosenJoke,
        SkillName: "@be/joke",
        SkillPayload: payload);
}
/// <summary>
/// Picks a dance animation from the catalog and builds a "dance" decision whose skill payload
/// carries the ESML, "mim_id" and "mim_type" values for the reply.
/// </summary>
/// <param name="catalog">Catalog supplying the dance animation names.</param>
/// <returns>A "dance" decision targeting "chitchat-skill".</returns>
private JiboInteractionDecision BuildDanceDecision(JiboExperienceCatalog catalog)
{
    var animation = randomizer.Choose(catalog.DanceAnimations);

    // The chosen animation name becomes the second entry of the anim filter.
    var esml = $"<speak>Okay.<break size='0.2'/> Watch this.<anim cat='dance' filter='music, {animation}' /></speak>";

    var payload = new Dictionary<string, object?>
    {
        ["esml"] = esml,
        ["mim_id"] = "runtime-chat",
        ["mim_type"] = "announcement"
    };

    return new JiboInteractionDecision("dance", "Okay. Watch this.", "chitchat-skill", payload);
}
/// <summary>
/// Builds the reply text for the fallback "chat" intent.
/// </summary>
/// <param name="catalog">Catalog supplying the fallback reply templates.</param>
/// <param name="transcript">Trimmed transcript; substituted for the "{transcript}" token.</param>
/// <param name="lowered">Lower-cased transcript used for phrase matching.</param>
/// <returns>
/// A fixed prompt for an empty transcript, a fixed reply for a time-of-day greeting,
/// otherwise a catalog template with the transcript filled in.
/// </returns>
private string BuildGenericReply(JiboExperienceCatalog catalog, string transcript, string lowered)
{
    if (string.IsNullOrWhiteSpace(transcript))
    {
        return "I am listening.";
    }

    // Checked in this order; the first phrase found wins.
    (string Phrase, string Reply)[] timeOfDayGreetings =
    [
        ("good morning", "Good morning! It is nice to hear your voice."),
        ("good afternoon", "Good afternoon. I am happy to be here."),
        ("good night", "Good night. Sleep tight.")
    ];

    foreach (var (phrase, reply) in timeOfDayGreetings)
    {
        if (lowered.Contains(phrase, StringComparison.Ordinal))
        {
            return reply;
        }
    }

    var template = randomizer.Choose(catalog.GenericFallbackReplies);
    return template.Replace("{transcript}", transcript, StringComparison.Ordinal);
}
/// <summary>
/// Maps a turn to a semantic intent name.
/// </summary>
/// <remarks>
/// A recognized client intent ("askForTime" / "askForDate") wins outright. Otherwise the
/// lower-cased transcript is scanned for keywords in a fixed order and the first hit decides.
/// The short greeting tokens "hi" and "hey" are matched as whole words only; plain substring
/// matching fired on words such as "something", "chicago" or "they" and, because the greeting
/// check precedes the time/date checks, swallowed those requests as "hello".
/// </remarks>
/// <param name="loweredTranscript">Transcript already converted to lower case.</param>
/// <param name="clientIntent">Intent reported by the client, if any.</param>
/// <returns>The intent name, or "chat" when nothing matches.</returns>
private static string ResolveSemanticIntent(string loweredTranscript, string? clientIntent)
{
    if (string.Equals(clientIntent, "askForTime", StringComparison.OrdinalIgnoreCase))
    {
        return "time";
    }

    if (string.Equals(clientIntent, "askForDate", StringComparison.OrdinalIgnoreCase))
    {
        return "date";
    }

    if (loweredTranscript.Contains("joke", StringComparison.Ordinal))
    {
        return "joke";
    }

    if (loweredTranscript.Contains("dance", StringComparison.Ordinal))
    {
        return "dance";
    }

    if (loweredTranscript.Contains("surprise", StringComparison.Ordinal))
    {
        return "surprise";
    }

    if (loweredTranscript.Contains("personal report", StringComparison.Ordinal))
    {
        return "personal_report";
    }

    if (loweredTranscript.Contains("weather", StringComparison.Ordinal))
    {
        return "weather";
    }

    if (loweredTranscript.Contains("calendar", StringComparison.Ordinal))
    {
        return "calendar";
    }

    if (loweredTranscript.Contains("commute", StringComparison.Ordinal))
    {
        return "commute";
    }

    if (loweredTranscript.Contains("news", StringComparison.Ordinal))
    {
        return "news";
    }

    if (loweredTranscript.Contains("how are you", StringComparison.Ordinal) ||
        loweredTranscript.Contains("what's up", StringComparison.Ordinal) ||
        loweredTranscript.Contains("what s up", StringComparison.Ordinal))
    {
        return "how_are_you";
    }

    // "hello" stays a substring match; "hi" and "hey" are too short for that and need word boundaries.
    if (loweredTranscript.Contains("hello", StringComparison.Ordinal) ||
        ContainsWord(loweredTranscript, "hi") ||
        ContainsWord(loweredTranscript, "hey"))
    {
        return "hello";
    }

    if (loweredTranscript.Contains("time", StringComparison.Ordinal))
    {
        return "time";
    }

    if (loweredTranscript.Contains("date", StringComparison.Ordinal) || loweredTranscript.Contains("day", StringComparison.Ordinal))
    {
        return "date";
    }

    return "chat";
}

/// <summary>
/// Returns true when <paramref name="word"/> occurs in <paramref name="text"/> with no letter
/// or digit directly before or after it.
/// </summary>
private static bool ContainsWord(string text, string word)
{
    var searchFrom = 0;
    while (searchFrom <= text.Length - word.Length)
    {
        var index = text.IndexOf(word, searchFrom, StringComparison.Ordinal);
        if (index < 0)
        {
            return false;
        }

        var end = index + word.Length;
        var cleanStart = index == 0 || !char.IsLetterOrDigit(text[index - 1]);
        var cleanEnd = end == text.Length || !char.IsLetterOrDigit(text[end]);
        if (cleanStart && cleanEnd)
        {
            return true;
        }

        // This occurrence was embedded in a longer word; keep looking further right.
        searchFrom = index + 1;
    }

    return false;
}
}
/// <summary>
/// Result of interpreting one turn: the resolved intent, the text to speak, and an optional skill to invoke.
/// </summary>
/// <param name="IntentName">Semantic intent name, for example "joke", "dance" or "chat".</param>
/// <param name="ReplyText">Text of the spoken reply.</param>
/// <param name="SkillName">Skill to invoke alongside the reply; null when no skill is requested.</param>
/// <param name="SkillPayload">Key/value payload handed to the skill; null when there is none.</param>
public sealed record JiboInteractionDecision(
string IntentName,
string ReplyText,
string? SkillName = null,
IDictionary<string, object?>? SkillPayload = null);

View File

@@ -164,16 +164,18 @@ public sealed class ResponsePlanToSocketMessagesMapper
private static object BuildSkillPayload(ResponsePlan plan, TurnContext turn, string transId, SpeakAction speak, InvokeNativeSkillAction? skill)
{
var skillPayload = skill?.Payload;
var isJoke = string.Equals(plan.IntentName, "joke", StringComparison.OrdinalIgnoreCase) ||
string.Equals(skill?.SkillName, "@be/joke", StringComparison.OrdinalIgnoreCase);
var isDance = string.Equals(plan.IntentName, "dance", StringComparison.OrdinalIgnoreCase);
var skillId = isJoke ? "@be/joke" : skill?.SkillName ?? "chitchat-skill";
var esml = isDance
var skillId = ReadPayloadString(skillPayload, "skillId") ?? (isJoke ? "@be/joke" : skill?.SkillName ?? "chitchat-skill");
var esml = ReadPayloadString(skillPayload, "esml") ?? (isDance
? "<speak>Okay.<break size='0.2'/> Watch this.<anim cat='dance' filter='music, rom-upbeat' /></speak>"
: isJoke
? $"<speak><es cat='happy' filter='!ssa-only, !sfx-only' endNeutral='true'>{EscapeXml(speak.Text)}</es></speak>"
: $"<speak><es cat='neutral' filter='!ssa-only, !sfx-only' endNeutral='true'>{EscapeXml(speak.Text)}</es></speak>";
var mimId = isJoke ? "runtime-joke" : "runtime-chat";
: $"<speak><es cat='neutral' filter='!ssa-only, !sfx-only' endNeutral='true'>{EscapeXml(speak.Text)}</es></speak>");
var mimId = ReadPayloadString(skillPayload, "mim_id") ?? (isJoke ? "runtime-joke" : "runtime-chat");
var mimType = ReadPayloadString(skillPayload, "mim_type") ?? "announcement";
return new
{
@@ -204,7 +206,7 @@ public sealed class ResponsePlanToSocketMessagesMapper
prompt_id = "RUNTIME_PROMPT",
prompt_sub_category = "AN",
mim_id = mimId,
mim_type = "announcement"
mim_type = mimType
}
}
}
@@ -271,6 +273,16 @@ public sealed class ResponsePlanToSocketMessagesMapper
.Replace("'", "&apos;", StringComparison.Ordinal);
}
/// <summary>
/// Reads <paramref name="key"/> from an optional payload and returns the value's string form.
/// </summary>
/// <param name="payload">Payload dictionary; may be null.</param>
/// <param name="key">Key to look up.</param>
/// <returns>
/// The value's <c>ToString()</c> result, or null when the payload is null, the key is absent,
/// or the stored value is null.
/// </returns>
private static string? ReadPayloadString(IDictionary<string, object?>? payload, string key)
{
    return payload is not null && payload.TryGetValue(key, out var found)
        ? found?.ToString()
        : null;
}
private static string CreateHubMessageId()
{
return $"mid-{Guid.NewGuid()}";

View File

@@ -0,0 +1,81 @@
using Jibo.Cloud.Application.Abstractions;
namespace Jibo.Cloud.Infrastructure.Content;
/// <summary>
/// <see cref="IJiboExperienceContentRepository"/> backed by a hard-coded, in-memory catalog.
/// </summary>
public sealed class InMemoryJiboExperienceContentRepository : IJiboExperienceContentRepository
{
// Single catalog instance shared by every call to GetCatalogAsync.
private static readonly JiboExperienceCatalog Catalog = new()
{
Jokes =
[
"Why did the robot cross the road? Because it was programmed by the chicken.",
"Why was the robot tired when it got home? It had a hard drive.",
"What do you call a pirate robot? Arrrr two dee two.",
"Why did the robot go on vacation? It needed to recharge.",
"What kind of shoes do frogs wear? Open-toed."
],
DanceAnimations =
[
"rom-upbeat",
"rom-ballroom",
"rom-silly",
"rom-slowdance",
"rom-electronic",
"rom-twerk"
],
GreetingReplies =
[
"Hi there. It is really good to talk with you.",
"Hello there. I am glad you said hi.",
"Hey. I am happy to see you."
],
HowAreYouReplies =
[
"I am feeling cheerful and robotic.",
"I am doing great. Thanks for asking.",
"I am feeling bright-eyed and ready to help."
],
// The reply sets from here through NewsReplies are placeholder acknowledgements:
// each one says the request was recognized but the real path is not finished yet.
SurpriseReplies =
[
"I can definitely surprise you. We are still mapping that path, but I am ready for the next experiment.",
"Surprise mode is still taking shape, but I heard you loud and clear.",
"That sounds fun. I am not all the way there yet, but we can keep teaching me."
],
PersonalReportReplies =
[
"I heard your personal report request. That cloud path is still being mapped.",
"Personal report is recognized, but I am not ready to deliver the real report yet."
],
WeatherReplies =
[
"I heard your weather request. We still need to wire the real provider behind it.",
"Weather is on the map now, even though the real forecast path is not finished yet."
],
CalendarReplies =
[
"I heard your calendar request. The cloud knows the phrase, but the real calendar integration is still ahead.",
"Calendar is recognized. We still need to connect the actual service path."
],
CommuteReplies =
[
"I heard your commute request. That one is recognized, but not fully implemented yet.",
"Commute is on the discovery list now. The real travel answer still needs a provider."
],
NewsReplies =
[
"I heard your news request. That path is still a future cloud integration.",
"News is recognized, but I do not have the full news service behind it yet."
],
// "{transcript}" is a literal placeholder; JiboInteractionService substitutes the heard text for it.
GenericFallbackReplies =
[
"Okay. You said, {transcript}.",
"I heard you say, {transcript}.",
"Thanks. I heard, {transcript}."
]
};
/// <summary>
/// Returns the shared static catalog as an already-completed task.
/// </summary>
/// <param name="cancellationToken">Not observed; the catalog is already in memory.</param>
/// <returns>A completed task holding the static catalog.</returns>
public Task<JiboExperienceCatalog> GetCatalogAsync(CancellationToken cancellationToken = default)
{
return Task.FromResult(Catalog);
}
}

View File

@@ -1,6 +1,7 @@
using Jibo.Cloud.Application.Abstractions;
using Jibo.Cloud.Application.Services;
using Jibo.Cloud.Infrastructure.Audio;
using Jibo.Cloud.Infrastructure.Content;
using Jibo.Cloud.Infrastructure.Persistence;
using Jibo.Cloud.Infrastructure.Telemetry;
using Jibo.Runtime.Abstractions;
@@ -23,6 +24,10 @@ public static class ServiceCollectionExtensions
services.AddSingleton(sttOptions);
services.AddSingleton<ICloudStateStore, InMemoryCloudStateStore>();
services.AddSingleton<IJiboExperienceContentRepository, InMemoryJiboExperienceContentRepository>();
services.AddSingleton<JiboExperienceContentCache>();
services.AddSingleton<IJiboRandomizer, DefaultJiboRandomizer>();
services.AddSingleton<JiboInteractionService>();
services.AddSingleton<IConversationBroker, DemoConversationBroker>();
services.AddSingleton<IExternalProcessRunner, ExternalProcessRunner>();
services.AddSingleton<ISttStrategy, LocalWhisperCppBufferedAudioSttStrategy>();

View File

@@ -0,0 +1,73 @@
using Jibo.Cloud.Application.Services;
using Jibo.Cloud.Infrastructure.Content;
using Jibo.Runtime.Abstractions;
namespace Jibo.Cloud.Tests.WebSockets;
/// <summary>
/// Tests for <see cref="JiboInteractionService"/> against the in-memory catalog, using a
/// randomizer that always takes the first item so catalog-backed results are predictable.
/// </summary>
public sealed class JiboInteractionServiceTests
{
    [Fact]
    public async Task BuildDecisionAsync_Joke_UsesCatalogBackedRandomContent()
    {
        var decision = await DecideAsync(SpokenTurn("tell me a joke"));

        Assert.Equal("joke", decision.IntentName);
        Assert.Equal("@be/joke", decision.SkillName);
        Assert.Equal("Why did the robot cross the road? Because it was programmed by the chicken.", decision.ReplyText);
    }

    [Fact]
    public async Task BuildDecisionAsync_Dance_UsesCatalogBackedAnimation()
    {
        var decision = await DecideAsync(SpokenTurn("do a dance"));

        Assert.Equal("dance", decision.IntentName);
        Assert.Equal("chitchat-skill", decision.SkillName);
        Assert.Equal("Okay. Watch this.", decision.ReplyText);
        Assert.Equal("<speak>Okay.<break size='0.2'/> Watch this.<anim cat='dance' filter='music, rom-upbeat' /></speak>", decision.SkillPayload!["esml"]);
    }

    [Fact]
    public async Task BuildDecisionAsync_ClientNluAskForDate_MapsToDateIntent()
    {
        // No transcript at all: only the client-reported intent is available.
        var turn = new TurnContext
        {
            Attributes = new Dictionary<string, object?>
            {
                ["clientIntent"] = "askForDate"
            }
        };

        var decision = await DecideAsync(turn);

        Assert.Equal("date", decision.IntentName);
        Assert.Contains("Today is", decision.ReplyText, StringComparison.Ordinal);
    }

    // Builds a turn whose raw and normalized transcripts are the same text.
    private static TurnContext SpokenTurn(string transcript) => new()
    {
        RawTranscript = transcript,
        NormalizedTranscript = transcript
    };

    // Runs one turn through a freshly created service.
    private static Task<JiboInteractionDecision> DecideAsync(TurnContext turn) =>
        CreateService().BuildDecisionAsync(turn);

    private static JiboInteractionService CreateService()
    {
        var repository = new InMemoryJiboExperienceContentRepository();
        var cache = new JiboExperienceContentCache(repository);
        return new JiboInteractionService(cache, new FirstItemRandomizer());
    }

    // Deterministic stand-in for the real randomizer: always the first catalog entry.
    private sealed class FirstItemRandomizer : IJiboRandomizer
    {
        public T Choose<T>(IReadOnlyList<T> items) => items[0];
    }
}

View File

@@ -2,6 +2,7 @@ using System.Text.Json;
using Jibo.Cloud.Application.Abstractions;
using Jibo.Cloud.Application.Services;
using Jibo.Cloud.Domain.Models;
using Jibo.Cloud.Infrastructure.Content;
using Jibo.Cloud.Infrastructure.Persistence;
using Jibo.Cloud.Tests.Fixtures;
@@ -16,7 +17,9 @@ public sealed class JiboWebSocketServiceTests
{
_store = new InMemoryCloudStateStore();
var turnContextMapper = new ProtocolToTurnContextMapper();
var conversationBroker = new DemoConversationBroker();
var contentRepository = new InMemoryJiboExperienceContentRepository();
var contentCache = new JiboExperienceContentCache(contentRepository);
var conversationBroker = new DemoConversationBroker(new JiboInteractionService(contentCache, new DefaultJiboRandomizer()));
var replyMapper = new ResponsePlanToSocketMessagesMapper();
var sttSelector = new DefaultSttStrategySelector(
[
@@ -53,7 +56,7 @@ public sealed class JiboWebSocketServiceTests
using var listenPayload = JsonDocument.Parse(replies[0].Text!);
Assert.Equal("hello jibo", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString());
Assert.Equal("chat", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
Assert.Equal("hello", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
using var eosPayload = JsonDocument.Parse(replies[1].Text!);
Assert.True(eosPayload.RootElement.TryGetProperty("ts", out _));
@@ -475,7 +478,7 @@ public sealed class JiboWebSocketServiceTests
Assert.Equal(3, finalizeReplies.Count);
using var listenPayload = JsonDocument.Parse(finalizeReplies[0].Text!);
Assert.Equal("hello from buffered audio", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString());
Assert.Equal("chat", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
Assert.Equal("hello", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
using var skillPayload = JsonDocument.Parse(finalizeReplies[2].Text!);
Assert.Equal("chitchat-skill", skillPayload.RootElement.GetProperty("data").GetProperty("skill").GetProperty("id").GetString());
@@ -535,6 +538,45 @@ public sealed class JiboWebSocketServiceTests
Assert.False(meta.TryGetProperty("transcript", out _));
}
[Fact]
public async Task ClientAsrDanceFlow_EmitsAnimatedSkillAction()
{
    // Both frames use identical host, path, kind and token; only the JSON text differs.
    static WebSocketMessageEnvelope Frame(string json) => new()
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-client-asr-dance-token",
        Text = json
    };

    await _service.HandleMessageAsync(
        Frame("""{"type":"LISTEN","transID":"trans-dance-shape","data":{"rules":["wake-word"]}}"""));

    var replies = await _service.HandleMessageAsync(
        Frame("""{"type":"CLIENT_ASR","transID":"trans-dance-shape","data":{"text":"do a dance"}}"""));

    Assert.Equal(3, replies.Count);
    Assert.Equal("SKILL_ACTION", ReadReplyType(replies[2]));

    using var skillPayload = JsonDocument.Parse(replies[2].Text!);
    var root = skillPayload.RootElement;

    // Walk data.action.config.jcp.config.play.esml one property at a time.
    string[] esmlPath = ["data", "action", "config", "jcp", "config", "play", "esml"];
    var cursor = root;
    foreach (var segment in esmlPath)
    {
        cursor = cursor.GetProperty(segment);
    }

    var esml = cursor.GetString();
    Assert.Contains("<anim cat='dance' filter='music, ", esml, StringComparison.Ordinal);

    var skillId = root.GetProperty("data").GetProperty("skill").GetProperty("id").GetString();
    Assert.Equal("chitchat-skill", skillId);
}
[Fact]
public async Task FollowUpTurn_UsesNewTurnStateWithoutLeakingBufferedAudio()
{