diff --git a/OpenJibo/docs/development-plan.md b/OpenJibo/docs/development-plan.md index c30678e..6062a69 100644 --- a/OpenJibo/docs/development-plan.md +++ b/OpenJibo/docs/development-plan.md @@ -69,6 +69,19 @@ Near-term ASR work should stay staged: That keeps Node as the reverse-engineering oracle while letting the long-term `.NET` cloud gain real STT seams without pretending they are finished. +## Working Cloud Framework + +The current evidence in captures, fixtures, and Node behavior supports three main cloud interaction paths: + +1. local Jibo behavior observed by the cloud + The robot or its local skill stack already interpreted the turn and the cloud mainly tracks, acknowledges, or lightly completes it. +2. local Jibo behavior overridden or redirected by the cloud + The robot reports the turn state, but the cloud chooses a different synthetic reply path. +3. raw audio interpreted by the cloud + The robot sends buffered audio and the cloud performs transcript resolution before sending back `LISTEN`, `EOS`, and ESML-driven playback. + +Those are the right primary buckets for now. Additional side channels may still emerge later, especially around proactive traffic, direct skill/service sockets, or future on-device OS changes, but they should be treated as extensions to this model until captures prove otherwise. + ## Speech, Animation, And ESML The current joke flow is only a small foothold into Jibo expressiveness. diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/README.md b/OpenJibo/src/Jibo.Cloud/dotnet/README.md index 376a51e..542a694 100644 --- a/OpenJibo/src/Jibo.Cloud/dotnet/README.md +++ b/OpenJibo/src/Jibo.Cloud/dotnet/README.md @@ -120,6 +120,13 @@ That enables two distinct STT paths: The local tool path is intentionally off by default. It exists to help map real robot audio behavior while the stable hosted cloud remains the primary goal. 
+For local Ubuntu testing, however, the checked-in API host config (`appsettings.json`) overrides that default and enables the local tool path with the current Node-aligned tool locations: + +- `/usr/bin/ffmpeg` +- `/usr/bin/whisper.cpp/build/bin/whisper-cli` +- `/usr/bin/whisper.cpp/models/ggml-base.en.bin` +- temp audio under `/tmp/openjibo-stt` + Configuration lives under `OpenJibo:Stt`: - `EnableLocalWhisperCpp` @@ -130,3 +137,13 @@ Configuration lives under `OpenJibo:Stt`: - `TempDirectory` This is not yet a claim of production-ready onboard ASR. It is a `.NET` discovery seam that keeps us compatible with the Node oracle while we evaluate longer-term options such as Azure-hosted STT or a managed decode/transcribe stack. + +## Current Interaction Paths + +The working cloud model currently looks like three main paths: + +1. Jibo reports what already happened locally and the cloud tracks or lightly completes the turn. +2. Jibo reports what happened locally and the cloud responds with a different synthetic completion path. +3. Jibo streams raw audio and the cloud interprets the turn before sending ESML back. + +That framing matches the repo evidence so far and is a good operating model for current discovery. There may still be smaller side paths around proactive traffic, direct skill-to-service communication, or future on-robot extensions, but those are not the main cloud revive loop yet. 
diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Api/appsettings.json b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Api/appsettings.json index a507257..cb14b27 100644 --- a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Api/appsettings.json +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Api/appsettings.json @@ -8,6 +8,14 @@ "ProtocolTelemetry": { "Enabled": true, "DirectoryPath": "captures/http" + }, + "Stt": { + "EnableLocalWhisperCpp": true, + "FfmpegPath": "/usr/bin/ffmpeg", + "WhisperCliPath": "/usr/bin/whisper.cpp/build/bin/whisper-cli", + "WhisperModelPath": "/usr/bin/whisper.cpp/models/ggml-base.en.bin", + "WhisperLanguage": "en", + "TempDirectory": "/tmp/openjibo-stt" } } } diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Abstractions/IJiboExperienceContentRepository.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Abstractions/IJiboExperienceContentRepository.cs new file mode 100644 index 0000000..2c94456 --- /dev/null +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Abstractions/IJiboExperienceContentRepository.cs @@ -0,0 +1,21 @@ +namespace Jibo.Cloud.Application.Abstractions; + +public interface IJiboExperienceContentRepository +{ + Task GetCatalogAsync(CancellationToken cancellationToken = default); +} + +public sealed class JiboExperienceCatalog +{ + public IReadOnlyList Jokes { get; init; } = []; + public IReadOnlyList DanceAnimations { get; init; } = []; + public IReadOnlyList GreetingReplies { get; init; } = []; + public IReadOnlyList HowAreYouReplies { get; init; } = []; + public IReadOnlyList SurpriseReplies { get; init; } = []; + public IReadOnlyList PersonalReportReplies { get; init; } = []; + public IReadOnlyList WeatherReplies { get; init; } = []; + public IReadOnlyList CalendarReplies { get; init; } = []; + public IReadOnlyList CommuteReplies { get; init; } = []; + public IReadOnlyList NewsReplies { get; init; } = []; + public IReadOnlyList GenericFallbackReplies { get; init; 
} = []; +} diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/DemoConversationBroker.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/DemoConversationBroker.cs index 52cccc7..1040ff0 100644 --- a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/DemoConversationBroker.cs +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/DemoConversationBroker.cs @@ -2,36 +2,17 @@ using Jibo.Runtime.Abstractions; namespace Jibo.Cloud.Application.Services; -public sealed class DemoConversationBroker : IConversationBroker +public sealed class DemoConversationBroker(JiboInteractionService interactionService) : IConversationBroker { - public Task HandleTurnAsync(TurnContext turn, CancellationToken cancellationToken = default) + public async Task HandleTurnAsync(TurnContext turn, CancellationToken cancellationToken = default) { - var transcript = (turn.NormalizedTranscript ?? turn.RawTranscript ?? string.Empty).Trim(); - var lowered = transcript.ToLowerInvariant(); - var clientIntent = turn.Attributes.TryGetValue("clientIntent", out var rawClientIntent) - ? rawClientIntent?.ToString() - : null; - var semanticIntent = ResolveSemanticIntent(lowered, clientIntent); - - var reply = semanticIntent switch - { - "time" => $"It is {DateTime.Now:hh:mm tt}.", - "date" => $"Today is {DateTime.Now:dddd, MMMM d}.", - "dance" => "Okay. Watch this.", - _ => transcript.Length == 0 - ? "I am listening." - : lowered.Contains("hello") || lowered.Contains("hi") - ? "Hello from the OpenJibo cloud." - : lowered.Contains("joke") - ? "Why did the robot bring a ladder? Because it wanted to reach the cloud." 
- : $"I heard: {transcript}" - }; + var decision = await interactionService.BuildDecisionAsync(turn, cancellationToken); var plan = new ResponsePlan { SessionId = turn.SessionId, Status = ResponseStatus.Succeeded, - IntentName = semanticIntent, + IntentName = decision.IntentName, Topic = "conversation", DeviceId = turn.DeviceId, TargetHost = turn.HostName, @@ -41,7 +22,7 @@ public sealed class DemoConversationBroker : IConversationBroker new SpeakAction { Sequence = 0, - Text = reply, + Text = decision.ReplyText, Voice = "griffin" }, new ListenAction @@ -65,54 +46,16 @@ public sealed class DemoConversationBroker : IConversationBroker } }; - if (string.Equals(plan.IntentName, "joke", StringComparison.OrdinalIgnoreCase)) + if (!string.IsNullOrWhiteSpace(decision.SkillName)) { plan.Actions.Add(new InvokeNativeSkillAction { Sequence = 2, - SkillName = "@be/joke", - Payload = new Dictionary - { - ["replyType"] = "joke" - } + SkillName = decision.SkillName, + Payload = decision.SkillPayload ?? new Dictionary() }); } - return Task.FromResult(plan); - } - - private static string ResolveSemanticIntent(string loweredTranscript, string? 
clientIntent) - { - if (string.Equals(clientIntent, "askForTime", StringComparison.OrdinalIgnoreCase)) - { - return "time"; - } - - if (string.Equals(clientIntent, "askForDate", StringComparison.OrdinalIgnoreCase)) - { - return "date"; - } - - if (loweredTranscript.Contains("joke", StringComparison.Ordinal)) - { - return "joke"; - } - - if (loweredTranscript.Contains("dance", StringComparison.Ordinal)) - { - return "dance"; - } - - if (loweredTranscript.Contains("time", StringComparison.Ordinal)) - { - return "time"; - } - - if (loweredTranscript.Contains("date", StringComparison.Ordinal) || loweredTranscript.Contains("day", StringComparison.Ordinal)) - { - return "date"; - } - - return "chat"; + return plan; } } diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/IJiboRandomizer.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/IJiboRandomizer.cs new file mode 100644 index 0000000..27af9d6 --- /dev/null +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/IJiboRandomizer.cs @@ -0,0 +1,19 @@ +namespace Jibo.Cloud.Application.Services; + +public interface IJiboRandomizer +{ + T Choose(IReadOnlyList items); +} + +public sealed class DefaultJiboRandomizer : IJiboRandomizer +{ + public T Choose(IReadOnlyList items) + { + if (items.Count == 0) + { + throw new InvalidOperationException("Cannot choose from an empty list."); + } + + return items[Random.Shared.Next(items.Count)]; + } +} diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboExperienceContentCache.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboExperienceContentCache.cs new file mode 100644 index 0000000..ac00780 --- /dev/null +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboExperienceContentCache.cs @@ -0,0 +1,28 @@ +using Jibo.Cloud.Application.Abstractions; + +namespace Jibo.Cloud.Application.Services; + +public sealed class 
JiboExperienceContentCache(IJiboExperienceContentRepository repository) +{ + private readonly SemaphoreSlim _gate = new(1, 1); + private JiboExperienceCatalog? _catalog; + + public async Task GetCatalogAsync(CancellationToken cancellationToken = default) + { + if (_catalog is not null) + { + return _catalog; + } + + await _gate.WaitAsync(cancellationToken); + try + { + _catalog ??= await repository.GetCatalogAsync(cancellationToken); + return _catalog; + } + finally + { + _gate.Release(); + } + } +} diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboInteractionService.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboInteractionService.cs new file mode 100644 index 0000000..f2e04bf --- /dev/null +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboInteractionService.cs @@ -0,0 +1,175 @@ +using Jibo.Cloud.Application.Abstractions; +using Jibo.Runtime.Abstractions; + +namespace Jibo.Cloud.Application.Services; + +public sealed class JiboInteractionService( + JiboExperienceContentCache contentCache, + IJiboRandomizer randomizer) +{ + public async Task BuildDecisionAsync(TurnContext turn, CancellationToken cancellationToken = default) + { + var catalog = await contentCache.GetCatalogAsync(cancellationToken); + var transcript = (turn.NormalizedTranscript ?? turn.RawTranscript ?? string.Empty).Trim(); + var lowered = transcript.ToLowerInvariant(); + var clientIntent = turn.Attributes.TryGetValue("clientIntent", out var rawClientIntent) + ? 
rawClientIntent?.ToString() + : null; + + var semanticIntent = ResolveSemanticIntent(lowered, clientIntent); + return semanticIntent switch + { + "joke" => BuildJokeDecision(catalog), + "dance" => BuildDanceDecision(catalog), + "time" => new JiboInteractionDecision("time", $"It is {DateTime.Now:hh:mm tt}."), + "date" => new JiboInteractionDecision("date", $"Today is {DateTime.Now:dddd, MMMM d}."), + "hello" => new JiboInteractionDecision("hello", randomizer.Choose(catalog.GreetingReplies)), + "how_are_you" => new JiboInteractionDecision("how_are_you", randomizer.Choose(catalog.HowAreYouReplies)), + "surprise" => new JiboInteractionDecision("surprise", randomizer.Choose(catalog.SurpriseReplies)), + "personal_report" => new JiboInteractionDecision("personal_report", randomizer.Choose(catalog.PersonalReportReplies)), + "weather" => new JiboInteractionDecision("weather", randomizer.Choose(catalog.WeatherReplies)), + "calendar" => new JiboInteractionDecision("calendar", randomizer.Choose(catalog.CalendarReplies)), + "commute" => new JiboInteractionDecision("commute", randomizer.Choose(catalog.CommuteReplies)), + "news" => new JiboInteractionDecision("news", randomizer.Choose(catalog.NewsReplies)), + _ => new JiboInteractionDecision("chat", BuildGenericReply(catalog, transcript, lowered)) + }; + } + + private JiboInteractionDecision BuildJokeDecision(JiboExperienceCatalog catalog) + { + var joke = randomizer.Choose(catalog.Jokes); + return new JiboInteractionDecision( + "joke", + joke, + "@be/joke", + new Dictionary + { + ["replyType"] = "joke" + }); + } + + private JiboInteractionDecision BuildDanceDecision(JiboExperienceCatalog catalog) + { + var dance = randomizer.Choose(catalog.DanceAnimations); + return new JiboInteractionDecision( + "dance", + "Okay. Watch this.", + "chitchat-skill", + new Dictionary + { + ["esml"] = $"Okay. 
Watch this.", + ["mim_id"] = "runtime-chat", + ["mim_type"] = "announcement" + }); + } + + private string BuildGenericReply(JiboExperienceCatalog catalog, string transcript, string lowered) + { + if (string.IsNullOrWhiteSpace(transcript)) + { + return "I am listening."; + } + + if (lowered.Contains("good morning", StringComparison.Ordinal)) + { + return "Good morning! It is nice to hear your voice."; + } + + if (lowered.Contains("good afternoon", StringComparison.Ordinal)) + { + return "Good afternoon. I am happy to be here."; + } + + if (lowered.Contains("good night", StringComparison.Ordinal)) + { + return "Good night. Sleep tight."; + } + + return randomizer.Choose(catalog.GenericFallbackReplies).Replace("{transcript}", transcript, StringComparison.Ordinal); + } + + private static string ResolveSemanticIntent(string loweredTranscript, string? clientIntent) + { + if (string.Equals(clientIntent, "askForTime", StringComparison.OrdinalIgnoreCase)) + { + return "time"; + } + + if (string.Equals(clientIntent, "askForDate", StringComparison.OrdinalIgnoreCase)) + { + return "date"; + } + + if (loweredTranscript.Contains("joke", StringComparison.Ordinal)) + { + return "joke"; + } + + if (loweredTranscript.Contains("dance", StringComparison.Ordinal)) + { + return "dance"; + } + + if (loweredTranscript.Contains("surprise", StringComparison.Ordinal)) + { + return "surprise"; + } + + if (loweredTranscript.Contains("personal report", StringComparison.Ordinal)) + { + return "personal_report"; + } + + if (loweredTranscript.Contains("weather", StringComparison.Ordinal)) + { + return "weather"; + } + + if (loweredTranscript.Contains("calendar", StringComparison.Ordinal)) + { + return "calendar"; + } + + if (loweredTranscript.Contains("commute", StringComparison.Ordinal)) + { + return "commute"; + } + + if (loweredTranscript.Contains("news", StringComparison.Ordinal)) + { + return "news"; + } + + if (loweredTranscript.Contains("how are you", StringComparison.Ordinal) || + 
loweredTranscript.Contains("what's up", StringComparison.Ordinal) || + loweredTranscript.Contains("what s up", StringComparison.Ordinal)) + { + return "how_are_you"; + } + + if (loweredTranscript.Contains("hello", StringComparison.Ordinal) || + loweredTranscript.Contains("hi", StringComparison.Ordinal) || + loweredTranscript.Contains("hey", StringComparison.Ordinal)) + { + return "hello"; + } + + if (loweredTranscript.Contains("time", StringComparison.Ordinal)) + { + return "time"; + } + + if (loweredTranscript.Contains("date", StringComparison.Ordinal) || loweredTranscript.Contains("day", StringComparison.Ordinal)) + { + return "date"; + } + + return "chat"; + } +} + +public sealed record JiboInteractionDecision( + string IntentName, + string ReplyText, + string? SkillName = null, + IDictionary? SkillPayload = null); diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ResponsePlanToSocketMessagesMapper.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ResponsePlanToSocketMessagesMapper.cs index 46a6b92..70cb9a9 100644 --- a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ResponsePlanToSocketMessagesMapper.cs +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ResponsePlanToSocketMessagesMapper.cs @@ -164,16 +164,18 @@ public sealed class ResponsePlanToSocketMessagesMapper private static object BuildSkillPayload(ResponsePlan plan, TurnContext turn, string transId, SpeakAction speak, InvokeNativeSkillAction? skill) { + var skillPayload = skill?.Payload; var isJoke = string.Equals(plan.IntentName, "joke", StringComparison.OrdinalIgnoreCase) || string.Equals(skill?.SkillName, "@be/joke", StringComparison.OrdinalIgnoreCase); var isDance = string.Equals(plan.IntentName, "dance", StringComparison.OrdinalIgnoreCase); - var skillId = isJoke ? "@be/joke" : skill?.SkillName ?? "chitchat-skill"; - var esml = isDance + var skillId = ReadPayloadString(skillPayload, "skillId") ?? (isJoke ? 
"@be/joke" : skill?.SkillName ?? "chitchat-skill"); + var esml = ReadPayloadString(skillPayload, "esml") ?? (isDance ? "Okay. Watch this." : isJoke ? $"{EscapeXml(speak.Text)}" - : $"{EscapeXml(speak.Text)}"; - var mimId = isJoke ? "runtime-joke" : "runtime-chat"; + : $"{EscapeXml(speak.Text)}"); + var mimId = ReadPayloadString(skillPayload, "mim_id") ?? (isJoke ? "runtime-joke" : "runtime-chat"); + var mimType = ReadPayloadString(skillPayload, "mim_type") ?? "announcement"; return new { @@ -204,7 +206,7 @@ public sealed class ResponsePlanToSocketMessagesMapper prompt_id = "RUNTIME_PROMPT", prompt_sub_category = "AN", mim_id = mimId, - mim_type = "announcement" + mim_type = mimType } } } @@ -271,6 +273,16 @@ public sealed class ResponsePlanToSocketMessagesMapper .Replace("'", "'", StringComparison.Ordinal); } + private static string? ReadPayloadString(IDictionary? payload, string key) + { + if (payload is null || !payload.TryGetValue(key, out var value)) + { + return null; + } + + return value?.ToString(); + } + private static string CreateHubMessageId() { return $"mid-{Guid.NewGuid()}"; diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Infrastructure/Content/InMemoryJiboExperienceContentRepository.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Infrastructure/Content/InMemoryJiboExperienceContentRepository.cs new file mode 100644 index 0000000..6b6162a --- /dev/null +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Infrastructure/Content/InMemoryJiboExperienceContentRepository.cs @@ -0,0 +1,81 @@ +using Jibo.Cloud.Application.Abstractions; + +namespace Jibo.Cloud.Infrastructure.Content; + +public sealed class InMemoryJiboExperienceContentRepository : IJiboExperienceContentRepository +{ + private static readonly JiboExperienceCatalog Catalog = new() + { + Jokes = + [ + "Why did the robot cross the road? Because it was programmed by the chicken.", + "Why was the robot tired when it got home? It had a hard drive.", + "What do you call a pirate robot? 
Arrrr two dee two.", + "Why did the robot go on vacation? It needed to recharge.", + "What kind of shoes do frogs wear? Open-toed." + ], + DanceAnimations = + [ + "rom-upbeat", + "rom-ballroom", + "rom-silly", + "rom-slowdance", + "rom-electronic", + "rom-twerk" + ], + GreetingReplies = + [ + "Hi there. It is really good to talk with you.", + "Hello there. I am glad you said hi.", + "Hey. I am happy to see you." + ], + HowAreYouReplies = + [ + "I am feeling cheerful and robotic.", + "I am doing great. Thanks for asking.", + "I am feeling bright-eyed and ready to help." + ], + SurpriseReplies = + [ + "I can definitely surprise you. We are still mapping that path, but I am ready for the next experiment.", + "Surprise mode is still taking shape, but I heard you loud and clear.", + "That sounds fun. I am not all the way there yet, but we can keep teaching me." + ], + PersonalReportReplies = + [ + "I heard your personal report request. That cloud path is still being mapped.", + "Personal report is recognized, but I am not ready to deliver the real report yet." + ], + WeatherReplies = + [ + "I heard your weather request. We still need to wire the real provider behind it.", + "Weather is on the map now, even though the real forecast path is not finished yet." + ], + CalendarReplies = + [ + "I heard your calendar request. The cloud knows the phrase, but the real calendar integration is still ahead.", + "Calendar is recognized. We still need to connect the actual service path." + ], + CommuteReplies = + [ + "I heard your commute request. That one is recognized, but not fully implemented yet.", + "Commute is on the discovery list now. The real travel answer still needs a provider." + ], + NewsReplies = + [ + "I heard your news request. That path is still a future cloud integration.", + "News is recognized, but I do not have the full news service behind it yet." + ], + GenericFallbackReplies = + [ + "Okay. 
You said, {transcript}.", + "I heard you say, {transcript}.", + "Thanks. I heard, {transcript}." + ] + }; + + public Task GetCatalogAsync(CancellationToken cancellationToken = default) + { + return Task.FromResult(Catalog); + } +} diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Infrastructure/DependencyInjection/ServiceCollectionExtensions.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Infrastructure/DependencyInjection/ServiceCollectionExtensions.cs index 1feb12a..038415b 100644 --- a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Infrastructure/DependencyInjection/ServiceCollectionExtensions.cs +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Infrastructure/DependencyInjection/ServiceCollectionExtensions.cs @@ -1,6 +1,7 @@ using Jibo.Cloud.Application.Abstractions; using Jibo.Cloud.Application.Services; using Jibo.Cloud.Infrastructure.Audio; +using Jibo.Cloud.Infrastructure.Content; using Jibo.Cloud.Infrastructure.Persistence; using Jibo.Cloud.Infrastructure.Telemetry; using Jibo.Runtime.Abstractions; @@ -23,6 +24,10 @@ public static class ServiceCollectionExtensions services.AddSingleton(sttOptions); services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); services.AddSingleton(); services.AddSingleton(); services.AddSingleton(); diff --git a/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboInteractionServiceTests.cs b/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboInteractionServiceTests.cs new file mode 100644 index 0000000..eb9bb1c --- /dev/null +++ b/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboInteractionServiceTests.cs @@ -0,0 +1,73 @@ +using Jibo.Cloud.Application.Services; +using Jibo.Cloud.Infrastructure.Content; +using Jibo.Runtime.Abstractions; + +namespace Jibo.Cloud.Tests.WebSockets; + +public sealed class JiboInteractionServiceTests +{ + [Fact] + public async Task BuildDecisionAsync_Joke_UsesCatalogBackedRandomContent() + { + var service = CreateService(); 
+ + var decision = await service.BuildDecisionAsync(new TurnContext + { + RawTranscript = "tell me a joke", + NormalizedTranscript = "tell me a joke" + }); + + Assert.Equal("joke", decision.IntentName); + Assert.Equal("@be/joke", decision.SkillName); + Assert.Equal("Why did the robot cross the road? Because it was programmed by the chicken.", decision.ReplyText); + } + + [Fact] + public async Task BuildDecisionAsync_Dance_UsesCatalogBackedAnimation() + { + var service = CreateService(); + + var decision = await service.BuildDecisionAsync(new TurnContext + { + RawTranscript = "do a dance", + NormalizedTranscript = "do a dance" + }); + + Assert.Equal("dance", decision.IntentName); + Assert.Equal("chitchat-skill", decision.SkillName); + Assert.Equal("Okay. Watch this.", decision.ReplyText); + Assert.Equal("Okay. Watch this.", decision.SkillPayload!["esml"]); + } + + [Fact] + public async Task BuildDecisionAsync_ClientNluAskForDate_MapsToDateIntent() + { + var service = CreateService(); + + var decision = await service.BuildDecisionAsync(new TurnContext + { + Attributes = new Dictionary + { + ["clientIntent"] = "askForDate" + } + }); + + Assert.Equal("date", decision.IntentName); + Assert.Contains("Today is", decision.ReplyText, StringComparison.Ordinal); + } + + private static JiboInteractionService CreateService() + { + return new JiboInteractionService( + new JiboExperienceContentCache(new InMemoryJiboExperienceContentRepository()), + new FirstItemRandomizer()); + } + + private sealed class FirstItemRandomizer : IJiboRandomizer + { + public T Choose(IReadOnlyList items) + { + return items[0]; + } + } +} diff --git a/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs b/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs index e9dbc58..2beb93d 100644 --- a/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs +++ b/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs @@ -2,6 +2,7 @@ using 
System.Text.Json; using Jibo.Cloud.Application.Abstractions; using Jibo.Cloud.Application.Services; using Jibo.Cloud.Domain.Models; +using Jibo.Cloud.Infrastructure.Content; using Jibo.Cloud.Infrastructure.Persistence; using Jibo.Cloud.Tests.Fixtures; @@ -16,7 +17,9 @@ public sealed class JiboWebSocketServiceTests { _store = new InMemoryCloudStateStore(); var turnContextMapper = new ProtocolToTurnContextMapper(); - var conversationBroker = new DemoConversationBroker(); + var contentRepository = new InMemoryJiboExperienceContentRepository(); + var contentCache = new JiboExperienceContentCache(contentRepository); + var conversationBroker = new DemoConversationBroker(new JiboInteractionService(contentCache, new DefaultJiboRandomizer())); var replyMapper = new ResponsePlanToSocketMessagesMapper(); var sttSelector = new DefaultSttStrategySelector( [ @@ -53,7 +56,7 @@ public sealed class JiboWebSocketServiceTests using var listenPayload = JsonDocument.Parse(replies[0].Text!); Assert.Equal("hello jibo", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString()); - Assert.Equal("chat", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString()); + Assert.Equal("hello", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString()); using var eosPayload = JsonDocument.Parse(replies[1].Text!); Assert.True(eosPayload.RootElement.TryGetProperty("ts", out _)); @@ -475,7 +478,7 @@ public sealed class JiboWebSocketServiceTests Assert.Equal(3, finalizeReplies.Count); using var listenPayload = JsonDocument.Parse(finalizeReplies[0].Text!); Assert.Equal("hello from buffered audio", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString()); - Assert.Equal("chat", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString()); + Assert.Equal("hello", 
listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString()); using var skillPayload = JsonDocument.Parse(finalizeReplies[2].Text!); Assert.Equal("chitchat-skill", skillPayload.RootElement.GetProperty("data").GetProperty("skill").GetProperty("id").GetString()); @@ -535,6 +538,45 @@ public sealed class JiboWebSocketServiceTests Assert.False(meta.TryGetProperty("transcript", out _)); } + [Fact] + public async Task ClientAsrDanceFlow_EmitsAnimatedSkillAction() + { + await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-client-asr-dance-token", + Text = """{"type":"LISTEN","transID":"trans-dance-shape","data":{"rules":["wake-word"]}}""" + }); + + var replies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-client-asr-dance-token", + Text = """{"type":"CLIENT_ASR","transID":"trans-dance-shape","data":{"text":"do a dance"}}""" + }); + + Assert.Equal(3, replies.Count); + Assert.Equal("SKILL_ACTION", ReadReplyType(replies[2])); + + using var skillPayload = JsonDocument.Parse(replies[2].Text!); + var esml = skillPayload.RootElement + .GetProperty("data") + .GetProperty("action") + .GetProperty("config") + .GetProperty("jcp") + .GetProperty("config") + .GetProperty("play") + .GetProperty("esml") + .GetString(); + + Assert.Contains("