open jibo architecture narrowing and streamlining
This commit is contained in:
@@ -69,6 +69,19 @@ Near-term ASR work should stay staged:
|
||||
|
||||
That keeps Node as the reverse-engineering oracle while letting the long-term `.NET` cloud gain real STT seams without pretending they are finished.
|
||||
|
||||
## Working Cloud Framework
|
||||
|
||||
The current evidence in captures, fixtures, and Node behavior supports three main cloud interaction paths:
|
||||
|
||||
1. local Jibo behavior observed by the cloud
|
||||
The robot or its local skill stack already interpreted the turn and the cloud mainly tracks, acknowledges, or lightly completes it.
|
||||
2. local Jibo behavior overridden or redirected by the cloud
|
||||
The robot reports the turn state, but the cloud chooses a different synthetic reply path.
|
||||
3. raw audio interpreted by the cloud
|
||||
The robot sends buffered audio and the cloud performs transcript resolution before sending back `LISTEN`, `EOS`, and ESML-driven playback.
|
||||
|
||||
Those are the right primary buckets for now. Additional side channels may still emerge later, especially around proactive traffic, direct skill/service sockets, or future on-device OS changes, but they should be treated as extensions to this model until captures prove otherwise.
|
||||
|
||||
## Speech, Animation, And ESML
|
||||
|
||||
The current joke flow is only a small foothold into Jibo expressiveness.
|
||||
|
||||
@@ -120,6 +120,13 @@ That enables two distinct STT paths:
|
||||
|
||||
The local tool path is intentionally off by default. It exists to help map real robot audio behavior while the stable hosted cloud remains the primary goal.
|
||||
|
||||
For local Ubuntu testing, however, the checked-in API host config overrides that default and enables the local tool path, using the current Node-aligned tool locations:
|
||||
|
||||
- `/usr/bin/ffmpeg`
|
||||
- `/usr/bin/whisper.cpp/build/bin/whisper-cli`
|
||||
- `/usr/bin/whisper.cpp/models/ggml-base.en.bin`
|
||||
- temp audio under `/tmp/openjibo-stt`
|
||||
|
||||
Configuration lives under `OpenJibo:Stt`:
|
||||
|
||||
- `EnableLocalWhisperCpp`
|
||||
@@ -130,3 +137,13 @@ Configuration lives under `OpenJibo:Stt`:
|
||||
- `TempDirectory`
|
||||
|
||||
This is not yet a claim of production-ready onboard ASR. It is a `.NET` discovery seam that keeps us compatible with the Node oracle while we evaluate longer-term options such as Azure-hosted STT or a managed decode/transcribe stack.
|
||||
|
||||
## Current Interaction Paths
|
||||
|
||||
The working cloud model currently looks like three main paths:
|
||||
|
||||
1. Jibo reports what already happened locally and the cloud tracks or lightly completes the turn.
|
||||
2. Jibo reports what happened locally and the cloud responds with a different synthetic completion path.
|
||||
3. Jibo streams raw audio and the cloud interprets the turn before sending ESML back.
|
||||
|
||||
That framing matches the repo evidence so far and is a good operating model for current discovery. There may still be smaller side paths around proactive traffic, direct skill-to-service communication, or future on-robot extensions, but those are not the main cloud revive loop yet.
|
||||
|
||||
@@ -8,6 +8,14 @@
|
||||
"ProtocolTelemetry": {
|
||||
"Enabled": true,
|
||||
"DirectoryPath": "captures/http"
|
||||
},
|
||||
"Stt": {
|
||||
"EnableLocalWhisperCpp": true,
|
||||
"FfmpegPath": "/usr/bin/ffmpeg",
|
||||
"WhisperCliPath": "/usr/bin/whisper.cpp/build/bin/whisper-cli",
|
||||
"WhisperModelPath": "/usr/bin/whisper.cpp/models/ggml-base.en.bin",
|
||||
"WhisperLanguage": "en",
|
||||
"TempDirectory": "/tmp/openjibo-stt"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
namespace Jibo.Cloud.Application.Abstractions;
|
||||
|
||||
/// <summary>
/// Supplies the <see cref="JiboExperienceCatalog"/> of canned content.
/// </summary>
public interface IJiboExperienceContentRepository
{
    /// <summary>Loads the content catalog.</summary>
    /// <param name="cancellationToken">Token that can cancel the load.</param>
    /// <returns>A task producing the catalog.</returns>
    Task<JiboExperienceCatalog> GetCatalogAsync(CancellationToken cancellationToken = default);
}

/// <summary>
/// Init-only collection of string lists, one list per content category.
/// Every list defaults to an empty list when it is not set by the initializer.
/// </summary>
public sealed class JiboExperienceCatalog
{
    /// <summary>Joke texts.</summary>
    public IReadOnlyList<string> Jokes { get; init; } = [];

    /// <summary>Dance animation identifiers.</summary>
    public IReadOnlyList<string> DanceAnimations { get; init; } = [];

    /// <summary>Replies for greetings.</summary>
    public IReadOnlyList<string> GreetingReplies { get; init; } = [];

    /// <summary>Replies for "how are you" style questions.</summary>
    public IReadOnlyList<string> HowAreYouReplies { get; init; } = [];

    /// <summary>Replies for surprise requests.</summary>
    public IReadOnlyList<string> SurpriseReplies { get; init; } = [];

    /// <summary>Replies for personal report requests.</summary>
    public IReadOnlyList<string> PersonalReportReplies { get; init; } = [];

    /// <summary>Replies for weather requests.</summary>
    public IReadOnlyList<string> WeatherReplies { get; init; } = [];

    /// <summary>Replies for calendar requests.</summary>
    public IReadOnlyList<string> CalendarReplies { get; init; } = [];

    /// <summary>Replies for commute requests.</summary>
    public IReadOnlyList<string> CommuteReplies { get; init; } = [];

    /// <summary>Replies for news requests.</summary>
    public IReadOnlyList<string> NewsReplies { get; init; } = [];

    /// <summary>Replies used when no more specific category applies.</summary>
    public IReadOnlyList<string> GenericFallbackReplies { get; init; } = [];
}
|
||||
@@ -2,36 +2,17 @@ using Jibo.Runtime.Abstractions;
|
||||
|
||||
namespace Jibo.Cloud.Application.Services;
|
||||
|
||||
public sealed class DemoConversationBroker : IConversationBroker
|
||||
public sealed class DemoConversationBroker(JiboInteractionService interactionService) : IConversationBroker
|
||||
{
|
||||
public Task<ResponsePlan> HandleTurnAsync(TurnContext turn, CancellationToken cancellationToken = default)
|
||||
public async Task<ResponsePlan> HandleTurnAsync(TurnContext turn, CancellationToken cancellationToken = default)
|
||||
{
|
||||
var transcript = (turn.NormalizedTranscript ?? turn.RawTranscript ?? string.Empty).Trim();
|
||||
var lowered = transcript.ToLowerInvariant();
|
||||
var clientIntent = turn.Attributes.TryGetValue("clientIntent", out var rawClientIntent)
|
||||
? rawClientIntent?.ToString()
|
||||
: null;
|
||||
var semanticIntent = ResolveSemanticIntent(lowered, clientIntent);
|
||||
|
||||
var reply = semanticIntent switch
|
||||
{
|
||||
"time" => $"It is {DateTime.Now:hh:mm tt}.",
|
||||
"date" => $"Today is {DateTime.Now:dddd, MMMM d}.",
|
||||
"dance" => "Okay. Watch this.",
|
||||
_ => transcript.Length == 0
|
||||
? "I am listening."
|
||||
: lowered.Contains("hello") || lowered.Contains("hi")
|
||||
? "Hello from the OpenJibo cloud."
|
||||
: lowered.Contains("joke")
|
||||
? "Why did the robot bring a ladder? Because it wanted to reach the cloud."
|
||||
: $"I heard: {transcript}"
|
||||
};
|
||||
var decision = await interactionService.BuildDecisionAsync(turn, cancellationToken);
|
||||
|
||||
var plan = new ResponsePlan
|
||||
{
|
||||
SessionId = turn.SessionId,
|
||||
Status = ResponseStatus.Succeeded,
|
||||
IntentName = semanticIntent,
|
||||
IntentName = decision.IntentName,
|
||||
Topic = "conversation",
|
||||
DeviceId = turn.DeviceId,
|
||||
TargetHost = turn.HostName,
|
||||
@@ -41,7 +22,7 @@ public sealed class DemoConversationBroker : IConversationBroker
|
||||
new SpeakAction
|
||||
{
|
||||
Sequence = 0,
|
||||
Text = reply,
|
||||
Text = decision.ReplyText,
|
||||
Voice = "griffin"
|
||||
},
|
||||
new ListenAction
|
||||
@@ -65,54 +46,16 @@ public sealed class DemoConversationBroker : IConversationBroker
|
||||
}
|
||||
};
|
||||
|
||||
if (string.Equals(plan.IntentName, "joke", StringComparison.OrdinalIgnoreCase))
|
||||
if (!string.IsNullOrWhiteSpace(decision.SkillName))
|
||||
{
|
||||
plan.Actions.Add(new InvokeNativeSkillAction
|
||||
{
|
||||
Sequence = 2,
|
||||
SkillName = "@be/joke",
|
||||
Payload = new Dictionary<string, object?>
|
||||
{
|
||||
["replyType"] = "joke"
|
||||
}
|
||||
SkillName = decision.SkillName,
|
||||
Payload = decision.SkillPayload ?? new Dictionary<string, object?>()
|
||||
});
|
||||
}
|
||||
|
||||
return Task.FromResult(plan);
|
||||
}
|
||||
|
||||
private static string ResolveSemanticIntent(string loweredTranscript, string? clientIntent)
|
||||
{
|
||||
if (string.Equals(clientIntent, "askForTime", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
return "time";
|
||||
}
|
||||
|
||||
if (string.Equals(clientIntent, "askForDate", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
return "date";
|
||||
}
|
||||
|
||||
if (loweredTranscript.Contains("joke", StringComparison.Ordinal))
|
||||
{
|
||||
return "joke";
|
||||
}
|
||||
|
||||
if (loweredTranscript.Contains("dance", StringComparison.Ordinal))
|
||||
{
|
||||
return "dance";
|
||||
}
|
||||
|
||||
if (loweredTranscript.Contains("time", StringComparison.Ordinal))
|
||||
{
|
||||
return "time";
|
||||
}
|
||||
|
||||
if (loweredTranscript.Contains("date", StringComparison.Ordinal) || loweredTranscript.Contains("day", StringComparison.Ordinal))
|
||||
{
|
||||
return "date";
|
||||
}
|
||||
|
||||
return "chat";
|
||||
return plan;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
namespace Jibo.Cloud.Application.Services;
|
||||
|
||||
/// <summary>
/// Abstraction over "pick one item", so the selection strategy can be replaced
/// (for example with a deterministic picker).
/// </summary>
public interface IJiboRandomizer
{
    /// <summary>Picks one element of <paramref name="items"/>.</summary>
    /// <typeparam name="T">Element type of the list.</typeparam>
    /// <param name="items">Candidates to choose from.</param>
    /// <returns>One element of <paramref name="items"/>.</returns>
    T Choose<T>(IReadOnlyList<T> items);
}

/// <summary>
/// <see cref="IJiboRandomizer"/> that picks an index using <see cref="Random.Shared"/>.
/// </summary>
public sealed class DefaultJiboRandomizer : IJiboRandomizer
{
    /// <summary>Returns the element at a random index of <paramref name="items"/>.</summary>
    /// <typeparam name="T">Element type of the list.</typeparam>
    /// <param name="items">Non-null, non-empty list of candidates.</param>
    /// <returns>One element of <paramref name="items"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
    /// <exception cref="InvalidOperationException"><paramref name="items"/> is empty.</exception>
    public T Choose<T>(IReadOnlyList<T> items)
    {
        // Fail with a descriptive argument error instead of a NullReferenceException on items.Count.
        ArgumentNullException.ThrowIfNull(items);

        if (items.Count == 0)
        {
            throw new InvalidOperationException("Cannot choose from an empty list.");
        }

        return items[Random.Shared.Next(items.Count)];
    }
}
|
||||
@@ -0,0 +1,28 @@
|
||||
using Jibo.Cloud.Application.Abstractions;
|
||||
|
||||
namespace Jibo.Cloud.Application.Services;
|
||||
|
||||
/// <summary>
/// Keeps the catalog obtained from the repository in a field and returns that same
/// instance on later calls, so the repository is only queried while the field is still null.
/// </summary>
public sealed class JiboExperienceContentCache(IJiboExperienceContentRepository repository)
{
    // Single-entry semaphore: only one caller at a time runs the load section.
    private readonly SemaphoreSlim _gate = new(1, 1);
    private JiboExperienceCatalog? _catalog;

    /// <summary>Returns the stored catalog, loading it from the repository if none is stored yet.</summary>
    /// <param name="cancellationToken">Passed to the semaphore wait and to the repository call.</param>
    /// <returns>The stored catalog.</returns>
    public async Task<JiboExperienceCatalog> GetCatalogAsync(CancellationToken cancellationToken = default)
    {
        // Already loaded: return it without touching the semaphore.
        var loaded = _catalog;
        if (loaded is not null)
        {
            return loaded;
        }

        await _gate.WaitAsync(cancellationToken);
        try
        {
            // Check again: another caller may have stored the catalog while this one waited.
            if (_catalog is null)
            {
                _catalog = await repository.GetCatalogAsync(cancellationToken);
            }

            return _catalog;
        }
        finally
        {
            _gate.Release();
        }
    }
}
|
||||
@@ -0,0 +1,175 @@
|
||||
using Jibo.Cloud.Application.Abstractions;
|
||||
using Jibo.Runtime.Abstractions;
|
||||
|
||||
namespace Jibo.Cloud.Application.Services;
|
||||
|
||||
/// <summary>
/// Turns a <see cref="TurnContext"/> into a <see cref="JiboInteractionDecision"/>:
/// resolves an intent name from the client-supplied intent or from keywords in the
/// transcript, then builds the reply from the content catalog.
/// </summary>
public sealed class JiboInteractionService(
    JiboExperienceContentCache contentCache,
    IJiboRandomizer randomizer)
{
    /// <summary>
    /// One keyword rule: the intent is selected when the transcript contains any of
    /// <paramref name="Phrases"/> as a substring, or any of <paramref name="Words"/> as a whole word.
    /// </summary>
    private sealed record IntentRule(string Intent, string[] Phrases, string[] Words);

    // Evaluated top to bottom; the first matching rule wins, so the order is significant.
    private static readonly IntentRule[] TranscriptRules =
    [
        new IntentRule("joke", ["joke"], []),
        new IntentRule("dance", ["dance"], []),
        new IntentRule("surprise", ["surprise"], []),
        new IntentRule("personal_report", ["personal report"], []),
        new IntentRule("weather", ["weather"], []),
        new IntentRule("calendar", ["calendar"], []),
        new IntentRule("commute", ["commute"], []),
        new IntentRule("news", ["news"], []),
        new IntentRule("how_are_you", ["how are you", "what's up", "what s up"], []),
        // "hi" and "hey" must be whole words: as substrings they also match
        // "this", "which", "they", ... and would shadow the time/date rules below.
        new IntentRule("hello", ["hello"], ["hi", "hey"]),
        new IntentRule("time", ["time"], []),
        new IntentRule("date", ["date", "day"], [])
    ];

    /// <summary>Builds the decision for one turn.</summary>
    /// <param name="turn">
    /// Turn to answer. The normalized transcript is preferred over the raw one, and the
    /// "clientIntent" attribute, when present, is consulted before the transcript keywords.
    /// </param>
    /// <param name="cancellationToken">Passed to the catalog load.</param>
    /// <returns>The intent name, reply text and optional skill name/payload.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="turn"/> is null.</exception>
    public async Task<JiboInteractionDecision> BuildDecisionAsync(TurnContext turn, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(turn);

        var catalog = await contentCache.GetCatalogAsync(cancellationToken);
        var transcript = (turn.NormalizedTranscript ?? turn.RawTranscript ?? string.Empty).Trim();
        var lowered = transcript.ToLowerInvariant();
        var clientIntent = turn.Attributes.TryGetValue("clientIntent", out var rawClientIntent)
            ? rawClientIntent?.ToString()
            : null;

        var semanticIntent = ResolveSemanticIntent(lowered, clientIntent);
        return semanticIntent switch
        {
            "joke" => BuildJokeDecision(catalog),
            "dance" => BuildDanceDecision(catalog),
            "time" => new JiboInteractionDecision("time", $"It is {DateTime.Now:hh:mm tt}."),
            "date" => new JiboInteractionDecision("date", $"Today is {DateTime.Now:dddd, MMMM d}."),
            "hello" => BuildCatalogReply("hello", catalog.GreetingReplies),
            "how_are_you" => BuildCatalogReply("how_are_you", catalog.HowAreYouReplies),
            "surprise" => BuildCatalogReply("surprise", catalog.SurpriseReplies),
            "personal_report" => BuildCatalogReply("personal_report", catalog.PersonalReportReplies),
            "weather" => BuildCatalogReply("weather", catalog.WeatherReplies),
            "calendar" => BuildCatalogReply("calendar", catalog.CalendarReplies),
            "commute" => BuildCatalogReply("commute", catalog.CommuteReplies),
            "news" => BuildCatalogReply("news", catalog.NewsReplies),
            _ => new JiboInteractionDecision("chat", BuildGenericReply(catalog, transcript, lowered))
        };
    }

    /// <summary>Speech-only decision whose text is one randomizer-chosen entry of <paramref name="replies"/>.</summary>
    private JiboInteractionDecision BuildCatalogReply(string intentName, IReadOnlyList<string> replies)
    {
        return new JiboInteractionDecision(intentName, randomizer.Choose(replies));
    }

    /// <summary>Joke decision: a catalog joke as reply text plus the "@be/joke" skill.</summary>
    private JiboInteractionDecision BuildJokeDecision(JiboExperienceCatalog catalog)
    {
        var joke = randomizer.Choose(catalog.Jokes);
        return new JiboInteractionDecision(
            "joke",
            joke,
            "@be/joke",
            new Dictionary<string, object?>
            {
                ["replyType"] = "joke"
            });
    }

    /// <summary>
    /// Dance decision: fixed reply text plus a "chitchat-skill" payload whose ESML
    /// embeds the chosen dance animation name in the anim filter.
    /// </summary>
    private JiboInteractionDecision BuildDanceDecision(JiboExperienceCatalog catalog)
    {
        var dance = randomizer.Choose(catalog.DanceAnimations);
        return new JiboInteractionDecision(
            "dance",
            "Okay. Watch this.",
            "chitchat-skill",
            new Dictionary<string, object?>
            {
                ["esml"] = $"<speak>Okay.<break size='0.2'/> Watch this.<anim cat='dance' filter='music, {dance}' /></speak>",
                ["mim_id"] = "runtime-chat",
                ["mim_type"] = "announcement"
            });
    }

    /// <summary>
    /// Reply for the "chat" intent: a fixed prompt for an empty transcript, fixed answers
    /// for the three time-of-day greetings, otherwise a catalog fallback with the
    /// "{transcript}" placeholder replaced by the transcript.
    /// </summary>
    private string BuildGenericReply(JiboExperienceCatalog catalog, string transcript, string lowered)
    {
        if (string.IsNullOrWhiteSpace(transcript))
        {
            return "I am listening.";
        }

        if (lowered.Contains("good morning", StringComparison.Ordinal))
        {
            return "Good morning! It is nice to hear your voice.";
        }

        if (lowered.Contains("good afternoon", StringComparison.Ordinal))
        {
            return "Good afternoon. I am happy to be here.";
        }

        if (lowered.Contains("good night", StringComparison.Ordinal))
        {
            return "Good night. Sleep tight.";
        }

        return randomizer.Choose(catalog.GenericFallbackReplies).Replace("{transcript}", transcript, StringComparison.Ordinal);
    }

    /// <summary>
    /// Resolves the intent name. A client intent of "askForTime" / "askForDate"
    /// (case-insensitive) wins outright; otherwise the first matching entry of
    /// <see cref="TranscriptRules"/> is used, and "chat" when nothing matches.
    /// </summary>
    /// <param name="loweredTranscript">Transcript already lower-cased by the caller.</param>
    /// <param name="clientIntent">Intent reported by the client, or null.</param>
    private static string ResolveSemanticIntent(string loweredTranscript, string? clientIntent)
    {
        if (string.Equals(clientIntent, "askForTime", StringComparison.OrdinalIgnoreCase))
        {
            return "time";
        }

        if (string.Equals(clientIntent, "askForDate", StringComparison.OrdinalIgnoreCase))
        {
            return "date";
        }

        foreach (var rule in TranscriptRules)
        {
            var matched =
                Array.Exists(rule.Phrases, phrase => loweredTranscript.Contains(phrase, StringComparison.Ordinal)) ||
                Array.Exists(rule.Words, word => ContainsWholeWord(loweredTranscript, word));
            if (matched)
            {
                return rule.Intent;
            }
        }

        return "chat";
    }

    /// <summary>
    /// True when <paramref name="word"/> occurs in <paramref name="text"/> with no letter
    /// or digit directly before or after it.
    /// </summary>
    private static bool ContainsWholeWord(string text, string word)
    {
        var start = text.IndexOf(word, StringComparison.Ordinal);
        while (start >= 0)
        {
            var end = start + word.Length;
            var cleanBefore = start == 0 || !char.IsLetterOrDigit(text[start - 1]);
            var cleanAfter = end == text.Length || !char.IsLetterOrDigit(text[end]);
            if (cleanBefore && cleanAfter)
            {
                return true;
            }

            // Resume one character further so overlapping occurrences are still examined.
            start = text.IndexOf(word, start + 1, StringComparison.Ordinal);
        }

        return false;
    }
}

/// <summary>Outcome of <see cref="JiboInteractionService.BuildDecisionAsync"/>.</summary>
/// <param name="IntentName">Resolved intent name.</param>
/// <param name="ReplyText">Text of the reply.</param>
/// <param name="SkillName">Skill name attached to the decision, or null when there is none.</param>
/// <param name="SkillPayload">Key/value payload that accompanies <paramref name="SkillName"/>, or null.</param>
public sealed record JiboInteractionDecision(
    string IntentName,
    string ReplyText,
    string? SkillName = null,
    IDictionary<string, object?>? SkillPayload = null);
|
||||
@@ -164,16 +164,18 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
|
||||
private static object BuildSkillPayload(ResponsePlan plan, TurnContext turn, string transId, SpeakAction speak, InvokeNativeSkillAction? skill)
|
||||
{
|
||||
var skillPayload = skill?.Payload;
|
||||
var isJoke = string.Equals(plan.IntentName, "joke", StringComparison.OrdinalIgnoreCase) ||
|
||||
string.Equals(skill?.SkillName, "@be/joke", StringComparison.OrdinalIgnoreCase);
|
||||
var isDance = string.Equals(plan.IntentName, "dance", StringComparison.OrdinalIgnoreCase);
|
||||
var skillId = isJoke ? "@be/joke" : skill?.SkillName ?? "chitchat-skill";
|
||||
var esml = isDance
|
||||
var skillId = ReadPayloadString(skillPayload, "skillId") ?? (isJoke ? "@be/joke" : skill?.SkillName ?? "chitchat-skill");
|
||||
var esml = ReadPayloadString(skillPayload, "esml") ?? (isDance
|
||||
? "<speak>Okay.<break size='0.2'/> Watch this.<anim cat='dance' filter='music, rom-upbeat' /></speak>"
|
||||
: isJoke
|
||||
? $"<speak><es cat='happy' filter='!ssa-only, !sfx-only' endNeutral='true'>{EscapeXml(speak.Text)}</es></speak>"
|
||||
: $"<speak><es cat='neutral' filter='!ssa-only, !sfx-only' endNeutral='true'>{EscapeXml(speak.Text)}</es></speak>";
|
||||
var mimId = isJoke ? "runtime-joke" : "runtime-chat";
|
||||
: $"<speak><es cat='neutral' filter='!ssa-only, !sfx-only' endNeutral='true'>{EscapeXml(speak.Text)}</es></speak>");
|
||||
var mimId = ReadPayloadString(skillPayload, "mim_id") ?? (isJoke ? "runtime-joke" : "runtime-chat");
|
||||
var mimType = ReadPayloadString(skillPayload, "mim_type") ?? "announcement";
|
||||
|
||||
return new
|
||||
{
|
||||
@@ -204,7 +206,7 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
prompt_id = "RUNTIME_PROMPT",
|
||||
prompt_sub_category = "AN",
|
||||
mim_id = mimId,
|
||||
mim_type = "announcement"
|
||||
mim_type = mimType
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -271,6 +273,16 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
.Replace("'", "'", StringComparison.Ordinal);
|
||||
}
|
||||
|
||||
/// <summary>
/// Looks up <paramref name="key"/> in <paramref name="payload"/> and returns the value's
/// <c>ToString()</c>. Yields null when the payload is null, the key is absent, or the value is null.
/// </summary>
private static string? ReadPayloadString(IDictionary<string, object?>? payload, string key)
{
    return payload is not null && payload.TryGetValue(key, out var entry)
        ? entry?.ToString()
        : null;
}
|
||||
|
||||
private static string CreateHubMessageId()
|
||||
{
|
||||
return $"mid-{Guid.NewGuid()}";
|
||||
|
||||
@@ -0,0 +1,81 @@
|
||||
using Jibo.Cloud.Application.Abstractions;
|
||||
|
||||
namespace Jibo.Cloud.Infrastructure.Content;
|
||||
|
||||
/// <summary>
/// <see cref="IJiboExperienceContentRepository"/> that serves one hard-coded catalog
/// held in a static field; every call returns that same instance.
/// </summary>
public sealed class InMemoryJiboExperienceContentRepository : IJiboExperienceContentRepository
{
    private static readonly JiboExperienceCatalog BuiltInCatalog = new()
    {
        Jokes =
        [
            "Why did the robot cross the road? Because it was programmed by the chicken.",
            "Why was the robot tired when it got home? It had a hard drive.",
            "What do you call a pirate robot? Arrrr two dee two.",
            "Why did the robot go on vacation? It needed to recharge.",
            "What kind of shoes do frogs wear? Open-toed."
        ],

        DanceAnimations =
        [
            "rom-upbeat",
            "rom-ballroom",
            "rom-silly",
            "rom-slowdance",
            "rom-electronic",
            "rom-twerk"
        ],

        GreetingReplies =
        [
            "Hi there. It is really good to talk with you.",
            "Hello there. I am glad you said hi.",
            "Hey. I am happy to see you."
        ],

        HowAreYouReplies =
        [
            "I am feeling cheerful and robotic.",
            "I am doing great. Thanks for asking.",
            "I am feeling bright-eyed and ready to help."
        ],

        SurpriseReplies =
        [
            "I can definitely surprise you. We are still mapping that path, but I am ready for the next experiment.",
            "Surprise mode is still taking shape, but I heard you loud and clear.",
            "That sounds fun. I am not all the way there yet, but we can keep teaching me."
        ],

        PersonalReportReplies =
        [
            "I heard your personal report request. That cloud path is still being mapped.",
            "Personal report is recognized, but I am not ready to deliver the real report yet."
        ],

        WeatherReplies =
        [
            "I heard your weather request. We still need to wire the real provider behind it.",
            "Weather is on the map now, even though the real forecast path is not finished yet."
        ],

        CalendarReplies =
        [
            "I heard your calendar request. The cloud knows the phrase, but the real calendar integration is still ahead.",
            "Calendar is recognized. We still need to connect the actual service path."
        ],

        CommuteReplies =
        [
            "I heard your commute request. That one is recognized, but not fully implemented yet.",
            "Commute is on the discovery list now. The real travel answer still needs a provider."
        ],

        NewsReplies =
        [
            "I heard your news request. That path is still a future cloud integration.",
            "News is recognized, but I do not have the full news service behind it yet."
        ],

        // "{transcript}" is a literal placeholder inside these strings.
        GenericFallbackReplies =
        [
            "Okay. You said, {transcript}.",
            "I heard you say, {transcript}.",
            "Thanks. I heard, {transcript}."
        ]
    };

    /// <summary>Returns the built-in catalog as an already-completed task.</summary>
    /// <param name="cancellationToken">Not used by this implementation.</param>
    public Task<JiboExperienceCatalog> GetCatalogAsync(CancellationToken cancellationToken = default)
        => Task.FromResult(BuiltInCatalog);
}
|
||||
@@ -1,6 +1,7 @@
|
||||
using Jibo.Cloud.Application.Abstractions;
|
||||
using Jibo.Cloud.Application.Services;
|
||||
using Jibo.Cloud.Infrastructure.Audio;
|
||||
using Jibo.Cloud.Infrastructure.Content;
|
||||
using Jibo.Cloud.Infrastructure.Persistence;
|
||||
using Jibo.Cloud.Infrastructure.Telemetry;
|
||||
using Jibo.Runtime.Abstractions;
|
||||
@@ -23,6 +24,10 @@ public static class ServiceCollectionExtensions
|
||||
|
||||
services.AddSingleton(sttOptions);
|
||||
services.AddSingleton<ICloudStateStore, InMemoryCloudStateStore>();
|
||||
services.AddSingleton<IJiboExperienceContentRepository, InMemoryJiboExperienceContentRepository>();
|
||||
services.AddSingleton<JiboExperienceContentCache>();
|
||||
services.AddSingleton<IJiboRandomizer, DefaultJiboRandomizer>();
|
||||
services.AddSingleton<JiboInteractionService>();
|
||||
services.AddSingleton<IConversationBroker, DemoConversationBroker>();
|
||||
services.AddSingleton<IExternalProcessRunner, ExternalProcessRunner>();
|
||||
services.AddSingleton<ISttStrategy, LocalWhisperCppBufferedAudioSttStrategy>();
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
using Jibo.Cloud.Application.Services;
|
||||
using Jibo.Cloud.Infrastructure.Content;
|
||||
using Jibo.Runtime.Abstractions;
|
||||
|
||||
namespace Jibo.Cloud.Tests.WebSockets;
|
||||
|
||||
/// <summary>
/// Tests for <see cref="JiboInteractionService.BuildDecisionAsync"/> using the in-memory
/// catalog and a randomizer that always returns the first list entry, so the expected
/// content is fixed.
/// </summary>
public sealed class JiboInteractionServiceTests
{
    // ESML expected when the first dance animation in the in-memory catalog is chosen.
    private const string FirstDanceEsml =
        "<speak>Okay.<break size='0.2'/> Watch this.<anim cat='dance' filter='music, rom-upbeat' /></speak>";

    [Fact]
    public async Task BuildDecisionAsync_Joke_UsesCatalogBackedRandomContent()
    {
        var decision = await DecideForTranscriptAsync("tell me a joke");

        Assert.Equal("joke", decision.IntentName);
        Assert.Equal("@be/joke", decision.SkillName);
        Assert.Equal("Why did the robot cross the road? Because it was programmed by the chicken.", decision.ReplyText);
    }

    [Fact]
    public async Task BuildDecisionAsync_Dance_UsesCatalogBackedAnimation()
    {
        var decision = await DecideForTranscriptAsync("do a dance");

        Assert.Equal("dance", decision.IntentName);
        Assert.Equal("chitchat-skill", decision.SkillName);
        Assert.Equal("Okay. Watch this.", decision.ReplyText);
        Assert.Equal(FirstDanceEsml, decision.SkillPayload!["esml"]);
    }

    [Fact]
    public async Task BuildDecisionAsync_ClientNluAskForDate_MapsToDateIntent()
    {
        // No transcript at all: only the client-reported intent is supplied.
        var turn = new TurnContext
        {
            Attributes = new Dictionary<string, object?>
            {
                ["clientIntent"] = "askForDate"
            }
        };

        var decision = await CreateService().BuildDecisionAsync(turn);

        Assert.Equal("date", decision.IntentName);
        Assert.Contains("Today is", decision.ReplyText, StringComparison.Ordinal);
    }

    /// <summary>Runs a fresh service against a turn whose raw and normalized transcript are both <paramref name="transcript"/>.</summary>
    private static Task<JiboInteractionDecision> DecideForTranscriptAsync(string transcript)
    {
        var turn = new TurnContext
        {
            RawTranscript = transcript,
            NormalizedTranscript = transcript
        };

        return CreateService().BuildDecisionAsync(turn);
    }

    /// <summary>Service wired to the in-memory catalog and the first-item randomizer.</summary>
    private static JiboInteractionService CreateService()
    {
        var cache = new JiboExperienceContentCache(new InMemoryJiboExperienceContentRepository());
        return new JiboInteractionService(cache, new FirstItemRandomizer());
    }

    /// <summary>Randomizer stand-in that always returns element 0.</summary>
    private sealed class FirstItemRandomizer : IJiboRandomizer
    {
        public T Choose<T>(IReadOnlyList<T> items) => items[0];
    }
}
|
||||
@@ -2,6 +2,7 @@ using System.Text.Json;
|
||||
using Jibo.Cloud.Application.Abstractions;
|
||||
using Jibo.Cloud.Application.Services;
|
||||
using Jibo.Cloud.Domain.Models;
|
||||
using Jibo.Cloud.Infrastructure.Content;
|
||||
using Jibo.Cloud.Infrastructure.Persistence;
|
||||
using Jibo.Cloud.Tests.Fixtures;
|
||||
|
||||
@@ -16,7 +17,9 @@ public sealed class JiboWebSocketServiceTests
|
||||
{
|
||||
_store = new InMemoryCloudStateStore();
|
||||
var turnContextMapper = new ProtocolToTurnContextMapper();
|
||||
var conversationBroker = new DemoConversationBroker();
|
||||
var contentRepository = new InMemoryJiboExperienceContentRepository();
|
||||
var contentCache = new JiboExperienceContentCache(contentRepository);
|
||||
var conversationBroker = new DemoConversationBroker(new JiboInteractionService(contentCache, new DefaultJiboRandomizer()));
|
||||
var replyMapper = new ResponsePlanToSocketMessagesMapper();
|
||||
var sttSelector = new DefaultSttStrategySelector(
|
||||
[
|
||||
@@ -53,7 +56,7 @@ public sealed class JiboWebSocketServiceTests
|
||||
|
||||
using var listenPayload = JsonDocument.Parse(replies[0].Text!);
|
||||
Assert.Equal("hello jibo", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString());
|
||||
Assert.Equal("chat", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
|
||||
Assert.Equal("hello", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
|
||||
|
||||
using var eosPayload = JsonDocument.Parse(replies[1].Text!);
|
||||
Assert.True(eosPayload.RootElement.TryGetProperty("ts", out _));
|
||||
@@ -475,7 +478,7 @@ public sealed class JiboWebSocketServiceTests
|
||||
Assert.Equal(3, finalizeReplies.Count);
|
||||
using var listenPayload = JsonDocument.Parse(finalizeReplies[0].Text!);
|
||||
Assert.Equal("hello from buffered audio", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString());
|
||||
Assert.Equal("chat", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
|
||||
Assert.Equal("hello", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
|
||||
|
||||
using var skillPayload = JsonDocument.Parse(finalizeReplies[2].Text!);
|
||||
Assert.Equal("chitchat-skill", skillPayload.RootElement.GetProperty("data").GetProperty("skill").GetProperty("id").GetString());
|
||||
@@ -535,6 +538,45 @@ public sealed class JiboWebSocketServiceTests
|
||||
Assert.False(meta.TryGetProperty("transcript", out _));
|
||||
}
|
||||
|
||||
/// <summary>
/// Sends a LISTEN followed by a CLIENT_ASR "do a dance" on the same token and checks that
/// the third reply is a SKILL_ACTION for "chitchat-skill" whose ESML carries a dance animation.
/// </summary>
[Fact]
public async Task ClientAsrDanceFlow_EmitsAnimatedSkillAction()
{
    // Both messages share host, path, kind and token; only the JSON text differs.
    static WebSocketMessageEnvelope HubListenMessage(string json) => new()
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-client-asr-dance-token",
        Text = json
    };

    await _service.HandleMessageAsync(
        HubListenMessage("""{"type":"LISTEN","transID":"trans-dance-shape","data":{"rules":["wake-word"]}}"""));

    var replies = await _service.HandleMessageAsync(
        HubListenMessage("""{"type":"CLIENT_ASR","transID":"trans-dance-shape","data":{"text":"do a dance"}}"""));

    Assert.Equal(3, replies.Count);
    Assert.Equal("SKILL_ACTION", ReadReplyType(replies[2]));

    using var skillPayload = JsonDocument.Parse(replies[2].Text!);
    var data = skillPayload.RootElement.GetProperty("data");
    var play = data
        .GetProperty("action")
        .GetProperty("config")
        .GetProperty("jcp")
        .GetProperty("config")
        .GetProperty("play");
    var esml = play.GetProperty("esml").GetString();

    Assert.Contains("<anim cat='dance' filter='music, ", esml, StringComparison.Ordinal);
    Assert.Equal("chitchat-skill", data.GetProperty("skill").GetProperty("id").GetString());
}
|
||||
|
||||
[Fact]
|
||||
public async Task FollowUpTurn_UsesNewTurnStateWithoutLeakingBufferedAudio()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user