open jibo architecture narrowing and streamlining
This commit is contained in:
@@ -69,6 +69,19 @@ Near-term ASR work should stay staged:
|
|||||||
|
|
||||||
That keeps Node as the reverse-engineering oracle while letting the long-term `.NET` cloud gain real STT seams without pretending they are finished.
|
That keeps Node as the reverse-engineering oracle while letting the long-term `.NET` cloud gain real STT seams without pretending they are finished.
|
||||||
|
|
||||||
|
## Working Cloud Framework
|
||||||
|
|
||||||
|
The current evidence in captures, fixtures, and Node behavior supports three main cloud interaction paths:
|
||||||
|
|
||||||
|
1. local Jibo behavior observed by the cloud
|
||||||
|
The robot or its local skill stack already interpreted the turn and the cloud mainly tracks, acknowledges, or lightly completes it.
|
||||||
|
2. local Jibo behavior overridden or redirected by the cloud
|
||||||
|
The robot reports the turn state, but the cloud chooses a different synthetic reply path.
|
||||||
|
3. raw audio interpreted by the cloud
|
||||||
|
The robot sends buffered audio and the cloud performs transcript resolution before sending back `LISTEN`, `EOS`, and ESML-driven playback.
|
||||||
|
|
||||||
|
Those are the right primary buckets for now. Additional side channels may still emerge later, especially around proactive traffic, direct skill/service sockets, or future on-device OS changes, but they should be treated as extensions to this model until captures prove otherwise.
|
||||||
|
|
||||||
## Speech, Animation, And ESML
|
## Speech, Animation, And ESML
|
||||||
|
|
||||||
The current joke flow is only a small foothold into Jibo expressiveness.
|
The current joke flow is only a small foothold into Jibo expressiveness.
|
||||||
|
|||||||
@@ -120,6 +120,13 @@ That enables two distinct STT paths:
|
|||||||
|
|
||||||
The local tool path is intentionally off by default. It exists to help map real robot audio behavior while the stable hosted cloud remains the primary goal.
|
The local tool path is intentionally off by default. It exists to help map real robot audio behavior while the stable hosted cloud remains the primary goal.
|
||||||
|
|
||||||
|
Local Ubuntu testing is the one exception: the checked-in API host config turns that path on and points it at the current Node-aligned tool locations:
|
||||||
|
|
||||||
|
- `/usr/bin/ffmpeg`
|
||||||
|
- `/usr/bin/whisper.cpp/build/bin/whisper-cli`
|
||||||
|
- `/usr/bin/whisper.cpp/models/ggml-base.en.bin`
|
||||||
|
- temp audio under `/tmp/openjibo-stt`
|
||||||
|
|
||||||
Configuration lives under `OpenJibo:Stt`:
|
Configuration lives under `OpenJibo:Stt`:
|
||||||
|
|
||||||
- `EnableLocalWhisperCpp`
|
- `EnableLocalWhisperCpp`
|
||||||
@@ -130,3 +137,13 @@ Configuration lives under `OpenJibo:Stt`:
|
|||||||
- `TempDirectory`
|
- `TempDirectory`
|
||||||
|
|
||||||
This is not yet a claim of production-ready onboard ASR. It is a `.NET` discovery seam that keeps us compatible with the Node oracle while we evaluate longer-term options such as Azure-hosted STT or a managed decode/transcribe stack.
|
This is not yet a claim of production-ready onboard ASR. It is a `.NET` discovery seam that keeps us compatible with the Node oracle while we evaluate longer-term options such as Azure-hosted STT or a managed decode/transcribe stack.
|
||||||
|
|
||||||
|
## Current Interaction Paths
|
||||||
|
|
||||||
|
The working cloud model currently looks like three main paths:
|
||||||
|
|
||||||
|
1. Jibo reports what already happened locally and the cloud tracks or lightly completes the turn.
|
||||||
|
2. Jibo reports what happened locally and the cloud responds with a different synthetic completion path.
|
||||||
|
3. Jibo streams raw audio and the cloud interprets the turn before sending ESML back.
|
||||||
|
|
||||||
|
That framing matches the repo evidence so far and is a good operating model for current discovery. There may still be smaller side paths around proactive traffic, direct skill-to-service communication, or future on-robot extensions, but those are not the main cloud revive loop yet.
|
||||||
|
|||||||
@@ -8,6 +8,14 @@
|
|||||||
"ProtocolTelemetry": {
|
"ProtocolTelemetry": {
|
||||||
"Enabled": true,
|
"Enabled": true,
|
||||||
"DirectoryPath": "captures/http"
|
"DirectoryPath": "captures/http"
|
||||||
|
},
|
||||||
|
"Stt": {
|
||||||
|
"EnableLocalWhisperCpp": true,
|
||||||
|
"FfmpegPath": "/usr/bin/ffmpeg",
|
||||||
|
"WhisperCliPath": "/usr/bin/whisper.cpp/build/bin/whisper-cli",
|
||||||
|
"WhisperModelPath": "/usr/bin/whisper.cpp/models/ggml-base.en.bin",
|
||||||
|
"WhisperLanguage": "en",
|
||||||
|
"TempDirectory": "/tmp/openjibo-stt"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,21 @@
|
|||||||
|
namespace Jibo.Cloud.Application.Abstractions;
|
||||||
|
|
||||||
|
/// <summary>
/// Supplies the <see cref="JiboExperienceCatalog"/> that holds the canned reply and animation content.
/// </summary>
public interface IJiboExperienceContentRepository
{
    /// <summary>Loads the experience catalog.</summary>
    /// <param name="cancellationToken">Token that can cancel the load.</param>
    /// <returns>A task that yields the catalog.</returns>
    Task<JiboExperienceCatalog> GetCatalogAsync(CancellationToken cancellationToken = default);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Immutable bag of string content grouped by category. Every category defaults to an empty list,
/// so a catalog created without initializers exposes no content at all.
/// </summary>
public sealed class JiboExperienceCatalog
{
    /// <summary>Joke texts.</summary>
    public IReadOnlyList<string> Jokes { get; init; } = Array.Empty<string>();

    /// <summary>Dance animation identifiers.</summary>
    public IReadOnlyList<string> DanceAnimations { get; init; } = Array.Empty<string>();

    /// <summary>Replies for greetings.</summary>
    public IReadOnlyList<string> GreetingReplies { get; init; } = Array.Empty<string>();

    /// <summary>Replies for "how are you" style questions.</summary>
    public IReadOnlyList<string> HowAreYouReplies { get; init; } = Array.Empty<string>();

    /// <summary>Replies for surprise requests.</summary>
    public IReadOnlyList<string> SurpriseReplies { get; init; } = Array.Empty<string>();

    /// <summary>Replies for personal report requests.</summary>
    public IReadOnlyList<string> PersonalReportReplies { get; init; } = Array.Empty<string>();

    /// <summary>Replies for weather requests.</summary>
    public IReadOnlyList<string> WeatherReplies { get; init; } = Array.Empty<string>();

    /// <summary>Replies for calendar requests.</summary>
    public IReadOnlyList<string> CalendarReplies { get; init; } = Array.Empty<string>();

    /// <summary>Replies for commute requests.</summary>
    public IReadOnlyList<string> CommuteReplies { get; init; } = Array.Empty<string>();

    /// <summary>Replies for news requests.</summary>
    public IReadOnlyList<string> NewsReplies { get; init; } = Array.Empty<string>();

    /// <summary>Fallback replies used when no more specific category applies.</summary>
    public IReadOnlyList<string> GenericFallbackReplies { get; init; } = Array.Empty<string>();
}
|
||||||
@@ -2,36 +2,17 @@ using Jibo.Runtime.Abstractions;
|
|||||||
|
|
||||||
namespace Jibo.Cloud.Application.Services;
|
namespace Jibo.Cloud.Application.Services;
|
||||||
|
|
||||||
public sealed class DemoConversationBroker : IConversationBroker
|
public sealed class DemoConversationBroker(JiboInteractionService interactionService) : IConversationBroker
|
||||||
{
|
{
|
||||||
public Task<ResponsePlan> HandleTurnAsync(TurnContext turn, CancellationToken cancellationToken = default)
|
public async Task<ResponsePlan> HandleTurnAsync(TurnContext turn, CancellationToken cancellationToken = default)
|
||||||
{
|
{
|
||||||
var transcript = (turn.NormalizedTranscript ?? turn.RawTranscript ?? string.Empty).Trim();
|
var decision = await interactionService.BuildDecisionAsync(turn, cancellationToken);
|
||||||
var lowered = transcript.ToLowerInvariant();
|
|
||||||
var clientIntent = turn.Attributes.TryGetValue("clientIntent", out var rawClientIntent)
|
|
||||||
? rawClientIntent?.ToString()
|
|
||||||
: null;
|
|
||||||
var semanticIntent = ResolveSemanticIntent(lowered, clientIntent);
|
|
||||||
|
|
||||||
var reply = semanticIntent switch
|
|
||||||
{
|
|
||||||
"time" => $"It is {DateTime.Now:hh:mm tt}.",
|
|
||||||
"date" => $"Today is {DateTime.Now:dddd, MMMM d}.",
|
|
||||||
"dance" => "Okay. Watch this.",
|
|
||||||
_ => transcript.Length == 0
|
|
||||||
? "I am listening."
|
|
||||||
: lowered.Contains("hello") || lowered.Contains("hi")
|
|
||||||
? "Hello from the OpenJibo cloud."
|
|
||||||
: lowered.Contains("joke")
|
|
||||||
? "Why did the robot bring a ladder? Because it wanted to reach the cloud."
|
|
||||||
: $"I heard: {transcript}"
|
|
||||||
};
|
|
||||||
|
|
||||||
var plan = new ResponsePlan
|
var plan = new ResponsePlan
|
||||||
{
|
{
|
||||||
SessionId = turn.SessionId,
|
SessionId = turn.SessionId,
|
||||||
Status = ResponseStatus.Succeeded,
|
Status = ResponseStatus.Succeeded,
|
||||||
IntentName = semanticIntent,
|
IntentName = decision.IntentName,
|
||||||
Topic = "conversation",
|
Topic = "conversation",
|
||||||
DeviceId = turn.DeviceId,
|
DeviceId = turn.DeviceId,
|
||||||
TargetHost = turn.HostName,
|
TargetHost = turn.HostName,
|
||||||
@@ -41,7 +22,7 @@ public sealed class DemoConversationBroker : IConversationBroker
|
|||||||
new SpeakAction
|
new SpeakAction
|
||||||
{
|
{
|
||||||
Sequence = 0,
|
Sequence = 0,
|
||||||
Text = reply,
|
Text = decision.ReplyText,
|
||||||
Voice = "griffin"
|
Voice = "griffin"
|
||||||
},
|
},
|
||||||
new ListenAction
|
new ListenAction
|
||||||
@@ -65,54 +46,16 @@ public sealed class DemoConversationBroker : IConversationBroker
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if (string.Equals(plan.IntentName, "joke", StringComparison.OrdinalIgnoreCase))
|
if (!string.IsNullOrWhiteSpace(decision.SkillName))
|
||||||
{
|
{
|
||||||
plan.Actions.Add(new InvokeNativeSkillAction
|
plan.Actions.Add(new InvokeNativeSkillAction
|
||||||
{
|
{
|
||||||
Sequence = 2,
|
Sequence = 2,
|
||||||
SkillName = "@be/joke",
|
SkillName = decision.SkillName,
|
||||||
Payload = new Dictionary<string, object?>
|
Payload = decision.SkillPayload ?? new Dictionary<string, object?>()
|
||||||
{
|
|
||||||
["replyType"] = "joke"
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return Task.FromResult(plan);
|
return plan;
|
||||||
}
|
|
||||||
|
|
||||||
private static string ResolveSemanticIntent(string loweredTranscript, string? clientIntent)
|
|
||||||
{
|
|
||||||
if (string.Equals(clientIntent, "askForTime", StringComparison.OrdinalIgnoreCase))
|
|
||||||
{
|
|
||||||
return "time";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (string.Equals(clientIntent, "askForDate", StringComparison.OrdinalIgnoreCase))
|
|
||||||
{
|
|
||||||
return "date";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (loweredTranscript.Contains("joke", StringComparison.Ordinal))
|
|
||||||
{
|
|
||||||
return "joke";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (loweredTranscript.Contains("dance", StringComparison.Ordinal))
|
|
||||||
{
|
|
||||||
return "dance";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (loweredTranscript.Contains("time", StringComparison.Ordinal))
|
|
||||||
{
|
|
||||||
return "time";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (loweredTranscript.Contains("date", StringComparison.Ordinal) || loweredTranscript.Contains("day", StringComparison.Ordinal))
|
|
||||||
{
|
|
||||||
return "date";
|
|
||||||
}
|
|
||||||
|
|
||||||
return "chat";
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,19 @@
|
|||||||
|
namespace Jibo.Cloud.Application.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// Abstraction over "pick one item from a list", so selection can be swapped out by callers.
/// </summary>
public interface IJiboRandomizer
{
    /// <summary>Selects one element of <paramref name="items"/>.</summary>
    /// <typeparam name="T">Element type of the list.</typeparam>
    /// <param name="items">Candidates to choose from.</param>
    /// <returns>The selected element.</returns>
    T Choose<T>(IReadOnlyList<T> items);
}
|
||||||
|
|
||||||
|
/// <summary>
/// <see cref="IJiboRandomizer"/> that picks an index with <see cref="Random.Shared"/>.
/// </summary>
public sealed class DefaultJiboRandomizer : IJiboRandomizer
{
    /// <summary>Returns a randomly selected element of <paramref name="items"/>.</summary>
    /// <exception cref="InvalidOperationException">The list contains no elements.</exception>
    public T Choose<T>(IReadOnlyList<T> items)
    {
        var candidateCount = items.Count;

        return candidateCount == 0
            ? throw new InvalidOperationException("Cannot choose from an empty list.")
            : items[Random.Shared.Next(candidateCount)];
    }
}
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
using Jibo.Cloud.Application.Abstractions;
|
||||||
|
|
||||||
|
namespace Jibo.Cloud.Application.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// Keeps the catalog returned by the repository after the first successful load and
/// returns that same instance on later calls.
/// </summary>
public sealed class JiboExperienceContentCache(IJiboExperienceContentRepository repository)
{
    // Only one caller at a time may run the load below.
    private readonly SemaphoreSlim _loadGate = new(1, 1);

    private JiboExperienceCatalog? _catalog;

    /// <summary>Returns the cached catalog, loading it from the repository when none is cached yet.</summary>
    /// <param name="cancellationToken">Token passed to the gate wait and to the repository load.</param>
    public async Task<JiboExperienceCatalog> GetCatalogAsync(CancellationToken cancellationToken = default)
    {
        // Already loaded: skip the gate entirely.
        if (_catalog is { } cached)
        {
            return cached;
        }

        await _loadGate.WaitAsync(cancellationToken);
        try
        {
            // Re-check under the gate; another caller may have finished the load while we waited.
            if (_catalog is null)
            {
                _catalog = await repository.GetCatalogAsync(cancellationToken);
            }

            return _catalog;
        }
        finally
        {
            _loadGate.Release();
        }
    }
}
|
||||||
@@ -0,0 +1,175 @@
|
|||||||
|
using Jibo.Cloud.Application.Abstractions;
|
||||||
|
using Jibo.Runtime.Abstractions;
|
||||||
|
|
||||||
|
namespace Jibo.Cloud.Application.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// Builds a <see cref="JiboInteractionDecision"/> for a turn. The intent comes from the turn's
/// <c>clientIntent</c> attribute or, failing that, from keywords in the lower-cased transcript;
/// the reply content is then picked from the cached catalog through the injected randomizer.
/// </summary>
public sealed class JiboInteractionService(
    JiboExperienceContentCache contentCache,
    IJiboRandomizer randomizer)
{
    // Single-keyword intents. They are tested in this order, after the client-supplied
    // intents and before the greeting/time/date checks; the first match wins.
    private static readonly (string Keyword, string Intent)[] KeywordIntents =
    [
        ("joke", "joke"),
        ("dance", "dance"),
        ("surprise", "surprise"),
        ("personal report", "personal_report"),
        ("weather", "weather"),
        ("calendar", "calendar"),
        ("commute", "commute"),
        ("news", "news")
    ];

    /// <summary>Resolves the intent for <paramref name="turn"/> and builds the matching decision.</summary>
    /// <param name="turn">Turn whose transcript and <c>clientIntent</c> attribute are inspected.</param>
    /// <param name="cancellationToken">Token passed to the catalog load.</param>
    /// <returns>The decision; unrecognized input yields the <c>chat</c> intent.</returns>
    public async Task<JiboInteractionDecision> BuildDecisionAsync(TurnContext turn, CancellationToken cancellationToken = default)
    {
        var catalog = await contentCache.GetCatalogAsync(cancellationToken);
        var transcript = (turn.NormalizedTranscript ?? turn.RawTranscript ?? string.Empty).Trim();
        var lowered = transcript.ToLowerInvariant();
        var clientIntent = turn.Attributes.TryGetValue("clientIntent", out var rawClientIntent)
            ? rawClientIntent?.ToString()
            : null;

        var semanticIntent = ResolveSemanticIntent(lowered, clientIntent);
        return semanticIntent switch
        {
            "joke" => BuildJokeDecision(catalog),
            "dance" => BuildDanceDecision(catalog),
            "time" => new JiboInteractionDecision("time", $"It is {FormatNow("hh:mm tt")}."),
            "date" => new JiboInteractionDecision("date", $"Today is {FormatNow("dddd, MMMM d")}."),
            "hello" => new JiboInteractionDecision("hello", randomizer.Choose(catalog.GreetingReplies)),
            "how_are_you" => new JiboInteractionDecision("how_are_you", randomizer.Choose(catalog.HowAreYouReplies)),
            "surprise" => new JiboInteractionDecision("surprise", randomizer.Choose(catalog.SurpriseReplies)),
            "personal_report" => new JiboInteractionDecision("personal_report", randomizer.Choose(catalog.PersonalReportReplies)),
            "weather" => new JiboInteractionDecision("weather", randomizer.Choose(catalog.WeatherReplies)),
            "calendar" => new JiboInteractionDecision("calendar", randomizer.Choose(catalog.CalendarReplies)),
            "commute" => new JiboInteractionDecision("commute", randomizer.Choose(catalog.CommuteReplies)),
            "news" => new JiboInteractionDecision("news", randomizer.Choose(catalog.NewsReplies)),
            _ => new JiboInteractionDecision("chat", BuildGenericReply(catalog, transcript, lowered))
        };
    }

    // Formats the current local time with the invariant culture: the surrounding sentences are
    // fixed English text, so day/month names and AM/PM must not follow the host's culture.
    private static string FormatNow(string format)
    {
        return DateTime.Now.ToString(format, System.Globalization.CultureInfo.InvariantCulture);
    }

    // Picks a joke and routes it to the "@be/joke" skill.
    private JiboInteractionDecision BuildJokeDecision(JiboExperienceCatalog catalog)
    {
        var joke = randomizer.Choose(catalog.Jokes);
        return new JiboInteractionDecision(
            "joke",
            joke,
            "@be/joke",
            new Dictionary<string, object?>
            {
                ["replyType"] = "joke"
            });
    }

    // Picks a dance animation and embeds it in the ESML carried by the skill payload.
    private JiboInteractionDecision BuildDanceDecision(JiboExperienceCatalog catalog)
    {
        var dance = randomizer.Choose(catalog.DanceAnimations);
        return new JiboInteractionDecision(
            "dance",
            "Okay. Watch this.",
            "chitchat-skill",
            new Dictionary<string, object?>
            {
                ["esml"] = $"<speak>Okay.<break size='0.2'/> Watch this.<anim cat='dance' filter='music, {dance}' /></speak>",
                ["mim_id"] = "runtime-chat",
                ["mim_type"] = "announcement"
            });
    }

    // Reply text for the "chat" intent: fixed answers for an empty transcript and for
    // time-of-day greetings, otherwise a catalog fallback with "{transcript}" substituted.
    private string BuildGenericReply(JiboExperienceCatalog catalog, string transcript, string lowered)
    {
        if (string.IsNullOrWhiteSpace(transcript))
        {
            return "I am listening.";
        }

        if (lowered.Contains("good morning", StringComparison.Ordinal))
        {
            return "Good morning! It is nice to hear your voice.";
        }

        if (lowered.Contains("good afternoon", StringComparison.Ordinal))
        {
            return "Good afternoon. I am happy to be here.";
        }

        if (lowered.Contains("good night", StringComparison.Ordinal))
        {
            return "Good night. Sleep tight.";
        }

        return randomizer.Choose(catalog.GenericFallbackReplies).Replace("{transcript}", transcript, StringComparison.Ordinal);
    }

    // Maps the client intent / lower-cased transcript to an intent name; "chat" when nothing matches.
    private static string ResolveSemanticIntent(string loweredTranscript, string? clientIntent)
    {
        // Intents reported by the client take precedence over transcript keywords.
        if (string.Equals(clientIntent, "askForTime", StringComparison.OrdinalIgnoreCase))
        {
            return "time";
        }

        if (string.Equals(clientIntent, "askForDate", StringComparison.OrdinalIgnoreCase))
        {
            return "date";
        }

        foreach (var (keyword, intent) in KeywordIntents)
        {
            if (loweredTranscript.Contains(keyword, StringComparison.Ordinal))
            {
                return intent;
            }
        }

        if (loweredTranscript.Contains("how are you", StringComparison.Ordinal) ||
            loweredTranscript.Contains("what's up", StringComparison.Ordinal) ||
            loweredTranscript.Contains("what s up", StringComparison.Ordinal))
        {
            return "how_are_you";
        }

        // "hi" and "hey" must be whole words: as plain substrings they also occur inside
        // "this", "think", "which", "they", ... and would turn unrelated sentences into greetings.
        if (loweredTranscript.Contains("hello", StringComparison.Ordinal) ||
            ContainsWord(loweredTranscript, "hi") ||
            ContainsWord(loweredTranscript, "hey"))
        {
            return "hello";
        }

        if (loweredTranscript.Contains("time", StringComparison.Ordinal))
        {
            return "time";
        }

        if (loweredTranscript.Contains("date", StringComparison.Ordinal) || loweredTranscript.Contains("day", StringComparison.Ordinal))
        {
            return "date";
        }

        return "chat";
    }

    // True when `word` occurs in `text` with no letter or digit directly before or after it.
    private static bool ContainsWord(string text, string word)
    {
        var searchFrom = 0;
        while (searchFrom < text.Length)
        {
            var start = text.IndexOf(word, searchFrom, StringComparison.Ordinal);
            if (start < 0)
            {
                return false;
            }

            var end = start + word.Length;
            var boundaryBefore = start == 0 || !char.IsLetterOrDigit(text[start - 1]);
            var boundaryAfter = end == text.Length || !char.IsLetterOrDigit(text[end]);
            if (boundaryBefore && boundaryAfter)
            {
                return true;
            }

            // Embedded occurrence; keep looking further along the text.
            searchFrom = start + 1;
        }

        return false;
    }
}
|
||||||
|
|
||||||
|
/// <summary>Outcome of interpreting one turn.</summary>
/// <param name="IntentName">Name of the resolved intent.</param>
/// <param name="ReplyText">Text to speak in response.</param>
/// <param name="SkillName">Optional skill to invoke; <c>null</c> when the decision names no skill.</param>
/// <param name="SkillPayload">Optional key/value payload that accompanies the skill.</param>
public sealed record JiboInteractionDecision(
    string IntentName,
    string ReplyText,
    string? SkillName = null,
    IDictionary<string, object?>? SkillPayload = null);
|
||||||
@@ -164,16 +164,18 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
|||||||
|
|
||||||
private static object BuildSkillPayload(ResponsePlan plan, TurnContext turn, string transId, SpeakAction speak, InvokeNativeSkillAction? skill)
|
private static object BuildSkillPayload(ResponsePlan plan, TurnContext turn, string transId, SpeakAction speak, InvokeNativeSkillAction? skill)
|
||||||
{
|
{
|
||||||
|
var skillPayload = skill?.Payload;
|
||||||
var isJoke = string.Equals(plan.IntentName, "joke", StringComparison.OrdinalIgnoreCase) ||
|
var isJoke = string.Equals(plan.IntentName, "joke", StringComparison.OrdinalIgnoreCase) ||
|
||||||
string.Equals(skill?.SkillName, "@be/joke", StringComparison.OrdinalIgnoreCase);
|
string.Equals(skill?.SkillName, "@be/joke", StringComparison.OrdinalIgnoreCase);
|
||||||
var isDance = string.Equals(plan.IntentName, "dance", StringComparison.OrdinalIgnoreCase);
|
var isDance = string.Equals(plan.IntentName, "dance", StringComparison.OrdinalIgnoreCase);
|
||||||
var skillId = isJoke ? "@be/joke" : skill?.SkillName ?? "chitchat-skill";
|
var skillId = ReadPayloadString(skillPayload, "skillId") ?? (isJoke ? "@be/joke" : skill?.SkillName ?? "chitchat-skill");
|
||||||
var esml = isDance
|
var esml = ReadPayloadString(skillPayload, "esml") ?? (isDance
|
||||||
? "<speak>Okay.<break size='0.2'/> Watch this.<anim cat='dance' filter='music, rom-upbeat' /></speak>"
|
? "<speak>Okay.<break size='0.2'/> Watch this.<anim cat='dance' filter='music, rom-upbeat' /></speak>"
|
||||||
: isJoke
|
: isJoke
|
||||||
? $"<speak><es cat='happy' filter='!ssa-only, !sfx-only' endNeutral='true'>{EscapeXml(speak.Text)}</es></speak>"
|
? $"<speak><es cat='happy' filter='!ssa-only, !sfx-only' endNeutral='true'>{EscapeXml(speak.Text)}</es></speak>"
|
||||||
: $"<speak><es cat='neutral' filter='!ssa-only, !sfx-only' endNeutral='true'>{EscapeXml(speak.Text)}</es></speak>";
|
: $"<speak><es cat='neutral' filter='!ssa-only, !sfx-only' endNeutral='true'>{EscapeXml(speak.Text)}</es></speak>");
|
||||||
var mimId = isJoke ? "runtime-joke" : "runtime-chat";
|
var mimId = ReadPayloadString(skillPayload, "mim_id") ?? (isJoke ? "runtime-joke" : "runtime-chat");
|
||||||
|
var mimType = ReadPayloadString(skillPayload, "mim_type") ?? "announcement";
|
||||||
|
|
||||||
return new
|
return new
|
||||||
{
|
{
|
||||||
@@ -204,7 +206,7 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
|||||||
prompt_id = "RUNTIME_PROMPT",
|
prompt_id = "RUNTIME_PROMPT",
|
||||||
prompt_sub_category = "AN",
|
prompt_sub_category = "AN",
|
||||||
mim_id = mimId,
|
mim_id = mimId,
|
||||||
mim_type = "announcement"
|
mim_type = mimType
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -271,6 +273,16 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
|||||||
.Replace("'", "'", StringComparison.Ordinal);
|
.Replace("'", "'", StringComparison.Ordinal);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the string form of payload[key]; null when there is no payload, the key is
// missing, or the stored value is null.
private static string? ReadPayloadString(IDictionary<string, object?>? payload, string key)
{
    return payload is not null && payload.TryGetValue(key, out var entry)
        ? entry?.ToString()
        : null;
}
|
||||||
|
|
||||||
private static string CreateHubMessageId()
|
private static string CreateHubMessageId()
|
||||||
{
|
{
|
||||||
return $"mid-{Guid.NewGuid()}";
|
return $"mid-{Guid.NewGuid()}";
|
||||||
|
|||||||
@@ -0,0 +1,81 @@
|
|||||||
|
using Jibo.Cloud.Application.Abstractions;
|
||||||
|
|
||||||
|
namespace Jibo.Cloud.Infrastructure.Content;
|
||||||
|
|
||||||
|
/// <summary>
/// <see cref="IJiboExperienceContentRepository"/> backed by one hard-coded catalog instance
/// that every call returns.
/// </summary>
public sealed class InMemoryJiboExperienceContentRepository : IJiboExperienceContentRepository
{
    // Built once per process; GetCatalogAsync always hands out this instance.
    private static readonly JiboExperienceCatalog BuiltInCatalog = new JiboExperienceCatalog
    {
        Jokes =
        [
            "Why did the robot cross the road? Because it was programmed by the chicken.",
            "Why was the robot tired when it got home? It had a hard drive.",
            "What do you call a pirate robot? Arrrr two dee two.",
            "Why did the robot go on vacation? It needed to recharge.",
            "What kind of shoes do frogs wear? Open-toed."
        ],

        DanceAnimations =
        [
            "rom-upbeat",
            "rom-ballroom",
            "rom-silly",
            "rom-slowdance",
            "rom-electronic",
            "rom-twerk"
        ],

        GreetingReplies =
        [
            "Hi there. It is really good to talk with you.",
            "Hello there. I am glad you said hi.",
            "Hey. I am happy to see you."
        ],

        HowAreYouReplies =
        [
            "I am feeling cheerful and robotic.",
            "I am doing great. Thanks for asking.",
            "I am feeling bright-eyed and ready to help."
        ],

        // The categories below acknowledge the request only; their texts say the real path is not wired yet.
        SurpriseReplies =
        [
            "I can definitely surprise you. We are still mapping that path, but I am ready for the next experiment.",
            "Surprise mode is still taking shape, but I heard you loud and clear.",
            "That sounds fun. I am not all the way there yet, but we can keep teaching me."
        ],

        PersonalReportReplies =
        [
            "I heard your personal report request. That cloud path is still being mapped.",
            "Personal report is recognized, but I am not ready to deliver the real report yet."
        ],

        WeatherReplies =
        [
            "I heard your weather request. We still need to wire the real provider behind it.",
            "Weather is on the map now, even though the real forecast path is not finished yet."
        ],

        CalendarReplies =
        [
            "I heard your calendar request. The cloud knows the phrase, but the real calendar integration is still ahead.",
            "Calendar is recognized. We still need to connect the actual service path."
        ],

        CommuteReplies =
        [
            "I heard your commute request. That one is recognized, but not fully implemented yet.",
            "Commute is on the discovery list now. The real travel answer still needs a provider."
        ],

        NewsReplies =
        [
            "I heard your news request. That path is still a future cloud integration.",
            "News is recognized, but I do not have the full news service behind it yet."
        ],

        // "{transcript}" is a placeholder that the consumer replaces with what was heard.
        GenericFallbackReplies =
        [
            "Okay. You said, {transcript}.",
            "I heard you say, {transcript}.",
            "Thanks. I heard, {transcript}."
        ]
    };

    /// <summary>Returns the built-in catalog as an already-completed task.</summary>
    /// <param name="cancellationToken">Not used; nothing here can be cancelled.</param>
    public Task<JiboExperienceCatalog> GetCatalogAsync(CancellationToken cancellationToken = default)
        => Task.FromResult(BuiltInCatalog);
}
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
using Jibo.Cloud.Application.Abstractions;
|
using Jibo.Cloud.Application.Abstractions;
|
||||||
using Jibo.Cloud.Application.Services;
|
using Jibo.Cloud.Application.Services;
|
||||||
using Jibo.Cloud.Infrastructure.Audio;
|
using Jibo.Cloud.Infrastructure.Audio;
|
||||||
|
using Jibo.Cloud.Infrastructure.Content;
|
||||||
using Jibo.Cloud.Infrastructure.Persistence;
|
using Jibo.Cloud.Infrastructure.Persistence;
|
||||||
using Jibo.Cloud.Infrastructure.Telemetry;
|
using Jibo.Cloud.Infrastructure.Telemetry;
|
||||||
using Jibo.Runtime.Abstractions;
|
using Jibo.Runtime.Abstractions;
|
||||||
@@ -23,6 +24,10 @@ public static class ServiceCollectionExtensions
|
|||||||
|
|
||||||
services.AddSingleton(sttOptions);
|
services.AddSingleton(sttOptions);
|
||||||
services.AddSingleton<ICloudStateStore, InMemoryCloudStateStore>();
|
services.AddSingleton<ICloudStateStore, InMemoryCloudStateStore>();
|
||||||
|
services.AddSingleton<IJiboExperienceContentRepository, InMemoryJiboExperienceContentRepository>();
|
||||||
|
services.AddSingleton<JiboExperienceContentCache>();
|
||||||
|
services.AddSingleton<IJiboRandomizer, DefaultJiboRandomizer>();
|
||||||
|
services.AddSingleton<JiboInteractionService>();
|
||||||
services.AddSingleton<IConversationBroker, DemoConversationBroker>();
|
services.AddSingleton<IConversationBroker, DemoConversationBroker>();
|
||||||
services.AddSingleton<IExternalProcessRunner, ExternalProcessRunner>();
|
services.AddSingleton<IExternalProcessRunner, ExternalProcessRunner>();
|
||||||
services.AddSingleton<ISttStrategy, LocalWhisperCppBufferedAudioSttStrategy>();
|
services.AddSingleton<ISttStrategy, LocalWhisperCppBufferedAudioSttStrategy>();
|
||||||
|
|||||||
@@ -0,0 +1,73 @@
|
|||||||
|
using Jibo.Cloud.Application.Services;
|
||||||
|
using Jibo.Cloud.Infrastructure.Content;
|
||||||
|
using Jibo.Runtime.Abstractions;
|
||||||
|
|
||||||
|
namespace Jibo.Cloud.Tests.WebSockets;
|
||||||
|
|
||||||
|
/// <summary>
/// Tests for <see cref="JiboInteractionService"/> running against the in-memory catalog
/// with a randomizer that always picks the first item, which makes the chosen content predictable.
/// </summary>
public sealed class JiboInteractionServiceTests
{
    [Fact]
    public async Task BuildDecisionAsync_Joke_UsesCatalogBackedRandomContent()
    {
        var sut = CreateService();

        var result = await sut.BuildDecisionAsync(TurnWithTranscript("tell me a joke"));

        Assert.Equal("joke", result.IntentName);
        Assert.Equal("@be/joke", result.SkillName);
        Assert.Equal("Why did the robot cross the road? Because it was programmed by the chicken.", result.ReplyText);
    }

    [Fact]
    public async Task BuildDecisionAsync_Dance_UsesCatalogBackedAnimation()
    {
        var sut = CreateService();

        var result = await sut.BuildDecisionAsync(TurnWithTranscript("do a dance"));

        Assert.Equal("dance", result.IntentName);
        Assert.Equal("chitchat-skill", result.SkillName);
        Assert.Equal("Okay. Watch this.", result.ReplyText);
        Assert.Equal("<speak>Okay.<break size='0.2'/> Watch this.<anim cat='dance' filter='music, rom-upbeat' /></speak>", result.SkillPayload!["esml"]);
    }

    [Fact]
    public async Task BuildDecisionAsync_ClientNluAskForDate_MapsToDateIntent()
    {
        var sut = CreateService();
        var attributes = new Dictionary<string, object?>
        {
            ["clientIntent"] = "askForDate"
        };

        // No transcript at all: only the client-supplied intent can drive the result.
        var result = await sut.BuildDecisionAsync(new TurnContext { Attributes = attributes });

        Assert.Equal("date", result.IntentName);
        Assert.Contains("Today is", result.ReplyText, StringComparison.Ordinal);
    }

    // Turn whose raw and normalized transcripts carry the same text.
    private static TurnContext TurnWithTranscript(string text)
        => new TurnContext
        {
            RawTranscript = text,
            NormalizedTranscript = text
        };

    private static JiboInteractionService CreateService()
        => new JiboInteractionService(
            new JiboExperienceContentCache(new InMemoryJiboExperienceContentRepository()),
            new FirstItemRandomizer());

    // Replaces randomness with "always the first entry".
    private sealed class FirstItemRandomizer : IJiboRandomizer
    {
        public T Choose<T>(IReadOnlyList<T> items) => items[0];
    }
}
|
||||||
@@ -2,6 +2,7 @@ using System.Text.Json;
|
|||||||
using Jibo.Cloud.Application.Abstractions;
|
using Jibo.Cloud.Application.Abstractions;
|
||||||
using Jibo.Cloud.Application.Services;
|
using Jibo.Cloud.Application.Services;
|
||||||
using Jibo.Cloud.Domain.Models;
|
using Jibo.Cloud.Domain.Models;
|
||||||
|
using Jibo.Cloud.Infrastructure.Content;
|
||||||
using Jibo.Cloud.Infrastructure.Persistence;
|
using Jibo.Cloud.Infrastructure.Persistence;
|
||||||
using Jibo.Cloud.Tests.Fixtures;
|
using Jibo.Cloud.Tests.Fixtures;
|
||||||
|
|
||||||
@@ -16,7 +17,9 @@ public sealed class JiboWebSocketServiceTests
|
|||||||
{
|
{
|
||||||
_store = new InMemoryCloudStateStore();
|
_store = new InMemoryCloudStateStore();
|
||||||
var turnContextMapper = new ProtocolToTurnContextMapper();
|
var turnContextMapper = new ProtocolToTurnContextMapper();
|
||||||
var conversationBroker = new DemoConversationBroker();
|
var contentRepository = new InMemoryJiboExperienceContentRepository();
|
||||||
|
var contentCache = new JiboExperienceContentCache(contentRepository);
|
||||||
|
var conversationBroker = new DemoConversationBroker(new JiboInteractionService(contentCache, new DefaultJiboRandomizer()));
|
||||||
var replyMapper = new ResponsePlanToSocketMessagesMapper();
|
var replyMapper = new ResponsePlanToSocketMessagesMapper();
|
||||||
var sttSelector = new DefaultSttStrategySelector(
|
var sttSelector = new DefaultSttStrategySelector(
|
||||||
[
|
[
|
||||||
@@ -53,7 +56,7 @@ public sealed class JiboWebSocketServiceTests
|
|||||||
|
|
||||||
using var listenPayload = JsonDocument.Parse(replies[0].Text!);
|
using var listenPayload = JsonDocument.Parse(replies[0].Text!);
|
||||||
Assert.Equal("hello jibo", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString());
|
Assert.Equal("hello jibo", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString());
|
||||||
Assert.Equal("chat", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
|
Assert.Equal("hello", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
|
||||||
|
|
||||||
using var eosPayload = JsonDocument.Parse(replies[1].Text!);
|
using var eosPayload = JsonDocument.Parse(replies[1].Text!);
|
||||||
Assert.True(eosPayload.RootElement.TryGetProperty("ts", out _));
|
Assert.True(eosPayload.RootElement.TryGetProperty("ts", out _));
|
||||||
@@ -475,7 +478,7 @@ public sealed class JiboWebSocketServiceTests
|
|||||||
Assert.Equal(3, finalizeReplies.Count);
|
Assert.Equal(3, finalizeReplies.Count);
|
||||||
using var listenPayload = JsonDocument.Parse(finalizeReplies[0].Text!);
|
using var listenPayload = JsonDocument.Parse(finalizeReplies[0].Text!);
|
||||||
Assert.Equal("hello from buffered audio", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString());
|
Assert.Equal("hello from buffered audio", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString());
|
||||||
Assert.Equal("chat", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
|
Assert.Equal("hello", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
|
||||||
|
|
||||||
using var skillPayload = JsonDocument.Parse(finalizeReplies[2].Text!);
|
using var skillPayload = JsonDocument.Parse(finalizeReplies[2].Text!);
|
||||||
Assert.Equal("chitchat-skill", skillPayload.RootElement.GetProperty("data").GetProperty("skill").GetProperty("id").GetString());
|
Assert.Equal("chitchat-skill", skillPayload.RootElement.GetProperty("data").GetProperty("skill").GetProperty("id").GetString());
|
||||||
@@ -535,6 +538,45 @@ public sealed class JiboWebSocketServiceTests
|
|||||||
Assert.False(meta.TryGetProperty("transcript", out _));
|
Assert.False(meta.TryGetProperty("transcript", out _));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Sends a wake-word LISTEN message followed by a CLIENT_ASR message with the text "do a dance"
/// under the same transID, then checks that the third reply is a SKILL_ACTION for the
/// chitchat-skill whose ESML contains a dance animation tag.
/// </summary>
[Fact]
public async Task ClientAsrDanceFlow_EmitsAnimatedSkillAction()
{
    // Both messages share host, path, kind and token; only the JSON payload differs.
    static WebSocketMessageEnvelope BuildEnvelope(string payload) => new()
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-client-asr-dance-token",
        Text = payload
    };

    // First message opens the turn; its replies are not inspected.
    await _service.HandleMessageAsync(BuildEnvelope(
        """{"type":"LISTEN","transID":"trans-dance-shape","data":{"rules":["wake-word"]}}"""));

    var danceReplies = await _service.HandleMessageAsync(BuildEnvelope(
        """{"type":"CLIENT_ASR","transID":"trans-dance-shape","data":{"text":"do a dance"}}"""));

    Assert.Equal(3, danceReplies.Count);
    Assert.Equal("SKILL_ACTION", ReadReplyType(danceReplies[2]));

    using var skillDocument = JsonDocument.Parse(danceReplies[2].Text!);
    var data = skillDocument.RootElement.GetProperty("data");

    // The ESML string sits at data.action.config.jcp.config.play.esml.
    var playConfig = data
        .GetProperty("action")
        .GetProperty("config")
        .GetProperty("jcp")
        .GetProperty("config")
        .GetProperty("play");
    var esmlMarkup = playConfig.GetProperty("esml").GetString();

    Assert.Contains("<anim cat='dance' filter='music, ", esmlMarkup, StringComparison.Ordinal);
    Assert.Equal("chitchat-skill", data.GetProperty("skill").GetProperty("id").GetString());
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public async Task FollowUpTurn_UsesNewTurnStateWithoutLeakingBufferedAudio()
|
public async Task FollowUpTurn_UsesNewTurnStateWithoutLeakingBufferedAudio()
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user