From 7d31c3390cf52dd8d86803753eb80180e15c6313 Mon Sep 17 00:00:00 2001
From: Jacob Dubin
Date: Wed, 6 May 2026 16:01:15 -0500
Subject: [PATCH] Add chitchat state machine routing

---
 .../Services/ChitchatStateMachine.cs          | 229 ++++++++++++++++++
 .../Services/JiboInteractionService.cs        |  15 +-
 .../Services/ProtocolToTurnContextMapper.cs   |   3 +-
 .../WebSocketTurnFinalizationService.cs       |  35 +++
 .../WebSockets/JiboInteractionServiceTests.cs |  73 ++++++
 .../WebSockets/JiboWebSocketServiceTests.cs   |  30 +++
 6 files changed, 381 insertions(+), 4 deletions(-)
 create mode 100644 OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ChitchatStateMachine.cs

diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ChitchatStateMachine.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ChitchatStateMachine.cs
new file mode 100644
index 0000000..ffa5c94
--- /dev/null
+++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ChitchatStateMachine.cs
@@ -0,0 +1,229 @@
+using Jibo.Cloud.Application.Abstractions;
+
+namespace Jibo.Cloud.Application.Services;
+
+/// <summary>
+/// Routes chitchat-style turns (greetings, personality, mood questions, mood commands and
+/// otherwise unmatched chat) to a <see cref="JiboInteractionDecision"/>. Every decision carries
+/// <c>chitchat*</c> context updates that record which route was taken.
+/// </summary>
+internal static class ChitchatStateMachine
+{
+    internal const string StateMetadataKey = "chitchatState";
+    internal const string RouteMetadataKey = "chitchatRoute";
+    internal const string EmotionMetadataKey = "chitchatEmotion";
+
+    internal const string IdleState = "idle";
+    private const string IntentSplitState = "intent_split";
+    private const string ProcessQueryState = "process_query";
+    private const string CompleteState = "complete";
+
+    private const string ScriptedResponseRoute = "ScriptedResponse";
+    private const string EmotionQueryRoute = "EmotionQuery";
+    private const string EmotionCommandRoute = "EmotionCommand";
+    private const string ErrorResponseRoute = "ErrorResponse";
+
+    // Lower-case phrases that ask the robot about its own mood.
+    private static readonly string[] EmotionQueryPhrases =
+    [
+        "how are you feeling",
+        "how do you feel",
+        "what mood are you in",
+        "what is your mood",
+        "what's your mood",
+        "are you happy",
+        "are you sad",
+        "are you excited",
+        "do you have emotions"
+    ];
+
+    // Lower-case phrases that tell the robot to adopt a mood, grouped by the mood they request.
+    // Groups are scanned in declaration order and the first group with a match wins.
+    private static readonly (string Emotion, string[] Phrases)[] EmotionCommandPhrases =
+    [
+        ("happy", ["smile", "be happy", "look happy", "cheer up"]),
+        ("sad", ["be sad", "look sad"]),
+        ("excited", ["be excited", "get excited", "act excited"]),
+        ("calm", ["be calm", "relax"])
+    ];
+
+    // Spoken acknowledgements for an emotion command; one is picked per turn.
+    private static readonly string[] EmotionCommandReplies =
+    [
+        "I can do that mood. Watch this.",
+        "Switching mood now.",
+        "Okay, mood change activated."
+    ];
+
+    /// <summary>
+    /// Builds the decision for a chitchat intent, or returns <c>null</c> when
+    /// <paramref name="semanticIntent"/> is not one this class handles.
+    /// </summary>
+    /// <param name="semanticIntent">Intent name; <c>hello</c>, <c>robot_personality</c>, <c>how_are_you</c> and <c>chat</c> are handled.</param>
+    /// <param name="transcript">Transcript text; stored trimmed in the context updates on the error route.</param>
+    /// <param name="loweredTranscript">Lower-cased transcript used for phrase matching on <c>chat</c> turns.</param>
+    /// <param name="catalog">Supplies the greeting, personality and how-are-you reply lists.</param>
+    /// <param name="randomizer">Chooses one reply from a list.</param>
+    /// <param name="buildErrorResponse">Produces the fallback reply; invoked only when a <c>chat</c> turn matches no emotion phrase.</param>
+    /// <returns>The routed decision, or <c>null</c> when the intent is not handled here.</returns>
+    public static JiboInteractionDecision? TryBuildDecision(
+        string semanticIntent,
+        string transcript,
+        string loweredTranscript,
+        JiboExperienceCatalog catalog,
+        IJiboRandomizer randomizer,
+        Func<string> buildErrorResponse)
+    {
+        return semanticIntent switch
+        {
+            "hello" => BuildReplyDecision(
+                "hello",
+                randomizer.Choose(catalog.GreetingReplies),
+                ScriptedResponseRoute),
+            "robot_personality" => BuildReplyDecision(
+                "robot_personality",
+                randomizer.Choose(catalog.PersonalityReplies),
+                ScriptedResponseRoute),
+            "how_are_you" => BuildReplyDecision(
+                "how_are_you",
+                randomizer.Choose(catalog.HowAreYouReplies),
+                EmotionQueryRoute),
+            "chat" => BuildChatDecision(transcript, loweredTranscript, catalog, randomizer, buildErrorResponse),
+            _ => null
+        };
+    }
+
+    /// <summary>
+    /// Returns <c>true</c> when the transcript contains an emotion query or emotion command phrase.
+    /// </summary>
+    /// <param name="normalizedLoweredTranscript">Transcript to test; phrases are lower-case and compared ordinally.</param>
+    public static bool IsLikelyEmotionUtterance(string normalizedLoweredTranscript)
+    {
+        return IsEmotionQuery(normalizedLoweredTranscript) ||
+               ResolveEmotionCommand(normalizedLoweredTranscript) is not null;
+    }
+
+    // Splits a free-form "chat" turn: mood question first, then mood command, else the fallback reply.
+    private static JiboInteractionDecision BuildChatDecision(
+        string transcript,
+        string loweredTranscript,
+        JiboExperienceCatalog catalog,
+        IJiboRandomizer randomizer,
+        Func<string> buildErrorResponse)
+    {
+        if (IsEmotionQuery(loweredTranscript))
+        {
+            return BuildReplyDecision(
+                "emotion_query",
+                randomizer.Choose(catalog.HowAreYouReplies),
+                EmotionQueryRoute);
+        }
+
+        if (ResolveEmotionCommand(loweredTranscript) is { } emotion)
+        {
+            return BuildEmotionCommandDecision(randomizer, emotion);
+        }
+
+        return BuildErrorResponseDecision("chat", buildErrorResponse(), transcript);
+    }
+
+    // Plain spoken reply tagged with the given route; no emotion is recorded.
+    private static JiboInteractionDecision BuildReplyDecision(string intentName, string replyText, string route)
+    {
+        return new JiboInteractionDecision(
+            intentName,
+            replyText,
+            ContextUpdates: BuildContextUpdates(route, emotion: null));
+    }
+
+    // Emotion command: spoken acknowledgement plus a chitchat-skill payload carrying the ESML.
+    private static JiboInteractionDecision BuildEmotionCommandDecision(IJiboRandomizer randomizer, string emotion)
+    {
+        // Mood name -> ESML category for the animation tag, plus the sentence spoken after it.
+        var (esmlEmotion, responseSuffix) = emotion switch
+        {
+            "happy" => ("happy", "I am feeling happy."),
+            "sad" => ("sad", "I can do a thoughtful mood too."),
+            "excited" => ("happy", "I am feeling excited."),
+            "calm" => ("neutral", "I am in a calmer mood."),
+            _ => ("neutral", "Mood updated.")
+        };
+
+        return new JiboInteractionDecision(
+            "emotion_command",
+            randomizer.Choose(EmotionCommandReplies),
+            "chitchat-skill",
+            new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase)
+            {
+                // NOTE(review): animation tag reconstructed; the tests in this patch only pin cat='…'.
+                // Confirm the element name and the nonBlocking attribute against the ESML the robot accepts.
+                ["esml"] = $"<anim cat='{esmlEmotion}' nonBlocking='true' />{responseSuffix}",
+                ["mim_id"] = "runtime-chat",
+                ["mim_type"] = "announcement",
+                ["prompt_id"] = "RUNTIME_EMOTION_COMMAND",
+                ["prompt_sub_category"] = "AN"
+            },
+            ContextUpdates: BuildContextUpdates(EmotionCommandRoute, emotion));
+    }
+
+    // Fallback reply for chat that matched no emotion phrase; the trimmed transcript is kept in context.
+    private static JiboInteractionDecision BuildErrorResponseDecision(string intentName, string replyText, string transcript)
+    {
+        return new JiboInteractionDecision(
+            intentName,
+            replyText,
+            ContextUpdates: BuildContextUpdates(
+                ErrorResponseRoute,
+                emotion: null,
+                rawTranscript: transcript?.Trim()));
+    }
+
+    // Context updates written for every chitchat decision; unset values are stored as empty strings.
+    private static IDictionary<string, object?> BuildContextUpdates(
+        string route,
+        string? emotion,
+        string? rawTranscript = null)
+    {
+        return new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase)
+        {
+            [StateMetadataKey] = CompleteState,
+            [RouteMetadataKey] = route,
+            [EmotionMetadataKey] = emotion ?? string.Empty,
+            ["chitchatLastState"] = IntentSplitState,
+            ["chitchatProcessState"] = ProcessQueryState,
+            ["chitchatRawTranscript"] = rawTranscript ?? string.Empty
+        };
+    }
+
+    private static bool IsEmotionQuery(string loweredTranscript)
+    {
+        return ContainsAnyPhrase(loweredTranscript, EmotionQueryPhrases);
+    }
+
+    // Returns the mood requested by the transcript, or null when no command phrase matches.
+    private static string? ResolveEmotionCommand(string loweredTranscript)
+    {
+        foreach (var (emotion, phrases) in EmotionCommandPhrases)
+        {
+            if (ContainsAnyPhrase(loweredTranscript, phrases))
+            {
+                return emotion;
+            }
+        }
+
+        return null;
+    }
+
+    // True when any phrase occurs in the transcript as a whole run of space-delimited words.
+    private static bool ContainsAnyPhrase(string loweredTranscript, IEnumerable<string> phrases)
+    {
+        if (string.IsNullOrWhiteSpace(loweredTranscript))
+        {
+            return false;
+        }
+
+        // Padding both sides lets one Contains call cover "equals", "starts with", "ends with" and "in the middle".
+        var padded = $" {loweredTranscript} ";
+        return phrases.Any(phrase => padded.Contains($" {phrase} ", StringComparison.Ordinal));
+    }
+}
diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboInteractionService.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboInteractionService.cs
index fc29a29..a1c4cdb 100644
--- a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboInteractionService.cs
+++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboInteractionService.cs
@@ -64,6 +64,18 @@ public sealed class JiboInteractionService(
             return personalReportDecision;
         }
 
+        var chitchatDecision = ChitchatStateMachine.TryBuildDecision(
+            semanticIntent,
+            transcript,
+            lowered,
+            catalog,
+            randomizer,
+            () => BuildGenericReply(catalog, transcript, lowered));
+        if (chitchatDecision is not null)
+        {
+            return chitchatDecision;
+        }
+
         return semanticIntent switch
         {
             "joke" => BuildJokeDecision(catalog),
@@ -96,11 +108,8 @@
public sealed class JiboInteractionService( "photo_gallery" => BuildPhotoGalleryLaunchDecision(), "snapshot" => BuildPhotoCreateDecision("snapshot", "Taking a picture.", "createOnePhoto"), "photobooth" => BuildPhotoCreateDecision("photobooth", "Starting photobooth.", "createSomePhotos"), - "hello" => new JiboInteractionDecision("hello", randomizer.Choose(catalog.GreetingReplies)), - "how_are_you" => new JiboInteractionDecision("how_are_you", randomizer.Choose(catalog.HowAreYouReplies)), "robot_age" => BuildRobotAgeDecision(referenceLocalTime), "robot_birthday" => BuildRobotBirthdayDecision(), - "robot_personality" => new JiboInteractionDecision("robot_personality", randomizer.Choose(catalog.PersonalityReplies)), "memory_set_name" => BuildRememberNameDecision(turn, transcript), "memory_get_name" => BuildRecallNameDecision(turn), "memory_set_birthday" => BuildRememberBirthdayDecision(turn, transcript), diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ProtocolToTurnContextMapper.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ProtocolToTurnContextMapper.cs index d8e4b13..65ea1cd 100644 --- a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ProtocolToTurnContextMapper.cs +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ProtocolToTurnContextMapper.cs @@ -59,7 +59,8 @@ public sealed class ProtocolToTurnContextMapper foreach (var pair in session.Metadata) { - if (!pair.Key.StartsWith("personalReport", StringComparison.OrdinalIgnoreCase) || + if ((!pair.Key.StartsWith("personalReport", StringComparison.OrdinalIgnoreCase) && + !pair.Key.StartsWith("chitchat", StringComparison.OrdinalIgnoreCase)) || pair.Value is null) { continue; diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/WebSocketTurnFinalizationService.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/WebSocketTurnFinalizationService.cs index 5b45cac..aa974db 100644 
--- a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/WebSocketTurnFinalizationService.cs +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/WebSocketTurnFinalizationService.cs @@ -902,6 +902,11 @@ public sealed partial class WebSocketTurnFinalizationService( return true; } + if (ChitchatStateMachine.IsLikelyEmotionUtterance(transcript)) + { + return true; + } + if (transcript.Length >= 6) { return true; @@ -1058,6 +1063,9 @@ public sealed partial class WebSocketTurnFinalizationService( var previousCalendarEnabled = ReadMetadataBool(session.Metadata, PersonalReportOrchestrator.CalendarEnabledMetadataKey) ?? true; var previousCommuteEnabled = ReadMetadataBool(session.Metadata, PersonalReportOrchestrator.CommuteEnabledMetadataKey) ?? true; var previousNewsEnabled = ReadMetadataBool(session.Metadata, PersonalReportOrchestrator.NewsEnabledMetadataKey) ?? true; + var previousChitchatState = ReadMetadataString(session.Metadata, ChitchatStateMachine.StateMetadataKey); + var previousChitchatRoute = ReadMetadataString(session.Metadata, ChitchatStateMachine.RouteMetadataKey); + var previousChitchatEmotion = ReadMetadataString(session.Metadata, ChitchatStateMachine.EmotionMetadataKey); foreach (var pair in contextUpdates) { @@ -1078,6 +1086,9 @@ public sealed partial class WebSocketTurnFinalizationService( var nextCommuteEnabled = ReadMetadataBool(session.Metadata, PersonalReportOrchestrator.CommuteEnabledMetadataKey) ?? true; var nextNewsEnabled = ReadMetadataBool(session.Metadata, PersonalReportOrchestrator.NewsEnabledMetadataKey) ?? 
true; var serviceError = ReadMetadataString(session.Metadata, PersonalReportOrchestrator.LastServiceErrorMetadataKey); + var nextChitchatState = ReadMetadataString(session.Metadata, ChitchatStateMachine.StateMetadataKey); + var nextChitchatRoute = ReadMetadataString(session.Metadata, ChitchatStateMachine.RouteMetadataKey); + var nextChitchatEmotion = ReadMetadataString(session.Metadata, ChitchatStateMachine.EmotionMetadataKey); if (!string.Equals(previousState, nextState, StringComparison.OrdinalIgnoreCase)) { @@ -1140,6 +1151,30 @@ public sealed partial class WebSocketTurnFinalizationService( ["intent"] = intentName }), cancellationToken); } + + if (!string.Equals(previousChitchatState, nextChitchatState, StringComparison.OrdinalIgnoreCase) && + !string.IsNullOrWhiteSpace(nextChitchatState)) + { + await sink.RecordTurnDiagnosticAsync("chitchat_state_transition", BuildTurnDiagnosticSnapshot(session, envelope, new Dictionary + { + ["previousState"] = previousChitchatState, + ["state"] = nextChitchatState, + ["intent"] = intentName + }), cancellationToken); + } + + if (!string.Equals(previousChitchatRoute, nextChitchatRoute, StringComparison.OrdinalIgnoreCase) && + !string.IsNullOrWhiteSpace(nextChitchatRoute)) + { + await sink.RecordTurnDiagnosticAsync("chitchat_route_selected", BuildTurnDiagnosticSnapshot(session, envelope, new Dictionary + { + ["route"] = nextChitchatRoute, + ["previousRoute"] = previousChitchatRoute, + ["emotion"] = nextChitchatEmotion, + ["previousEmotion"] = previousChitchatEmotion, + ["intent"] = intentName + }), cancellationToken); + } } private async Task EmitServiceToggleDiagnosticAsync( diff --git a/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboInteractionServiceTests.cs b/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboInteractionServiceTests.cs index 59b4337..e637dd4 100644 --- a/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboInteractionServiceTests.cs +++ b/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboInteractionServiceTests.cs @@ 
-17,6 +17,9 @@ public sealed class JiboInteractionServiceTests
     private const string PersonalReportCalendarEnabledKey = "personalReportCalendarEnabled";
     private const string PersonalReportCommuteEnabledKey = "personalReportCommuteEnabled";
     private const string PersonalReportNewsEnabledKey = "personalReportNewsEnabled";
+    private const string ChitchatStateKey = "chitchatState";
+    private const string ChitchatRouteKey = "chitchatRoute";
+    private const string ChitchatEmotionKey = "chitchatEmotion";
 
     [Fact]
     public async Task BuildDecisionAsync_Joke_UsesCatalogBackedRandomContent()
@@ -147,6 +150,76 @@ public sealed class JiboInteractionServiceTests
         Assert.Equal("I do. I am curious, playful, and always up for a new experiment.", decision.ReplyText);
     }
 
+    [Fact]
+    public async Task BuildDecisionAsync_Hello_RoutesThroughChitchatScriptedResponse()
+    {
+        var turn = new TurnContext
+        {
+            RawTranscript = "hello",
+            NormalizedTranscript = "hello"
+        };
+
+        var result = await CreateService().BuildDecisionAsync(turn);
+
+        Assert.Equal("hello", result.IntentName);
+        Assert.NotNull(result.ContextUpdates);
+        Assert.Equal("complete", result.ContextUpdates![ChitchatStateKey]);
+        Assert.Equal("ScriptedResponse", result.ContextUpdates[ChitchatRouteKey]);
+    }
+
+    [Fact]
+    public async Task BuildDecisionAsync_AreYouHappy_RoutesThroughEmotionQuerySplit()
+    {
+        var turn = new TurnContext
+        {
+            RawTranscript = "are you happy",
+            NormalizedTranscript = "are you happy"
+        };
+
+        var result = await CreateService().BuildDecisionAsync(turn);
+
+        Assert.Equal("emotion_query", result.IntentName);
+        Assert.NotNull(result.ContextUpdates);
+        Assert.Equal("EmotionQuery", result.ContextUpdates![ChitchatRouteKey]);
+        Assert.Equal(string.Empty, result.ContextUpdates[ChitchatEmotionKey]);
+    }
+
+    [Fact]
+    public async Task BuildDecisionAsync_Smile_RoutesThroughEmotionCommandSplit()
+    {
+        var turn = new TurnContext
+        {
+            RawTranscript = "smile",
+            NormalizedTranscript = "smile"
+        };
+
+        var result = await CreateService().BuildDecisionAsync(turn);
+
+        Assert.Equal("emotion_command", result.IntentName);
+        Assert.Equal("chitchat-skill", result.SkillName);
+        Assert.NotNull(result.SkillPayload);
+        Assert.Contains("cat='happy'", result.SkillPayload!["esml"]?.ToString(), StringComparison.Ordinal);
+        Assert.NotNull(result.ContextUpdates);
+        Assert.Equal("EmotionCommand", result.ContextUpdates![ChitchatRouteKey]);
+        Assert.Equal("happy", result.ContextUpdates[ChitchatEmotionKey]);
+    }
+
+    [Fact]
+    public async Task BuildDecisionAsync_UnhandledChat_RoutesThroughErrorResponseSplit()
+    {
+        var turn = new TurnContext
+        {
+            RawTranscript = "blargh",
+            NormalizedTranscript = "blargh"
+        };
+
+        var result = await CreateService().BuildDecisionAsync(turn);
+
+        Assert.Equal("chat", result.IntentName);
+        Assert.NotNull(result.ContextUpdates);
+        Assert.Equal("ErrorResponse", result.ContextUpdates![ChitchatRouteKey]);
+    }
+
     [Fact]
     public async Task BuildDecisionAsync_BirthdayMemory_SetThenRecallWithinTenant()
     {
diff --git a/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs b/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs
index c25341e..4ca4d73 100644
--- a/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs
+++ b/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs
@@ -3241,6 +3241,36 @@ public sealed class JiboWebSocketServiceTests
         Assert.Equal("idle", stateValue?.ToString());
     }
 
+    [Fact]
+    public async Task ClientAsrChitchatEmotionCommand_PersistsSplitRouteMetadata()
+    {
+        var robotToken = _store.IssueRobotToken("chitchat-emotion-device-a");
+        var envelope = new WebSocketMessageEnvelope
+        {
+            HostName = "neo-hub.jibo.com",
+            Path = "/listen",
+            Kind = "neo-hub-listen",
+            Token = robotToken,
+            Text = """{"type":"CLIENT_ASR","transID":"trans-chitchat-emotion","data":{"text":"smile"}}"""
+        };
+
+        var responses = await _service.HandleMessageAsync(envelope);
+
+        Assert.Equal(3, responses.Count);
+        using (var listenDoc = JsonDocument.Parse(responses[0].Text!))
+        {
+            var nlu = listenDoc.RootElement.GetProperty("data").GetProperty("nlu");
+            Assert.Equal("emotion_command", nlu.GetProperty("intent").GetString());
+        }
+
+        var storedSession = _store.FindSessionByToken(robotToken);
+        Assert.NotNull(storedSession);
+        Assert.True(storedSession.Metadata.TryGetValue("chitchatRoute", out var storedRoute));
+        Assert.Equal("EmotionCommand", storedRoute?.ToString());
+        Assert.True(storedSession.Metadata.TryGetValue("chitchatEmotion", out var storedEmotion));
+        Assert.Equal("happy", storedEmotion?.ToString());
+    }
+
     [Fact]
     public async Task FollowUpTurn_UsesNewTurnStateWithoutLeakingBufferedAudio()
     {