Add chitchat state machine routing

This commit is contained in:
Jacob Dubin
2026-05-06 16:01:15 -05:00
parent 0ccfa5db68
commit 7d31c3390c
6 changed files with 366 additions and 4 deletions

View File

@@ -0,0 +1,214 @@
using Jibo.Cloud.Application.Abstractions;
namespace Jibo.Cloud.Application.Services;
/// <summary>
/// Resolves chitchat-style turns (greetings, personality questions, emotion queries and
/// emotion commands) into a <see cref="JiboInteractionDecision"/> and records the route
/// that was taken in the decision's context updates.
/// </summary>
internal static class ChitchatStateMachine
{
    /// <summary>Context/metadata key that holds the chitchat state written by this class.</summary>
    internal const string StateMetadataKey = "chitchatState";

    /// <summary>Context/metadata key that holds the route selected for the turn.</summary>
    internal const string RouteMetadataKey = "chitchatRoute";

    /// <summary>Context/metadata key that holds the commanded emotion (empty when none).</summary>
    internal const string EmotionMetadataKey = "chitchatEmotion";

    internal const string IdleState = "idle";

    private const string IntentSplitState = "intent_split";
    private const string ProcessQueryState = "process_query";
    private const string CompleteState = "complete";

    private const string ScriptedResponseRoute = "ScriptedResponse";
    private const string EmotionQueryRoute = "EmotionQuery";
    private const string EmotionCommandRoute = "EmotionCommand";
    private const string ErrorResponseRoute = "ErrorResponse";

    // Phrases that classify a "chat" utterance as a question about the robot's mood.
    private static readonly string[] EmotionQueryPhrases =
    [
        "how are you feeling",
        "how do you feel",
        "what mood are you in",
        "what is your mood",
        "what's your mood",
        "are you happy",
        "are you sad",
        "are you excited",
        "do you have emotions"
    ];

    // Phrases that classify a "chat" utterance as a command to show an emotion.
    // Order matters: the first mapping with a matching phrase wins.
    private static readonly (string Emotion, string[] Phrases)[] EmotionCommandPhrases =
    [
        ("happy", ["smile", "be happy", "look happy", "cheer up"]),
        ("sad", ["be sad", "look sad"]),
        ("excited", ["be excited", "get excited", "act excited"]),
        ("calm", ["be calm", "relax"])
    ];

    private static readonly string[] EmotionCommandReplies =
    [
        "I can do that mood. Watch this.",
        "Switching mood now.",
        "Okay, mood change activated."
    ];

    /// <summary>
    /// Builds a decision for the intents this class owns.
    /// </summary>
    /// <param name="semanticIntent">Intent name; handled values are <c>hello</c>,
    /// <c>robot_personality</c>, <c>how_are_you</c> and <c>chat</c>.</param>
    /// <param name="transcript">Transcript stored (trimmed) in the context updates of an error response.</param>
    /// <param name="loweredTranscript">Transcript used for phrase matching.</param>
    /// <param name="catalog">Source of the greeting, personality and how-are-you replies.</param>
    /// <param name="randomizer">Picks one reply from a reply list.</param>
    /// <param name="buildErrorResponse">Invoked only when a <c>chat</c> utterance matches neither
    /// an emotion query nor an emotion command.</param>
    /// <returns>The decision, or <c>null</c> when <paramref name="semanticIntent"/> is not handled here.</returns>
    public static JiboInteractionDecision? TryBuildDecision(
        string semanticIntent,
        string transcript,
        string loweredTranscript,
        JiboExperienceCatalog catalog,
        IJiboRandomizer randomizer,
        Func<string> buildErrorResponse)
    {
        return semanticIntent switch
        {
            "hello" => BuildScriptedResponseDecision(
                "hello",
                randomizer.Choose(catalog.GreetingReplies)),
            "robot_personality" => BuildScriptedResponseDecision(
                "robot_personality",
                randomizer.Choose(catalog.PersonalityReplies)),
            "how_are_you" => BuildEmotionQueryDecision(
                "how_are_you",
                randomizer.Choose(catalog.HowAreYouReplies)),
            "chat" => BuildChatDecision(
                transcript,
                loweredTranscript,
                catalog,
                randomizer,
                buildErrorResponse),
            _ => null
        };
    }

    /// <summary>
    /// Returns <c>true</c> when the transcript contains one of the emotion query phrases or
    /// one of the emotion command phrases. Null, empty or whitespace input yields <c>false</c>.
    /// </summary>
    /// <param name="normalizedLoweredTranscript">Transcript to inspect; matching ignores case.</param>
    public static bool IsLikelyEmotionUtterance(string normalizedLoweredTranscript)
    {
        return IsEmotionQuery(normalizedLoweredTranscript) ||
               TryResolveEmotionCommand(normalizedLoweredTranscript, out _);
    }

    // Splits a generic "chat" turn: emotion query first, then emotion command, else error response.
    private static JiboInteractionDecision BuildChatDecision(
        string transcript,
        string loweredTranscript,
        JiboExperienceCatalog catalog,
        IJiboRandomizer randomizer,
        Func<string> buildErrorResponse)
    {
        if (IsEmotionQuery(loweredTranscript))
        {
            return BuildEmotionQueryDecision(
                "emotion_query",
                randomizer.Choose(catalog.HowAreYouReplies));
        }

        if (TryResolveEmotionCommand(loweredTranscript, out var emotion))
        {
            return BuildEmotionCommandDecision(randomizer, emotion);
        }

        return BuildErrorResponseDecision(
            "chat",
            buildErrorResponse(),
            transcript);
    }

    // Plain reply tagged with the ScriptedResponse route.
    private static JiboInteractionDecision BuildScriptedResponseDecision(string intentName, string replyText)
    {
        return new JiboInteractionDecision(
            intentName,
            replyText,
            ContextUpdates: BuildContextUpdates(
                ScriptedResponseRoute,
                emotion: null));
    }

    // Plain reply tagged with the EmotionQuery route; no emotion is recorded.
    private static JiboInteractionDecision BuildEmotionQueryDecision(string intentName, string replyText)
    {
        return new JiboInteractionDecision(
            intentName,
            replyText,
            ContextUpdates: BuildContextUpdates(
                EmotionQueryRoute,
                emotion: null));
    }

    // Builds the "emotion_command" decision: a random acknowledgement as the reply text plus a
    // "chitchat-skill" payload whose ESML wraps a fixed sentence in the mapped emotion category.
    private static JiboInteractionDecision BuildEmotionCommandDecision(IJiboRandomizer randomizer, string emotion)
    {
        // "excited" is rendered with the "happy" category and "calm" with "neutral";
        // any unrecognised emotion falls back to "neutral".
        var (esmlEmotion, esmlText) = emotion switch
        {
            "happy" => ("happy", "I am feeling happy."),
            "sad" => ("sad", "I can do a thoughtful mood too."),
            "excited" => ("happy", "I am feeling excited."),
            "calm" => ("neutral", "I am in a calmer mood."),
            _ => ("neutral", "Mood updated.")
        };

        return new JiboInteractionDecision(
            "emotion_command",
            randomizer.Choose(EmotionCommandReplies),
            "chitchat-skill",
            new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase)
            {
                ["esml"] = $"<speak><es cat='{esmlEmotion}' filter='!ssa-only, !sfx-only' endNeutral='true'>{esmlText}</es></speak>",
                ["mim_id"] = "runtime-chat",
                ["mim_type"] = "announcement",
                ["prompt_id"] = "RUNTIME_EMOTION_COMMAND",
                ["prompt_sub_category"] = "AN"
            },
            ContextUpdates: BuildContextUpdates(
                EmotionCommandRoute,
                emotion));
    }

    // Reply tagged with the ErrorResponse route; the trimmed transcript is kept in the context updates.
    private static JiboInteractionDecision BuildErrorResponseDecision(string intentName, string replyText, string transcript)
    {
        var normalizedTranscript = string.IsNullOrWhiteSpace(transcript)
            ? string.Empty
            : transcript.Trim();

        return new JiboInteractionDecision(
            intentName,
            replyText,
            ContextUpdates: BuildContextUpdates(
                ErrorResponseRoute,
                emotion: null,
                rawTranscript: normalizedTranscript));
    }

    // Every decision reports the same terminal state; only route, emotion and raw transcript vary.
    // Missing emotion / transcript are stored as empty strings rather than null.
    private static IDictionary<string, object?> BuildContextUpdates(
        string route,
        string? emotion,
        string? rawTranscript = null)
    {
        return new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase)
        {
            [StateMetadataKey] = CompleteState,
            [RouteMetadataKey] = route,
            [EmotionMetadataKey] = emotion ?? string.Empty,
            ["chitchatLastState"] = IntentSplitState,
            ["chitchatProcessState"] = ProcessQueryState,
            ["chitchatRawTranscript"] = rawTranscript ?? string.Empty
        };
    }

    private static bool IsEmotionQuery(string loweredTranscript)
    {
        return ContainsAnyPhrase(loweredTranscript, EmotionQueryPhrases);
    }

    // Finds the first emotion whose phrase list matches; emotion is non-null exactly when true is returned.
    private static bool TryResolveEmotionCommand(
        string loweredTranscript,
        [System.Diagnostics.CodeAnalysis.NotNullWhen(true)] out string? emotion)
    {
        foreach (var (candidate, phrases) in EmotionCommandPhrases)
        {
            if (ContainsAnyPhrase(loweredTranscript, phrases))
            {
                emotion = candidate;
                return true;
            }
        }

        emotion = null;
        return false;
    }

    // True when any phrase occurs in the transcript delimited by spaces or by the transcript's
    // start/end, so "smile" matches "please smile now" but not "smiled".
    private static bool ContainsAnyPhrase(string loweredTranscript, IEnumerable<string> phrases)
    {
        if (string.IsNullOrWhiteSpace(loweredTranscript))
        {
            return false;
        }

        // Padding both ends with a space folds the equals / starts-with / contains / ends-with
        // cases into a single containment test and is computed once for all phrases.
        var padded = $" {loweredTranscript} ";

        foreach (var phrase in phrases)
        {
            // Ignore case so callers that pass a transcript that was not lowered still match;
            // for already-lowered input this is identical to an ordinal comparison.
            if (padded.Contains($" {phrase} ", StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }

        return false;
    }
}

View File

@@ -64,6 +64,18 @@ public sealed class JiboInteractionService(
return personalReportDecision;
}
var chitchatDecision = ChitchatStateMachine.TryBuildDecision(
semanticIntent,
transcript,
lowered,
catalog,
randomizer,
() => BuildGenericReply(catalog, transcript, lowered));
if (chitchatDecision is not null)
{
return chitchatDecision;
}
return semanticIntent switch
{
"joke" => BuildJokeDecision(catalog),
@@ -96,11 +108,8 @@ public sealed class JiboInteractionService(
"photo_gallery" => BuildPhotoGalleryLaunchDecision(),
"snapshot" => BuildPhotoCreateDecision("snapshot", "Taking a picture.", "createOnePhoto"),
"photobooth" => BuildPhotoCreateDecision("photobooth", "Starting photobooth.", "createSomePhotos"),
"hello" => new JiboInteractionDecision("hello", randomizer.Choose(catalog.GreetingReplies)),
"how_are_you" => new JiboInteractionDecision("how_are_you", randomizer.Choose(catalog.HowAreYouReplies)),
"robot_age" => BuildRobotAgeDecision(referenceLocalTime),
"robot_birthday" => BuildRobotBirthdayDecision(),
"robot_personality" => new JiboInteractionDecision("robot_personality", randomizer.Choose(catalog.PersonalityReplies)),
"memory_set_name" => BuildRememberNameDecision(turn, transcript),
"memory_get_name" => BuildRecallNameDecision(turn),
"memory_set_birthday" => BuildRememberBirthdayDecision(turn, transcript),

View File

@@ -59,7 +59,8 @@ public sealed class ProtocolToTurnContextMapper
foreach (var pair in session.Metadata)
{
if (!pair.Key.StartsWith("personalReport", StringComparison.OrdinalIgnoreCase) ||
if ((!pair.Key.StartsWith("personalReport", StringComparison.OrdinalIgnoreCase) &&
!pair.Key.StartsWith("chitchat", StringComparison.OrdinalIgnoreCase)) ||
pair.Value is null)
{
continue;

View File

@@ -902,6 +902,11 @@ public sealed partial class WebSocketTurnFinalizationService(
return true;
}
if (ChitchatStateMachine.IsLikelyEmotionUtterance(transcript))
{
return true;
}
if (transcript.Length >= 6)
{
return true;
@@ -1058,6 +1063,9 @@ public sealed partial class WebSocketTurnFinalizationService(
var previousCalendarEnabled = ReadMetadataBool(session.Metadata, PersonalReportOrchestrator.CalendarEnabledMetadataKey) ?? true;
var previousCommuteEnabled = ReadMetadataBool(session.Metadata, PersonalReportOrchestrator.CommuteEnabledMetadataKey) ?? true;
var previousNewsEnabled = ReadMetadataBool(session.Metadata, PersonalReportOrchestrator.NewsEnabledMetadataKey) ?? true;
var previousChitchatState = ReadMetadataString(session.Metadata, ChitchatStateMachine.StateMetadataKey);
var previousChitchatRoute = ReadMetadataString(session.Metadata, ChitchatStateMachine.RouteMetadataKey);
var previousChitchatEmotion = ReadMetadataString(session.Metadata, ChitchatStateMachine.EmotionMetadataKey);
foreach (var pair in contextUpdates)
{
@@ -1078,6 +1086,9 @@ public sealed partial class WebSocketTurnFinalizationService(
var nextCommuteEnabled = ReadMetadataBool(session.Metadata, PersonalReportOrchestrator.CommuteEnabledMetadataKey) ?? true;
var nextNewsEnabled = ReadMetadataBool(session.Metadata, PersonalReportOrchestrator.NewsEnabledMetadataKey) ?? true;
var serviceError = ReadMetadataString(session.Metadata, PersonalReportOrchestrator.LastServiceErrorMetadataKey);
var nextChitchatState = ReadMetadataString(session.Metadata, ChitchatStateMachine.StateMetadataKey);
var nextChitchatRoute = ReadMetadataString(session.Metadata, ChitchatStateMachine.RouteMetadataKey);
var nextChitchatEmotion = ReadMetadataString(session.Metadata, ChitchatStateMachine.EmotionMetadataKey);
if (!string.Equals(previousState, nextState, StringComparison.OrdinalIgnoreCase))
{
@@ -1140,6 +1151,30 @@ public sealed partial class WebSocketTurnFinalizationService(
["intent"] = intentName
}), cancellationToken);
}
if (!string.Equals(previousChitchatState, nextChitchatState, StringComparison.OrdinalIgnoreCase) &&
!string.IsNullOrWhiteSpace(nextChitchatState))
{
await sink.RecordTurnDiagnosticAsync("chitchat_state_transition", BuildTurnDiagnosticSnapshot(session, envelope, new Dictionary<string, object?>
{
["previousState"] = previousChitchatState,
["state"] = nextChitchatState,
["intent"] = intentName
}), cancellationToken);
}
if (!string.Equals(previousChitchatRoute, nextChitchatRoute, StringComparison.OrdinalIgnoreCase) &&
!string.IsNullOrWhiteSpace(nextChitchatRoute))
{
await sink.RecordTurnDiagnosticAsync("chitchat_route_selected", BuildTurnDiagnosticSnapshot(session, envelope, new Dictionary<string, object?>
{
["route"] = nextChitchatRoute,
["previousRoute"] = previousChitchatRoute,
["emotion"] = nextChitchatEmotion,
["previousEmotion"] = previousChitchatEmotion,
["intent"] = intentName
}), cancellationToken);
}
}
private async Task EmitServiceToggleDiagnosticAsync(

View File

@@ -17,6 +17,9 @@ public sealed class JiboInteractionServiceTests
private const string PersonalReportCalendarEnabledKey = "personalReportCalendarEnabled";
private const string PersonalReportCommuteEnabledKey = "personalReportCommuteEnabled";
private const string PersonalReportNewsEnabledKey = "personalReportNewsEnabled";
private const string ChitchatStateKey = "chitchatState";
private const string ChitchatRouteKey = "chitchatRoute";
private const string ChitchatEmotionKey = "chitchatEmotion";
[Fact]
public async Task BuildDecisionAsync_Joke_UsesCatalogBackedRandomContent()
@@ -147,6 +150,76 @@ public sealed class JiboInteractionServiceTests
Assert.Equal("I do. I am curious, playful, and always up for a new experiment.", decision.ReplyText);
}
[Fact]
public async Task BuildDecisionAsync_Hello_RoutesThroughChitchatScriptedResponse()
{
    // Arrange
    const string utterance = "hello";
    var sut = CreateService();

    // Act
    var result = await sut.BuildDecisionAsync(new TurnContext
    {
        RawTranscript = utterance,
        NormalizedTranscript = utterance
    });

    // Assert: the greeting keeps its intent name and reports the scripted-response route.
    Assert.Equal("hello", result.IntentName);
    var updates = result.ContextUpdates;
    Assert.NotNull(updates);
    Assert.Equal("complete", updates![ChitchatStateKey]);
    Assert.Equal("ScriptedResponse", updates[ChitchatRouteKey]);
}
[Fact]
public async Task BuildDecisionAsync_AreYouHappy_RoutesThroughEmotionQuerySplit()
{
    // Arrange
    const string utterance = "are you happy";
    var sut = CreateService();

    // Act
    var result = await sut.BuildDecisionAsync(new TurnContext
    {
        RawTranscript = utterance,
        NormalizedTranscript = utterance
    });

    // Assert: the question is classified as an emotion query and records no emotion.
    Assert.Equal("emotion_query", result.IntentName);
    var updates = result.ContextUpdates;
    Assert.NotNull(updates);
    Assert.Equal("EmotionQuery", updates![ChitchatRouteKey]);
    Assert.Equal(string.Empty, updates[ChitchatEmotionKey]);
}
[Fact]
public async Task BuildDecisionAsync_Smile_RoutesThroughEmotionCommandSplit()
{
    // Arrange
    const string utterance = "smile";
    var sut = CreateService();

    // Act
    var result = await sut.BuildDecisionAsync(new TurnContext
    {
        RawTranscript = utterance,
        NormalizedTranscript = utterance
    });

    // Assert: the command is routed to the chitchat skill with a "happy" ESML category.
    Assert.Equal("emotion_command", result.IntentName);
    Assert.Equal("chitchat-skill", result.SkillName);
    var payload = result.SkillPayload;
    Assert.NotNull(payload);
    Assert.Contains("cat='happy'", payload!["esml"]?.ToString(), StringComparison.Ordinal);

    // Assert: the route and the commanded emotion are both written to the context updates.
    var updates = result.ContextUpdates;
    Assert.NotNull(updates);
    Assert.Equal("EmotionCommand", updates![ChitchatRouteKey]);
    Assert.Equal("happy", updates[ChitchatEmotionKey]);
}
[Fact]
public async Task BuildDecisionAsync_UnhandledChat_RoutesThroughErrorResponseSplit()
{
    // Arrange
    const string utterance = "blargh";
    var sut = CreateService();

    // Act
    var result = await sut.BuildDecisionAsync(new TurnContext
    {
        RawTranscript = utterance,
        NormalizedTranscript = utterance
    });

    // Assert: an utterance with no emotion phrase stays "chat" and takes the error-response route.
    Assert.Equal("chat", result.IntentName);
    var updates = result.ContextUpdates;
    Assert.NotNull(updates);
    Assert.Equal("ErrorResponse", updates![ChitchatRouteKey]);
}
[Fact]
public async Task BuildDecisionAsync_BirthdayMemory_SetThenRecallWithinTenant()
{

View File

@@ -3241,6 +3241,36 @@ public sealed class JiboWebSocketServiceTests
Assert.Equal("idle", stateValue?.ToString());
}
[Fact]
public async Task ClientAsrChitchatEmotionCommand_PersistsSplitRouteMetadata()
{
    const string routeKey = "chitchatRoute";
    const string emotionKey = "chitchatEmotion";

    // Arrange: a listen-socket CLIENT_ASR message carrying the "smile" utterance.
    var token = _store.IssueRobotToken("chitchat-emotion-device-a");
    var message = new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = token,
        Text = """{"type":"CLIENT_ASR","transID":"trans-chitchat-emotion","data":{"text":"smile"}}"""
    };

    // Act
    var replies = await _service.HandleMessageAsync(message);

    // Assert: three replies come back and the first one reports the emotion_command intent.
    Assert.Equal(3, replies.Count);
    string? reportedIntent;
    using (var firstReply = JsonDocument.Parse(replies[0].Text!))
    {
        reportedIntent = firstReply.RootElement
            .GetProperty("data")
            .GetProperty("nlu")
            .GetProperty("intent")
            .GetString();
    }
    Assert.Equal("emotion_command", reportedIntent);

    // Assert: the route and emotion were persisted on the session metadata.
    var session = _store.FindSessionByToken(token);
    Assert.NotNull(session);
    Assert.True(session.Metadata.TryGetValue(routeKey, out var persistedRoute));
    Assert.Equal("EmotionCommand", persistedRoute?.ToString());
    Assert.True(session.Metadata.TryGetValue(emotionKey, out var persistedEmotion));
    Assert.Equal("happy", persistedEmotion?.ToString());
}
[Fact]
public async Task FollowUpTurn_UsesNewTurnStateWithoutLeakingBufferedAudio()
{