Add chitchat state machine routing
This commit is contained in:
@@ -0,0 +1,214 @@
|
||||
using Jibo.Cloud.Application.Abstractions;
|
||||
|
||||
namespace Jibo.Cloud.Application.Services;
|
||||
|
||||
/// <summary>
/// Routes the chitchat intents ("hello", "robot_personality", "how_are_you" and "chat") to one of four
/// routes: scripted response, emotion query, emotion command or error response. The chosen route is
/// recorded in the decision's context updates under "chitchat"-prefixed keys.
/// </summary>
internal static class ChitchatStateMachine
{
    // Context-update keys that code outside this class reads back by name.
    internal const string StateMetadataKey = "chitchatState";
    internal const string RouteMetadataKey = "chitchatRoute";
    internal const string EmotionMetadataKey = "chitchatEmotion";

    // State names. Every decision built here reports CompleteState as the current state.
    internal const string IdleState = "idle";
    private const string IntentSplitState = "intent_split";
    private const string ProcessQueryState = "process_query";
    private const string CompleteState = "complete";

    // Route names written under RouteMetadataKey.
    private const string ScriptedResponseRoute = "ScriptedResponse";
    private const string EmotionQueryRoute = "EmotionQuery";
    private const string EmotionCommandRoute = "EmotionCommand";
    private const string ErrorResponseRoute = "ErrorResponse";

    // Lower-case phrases that mark an utterance as a question about the robot's mood.
    private static readonly string[] EmotionQueryPhrases =
    [
        "how are you feeling",
        "how do you feel",
        "what mood are you in",
        "what is your mood",
        "what's your mood",
        "are you happy",
        "are you sad",
        "are you excited",
        "do you have emotions"
    ];

    // Lower-case phrases that request a mood. Entries are checked in order and the first match wins.
    private static readonly (string Emotion, string[] Phrases)[] EmotionCommandPhrases =
    [
        ("happy", ["smile", "be happy", "look happy", "cheer up"]),
        ("sad", ["be sad", "look sad"]),
        ("excited", ["be excited", "get excited", "act excited"]),
        ("calm", ["be calm", "relax"])
    ];

    // Reply texts the randomizer picks from when an emotion command is recognised.
    private static readonly string[] EmotionCommandReplies =
    [
        "I can do that mood. Watch this.",
        "Switching mood now.",
        "Okay, mood change activated."
    ];

    /// <summary>
    /// Builds a decision for the chitchat intents, or returns <c>null</c> for any other intent.
    /// </summary>
    /// <param name="semanticIntent">Intent name to route; only the four chitchat intents are handled.</param>
    /// <param name="transcript">Transcript stored (trimmed) in the context updates on the error route.</param>
    /// <param name="loweredTranscript">Lower-cased transcript matched against the emotion phrase lists.</param>
    /// <param name="catalog">Source of the greeting, personality and how-are-you reply lists.</param>
    /// <param name="randomizer">Picks one reply from a reply list.</param>
    /// <param name="buildErrorResponse">Invoked only when a "chat" utterance matches no emotion phrase.</param>
    /// <returns>The routed decision, or <c>null</c> when <paramref name="semanticIntent"/> is not handled here.</returns>
    public static JiboInteractionDecision? TryBuildDecision(
        string semanticIntent,
        string transcript,
        string loweredTranscript,
        JiboExperienceCatalog catalog,
        IJiboRandomizer randomizer,
        Func<string> buildErrorResponse)
    {
        switch (semanticIntent)
        {
            case "hello":
                return BuildRoutedReplyDecision(
                    "hello",
                    randomizer.Choose(catalog.GreetingReplies),
                    ScriptedResponseRoute);

            case "robot_personality":
                return BuildRoutedReplyDecision(
                    "robot_personality",
                    randomizer.Choose(catalog.PersonalityReplies),
                    ScriptedResponseRoute);

            case "how_are_you":
                return BuildRoutedReplyDecision(
                    "how_are_you",
                    randomizer.Choose(catalog.HowAreYouReplies),
                    EmotionQueryRoute);

            case "chat":
                return BuildChatDecision(transcript, loweredTranscript, catalog, randomizer, buildErrorResponse);

            default:
                return null;
        }
    }

    /// <summary>
    /// Reports whether the transcript contains an emotion query phrase or an emotion command phrase.
    /// </summary>
    /// <param name="normalizedLoweredTranscript">Lower-cased transcript; null or blank input yields <c>false</c>.</param>
    /// <returns><c>true</c> when any emotion phrase occurs as a whole phrase in the transcript.</returns>
    public static bool IsLikelyEmotionUtterance(string normalizedLoweredTranscript)
    {
        return IsEmotionQuery(normalizedLoweredTranscript) ||
               TryResolveEmotionCommand(normalizedLoweredTranscript, out _);
    }

    // Splits a generic "chat" utterance: emotion query first, then emotion command, otherwise the error route.
    private static JiboInteractionDecision BuildChatDecision(
        string transcript,
        string loweredTranscript,
        JiboExperienceCatalog catalog,
        IJiboRandomizer randomizer,
        Func<string> buildErrorResponse)
    {
        if (IsEmotionQuery(loweredTranscript))
        {
            return BuildRoutedReplyDecision(
                "emotion_query",
                randomizer.Choose(catalog.HowAreYouReplies),
                EmotionQueryRoute);
        }

        if (TryResolveEmotionCommand(loweredTranscript, out var emotion))
        {
            return BuildEmotionCommandDecision(randomizer, emotion);
        }

        return BuildErrorResponseDecision("chat", buildErrorResponse(), transcript);
    }

    // Plain text reply tagged with the given route and no emotion. Shared by the scripted-response
    // and emotion-query routes, which differ only in the route name.
    private static JiboInteractionDecision BuildRoutedReplyDecision(string intentName, string replyText, string route)
    {
        return new JiboInteractionDecision(
            intentName,
            replyText,
            ContextUpdates: BuildContextUpdates(route, emotion: null));
    }

    // Builds the "emotion_command" decision: a random acknowledgement as reply text plus a
    // "chitchat-skill" payload whose ESML wraps a fixed sentence in the mapped emotion category.
    private static JiboInteractionDecision BuildEmotionCommandDecision(IJiboRandomizer randomizer, string emotion)
    {
        // "excited" maps to the 'happy' category and "calm" to 'neutral'; unknown emotions fall back to 'neutral'.
        var (esmlEmotion, responseSuffix) = emotion switch
        {
            "happy" => ("happy", "I am feeling happy."),
            "sad" => ("sad", "I can do a thoughtful mood too."),
            "excited" => ("happy", "I am feeling excited."),
            "calm" => ("neutral", "I am in a calmer mood."),
            _ => ("neutral", "Mood updated.")
        };

        var skillPayload = new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase)
        {
            ["esml"] = $"<speak><es cat='{esmlEmotion}' filter='!ssa-only, !sfx-only' endNeutral='true'>{responseSuffix}</es></speak>",
            ["mim_id"] = "runtime-chat",
            ["mim_type"] = "announcement",
            ["prompt_id"] = "RUNTIME_EMOTION_COMMAND",
            ["prompt_sub_category"] = "AN"
        };

        return new JiboInteractionDecision(
            "emotion_command",
            randomizer.Choose(EmotionCommandReplies),
            "chitchat-skill",
            skillPayload,
            ContextUpdates: BuildContextUpdates(EmotionCommandRoute, emotion));
    }

    // Error-route decision; the trimmed transcript (empty when blank) is stored in the context updates.
    private static JiboInteractionDecision BuildErrorResponseDecision(string intentName, string replyText, string transcript)
    {
        var normalizedTranscript = string.IsNullOrWhiteSpace(transcript)
            ? string.Empty
            : transcript.Trim();

        return new JiboInteractionDecision(
            intentName,
            replyText,
            ContextUpdates: BuildContextUpdates(
                ErrorResponseRoute,
                emotion: null,
                rawTranscript: normalizedTranscript));
    }

    // Context updates written by every route. A missing emotion or transcript is stored as an empty
    // string rather than null.
    private static IDictionary<string, object?> BuildContextUpdates(
        string route,
        string? emotion,
        string? rawTranscript = null)
    {
        return new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase)
        {
            [StateMetadataKey] = CompleteState,
            [RouteMetadataKey] = route,
            [EmotionMetadataKey] = emotion ?? string.Empty,
            ["chitchatLastState"] = IntentSplitState,
            ["chitchatProcessState"] = ProcessQueryState,
            ["chitchatRawTranscript"] = rawTranscript ?? string.Empty
        };
    }

    private static bool IsEmotionQuery(string loweredTranscript)
    {
        return ContainsAnyPhrase(loweredTranscript, EmotionQueryPhrases);
    }

    // Finds the first emotion whose phrase list matches. The attribute tells the compiler that
    // 'emotion' is non-null when the method returns true, so callers need no '!' suppression.
    private static bool TryResolveEmotionCommand(
        string loweredTranscript,
        [System.Diagnostics.CodeAnalysis.NotNullWhen(true)] out string? emotion)
    {
        foreach (var (candidateEmotion, phrases) in EmotionCommandPhrases)
        {
            if (ContainsAnyPhrase(loweredTranscript, phrases))
            {
                emotion = candidateEmotion;
                return true;
            }
        }

        emotion = null;
        return false;
    }

    // True when any phrase occurs in the transcript as a whole phrase. Null or blank transcripts never match.
    private static bool ContainsAnyPhrase(string loweredTranscript, IEnumerable<string> phrases)
    {
        if (string.IsNullOrWhiteSpace(loweredTranscript))
        {
            return false;
        }

        foreach (var phrase in phrases)
        {
            if (ContainsWholePhrase(loweredTranscript, phrase))
            {
                return true;
            }
        }

        return false;
    }

    // Ordinal search for 'phrase' where the characters on both sides are either the string edge or
    // not a letter/digit. Spaces still count as boundaries, and so does punctuation ("are you happy?"),
    // while a phrase embedded in a longer word ("maybe sad") is rejected.
    private static bool ContainsWholePhrase(string text, string phrase)
    {
        if (string.IsNullOrEmpty(phrase))
        {
            return false;
        }

        var searchFrom = 0;
        while (searchFrom <= text.Length - phrase.Length)
        {
            var start = text.IndexOf(phrase, searchFrom, StringComparison.Ordinal);
            if (start < 0)
            {
                return false;
            }

            var end = start + phrase.Length;
            var startsOnBoundary = start == 0 || !char.IsLetterOrDigit(text[start - 1]);
            var endsOnBoundary = end == text.Length || !char.IsLetterOrDigit(text[end]);
            if (startsOnBoundary && endsOnBoundary)
            {
                return true;
            }

            // This occurrence sat inside a longer word; keep scanning for a later one.
            searchFrom = start + 1;
        }

        return false;
    }
}
|
||||
@@ -64,6 +64,18 @@ public sealed class JiboInteractionService(
|
||||
return personalReportDecision;
|
||||
}
|
||||
|
||||
var chitchatDecision = ChitchatStateMachine.TryBuildDecision(
|
||||
semanticIntent,
|
||||
transcript,
|
||||
lowered,
|
||||
catalog,
|
||||
randomizer,
|
||||
() => BuildGenericReply(catalog, transcript, lowered));
|
||||
if (chitchatDecision is not null)
|
||||
{
|
||||
return chitchatDecision;
|
||||
}
|
||||
|
||||
return semanticIntent switch
|
||||
{
|
||||
"joke" => BuildJokeDecision(catalog),
|
||||
@@ -96,11 +108,8 @@ public sealed class JiboInteractionService(
|
||||
"photo_gallery" => BuildPhotoGalleryLaunchDecision(),
|
||||
"snapshot" => BuildPhotoCreateDecision("snapshot", "Taking a picture.", "createOnePhoto"),
|
||||
"photobooth" => BuildPhotoCreateDecision("photobooth", "Starting photobooth.", "createSomePhotos"),
|
||||
"hello" => new JiboInteractionDecision("hello", randomizer.Choose(catalog.GreetingReplies)),
|
||||
"how_are_you" => new JiboInteractionDecision("how_are_you", randomizer.Choose(catalog.HowAreYouReplies)),
|
||||
"robot_age" => BuildRobotAgeDecision(referenceLocalTime),
|
||||
"robot_birthday" => BuildRobotBirthdayDecision(),
|
||||
"robot_personality" => new JiboInteractionDecision("robot_personality", randomizer.Choose(catalog.PersonalityReplies)),
|
||||
"memory_set_name" => BuildRememberNameDecision(turn, transcript),
|
||||
"memory_get_name" => BuildRecallNameDecision(turn),
|
||||
"memory_set_birthday" => BuildRememberBirthdayDecision(turn, transcript),
|
||||
|
||||
@@ -59,7 +59,8 @@ public sealed class ProtocolToTurnContextMapper
|
||||
|
||||
foreach (var pair in session.Metadata)
|
||||
{
|
||||
if (!pair.Key.StartsWith("personalReport", StringComparison.OrdinalIgnoreCase) ||
|
||||
if ((!pair.Key.StartsWith("personalReport", StringComparison.OrdinalIgnoreCase) &&
|
||||
!pair.Key.StartsWith("chitchat", StringComparison.OrdinalIgnoreCase)) ||
|
||||
pair.Value is null)
|
||||
{
|
||||
continue;
|
||||
|
||||
@@ -902,6 +902,11 @@ public sealed partial class WebSocketTurnFinalizationService(
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ChitchatStateMachine.IsLikelyEmotionUtterance(transcript))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (transcript.Length >= 6)
|
||||
{
|
||||
return true;
|
||||
@@ -1058,6 +1063,9 @@ public sealed partial class WebSocketTurnFinalizationService(
|
||||
var previousCalendarEnabled = ReadMetadataBool(session.Metadata, PersonalReportOrchestrator.CalendarEnabledMetadataKey) ?? true;
|
||||
var previousCommuteEnabled = ReadMetadataBool(session.Metadata, PersonalReportOrchestrator.CommuteEnabledMetadataKey) ?? true;
|
||||
var previousNewsEnabled = ReadMetadataBool(session.Metadata, PersonalReportOrchestrator.NewsEnabledMetadataKey) ?? true;
|
||||
var previousChitchatState = ReadMetadataString(session.Metadata, ChitchatStateMachine.StateMetadataKey);
|
||||
var previousChitchatRoute = ReadMetadataString(session.Metadata, ChitchatStateMachine.RouteMetadataKey);
|
||||
var previousChitchatEmotion = ReadMetadataString(session.Metadata, ChitchatStateMachine.EmotionMetadataKey);
|
||||
|
||||
foreach (var pair in contextUpdates)
|
||||
{
|
||||
@@ -1078,6 +1086,9 @@ public sealed partial class WebSocketTurnFinalizationService(
|
||||
var nextCommuteEnabled = ReadMetadataBool(session.Metadata, PersonalReportOrchestrator.CommuteEnabledMetadataKey) ?? true;
|
||||
var nextNewsEnabled = ReadMetadataBool(session.Metadata, PersonalReportOrchestrator.NewsEnabledMetadataKey) ?? true;
|
||||
var serviceError = ReadMetadataString(session.Metadata, PersonalReportOrchestrator.LastServiceErrorMetadataKey);
|
||||
var nextChitchatState = ReadMetadataString(session.Metadata, ChitchatStateMachine.StateMetadataKey);
|
||||
var nextChitchatRoute = ReadMetadataString(session.Metadata, ChitchatStateMachine.RouteMetadataKey);
|
||||
var nextChitchatEmotion = ReadMetadataString(session.Metadata, ChitchatStateMachine.EmotionMetadataKey);
|
||||
|
||||
if (!string.Equals(previousState, nextState, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
@@ -1140,6 +1151,30 @@ public sealed partial class WebSocketTurnFinalizationService(
|
||||
["intent"] = intentName
|
||||
}), cancellationToken);
|
||||
}
|
||||
|
||||
if (!string.Equals(previousChitchatState, nextChitchatState, StringComparison.OrdinalIgnoreCase) &&
|
||||
!string.IsNullOrWhiteSpace(nextChitchatState))
|
||||
{
|
||||
await sink.RecordTurnDiagnosticAsync("chitchat_state_transition", BuildTurnDiagnosticSnapshot(session, envelope, new Dictionary<string, object?>
|
||||
{
|
||||
["previousState"] = previousChitchatState,
|
||||
["state"] = nextChitchatState,
|
||||
["intent"] = intentName
|
||||
}), cancellationToken);
|
||||
}
|
||||
|
||||
if (!string.Equals(previousChitchatRoute, nextChitchatRoute, StringComparison.OrdinalIgnoreCase) &&
|
||||
!string.IsNullOrWhiteSpace(nextChitchatRoute))
|
||||
{
|
||||
await sink.RecordTurnDiagnosticAsync("chitchat_route_selected", BuildTurnDiagnosticSnapshot(session, envelope, new Dictionary<string, object?>
|
||||
{
|
||||
["route"] = nextChitchatRoute,
|
||||
["previousRoute"] = previousChitchatRoute,
|
||||
["emotion"] = nextChitchatEmotion,
|
||||
["previousEmotion"] = previousChitchatEmotion,
|
||||
["intent"] = intentName
|
||||
}), cancellationToken);
|
||||
}
|
||||
}
|
||||
|
||||
private async Task EmitServiceToggleDiagnosticAsync(
|
||||
|
||||
@@ -17,6 +17,9 @@ public sealed class JiboInteractionServiceTests
|
||||
private const string PersonalReportCalendarEnabledKey = "personalReportCalendarEnabled";
|
||||
private const string PersonalReportCommuteEnabledKey = "personalReportCommuteEnabled";
|
||||
private const string PersonalReportNewsEnabledKey = "personalReportNewsEnabled";
|
||||
private const string ChitchatStateKey = "chitchatState";
|
||||
private const string ChitchatRouteKey = "chitchatRoute";
|
||||
private const string ChitchatEmotionKey = "chitchatEmotion";
|
||||
|
||||
[Fact]
|
||||
public async Task BuildDecisionAsync_Joke_UsesCatalogBackedRandomContent()
|
||||
@@ -147,6 +150,76 @@ public sealed class JiboInteractionServiceTests
|
||||
Assert.Equal("I do. I am curious, playful, and always up for a new experiment.", decision.ReplyText);
|
||||
}
|
||||
|
||||
[Fact]
public async Task BuildDecisionAsync_Hello_RoutesThroughChitchatScriptedResponse()
{
    // Arrange: a plain greeting as both the raw and the normalized transcript.
    const string utterance = "hello";
    var sut = CreateService();
    var turn = new TurnContext
    {
        RawTranscript = utterance,
        NormalizedTranscript = utterance
    };

    // Act
    var result = await sut.BuildDecisionAsync(turn);

    // Assert: the intent is kept and the context updates record the scripted-response route.
    Assert.Equal("hello", result.IntentName);
    var updates = result.ContextUpdates;
    Assert.NotNull(updates);
    Assert.Equal("complete", updates![ChitchatStateKey]);
    Assert.Equal("ScriptedResponse", updates[ChitchatRouteKey]);
}
|
||||
|
||||
[Fact]
public async Task BuildDecisionAsync_AreYouHappy_RoutesThroughEmotionQuerySplit()
{
    // Arrange: a question about the robot's mood.
    const string utterance = "are you happy";
    var sut = CreateService();
    var turn = new TurnContext
    {
        RawTranscript = utterance,
        NormalizedTranscript = utterance
    };

    // Act
    var result = await sut.BuildDecisionAsync(turn);

    // Assert: routed as an emotion query, with an empty emotion value recorded.
    Assert.Equal("emotion_query", result.IntentName);
    var updates = result.ContextUpdates;
    Assert.NotNull(updates);
    Assert.Equal("EmotionQuery", updates![ChitchatRouteKey]);
    Assert.Equal(string.Empty, updates[ChitchatEmotionKey]);
}
|
||||
|
||||
[Fact]
public async Task BuildDecisionAsync_Smile_RoutesThroughEmotionCommandSplit()
{
    // Arrange: "smile" is one of the phrases mapped to the happy emotion.
    const string utterance = "smile";
    var sut = CreateService();
    var turn = new TurnContext
    {
        RawTranscript = utterance,
        NormalizedTranscript = utterance
    };

    // Act
    var result = await sut.BuildDecisionAsync(turn);

    // Assert: the decision targets the chitchat skill with a happy ESML payload.
    Assert.Equal("emotion_command", result.IntentName);
    Assert.Equal("chitchat-skill", result.SkillName);
    Assert.NotNull(result.SkillPayload);
    var esml = result.SkillPayload!["esml"]?.ToString();
    Assert.Contains("cat='happy'", esml, StringComparison.Ordinal);

    // Assert: the context updates record both the route and the resolved emotion.
    var updates = result.ContextUpdates;
    Assert.NotNull(updates);
    Assert.Equal("EmotionCommand", updates![ChitchatRouteKey]);
    Assert.Equal("happy", updates[ChitchatEmotionKey]);
}
|
||||
|
||||
[Fact]
public async Task BuildDecisionAsync_UnhandledChat_RoutesThroughErrorResponseSplit()
{
    // Arrange: an utterance that matches no emotion phrase.
    const string utterance = "blargh";
    var sut = CreateService();
    var turn = new TurnContext
    {
        RawTranscript = utterance,
        NormalizedTranscript = utterance
    };

    // Act
    var result = await sut.BuildDecisionAsync(turn);

    // Assert: the generic chat intent lands on the error-response route.
    Assert.Equal("chat", result.IntentName);
    var updates = result.ContextUpdates;
    Assert.NotNull(updates);
    Assert.Equal("ErrorResponse", updates![ChitchatRouteKey]);
}
|
||||
|
||||
[Fact]
|
||||
public async Task BuildDecisionAsync_BirthdayMemory_SetThenRecallWithinTenant()
|
||||
{
|
||||
|
||||
@@ -3241,6 +3241,36 @@ public sealed class JiboWebSocketServiceTests
|
||||
Assert.Equal("idle", stateValue?.ToString());
|
||||
}
|
||||
|
||||
[Fact]
public async Task ClientAsrChitchatEmotionCommand_PersistsSplitRouteMetadata()
{
    // Arrange: a CLIENT_ASR listen message carrying the "smile" utterance for a freshly issued token.
    const string routeKey = "chitchatRoute";
    const string emotionKey = "chitchatEmotion";
    var token = _store.IssueRobotToken("chitchat-emotion-device-a");
    var envelope = new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = token,
        Text = """{"type":"CLIENT_ASR","transID":"trans-chitchat-emotion","data":{"text":"smile"}}"""
    };

    // Act
    var replies = await _service.HandleMessageAsync(envelope);

    // Assert: three replies come back and the first one reports the emotion_command intent.
    Assert.Equal(3, replies.Count);
    using (var listenPayload = JsonDocument.Parse(replies[0].Text!))
    {
        var nlu = listenPayload.RootElement.GetProperty("data").GetProperty("nlu");
        Assert.Equal("emotion_command", nlu.GetProperty("intent").GetString());
    }

    // Assert: the route and the emotion were persisted on the session metadata.
    var session = _store.FindSessionByToken(token);
    Assert.NotNull(session);

    var hasRoute = session.Metadata.TryGetValue(routeKey, out var routeValue);
    Assert.True(hasRoute);
    Assert.Equal("EmotionCommand", routeValue?.ToString());

    var hasEmotion = session.Metadata.TryGetValue(emotionKey, out var emotionValue);
    Assert.True(hasEmotion);
    Assert.Equal("happy", emotionValue?.ToString());
}
|
||||
|
||||
[Fact]
|
||||
public async Task FollowUpTurn_UsesNewTurnStateWithoutLeakingBufferedAudio()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user