From e1dca81519c7120be07b844f2aed60d8c23c2192 Mon Sep 17 00:00:00 2001 From: Jacob Dubin Date: Mon, 20 Apr 2026 22:55:42 -0500 Subject: [PATCH] jibo photo skills voice activation --- OpenJibo/docs/development-plan.md | 6 ++ OpenJibo/docs/feature-backlog.md | 7 +- .../Services/DemoConversationBroker.cs | 3 + .../Services/JiboInteractionService.cs | 73 ++++++++++++++ .../ResponsePlanToSocketMessagesMapper.cs | 30 ++++++ .../WebSocketTurnFinalizationService.cs | 3 + .../WebSockets/JiboInteractionServiceTests.cs | 48 +++++++++ .../WebSockets/JiboWebSocketServiceTests.cs | 97 +++++++++++++++++++ 8 files changed, 266 insertions(+), 1 deletion(-) diff --git a/OpenJibo/docs/development-plan.md b/OpenJibo/docs/development-plan.md index e6a22af..69b1c05 100644 --- a/OpenJibo/docs/development-plan.md +++ b/OpenJibo/docs/development-plan.md @@ -184,6 +184,12 @@ Latest clock discovery findings: - Direct timer and alarm actions use `timerValue` and `alarmValue` utterances, not a generic chat path. - A practical first OpenJibo slice is therefore: keep custom spoken time/date answers for now, but route `open clock`, `open timer`, `open alarm`, `set a timer ...`, and `set an alarm ...` through stock-shaped local `@be/clock` handoffs. +Latest photo discovery findings: + +- `@be/gallery` is the local gallery browser and opens from `intent = "menu"`. +- `snapshot` and `photobooth` are not gallery submodes; stock main-menu logic remaps them into `@be/create` with `createOnePhoto` and `createSomePhotos`. +- A practical first OpenJibo photo slice is therefore: route `open photo gallery` to `@be/gallery`, route `snap a picture` to `@be/create/createOnePhoto`, and route `open photobooth` to `@be/create/createSomePhotos`. + ## Speech, Animation, And ESML The current joke flow is only a small foothold into Jibo expressiveness. diff --git a/OpenJibo/docs/feature-backlog.md b/OpenJibo/docs/feature-backlog.md index ed2737d..f9cdab9 100644 --- a/OpenJibo/docs/feature-backlog.md +++ b/OpenJibo/docs/feature-backlog.md @@ -140,18 +140,23 @@ Parallel tags: ### 7. Photo Family Audit -- Status: `ready` +- Status: `in_progress` - Tags: `protocol`, `docs` - Why now: photo confirmation improved already, and the robot skill inventory includes `gallery`. - Current evidence: - `@be/gallery` exists in the robot skill inventory - current captures already show `snapshot` and related menu destinations + - `JiboOs` shows `@be/gallery` opens from `intent = menu`, while `snapshot` and `photobooth` actually map into `@be/create` with `createOnePhoto` and `createSomePhotos` - Implementation notes: - separate three flows: - snap a picture - photo gallery - photobooth - document whether each one is local-only, cloud-assisted, or upload-backed +- Progress so far: + - voice `open photo gallery` now launches local `@be/gallery` with a stock-shaped `menu` handoff + - voice `snap a picture` now launches local `@be/create` with `createOnePhoto` + - voice `open photobooth` now launches local `@be/create` with `createSomePhotos` - Exit criteria: - known photo menu and voice phrases map to the correct local path - capture storage expectations are documented for laptop versus hosted testing diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/DemoConversationBroker.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/DemoConversationBroker.cs index e0baaab..7ee7624 100644 --- a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/DemoConversationBroker.cs +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/DemoConversationBroker.cs @@ -79,6 +79,9 @@ public sealed class DemoConversationBroker(JiboInteractionService interactionSer "alarm_menu" => false, "timer_value" => false, "alarm_value" => false, + "photo_gallery" => false, + "snapshot" => false, + "photobooth" => false, "news" => false, _ => true }; diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboInteractionService.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboInteractionService.cs index 590f23b..63241fd 100644 --- a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboInteractionService.cs +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/JiboInteractionService.cs @@ -39,6 +39,9 @@ public sealed class JiboInteractionService( "alarm_menu" => BuildClockLaunchDecision("alarm", "Opening the alarm."), "timer_value" => BuildTimerValueDecision(lowered), "alarm_value" => BuildAlarmValueDecision(lowered), + "photo_gallery" => BuildPhotoGalleryLaunchDecision(), + "snapshot" => BuildPhotoCreateDecision("snapshot", "Taking a picture.", "createOnePhoto"), + "photobooth" => BuildPhotoCreateDecision("photobooth", "Starting photobooth.", "createSomePhotos"), "hello" => new JiboInteractionDecision("hello", randomizer.Choose(catalog.GreetingReplies)), "how_are_you" => new JiboInteractionDecision("how_are_you", randomizer.Choose(catalog.HowAreYouReplies)), "yes" => new JiboInteractionDecision("yes", "Yes."), @@ -146,6 +149,18 @@ public sealed class JiboInteractionService( return "word_of_the_day"; } + if (string.Equals(clientIntent, "loadMenu", StringComparison.OrdinalIgnoreCase) && + clientEntities.TryGetValue("destination", out var photoDestination)) + { + return photoDestination.ToLowerInvariant() switch + { + "snapshot" => "snapshot", + "photobooth" => "photobooth", + "gallery" or "photo-gallery" or "photos" => "photo_gallery", + _ => "chat" + }; + } + if (string.Equals(clientIntent, "askForTime", StringComparison.OrdinalIgnoreCase)) { return "time"; @@ -251,6 +266,38 @@ public sealed class JiboInteractionService( return "radio"; } + if (MatchesAny( + loweredTranscript, + "snap a picture", + "take a picture", + "take a photo", + "snap a photo")) + { + return "snapshot"; + } + + if (MatchesAny( + loweredTranscript, + "photo booth", + "photobooth", + "open photobooth", + "start photobooth")) + { + return "photobooth"; + } + + if (MatchesAny( + loweredTranscript, + "photo gallery", + "open the gallery", + "open photo gallery", + "show my photos", + "open my photos", + "gallery")) + { + return "photo_gallery"; + } + if (MatchesAny(loweredTranscript, "dance", "boogie")) { return "dance"; @@ -352,6 +399,32 @@ public sealed class JiboInteractionService( }); } + private static JiboInteractionDecision BuildPhotoGalleryLaunchDecision() + { + return new JiboInteractionDecision( + "photo_gallery", + "Opening the photo gallery.", + "@be/gallery", + new Dictionary(StringComparer.OrdinalIgnoreCase) + { + ["skillId"] = "@be/gallery", + ["localIntent"] = "menu" + }); + } + + private static JiboInteractionDecision BuildPhotoCreateDecision(string intentName, string replyText, string localIntent) + { + return new JiboInteractionDecision( + intentName, + replyText, + "@be/create", + new Dictionary(StringComparer.OrdinalIgnoreCase) + { + ["skillId"] = "@be/create", + ["localIntent"] = localIntent + }); + } + private static JiboInteractionDecision BuildClockLaunchDecision(string domain, string replyText) { return new JiboInteractionDecision( diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ResponsePlanToSocketMessagesMapper.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ResponsePlanToSocketMessagesMapper.cs index 0200578..6c91388 100644 --- a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ResponsePlanToSocketMessagesMapper.cs +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/ResponsePlanToSocketMessagesMapper.cs @@ -25,7 +25,11 @@ public sealed class ResponsePlanToSocketMessagesMapper var isWordOfDayGuess = string.Equals(plan.IntentName, "word_of_the_day_guess", StringComparison.OrdinalIgnoreCase); var isRadioLaunch = string.Equals(plan.IntentName, "radio", StringComparison.OrdinalIgnoreCase) || string.Equals(plan.IntentName, "radio_genre", StringComparison.OrdinalIgnoreCase); + var isPhotoGalleryLaunch = string.Equals(plan.IntentName, "photo_gallery", StringComparison.OrdinalIgnoreCase); + var isPhotoCreateLaunch = string.Equals(plan.IntentName, "snapshot", StringComparison.OrdinalIgnoreCase) || + string.Equals(plan.IntentName, "photobooth", StringComparison.OrdinalIgnoreCase); var isClockSkillLaunch = string.Equals(skill?.SkillName, "@be/clock", StringComparison.OrdinalIgnoreCase); + var localIntent = ReadSkillPayloadString(skill, "localIntent"); var clockIntent = ReadSkillPayloadString(skill, "clockIntent"); var clockDomain = ReadSkillPayloadString(skill, "domain"); var timerHours = ReadSkillPayloadString(skill, "hours"); @@ -41,6 +45,8 @@ public sealed class ResponsePlanToSocketMessagesMapper ? "menu" : isRadioLaunch ? "menu" + : (isPhotoGalleryLaunch || isPhotoCreateLaunch) && !string.IsNullOrWhiteSpace(localIntent) + ? localIntent : isClockSkillLaunch && !string.IsNullOrWhiteSpace(clockIntent) ? clockIntent : isWordOfDayGuess @@ -54,6 +60,8 @@ public sealed class ResponsePlanToSocketMessagesMapper ? string.Empty : isRadioLaunch ? transcript + : isPhotoGalleryLaunch || isPhotoCreateLaunch + ? transcript : isClockSkillLaunch ? transcript : string.Equals(clientIntent, "guess", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(nluGuess) @@ -67,6 +75,8 @@ public sealed class ResponsePlanToSocketMessagesMapper ? ["word-of-the-day/menu"] : isRadioLaunch ? Array.Empty() + : isPhotoGalleryLaunch || isPhotoCreateLaunch + ? string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) ? rules : Array.Empty() : isClockSkillLaunch ? string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) ? rules : Array.Empty() : isWordOfDayGuess @@ -108,6 +118,8 @@ public sealed class ResponsePlanToSocketMessagesMapper entities, isWordOfDayLaunch ? "@be/word-of-the-day" : isRadioLaunch ? "@be/radio" : + isPhotoGalleryLaunch ? "@be/gallery" : + isPhotoCreateLaunch ? "@be/create" : isClockSkillLaunch ? "@be/clock" : null), match = new @@ -182,6 +194,24 @@ public sealed class ResponsePlanToSocketMessagesMapper DelayMs: 125)); } + if ((isPhotoGalleryLaunch || isPhotoCreateLaunch) && + !string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase)) + { + var skillId = isPhotoGalleryLaunch ? "@be/gallery" : "@be/create"; + messages.Add(new SocketReplyPlan( + JsonSerializer.Serialize(BuildSkillRedirectPayload( + transId, + skillId, + outboundIntent, + outboundAsrText, + outboundRules, + entities)), + DelayMs: 75)); + messages.Add(new SocketReplyPlan( + JsonSerializer.Serialize(BuildCompletionOnlySkillPayload(transId, skillId)), + DelayMs: 125)); + } + if (emitSkillActions && speak is not null) { messages.Add(new SocketReplyPlan( diff --git a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/WebSocketTurnFinalizationService.cs b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/WebSocketTurnFinalizationService.cs index b81ecd3..2782ab0 100644 --- a/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/WebSocketTurnFinalizationService.cs +++ b/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/WebSocketTurnFinalizationService.cs @@ -569,6 +569,9 @@ public sealed class WebSocketTurnFinalizationService( !string.Equals(plan.IntentName, "alarm_menu", StringComparison.OrdinalIgnoreCase) && !string.Equals(plan.IntentName, "timer_value", StringComparison.OrdinalIgnoreCase) && !string.Equals(plan.IntentName, "alarm_value", StringComparison.OrdinalIgnoreCase) && + !string.Equals(plan.IntentName, "photo_gallery", StringComparison.OrdinalIgnoreCase) && + !string.Equals(plan.IntentName, "snapshot", StringComparison.OrdinalIgnoreCase) && + !string.Equals(plan.IntentName, "photobooth", StringComparison.OrdinalIgnoreCase) && (messageType != "CLIENT_NLU" || string.Equals(plan.IntentName, "word_of_the_day_guess", StringComparison.OrdinalIgnoreCase)); var replies = ResponsePlanToSocketMessagesMapper.Map(plan, finalizedTurn, session, emitSkillActions).Select(map => new WebSocketReply diff --git a/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboInteractionServiceTests.cs b/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboInteractionServiceTests.cs index 72eeb98..2823bfa 100644 --- a/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboInteractionServiceTests.cs +++ b/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboInteractionServiceTests.cs @@ -237,6 +237,54 @@ public sealed class JiboInteractionServiceTests Assert.Equal("am", decision.SkillPayload["ampm"]); } + [Fact] + public async Task BuildDecisionAsync_OpenPhotoGallery_MapsToGalleryLaunch() + { + var service = CreateService(); + + var decision = await service.BuildDecisionAsync(new TurnContext + { + RawTranscript = "open photo gallery", + NormalizedTranscript = "open photo gallery" + }); + + Assert.Equal("photo_gallery", decision.IntentName); + Assert.Equal("@be/gallery", decision.SkillName); + Assert.Equal("menu", decision.SkillPayload!["localIntent"]); + } + + [Fact] + public async Task BuildDecisionAsync_SnapAPicture_MapsToCreateOnePhoto() + { + var service = CreateService(); + + var decision = await service.BuildDecisionAsync(new TurnContext + { + RawTranscript = "snap a picture", + NormalizedTranscript = "snap a picture" + }); + + Assert.Equal("snapshot", decision.IntentName); + Assert.Equal("@be/create", decision.SkillName); + Assert.Equal("createOnePhoto", decision.SkillPayload!["localIntent"]); + } + + [Fact] + public async Task BuildDecisionAsync_OpenPhotobooth_MapsToCreateSomePhotos() + { + var service = CreateService(); + + var decision = await service.BuildDecisionAsync(new TurnContext + { + RawTranscript = "open photobooth", + NormalizedTranscript = "open photobooth" + }); + + Assert.Equal("photobooth", decision.IntentName); + Assert.Equal("@be/create", decision.SkillName); + Assert.Equal("createSomePhotos", decision.SkillPayload!["localIntent"]); + } + [Fact] public async Task BuildDecisionAsync_TellMeTheNews_UsesNimbusCloudSkillPath() { diff --git a/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs b/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs index 82bfb54..cda3f00 100644 --- a/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs +++ b/OpenJibo/tests/Jibo.Cloud.Tests/WebSockets/JiboWebSocketServiceTests.cs @@ -414,6 +414,103 @@ public sealed class JiboWebSocketServiceTests Assert.Equal("am", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("entities").GetProperty("ampm").GetString()); } + [Fact] + public async Task ClientAsr_OpenPhotoGallery_RedirectsIntoGallerySkill() + { + await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-photo-gallery-token", + Text = """{"type":"LISTEN","transID":"trans-photo-gallery","data":{"rules":["globals/global_commands_launch"]}}""" + }); + + var replies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-photo-gallery-token", + Text = """{"type":"CLIENT_ASR","transID":"trans-photo-gallery","data":{"text":"open photo gallery"}}""" + }); + + Assert.Equal(4, replies.Count); + Assert.Equal("SKILL_REDIRECT", ReadReplyType(replies[2])); + + using var listenPayload = JsonDocument.Parse(replies[0].Text!); + Assert.Equal("menu", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString()); + Assert.Equal("@be/gallery", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("skill").GetString()); + + using var redirectPayload = JsonDocument.Parse(replies[2].Text!); + Assert.Equal("@be/gallery", redirectPayload.RootElement.GetProperty("data").GetProperty("match").GetProperty("skillID").GetString()); + Assert.Equal("menu", redirectPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString()); + } + + [Fact] + public async Task ClientAsr_SnapAPicture_RedirectsIntoCreateSkill() + { + await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-snapshot-token", + Text = """{"type":"LISTEN","transID":"trans-snapshot","data":{"rules":["globals/global_commands_launch"]}}""" + }); + + var replies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-snapshot-token", + Text = """{"type":"CLIENT_ASR","transID":"trans-snapshot","data":{"text":"snap a picture"}}""" + }); + + Assert.Equal(4, replies.Count); + + using var listenPayload = JsonDocument.Parse(replies[0].Text!); + Assert.Equal("createOnePhoto", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString()); + Assert.Equal("@be/create", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("skill").GetString()); + + using var redirectPayload = JsonDocument.Parse(replies[2].Text!); + Assert.Equal("@be/create", redirectPayload.RootElement.GetProperty("data").GetProperty("match").GetProperty("skillID").GetString()); + Assert.Equal("createOnePhoto", redirectPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString()); + } + + [Fact] + public async Task ClientAsr_OpenPhotobooth_RedirectsIntoCreateSkill() + { + await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-photobooth-token", + Text = """{"type":"LISTEN","transID":"trans-photobooth","data":{"rules":["globals/global_commands_launch"]}}""" + }); + + var replies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope + { + HostName = "neo-hub.jibo.com", + Path = "/listen", + Kind = "neo-hub-listen", + Token = "hub-photobooth-token", + Text = """{"type":"CLIENT_ASR","transID":"trans-photobooth","data":{"text":"open photobooth"}}""" + }); + + Assert.Equal(4, replies.Count); + + using var listenPayload = JsonDocument.Parse(replies[0].Text!); + Assert.Equal("createSomePhotos", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString()); + Assert.Equal("@be/create", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("skill").GetString()); + + using var redirectPayload = JsonDocument.Parse(replies[2].Text!); + Assert.Equal("@be/create", redirectPayload.RootElement.GetProperty("data").GetProperty("match").GetProperty("skillID").GetString()); + Assert.Equal("createSomePhotos", redirectPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString()); + } + [Fact] public async Task ClientAsr_YesNoCreateFlow_PreservesCreateRuleAndDomain() {