jibo photo skills voice activation

This commit is contained in:
Jacob Dubin
2026-04-20 22:55:42 -05:00
parent ab47ad7a2d
commit e1dca81519
8 changed files with 266 additions and 1 deletions

View File

@@ -184,6 +184,12 @@ Latest clock discovery findings:
- Direct timer and alarm actions use `timerValue` and `alarmValue` utterances, not a generic chat path. - Direct timer and alarm actions use `timerValue` and `alarmValue` utterances, not a generic chat path.
- A practical first OpenJibo slice is therefore: keep custom spoken time/date answers for now, but route `open clock`, `open timer`, `open alarm`, `set a timer ...`, and `set an alarm ...` through stock-shaped local `@be/clock` handoffs. - A practical first OpenJibo slice is therefore: keep custom spoken time/date answers for now, but route `open clock`, `open timer`, `open alarm`, `set a timer ...`, and `set an alarm ...` through stock-shaped local `@be/clock` handoffs.
Latest photo discovery findings:
- `@be/gallery` is the local gallery browser and opens from `intent = "menu"`.
- `snapshot` and `photobooth` are not gallery submodes; stock main-menu logic remaps them into `@be/create` with `createOnePhoto` and `createSomePhotos`.
- A practical first OpenJibo photo slice is therefore: route `open photo gallery` to `@be/gallery` with the `menu` intent, route `snap a picture` to `@be/create` with `createOnePhoto`, and route `open photobooth` to `@be/create` with `createSomePhotos`.
## Speech, Animation, And ESML ## Speech, Animation, And ESML
The current joke flow is only a small foothold into Jibo expressiveness. The current joke flow is only a small foothold into Jibo expressiveness.

View File

@@ -140,18 +140,23 @@ Parallel tags:
### 7. Photo Family Audit ### 7. Photo Family Audit
- Status: `ready` - Status: `in_progress`
- Tags: `protocol`, `docs` - Tags: `protocol`, `docs`
- Why now: photo confirmation improved already, and the robot skill inventory includes `gallery`. - Why now: photo confirmation improved already, and the robot skill inventory includes `gallery`.
- Current evidence: - Current evidence:
- `@be/gallery` exists in the robot skill inventory - `@be/gallery` exists in the robot skill inventory
- current captures already show `snapshot` and related menu destinations - current captures already show `snapshot` and related menu destinations
- `JiboOs` shows `@be/gallery` opens from `intent = menu`, while `snapshot` and `photobooth` actually map into `@be/create` with `createOnePhoto` and `createSomePhotos`
- Implementation notes: - Implementation notes:
- separate three flows: - separate three flows:
- snap a picture - snap a picture
- photo gallery - photo gallery
- photobooth - photobooth
- document whether each one is local-only, cloud-assisted, or upload-backed - document whether each one is local-only, cloud-assisted, or upload-backed
- Progress so far:
- voice `open photo gallery` now launches local `@be/gallery` with a stock-shaped `menu` handoff
- voice `snap a picture` now launches local `@be/create` with `createOnePhoto`
- voice `open photobooth` now launches local `@be/create` with `createSomePhotos`
- Exit criteria: - Exit criteria:
- known photo menu and voice phrases map to the correct local path - known photo menu and voice phrases map to the correct local path
- capture storage expectations are documented for laptop versus hosted testing - capture storage expectations are documented for laptop versus hosted testing

View File

@@ -79,6 +79,9 @@ public sealed class DemoConversationBroker(JiboInteractionService interactionSer
"alarm_menu" => false, "alarm_menu" => false,
"timer_value" => false, "timer_value" => false,
"alarm_value" => false, "alarm_value" => false,
"photo_gallery" => false,
"snapshot" => false,
"photobooth" => false,
"news" => false, "news" => false,
_ => true _ => true
}; };

View File

@@ -39,6 +39,9 @@ public sealed class JiboInteractionService(
"alarm_menu" => BuildClockLaunchDecision("alarm", "Opening the alarm."), "alarm_menu" => BuildClockLaunchDecision("alarm", "Opening the alarm."),
"timer_value" => BuildTimerValueDecision(lowered), "timer_value" => BuildTimerValueDecision(lowered),
"alarm_value" => BuildAlarmValueDecision(lowered), "alarm_value" => BuildAlarmValueDecision(lowered),
"photo_gallery" => BuildPhotoGalleryLaunchDecision(),
"snapshot" => BuildPhotoCreateDecision("snapshot", "Taking a picture.", "createOnePhoto"),
"photobooth" => BuildPhotoCreateDecision("photobooth", "Starting photobooth.", "createSomePhotos"),
"hello" => new JiboInteractionDecision("hello", randomizer.Choose(catalog.GreetingReplies)), "hello" => new JiboInteractionDecision("hello", randomizer.Choose(catalog.GreetingReplies)),
"how_are_you" => new JiboInteractionDecision("how_are_you", randomizer.Choose(catalog.HowAreYouReplies)), "how_are_you" => new JiboInteractionDecision("how_are_you", randomizer.Choose(catalog.HowAreYouReplies)),
"yes" => new JiboInteractionDecision("yes", "Yes."), "yes" => new JiboInteractionDecision("yes", "Yes."),
@@ -146,6 +149,18 @@ public sealed class JiboInteractionService(
return "word_of_the_day"; return "word_of_the_day";
} }
// Menu-driven photo launches: when the client intent is "loadMenu" (case-insensitive)
// and a "destination" entity is present, map that destination onto a photo intent name.
// NOTE(review): the default arm returns "chat" for every non-photo destination, so any
// loadMenu handling placed after this check is never reached once a destination entity
// exists — confirm that is intended.
// NOTE(review): assumes entity values are non-null strings; ToLowerInvariant() would
// throw on a null value — verify against the clientEntities declaration.
if (string.Equals(clientIntent, "loadMenu", StringComparison.OrdinalIgnoreCase) &&
clientEntities.TryGetValue("destination", out var photoDestination))
{
return photoDestination.ToLowerInvariant() switch
{
"snapshot" => "snapshot",
"photobooth" => "photobooth",
"gallery" or "photo-gallery" or "photos" => "photo_gallery",
_ => "chat"
};
}
if (string.Equals(clientIntent, "askForTime", StringComparison.OrdinalIgnoreCase)) if (string.Equals(clientIntent, "askForTime", StringComparison.OrdinalIgnoreCase))
{ {
return "time"; return "time";
@@ -251,6 +266,38 @@ public sealed class JiboInteractionService(
return "radio"; return "radio";
} }
// Spoken single-photo requests resolve to the "snapshot" intent.
if (MatchesAny(
loweredTranscript,
"snap a picture",
"take a picture",
"take a photo",
"snap a photo"))
{
return "snapshot";
}
// Spoken photobooth requests resolve to the "photobooth" intent.
// NOTE(review): MatchesAny is defined outside this view; if it matches substrings,
// "photobooth" already covers the "open photobooth" / "start photobooth" entries — verify.
if (MatchesAny(
loweredTranscript,
"photo booth",
"photobooth",
"open photobooth",
"start photobooth"))
{
return "photobooth";
}
// Spoken gallery requests resolve to the "photo_gallery" intent.
// NOTE(review): the bare "gallery" entry is the broadest phrase here; if MatchesAny
// matches substrings it would also catch unrelated utterances containing "gallery" — confirm.
if (MatchesAny(
loweredTranscript,
"photo gallery",
"open the gallery",
"open photo gallery",
"show my photos",
"open my photos",
"gallery"))
{
return "photo_gallery";
}
if (MatchesAny(loweredTranscript, "dance", "boogie")) if (MatchesAny(loweredTranscript, "dance", "boogie"))
{ {
return "dance"; return "dance";
@@ -352,6 +399,32 @@ public sealed class JiboInteractionService(
}); });
} }
/// <summary>
/// Builds the decision that hands the turn to the local <c>@be/gallery</c> skill
/// using its <c>menu</c> local intent.
/// </summary>
/// <returns>
/// A <c>photo_gallery</c> decision whose payload carries the gallery skill id and the local intent.
/// </returns>
private static JiboInteractionDecision BuildPhotoGalleryLaunchDecision()
{
    const string gallerySkill = "@be/gallery";

    // Payload keys are looked up case-insensitively.
    var payload = new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase);
    payload["skillId"] = gallerySkill;
    payload["localIntent"] = "menu";

    return new JiboInteractionDecision("photo_gallery", "Opening the photo gallery.", gallerySkill, payload);
}
/// <summary>
/// Builds a decision that hands the turn to the local <c>@be/create</c> skill.
/// </summary>
/// <param name="intentName">Intent name recorded on the decision (callers in this file pass <c>snapshot</c> or <c>photobooth</c>).</param>
/// <param name="replyText">Reply text attached to the decision.</param>
/// <param name="localIntent">Value stored under the payload's <c>localIntent</c> key (for example <c>createOnePhoto</c>).</param>
/// <returns>A decision targeting <c>@be/create</c> with the skill id and local intent in its payload.</returns>
private static JiboInteractionDecision BuildPhotoCreateDecision(string intentName, string replyText, string localIntent)
{
    const string createSkill = "@be/create";

    // Payload keys are looked up case-insensitively.
    var payload = new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase)
    {
        { "skillId", createSkill },
        { "localIntent", localIntent },
    };

    return new JiboInteractionDecision(intentName, replyText, createSkill, payload);
}
private static JiboInteractionDecision BuildClockLaunchDecision(string domain, string replyText) private static JiboInteractionDecision BuildClockLaunchDecision(string domain, string replyText)
{ {
return new JiboInteractionDecision( return new JiboInteractionDecision(

View File

@@ -25,7 +25,11 @@ public sealed class ResponsePlanToSocketMessagesMapper
var isWordOfDayGuess = string.Equals(plan.IntentName, "word_of_the_day_guess", StringComparison.OrdinalIgnoreCase); var isWordOfDayGuess = string.Equals(plan.IntentName, "word_of_the_day_guess", StringComparison.OrdinalIgnoreCase);
var isRadioLaunch = string.Equals(plan.IntentName, "radio", StringComparison.OrdinalIgnoreCase) || var isRadioLaunch = string.Equals(plan.IntentName, "radio", StringComparison.OrdinalIgnoreCase) ||
string.Equals(plan.IntentName, "radio_genre", StringComparison.OrdinalIgnoreCase); string.Equals(plan.IntentName, "radio_genre", StringComparison.OrdinalIgnoreCase);
var isPhotoGalleryLaunch = string.Equals(plan.IntentName, "photo_gallery", StringComparison.OrdinalIgnoreCase);
var isPhotoCreateLaunch = string.Equals(plan.IntentName, "snapshot", StringComparison.OrdinalIgnoreCase) ||
string.Equals(plan.IntentName, "photobooth", StringComparison.OrdinalIgnoreCase);
var isClockSkillLaunch = string.Equals(skill?.SkillName, "@be/clock", StringComparison.OrdinalIgnoreCase); var isClockSkillLaunch = string.Equals(skill?.SkillName, "@be/clock", StringComparison.OrdinalIgnoreCase);
var localIntent = ReadSkillPayloadString(skill, "localIntent");
var clockIntent = ReadSkillPayloadString(skill, "clockIntent"); var clockIntent = ReadSkillPayloadString(skill, "clockIntent");
var clockDomain = ReadSkillPayloadString(skill, "domain"); var clockDomain = ReadSkillPayloadString(skill, "domain");
var timerHours = ReadSkillPayloadString(skill, "hours"); var timerHours = ReadSkillPayloadString(skill, "hours");
@@ -41,6 +45,8 @@ public sealed class ResponsePlanToSocketMessagesMapper
? "menu" ? "menu"
: isRadioLaunch : isRadioLaunch
? "menu" ? "menu"
: (isPhotoGalleryLaunch || isPhotoCreateLaunch) && !string.IsNullOrWhiteSpace(localIntent)
? localIntent
: isClockSkillLaunch && !string.IsNullOrWhiteSpace(clockIntent) : isClockSkillLaunch && !string.IsNullOrWhiteSpace(clockIntent)
? clockIntent ? clockIntent
: isWordOfDayGuess : isWordOfDayGuess
@@ -54,6 +60,8 @@ public sealed class ResponsePlanToSocketMessagesMapper
? string.Empty ? string.Empty
: isRadioLaunch : isRadioLaunch
? transcript ? transcript
: isPhotoGalleryLaunch || isPhotoCreateLaunch
? transcript
: isClockSkillLaunch : isClockSkillLaunch
? transcript ? transcript
: string.Equals(clientIntent, "guess", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(nluGuess) : string.Equals(clientIntent, "guess", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(nluGuess)
@@ -67,6 +75,8 @@ public sealed class ResponsePlanToSocketMessagesMapper
? ["word-of-the-day/menu"] ? ["word-of-the-day/menu"]
: isRadioLaunch : isRadioLaunch
? Array.Empty<string>() ? Array.Empty<string>()
: isPhotoGalleryLaunch || isPhotoCreateLaunch
? string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) ? rules : Array.Empty<string>()
: isClockSkillLaunch : isClockSkillLaunch
? string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) ? rules : Array.Empty<string>() ? string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) ? rules : Array.Empty<string>()
: isWordOfDayGuess : isWordOfDayGuess
@@ -108,6 +118,8 @@ public sealed class ResponsePlanToSocketMessagesMapper
entities, entities,
isWordOfDayLaunch ? "@be/word-of-the-day" : isWordOfDayLaunch ? "@be/word-of-the-day" :
isRadioLaunch ? "@be/radio" : isRadioLaunch ? "@be/radio" :
isPhotoGalleryLaunch ? "@be/gallery" :
isPhotoCreateLaunch ? "@be/create" :
isClockSkillLaunch ? "@be/clock" : isClockSkillLaunch ? "@be/clock" :
null), null),
match = new match = new
@@ -182,6 +194,24 @@ public sealed class ResponsePlanToSocketMessagesMapper
DelayMs: 125)); DelayMs: 125));
} }
// Photo launches on any message type other than CLIENT_NLU get two extra replies:
// a skill redirect payload (DelayMs 75) followed by a completion-only payload
// (DelayMs 125). Both target @be/gallery for gallery launches and @be/create otherwise.
if ((isPhotoGalleryLaunch || isPhotoCreateLaunch) &&
!string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase))
{
var skillId = isPhotoGalleryLaunch ? "@be/gallery" : "@be/create";
// The redirect reuses the outbound intent, ASR text, rules and entities computed earlier in this method.
messages.Add(new SocketReplyPlan(
JsonSerializer.Serialize(BuildSkillRedirectPayload(
transId,
skillId,
outboundIntent,
outboundAsrText,
outboundRules,
entities)),
DelayMs: 75));
messages.Add(new SocketReplyPlan(
JsonSerializer.Serialize(BuildCompletionOnlySkillPayload(transId, skillId)),
DelayMs: 125));
}
if (emitSkillActions && speak is not null) if (emitSkillActions && speak is not null)
{ {
messages.Add(new SocketReplyPlan( messages.Add(new SocketReplyPlan(

View File

@@ -569,6 +569,9 @@ public sealed class WebSocketTurnFinalizationService(
!string.Equals(plan.IntentName, "alarm_menu", StringComparison.OrdinalIgnoreCase) && !string.Equals(plan.IntentName, "alarm_menu", StringComparison.OrdinalIgnoreCase) &&
!string.Equals(plan.IntentName, "timer_value", StringComparison.OrdinalIgnoreCase) && !string.Equals(plan.IntentName, "timer_value", StringComparison.OrdinalIgnoreCase) &&
!string.Equals(plan.IntentName, "alarm_value", StringComparison.OrdinalIgnoreCase) && !string.Equals(plan.IntentName, "alarm_value", StringComparison.OrdinalIgnoreCase) &&
!string.Equals(plan.IntentName, "photo_gallery", StringComparison.OrdinalIgnoreCase) &&
!string.Equals(plan.IntentName, "snapshot", StringComparison.OrdinalIgnoreCase) &&
!string.Equals(plan.IntentName, "photobooth", StringComparison.OrdinalIgnoreCase) &&
(messageType != "CLIENT_NLU" || (messageType != "CLIENT_NLU" ||
string.Equals(plan.IntentName, "word_of_the_day_guess", StringComparison.OrdinalIgnoreCase)); string.Equals(plan.IntentName, "word_of_the_day_guess", StringComparison.OrdinalIgnoreCase));
var replies = ResponsePlanToSocketMessagesMapper.Map(plan, finalizedTurn, session, emitSkillActions).Select(map => new WebSocketReply var replies = ResponsePlanToSocketMessagesMapper.Map(plan, finalizedTurn, session, emitSkillActions).Select(map => new WebSocketReply

View File

@@ -237,6 +237,54 @@ public sealed class JiboInteractionServiceTests
Assert.Equal("am", decision.SkillPayload["ampm"]); Assert.Equal("am", decision.SkillPayload["ampm"]);
} }
/// <summary>
/// Verifies that the phrase "open photo gallery" resolves to the <c>photo_gallery</c> intent
/// and targets the local <c>@be/gallery</c> skill with a <c>menu</c> local intent.
/// </summary>
[Fact]
public async Task BuildDecisionAsync_OpenPhotoGallery_MapsToGalleryLaunch()
{
    // Arrange
    var service = CreateService();
    var turn = new TurnContext
    {
        RawTranscript = "open photo gallery",
        NormalizedTranscript = "open photo gallery"
    };

    // Act
    var decision = await service.BuildDecisionAsync(turn);

    // Assert
    Assert.Equal("photo_gallery", decision.IntentName);
    Assert.Equal("@be/gallery", decision.SkillName);

    // Fail with a clear assertion instead of a NullReferenceException when no payload is attached.
    Assert.NotNull(decision.SkillPayload);
    Assert.Equal("menu", decision.SkillPayload!["localIntent"]);
}
/// <summary>
/// Verifies that the phrase "snap a picture" resolves to the <c>snapshot</c> intent
/// and targets the local <c>@be/create</c> skill with the <c>createOnePhoto</c> local intent.
/// </summary>
[Fact]
public async Task BuildDecisionAsync_SnapAPicture_MapsToCreateOnePhoto()
{
    // Arrange
    var service = CreateService();
    var turn = new TurnContext
    {
        RawTranscript = "snap a picture",
        NormalizedTranscript = "snap a picture"
    };

    // Act
    var decision = await service.BuildDecisionAsync(turn);

    // Assert
    Assert.Equal("snapshot", decision.IntentName);
    Assert.Equal("@be/create", decision.SkillName);

    // Fail with a clear assertion instead of a NullReferenceException when no payload is attached.
    Assert.NotNull(decision.SkillPayload);
    Assert.Equal("createOnePhoto", decision.SkillPayload!["localIntent"]);
}
/// <summary>
/// Verifies that the phrase "open photobooth" resolves to the <c>photobooth</c> intent
/// and targets the local <c>@be/create</c> skill with the <c>createSomePhotos</c> local intent.
/// </summary>
[Fact]
public async Task BuildDecisionAsync_OpenPhotobooth_MapsToCreateSomePhotos()
{
    // Arrange
    var service = CreateService();
    var turn = new TurnContext
    {
        RawTranscript = "open photobooth",
        NormalizedTranscript = "open photobooth"
    };

    // Act
    var decision = await service.BuildDecisionAsync(turn);

    // Assert
    Assert.Equal("photobooth", decision.IntentName);
    Assert.Equal("@be/create", decision.SkillName);

    // Fail with a clear assertion instead of a NullReferenceException when no payload is attached.
    Assert.NotNull(decision.SkillPayload);
    Assert.Equal("createSomePhotos", decision.SkillPayload!["localIntent"]);
}
[Fact] [Fact]
public async Task BuildDecisionAsync_TellMeTheNews_UsesNimbusCloudSkillPath() public async Task BuildDecisionAsync_TellMeTheNews_UsesNimbusCloudSkillPath()
{ {

View File

@@ -414,6 +414,103 @@ public sealed class JiboWebSocketServiceTests
Assert.Equal("am", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("entities").GetProperty("ampm").GetString()); Assert.Equal("am", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("entities").GetProperty("ampm").GetString());
} }
/// <summary>
/// Sends a LISTEN frame followed by a CLIENT_ASR frame carrying "open photo gallery" and checks
/// that the replies include a <c>SKILL_REDIRECT</c> into <c>@be/gallery</c> with the <c>menu</c> intent.
/// </summary>
[Fact]
public async Task ClientAsr_OpenPhotoGallery_RedirectsIntoGallerySkill()
{
    // Both frames share the same hub connection details; only the JSON text differs.
    static WebSocketMessageEnvelope Frame(string json) => new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-photo-gallery-token",
        Text = json
    };

    // Open the listen turn, then deliver the recognized speech for the same transaction.
    await _service.HandleMessageAsync(Frame("""{"type":"LISTEN","transID":"trans-photo-gallery","data":{"rules":["globals/global_commands_launch"]}}"""));
    var replies = await _service.HandleMessageAsync(Frame("""{"type":"CLIENT_ASR","transID":"trans-photo-gallery","data":{"text":"open photo gallery"}}"""));

    Assert.Equal(4, replies.Count);
    Assert.Equal("SKILL_REDIRECT", ReadReplyType(replies[2]));

    // First reply: the listen result must already name the gallery skill and menu intent.
    using var listenPayload = JsonDocument.Parse(replies[0].Text!);
    var listenNlu = listenPayload.RootElement.GetProperty("data").GetProperty("nlu");
    Assert.Equal("menu", listenNlu.GetProperty("intent").GetString());
    Assert.Equal("@be/gallery", listenNlu.GetProperty("skill").GetString());

    // Third reply: the redirect must target the same skill and intent.
    using var redirectPayload = JsonDocument.Parse(replies[2].Text!);
    var redirectData = redirectPayload.RootElement.GetProperty("data");
    Assert.Equal("@be/gallery", redirectData.GetProperty("match").GetProperty("skillID").GetString());
    Assert.Equal("menu", redirectData.GetProperty("nlu").GetProperty("intent").GetString());
}
/// <summary>
/// Sends a LISTEN frame followed by a CLIENT_ASR frame carrying "snap a picture" and checks
/// that the replies include a <c>SKILL_REDIRECT</c> into <c>@be/create</c> with the
/// <c>createOnePhoto</c> intent.
/// </summary>
[Fact]
public async Task ClientAsr_SnapAPicture_RedirectsIntoCreateSkill()
{
    // Arrange: open the listen turn for this transaction.
    await _service.HandleMessageAsync(new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-snapshot-token",
        Text = """{"type":"LISTEN","transID":"trans-snapshot","data":{"rules":["globals/global_commands_launch"]}}"""
    });

    // Act: deliver the recognized speech.
    var replies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-snapshot-token",
        Text = """{"type":"CLIENT_ASR","transID":"trans-snapshot","data":{"text":"snap a picture"}}"""
    });

    // Assert
    Assert.Equal(4, replies.Count);

    // Pin the reply type explicitly, matching the gallery test, so a reordering of replies
    // fails here rather than on a missing JSON property further down.
    Assert.Equal("SKILL_REDIRECT", ReadReplyType(replies[2]));

    using var listenPayload = JsonDocument.Parse(replies[0].Text!);
    var listenNlu = listenPayload.RootElement.GetProperty("data").GetProperty("nlu");
    Assert.Equal("createOnePhoto", listenNlu.GetProperty("intent").GetString());
    Assert.Equal("@be/create", listenNlu.GetProperty("skill").GetString());

    using var redirectPayload = JsonDocument.Parse(replies[2].Text!);
    var redirectData = redirectPayload.RootElement.GetProperty("data");
    Assert.Equal("@be/create", redirectData.GetProperty("match").GetProperty("skillID").GetString());
    Assert.Equal("createOnePhoto", redirectData.GetProperty("nlu").GetProperty("intent").GetString());
}
/// <summary>
/// Sends a LISTEN frame followed by a CLIENT_ASR frame carrying "open photobooth" and checks
/// that the replies include a <c>SKILL_REDIRECT</c> into <c>@be/create</c> with the
/// <c>createSomePhotos</c> intent.
/// </summary>
[Fact]
public async Task ClientAsr_OpenPhotobooth_RedirectsIntoCreateSkill()
{
    // Arrange: open the listen turn for this transaction.
    await _service.HandleMessageAsync(new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-photobooth-token",
        Text = """{"type":"LISTEN","transID":"trans-photobooth","data":{"rules":["globals/global_commands_launch"]}}"""
    });

    // Act: deliver the recognized speech.
    var replies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-photobooth-token",
        Text = """{"type":"CLIENT_ASR","transID":"trans-photobooth","data":{"text":"open photobooth"}}"""
    });

    // Assert
    Assert.Equal(4, replies.Count);

    // Pin the reply type explicitly, matching the gallery test, so a reordering of replies
    // fails here rather than on a missing JSON property further down.
    Assert.Equal("SKILL_REDIRECT", ReadReplyType(replies[2]));

    using var listenPayload = JsonDocument.Parse(replies[0].Text!);
    var listenNlu = listenPayload.RootElement.GetProperty("data").GetProperty("nlu");
    Assert.Equal("createSomePhotos", listenNlu.GetProperty("intent").GetString());
    Assert.Equal("@be/create", listenNlu.GetProperty("skill").GetString());

    using var redirectPayload = JsonDocument.Parse(replies[2].Text!);
    var redirectData = redirectPayload.RootElement.GetProperty("data");
    Assert.Equal("@be/create", redirectData.GetProperty("match").GetProperty("skillID").GetString());
    Assert.Equal("createSomePhotos", redirectData.GetProperty("nlu").GetProperty("intent").GetString());
}
[Fact] [Fact]
public async Task ClientAsr_YesNoCreateFlow_PreservesCreateRuleAndDomain() public async Task ClientAsr_YesNoCreateFlow_PreservesCreateRuleAndDomain()
{ {