jibo photo skills voice activation
This commit is contained in:
@@ -184,6 +184,12 @@ Latest clock discovery findings:
|
||||
- Direct timer and alarm actions use `timerValue` and `alarmValue` utterances, not a generic chat path.
|
||||
- A practical first OpenJibo slice is therefore: keep custom spoken time/date answers for now, but route `open clock`, `open timer`, `open alarm`, `set a timer ...`, and `set an alarm ...` through stock-shaped local `@be/clock` handoffs.
|
||||
|
||||
Latest photo discovery findings:
|
||||
|
||||
- `@be/gallery` is the local gallery browser and opens from `intent = "menu"`.
|
||||
- `snapshot` and `photobooth` are not gallery submodes; stock main-menu logic remaps them into `@be/create` with `createOnePhoto` and `createSomePhotos`.
|
||||
- A practical first OpenJibo photo slice is therefore: route `open photo gallery` to `@be/gallery`, route `snap a picture` to `@be/create/createOnePhoto`, and route `open photobooth` to `@be/create/createSomePhotos`.
|
||||
|
||||
## Speech, Animation, And ESML
|
||||
|
||||
The current joke flow is only a small foothold into Jibo expressiveness.
|
||||
|
||||
@@ -140,18 +140,23 @@ Parallel tags:
|
||||
|
||||
### 7. Photo Family Audit
|
||||
|
||||
- Status: `ready`
|
||||
- Status: `in_progress`
|
||||
- Tags: `protocol`, `docs`
|
||||
- Why now: photo confirmation improved already, and the robot skill inventory includes `gallery`.
|
||||
- Current evidence:
|
||||
- `@be/gallery` exists in the robot skill inventory
|
||||
- current captures already show `snapshot` and related menu destinations
|
||||
- `JiboOs` shows that `@be/gallery` opens from `intent = "menu"`, while `snapshot` and `photobooth` actually map into `@be/create` with `createOnePhoto` and `createSomePhotos`
|
||||
- Implementation notes:
|
||||
- separate three flows:
|
||||
- snap a picture
|
||||
- photo gallery
|
||||
- photobooth
|
||||
- document whether each one is local-only, cloud-assisted, or upload-backed
|
||||
- Progress so far:
|
||||
- voice `open photo gallery` now launches local `@be/gallery` with a stock-shaped `menu` handoff
|
||||
- voice `snap a picture` now launches local `@be/create` with `createOnePhoto`
|
||||
- voice `open photobooth` now launches local `@be/create` with `createSomePhotos`
|
||||
- Exit criteria:
|
||||
- known photo menu and voice phrases map to the correct local path
|
||||
- capture storage expectations are documented for laptop versus hosted testing
|
||||
|
||||
@@ -79,6 +79,9 @@ public sealed class DemoConversationBroker(JiboInteractionService interactionSer
|
||||
"alarm_menu" => false,
|
||||
"timer_value" => false,
|
||||
"alarm_value" => false,
|
||||
"photo_gallery" => false,
|
||||
"snapshot" => false,
|
||||
"photobooth" => false,
|
||||
"news" => false,
|
||||
_ => true
|
||||
};
|
||||
|
||||
@@ -39,6 +39,9 @@ public sealed class JiboInteractionService(
|
||||
"alarm_menu" => BuildClockLaunchDecision("alarm", "Opening the alarm."),
|
||||
"timer_value" => BuildTimerValueDecision(lowered),
|
||||
"alarm_value" => BuildAlarmValueDecision(lowered),
|
||||
"photo_gallery" => BuildPhotoGalleryLaunchDecision(),
|
||||
"snapshot" => BuildPhotoCreateDecision("snapshot", "Taking a picture.", "createOnePhoto"),
|
||||
"photobooth" => BuildPhotoCreateDecision("photobooth", "Starting photobooth.", "createSomePhotos"),
|
||||
"hello" => new JiboInteractionDecision("hello", randomizer.Choose(catalog.GreetingReplies)),
|
||||
"how_are_you" => new JiboInteractionDecision("how_are_you", randomizer.Choose(catalog.HowAreYouReplies)),
|
||||
"yes" => new JiboInteractionDecision("yes", "Yes."),
|
||||
@@ -146,6 +149,18 @@ public sealed class JiboInteractionService(
|
||||
return "word_of_the_day";
|
||||
}
|
||||
|
||||
if (string.Equals(clientIntent, "loadMenu", StringComparison.OrdinalIgnoreCase) &&
|
||||
clientEntities.TryGetValue("destination", out var photoDestination))
|
||||
{
|
||||
return photoDestination.ToLowerInvariant() switch
|
||||
{
|
||||
"snapshot" => "snapshot",
|
||||
"photobooth" => "photobooth",
|
||||
"gallery" or "photo-gallery" or "photos" => "photo_gallery",
|
||||
_ => "chat"
|
||||
};
|
||||
}
|
||||
|
||||
if (string.Equals(clientIntent, "askForTime", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
return "time";
|
||||
@@ -251,6 +266,38 @@ public sealed class JiboInteractionService(
|
||||
return "radio";
|
||||
}
|
||||
|
||||
if (MatchesAny(
|
||||
loweredTranscript,
|
||||
"snap a picture",
|
||||
"take a picture",
|
||||
"take a photo",
|
||||
"snap a photo"))
|
||||
{
|
||||
return "snapshot";
|
||||
}
|
||||
|
||||
if (MatchesAny(
|
||||
loweredTranscript,
|
||||
"photo booth",
|
||||
"photobooth",
|
||||
"open photobooth",
|
||||
"start photobooth"))
|
||||
{
|
||||
return "photobooth";
|
||||
}
|
||||
|
||||
if (MatchesAny(
|
||||
loweredTranscript,
|
||||
"photo gallery",
|
||||
"open the gallery",
|
||||
"open photo gallery",
|
||||
"show my photos",
|
||||
"open my photos",
|
||||
"gallery"))
|
||||
{
|
||||
return "photo_gallery";
|
||||
}
|
||||
|
||||
if (MatchesAny(loweredTranscript, "dance", "boogie"))
|
||||
{
|
||||
return "dance";
|
||||
@@ -352,6 +399,32 @@ public sealed class JiboInteractionService(
|
||||
});
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds the decision that hands a photo-gallery request over to the local
/// <c>@be/gallery</c> skill.
/// </summary>
/// <returns>
/// A <c>photo_gallery</c> decision with the spoken reply "Opening the photo gallery."
/// and a case-insensitive payload carrying the gallery skill id and the <c>menu</c>
/// local intent.
/// </returns>
private static JiboInteractionDecision BuildPhotoGalleryLaunchDecision()
{
    // The same skill id is used for the decision's skill argument and the payload entry.
    const string gallerySkillId = "@be/gallery";

    var payload = new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase)
    {
        ["skillId"] = gallerySkillId,
        ["localIntent"] = "menu"
    };

    return new JiboInteractionDecision(
        "photo_gallery",
        "Opening the photo gallery.",
        gallerySkillId,
        payload);
}
|
||||
|
||||
/// <summary>
/// Builds a decision that hands a photo-capture request over to the local
/// <c>@be/create</c> skill.
/// </summary>
/// <param name="intentName">Intent name recorded on the decision (callers in this file pass <c>snapshot</c> or <c>photobooth</c>).</param>
/// <param name="replyText">Reply text passed through to the decision unchanged.</param>
/// <param name="localIntent">Value stored under the payload's <c>localIntent</c> key (e.g. <c>createOnePhoto</c>, <c>createSomePhotos</c>).</param>
/// <returns>
/// A decision targeting <c>@be/create</c> with a case-insensitive payload carrying the
/// skill id and the supplied local intent.
/// </returns>
private static JiboInteractionDecision BuildPhotoCreateDecision(string intentName, string replyText, string localIntent)
{
    // The same skill id is used for the decision's skill argument and the payload entry.
    const string createSkillId = "@be/create";

    var payload = new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase)
    {
        ["skillId"] = createSkillId,
        ["localIntent"] = localIntent
    };

    return new JiboInteractionDecision(intentName, replyText, createSkillId, payload);
}
|
||||
|
||||
private static JiboInteractionDecision BuildClockLaunchDecision(string domain, string replyText)
|
||||
{
|
||||
return new JiboInteractionDecision(
|
||||
|
||||
@@ -25,7 +25,11 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
var isWordOfDayGuess = string.Equals(plan.IntentName, "word_of_the_day_guess", StringComparison.OrdinalIgnoreCase);
|
||||
var isRadioLaunch = string.Equals(plan.IntentName, "radio", StringComparison.OrdinalIgnoreCase) ||
|
||||
string.Equals(plan.IntentName, "radio_genre", StringComparison.OrdinalIgnoreCase);
|
||||
var isPhotoGalleryLaunch = string.Equals(plan.IntentName, "photo_gallery", StringComparison.OrdinalIgnoreCase);
|
||||
var isPhotoCreateLaunch = string.Equals(plan.IntentName, "snapshot", StringComparison.OrdinalIgnoreCase) ||
|
||||
string.Equals(plan.IntentName, "photobooth", StringComparison.OrdinalIgnoreCase);
|
||||
var isClockSkillLaunch = string.Equals(skill?.SkillName, "@be/clock", StringComparison.OrdinalIgnoreCase);
|
||||
var localIntent = ReadSkillPayloadString(skill, "localIntent");
|
||||
var clockIntent = ReadSkillPayloadString(skill, "clockIntent");
|
||||
var clockDomain = ReadSkillPayloadString(skill, "domain");
|
||||
var timerHours = ReadSkillPayloadString(skill, "hours");
|
||||
@@ -41,6 +45,8 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
? "menu"
|
||||
: isRadioLaunch
|
||||
? "menu"
|
||||
: (isPhotoGalleryLaunch || isPhotoCreateLaunch) && !string.IsNullOrWhiteSpace(localIntent)
|
||||
? localIntent
|
||||
: isClockSkillLaunch && !string.IsNullOrWhiteSpace(clockIntent)
|
||||
? clockIntent
|
||||
: isWordOfDayGuess
|
||||
@@ -54,6 +60,8 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
? string.Empty
|
||||
: isRadioLaunch
|
||||
? transcript
|
||||
: isPhotoGalleryLaunch || isPhotoCreateLaunch
|
||||
? transcript
|
||||
: isClockSkillLaunch
|
||||
? transcript
|
||||
: string.Equals(clientIntent, "guess", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(nluGuess)
|
||||
@@ -67,6 +75,8 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
? ["word-of-the-day/menu"]
|
||||
: isRadioLaunch
|
||||
? Array.Empty<string>()
|
||||
: isPhotoGalleryLaunch || isPhotoCreateLaunch
|
||||
? string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) ? rules : Array.Empty<string>()
|
||||
: isClockSkillLaunch
|
||||
? string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) ? rules : Array.Empty<string>()
|
||||
: isWordOfDayGuess
|
||||
@@ -108,6 +118,8 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
entities,
|
||||
isWordOfDayLaunch ? "@be/word-of-the-day" :
|
||||
isRadioLaunch ? "@be/radio" :
|
||||
isPhotoGalleryLaunch ? "@be/gallery" :
|
||||
isPhotoCreateLaunch ? "@be/create" :
|
||||
isClockSkillLaunch ? "@be/clock" :
|
||||
null),
|
||||
match = new
|
||||
@@ -182,6 +194,24 @@ public sealed class ResponsePlanToSocketMessagesMapper
|
||||
DelayMs: 125));
|
||||
}
|
||||
|
||||
if ((isPhotoGalleryLaunch || isPhotoCreateLaunch) &&
|
||||
!string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
var skillId = isPhotoGalleryLaunch ? "@be/gallery" : "@be/create";
|
||||
messages.Add(new SocketReplyPlan(
|
||||
JsonSerializer.Serialize(BuildSkillRedirectPayload(
|
||||
transId,
|
||||
skillId,
|
||||
outboundIntent,
|
||||
outboundAsrText,
|
||||
outboundRules,
|
||||
entities)),
|
||||
DelayMs: 75));
|
||||
messages.Add(new SocketReplyPlan(
|
||||
JsonSerializer.Serialize(BuildCompletionOnlySkillPayload(transId, skillId)),
|
||||
DelayMs: 125));
|
||||
}
|
||||
|
||||
if (emitSkillActions && speak is not null)
|
||||
{
|
||||
messages.Add(new SocketReplyPlan(
|
||||
|
||||
@@ -569,6 +569,9 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
!string.Equals(plan.IntentName, "alarm_menu", StringComparison.OrdinalIgnoreCase) &&
|
||||
!string.Equals(plan.IntentName, "timer_value", StringComparison.OrdinalIgnoreCase) &&
|
||||
!string.Equals(plan.IntentName, "alarm_value", StringComparison.OrdinalIgnoreCase) &&
|
||||
!string.Equals(plan.IntentName, "photo_gallery", StringComparison.OrdinalIgnoreCase) &&
|
||||
!string.Equals(plan.IntentName, "snapshot", StringComparison.OrdinalIgnoreCase) &&
|
||||
!string.Equals(plan.IntentName, "photobooth", StringComparison.OrdinalIgnoreCase) &&
|
||||
(messageType != "CLIENT_NLU" ||
|
||||
string.Equals(plan.IntentName, "word_of_the_day_guess", StringComparison.OrdinalIgnoreCase));
|
||||
var replies = ResponsePlanToSocketMessagesMapper.Map(plan, finalizedTurn, session, emitSkillActions).Select(map => new WebSocketReply
|
||||
|
||||
@@ -237,6 +237,54 @@ public sealed class JiboInteractionServiceTests
|
||||
Assert.Equal("am", decision.SkillPayload["ampm"]);
|
||||
}
|
||||
|
||||
/// <summary>
/// "open photo gallery" should resolve to the <c>photo_gallery</c> intent and launch
/// <c>@be/gallery</c> with the <c>menu</c> local intent.
/// </summary>
[Fact]
public async Task BuildDecisionAsync_OpenPhotoGallery_MapsToGalleryLaunch()
{
    // Arrange
    var sut = CreateService();
    var turn = new TurnContext
    {
        RawTranscript = "open photo gallery",
        NormalizedTranscript = "open photo gallery"
    };

    // Act
    var result = await sut.BuildDecisionAsync(turn);

    // Assert
    Assert.Equal("photo_gallery", result.IntentName);
    Assert.Equal("@be/gallery", result.SkillName);
    Assert.Equal("menu", result.SkillPayload!["localIntent"]);
}
|
||||
|
||||
/// <summary>
/// "snap a picture" should resolve to the <c>snapshot</c> intent and launch
/// <c>@be/create</c> with the <c>createOnePhoto</c> local intent.
/// </summary>
[Fact]
public async Task BuildDecisionAsync_SnapAPicture_MapsToCreateOnePhoto()
{
    // Arrange
    var sut = CreateService();
    var turn = new TurnContext
    {
        RawTranscript = "snap a picture",
        NormalizedTranscript = "snap a picture"
    };

    // Act
    var result = await sut.BuildDecisionAsync(turn);

    // Assert
    Assert.Equal("snapshot", result.IntentName);
    Assert.Equal("@be/create", result.SkillName);
    Assert.Equal("createOnePhoto", result.SkillPayload!["localIntent"]);
}
|
||||
|
||||
/// <summary>
/// "open photobooth" should resolve to the <c>photobooth</c> intent and launch
/// <c>@be/create</c> with the <c>createSomePhotos</c> local intent.
/// </summary>
[Fact]
public async Task BuildDecisionAsync_OpenPhotobooth_MapsToCreateSomePhotos()
{
    // Arrange
    var sut = CreateService();
    var turn = new TurnContext
    {
        RawTranscript = "open photobooth",
        NormalizedTranscript = "open photobooth"
    };

    // Act
    var result = await sut.BuildDecisionAsync(turn);

    // Assert
    Assert.Equal("photobooth", result.IntentName);
    Assert.Equal("@be/create", result.SkillName);
    Assert.Equal("createSomePhotos", result.SkillPayload!["localIntent"]);
}
|
||||
|
||||
[Fact]
|
||||
public async Task BuildDecisionAsync_TellMeTheNews_UsesNimbusCloudSkillPath()
|
||||
{
|
||||
|
||||
@@ -414,6 +414,103 @@ public sealed class JiboWebSocketServiceTests
|
||||
Assert.Equal("am", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("entities").GetProperty("ampm").GetString());
|
||||
}
|
||||
|
||||
/// <summary>
/// A LISTEN followed by a CLIENT_ASR "open photo gallery" on the same transaction should
/// produce four replies, with the listen result and the skill redirect both pointing at
/// <c>@be/gallery</c> with the <c>menu</c> intent.
/// </summary>
[Fact]
public async Task ClientAsr_OpenPhotoGallery_RedirectsIntoGallerySkill()
{
    // Both messages share host, path, kind and token; only the JSON text differs.
    static WebSocketMessageEnvelope HubListenEnvelope(string json)
    {
        return new WebSocketMessageEnvelope
        {
            HostName = "neo-hub.jibo.com",
            Path = "/listen",
            Kind = "neo-hub-listen",
            Token = "hub-photo-gallery-token",
            Text = json
        };
    }

    // Open the listen turn first; its replies are not inspected here.
    await _service.HandleMessageAsync(HubListenEnvelope(
        """{"type":"LISTEN","transID":"trans-photo-gallery","data":{"rules":["globals/global_commands_launch"]}}"""));

    var replies = await _service.HandleMessageAsync(HubListenEnvelope(
        """{"type":"CLIENT_ASR","transID":"trans-photo-gallery","data":{"text":"open photo gallery"}}"""));

    Assert.Equal(4, replies.Count);
    Assert.Equal("SKILL_REDIRECT", ReadReplyType(replies[2]));

    // First reply: the listen result with the resolved NLU intent and skill.
    using var listenPayload = JsonDocument.Parse(replies[0].Text!);
    var listenNlu = listenPayload.RootElement.GetProperty("data").GetProperty("nlu");
    Assert.Equal("menu", listenNlu.GetProperty("intent").GetString());
    Assert.Equal("@be/gallery", listenNlu.GetProperty("skill").GetString());

    // Third reply: the redirect into the gallery skill.
    using var redirectPayload = JsonDocument.Parse(replies[2].Text!);
    var redirectData = redirectPayload.RootElement.GetProperty("data");
    Assert.Equal("@be/gallery", redirectData.GetProperty("match").GetProperty("skillID").GetString());
    Assert.Equal("menu", redirectData.GetProperty("nlu").GetProperty("intent").GetString());
}
|
||||
|
||||
/// <summary>
/// A LISTEN followed by a CLIENT_ASR "snap a picture" on the same transaction should
/// produce four replies, with the listen result and the skill redirect both pointing at
/// <c>@be/create</c> with the <c>createOnePhoto</c> intent.
/// </summary>
[Fact]
public async Task ClientAsr_SnapAPicture_RedirectsIntoCreateSkill()
{
    // Open the listen turn first; its replies are not inspected here.
    await _service.HandleMessageAsync(new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-snapshot-token",
        Text = """{"type":"LISTEN","transID":"trans-snapshot","data":{"rules":["globals/global_commands_launch"]}}"""
    });

    var replies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-snapshot-token",
        Text = """{"type":"CLIENT_ASR","transID":"trans-snapshot","data":{"text":"snap a picture"}}"""
    });

    Assert.Equal(4, replies.Count);

    // Confirm the third reply really is the redirect before parsing it as one, matching
    // the gallery test; otherwise a reordering fails later with a less clear
    // missing-property error.
    Assert.Equal("SKILL_REDIRECT", ReadReplyType(replies[2]));

    // First reply: the listen result with the resolved NLU intent and skill.
    using var listenPayload = JsonDocument.Parse(replies[0].Text!);
    var listenNlu = listenPayload.RootElement.GetProperty("data").GetProperty("nlu");
    Assert.Equal("createOnePhoto", listenNlu.GetProperty("intent").GetString());
    Assert.Equal("@be/create", listenNlu.GetProperty("skill").GetString());

    // Third reply: the redirect into the create skill.
    using var redirectPayload = JsonDocument.Parse(replies[2].Text!);
    var redirectData = redirectPayload.RootElement.GetProperty("data");
    Assert.Equal("@be/create", redirectData.GetProperty("match").GetProperty("skillID").GetString());
    Assert.Equal("createOnePhoto", redirectData.GetProperty("nlu").GetProperty("intent").GetString());
}
|
||||
|
||||
/// <summary>
/// A LISTEN followed by a CLIENT_ASR "open photobooth" on the same transaction should
/// produce four replies, with the listen result and the skill redirect both pointing at
/// <c>@be/create</c> with the <c>createSomePhotos</c> intent.
/// </summary>
[Fact]
public async Task ClientAsr_OpenPhotobooth_RedirectsIntoCreateSkill()
{
    // Open the listen turn first; its replies are not inspected here.
    await _service.HandleMessageAsync(new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-photobooth-token",
        Text = """{"type":"LISTEN","transID":"trans-photobooth","data":{"rules":["globals/global_commands_launch"]}}"""
    });

    var replies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-photobooth-token",
        Text = """{"type":"CLIENT_ASR","transID":"trans-photobooth","data":{"text":"open photobooth"}}"""
    });

    Assert.Equal(4, replies.Count);

    // Confirm the third reply really is the redirect before parsing it as one, matching
    // the gallery test; otherwise a reordering fails later with a less clear
    // missing-property error.
    Assert.Equal("SKILL_REDIRECT", ReadReplyType(replies[2]));

    // First reply: the listen result with the resolved NLU intent and skill.
    using var listenPayload = JsonDocument.Parse(replies[0].Text!);
    var listenNlu = listenPayload.RootElement.GetProperty("data").GetProperty("nlu");
    Assert.Equal("createSomePhotos", listenNlu.GetProperty("intent").GetString());
    Assert.Equal("@be/create", listenNlu.GetProperty("skill").GetString());

    // Third reply: the redirect into the create skill.
    using var redirectPayload = JsonDocument.Parse(replies[2].Text!);
    var redirectData = redirectPayload.RootElement.GetProperty("data");
    Assert.Equal("@be/create", redirectData.GetProperty("match").GetProperty("skillID").GetString());
    Assert.Equal("createSomePhotos", redirectData.GetProperty("nlu").GetProperty("intent").GetString());
}
|
||||
|
||||
[Fact]
|
||||
public async Task ClientAsr_YesNoCreateFlow_PreservesCreateRuleAndDomain()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user