Normalize transcripts and expand speech regressions
This commit is contained in:
@@ -38,13 +38,25 @@ public sealed class JiboInteractionService(
|
||||
@"\b(?:volume|loudness)\s+(?:2|two|to)\s+(?<value>10|\d|one|two|three|four|five|six|seven|eight|nine|ten)\b",
|
||||
RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);
|
||||
|
||||
private static readonly Regex CommandPhrasePattern = new(
|
||||
@"[^\w\s]",
|
||||
RegexOptions.CultureInvariant | RegexOptions.Compiled);
|
||||
|
||||
private static readonly Regex CommandWhitespacePattern = new(
|
||||
@"\s+",
|
||||
RegexOptions.CultureInvariant | RegexOptions.Compiled);
|
||||
private static readonly string[] CommandLeadPhrases =
|
||||
[
|
||||
"hey jibo",
|
||||
"hello jibo",
|
||||
"hi jibo",
|
||||
"jibo",
|
||||
"o",
|
||||
"oh",
|
||||
"so",
|
||||
"well",
|
||||
"um",
|
||||
"uh",
|
||||
"hmm",
|
||||
"erm",
|
||||
"ah",
|
||||
"please",
|
||||
"ok jibo",
|
||||
"okay jibo"
|
||||
];
|
||||
|
||||
private static readonly Regex AlarmDeletePattern = new(
|
||||
@"\b(?:cancel|delete|remove|stop|turn\s+off)\s+(?:the\s+)?(?:alarm|along|elo)\b",
|
||||
@@ -3543,11 +3555,15 @@ public sealed class JiboInteractionService(
|
||||
return normalized is
|
||||
"what is the date" or
|
||||
"what s the date" or
|
||||
"what's the date" or
|
||||
"what date is it" or
|
||||
"today s date" or
|
||||
"today date" or
|
||||
"what's today's date" or
|
||||
"what is today s date" or
|
||||
"what s today s date" or
|
||||
"what's today s date" or
|
||||
"what's todays date" or
|
||||
"what is todays date" or
|
||||
"what s todays date";
|
||||
}
|
||||
@@ -4311,11 +4327,13 @@ public sealed class JiboInteractionService(
|
||||
{
|
||||
var normalized = NormalizeCommandPhrase(loweredTranscript);
|
||||
return normalized.StartsWith("what is my favorite", StringComparison.Ordinal) ||
|
||||
normalized.StartsWith("what s my favorite", StringComparison.Ordinal) ||
|
||||
normalized.StartsWith("what is my favourite", StringComparison.Ordinal) ||
|
||||
normalized.StartsWith("what s my favourite", StringComparison.Ordinal) ||
|
||||
normalized.StartsWith("do you remember my favorite", StringComparison.Ordinal) ||
|
||||
normalized.StartsWith("do you remember my favourite", StringComparison.Ordinal);
|
||||
normalized.StartsWith("what s my favorite", StringComparison.Ordinal) ||
|
||||
normalized.StartsWith("what's my favorite", StringComparison.Ordinal) ||
|
||||
normalized.StartsWith("what is my favourite", StringComparison.Ordinal) ||
|
||||
normalized.StartsWith("what s my favourite", StringComparison.Ordinal) ||
|
||||
normalized.StartsWith("what's my favourite", StringComparison.Ordinal) ||
|
||||
normalized.StartsWith("do you remember my favorite", StringComparison.Ordinal) ||
|
||||
normalized.StartsWith("do you remember my favourite", StringComparison.Ordinal);
|
||||
}
|
||||
|
||||
private static string? TryExtractPreferenceLookupCategory(string transcript)
|
||||
@@ -4878,10 +4896,12 @@ public sealed class JiboInteractionService(
|
||||
|
||||
/// <summary>
/// Normalizes a spoken command so it can be compared against canned phrases.
/// </summary>
/// <param name="value">Raw or lower-cased transcript text.</param>
/// <returns>
/// The loosely normalized text (lower-cased, punctuation other than apostrophes replaced by
/// spaces, whitespace collapsed) with any leading entries of <c>CommandLeadPhrases</c> removed.
/// </returns>
private static string NormalizeCommandPhrase(string value)
{
    var normalized = TranscriptTextNormalizer.NormalizeLooseText(value);

    // "uh" is one of the lead phrases, so stripping would reduce the affirmative
    // "uh huh" to "huh"; return such transcripts untouched.
    var isUhHuhReply =
        string.Equals(normalized, "uh huh", StringComparison.Ordinal) ||
        normalized.StartsWith("uh huh ", StringComparison.Ordinal);

    return isUhHuhReply
        ? normalized
        : TranscriptTextNormalizer.StripLeadingPhrases(normalized, CommandLeadPhrases);
}
|
||||
|
||||
private static string? TryNormalizeVolumeLevel(string token)
|
||||
|
||||
@@ -41,6 +41,7 @@ internal static class PersonalReportOrchestrator
|
||||
"yeah",
|
||||
"yep",
|
||||
"yup",
|
||||
"uh huh",
|
||||
"sure",
|
||||
"ok",
|
||||
"okay",
|
||||
@@ -260,7 +261,7 @@ internal static class PersonalReportOrchestrator
|
||||
|
||||
if (toggles.WeatherEnabled)
|
||||
{
|
||||
reportSections.Add("First, your weather.");
|
||||
reportSections.Add("Weather.");
|
||||
var weatherDecision = await buildWeatherDecisionAsync(turn, "weather", cancellationToken);
|
||||
reportSections.Add(weatherDecision.ReplyText);
|
||||
if (IsWeatherErrorReply(weatherDecision.ReplyText)) serviceError = "weather";
|
||||
@@ -275,13 +276,6 @@ internal static class PersonalReportOrchestrator
|
||||
catalog.CalendarNothingReplies,
|
||||
"Looking at your calendar, I don't see anything scheduled today."),
|
||||
userName));
|
||||
reportSections.Add(
|
||||
RenderReportSkillTemplate(
|
||||
ChooseReportSkillTemplate(
|
||||
catalog.CalendarOutroReplies,
|
||||
[],
|
||||
"And that's it."),
|
||||
userName));
|
||||
}
|
||||
|
||||
if (toggles.CommuteEnabled)
|
||||
@@ -302,7 +296,7 @@ internal static class PersonalReportOrchestrator
|
||||
catalog.NewsCategoryIntroReplies,
|
||||
"Here's today's news, from the associated press."),
|
||||
userName));
|
||||
reportSections.Add(randomizer.Choose(catalog.NewsBriefings));
|
||||
reportSections.Add(ChooseShortestBriefing(catalog.NewsBriefings));
|
||||
reportSections.Add(
|
||||
RenderReportSkillTemplate(
|
||||
ChooseReportSkillTemplate(
|
||||
@@ -632,7 +626,8 @@ internal static class PersonalReportOrchestrator
|
||||
|
||||
var speakerAwareTemplate = usableTemplates.FirstOrDefault(static template =>
|
||||
template.Contains("${speaker}", StringComparison.OrdinalIgnoreCase));
|
||||
return speakerAwareTemplate ?? usableTemplates[0];
|
||||
return ChooseShortestTemplate(speakerAwareTemplate is not null ? [speakerAwareTemplate] : usableTemplates)
|
||||
?? fallback;
|
||||
}
|
||||
|
||||
private static string RenderPersonalReportTemplate(string template, string userName)
|
||||
@@ -649,13 +644,33 @@ internal static class PersonalReportOrchestrator
|
||||
IReadOnlyList<string> secondaryTemplates,
|
||||
string fallback)
|
||||
{
|
||||
var primary = primaryTemplates.FirstOrDefault(static template => !string.IsNullOrWhiteSpace(template));
|
||||
var primary = ChooseShortestTemplate(primaryTemplates);
|
||||
if (!string.IsNullOrWhiteSpace(primary)) return primary!;
|
||||
|
||||
var secondary = secondaryTemplates.FirstOrDefault(static template => !string.IsNullOrWhiteSpace(template));
|
||||
var secondary = ChooseShortestTemplate(secondaryTemplates);
|
||||
return !string.IsNullOrWhiteSpace(secondary) ? secondary! : fallback;
|
||||
}
|
||||
|
||||
/// <summary>
/// Picks the shortest non-blank briefing and reduces it to its first sentence.
/// </summary>
/// <param name="briefings">Candidate briefing texts; blank entries are ignored.</param>
/// <returns>
/// The first sentence of the shortest briefing without its terminating punctuation, the whole
/// briefing when no sentence text can be isolated, or an empty string when no briefing is usable.
/// </returns>
private static string ChooseShortestBriefing(IReadOnlyList<string> briefings)
{
    var selected = ChooseShortestTemplate(briefings);
    if (string.IsNullOrWhiteSpace(selected)) return string.Empty;

    var firstSentence = ExtractFirstSentence(selected);
    return string.IsNullOrWhiteSpace(firstSentence) ? selected : firstSentence;

    // Strips surrounding whitespace and stray sentence terminators from a fragment.
    static string Clean(string fragment) => fragment.Trim().Trim('.', '!', '?').Trim();

    // Returns the text up to the first real sentence boundary, or null when none is found.
    static string? ExtractFirstSentence(string text)
    {
        var start = 0;
        for (var index = 0; index < text.Length; index++)
        {
            if (text[index] is not ('.' or '!' or '?')) continue;

            // A terminator glued to the next character ("1.5", "U.S") is not a boundary;
            // only one followed by whitespace or the end of the text ends a sentence.
            var atBoundary = index + 1 == text.Length || char.IsWhiteSpace(text[index + 1]);
            if (!atBoundary) continue;

            var candidate = Clean(text[start..index]);
            if (candidate.Length > 0) return candidate;

            // Nothing but punctuation so far; keep scanning after this terminator.
            start = index + 1;
        }

        var remainder = Clean(text[start..]);
        return remainder.Length > 0 ? remainder : null;
    }
}
|
||||
|
||||
/// <summary>
/// Finds the shortest template that is not null, empty or whitespace-only.
/// </summary>
/// <param name="templates">Candidate templates in priority order.</param>
/// <returns>
/// The shortest usable template (the earliest one when several share the minimum length),
/// or <c>null</c> when no candidate is usable.
/// </returns>
private static string? ChooseShortestTemplate(IEnumerable<string> templates)
{
    ArgumentNullException.ThrowIfNull(templates);

    string? shortest = null;
    foreach (var candidate in templates)
    {
        if (string.IsNullOrWhiteSpace(candidate)) continue;

        // Strictly-shorter comparison keeps the earliest candidate on ties.
        if (shortest is null || candidate.Length < shortest.Length)
            shortest = candidate;
    }

    return shortest;
}
|
||||
|
||||
private static string RenderReportSkillTemplate(string template, string userName)
|
||||
{
|
||||
return template
|
||||
@@ -670,4 +685,4 @@ internal static class PersonalReportOrchestrator
|
||||
bool CalendarEnabled,
|
||||
bool CommuteEnabled,
|
||||
bool NewsEnabled);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace Jibo.Cloud.Application.Services;
|
||||
|
||||
/// <summary>
/// Shared helpers for normalizing speech transcripts before phrase matching.
/// </summary>
internal static partial class TranscriptTextNormalizer
{
    // Matches runs of characters that are not letters, digits, whitespace or apostrophes.
    private static readonly Regex PunctuationToSpaceRegex = new(
        @"[^\p{L}\p{N}\s']+",
        RegexOptions.CultureInvariant | RegexOptions.Compiled);

    // Matches runs of whitespace so they can be collapsed to a single space.
    private static readonly Regex WhitespaceRegex = new(
        @"\s+",
        RegexOptions.CultureInvariant | RegexOptions.Compiled);

    /// <summary>
    /// Lower-cases the text, replaces punctuation (except apostrophes) with spaces,
    /// collapses whitespace runs and trims the result.
    /// </summary>
    /// <param name="value">Text to normalize; may be null.</param>
    /// <returns>The normalized text, or an empty string for null or blank input.</returns>
    internal static string NormalizeLooseText(string? value)
    {
        if (string.IsNullOrWhiteSpace(value)) return string.Empty;

        var lowered = value.Trim().ToLowerInvariant();
        var withoutPunctuation = PunctuationToSpaceRegex.Replace(lowered, " ");
        var collapsed = WhitespaceRegex.Replace(withoutPunctuation, " ");
        return collapsed.Trim();
    }

    /// <summary>
    /// Repeatedly removes any of the given phrases from the start of the text until none matches.
    /// </summary>
    /// <param name="value">Text to strip; compared ordinally, so it should already be normalized.</param>
    /// <param name="phrases">Phrases to remove; each must match whole leading words.</param>
    /// <returns>
    /// The remaining text, which is empty when the text consisted only of lead phrases.
    /// Blank input or an empty phrase list returns <paramref name="value"/> unchanged.
    /// </returns>
    internal static string StripLeadingPhrases(string value, params string[] phrases)
    {
        if (string.IsNullOrWhiteSpace(value) || phrases.Length == 0) return value;

        var remaining = value;
        while (true)
        {
            if (!TryStripLeadingPhrase(remaining, phrases, out var rest)) return remaining;
            remaining = rest;
        }
    }

    /// <summary>
    /// Removes the first phrase that matches the start of the text on a word boundary.
    /// </summary>
    /// <param name="normalizedValue">Text to inspect.</param>
    /// <param name="phrases">Candidate lead phrases; blank entries are skipped.</param>
    /// <param name="trimmed">The text after the removed phrase, or the input when nothing matched.</param>
    /// <returns><c>true</c> when a phrase was removed; otherwise <c>false</c>.</returns>
    private static bool TryStripLeadingPhrase(string normalizedValue, IReadOnlyList<string> phrases, out string trimmed)
    {
        for (var index = 0; index < phrases.Count; index++)
        {
            var candidate = phrases[index];
            if (string.IsNullOrWhiteSpace(candidate)) continue;
            if (!normalizedValue.StartsWith(candidate, StringComparison.Ordinal)) continue;

            // The whole text is the phrase.
            if (normalizedValue.Length == candidate.Length)
            {
                trimmed = string.Empty;
                return true;
            }

            // Only strip when the phrase ends on a word boundary ("o" must not match "ohio").
            if (normalizedValue[candidate.Length] == ' ')
            {
                trimmed = normalizedValue[(candidate.Length + 1)..].TrimStart();
                return true;
            }
        }

        trimmed = normalizedValue;
        return false;
    }
}
|
||||
@@ -90,6 +90,8 @@ public sealed partial class WebSocketTurnFinalizationService(
|
||||
|
||||
private static readonly string[] TranscriptNoisePrefixes =
|
||||
[
|
||||
"o",
|
||||
"oh",
|
||||
"uh",
|
||||
"um",
|
||||
"hmm",
|
||||
@@ -1627,11 +1629,7 @@ public sealed partial class WebSocketTurnFinalizationService(
|
||||
|
||||
/// <summary>
/// Normalizes a transcript for matching by delegating to the shared loose-text normalizer.
/// </summary>
/// <param name="transcript">Raw transcript text; may be null.</param>
/// <returns>
/// The lower-cased transcript with punctuation other than apostrophes replaced by spaces and
/// whitespace collapsed, or an empty string when the transcript is null or blank.
/// </returns>
private static string NormalizeTranscript(string? transcript)
{
    // The shared normalizer already handles null/blank input and whitespace collapsing.
    return TranscriptTextNormalizer.NormalizeLooseText(transcript);
}
|
||||
|
||||
private static string? ReadMessageType(TurnContext turn)
|
||||
@@ -1939,9 +1937,6 @@ public sealed partial class WebSocketTurnFinalizationService(
|
||||
};
|
||||
}
|
||||
|
||||
[GeneratedRegex(@"[^\w\s]")]
|
||||
private static partial Regex TranscriptNormalizationRegex();
|
||||
|
||||
private enum YesNoReply
|
||||
{
|
||||
None = 0,
|
||||
|
||||
Reference in New Issue
Block a user