Normalize loose STT transcripts before routing

This commit is contained in:
Jacob Dubin
2026-05-17 14:02:47 -05:00
parent 193fa56847
commit c0485da46d
5 changed files with 111 additions and 7 deletions

View File

@@ -20,7 +20,7 @@ public sealed class SyntheticBufferedAudioSttStrategy : ISttStrategy
return Task.FromResult(new SttResult
{
Text = transcriptHint.Trim(),
Text = NormalizeLooseTranscript(transcriptHint),
Provider = Name,
Confidence = 0.75f,
Locale = turn.Locale,
@@ -51,4 +51,16 @@ public sealed class SyntheticBufferedAudioSttStrategy : ISttStrategy
? transcriptHint?.ToString()
: null;
}
}
/// <summary>
/// Reduces a loosely formatted transcript to a lowercase, single-spaced form
/// that contains only letters, combining marks, digits, apostrophes and spaces.
/// </summary>
/// <param name="value">Raw transcript text; may be <c>null</c>, empty or whitespace.</param>
/// <returns>
/// The normalized transcript, or <see cref="string.Empty"/> when <paramref name="value"/>
/// is <c>null</c>, blank, or consists solely of stripped characters.
/// </returns>
private static string NormalizeLooseTranscript(string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return string.Empty;
    }

    const System.Text.RegularExpressions.RegexOptions options =
        System.Text.RegularExpressions.RegexOptions.CultureInvariant |
        System.Text.RegularExpressions.RegexOptions.Compiled;

    var lowered = value.Trim().ToLowerInvariant();

    // Replace every run of disallowed characters with a space. \p{M} (combining marks)
    // is allowed on purpose: without it, decomposed accents ("e" + U+0301) and the vowel
    // signs of scripts such as Devanagari or Thai are treated as punctuation, which
    // breaks words apart. The apostrophe is kept so contractions like "don't" survive.
    lowered = System.Text.RegularExpressions.Regex.Replace(lowered, @"[^\p{L}\p{M}\p{N}\s']+", " ", options);

    // Collapse any whitespace run (including the spaces introduced above) to one space.
    lowered = System.Text.RegularExpressions.Regex.Replace(lowered, @"\s+", " ", options);

    // Stripped leading/trailing punctuation leaves a space behind; remove it.
    return lowered.Trim();
}
}

View File

@@ -0,0 +1,24 @@
using System.Text.RegularExpressions;
namespace Jibo.Cloud.Infrastructure.Audio;
/// <summary>
/// Text clean-up helpers for speech-to-text transcripts.
/// </summary>
internal static class AudioTranscriptNormalizer
{
    // Matches runs of characters that are NOT letters, combining marks, digits,
    // whitespace or apostrophes. \p{M} is part of the allowed set so that decomposed
    // accents ("e" + U+0301) and the vowel signs of scripts such as Devanagari or Thai
    // are not mistaken for punctuation, which would split words apart.
    private static readonly Regex PunctuationToSpaceRegex = new(
        @"[^\p{L}\p{M}\p{N}\s']+",
        RegexOptions.CultureInvariant | RegexOptions.Compiled);

    // Matches any run of whitespace so it can be collapsed to a single space.
    private static readonly Regex WhitespaceRegex = new(
        @"\s+",
        RegexOptions.CultureInvariant | RegexOptions.Compiled);

    /// <summary>
    /// Reduces a loosely formatted transcript to a lowercase, single-spaced form
    /// that contains only letters, combining marks, digits, apostrophes and spaces.
    /// </summary>
    /// <param name="value">Raw transcript text; may be <c>null</c>, empty or whitespace.</param>
    /// <returns>
    /// The normalized transcript, or <see cref="string.Empty"/> when <paramref name="value"/>
    /// is <c>null</c>, blank, or consists solely of stripped characters.
    /// </returns>
    public static string NormalizeLooseTranscript(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return string.Empty;
        }

        var lowered = value.Trim().ToLowerInvariant();
        var withoutPunctuation = PunctuationToSpaceRegex.Replace(lowered, " ");
        var singleSpaced = WhitespaceRegex.Replace(withoutPunctuation, " ");

        // Stripped leading/trailing punctuation leaves a space behind; remove it.
        return singleSpaced.Trim();
    }
}

View File

@@ -52,6 +52,7 @@ public sealed class LocalWhisperCppBufferedAudioSttStrategy(
cancellationToken);
var transcript = ExtractTranscript(whisperResult.StdOut);
transcript = AudioTranscriptNormalizer.NormalizeLooseTranscript(transcript);
if (string.IsNullOrWhiteSpace(transcript))
throw new InvalidOperationException("whisper.cpp returned no transcript for the buffered audio turn.");
@@ -154,4 +155,4 @@ public sealed class LocalWhisperCppBufferedAudioSttStrategy(
return !checkFileExists || File.Exists(path);
}
}
}