Normalize loose STT transcripts before routing
This commit is contained in:
@@ -20,7 +20,7 @@ public sealed class SyntheticBufferedAudioSttStrategy : ISttStrategy
|
||||
|
||||
return Task.FromResult(new SttResult
|
||||
{
|
||||
Text = transcriptHint.Trim(),
|
||||
Text = NormalizeLooseTranscript(transcriptHint),
|
||||
Provider = Name,
|
||||
Confidence = 0.75f,
|
||||
Locale = turn.Locale,
|
||||
@@ -51,4 +51,16 @@ public sealed class SyntheticBufferedAudioSttStrategy : ISttStrategy
|
||||
? transcriptHint?.ToString()
|
||||
: null;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Normalizes a loosely formatted transcript: lower-cases it with the invariant culture,
/// replaces every run of characters that are not letters, combining marks, digits,
/// whitespace or apostrophes with a single space, collapses whitespace runs and trims.
/// </summary>
/// <param name="value">Raw transcript text; may be <c>null</c>.</param>
/// <returns>
/// The normalized transcript, or <see cref="string.Empty"/> when <paramref name="value"/>
/// is <c>null</c>, empty or whitespace only.
/// </returns>
private static string NormalizeLooseTranscript(string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return string.Empty;
    }

    const System.Text.RegularExpressions.RegexOptions options =
        System.Text.RegularExpressions.RegexOptions.CultureInvariant
        | System.Text.RegularExpressions.RegexOptions.Compiled;

    var lowered = value.Trim().ToLowerInvariant();

    // \p{M} (combining marks) is kept on purpose: decomposed accents and the vowel signs of
    // scripts such as Devanagari or Thai belong to the word, and replacing them with spaces
    // would split the word into fragments.
    var withoutPunctuation = System.Text.RegularExpressions.Regex.Replace(
        lowered, @"[^\p{L}\p{M}\p{N}\s']+", " ", options);

    // Collapse any whitespace run (including the spaces introduced above) to one space.
    var collapsed = System.Text.RegularExpressions.Regex.Replace(
        withoutPunctuation, @"\s+", " ", options);

    return collapsed.Trim();
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace Jibo.Cloud.Infrastructure.Audio;
|
||||
|
||||
/// <summary>
/// Shared helper that reduces speech-to-text output to a loose, comparison-friendly form.
/// </summary>
internal static class AudioTranscriptNormalizer
{
    /// <summary>
    /// Matches runs of characters that are not letters, combining marks, digits, whitespace
    /// or apostrophes. Combining marks (<c>\p{M}</c>) are excluded from the match so that
    /// decomposed accents and the vowel signs of scripts such as Devanagari or Thai stay
    /// attached to their base letters instead of being turned into spaces.
    /// </summary>
    private static readonly Regex PunctuationToSpaceRegex = new(
        @"[^\p{L}\p{M}\p{N}\s']+",
        RegexOptions.CultureInvariant | RegexOptions.Compiled);

    /// <summary>Matches one or more consecutive whitespace characters.</summary>
    private static readonly Regex WhitespaceRegex = new(
        @"\s+",
        RegexOptions.CultureInvariant | RegexOptions.Compiled);

    /// <summary>
    /// Lower-cases the transcript with the invariant culture, replaces punctuation runs with
    /// a space, collapses whitespace runs to a single space and trims the result.
    /// </summary>
    /// <param name="value">Raw transcript text; may be <c>null</c>.</param>
    /// <returns>
    /// The normalized transcript, or <see cref="string.Empty"/> when <paramref name="value"/>
    /// is <c>null</c>, empty or whitespace only.
    /// </returns>
    public static string NormalizeLooseTranscript(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return string.Empty;
        }

        var lowered = value.Trim().ToLowerInvariant();
        var withoutPunctuation = PunctuationToSpaceRegex.Replace(lowered, " ");

        // The replacement above can leave adjacent spaces; collapse them before the final trim.
        return WhitespaceRegex.Replace(withoutPunctuation, " ").Trim();
    }
}
|
||||
@@ -52,6 +52,7 @@ public sealed class LocalWhisperCppBufferedAudioSttStrategy(
|
||||
cancellationToken);
|
||||
|
||||
var transcript = ExtractTranscript(whisperResult.StdOut);
|
||||
transcript = AudioTranscriptNormalizer.NormalizeLooseTranscript(transcript);
|
||||
if (string.IsNullOrWhiteSpace(transcript))
|
||||
throw new InvalidOperationException("whisper.cpp returned no transcript for the buffered audio turn.");
|
||||
|
||||
@@ -154,4 +155,4 @@ public sealed class LocalWhisperCppBufferedAudioSttStrategy(
|
||||
|
||||
return !checkFileExists || File.Exists(path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user