Add low-signal short-turn screening
This commit is contained in:
@@ -975,6 +975,7 @@ For `1.0.19`:
|
||||
- next implementation pass should supply the real Azure Storage connection string / deployment wiring and validate the live round-trip in the storage account smoke test
|
||||
10. Update, backup, and restore proof - implemented (update creation and backup creation now survive persisted reloads; restore is the persisted-state rehydration proof path, not a new cloud API)
|
||||
11. STT upgrade and noise screening
|
||||
- progress update (`2026-05-21`): added a low-signal short-turn screen in websocket finalization so that filler-only fragments and stray single-token leftovers (for example `so command`, a filler word followed by one generic token) are rejected before they can become bad turns, while preserving the existing yes/no and word-of-the-day short-turn flows
|
||||
12. Hosted capture/storage plan / indexing for group testing
|
||||
13. Binary-safe media storage / sync to cloud drive: OneDrive, Google Drive, Box, etc.
|
||||
14. Provider-backed news and weather parity polish
|
||||
|
||||
@@ -84,6 +84,7 @@ The goal is to port these in small batches, capture the source-backed phrasing w
|
||||
- the restore proof is the persisted-state rehydration path; do not scope it into a new hosted restore API until we have real device evidence
|
||||
- continue alarm/gallery/yes-no cleanup from `1.0.18` evidence where regressions are still open
|
||||
- improve short-turn STT reliability and low-signal screening
|
||||
- the latest STT pass adds a websocket-side low-signal screen for filler-only and stray single-token leftovers while keeping yes/no and word-of-the-day turns intact
|
||||
|
||||
### 3. Pegasus-To-Cloud Platform Porting
|
||||
|
||||
|
||||
@@ -106,6 +106,38 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
"honestly"
|
||||
];
|
||||
|
||||
/// <summary>
/// One-word transcripts that <c>IsLowSignalSingleTokenTranscript</c> does not flag as low-signal.
/// Membership is checked with an ordinal (case-sensitive) comparer, and every entry is lower-case.
/// </summary>
/// <remarks>
/// NOTE(review): the caller that screens with this set appears to fall back to a shorter hard-coded
/// word list for transcripts under six characters, so short entries here (e.g. "news", "stop") may
/// still be rejected afterwards - confirm that is intended.
/// </remarks>
private static readonly HashSet<string> SingleTokenUsableTranscripts =
    new HashSet<string>(StringComparer.Ordinal)
    {
        // Entertainment requests.
        "joke", "funny", "dance", "boogie",

        // Clock and calendar questions.
        "time", "date", "today", "day",

        // Greetings.
        "hello", "hi", "hey",

        // Information sources.
        "weather", "news", "radio",

        // Direct commands.
        "stop", "sleep", "sing", "help",

        // Affirmative replies.
        "yes", "yeah", "yep", "yup", "sure", "ok", "okay",

        // Negative replies.
        "no", "nope", "nah"
    };
|
||||
|
||||
private static readonly HashSet<string> YesNoAffirmativeLeadTokens = new(StringComparer.Ordinal)
|
||||
{
|
||||
"yes",
|
||||
@@ -1117,8 +1149,6 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
|
||||
if (ChitchatStateMachine.IsLikelyEmotionUtterance(transcript)) return true;
|
||||
|
||||
if (transcript.Length >= 6) return true;
|
||||
|
||||
if (IsYesNoTurn(turn) && IsYesNoReplyTranscript(transcript)) return true;
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(pendingProactivityOffer) &&
|
||||
@@ -1128,9 +1158,19 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
if (listenRules.Any(rule =>
|
||||
string.Equals(rule, "word-of-the-day/puzzle", StringComparison.OrdinalIgnoreCase))) return true;
|
||||
|
||||
if (IsLowSignalSingleTokenTranscript(transcript)) return false;
|
||||
|
||||
if (transcript.Length >= 6) return true;
|
||||
|
||||
return transcript is "joke" or "dance" or "time" or "date" or "today" or "day" or "hello" or "hi" or "hey";
|
||||
}
|
||||
|
||||
/// <summary>
/// Reports whether <paramref name="transcript"/> is a lone word that is not on the
/// <c>SingleTokenUsableTranscripts</c> allow-list.
/// </summary>
/// <param name="transcript">Transcript text; it is split on the space character only.</param>
/// <returns>
/// <c>true</c> when the transcript contains exactly one non-empty, trimmed token and that token is
/// absent from the allow-list; <c>false</c> for empty or multi-token transcripts and for listed words.
/// </returns>
private static bool IsLowSignalSingleTokenTranscript(string transcript)
{
    const StringSplitOptions splitOptions =
        StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries;

    var words = transcript.Split(' ', splitOptions);

    // Only a lone word is screened here; empty and multi-word transcripts are left to the caller.
    if (words.Length != 1)
    {
        return false;
    }

    var onlyWord = words[0];
    return !SingleTokenUsableTranscripts.Contains(onlyWord);
}
|
||||
|
||||
private static bool IsYesNoTurn(TurnContext turn)
|
||||
{
|
||||
return ReadRules(turn, "listenRules")
|
||||
@@ -1942,4 +1982,4 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
Affirmative = 1,
|
||||
Negative = 2
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3801,6 +3801,35 @@ public sealed class JiboWebSocketServiceTests
|
||||
Assert.Null(session.LastTranscript);
|
||||
}
|
||||
|
||||
/// <summary>
/// Sends a wake-word LISTEN followed by a CLIENT_ASR carrying the text "so command" and checks that
/// no replies are produced and that the session records neither an intent nor a transcript.
/// </summary>
[Fact]
public async Task ClientAsr_FillerPlusGenericCommand_IsIgnoredAsLowSignalNoise()
{
    const string token = "hub-low-signal-command-token";

    // Both messages share every envelope field except the JSON payload.
    WebSocketMessageEnvelope BuildListenEnvelope(string payload)
    {
        return new WebSocketMessageEnvelope
        {
            HostName = "neo-hub.jibo.com",
            Path = "/listen",
            Kind = "neo-hub-listen",
            Token = token,
            Text = payload
        };
    }

    // Arrange: open the listen transaction with the wake-word rule.
    var listenMessage = BuildListenEnvelope(
        """{"type":"LISTEN","transID":"trans-low-signal-command","data":{"rules":["wake-word"]}}""");
    await _service.HandleMessageAsync(listenMessage);

    // Act: deliver the client-side ASR result for the same transaction.
    var asrMessage = BuildListenEnvelope(
        """{"type":"CLIENT_ASR","transID":"trans-low-signal-command","data":{"text":"so command"}}""");
    var replies = await _service.HandleMessageAsync(asrMessage);

    // Assert: nothing is sent back and the session keeps no intent or transcript.
    Assert.Empty(replies);

    var session = _store.FindSessionByToken(token);
    Assert.NotNull(session);
    Assert.Null(session.LastIntent);
    Assert.Null(session.LastTranscript);
}
|
||||
|
||||
[Fact]
|
||||
public async Task BufferedAudio_WithSyntheticTranscriptHint_FinalizesThroughSttSeam()
|
||||
{
|
||||
@@ -5212,4 +5241,4 @@ public sealed class JiboWebSocketServiceTests
|
||||
return items[^1];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user