Add low-signal short-turn screening
This commit is contained in:
@@ -975,6 +975,7 @@ For `1.0.19`:
|
|||||||
- next implementation pass should supply the real Azure Storage connection string / deployment wiring and validate the live round-trip in the storage account smoke test
|
- next implementation pass should supply the real Azure Storage connection string / deployment wiring and validate the live round-trip in the storage account smoke test
|
||||||
10. Update, backup, and restore proof - implemented (update creation and backup creation now survive persisted reloads; restore is the persisted-state rehydration proof path, not a new cloud API)
|
10. Update, backup, and restore proof - implemented (update creation and backup creation now survive persisted reloads; restore is the persisted-state rehydration proof path, not a new cloud API)
|
||||||
11. STT upgrade and noise screening
|
11. STT upgrade and noise screening
|
||||||
|
- progress update (`2026-05-21`): added a low-signal short-turn screen in websocket finalization so filler-only fragments and stray single-token leftovers like `so command` get rejected before they can become bad turns, while preserving the existing yes/no and word-of-the-day short-turn flows
|
||||||
12. Hosted capture/storage plan / indexing for group testing
|
12. Hosted capture/storage plan / indexing for group testing
|
||||||
13. Binary-safe media storage / sync to cloud drive: OneDrive, Google Drive, Box, etc.
|
13. Binary-safe media storage / sync to cloud drive: OneDrive, Google Drive, Box, etc.
|
||||||
14. Provider-backed news and weather parity polish
|
14. Provider-backed news and weather parity polish
|
||||||
|
|||||||
@@ -84,6 +84,7 @@ The goal is to port these in small batches, capture the source-backed phrasing w
|
|||||||
- the restore proof is the persisted-state rehydration path; do not scope it into a new hosted restore API until we have real device evidence
|
- the restore proof is the persisted-state rehydration path; do not scope it into a new hosted restore API until we have real device evidence
|
||||||
- continue alarm/gallery/yes-no cleanup from `1.0.18` evidence where regressions are still open
|
- continue alarm/gallery/yes-no cleanup from `1.0.18` evidence where regressions are still open
|
||||||
- improve short-turn STT reliability and low-signal screening
|
- improve short-turn STT reliability and low-signal screening
|
||||||
|
- the latest STT pass adds a websocket-side low-signal screen for filler-only and stray single-token leftovers while keeping yes/no and word-of-the-day turns intact
|
||||||
|
|
||||||
### 3. Pegasus-To-Cloud Platform Porting
|
### 3. Pegasus-To-Cloud Platform Porting
|
||||||
|
|
||||||
|
|||||||
@@ -106,6 +106,38 @@ public sealed class WebSocketTurnFinalizationService(
|
|||||||
"honestly"
|
"honestly"
|
||||||
];
|
];
|
||||||
|
|
||||||
|
// One-word transcripts that are considered usable on their own. Any other
// single-token transcript is classified as low-signal by
// IsLowSignalSingleTokenTranscript. Membership is ordinal (case-sensitive).
private static readonly HashSet<string> SingleTokenUsableTranscripts = new(StringComparer.Ordinal)
{
    "joke", "funny", "dance", "boogie",
    "time", "date", "today", "day",
    // greetings
    "hello", "hi", "hey",
    "weather", "news", "radio",
    "stop", "sleep", "sing", "help",
    // affirmative replies
    "yes", "yeah", "yep", "yup", "sure", "ok", "okay",
    // negative replies
    "no", "nope", "nah"
};
|
||||||
|
|
||||||
private static readonly HashSet<string> YesNoAffirmativeLeadTokens = new(StringComparer.Ordinal)
|
private static readonly HashSet<string> YesNoAffirmativeLeadTokens = new(StringComparer.Ordinal)
|
||||||
{
|
{
|
||||||
"yes",
|
"yes",
|
||||||
@@ -1117,8 +1149,6 @@ public sealed class WebSocketTurnFinalizationService(
|
|||||||
|
|
||||||
if (ChitchatStateMachine.IsLikelyEmotionUtterance(transcript)) return true;
|
if (ChitchatStateMachine.IsLikelyEmotionUtterance(transcript)) return true;
|
||||||
|
|
||||||
if (transcript.Length >= 6) return true;
|
|
||||||
|
|
||||||
if (IsYesNoTurn(turn) && IsYesNoReplyTranscript(transcript)) return true;
|
if (IsYesNoTurn(turn) && IsYesNoReplyTranscript(transcript)) return true;
|
||||||
|
|
||||||
if (!string.IsNullOrWhiteSpace(pendingProactivityOffer) &&
|
if (!string.IsNullOrWhiteSpace(pendingProactivityOffer) &&
|
||||||
@@ -1128,9 +1158,19 @@ public sealed class WebSocketTurnFinalizationService(
|
|||||||
if (listenRules.Any(rule =>
|
if (listenRules.Any(rule =>
|
||||||
string.Equals(rule, "word-of-the-day/puzzle", StringComparison.OrdinalIgnoreCase))) return true;
|
string.Equals(rule, "word-of-the-day/puzzle", StringComparison.OrdinalIgnoreCase))) return true;
|
||||||
|
|
||||||
|
if (IsLowSignalSingleTokenTranscript(transcript)) return false;
|
||||||
|
|
||||||
|
if (transcript.Length >= 6) return true;
|
||||||
|
|
||||||
return transcript is "joke" or "dance" or "time" or "date" or "today" or "day" or "hello" or "hi" or "hey";
|
return transcript is "joke" or "dance" or "time" or "date" or "today" or "day" or "hello" or "hi" or "hey";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reports whether <paramref name="transcript"/> is a lone token that is not
/// listed in <c>SingleTokenUsableTranscripts</c>.
/// </summary>
/// <param name="transcript">Transcript text; it is split on the space character only.</param>
/// <returns>
/// <c>true</c> when splitting on spaces (empty entries removed, entries trimmed)
/// yields exactly one token and that token is absent from the usable set;
/// <c>false</c> for zero tokens, several tokens, or a listed single token.
/// </returns>
private static bool IsLowSignalSingleTokenTranscript(string transcript) =>
    transcript.Split(' ', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
        is [var soleToken]
    && !SingleTokenUsableTranscripts.Contains(soleToken);
|
||||||
|
|
||||||
private static bool IsYesNoTurn(TurnContext turn)
|
private static bool IsYesNoTurn(TurnContext turn)
|
||||||
{
|
{
|
||||||
return ReadRules(turn, "listenRules")
|
return ReadRules(turn, "listenRules")
|
||||||
@@ -1942,4 +1982,4 @@ public sealed class WebSocketTurnFinalizationService(
|
|||||||
Affirmative = 1,
|
Affirmative = 1,
|
||||||
Negative = 2
|
Negative = 2
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3801,6 +3801,35 @@ public sealed class JiboWebSocketServiceTests
|
|||||||
Assert.Null(session.LastTranscript);
|
Assert.Null(session.LastTranscript);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sends a LISTEN (wake-word rule) followed by a CLIENT_ASR carrying "so command"
// on the same token/transID, then checks that no replies are produced and that
// the session has recorded neither an intent nor a transcript.
[Fact]
public async Task ClientAsr_FillerPlusGenericCommand_IsIgnoredAsLowSignalNoise()
{
    const string sessionToken = "hub-low-signal-command-token";

    // Both messages share every envelope field except the JSON payload.
    WebSocketMessageEnvelope ListenEnvelope(string payload) => new()
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = sessionToken,
        Text = payload
    };

    // Open the listen turn; the replies to this first message are not inspected.
    await _service.HandleMessageAsync(ListenEnvelope(
        """{"type":"LISTEN","transID":"trans-low-signal-command","data":{"rules":["wake-word"]}}"""));

    var asrReplies = await _service.HandleMessageAsync(ListenEnvelope(
        """{"type":"CLIENT_ASR","transID":"trans-low-signal-command","data":{"text":"so command"}}"""));

    Assert.Empty(asrReplies);

    var session = _store.FindSessionByToken(sessionToken);
    Assert.NotNull(session);
    Assert.Null(session.LastIntent);
    Assert.Null(session.LastTranscript);
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public async Task BufferedAudio_WithSyntheticTranscriptHint_FinalizesThroughSttSeam()
|
public async Task BufferedAudio_WithSyntheticTranscriptHint_FinalizesThroughSttSeam()
|
||||||
{
|
{
|
||||||
@@ -5212,4 +5241,4 @@ public sealed class JiboWebSocketServiceTests
|
|||||||
return items[^1];
|
return items[^1];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user