fix for quiet Jibo ASR and NLU events
This commit is contained in:
@@ -106,6 +106,7 @@ Evidence from the smaller `2026-04-18/19` hotphrase and word-of-the-day verifica
|
||||
- the newer `jibo test 2` bundle shows voice launch now reaches Nimbus and receives a cloud response, but a generic `SLIM/RUNTIME_PROMPT` just says "starting word of the day" instead of performing the menu-style redirect the on-screen path uses
|
||||
- the `jibo test 3` bundle confirms Nimbus rejects `REDIRECT` in that cloud-skill slot, so the better next experiment is to hint the on-robot target skill directly on the synthetic `LISTEN` result and skip Nimbus `SKILL_ACTION` entirely for word-of-the-day launch
|
||||
- the same bundle also shows `word-of-the-day/right_word` cleanup turns need a short ignore window for trailing audio or the robot can stay stuck in a blue-ring listening state
|
||||
- the `jibo test 4` bundle exposed a broader websocket issue: inbound robot `LISTEN` setup packets were still being routed through turn finalization instead of just priming pending state, which can corrupt menu and word-of-the-day flows by treating setup turns like resolved intents
|
||||
- the local buffered-audio seam is still producing repeated `whisper.cpp returned no transcript` and `ffmpeg ... Codec not found` failures, so lightweight waveform or energy screening is worth considering once the core launch flow is stable
|
||||
|
||||
Near-term interaction work should now prioritize:
|
||||
|
||||
@@ -44,7 +44,22 @@ public sealed class JiboWebSocketService(
|
||||
return replies;
|
||||
}
|
||||
|
||||
if (parsedType is "LISTEN" or "CLIENT_NLU" or "CLIENT_ASR")
|
||||
if (parsedType == "LISTEN")
|
||||
{
|
||||
var replies = ContainsInlineTurnPayload(envelope.Text)
|
||||
? await turnFinalizationService.HandleTurnAsync(session, envelope, parsedType, cancellationToken)
|
||||
: turnFinalizationService.HandleListenSetup(session, envelope);
|
||||
await telemetrySink.RecordTurnEventAsync(envelope, session, "turn_processed", new Dictionary<string, object?>
|
||||
{
|
||||
["messageType"] = parsedType,
|
||||
["replyCount"] = replies.Count,
|
||||
["transcript"] = session.LastTranscript,
|
||||
["intent"] = session.LastIntent
|
||||
}, cancellationToken);
|
||||
return replies;
|
||||
}
|
||||
|
||||
if (parsedType is "CLIENT_NLU" or "CLIENT_ASR")
|
||||
{
|
||||
var replies = await turnFinalizationService.HandleTurnAsync(session, envelope, parsedType, cancellationToken);
|
||||
await telemetrySink.RecordTurnEventAsync(envelope, session, "turn_processed", new Dictionary<string, object?>
|
||||
@@ -96,4 +111,38 @@ public sealed class JiboWebSocketService(
|
||||
|
||||
return "UNKNOWN";
|
||||
}
|
||||
|
||||
/// <summary>
/// Reports whether a raw message body already carries a transcript inline, either as a
/// non-blank string at <c>data.text</c> or as a non-blank string at <c>data.asr.text</c>.
/// </summary>
/// <param name="text">Raw message text; may be null, blank, or not valid JSON.</param>
/// <returns>
/// <c>true</c> when one of the two transcript locations holds a non-blank string;
/// <c>false</c> for null/blank input, malformed JSON, a non-object root or <c>data</c>,
/// or when no transcript is present.
/// </returns>
private static bool ContainsInlineTurnPayload(string? text)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        return false;
    }

    try
    {
        using var document = JsonDocument.Parse(text);
        var root = document.RootElement;

        // TryGetProperty throws on anything that is not a JSON object, so valid-but-unexpected
        // roots (arrays, strings, numbers) are rejected explicitly instead of via an exception.
        if (root.ValueKind != JsonValueKind.Object ||
            !root.TryGetProperty("data", out var data) ||
            data.ValueKind != JsonValueKind.Object)
        {
            return false;
        }

        if (HasNonBlankStringProperty(data, "text"))
        {
            return true;
        }

        return data.TryGetProperty("asr", out var asr) &&
               asr.ValueKind == JsonValueKind.Object &&
               HasNonBlankStringProperty(asr, "text");
    }
    catch (JsonException)
    {
        // Malformed JSON: treat as "no inline payload" (best-effort detection).
        return false;
    }
    catch (System.InvalidOperationException)
    {
        // Raised by JsonElement accessors for values that cannot be read as requested
        // (for example a string that cannot be transcoded); still best-effort.
        return false;
    }
}

/// <summary>
/// Checks that <paramref name="parent"/> (which must be a JSON object) has a property named
/// <paramref name="propertyName"/> whose value is a JSON string that is not blank.
/// </summary>
private static bool HasNonBlankStringProperty(JsonElement parent, string propertyName) =>
    parent.TryGetProperty(propertyName, out var value) &&
    value.ValueKind == JsonValueKind.String &&
    !string.IsNullOrWhiteSpace(value.GetString());
|
||||
}
|
||||
|
||||
@@ -129,6 +129,43 @@ public sealed class WebSocketTurnFinalizationService(
|
||||
return await FinalizeTurnAsync(session, envelope, messageType, allowFallbackOnMissingTranscript: false, cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
/// Processes a LISTEN setup message without finalizing a turn. Turn hints from the message text
/// are persisted first; the message is then mapped to a turn context and one of two paths runs:
/// a completed word-of-the-day turn clears pending state and opens a late-audio ignore window,
/// otherwise the session is flagged as awaiting turn completion and a status reply is produced.
/// </summary>
/// <param name="session">Session whose turn state is updated.</param>
/// <param name="envelope">Inbound message; its text is used for hint persistence and mapping.</param>
/// <returns>
/// An empty list for an ignored word-of-the-day turn; otherwise a single reply whose text is a
/// serialized <c>OPENJIBO_TURN_PENDING</c> message describing the buffered-audio state.
/// </returns>
public IReadOnlyList<WebSocketReply> HandleListenSetup(CloudSession session, WebSocketMessageEnvelope envelope)
{
    PersistTurnHints(session, envelope.Text);

    var listenTurn = ProtocolToTurnContextMapper.MapListenMessage(envelope, session, "LISTEN");

    if (!ShouldIgnoreCompletedWordOfDayTurn(listenTurn))
    {
        // Normal setup: remember that a turn is in flight and report what has been buffered so far.
        session.TurnState.AwaitingTurnCompletion = true;

        var pendingStatus = new
        {
            type = "OPENJIBO_TURN_PENDING",
            data = new
            {
                sessionId = session.SessionId,
                transID = session.LastTransId,
                bufferedAudioBytes = session.TurnState.BufferedAudioBytes,
                bufferedAudioChunks = session.TurnState.BufferedAudioChunkCount,
                awaitingAudio = session.TurnState.BufferedAudioBytes == 0,
                awaitingTranscriptHint = session.TurnState.BufferedAudioBytes > 0 && string.IsNullOrWhiteSpace(session.TurnState.AudioTranscriptHint),
                finalizeAttempts = session.TurnState.FinalizeAttemptCount
            }
        };

        var pendingReply = new WebSocketReply
        {
            Text = JsonSerializer.Serialize(pendingStatus)
        };

        return [pendingReply];
    }

    // Completed word-of-the-day turn: stop waiting, ignore audio arriving within the default
    // late-audio window, drop any follow-up expiry, and discard buffered audio.
    session.TurnState.AwaitingTurnCompletion = false;
    session.TurnState.IgnoreAdditionalAudioUntilUtc = DateTimeOffset.UtcNow.Add(WebSocketTurnState.DefaultLateAudioIgnoreWindow);
    session.FollowUpExpiresUtc = null;
    ResetBufferedAudio(session);

    return [];
}
|
||||
|
||||
private async Task<TurnContext> ResolveTranscriptAsync(TurnContext turn, CloudSession session, CancellationToken cancellationToken)
|
||||
{
|
||||
if (!string.IsNullOrWhiteSpace(turn.NormalizedTranscript) || !string.IsNullOrWhiteSpace(turn.RawTranscript))
|
||||
|
||||
@@ -629,6 +629,27 @@ public sealed class JiboWebSocketServiceTests
|
||||
Assert.Empty(replies);
|
||||
}
|
||||
|
||||
[Fact]
public async Task ListenSetupWithoutTranscript_ReturnsPendingInsteadOfFinalizingTurn()
{
    // Arrange: a LISTEN packet that only carries rules and a mode, with no transcript text.
    const string sessionToken = "hub-listen-setup-token";
    var setupEnvelope = new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = sessionToken,
        Text = """{"type":"LISTEN","transID":"trans-listen-setup","data":{"rules":["main-menu/execute_fun_stuff","globals/global_commands_launch"],"mode":"CLIENT_NLU"}}"""
    };

    // Act
    var replies = await _service.HandleMessageAsync(setupEnvelope);

    // Assert: exactly one pending-status reply comes back...
    Assert.Single(replies);
    Assert.Equal("OPENJIBO_TURN_PENDING", ReadReplyType(replies[0]));

    // ...and the session is left waiting for the turn to complete, with no intent recorded.
    var session = _store.FindSessionByToken(sessionToken);
    Assert.NotNull(session);
    Assert.True(session.TurnState.AwaitingTurnCompletion);
    Assert.Null(session.LastIntent);
}
|
||||
|
||||
[Fact]
|
||||
public async Task BinaryAudio_AfterWordOfDayRightWordListen_IsIgnoredDuringCleanupWindow()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user