fix for quiet Jibo ASR and NLU events

This commit is contained in:
Jacob Dubin
2026-04-19 07:07:54 -05:00
parent a26c64efbe
commit 35ab65f764
14 changed files with 5032 additions and 1 deletions

View File

@@ -106,6 +106,7 @@ Evidence from the smaller `2026-04-18/19` hotphrase and word-of-the-day verifica
- the newer `jibo test 2` bundle shows voice launch now reaches Nimbus and receives a cloud response, but a generic `SLIM/RUNTIME_PROMPT` just says "starting word of the day" instead of performing the menu-style redirect the on-screen path uses
- the `jibo test 3` bundle confirms Nimbus rejects `REDIRECT` in that cloud-skill slot, so the better next experiment is to hint the on-robot target skill directly on the synthetic `LISTEN` result and skip Nimbus `SKILL_ACTION` entirely for word-of-the-day launch
- the same bundle also shows `word-of-the-day/right_word` cleanup turns need a short ignore window for trailing audio or the robot can stay stuck in a blue-ring listening state
- the `jibo test 4` bundle exposed a broader websocket issue: inbound robot `LISTEN` setup packets were still being routed through turn finalization instead of just priming pending state, which can corrupt menu and word-of-the-day flows by treating setup turns like resolved intents
- the local buffered-audio seam is still producing repeated `whisper.cpp returned no transcript` and `ffmpeg ... Codec not found` failures, so lightweight waveform or energy screening is worth considering once the core launch flow is stable
Near-term interaction work should now prioritize:

View File

@@ -44,7 +44,22 @@ public sealed class JiboWebSocketService(
return replies;
}
if (parsedType is "LISTEN" or "CLIENT_NLU" or "CLIENT_ASR")
if (parsedType == "LISTEN")
{
var replies = ContainsInlineTurnPayload(envelope.Text)
? await turnFinalizationService.HandleTurnAsync(session, envelope, parsedType, cancellationToken)
: turnFinalizationService.HandleListenSetup(session, envelope);
await telemetrySink.RecordTurnEventAsync(envelope, session, "turn_processed", new Dictionary<string, object?>
{
["messageType"] = parsedType,
["replyCount"] = replies.Count,
["transcript"] = session.LastTranscript,
["intent"] = session.LastIntent
}, cancellationToken);
return replies;
}
if (parsedType is "CLIENT_NLU" or "CLIENT_ASR")
{
var replies = await turnFinalizationService.HandleTurnAsync(session, envelope, parsedType, cancellationToken);
await telemetrySink.RecordTurnEventAsync(envelope, session, "turn_processed", new Dictionary<string, object?>
@@ -96,4 +111,38 @@ public sealed class JiboWebSocketService(
return "UNKNOWN";
}
/// <summary>
/// Reports whether a raw message already carries transcript text inline,
/// either as <c>data.text</c> or as <c>data.asr.text</c>.
/// </summary>
/// <param name="text">Raw message text, expected to be JSON; may be null or blank.</param>
/// <returns>
/// <c>true</c> when either property is a JSON string that is not blank;
/// <c>false</c> otherwise, including when <paramref name="text"/> is not valid JSON
/// or its root/<c>data</c> element is not a JSON object.
/// </returns>
private static bool ContainsInlineTurnPayload(string? text)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        return false;
    }

    try
    {
        using var document = JsonDocument.Parse(text);
        var root = document.RootElement;

        // TryGetProperty throws InvalidOperationException on anything that is not a
        // JSON object, so the element kinds are checked up front instead of relying
        // on the catch below for well-formed-but-unexpected payloads such as "[1,2]".
        if (root.ValueKind != JsonValueKind.Object ||
            !root.TryGetProperty("data", out var data) ||
            data.ValueKind != JsonValueKind.Object)
        {
            return false;
        }

        if (HasNonBlankString(data, "text"))
        {
            return true;
        }

        return data.TryGetProperty("asr", out var asr) &&
               asr.ValueKind == JsonValueKind.Object &&
               HasNonBlankString(asr, "text");
    }
    catch (System.Exception ex) when (ex is JsonException or System.InvalidOperationException)
    {
        // JsonException: the text is not parseable JSON.
        // InvalidOperationException: kept as a safety net for element reads that
        // reject the value (presumably e.g. a string with an unpaired surrogate
        // escape). Anything else is unexpected and is allowed to propagate.
        return false;
    }

    // True when parent has a string-valued property with at least one non-whitespace character.
    static bool HasNonBlankString(JsonElement parent, string propertyName) =>
        parent.TryGetProperty(propertyName, out var value) &&
        value.ValueKind == JsonValueKind.String &&
        !string.IsNullOrWhiteSpace(value.GetString());
}
}

View File

@@ -129,6 +129,43 @@ public sealed class WebSocketTurnFinalizationService(
return await FinalizeTurnAsync(session, envelope, messageType, allowFallbackOnMissingTranscript: false, cancellationToken);
}
/// <summary>
/// Processes a LISTEN setup message without finalizing a turn: persists the turn
/// hints from the message text, then either flags the session as awaiting turn
/// completion and answers with an <c>OPENJIBO_TURN_PENDING</c> status reply, or,
/// when the mapped turn matches <c>ShouldIgnoreCompletedWordOfDayTurn</c>, clears
/// the pending state and returns no replies.
/// </summary>
/// <param name="session">Session whose turn state is read and updated.</param>
/// <param name="envelope">Inbound message; its text supplies the turn hints.</param>
/// <returns>A single pending-status reply, or an empty list on the ignore path.</returns>
public IReadOnlyList<WebSocketReply> HandleListenSetup(CloudSession session, WebSocketMessageEnvelope envelope)
{
    PersistTurnHints(session, envelope.Text);

    var listenTurn = ProtocolToTurnContextMapper.MapListenMessage(envelope, session, "LISTEN");

    if (!ShouldIgnoreCompletedWordOfDayTurn(listenTurn))
    {
        var turnState = session.TurnState;
        turnState.AwaitingTurnCompletion = true;

        // Snapshot of the buffering progress reported back to the caller.
        var pendingStatus = new
        {
            type = "OPENJIBO_TURN_PENDING",
            data = new
            {
                sessionId = session.SessionId,
                transID = session.LastTransId,
                bufferedAudioBytes = turnState.BufferedAudioBytes,
                bufferedAudioChunks = turnState.BufferedAudioChunkCount,
                awaitingAudio = turnState.BufferedAudioBytes == 0,
                awaitingTranscriptHint = turnState.BufferedAudioBytes > 0 && string.IsNullOrWhiteSpace(turnState.AudioTranscriptHint),
                finalizeAttempts = turnState.FinalizeAttemptCount
            }
        };

        var pendingReply = new WebSocketReply
        {
            Text = JsonSerializer.Serialize(pendingStatus)
        };

        return [pendingReply];
    }

    // Ignore path: stop waiting for this turn, open the late-audio ignore window,
    // drop any follow-up expiry, and discard buffered audio.
    session.TurnState.AwaitingTurnCompletion = false;
    session.TurnState.IgnoreAdditionalAudioUntilUtc = DateTimeOffset.UtcNow.Add(WebSocketTurnState.DefaultLateAudioIgnoreWindow);
    session.FollowUpExpiresUtc = null;
    ResetBufferedAudio(session);

    return [];
}
private async Task<TurnContext> ResolveTranscriptAsync(TurnContext turn, CloudSession session, CancellationToken cancellationToken)
{
if (!string.IsNullOrWhiteSpace(turn.NormalizedTranscript) || !string.IsNullOrWhiteSpace(turn.RawTranscript))

View File

@@ -629,6 +629,27 @@ public sealed class JiboWebSocketServiceTests
Assert.Empty(replies);
}
/// <summary>
/// A LISTEN message that carries only rules and mode (no transcript) must yield a
/// single pending reply and leave the session awaiting turn completion with no intent.
/// </summary>
[Fact]
public async Task ListenSetupWithoutTranscript_ReturnsPendingInsteadOfFinalizingTurn()
{
    // Arrange: a setup-only LISTEN packet with no inline transcript.
    const string setupToken = "hub-listen-setup-token";
    var setupEnvelope = new WebSocketMessageEnvelope
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = setupToken,
        Text = """{"type":"LISTEN","transID":"trans-listen-setup","data":{"rules":["main-menu/execute_fun_stuff","globals/global_commands_launch"],"mode":"CLIENT_NLU"}}"""
    };

    // Act
    var replies = await _service.HandleMessageAsync(setupEnvelope);

    // Assert: exactly one reply, and it is the pending marker.
    var pendingReply = Assert.Single(replies);
    Assert.Equal("OPENJIBO_TURN_PENDING", ReadReplyType(pendingReply));

    // Assert: the session is primed for a turn but no intent has been recorded.
    var session = _store.FindSessionByToken(setupToken);
    Assert.NotNull(session);
    Assert.True(session.TurnState.AwaitingTurnCompletion);
    Assert.Null(session.LastIntent);
}
[Fact]
public async Task BinaryAudio_AfterWordOfDayRightWordListen_IsIgnoredDuringCleanupWindow()
{