Version 1.0.15: fixes for yes/no replies and short speech

This commit is contained in:
Jacob Dubin
2026-04-23 07:13:24 -05:00
parent 425d8c1a9b
commit 41e90fc4c1
19 changed files with 8231 additions and 60 deletions

View File

@@ -194,6 +194,7 @@ Latest photo discovery findings:
- The newest `.NET` pass keeps that routing, adds local-file persistence for media metadata, and serves stored media URLs back through `/media/{path}` as a first hosted-gallery slice.
- The remaining gap is binary fidelity: the current HTTP capture path stores request bodies as text, which is enough to preserve metadata and a placeholder payload, but may still be too lossy to serve thumbnails and originals faithfully.
- The `jibo test 17` gallery blue-ring report is at least partly tangled up with the gallery-empty path: stock `@be/gallery` says `there's nothing in the gallery yet. want to take a picture now?`, so lingering mic state there is not purely a launch-routing issue.
- The `jibo test 18` bundle shows the more direct failure mode: short local replies like `yes` can stall if buffered-audio auto-finalize waits too long, and the old `OPENJIBO_AUDIO_RECEIVED` compatibility event only added robot-side warning noise while the ring stayed blue.
Latest update and state findings:

View File

@@ -6,12 +6,13 @@
This is the production-oriented path for restoring device connectivity and creating a foundation for future runtime, AI, and OTA work.
Current spoken cloud version: `Open Jibo Cloud version 1.0.14.`
Current spoken cloud version: `Open Jibo Cloud version 1.0.15.`
Release hygiene reminder:
- bump [OpenJiboCloudBuildInfo.cs](/C:/Projects/JiboExperiments/OpenJibo/src/Jibo.Cloud/dotnet/src/Jibo.Cloud.Application/Services/OpenJiboCloudBuildInfo.cs) whenever we ship a meaningful hosted-cloud update
- keep the spoken version response and `/health` version field aligned from that single source of truth
- the API startup log now prints the same version on boot, which is useful for confirming the running build during live robot tests
## Architecture

View File

@@ -11,6 +11,8 @@ builder.Services.AddOpenJiboCloud(builder.Configuration);
var app = builder.Build();
app.Logger.LogInformation("Starting Open Jibo Cloud Api version {Version}", OpenJiboCloudBuildInfo.Version);
app.UseWebSockets();
app.Use(async (context, next) =>

View File

@@ -2,7 +2,7 @@ namespace Jibo.Cloud.Application.Services;
public static class OpenJiboCloudBuildInfo
{
public const string Version = "1.0.14";
public const string Version = "1.0.15";
public static string VersionWords => Version.Replace(".", " dot ");

View File

@@ -15,7 +15,7 @@ public sealed class WebSocketTurnFinalizationService(
)
{
private const int AutoFinalizeMinBufferedAudioBytes = 12000;
private const int AutoFinalizeMinBufferedAudioChunks = 5;
private const int AutoFinalizeMinBufferedAudioChunks = 4;
private static readonly TimeSpan AutoFinalizeMinTurnAge = TimeSpan.FromMilliseconds(1400);
public void ObserveIncomingMessage(CloudSession session, string? text)
@@ -69,23 +69,7 @@ public sealed class WebSocketTurnFinalizationService(
return await FinalizeTurnAsync(session, envelope, "AUTO_FINALIZE", allowFallbackOnMissingTranscript: true, cancellationToken);
}
return
[
new WebSocketReply
{
Text = JsonSerializer.Serialize(new
{
type = "OPENJIBO_AUDIO_RECEIVED",
data = new
{
bytes = envelope.Binary?.Length ?? 0,
bufferedBytes = turnState.BufferedAudioBytes,
bufferedChunks = turnState.BufferedAudioChunkCount,
sessionId = session.SessionId
}
})
}
];
return [];
}
public async Task<IReadOnlyList<WebSocketReply>> HandleContextAsync(
@@ -638,11 +622,9 @@ public sealed class WebSocketTurnFinalizationService(
? DateTimeOffset.UtcNow - turnState.FirstAudioReceivedUtc.Value
: TimeSpan.Zero;
return turnState.AwaitingTurnCompletion &&
turnState is
{
SawListen: true, SawContext: true, BufferedAudioChunkCount: >= AutoFinalizeMinBufferedAudioChunks,
BufferedAudioBytes: >= AutoFinalizeMinBufferedAudioBytes
} &&
turnState.SawListen &&
turnState.BufferedAudioChunkCount >= AutoFinalizeMinBufferedAudioChunks &&
turnState.BufferedAudioBytes >= AutoFinalizeMinBufferedAudioBytes &&
turnAge >= AutoFinalizeMinTurnAge;
}

View File

@@ -23,9 +23,7 @@
},
{
"binary": [1, 2, 3, 4],
"expectedReplyTypes": [
"OPENJIBO_AUDIO_RECEIVED"
]
"expectedReplyTypes": []
},
{
"text": {

View File

@@ -36,9 +36,7 @@
},
{
"binary": [1, 2, 3, 4, 5, 6],
"expectedReplyTypes": [
"OPENJIBO_AUDIO_RECEIVED"
]
"expectedReplyTypes": []
},
{
"text": {

View File

@@ -35,15 +35,11 @@
},
{
"binary": [1, 2, 3],
"expectedReplyTypes": [
"OPENJIBO_AUDIO_RECEIVED"
]
"expectedReplyTypes": []
},
{
"binary": [4, 5, 6, 7],
"expectedReplyTypes": [
"OPENJIBO_AUDIO_RECEIVED"
]
"expectedReplyTypes": []
},
{
"text": {

View File

@@ -81,7 +81,7 @@ public sealed class JiboWebSocketServiceTests
}
[Fact]
public async Task BinaryMessage_ReturnsAcknowledgementPayload()
public async Task BinaryMessage_BuffersAudioWithoutEmittingSyntheticAck()
{
var replies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope
{
@@ -92,11 +92,11 @@ public sealed class JiboWebSocketServiceTests
Binary = [1, 2, 3, 4]
});
using var payload = JsonDocument.Parse(replies[0].Text!);
Assert.Equal("OPENJIBO_AUDIO_RECEIVED", payload.RootElement.GetProperty("type").GetString());
Assert.Equal(4, payload.RootElement.GetProperty("data").GetProperty("bytes").GetInt32());
Assert.Equal(4, payload.RootElement.GetProperty("data").GetProperty("bufferedBytes").GetInt32());
Assert.Equal(1, payload.RootElement.GetProperty("data").GetProperty("bufferedChunks").GetInt32());
Assert.Empty(replies);
var session = _store.FindSessionByToken("hub-test-token");
Assert.NotNull(session);
Assert.Equal(4, session.TurnState.BufferedAudioBytes);
Assert.Equal(1, session.TurnState.BufferedAudioChunkCount);
}
[Fact]
@@ -132,8 +132,7 @@ public sealed class JiboWebSocketServiceTests
Binary = new byte[3000]
});
Assert.Single(replies);
Assert.Equal("OPENJIBO_AUDIO_RECEIVED", ReadReplyType(replies[0]));
Assert.Empty(replies);
}
var session = _store.FindSessionByToken("hub-auto-finalize-token");
@@ -193,8 +192,7 @@ public sealed class JiboWebSocketServiceTests
Binary = new byte[3000]
});
Assert.Single(replies);
Assert.Equal("OPENJIBO_AUDIO_RECEIVED", ReadReplyType(replies[0]));
Assert.Empty(replies);
}
var session = _store.FindSessionByToken("hub-auto-fallback-token");
@@ -251,11 +249,8 @@ public sealed class JiboWebSocketServiceTests
Binary = [4, 5, 6, 7]
});
using var firstPayload = JsonDocument.Parse(firstAudioReplies[0].Text!);
using var secondPayload = JsonDocument.Parse(secondAudioReplies[0].Text!);
Assert.Equal(3, firstPayload.RootElement.GetProperty("data").GetProperty("bufferedBytes").GetInt32());
Assert.Equal(7, secondPayload.RootElement.GetProperty("data").GetProperty("bufferedBytes").GetInt32());
Assert.Equal(2, secondPayload.RootElement.GetProperty("data").GetProperty("bufferedChunks").GetInt32());
Assert.Empty(firstAudioReplies);
Assert.Empty(secondAudioReplies);
var session = _store.FindSessionByToken("hub-multichunk-token");
Assert.NotNull(session);
@@ -961,8 +956,7 @@ public sealed class JiboWebSocketServiceTests
Binary = new byte[3000]
});
Assert.Single(interimReplies);
Assert.Equal("OPENJIBO_AUDIO_RECEIVED", ReadReplyType(interimReplies[0]));
Assert.Empty(interimReplies);
}
var session = _store.FindSessionByToken("hub-yesno-noinput-token");
@@ -1446,8 +1440,7 @@ public sealed class JiboWebSocketServiceTests
Binary = new byte[3000]
});
Assert.Single(interimReplies);
Assert.Equal("OPENJIBO_AUDIO_RECEIVED", ReadReplyType(interimReplies[0]));
Assert.Empty(interimReplies);
}
var session = _store.FindSessionByToken("hub-wod-auto-token");
@@ -1742,8 +1735,7 @@ public sealed class JiboWebSocketServiceTests
Binary = [1, 2, 3, 4, 5, 6]
});
Assert.Single(audioReplies);
Assert.Equal("OPENJIBO_AUDIO_RECEIVED", ReadReplyType(audioReplies[0]));
Assert.Empty(audioReplies);
var finalizeReplies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope
{
@@ -1889,8 +1881,7 @@ public sealed class JiboWebSocketServiceTests
Binary = new byte[3000]
});
Assert.Single(interimReplies);
Assert.Equal("OPENJIBO_AUDIO_RECEIVED", ReadReplyType(interimReplies[0]));
Assert.Empty(interimReplies);
}
var session = _store.FindSessionByToken("hub-hotphrase-greeting-token");