first stab at solving for menus and real STT

This commit is contained in:
Jacob Dubin
2026-04-16 15:40:28 -05:00
parent efe4dfd04e
commit fe1e11653f
19 changed files with 799 additions and 19 deletions

View File

@@ -294,6 +294,42 @@ public sealed class JiboWebSocketServiceTests
Assert.Equal("trans-follow-up", session.LastTransId);
}
/// <summary>
/// Sends a LISTEN message in CLIENT_NLU mode followed by a CLIENT_NLU message carrying the
/// <c>askForTime</c> intent, then verifies the LISTEN reply echoes the intent, entities and rules.
/// </summary>
[Fact]
public async Task ClientNlu_ClockAskForTime_PreservesObservedIntentRulesAndEntities()
{
    // Both messages share host, path, kind and token; only the JSON text differs.
    WebSocketMessageEnvelope HubListenMessage(string json) => new()
    {
        HostName = "neo-hub.jibo.com",
        Path = "/listen",
        Kind = "neo-hub-listen",
        Token = "hub-clock-menu-token",
        Text = json
    };

    // Step 1: opening the listen turn should yield exactly one pending reply.
    var pendingReplies = await _service.HandleMessageAsync(HubListenMessage(
        """{"type":"LISTEN","transID":"trans-clock-time","data":{"lang":"en-US","rules":["clock/clock_menu","globals/global_commands_launch"],"mode":"CLIENT_NLU"}}"""));

    Assert.Single(pendingReplies);
    Assert.Equal("OPENJIBO_TURN_PENDING", ReadReplyType(pendingReplies[0]));

    // Step 2: the client-side NLU result should complete the turn with LISTEN followed by EOS.
    var completionReplies = await _service.HandleMessageAsync(HubListenMessage(
        """{"type":"CLIENT_NLU","transID":"trans-clock-time","data":{"entities":{"domain":"clock"},"intent":"askForTime","rules":["clock/clock_menu"]}}"""));

    Assert.Equal(2, completionReplies.Count);
    Assert.Equal("LISTEN", ReadReplyType(completionReplies[0]));
    Assert.Equal("EOS", ReadReplyType(completionReplies[1]));

    // Step 3: inspect the LISTEN reply payload section by section.
    using var replyDocument = JsonDocument.Parse(completionReplies[0].Text!);
    var data = replyDocument.RootElement.GetProperty("data");

    // The intent name is expected to be reported as the ASR text as well.
    Assert.Equal("askForTime", data.GetProperty("asr").GetProperty("text").GetString());

    var nlu = data.GetProperty("nlu");
    Assert.Equal("askForTime", nlu.GetProperty("intent").GetString());
    Assert.Equal("clock", nlu.GetProperty("entities").GetProperty("domain").GetString());
    Assert.Equal("clock/clock_menu", nlu.GetProperty("rules")[0].GetString());

    Assert.Equal("clock/clock_menu", data.GetProperty("match").GetProperty("rule").GetString());
}
[Fact]
public async Task BufferedAudio_WithSyntheticTranscriptHint_FinalizesThroughSttSeam()
{
@@ -562,6 +598,7 @@ public sealed class JiboWebSocketServiceTests
[Theory]
[InlineData("fixtures\\neo-hub-client-asr-joke.flow.json")]
[InlineData("fixtures\\neo-hub-context-client-nlu.flow.json")]
[InlineData("fixtures\\neo-hub-client-nlu-clock-ask-time.flow.json")]
[InlineData("fixtures\\neo-hub-buffered-audio-synthetic-asr.flow.json")]
[InlineData("fixtures\\neo-hub-multichunk-audio-chat.flow.json")]
[InlineData("fixtures\\neo-hub-buffered-audio-pending.flow.json")]

View File

@@ -0,0 +1,116 @@
using Jibo.Cloud.Infrastructure.Audio;
using Jibo.Runtime.Abstractions;
namespace Jibo.Cloud.Tests.WebSockets;
/// <summary>
/// Tests for <see cref="LocalWhisperCppBufferedAudioSttStrategy"/> that swap the external
/// process runner for an in-memory fake, so no real ffmpeg or whisper-cli binary is launched.
/// </summary>
public sealed class LocalWhisperCppBufferedAudioSttStrategyTests
{
    // Placeholder tool locations handed to the strategy; the fake runner keys off the ffmpeg name.
    private const string FfmpegExecutable = "ffmpeg";
    private const string WhisperExecutable = "whisper-cli";
    private const string WhisperModel = "model.bin";

    /// <summary>
    /// With <c>EnableLocalWhisperCpp</c> switched off, the strategy must decline a turn even
    /// when that turn carries buffered audio frames.
    /// </summary>
    [Fact]
    public void CanHandle_ReturnsFalse_WhenLocalWhisperIsDisabled()
    {
        var disabledOptions = new BufferedAudioSttOptions
        {
            EnableLocalWhisperCpp = false,
            FfmpegPath = FfmpegExecutable,
            WhisperCliPath = WhisperExecutable,
            WhisperModelPath = WhisperModel
        };
        var sut = new LocalWhisperCppBufferedAudioSttStrategy(disabledOptions, new FakeExternalProcessRunner());
        var turnWithAudio = new TurnContext
        {
            Attributes = new Dictionary<string, object?>
            {
                ["bufferedAudioFrames"] = new[] { BuildMinimalOggPage() }
            }
        };

        var canHandle = sut.CanHandle(turnWithAudio);

        Assert.False(canHandle);
    }

    /// <summary>
    /// With the local whisper.cpp path enabled, transcription should invoke the runner twice
    /// (ffmpeg first, then whisper-cli) and surface the transcript text returned by the fake.
    /// </summary>
    [Fact]
    public async Task TranscribeAsync_UsesFfmpegAndWhisperCpp_WhenConfigured()
    {
        // A unique scratch directory keeps files written during the test isolated.
        var scratchDirectory = Path.Combine(Path.GetTempPath(), $"openjibo-stt-test-{Guid.NewGuid():N}");
        Directory.CreateDirectory(scratchDirectory);

        try
        {
            var processRunner = new FakeExternalProcessRunner();
            var enabledOptions = new BufferedAudioSttOptions
            {
                EnableLocalWhisperCpp = true,
                FfmpegPath = FfmpegExecutable,
                WhisperCliPath = WhisperExecutable,
                WhisperModelPath = WhisperModel,
                TempDirectory = scratchDirectory
            };
            var sut = new LocalWhisperCppBufferedAudioSttStrategy(enabledOptions, processRunner);
            var turn = new TurnContext
            {
                TurnId = "turn-local-stt",
                Locale = "en-US",
                Attributes = new Dictionary<string, object?>
                {
                    // 47 is the length of the page produced by BuildMinimalOggPage.
                    ["bufferedAudioBytes"] = 47,
                    ["bufferedAudioFrames"] = new[] { BuildMinimalOggPage() }
                }
            };

            var transcription = await sut.TranscribeAsync(turn);

            Assert.Equal("tell me a joke", transcription.Text);
            Assert.Equal("local-whispercpp-buffered-audio", transcription.Provider);

            // Exactly two external invocations, in conversion-then-transcription order.
            Assert.Equal(2, processRunner.Calls.Count);
            Assert.Equal("ffmpeg", processRunner.Calls[0].FileName);
            Assert.Equal("whisper-cli", processRunner.Calls[1].FileName);

            Assert.Equal(47, transcription.Metadata["bufferedAudioBytes"]);
        }
        finally
        {
            // Always remove the scratch directory, including anything the fake runner wrote.
            if (Directory.Exists(scratchDirectory))
            {
                Directory.Delete(scratchDirectory, recursive: true);
            }
        }
    }

    /// <summary>
    /// Builds a 47-byte buffer: a 28-byte header starting with ASCII "OggS", followed by a
    /// 19-byte payload starting with ASCII "OpusHead".
    /// </summary>
    /// <remarks>
    /// NOTE(review): field labels below follow the Ogg page layout (RFC 3533) and the Opus
    /// identification header (RFC 7845) as understood by the reviewer; confirm against the specs.
    /// </remarks>
    private static byte[] BuildMinimalOggPage()
    {
        byte[] pageHeader =
        [
            0x4F, 0x67, 0x67, 0x53,                         // ASCII "OggS"
            0x00,                                           // presumably stream structure version
            0x02,                                           // presumably header-type flags
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // presumably granule position
            0x01, 0x00, 0x00, 0x00,                         // presumably stream serial number
            0x00, 0x00, 0x00, 0x00,                         // presumably page sequence number
            0x00, 0x00, 0x00, 0x00,                         // presumably checksum (left zeroed)
            0x01,                                           // presumably segment count
            0x13                                            // 0x13 == 19, the payload length below
        ];

        byte[] opusHeadPayload =
        [
            0x4F, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64, // ASCII "OpusHead"
            0x01, 0x01, 0x38, 0x01, 0x80, 0xBB, 0x00, 0x00, 0x00, 0x00, 0x00
        ];

        return [.. pageHeader, .. opusHeadPayload];
    }

    /// <summary>
    /// Test double for <see cref="IExternalProcessRunner"/> that records every invocation and
    /// returns canned results instead of starting a process.
    /// </summary>
    private sealed class FakeExternalProcessRunner : IExternalProcessRunner
    {
        /// <summary>Invocations in the order they were received.</summary>
        public List<(string FileName, IReadOnlyList<string> Arguments)> Calls { get; } = [];

        /// <summary>
        /// Records the call. For "ffmpeg" (case-insensitive) it writes four bytes to the path
        /// given as the last argument and returns empty output; for any other executable it
        /// returns a single timestamped transcript line.
        /// </summary>
        public Task<ExternalProcessResult> RunAsync(string fileName, IReadOnlyList<string> arguments, CancellationToken cancellationToken = default)
        {
            Calls.Add((fileName, arguments));

            var isFfmpeg = string.Equals(fileName, "ffmpeg", StringComparison.OrdinalIgnoreCase);
            if (!isFfmpeg)
            {
                // Anything that is not ffmpeg gets the canned transcript line.
                return Task.FromResult(new ExternalProcessResult(0, "[00:00:00.000 --> 00:00:01.000] tell me a joke", string.Empty));
            }

            // The last argument is treated as the output file; the bytes written are ASCII "RIFF".
            var convertedAudioPath = arguments.Last();
            File.WriteAllBytes(convertedAudioPath, [0x52, 0x49, 0x46, 0x46]);
            return Task.FromResult(new ExternalProcessResult(0, string.Empty, string.Empty));
        }
    }
}