first stab at solving for menus and real STT

This commit is contained in:
Jacob Dubin
2026-04-16 15:40:28 -05:00
parent efe4dfd04e
commit fe1e11653f
19 changed files with 799 additions and 19 deletions

View File

@@ -108,3 +108,25 @@ Current raw-audio behavior is still a compatibility bridge:
- if buffered audio has a synthetic transcript hint, the server now auto-finalizes the turn and emits `LISTEN` + `EOS` + `SKILL_ACTION`
- if buffered audio crosses the finalize threshold without a usable transcript, the server now emits a Node-style fallback completion with `EOS` instead of hanging the turn forever
- this is intentionally not a claim of real ASR parity
## Buffered Audio STT
The current `.NET` websocket stack now preserves buffered Ogg/Opus websocket frames in memory for each in-flight turn.
That enables two distinct STT paths:
- fixture-oriented synthetic transcript hints for replay and parity tests
- an opt-in local tool-based path that can normalize the buffered Ogg pages, call `ffmpeg`, and then call `whisper.cpp`
The local tool path is intentionally off by default. It exists to help map real robot audio behavior while the stable hosted cloud remains the primary goal.
Configuration lives under `OpenJibo:Stt`:
- `EnableLocalWhisperCpp`
- `FfmpegPath`
- `WhisperCliPath`
- `WhisperModelPath`
- `WhisperLanguage`
- `TempDirectory`
This is not yet a claim of production-ready onboard ASR. It is a `.NET` discovery seam that keeps us compatible with the Node oracle while we evaluate longer-term options such as Azure-hosted STT or a managed decode/transcribe stack.

View File

@@ -8,22 +8,30 @@ public sealed class DemoConversationBroker : IConversationBroker
{
var transcript = (turn.NormalizedTranscript ?? turn.RawTranscript ?? string.Empty).Trim();
var lowered = transcript.ToLowerInvariant();
var clientIntent = turn.Attributes.TryGetValue("clientIntent", out var rawClientIntent)
? rawClientIntent?.ToString()
: null;
var semanticIntent = ResolveSemanticIntent(lowered, clientIntent);
var reply = transcript.Length == 0
var reply = semanticIntent switch
{
"time" => $"It is {DateTime.Now:hh:mm tt}.",
"date" => $"Today is {DateTime.Now:dddd, MMMM d}.",
"dance" => "Okay. Watch this.",
_ => transcript.Length == 0
? "I am listening."
: lowered.Contains("time")
? $"It is {DateTime.Now:hh:mm tt}."
: lowered.Contains("hello") || lowered.Contains("hi")
? "Hello from the OpenJibo cloud."
: lowered.Contains("joke")
? "Why did the robot bring a ladder? Because it wanted to reach the cloud."
: $"I heard: {transcript}";
: lowered.Contains("hello") || lowered.Contains("hi")
? "Hello from the OpenJibo cloud."
: lowered.Contains("joke")
? "Why did the robot bring a ladder? Because it wanted to reach the cloud."
: $"I heard: {transcript}"
};
var plan = new ResponsePlan
{
SessionId = turn.SessionId,
Status = ResponseStatus.Succeeded,
IntentName = lowered.Contains("joke") ? "joke" : lowered.Contains("time") ? "time" : "chat",
IntentName = semanticIntent,
Topic = "conversation",
DeviceId = turn.DeviceId,
TargetHost = turn.HostName,
@@ -72,4 +80,39 @@ public sealed class DemoConversationBroker : IConversationBroker
return Task.FromResult(plan);
}
/// <summary>
/// Picks the semantic intent for a turn. An explicit client-side intent of
/// <c>askForTime</c> or <c>askForDate</c> (case-insensitive) wins; otherwise the first
/// matching keyword in the lowered transcript decides, falling back to <c>chat</c>.
/// </summary>
/// <param name="loweredTranscript">Transcript text that the caller has already lower-cased.</param>
/// <param name="clientIntent">Intent reported by the client, or <c>null</c> when none was sent.</param>
/// <returns>One of <c>time</c>, <c>date</c>, <c>joke</c>, <c>dance</c> or <c>chat</c>.</returns>
private static string ResolveSemanticIntent(string loweredTranscript, string? clientIntent)
{
    if ("askForTime".Equals(clientIntent, StringComparison.OrdinalIgnoreCase))
    {
        return "time";
    }

    if ("askForDate".Equals(clientIntent, StringComparison.OrdinalIgnoreCase))
    {
        return "date";
    }

    // Order matters: earlier keywords take precedence when a transcript contains several.
    (string Keyword, string Intent)[] keywordIntents =
    [
        ("joke", "joke"),
        ("dance", "dance"),
        ("time", "time"),
        ("date", "date"),
        ("day", "date")
    ];

    foreach (var (keyword, intent) in keywordIntents)
    {
        if (loweredTranscript.Contains(keyword, StringComparison.Ordinal))
        {
            return intent;
        }
    }

    return "chat";
}
}

View File

@@ -9,12 +9,12 @@ public sealed class ProtocolToTurnContextMapper
public TurnContext MapListenMessage(WebSocketMessageEnvelope envelope, CloudSession session, string messageType)
{
var turnState = session.TurnState;
var text = ExtractTranscript(envelope.Text);
var protocolOperation = messageType.ToLowerInvariant();
var attributes = new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase)
{
["messageType"] = messageType
};
var text = ExtractTranscript(envelope.Text, attributes);
if (!string.IsNullOrWhiteSpace(turnState.TransId))
{
@@ -35,6 +35,7 @@ public sealed class ProtocolToTurnContextMapper
{
attributes["bufferedAudioBytes"] = turnState.BufferedAudioBytes;
attributes["bufferedAudioChunks"] = turnState.BufferedAudioChunkCount;
attributes["bufferedAudioFrames"] = turnState.BufferedAudioFrames.Select(frame => frame.ToArray()).ToArray();
}
if (!string.IsNullOrWhiteSpace(turnState.AudioTranscriptHint))
@@ -66,7 +67,7 @@ public sealed class ProtocolToTurnContextMapper
};
}
private static string? ExtractTranscript(string? text)
private static string? ExtractTranscript(string? text, IDictionary<string, object?> attributes)
{
if (string.IsNullOrWhiteSpace(text))
{
@@ -99,6 +100,25 @@ public sealed class ProtocolToTurnContextMapper
}
if (data.TryGetProperty("intent", out var intent) && intent.ValueKind == JsonValueKind.String)
{
attributes["clientIntent"] = intent.GetString();
}
if (data.TryGetProperty("rules", out var rules) && rules.ValueKind == JsonValueKind.Array)
{
attributes["clientRules"] = rules.EnumerateArray()
.Where(item => item.ValueKind == JsonValueKind.String)
.Select(item => item.GetString() ?? string.Empty)
.Where(rule => !string.IsNullOrWhiteSpace(rule))
.ToArray();
}
if (data.TryGetProperty("entities", out var entities) && entities.ValueKind == JsonValueKind.Object)
{
attributes["clientEntities"] = entities.Clone();
}
if (intent.ValueKind == JsonValueKind.String)
{
return intent.GetString();
}

View File

@@ -10,11 +10,20 @@ public sealed class ResponsePlanToSocketMessagesMapper
{
var speak = plan.Actions.OfType<SpeakAction>().FirstOrDefault();
var skill = plan.Actions.OfType<InvokeNativeSkillAction>().FirstOrDefault();
var messageType = ReadAttribute(turn, "messageType");
var transId = turn.Attributes.TryGetValue("transID", out var transIdValue)
? transIdValue?.ToString() ?? string.Empty
: session.LastTransId ?? string.Empty;
var transcript = turn.NormalizedTranscript ?? turn.RawTranscript ?? string.Empty;
var rules = ReadRules(turn);
var clientIntent = ReadAttribute(turn, "clientIntent");
var rules = ReadRules(turn, messageType);
var outboundIntent = string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(clientIntent)
? clientIntent!
: plan.IntentName ?? "unknown";
var outboundAsrText = string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase) && !string.IsNullOrWhiteSpace(clientIntent)
? clientIntent!
: transcript;
var entities = ReadEntities(turn, messageType);
var messages = new List<SocketReplyPlan>();
messages.Add(new SocketReplyPlan(JsonSerializer.Serialize(new
@@ -27,18 +36,18 @@ public sealed class ResponsePlanToSocketMessagesMapper
{
confidence = 0.95,
final = true,
text = transcript
text = outboundAsrText
},
nlu = new
{
confidence = 0.95,
intent = plan.IntentName ?? "unknown",
intent = outboundIntent,
rules,
entities = new Dictionary<string, object?>()
entities
},
match = new
{
intent = plan.IntentName ?? "unknown",
intent = outboundIntent,
rule = rules.FirstOrDefault() ?? string.Empty,
score = 0.95
}
@@ -107,9 +116,13 @@ public sealed class ResponsePlanToSocketMessagesMapper
];
}
private static IReadOnlyList<string> ReadRules(TurnContext turn)
private static IReadOnlyList<string> ReadRules(TurnContext turn, string? messageType)
{
if (!turn.Attributes.TryGetValue("listenRules", out var value))
var attributeName = string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase)
? "clientRules"
: "listenRules";
if (!turn.Attributes.TryGetValue(attributeName, out var value))
{
return [];
}
@@ -122,12 +135,42 @@ public sealed class ResponsePlanToSocketMessagesMapper
};
}
/// <summary>
/// Returns the entities to echo back in the outbound NLU payload. Only <c>CLIENT_NLU</c>
/// turns carry client entities; every other case yields an empty dictionary.
/// </summary>
/// <param name="turn">Turn whose attributes may hold a <c>clientEntities</c> value.</param>
/// <param name="messageType">Inbound message type; compared case-insensitively to <c>CLIENT_NLU</c>.</param>
/// <returns>The stored JSON object or dictionary, or a new empty dictionary.</returns>
private static object ReadEntities(TurnContext turn, string? messageType)
{
    var isClientNlu = string.Equals(messageType, "CLIENT_NLU", StringComparison.OrdinalIgnoreCase);

    if (isClientNlu && turn.Attributes.TryGetValue("clientEntities", out var raw))
    {
        // Pass through only shapes that serialize as a JSON object.
        if (raw is JsonElement { ValueKind: JsonValueKind.Object } element)
        {
            return element;
        }

        if (raw is IDictionary<string, object?> map)
        {
            return map;
        }
    }

    return new Dictionary<string, object?>();
}
/// <summary>
/// Reads a turn attribute as text.
/// </summary>
/// <param name="turn">Turn whose attribute bag is queried.</param>
/// <param name="key">Attribute name to look up.</param>
/// <returns>The attribute's <c>ToString()</c> value, or <c>null</c> when the key is absent or its value is null.</returns>
private static string? ReadAttribute(TurnContext turn, string key)
{
    if (!turn.Attributes.TryGetValue(key, out var raw))
    {
        return null;
    }

    return raw?.ToString();
}
private static object BuildSkillPayload(ResponsePlan plan, TurnContext turn, string transId, SpeakAction speak, InvokeNativeSkillAction? skill)
{
var isJoke = string.Equals(plan.IntentName, "joke", StringComparison.OrdinalIgnoreCase) ||
string.Equals(skill?.SkillName, "@be/joke", StringComparison.OrdinalIgnoreCase);
var isDance = string.Equals(plan.IntentName, "dance", StringComparison.OrdinalIgnoreCase);
var skillId = isJoke ? "@be/joke" : skill?.SkillName ?? "chitchat-skill";
var esml = isJoke
var esml = isDance
? "<speak>Okay.<break size='0.2'/> Watch this.<anim cat='dance' filter='music, rom-upbeat' /></speak>"
: isJoke
? $"<speak><es cat='happy' filter='!ssa-only, !sfx-only' endNeutral='true'>{EscapeXml(speak.Text)}</es></speak>"
: $"<speak><es cat='neutral' filter='!ssa-only, !sfx-only' endNeutral='true'>{EscapeXml(speak.Text)}</es></speak>";
var mimId = isJoke ? "runtime-joke" : "runtime-chat";

View File

@@ -23,6 +23,10 @@ public sealed class WebSocketTurnFinalizationService(
turnState.FirstAudioReceivedUtc ??= DateTimeOffset.UtcNow;
turnState.BufferedAudioChunkCount += 1;
turnState.BufferedAudioBytes += envelope.Binary?.Length ?? 0;
if (envelope.Binary is { Length: > 0 })
{
turnState.BufferedAudioFrames.Add(envelope.Binary.ToArray());
}
turnState.LastAudioReceivedUtc = DateTimeOffset.UtcNow;
turnState.AwaitingTurnCompletion = true;
session.Metadata["lastAudioBytes"] = envelope.Binary?.Length ?? 0;
@@ -223,6 +227,7 @@ public sealed class WebSocketTurnFinalizationService(
session.TurnState.BufferedAudioChunkCount = 0;
session.TurnState.FirstAudioReceivedUtc = null;
session.TurnState.LastAudioReceivedUtc = null;
session.TurnState.BufferedAudioFrames.Clear();
session.TurnState.FinalizeAttemptCount = 0;
session.Metadata.Remove("audioTranscriptHint");
}
@@ -236,6 +241,7 @@ public sealed class WebSocketTurnFinalizationService(
turnState.LastAudioReceivedUtc = null;
turnState.BufferedAudioChunkCount = 0;
turnState.BufferedAudioBytes = 0;
turnState.BufferedAudioFrames.Clear();
turnState.FinalizeAttemptCount = 0;
turnState.AwaitingTurnCompletion = false;
turnState.SawListen = false;

View File

@@ -9,6 +9,7 @@ public sealed class WebSocketTurnState
public DateTimeOffset? LastAudioReceivedUtc { get; set; }
public int BufferedAudioChunkCount { get; set; }
public int BufferedAudioBytes { get; set; }
public List<byte[]> BufferedAudioFrames { get; } = [];
public int FinalizeAttemptCount { get; set; }
public bool AwaitingTurnCompletion { get; set; }
public bool SawListen { get; set; }

View File

@@ -0,0 +1,11 @@
namespace Jibo.Cloud.Infrastructure.Audio;
/// <summary>
/// Settings for the opt-in local buffered-audio STT path (ffmpeg + whisper.cpp).
/// Bound from the <c>OpenJibo:Stt</c> configuration section during service registration.
/// </summary>
public sealed class BufferedAudioSttOptions
{
/// <summary>Master switch; the local whisper.cpp strategy refuses turns unless this is true. Defaults to false.</summary>
public bool EnableLocalWhisperCpp { get; set; }
/// <summary>Executable used to convert the normalized Ogg file to WAV. Must be non-blank for the strategy to handle a turn.</summary>
public string? FfmpegPath { get; set; }
/// <summary>Executable of the whisper.cpp CLI. Must be non-blank for the strategy to handle a turn.</summary>
public string? WhisperCliPath { get; set; }
/// <summary>Model file passed to the whisper CLI via <c>-m</c>. Must be non-blank for the strategy to handle a turn.</summary>
public string? WhisperModelPath { get; set; }
/// <summary>Language code passed to the whisper CLI via <c>-l</c>. Defaults to <c>en</c>.</summary>
public string WhisperLanguage { get; set; } = "en";
/// <summary>Directory for intermediate audio files; when blank the strategy uses an <c>openjibo-stt</c> folder under the system temp path.</summary>
public string? TempDirectory { get; set; }
}

View File

@@ -0,0 +1,42 @@
using System.Diagnostics;
namespace Jibo.Cloud.Infrastructure.Audio;
/// <summary>
/// <see cref="IExternalProcessRunner"/> implementation that launches a child process without a shell,
/// captures its standard output and standard error, and treats a non-zero exit code as a failure.
/// </summary>
public sealed class ExternalProcessRunner : IExternalProcessRunner
{
    /// <summary>
    /// Runs <paramref name="fileName"/> with the given arguments and waits for it to exit.
    /// </summary>
    /// <param name="fileName">Executable to start.</param>
    /// <param name="arguments">Arguments passed individually through <c>ArgumentList</c>, so no manual quoting is needed.</param>
    /// <param name="cancellationToken">Cancels the wait; the child process is killed when cancellation is observed.</param>
    /// <returns>The exit code (always 0 on return) together with the captured output streams.</returns>
    /// <exception cref="InvalidOperationException">The process exited with a non-zero exit code.</exception>
    /// <exception cref="OperationCanceledException">The token was cancelled before the process finished.</exception>
    public async Task<ExternalProcessResult> RunAsync(string fileName, IReadOnlyList<string> arguments, CancellationToken cancellationToken = default)
    {
        using var process = new Process
        {
            StartInfo = new ProcessStartInfo
            {
                FileName = fileName,
                RedirectStandardOutput = true,
                RedirectStandardError = true,
                UseShellExecute = false,
                CreateNoWindow = true
            }
        };

        foreach (var argument in arguments)
        {
            process.StartInfo.ArgumentList.Add(argument);
        }

        process.Start();

        string stdOut;
        string stdErr;
        try
        {
            // Drain both pipes while waiting so a chatty child cannot block on a full pipe buffer.
            var stdOutTask = process.StandardOutput.ReadToEndAsync(cancellationToken);
            var stdErrTask = process.StandardError.ReadToEndAsync(cancellationToken);
            await process.WaitForExitAsync(cancellationToken);
            stdOut = await stdOutTask;
            stdErr = await stdErrTask;
        }
        catch (OperationCanceledException)
        {
            // Cancelling the wait does not stop the child; without this it would keep running orphaned.
            TryKill(process);
            throw;
        }

        if (process.ExitCode != 0)
        {
            throw new InvalidOperationException($"External process '{fileName}' failed with exit code {process.ExitCode}: {stdErr}");
        }

        return new ExternalProcessResult(process.ExitCode, stdOut, stdErr);
    }

    /// <summary>Best-effort termination of the child and its descendants; never throws.</summary>
    private static void TryKill(Process process)
    {
        try
        {
            if (!process.HasExited)
            {
                process.Kill(entireProcessTree: true);
            }
        }
        catch (Exception ex) when (ex is InvalidOperationException
                                       or NotSupportedException
                                       or AggregateException
                                       or System.ComponentModel.Win32Exception)
        {
            // The process already exited or could not be terminated; the original cancellation still propagates.
        }
    }
}

View File

@@ -0,0 +1,8 @@
namespace Jibo.Cloud.Infrastructure.Audio;
/// <summary>
/// Abstraction over launching an external command-line tool and collecting its output,
/// so callers can be exercised without starting real processes.
/// </summary>
public interface IExternalProcessRunner
{
/// <summary>
/// Runs <paramref name="fileName"/> with <paramref name="arguments"/> and returns its exit code and captured output.
/// </summary>
/// <param name="fileName">Executable to run.</param>
/// <param name="arguments">Arguments for the executable, one entry per argument.</param>
/// <param name="cancellationToken">Token used to cancel the run.</param>
/// <remarks>
/// The <c>ExternalProcessRunner</c> implementation throws <see cref="InvalidOperationException"/> when the
/// process exits with a non-zero code; other implementations should presumably do the same — confirm before relying on it.
/// </remarks>
Task<ExternalProcessResult> RunAsync(string fileName, IReadOnlyList<string> arguments, CancellationToken cancellationToken = default);
}
/// <summary>Outcome of a finished external process run.</summary>
/// <param name="ExitCode">Exit code reported by the process.</param>
/// <param name="StdOut">Everything the process wrote to standard output.</param>
/// <param name="StdErr">Everything the process wrote to standard error.</param>
public sealed record ExternalProcessResult(int ExitCode, string StdOut, string StdErr);

View File

@@ -0,0 +1,153 @@
using System.Text.Json;
using Jibo.Runtime.Abstractions;
namespace Jibo.Cloud.Infrastructure.Audio;
/// <summary>
/// Opt-in <see cref="ISttStrategy"/> that transcribes the Ogg pages buffered for a turn by
/// normalizing them, converting the result to 16 kHz mono WAV with <c>ffmpeg</c>, and running the
/// <c>whisper.cpp</c> CLI on that WAV file.
/// </summary>
public sealed class LocalWhisperCppBufferedAudioSttStrategy(
    BufferedAudioSttOptions options,
    IExternalProcessRunner processRunner) : ISttStrategy
{
    /// <summary>Provider name reported on every <see cref="SttResult"/> this strategy produces.</summary>
    public string Name => "local-whispercpp-buffered-audio";

    /// <summary>
    /// True only when the local path is enabled, all three tool/model paths are configured,
    /// and the turn carries at least one buffered audio frame.
    /// </summary>
    public bool CanHandle(TurnContext turn)
    {
        return options.EnableLocalWhisperCpp &&
               !string.IsNullOrWhiteSpace(options.FfmpegPath) &&
               !string.IsNullOrWhiteSpace(options.WhisperCliPath) &&
               !string.IsNullOrWhiteSpace(options.WhisperModelPath) &&
               ReadBufferedAudioFrames(turn).Count > 0;
    }

    /// <summary>
    /// Writes the normalized Ogg stream to a temp file, converts it to WAV, runs whisper.cpp and
    /// returns the extracted transcript. Both temp files are removed before returning.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The turn has no buffered frames, or whisper.cpp produced no transcript.
    /// </exception>
    public async Task<SttResult> TranscribeAsync(TurnContext turn, CancellationToken cancellationToken = default)
    {
        var frames = ReadBufferedAudioFrames(turn);
        if (frames.Count == 0)
        {
            throw new InvalidOperationException("Local whisper.cpp STT requires buffered websocket audio frames.");
        }

        var tempDirectory = options.TempDirectory;
        if (string.IsNullOrWhiteSpace(tempDirectory))
        {
            tempDirectory = Path.Combine(Path.GetTempPath(), "openjibo-stt");
        }

        Directory.CreateDirectory(tempDirectory);

        // The GUID suffix keeps overlapping or retried transcriptions of the same turn id from
        // overwriting, or deleting in their finally block, each other's intermediate files.
        var baseName = $"turn-{turn.TurnId}-{Guid.NewGuid():N}";
        var oggPath = Path.Combine(tempDirectory, $"{baseName}.ogg");
        var wavPath = Path.Combine(tempDirectory, $"{baseName}.wav");

        try
        {
            await File.WriteAllBytesAsync(oggPath, OggOpusAudioNormalizer.Normalize(frames), cancellationToken);

            await processRunner.RunAsync(
                options.FfmpegPath!,
                ["-y", "-i", oggPath, "-ar", "16000", "-ac", "1", "-f", "wav", wavPath],
                cancellationToken);

            var whisperResult = await processRunner.RunAsync(
                options.WhisperCliPath!,
                ["-m", options.WhisperModelPath!, "-f", wavPath, "-l", options.WhisperLanguage],
                cancellationToken);

            var transcript = ExtractTranscript(whisperResult.StdOut);
            if (string.IsNullOrWhiteSpace(transcript))
            {
                throw new InvalidOperationException("whisper.cpp returned no transcript for the buffered audio turn.");
            }

            return new SttResult
            {
                Text = transcript,
                Provider = Name,
                Locale = turn.Locale,
                Metadata = new Dictionary<string, object?>(StringComparer.OrdinalIgnoreCase)
                {
                    ["bufferedAudioBytes"] = ReadBufferedAudioBytes(turn),
                    ["bufferedAudioChunks"] = frames.Count,
                    ["ffmpegPath"] = options.FfmpegPath,
                    ["whisperCliPath"] = options.WhisperCliPath,
                    // Diagnostic only: the file itself is deleted in the finally block below.
                    ["wavPath"] = wavPath
                }
            };
        }
        finally
        {
            TryDelete(oggPath);
            TryDelete(wavPath);
        }
    }

    /// <summary>
    /// Reads the <c>bufferedAudioFrames</c> attribute in any of the shapes it may arrive in.
    /// Returns an empty list when the attribute is missing, null, or of an unsupported type.
    /// </summary>
    private static IReadOnlyList<byte[]> ReadBufferedAudioFrames(TurnContext turn)
    {
        if (!turn.Attributes.TryGetValue("bufferedAudioFrames", out var value) || value is null)
        {
            return [];
        }

        return value switch
        {
            byte[][] jagged => jagged,
            IReadOnlyList<byte[]> typed => typed,
            IEnumerable<byte[]> enumerable => enumerable.ToArray(),
            JsonElement jsonElement when jsonElement.ValueKind == JsonValueKind.Array => ReadJsonFrames(jsonElement),
            _ => []
        };
    }

    /// <summary>Decodes every readable frame of a JSON array, skipping entries that are not valid byte data.</summary>
    private static byte[][] ReadJsonFrames(JsonElement framesElement)
    {
        var frames = new List<byte[]>();
        foreach (var item in framesElement.EnumerateArray())
        {
            if (TryReadJsonFrame(item, out var frame))
            {
                frames.Add(frame);
            }
        }

        return frames.ToArray();
    }

    /// <summary>
    /// Reads one JSON frame. Accepts a Base64 string (how System.Text.Json writes <c>byte[]</c>)
    /// or an array of numbers that each fit in a byte; anything else is rejected instead of
    /// throwing or silently truncating values.
    /// </summary>
    private static bool TryReadJsonFrame(JsonElement item, out byte[] frame)
    {
        frame = [];

        switch (item.ValueKind)
        {
            case JsonValueKind.String:
            {
                if (!item.TryGetBytesFromBase64(out var decoded))
                {
                    return false;
                }

                frame = decoded;
                return true;
            }

            case JsonValueKind.Array:
            {
                var bytes = new byte[item.GetArrayLength()];
                var index = 0;
                foreach (var element in item.EnumerateArray())
                {
                    if (element.ValueKind != JsonValueKind.Number || !element.TryGetByte(out var single))
                    {
                        return false;
                    }

                    bytes[index] = single;
                    index += 1;
                }

                frame = bytes;
                return true;
            }

            default:
                return false;
        }
    }

    /// <summary>
    /// Reads the <c>bufferedAudioBytes</c> attribute as an int, returning 0 when it is missing,
    /// not a supported type, unparsable, or outside the int range.
    /// </summary>
    private static int ReadBufferedAudioBytes(TurnContext turn)
    {
        return turn.Attributes.TryGetValue("bufferedAudioBytes", out var bufferedAudioBytes) && bufferedAudioBytes is not null
            ? bufferedAudioBytes switch
            {
                int value => value,
                long value when value is >= int.MinValue and <= int.MaxValue => (int)value,
                // Invariant culture: this is machine-written data, not user-facing text.
                string value when int.TryParse(
                    value,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out var parsed) => parsed,
                _ => 0
            }
            : 0;
    }

    /// <summary>
    /// Pulls the spoken text out of the CLI's standard output. Lines that start with <c>[</c> and
    /// contain <c>--&gt;</c> are treated as timecoded segments and only the text after the first
    /// <c>]</c> is kept; when no such lines exist, all non-empty lines are joined as-is.
    /// </summary>
    private static string ExtractTranscript(string standardOutput)
    {
        var lines = standardOutput
            .Split(['\r', '\n'], StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);

        var timecoded = lines
            .Where(static line => line.StartsWith("[", StringComparison.Ordinal) && line.Contains("-->", StringComparison.Ordinal))
            .Select(static line =>
            {
                var closingBracket = line.IndexOf(']');
                return closingBracket >= 0 ? line[(closingBracket + 1)..].Trim() : line.Trim();
            })
            .Where(static line => !string.IsNullOrWhiteSpace(line))
            .ToArray();

        if (timecoded.Length > 0)
        {
            return string.Join(" ", timecoded).Trim();
        }

        return string.Join(" ", lines).Trim();
    }

    /// <summary>Deletes a temp file if it exists; failures are deliberately ignored.</summary>
    private static void TryDelete(string path)
    {
        try
        {
            if (File.Exists(path))
            {
                File.Delete(path);
            }
        }
        catch
        {
            // Best-effort cleanup only: a leftover temp file must not mask the transcription outcome.
        }
    }
}

View File

@@ -0,0 +1,114 @@
using System.Buffers.Binary;
using System.Text;
namespace Jibo.Cloud.Infrastructure.Audio;
/// <summary>
/// Rewrites a sequence of buffered Ogg pages into one contiguous byte stream: granule positions
/// are rebased, pages are renumbered from zero, the end-of-stream flag is set on the last page
/// only, and each page checksum is recomputed.
/// </summary>
internal static class OggOpusAudioNormalizer
{
    private const int PageHeaderLength = 27;
    private const byte EndOfStreamFlag = 0x04;

    private static readonly uint[] CrcTable = BuildCrcTable();

    /// <summary>
    /// Normalizes <paramref name="pages"/> and concatenates them. The input arrays are not modified.
    /// </summary>
    /// <param name="pages">One buffer per frame; each must begin with a well-formed Ogg page header.</param>
    /// <returns>The concatenated, rewritten pages, or an empty array when no pages were given.</returns>
    /// <exception cref="InvalidOperationException">A buffer is too short, lacks the <c>OggS</c> marker, or is truncated.</exception>
    public static byte[] Normalize(IReadOnlyList<byte[]> pages)
    {
        if (pages.Count == 0)
        {
            return [];
        }

        // Validate every page before rewriting any of them.
        var granules = new ulong[pages.Count];
        var totalLength = 0;
        for (var pageIndex = 0; pageIndex < pages.Count; pageIndex++)
        {
            granules[pageIndex] = ParsePage(pages[pageIndex]).GranulePosition;
            totalLength += pages[pageIndex].Length;
        }

        // Granules are rebased on the second page when there is one, otherwise on the only page.
        var baseGranule = granules.Length > 1 ? granules[1] : granules[0];
        var lastIndex = pages.Count - 1;

        var result = new byte[totalLength];
        var offset = 0;
        for (var pageIndex = 0; pageIndex < pages.Count; pageIndex++)
        {
            var source = pages[pageIndex];
            var target = result.AsSpan(offset, source.Length);
            source.CopyTo(target);

            var granule = granules[pageIndex];
            var rebased = pageIndex == 0 || granule < baseGranule
                ? 0UL
                : granule - baseGranule;

            BinaryPrimitives.WriteUInt64LittleEndian(target.Slice(6, 8), rebased);
            BinaryPrimitives.WriteUInt32LittleEndian(target.Slice(18, 4), (uint)pageIndex);

            target[5] = pageIndex == lastIndex
                ? (byte)(target[5] | EndOfStreamFlag)
                : (byte)(target[5] & ~EndOfStreamFlag);

            // The checksum is computed with its own field zeroed, then stored into that field.
            target.Slice(22, 4).Clear();
            BinaryPrimitives.WriteUInt32LittleEndian(target.Slice(22, 4), ComputeCrc(target));

            offset += source.Length;
        }

        return result;
    }

    /// <summary>Validates the page header, segment table and payload length, and reads the granule position.</summary>
    private static ParsedOggPage ParsePage(byte[] buffer)
    {
        if (buffer.Length < PageHeaderLength)
        {
            throw new InvalidOperationException($"Buffered Ogg page is too short ({buffer.Length} bytes).");
        }

        var hasCapturePattern = buffer[0] == (byte)'O'
                                && buffer[1] == (byte)'g'
                                && buffer[2] == (byte)'g'
                                && buffer[3] == (byte)'S';
        if (!hasCapturePattern)
        {
            throw new InvalidOperationException("Buffered audio frame did not begin with an OggS capture pattern.");
        }

        int segmentCount = buffer[26];
        var segmentTableEnd = PageHeaderLength + segmentCount;
        if (buffer.Length < segmentTableEnd)
        {
            throw new InvalidOperationException("Buffered Ogg page segment table was truncated.");
        }

        // Each segment-table byte is the length of one payload segment.
        var payloadLength = 0;
        foreach (var lacingValue in buffer.AsSpan(PageHeaderLength, segmentCount))
        {
            payloadLength += lacingValue;
        }

        if (buffer.Length < segmentTableEnd + payloadLength)
        {
            throw new InvalidOperationException("Buffered Ogg page payload was truncated.");
        }

        return new ParsedOggPage(BinaryPrimitives.ReadUInt64LittleEndian(buffer.AsSpan(6, 8)));
    }

    /// <summary>Table-driven CRC-32 over <paramref name="data"/>: polynomial 0x04c11db7, initial value 0, most significant bit first.</summary>
    private static uint ComputeCrc(ReadOnlySpan<byte> data)
    {
        uint crc = 0;
        for (var position = 0; position < data.Length; position++)
        {
            crc = (crc << 8) ^ CrcTable[((crc >> 24) ^ data[position]) & 0xff];
        }

        return crc;
    }

    /// <summary>Builds the 256-entry lookup table used by <see cref="ComputeCrc"/>.</summary>
    private static uint[] BuildCrcTable()
    {
        var table = new uint[256];
        for (var entry = 0; entry < 256; entry++)
        {
            var register = (uint)entry << 24;
            for (var shift = 0; shift < 8; shift++)
            {
                var carry = (register & 0x80000000) != 0;
                register <<= 1;
                if (carry)
                {
                    register ^= 0x04c11db7;
                }
            }

            table[entry] = register;
        }

        return table;
    }

    private sealed record ParsedOggPage(ulong GranulePosition);
}

View File

@@ -1,5 +1,6 @@
using Jibo.Cloud.Application.Abstractions;
using Jibo.Cloud.Application.Services;
using Jibo.Cloud.Infrastructure.Audio;
using Jibo.Cloud.Infrastructure.Persistence;
using Jibo.Cloud.Infrastructure.Telemetry;
using Jibo.Runtime.Abstractions;
@@ -12,14 +13,19 @@ public static class ServiceCollectionExtensions
{
public static IServiceCollection AddOpenJiboCloud(this IServiceCollection services, IConfiguration? configuration = null)
{
var sttOptions = new BufferedAudioSttOptions();
if (configuration is not null)
{
services.Configure<WebSocketTelemetryOptions>(configuration.GetSection("OpenJibo:Telemetry"));
services.Configure<ProtocolTelemetryOptions>(configuration.GetSection("OpenJibo:ProtocolTelemetry"));
configuration.GetSection("OpenJibo:Stt").Bind(sttOptions);
}
services.AddSingleton(sttOptions);
services.AddSingleton<ICloudStateStore, InMemoryCloudStateStore>();
services.AddSingleton<IConversationBroker, DemoConversationBroker>();
services.AddSingleton<IExternalProcessRunner, ExternalProcessRunner>();
services.AddSingleton<ISttStrategy, LocalWhisperCppBufferedAudioSttStrategy>();
services.AddSingleton<ISttStrategy, SyntheticBufferedAudioSttStrategy>();
services.AddSingleton<ISttStrategySelector, DefaultSttStrategySelector>();
services.AddSingleton<IWebSocketTelemetrySink, FileWebSocketTelemetrySink>();

View File

@@ -12,6 +12,7 @@ Current fixture groups:
Current websocket fixture depth is uneven on purpose:
- `neo-hub-client-asr-joke.flow.json` now asserts a richer vertical slice than reply types alone. It captures the observed Node-oriented `CLIENT_ASR -> LISTEN -> EOS -> delayed SKILL_ACTION` joke turn with payload-shape expectations for `EOS` and joke `SKILL_ACTION`.
- `neo-hub-client-nlu-clock-ask-time.flow.json` captures a real menu-style `CLIENT_NLU` turn from the latest live captures and asserts that `.NET` preserves the observed NLU intent/rules/entities in the synthetic websocket reply instead of flattening everything into generic chat.
- The other websocket fixtures are still mainly sequencing fixtures. They are useful for replay and guardrails, but they should not be read as proof of broader payload parity.
Expand this folder whenever new robot traffic is captured and cleaned.

View File

@@ -0,0 +1,82 @@
{
"name": "neo-hub client nlu clock ask time flow",
"session": {
"hostName": "neo-hub.jibo.com",
"path": "/listen",
"kind": "neo-hub-listen",
"token": "fixture-clock-nlu-token"
},
"steps": [
{
"text": {
"type": "LISTEN",
"transID": "fixture-trans-clock-time",
"data": {
"lang": "en-US",
"rules": [
"clock/clock_menu",
"globals/global_commands_launch"
],
"mode": "CLIENT_NLU"
}
},
"expectedReplyTypes": [
"OPENJIBO_TURN_PENDING"
]
},
{
"text": {
"type": "CLIENT_NLU",
"transID": "fixture-trans-clock-time",
"data": {
"entities": {
"domain": "clock"
},
"intent": "askForTime",
"rules": [
"clock/clock_menu"
]
}
},
"expectedReplyTypes": [
"LISTEN",
"EOS"
],
"expectedReplies": [
{
"type": "LISTEN",
"jsonSubset": {
"type": "LISTEN",
"transID": "fixture-trans-clock-time",
"data": {
"asr": {
"text": "askForTime"
},
"nlu": {
"intent": "askForTime",
"rules": [
"clock/clock_menu"
],
"entities": {
"domain": "clock"
}
},
"match": {
"intent": "askForTime",
"rule": "clock/clock_menu"
}
}
}
},
{
"type": "EOS",
"jsonSubset": {
"type": "EOS",
"transID": "fixture-trans-clock-time",
"data": {}
}
}
]
}
]
}