// OpenJibo/Playground/Program.cs
//
// Interactive console demo: asks for a Jibo robot's address, opens the robot's
// WebSocket event stream, starts a speech-to-text task over HTTP, waits for the
// final transcription, and answers through the robot's TTS endpoint.
//
// Provenance note: this file was recovered from a diff that also added
// OpenJibo/OpenJibo.slnx and OpenJibo/Playground/Playground.csproj. Their XML
// markup was lost in extraction; the surviving csproj values are
// "Exe", "net10.0", "enable", "enable" - presumably OutputType, TargetFramework,
// ImplicitUsings and Nullable (TODO confirm against the real project file).
// The code below relies on implicit usings for System, System.IO, System.Linq,
// System.Net.Http, System.Threading and System.Threading.Tasks.

using System.Net.Http.Json;
using System.Net.WebSockets;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;

Console.Write("Enter Jibo IP: ");
var jiboIp = (Console.ReadLine() ?? "").Trim();

if (string.IsNullOrWhiteSpace(jiboIp))
{
    Console.WriteLine("No IP entered.");
    return;
}

// Reject malformed input up front instead of crashing with UriFormatException.
if (Uri.CheckHostName(jiboIp) == UriHostNameType.Unknown
    || !Uri.TryCreate($"ws://{jiboIp}:8088/simple_port", UriKind.Absolute, out var wsUri))
{
    Console.WriteLine($"'{jiboIp}' is not a valid IP address or host name.");
    return;
}

var baseHttp = $"http://{jiboIp}:8088";
var ttsHttp = $"http://{jiboIp}:8089";
var retryDelay = TimeSpan.FromSeconds(2);

using var http = new HttpClient();
using var cts = new CancellationTokenSource();

Console.WriteLine($"Connecting to Jibo at {jiboIp}...");
Console.WriteLine("Press Ctrl+C to quit.");

Console.CancelKeyPress += (_, e) =>
{
    // Keep the process alive so the loop can shut down cleanly.
    e.Cancel = true;
    cts.Cancel();
};

while (!cts.IsCancellationRequested)
{
    try
    {
        var asrStarted = await RunRoundAsync(http, wsUri, baseHttp, ttsHttp, cts.Token);

        if (!asrStarted)
        {
            // Back off before retrying so a rejecting robot is not hammered in a tight loop.
            Console.WriteLine("Retrying in 2 seconds...");
            await Task.Delay(retryDelay, cts.Token);
            continue;
        }

        Console.WriteLine();
        Console.WriteLine("Press Enter to run another round, or Ctrl+C to quit.");
        Console.ReadLine();
    }
    catch (OperationCanceledException) when (cts.IsCancellationRequested)
    {
        // Only a user-requested cancellation ends the program. HttpClient timeouts
        // also surface as OperationCanceledException and must fall through to retry.
        break;
    }
    catch (Exception ex)
    {
        if (cts.IsCancellationRequested)
            break;

        Console.WriteLine($"Error: {ex.Message}");
        Console.WriteLine("Retrying in 2 seconds...");

        try
        {
            await Task.Delay(retryDelay, cts.Token);
        }
        catch (OperationCanceledException)
        {
            // Ctrl+C during the back-off: leave quietly instead of crashing.
            break;
        }
    }
}

// Runs one listen/reply round.
// Returns false when the robot rejected the ASR start command (nothing was started),
// true once a round ran to completion (utterance handled, socket closed, or timed out).
// Throws on connection/HTTP failures and on cancellation of `token`.
static async Task<bool> RunRoundAsync(
    HttpClient client,
    Uri socketUri,
    string asrBaseUrl,
    string ttsBaseUrl,
    CancellationToken token)
{
    var roundTaskId = $"DEBUG:demo-{Guid.NewGuid():N}";
    var startRequestId = $"stt_start_{Guid.NewGuid():N}";

    // Per-round token: lets us stop the reader and the timeout timer when the round ends.
    using var roundCts = CancellationTokenSource.CreateLinkedTokenSource(token);
    using var socket = new ClientWebSocket();

    await socket.ConnectAsync(socketUri, token);
    Console.WriteLine("WebSocket connected.");

    // Start reading before issuing "start" so no event can be missed.
    var readerTask = ReadFinalUtteranceAsync(socket, roundTaskId, roundCts.Token);
    var asrStarted = false;

    try
    {
        var startPayload = new
        {
            command = "start",
            task_id = roundTaskId,
            audio_source_id = "alsa1",
            hotphrase = "none",
            speech_to_text = true,
            request_id = startRequestId
        };

        using var startResp = await client.PostAsJsonAsync($"{asrBaseUrl}/asr_simple_interface", startPayload, token);
        var startBody = await startResp.Content.ReadAsStringAsync(token);

        Console.WriteLine($"ASR start: {(int)startResp.StatusCode} {startResp.ReasonPhrase}");
        Console.WriteLine(startBody);

        if (!startResp.IsSuccessStatusCode)
            return false;

        asrStarted = true;
        Console.WriteLine("Speak now...");

        var timeoutTask = Task.Delay(TimeSpan.FromSeconds(15), roundCts.Token);
        var winner = await Task.WhenAny(readerTask, timeoutTask);

        if (winner == timeoutTask)
        {
            // Awaiting rethrows if the delay ended through cancellation rather than elapsing.
            await timeoutTask;
            Console.WriteLine("Timed out waiting for speech_to_text_final.");
        }
        else
        {
            // Awaiting (rather than .Result) surfaces reader failures with their original type.
            var heard = await readerTask;

            if (heard is null)
                Console.WriteLine("WebSocket closed before speech_to_text_final arrived.");
            else
                await SpeakReplyAsync(client, ttsBaseUrl, heard, token);
        }

        return true;
    }
    finally
    {
        // Stop the reader and the timeout timer for this round.
        roundCts.Cancel();

        // Never leave an ASR task running on the robot, even after an error or Ctrl+C.
        if (asrStarted)
            await StopAsrAsync(client, asrBaseUrl, roundTaskId);

        // Observe the reader before the socket is disposed at scope exit.
        await DrainReaderAsync(readerTask);
    }
}

// Reads WebSocket messages until a "speech_to_text_final" event for `expectedTaskId`
// carries a usable utterance. Returns that utterance, or null when the server closes
// the socket or the socket leaves the Open state first.
static async Task<string?> ReadFinalUtteranceAsync(
    ClientWebSocket socket,
    string expectedTaskId,
    CancellationToken token)
{
    var buffer = new byte[8192];

    while (socket.State == WebSocketState.Open && !token.IsCancellationRequested)
    {
        using var message = new MemoryStream();
        WebSocketReceiveResult result;

        // A single message may arrive in several frames; accumulate until EndOfMessage.
        do
        {
            result = await socket.ReceiveAsync(new ArraySegment<byte>(buffer), token);

            if (result.MessageType == WebSocketMessageType.Close)
            {
                Console.WriteLine("WebSocket closed by server.");
                return null;
            }

            message.Write(buffer, 0, result.Count);
        }
        while (!result.EndOfMessage);

        var json = Encoding.UTF8.GetString(message.GetBuffer(), 0, (int)message.Length);

        AsrEvent? evt;
        try
        {
            evt = JsonSerializer.Deserialize<AsrEvent>(json);
        }
        catch (JsonException)
        {
            Console.WriteLine($"Non-JSON WS message: {json}");
            continue;
        }

        // Ignore empty payloads and events that belong to other tasks.
        if (evt is null || !string.Equals(evt.TaskId, expectedTaskId, StringComparison.Ordinal))
            continue;

        Console.WriteLine($"[{evt.EventType}] {json}");

        if (evt.EventType == "speech_to_text_final")
        {
            var best = PickBestUtterance(evt.Utterances);
            if (!string.IsNullOrWhiteSpace(best))
                return best;
        }
    }

    return null;
}

// Builds the reply for `heard` and sends it to the robot's TTS endpoint.
static async Task SpeakReplyAsync(HttpClient client, string ttsBaseUrl, string heard, CancellationToken token)
{
    Console.WriteLine($"Heard: {heard}");

    var reply = BuildReply(heard);
    Console.WriteLine($"Reply: {reply}");

    var ttsPayload = new
    {
        prompt = reply,
        locale = "en-us",
        voice = "griffin",
        mode = "text",
        outputMode = "stream"
    };

    using var ttsResp = await client.PostAsJsonAsync($"{ttsBaseUrl}/tts_speak", ttsPayload, token);
    var ttsBody = await ttsResp.Content.ReadAsStringAsync(token);

    Console.WriteLine($"TTS: {(int)ttsResp.StatusCode} {ttsResp.ReasonPhrase}");
    if (!string.IsNullOrWhiteSpace(ttsBody))
        Console.WriteLine(ttsBody);
}

// Best-effort "stop" for the given ASR task. Uses its own short timeout instead of the
// caller's token so the command still goes out after Ctrl+C; failures are only logged.
static async Task StopAsrAsync(HttpClient client, string asrBaseUrl, string asrTaskId)
{
    var stopPayload = new
    {
        command = "stop",
        task_id = asrTaskId,
        request_id = $"stt_stop_{Guid.NewGuid():N}"
    };

    using var stopCts = new CancellationTokenSource(TimeSpan.FromSeconds(5));

    try
    {
        using var stopResp = await client.PostAsJsonAsync($"{asrBaseUrl}/asr_simple_interface", stopPayload, stopCts.Token);
        _ = await stopResp.Content.ReadAsStringAsync(stopCts.Token);
        Console.WriteLine("STT task stopped.");
    }
    catch (Exception ex) when (ex is HttpRequestException or OperationCanceledException)
    {
        Console.WriteLine($"Could not stop STT task: {ex.Message}");
    }
}

// Awaits the reader after its token was cancelled, swallowing the failures that
// cancelling/aborting a pending receive is expected to produce.
static async Task DrainReaderAsync(Task reader)
{
    try
    {
        await reader;
    }
    catch (OperationCanceledException)
    {
    }
    catch (WebSocketException)
    {
    }
    catch (ObjectDisposedException)
    {
    }
}

// Returns the shortest non-blank normalized utterance, or "" when there is none.
// NOTE(review): AsrUtterance.Score is ignored here; ranking by score may be intended - confirm.
static string PickBestUtterance(List<AsrUtterance>? utterances)
{
    if (utterances is null || utterances.Count == 0)
        return "";

    // MinBy keeps the first of several equally short candidates, matching a stable sort.
    return utterances
        .Select(u => NormalizeUtterance(u?.Utterance))
        .Where(s => !string.IsNullOrWhiteSpace(s))
        .MinBy(s => s.Length) ?? "";
}

// Trims the text and removes a stray duplicated leading letter such as "wWhat" -> "What".
// Returns "" for null or blank input.
static string NormalizeUtterance(string? text)
{
    if (string.IsNullOrWhiteSpace(text))
        return "";

    var s = text.Trim();

    // Only the lower-then-upper pattern ("wWhat") is treated as an artifact, so ordinary
    // words that start with a doubled letter ("oops", "Llama", "eel") are left intact.
    // NOTE(review): other artifact shapes (e.g. "WWhat") are not handled - confirm what ASR emits.
    if (s.Length >= 2
        && char.IsLower(s[0])
        && char.IsUpper(s[1])
        && char.ToLowerInvariant(s[1]) == s[0])
    {
        s = s[1..];
    }

    return s;
}

// Maps a recognized utterance to a canned reply; echoes the utterance when nothing matches.
static string BuildReply(string heard)
{
    var text = heard.Trim().ToLowerInvariant();

    // Whole-word matching: "this" must not trigger the "hi" greeting,
    // and "sometimes" must not trigger the clock.
    if (ContainsWord(text, "time"))
        return $"It is {DateTime.Now:hh:mm tt}.";

    if (ContainsWord(text, "hello") || ContainsWord(text, "hi"))
        return "Hello! I heard you loud and clear.";

    if (ContainsWord(text, "your name"))
        return "I am Jibo, running with a local demo bridge.";

    return $"You said: {heard}";
}

// True when `phrase` occurs in `text` and is not glued to a letter or digit on either side.
static bool ContainsWord(string text, string phrase)
{
    var index = 0;

    while ((index = text.IndexOf(phrase, index, StringComparison.Ordinal)) >= 0)
    {
        var end = index + phrase.Length;
        var cleanStart = index == 0 || !char.IsLetterOrDigit(text[index - 1]);
        var cleanEnd = end == text.Length || !char.IsLetterOrDigit(text[end]);

        if (cleanStart && cleanEnd)
            return true;

        index++;
    }

    return false;
}

// One event received from the robot's WebSocket stream.
public sealed class AsrEvent
{
    [JsonPropertyName("event_type")]
    public string? EventType { get; set; }

    [JsonPropertyName("task_id")]
    public string? TaskId { get; set; }

    [JsonPropertyName("request_id")]
    public string? RequestId { get; set; }

    [JsonPropertyName("utterances")]
    public List<AsrUtterance>? Utterances { get; set; }
}

// One transcription hypothesis inside an AsrEvent.
public sealed class AsrUtterance
{
    [JsonPropertyName("utterance")]
    public string? Utterance { get; set; }

    [JsonPropertyName("score")]
    public double Score { get; set; }
}