Compare commits
6 Commits
98c64f6efb
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
afcacf6df4 | ||
|
422912b16a
|
|||
| 33f4e5bdaf | |||
|
e29e4eca7c
|
|||
| f6aada604d | |||
| 0420f4026b |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,3 +1,3 @@
|
||||
|
||||
.obsidian/workspace.json
|
||||
.obsidian/workspace.json
|
||||
./Nf
|
||||
16
.obsidian/plugins/webpage-html-export/data.json
vendored
16
.obsidian/plugins/webpage-html-export/data.json
vendored
@@ -10,7 +10,7 @@
|
||||
"postProcess": true,
|
||||
"displayProgress": true,
|
||||
"inlineHTML": false,
|
||||
"exportPath": "/home/kevin/Desktop/test2",
|
||||
"exportPath": "",
|
||||
"filesToExport": [],
|
||||
"useFallbackRenderer": false,
|
||||
"addBodyClasses": true,
|
||||
@@ -196,7 +196,7 @@
|
||||
"alwaysEnabled": false,
|
||||
"hideSettingsButton": true
|
||||
},
|
||||
"relativeHeaderLinks": true,
|
||||
"relativeHeaderLinks": false,
|
||||
"includeJS": true,
|
||||
"includeCSS": true,
|
||||
"inlineMedia": false,
|
||||
@@ -214,18 +214,12 @@
|
||||
"siteName": "JiboDocs",
|
||||
"iconEmojiStyle": "Native",
|
||||
"exportRoot": "",
|
||||
"includePluginCss": [
|
||||
"code-styler",
|
||||
"code-styler-styles"
|
||||
],
|
||||
"includeStyleCssIds": [
|
||||
"code-styler",
|
||||
"code-styler-styles"
|
||||
],
|
||||
"includePluginCss": [],
|
||||
"includeStyleCssIds": [],
|
||||
"autoDisposeWebpages": true
|
||||
},
|
||||
"logLevel": "warning",
|
||||
"titleProperty": "JRD",
|
||||
"titleProperty": "title",
|
||||
"rssDateProperty": "date",
|
||||
"onlyExportModified": true,
|
||||
"deleteOldFiles": true,
|
||||
|
||||
57
.obsidian/workspace.json
vendored
57
.obsidian/workspace.json
vendored
@@ -4,39 +4,24 @@
|
||||
"type": "split",
|
||||
"children": [
|
||||
{
|
||||
"id": "05bab3d2778ffeca",
|
||||
"id": "7b3950c54aff9719",
|
||||
"type": "tabs",
|
||||
"children": [
|
||||
{
|
||||
"id": "6d78e1847f72f5de",
|
||||
"id": "cf241359d22c6783",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "markdown",
|
||||
"state": {
|
||||
"file": "Documentation/Useful Items List.md",
|
||||
"file": "Documentation/Voice/Local Voice Round-Trip on Jibo (AI Commmunication).md",
|
||||
"mode": "source",
|
||||
"source": false
|
||||
},
|
||||
"icon": "lucide-file",
|
||||
"title": "Useful Items List"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "0806f039bf8a940e",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "markdown",
|
||||
"state": {
|
||||
"file": "Jibo Tools & Mod Installer/08 - Troubleshooting.md",
|
||||
"mode": "source",
|
||||
"source": false
|
||||
},
|
||||
"icon": "lucide-file",
|
||||
"title": "08 - Troubleshooting"
|
||||
"title": "Local Voice Round-Trip on Jibo (AI Commmunication)"
|
||||
}
|
||||
}
|
||||
],
|
||||
"currentTab": 1
|
||||
]
|
||||
}
|
||||
],
|
||||
"direction": "vertical"
|
||||
@@ -212,8 +197,23 @@
|
||||
"quartz-syncer:Quartz Syncer publication center": false
|
||||
}
|
||||
},
|
||||
"active": "0806f039bf8a940e",
|
||||
"active": "cf241359d22c6783",
|
||||
"lastOpenFiles": [
|
||||
"home/kevin/Documents/Jibo-Official-Endpoints/README.md",
|
||||
"home/kevin/Documents/Jibo-Official-Endpoints",
|
||||
"home/kevin/Documents",
|
||||
"home/kevin",
|
||||
"home",
|
||||
"Documentation/Useful Items List.md",
|
||||
"README.md",
|
||||
"External Devices/MFSx",
|
||||
"External Devices",
|
||||
"Documentation/Voice/Local Voice Round-Trip on Jibo (AI Commmunication).md",
|
||||
"Jibo Tools & Mod Installer/08 - Troubleshooting.md",
|
||||
"Documentation/Voice",
|
||||
"Jibo Tools & Mod Installer/07 - Working Directory + State Files.md",
|
||||
"Jibo Tools & Mod Installer/01 - Installer (How It Works).md",
|
||||
"Getting Started/Modifying the Firmware/Using the install script.md",
|
||||
"index.md",
|
||||
"obsidian/08 - Troubleshooting.md",
|
||||
"obsidian/07 - Working Directory + State Files.md",
|
||||
@@ -228,33 +228,18 @@
|
||||
"Documentation/Menu Buttons/ButtonSetup.png",
|
||||
"Documentation/Menu Buttons/ButtonSetup.kra",
|
||||
"Documentation/Menu Buttons",
|
||||
"Jibo Tools & Mod Installer/07 - Working Directory + State Files.md",
|
||||
"Jibo Tools & Mod Installer/06 - Updater (How It Works).md",
|
||||
"Jibo Tools & Mod Installer/05 - Windows Support.md",
|
||||
"Jibo Tools & Mod Installer/04 - GUI (How It Works).md",
|
||||
"Assets/JiboToolsScreen.png",
|
||||
"Assets/Screenshot_20260317_235807.png",
|
||||
"Jibo Tools & Mod Installer/03 - CLI Arguments.md",
|
||||
"Jibo Tools & Mod Installer/01 - Installer (How It Works).md",
|
||||
"Getting Started/Modifying the Firmware/1. Get your environment ready!.md",
|
||||
"Jibo Tools & Mod Installer/00 - Index.md",
|
||||
"Documentation/Useful Items List.md",
|
||||
"Jibo Tools & Mod Installer/02 - Mapping to guide.md",
|
||||
"Getting Started/Welcome to the Jibo Revival Project.md",
|
||||
"Documentation/AtDev - New Firewall script.md",
|
||||
"Documentation/Networking/Network Profiling & Traffic Analysis.md",
|
||||
"Documentation/Networking",
|
||||
"Documentation/The be skill/Assets/The audio directory.md",
|
||||
"Documentation/The be skill/Assets/The assets directory.md",
|
||||
"Documentation/The be skill/Assets",
|
||||
"Documentation/The be skill/The Splash screen image!.md",
|
||||
"Documentation/The be skill",
|
||||
"Documentation/The be skill/Assets/Menu Buttons/ButtonSetup.png",
|
||||
"Assets/Jibo RCM.jpg",
|
||||
"About UART Connection",
|
||||
"Jibo Tools & Mod Installer",
|
||||
"Documentation/The be skill/Assets/Menu Buttons/ButtonSetup.kra",
|
||||
"Documentation/The be skill/Assets/Menu Buttons",
|
||||
"Assets/JiboChatGPT.png"
|
||||
]
|
||||
}
|
||||
@@ -7,7 +7,10 @@ Jibo was built a little weird, so it's easy to forget things. This document cont
|
||||
## About [[The audio directory]]
|
||||
|
||||
## About [[Networking & ports & Error codes]]
|
||||
|
||||
## About [[Network Profiling & Traffic Analysis]]
|
||||
|
||||
## About [[Local Voice Round-Trip on Jibo (AI Commmunication)]]
|
||||
|
||||
- - -
|
||||
Documented by ZaneDev @ Our Discord
|
||||
@@ -0,0 +1,264 @@
|
||||
# Local ASR, TTS, and Voice Round-Trip on Jibo (Post-Cloud)
|
||||
|
||||
> This document describes the first confirmed working voice interaction on a Jibo robot after official cloud services were discontinued.
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
Short version: Jibo can still have a full conversation loop locally.
|
||||
|
||||
We now have:
|
||||
|
||||
* Speech → text (STT) working locally
|
||||
* Text → speech (TTS) working locally
|
||||
* A working loop where Jibo hears something and responds
|
||||
|
||||
This is all happening without the original cloud services.
|
||||
|
||||
---
|
||||
|
||||
## Key Findings
|
||||
|
||||
Here’s what we now know for sure:
|
||||
|
||||
* Wake word detection (`hey jibo`) still works locally
|
||||
* Speaker ID is still running locally (even if it rejects us 😄)
|
||||
* `jibo-asr-service` can be started and controlled manually
|
||||
* ASR (speech recognition) is exposed over HTTP on port `8088`
|
||||
* TTS (speech output) is exposed over HTTP on port `8089`
|
||||
|
||||
### ASR Endpoints
|
||||
|
||||
Confirmed working endpoints:
|
||||
|
||||
* `/asr_simple_interface`
|
||||
* `/audio_source`
|
||||
* `/asr_control`
|
||||
* `/status`
|
||||
|
||||
### WebSocket Outputs
|
||||
|
||||
ASR results are streamed over WebSockets:
|
||||
|
||||
* `ws://<jibo-ip>:8088/port`
|
||||
* `ws://<jibo-ip>:8088/simple_port`
|
||||
|
||||
### Example STT Start Payload
|
||||
|
||||
```json
|
||||
{
|
||||
"command": "start",
|
||||
"task_id": "DEBUG:task3",
|
||||
"audio_source_id": "alsa1",
|
||||
"hotphrase": "none",
|
||||
"speech_to_text": true,
|
||||
"request_id": "stt_start3"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## What’s Actually Happening (Architecture)
|
||||
|
||||
Here’s the real flow in plain English:
|
||||
|
||||
1. We send a request to Jibo to start listening
|
||||
2. Jibo captures audio from its mic (ALSA)
|
||||
3. The ASR engine processes it
|
||||
4. Results come back over WebSocket
|
||||
5. Our app reads the transcript
|
||||
6. Our app decides what to say
|
||||
7. We send that to Jibo’s TTS
|
||||
8. Jibo speaks
|
||||
|
||||
Visual version:
|
||||
|
||||
```
|
||||
HTTP POST (/asr_simple_interface)
|
||||
↓
|
||||
ASR service captures audio
|
||||
↓
|
||||
Speech recognition runs locally
|
||||
↓
|
||||
WebSocket emits events
|
||||
↓
|
||||
External app receives transcript
|
||||
↓
|
||||
External logic decides response
|
||||
↓
|
||||
HTTP POST (/tts_speak)
|
||||
↓
|
||||
Jibo talks
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Example WebSocket Output
|
||||
|
||||
Here’s a trimmed real example of a final result:
|
||||
|
||||
```json
|
||||
{
|
||||
"event_type": "speech_to_text_final",
|
||||
"task_id": "DEBUG:task3",
|
||||
"utterances": [
|
||||
{
|
||||
"utterance": "what time is it",
|
||||
"score": 975.9
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
You’ll also see:
|
||||
|
||||
* `speech_to_text_incremental` (partial results)
|
||||
* `end_of_speech`
|
||||
* `hotphrase` (for "hey jibo")
|
||||
|
||||
---
|
||||
|
||||
## Demo Flow (How to Reproduce)
|
||||
|
||||
This is the important part.
|
||||
|
||||
### 1. Make sure you are in `int-developer` mode and the ASR service is running
|
||||
|
||||
From an SSH session on Jibo, start the ASR service manually:
|
||||
|
||||
```
|
||||
/usr/local/bin/jibo-asr-service -c /usr/local/etc/jibo-asr-service.json
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2. Connect to WebSocket
|
||||
|
||||
```
|
||||
ws://<jibo-ip>:8088/simple_port
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3. Start an STT task
|
||||
|
||||
POST to:
|
||||
|
||||
```
|
||||
http://<jibo-ip>:8088/asr_simple_interface
|
||||
```
|
||||
|
||||
With:
|
||||
|
||||
```json
|
||||
{
|
||||
"command": "start",
|
||||
"task_id": "DEBUG:task3",
|
||||
"audio_source_id": "alsa1",
|
||||
"hotphrase": "none",
|
||||
"speech_to_text": true
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4. Speak to Jibo
|
||||
|
||||
Say something like:
|
||||
|
||||
> “what time is it”
|
||||
|
||||
---
|
||||
|
||||
### 5. Wait for final transcript
|
||||
|
||||
Watch for:
|
||||
|
||||
```
|
||||
event_type: speech_to_text_final
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 6. Send response to TTS
|
||||
|
||||
POST to:
|
||||
|
||||
```
|
||||
http://<jibo-ip>:8089/tts_speak
|
||||
```
|
||||
|
||||
With something like:
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "It is demo time."
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 7. Jibo speaks 🎉
|
||||
|
||||
---
|
||||
|
||||
## Known Behaviors / Quirks
|
||||
|
||||
Some things we’ve seen so far:
|
||||
|
||||
* WebSocket connections can drop → reconnect logic helps
|
||||
* Incremental results can be messy or duplicated
|
||||
* Multiple transcript guesses can show up
|
||||
* The wake word task (`task0`) keeps running alongside your custom task
|
||||
* Saying “hey jibo” during a manual STT session can interfere
|
||||
* Speaker ID often rejects the speaker (but this doesn’t block STT)
|
||||
|
||||
---
|
||||
|
||||
## Corrections to Previous Assumptions
|
||||
|
||||
Some things we (and others) thought before that are now clearly wrong or incomplete:
|
||||
|
||||
* “ASR is dead without cloud” → **Not true in developer mode**
|
||||
* “Only wake word works locally” → **Incomplete**
|
||||
* “No way to get transcripts” → **False (WebSocket output exists)**
|
||||
* “Jibo can’t answer questions anymore” → **Also false now 🙂**
|
||||
|
||||
---
|
||||
|
||||
## What This Means
|
||||
|
||||
This is a big deal:
|
||||
|
||||
* Jibo’s core voice pipeline is still there
|
||||
* The cloud handled orchestration; it was not the whole system
|
||||
* We can now rebuild the “brain” externally
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
Where this naturally goes next:
|
||||
|
||||
* Hook wake word → automatically trigger STT
|
||||
* Figure out how this behaves in “normal mode”
|
||||
* See if Jibo tries to initiate outbound connections (old cloud flow)
|
||||
* Intercept or replace those endpoints locally
|
||||
* Build a simple always-on bridge service:
|
||||
|
||||
* Wake word → STT → AI → TTS
|
||||
|
||||
---
|
||||
|
||||
## Final Thought
|
||||
|
||||
We didn’t just poke at endpoints here.
|
||||
|
||||
We proved Jibo can:
|
||||
|
||||
* hear
|
||||
* understand
|
||||
* and respond again
|
||||
|
||||
That’s a pretty great place to be.
|
||||
Reference in New Issue
Block a user