Compare commits
6 Commits
98c64f6efb
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
afcacf6df4 | ||
|
422912b16a
|
|||
| 33f4e5bdaf | |||
|
e29e4eca7c
|
|||
| f6aada604d | |||
| 0420f4026b |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,3 +1,3 @@
|
|||||||
|
|
||||||
.obsidian/workspace.json
|
.obsidian/workspace.json
|
||||||
.obsidian/workspace.json
|
.obsidian/workspace.json
|
||||||
|
./Nf
|
||||||
16
.obsidian/plugins/webpage-html-export/data.json
vendored
16
.obsidian/plugins/webpage-html-export/data.json
vendored
@@ -10,7 +10,7 @@
|
|||||||
"postProcess": true,
|
"postProcess": true,
|
||||||
"displayProgress": true,
|
"displayProgress": true,
|
||||||
"inlineHTML": false,
|
"inlineHTML": false,
|
||||||
"exportPath": "/home/kevin/Desktop/test2",
|
"exportPath": "",
|
||||||
"filesToExport": [],
|
"filesToExport": [],
|
||||||
"useFallbackRenderer": false,
|
"useFallbackRenderer": false,
|
||||||
"addBodyClasses": true,
|
"addBodyClasses": true,
|
||||||
@@ -196,7 +196,7 @@
|
|||||||
"alwaysEnabled": false,
|
"alwaysEnabled": false,
|
||||||
"hideSettingsButton": true
|
"hideSettingsButton": true
|
||||||
},
|
},
|
||||||
"relativeHeaderLinks": true,
|
"relativeHeaderLinks": false,
|
||||||
"includeJS": true,
|
"includeJS": true,
|
||||||
"includeCSS": true,
|
"includeCSS": true,
|
||||||
"inlineMedia": false,
|
"inlineMedia": false,
|
||||||
@@ -214,18 +214,12 @@
|
|||||||
"siteName": "JiboDocs",
|
"siteName": "JiboDocs",
|
||||||
"iconEmojiStyle": "Native",
|
"iconEmojiStyle": "Native",
|
||||||
"exportRoot": "",
|
"exportRoot": "",
|
||||||
"includePluginCss": [
|
"includePluginCss": [],
|
||||||
"code-styler",
|
"includeStyleCssIds": [],
|
||||||
"code-styler-styles"
|
|
||||||
],
|
|
||||||
"includeStyleCssIds": [
|
|
||||||
"code-styler",
|
|
||||||
"code-styler-styles"
|
|
||||||
],
|
|
||||||
"autoDisposeWebpages": true
|
"autoDisposeWebpages": true
|
||||||
},
|
},
|
||||||
"logLevel": "warning",
|
"logLevel": "warning",
|
||||||
"titleProperty": "JRD",
|
"titleProperty": "title",
|
||||||
"rssDateProperty": "date",
|
"rssDateProperty": "date",
|
||||||
"onlyExportModified": true,
|
"onlyExportModified": true,
|
||||||
"deleteOldFiles": true,
|
"deleteOldFiles": true,
|
||||||
|
|||||||
57
.obsidian/workspace.json
vendored
57
.obsidian/workspace.json
vendored
@@ -4,39 +4,24 @@
|
|||||||
"type": "split",
|
"type": "split",
|
||||||
"children": [
|
"children": [
|
||||||
{
|
{
|
||||||
"id": "05bab3d2778ffeca",
|
"id": "7b3950c54aff9719",
|
||||||
"type": "tabs",
|
"type": "tabs",
|
||||||
"children": [
|
"children": [
|
||||||
{
|
{
|
||||||
"id": "6d78e1847f72f5de",
|
"id": "cf241359d22c6783",
|
||||||
"type": "leaf",
|
"type": "leaf",
|
||||||
"state": {
|
"state": {
|
||||||
"type": "markdown",
|
"type": "markdown",
|
||||||
"state": {
|
"state": {
|
||||||
"file": "Documentation/Useful Items List.md",
|
"file": "Documentation/Voice/Local Voice Round-Trip on Jibo (AI Commmunication).md",
|
||||||
"mode": "source",
|
"mode": "source",
|
||||||
"source": false
|
"source": false
|
||||||
},
|
},
|
||||||
"icon": "lucide-file",
|
"icon": "lucide-file",
|
||||||
"title": "Useful Items List"
|
"title": "Local Voice Round-Trip on Jibo (AI Commmunication)"
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "0806f039bf8a940e",
|
|
||||||
"type": "leaf",
|
|
||||||
"state": {
|
|
||||||
"type": "markdown",
|
|
||||||
"state": {
|
|
||||||
"file": "Jibo Tools & Mod Installer/08 - Troubleshooting.md",
|
|
||||||
"mode": "source",
|
|
||||||
"source": false
|
|
||||||
},
|
|
||||||
"icon": "lucide-file",
|
|
||||||
"title": "08 - Troubleshooting"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
]
|
||||||
"currentTab": 1
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"direction": "vertical"
|
"direction": "vertical"
|
||||||
@@ -212,8 +197,23 @@
|
|||||||
"quartz-syncer:Quartz Syncer publication center": false
|
"quartz-syncer:Quartz Syncer publication center": false
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"active": "0806f039bf8a940e",
|
"active": "cf241359d22c6783",
|
||||||
"lastOpenFiles": [
|
"lastOpenFiles": [
|
||||||
|
"home/kevin/Documents/Jibo-Official-Endpoints/README.md",
|
||||||
|
"home/kevin/Documents/Jibo-Official-Endpoints",
|
||||||
|
"home/kevin/Documents",
|
||||||
|
"home/kevin",
|
||||||
|
"home",
|
||||||
|
"Documentation/Useful Items List.md",
|
||||||
|
"README.md",
|
||||||
|
"External Devices/MFSx",
|
||||||
|
"External Devices",
|
||||||
|
"Documentation/Voice/Local Voice Round-Trip on Jibo (AI Commmunication).md",
|
||||||
|
"Jibo Tools & Mod Installer/08 - Troubleshooting.md",
|
||||||
|
"Documentation/Voice",
|
||||||
|
"Jibo Tools & Mod Installer/07 - Working Directory + State Files.md",
|
||||||
|
"Jibo Tools & Mod Installer/01 - Installer (How It Works).md",
|
||||||
|
"Getting Started/Modifying the Firmware/Using the install script.md",
|
||||||
"index.md",
|
"index.md",
|
||||||
"obsidian/08 - Troubleshooting.md",
|
"obsidian/08 - Troubleshooting.md",
|
||||||
"obsidian/07 - Working Directory + State Files.md",
|
"obsidian/07 - Working Directory + State Files.md",
|
||||||
@@ -228,33 +228,18 @@
|
|||||||
"Documentation/Menu Buttons/ButtonSetup.png",
|
"Documentation/Menu Buttons/ButtonSetup.png",
|
||||||
"Documentation/Menu Buttons/ButtonSetup.kra",
|
"Documentation/Menu Buttons/ButtonSetup.kra",
|
||||||
"Documentation/Menu Buttons",
|
"Documentation/Menu Buttons",
|
||||||
"Jibo Tools & Mod Installer/07 - Working Directory + State Files.md",
|
|
||||||
"Jibo Tools & Mod Installer/06 - Updater (How It Works).md",
|
"Jibo Tools & Mod Installer/06 - Updater (How It Works).md",
|
||||||
"Jibo Tools & Mod Installer/05 - Windows Support.md",
|
"Jibo Tools & Mod Installer/05 - Windows Support.md",
|
||||||
"Jibo Tools & Mod Installer/04 - GUI (How It Works).md",
|
"Jibo Tools & Mod Installer/04 - GUI (How It Works).md",
|
||||||
"Assets/JiboToolsScreen.png",
|
"Assets/JiboToolsScreen.png",
|
||||||
"Assets/Screenshot_20260317_235807.png",
|
"Assets/Screenshot_20260317_235807.png",
|
||||||
"Jibo Tools & Mod Installer/03 - CLI Arguments.md",
|
"Jibo Tools & Mod Installer/03 - CLI Arguments.md",
|
||||||
"Jibo Tools & Mod Installer/01 - Installer (How It Works).md",
|
|
||||||
"Getting Started/Modifying the Firmware/1. Get your environment ready!.md",
|
"Getting Started/Modifying the Firmware/1. Get your environment ready!.md",
|
||||||
"Jibo Tools & Mod Installer/00 - Index.md",
|
"Jibo Tools & Mod Installer/00 - Index.md",
|
||||||
"Documentation/Useful Items List.md",
|
|
||||||
"Jibo Tools & Mod Installer/02 - Mapping to guide.md",
|
"Jibo Tools & Mod Installer/02 - Mapping to guide.md",
|
||||||
"Getting Started/Welcome to the Jibo Revival Project.md",
|
"Getting Started/Welcome to the Jibo Revival Project.md",
|
||||||
"Documentation/AtDev - New Firewall script.md",
|
|
||||||
"Documentation/Networking/Network Profiling & Traffic Analysis.md",
|
|
||||||
"Documentation/Networking",
|
|
||||||
"Documentation/The be skill/Assets/The audio directory.md",
|
|
||||||
"Documentation/The be skill/Assets/The assets directory.md",
|
|
||||||
"Documentation/The be skill/Assets",
|
|
||||||
"Documentation/The be skill/The Splash screen image!.md",
|
|
||||||
"Documentation/The be skill",
|
|
||||||
"Documentation/The be skill/Assets/Menu Buttons/ButtonSetup.png",
|
"Documentation/The be skill/Assets/Menu Buttons/ButtonSetup.png",
|
||||||
"Assets/Jibo RCM.jpg",
|
"Assets/Jibo RCM.jpg",
|
||||||
"About UART Connection",
|
|
||||||
"Jibo Tools & Mod Installer",
|
|
||||||
"Documentation/The be skill/Assets/Menu Buttons/ButtonSetup.kra",
|
|
||||||
"Documentation/The be skill/Assets/Menu Buttons",
|
|
||||||
"Assets/JiboChatGPT.png"
|
"Assets/JiboChatGPT.png"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -7,7 +7,10 @@ Jibo was built a little weird, so it's easy to forget things. This document cont
|
|||||||
## About [[The audio directory]]
|
## About [[The audio directory]]
|
||||||
|
|
||||||
## About [[Networking & ports & Error codes]]
|
## About [[Networking & ports & Error codes]]
|
||||||
|
|
||||||
## About [[Network Profiling & Traffic Analysis]]
|
## About [[Network Profiling & Traffic Analysis]]
|
||||||
|
|
||||||
|
## About [[Local Voice Round-Trip on Jibo (AI Commmunication)]]
|
||||||
|
|
||||||
- - -
|
- - -
|
||||||
Documented by ZaneDev @ Our Discord
|
Documented by ZaneDev @ Our Discord
|
||||||
@@ -0,0 +1,264 @@
|
|||||||
|
# Local ASR, TTS, and Voice Round-Trip on Jibo (Post-Cloud)
|
||||||
|
|
||||||
|
> This document describes the first confirmed working voice interaction on a Jibo robot after the official cloud services were discontinued.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Short version: Jibo can still have a full conversation loop locally.
|
||||||
|
|
||||||
|
We now have:
|
||||||
|
|
||||||
|
* Speech → text (STT) working locally
|
||||||
|
* Text → speech (TTS) working locally
|
||||||
|
* A working loop where Jibo hears something and responds
|
||||||
|
|
||||||
|
This is all happening without the original cloud services.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Key Findings
|
||||||
|
|
||||||
|
Here’s what we now know for sure:
|
||||||
|
|
||||||
|
* Wake word detection (`hey jibo`) still works locally
|
||||||
|
* Speaker ID is still running locally (even if it rejects us 😄)
|
||||||
|
* `jibo-asr-service` can be started and controlled manually
|
||||||
|
* ASR (speech recognition) is exposed over HTTP on port `8088`
|
||||||
|
* TTS (speech output) is exposed over HTTP on port `8089`
|
||||||
|
|
||||||
|
### ASR Endpoints
|
||||||
|
|
||||||
|
Confirmed working endpoints:
|
||||||
|
|
||||||
|
* `/asr_simple_interface`
|
||||||
|
* `/audio_source`
|
||||||
|
* `/asr_control`
|
||||||
|
* `/status`
|
||||||
|
|
||||||
|
### WebSocket Outputs
|
||||||
|
|
||||||
|
ASR results are streamed over WebSockets:
|
||||||
|
|
||||||
|
* `ws://<jibo-ip>:8088/port`
|
||||||
|
* `ws://<jibo-ip>:8088/simple_port`
|
||||||
|
|
||||||
|
### Example STT Start Payload
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"command": "start",
|
||||||
|
"task_id": "DEBUG:task3",
|
||||||
|
"audio_source_id": "alsa1",
|
||||||
|
"hotphrase": "none",
|
||||||
|
"speech_to_text": true,
|
||||||
|
"request_id": "stt_start3"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What’s Actually Happening (Architecture)
|
||||||
|
|
||||||
|
Here’s the real flow in plain English:
|
||||||
|
|
||||||
|
1. We send a request to Jibo to start listening
|
||||||
|
2. Jibo captures audio from its mic (ALSA)
|
||||||
|
3. The ASR engine processes it
|
||||||
|
4. Results come back over WebSocket
|
||||||
|
5. Our app reads the transcript
|
||||||
|
6. Our app decides what to say
|
||||||
|
7. We send that to Jibo’s TTS
|
||||||
|
8. Jibo speaks
|
||||||
|
|
||||||
|
Visual version:
|
||||||
|
|
||||||
|
```
|
||||||
|
HTTP POST (/asr_simple_interface)
|
||||||
|
↓
|
||||||
|
ASR service captures audio
|
||||||
|
↓
|
||||||
|
Speech recognition runs locally
|
||||||
|
↓
|
||||||
|
WebSocket emits events
|
||||||
|
↓
|
||||||
|
External app receives transcript
|
||||||
|
↓
|
||||||
|
External logic decides response
|
||||||
|
↓
|
||||||
|
HTTP POST (/tts_speak)
|
||||||
|
↓
|
||||||
|
Jibo talks
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Example WebSocket Output
|
||||||
|
|
||||||
|
Here’s a trimmed real example of a final result:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"event_type": "speech_to_text_final",
|
||||||
|
"task_id": "DEBUG:task3",
|
||||||
|
"utterances": [
|
||||||
|
{
|
||||||
|
"utterance": "what time is it",
|
||||||
|
"score": 975.9
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
You’ll also see:
|
||||||
|
|
||||||
|
* `speech_to_text_incremental` (partial results)
|
||||||
|
* `end_of_speech`
|
||||||
|
* `hotphrase` (for "hey jibo")
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Demo Flow (How to Reproduce)
|
||||||
|
|
||||||
|
This is the important part.
|
||||||
|
|
||||||
|
### 1. Make sure you are in `int-developer` mode and the ASR service is running
|
||||||
|
|
||||||
|
Over SSH, start the service manually:
|
||||||
|
|
||||||
|
```
|
||||||
|
/usr/local/bin/jibo-asr-service -c /usr/local/etc/jibo-asr-service.json
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 2. Connect to WebSocket
|
||||||
|
|
||||||
|
```
|
||||||
|
ws://<jibo-ip>:8088/simple_port
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3. Start an STT task
|
||||||
|
|
||||||
|
POST to:
|
||||||
|
|
||||||
|
```
|
||||||
|
http://<jibo-ip>:8088/asr_simple_interface
|
||||||
|
```
|
||||||
|
|
||||||
|
With:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"command": "start",
|
||||||
|
"task_id": "DEBUG:task3",
|
||||||
|
"audio_source_id": "alsa1",
|
||||||
|
"hotphrase": "none",
|
||||||
|
"speech_to_text": true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 4. Speak to Jibo
|
||||||
|
|
||||||
|
Say something like:
|
||||||
|
|
||||||
|
> “what time is it”
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 5. Wait for final transcript
|
||||||
|
|
||||||
|
Watch for:
|
||||||
|
|
||||||
|
```
|
||||||
|
event_type: speech_to_text_final
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 6. Send response to TTS
|
||||||
|
|
||||||
|
POST to:
|
||||||
|
|
||||||
|
```
|
||||||
|
http://<jibo-ip>:8089/tts_speak
|
||||||
|
```
|
||||||
|
|
||||||
|
With something like:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"text": "It is demo time."
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 7. Jibo speaks 🎉
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Known Behaviors / Quirks
|
||||||
|
|
||||||
|
Some things we’ve seen so far:
|
||||||
|
|
||||||
|
* WebSocket connections can drop → reconnect logic helps
|
||||||
|
* Incremental results can be messy or duplicated
|
||||||
|
* Multiple transcript guesses can show up
|
||||||
|
* The wake-word task (`task0`) runs alongside your custom task
|
||||||
|
* Saying “hey jibo” during a manual STT session can interfere
|
||||||
|
* Speaker ID often rejects the speaker (but this doesn’t block STT)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Corrections to Previous Assumptions
|
||||||
|
|
||||||
|
Some earlier assumptions (ours and others’) that are now clearly wrong or incomplete:
|
||||||
|
|
||||||
|
* “ASR is dead without cloud” → **Not true in developer mode**
|
||||||
|
* “Only wake word works locally” → **Incomplete**
|
||||||
|
* “No way to get transcripts” → **False (WebSocket output exists)**
|
||||||
|
* “Jibo can’t answer questions anymore” → **Also false now 🙂**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What This Means
|
||||||
|
|
||||||
|
This is a big deal:
|
||||||
|
|
||||||
|
* Jibo’s core voice pipeline is still there
|
||||||
|
* The cloud was orchestration, not the whole system
|
||||||
|
* We can now rebuild the “brain” externally
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Next Steps
|
||||||
|
|
||||||
|
Where this naturally goes next:
|
||||||
|
|
||||||
|
* Hook wake word → automatically trigger STT
|
||||||
|
* Figure out how this behaves in “normal mode”
|
||||||
|
* See if Jibo tries to initiate outbound connections (old cloud flow)
|
||||||
|
* Intercept or replace those endpoints locally
|
||||||
|
* Build a simple always-on bridge service:
|
||||||
|
|
||||||
|
* Wake word → STT → AI → TTS
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Final Thought
|
||||||
|
|
||||||
|
We didn’t just poke at endpoints here.
|
||||||
|
|
||||||
|
We proved Jibo can:
|
||||||
|
|
||||||
|
* hear
|
||||||
|
* understand
|
||||||
|
* and respond again
|
||||||
|
|
||||||
|
That’s a pretty great place to be.
|
||||||
Reference in New Issue
Block a user