version 18 testing video session - full regression fixes

This commit is contained in:
Jacob Dubin
2026-04-27 22:13:59 -05:00
parent 8c97968d95
commit 17437b5a67
14 changed files with 45972 additions and 25 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,102 @@
{
"name": "neohubjibocom-neohubproactive-tidd36da4d442a611f1aba45cf821ea55ae",
"session": {
"hostName": "neo-hub.jibo.com",
"path": "/v1/proactive",
"kind": "neo-hub-proactive",
"token": "hub-usr_openjibo_owner-1777340189867"
},
"steps": [
{
"text": {
"type": "TRIGGER",
"ts": 1777341970615,
"msgID": "mid-d388c070-42a6-11f1-a414-5cf821ea55ae",
"transID": "tid-d36da4d4-42a6-11f1-aba4-5cf821ea55ae",
"data": {
"triggerSource": "SURPRISE",
"triggerData": {
"looperID": "5c0b221fdf9d450019c5e255"
}
}
},
"binary": null,
"expectedReplyTypes": []
},
{
"text": {
"type": "CONTEXT",
"ts": 1777341970702,
"msgID": "mid-d395f790-42a6-11f1-95f4-5cf821ea55ae",
"transID": "tid-d36da4d4-42a6-11f1-aba4-5cf821ea55ae",
"data": {
"runtime": {
"character": {
"emotion": {
"name": "NEUTRAL",
"valence": 0.45,
"confidence": 0.2
},
"motivation": {
"social": 1,
"playful": 0.5152989351851469
}
},
"perception": {
"speaker": "5c0b221fdf9d450019c5e255",
"peoplePresent": [
{
"id": "NOT_TRAINED",
"entityId": 16085,
"type": "fused",
"confidence": 1
}
]
},
"location": {
"city": "Pleasant Hill",
"state": "Missouri",
"stateAbbr": "MO",
"country": "United States",
"countryCode": "US",
"lat": 38.8358494,
"lng": -94.1427229,
"iso": "2026-04-27T21:06:10.626-05:00"
},
"loop": {
"loopId": "5c0b221fdf9d450019c5e253",
"users": [
{
"firstName": "Erin",
"lastName": "Picone",
"phoneticName": "Erin",
"gender": "female",
"birthdate": 649209600000,
"id": "5c0b221fdf9d450019c5e255",
"accountId": "5c0b20547c46170019235759"
}
],
"jibo": {
"color": "WHITE",
"birthdate": 1544234645598,
"id": "5c0b221fdf9d450019c5e254"
},
"owner": "5c0b221fdf9d450019c5e255"
},
"dialog": {
"referent": null
}
},
"skill": {
"id": null
},
"general": {
"release": "1.9.2"
}
}
},
"binary": null,
"expectedReplyTypes": []
}
]
}

File diff suppressed because it is too large Load Diff

View File

@@ -21,14 +21,14 @@ Release `1.0.18` is now in feature-hardening. Its main bug-fix theme is alarm an
## Latest Live Evidence
`jibo test 24` was captured after the `jibo test 23` alarm/photo fixes.
`jibo test 25` was captured as a broader `1.0.18` regression pass after the `jibo test 24` alarm/photo fixes and the stop/volume feature slice.
- Basic news remained live-proven from `jibo test 23`; `jibo test 24` focused on alarm and photo/gallery regression.
- Alarm replacement yes/no improved: `Yes.` on `clock/alarm_timer_change` was recognized locally. The persistent `7:00 PM` alarm from the previous session still caused repeated replacement prompts until menu cleanup.
- Alarm still struggled around value-entry cleanup. Empty ASR under `clock/alarm_set_value` fell into generic Nimbus speech (`I heard you.`), and `CLIENT_NLU intent=cancel` under `clock/alarm_set_value` mapped back to alarm clarification instead of closing the clock value prompt. Current source now treats clock value empty turns as local no-input and maps value-prompt cancel to local clock `cancel`.
- Photo/gallery cleanup improved: the blue-ring/listening state appeared to settle, create keeper yes/no could complete, and robot logs showed photo adoption/upload plus `it's a keeper`.
- Photo/gallery still has spotty speech recognition. Empty ASR under `gallery/gallery_preview` caused the other observed `I heard you.` after gallery opened. Current source now treats gallery preview empty turns as local no-input instead of relaunching Nimbus fallback speech.
- No `ffmpeg` / `whisper.cpp` error was evident in the `jibo test 24` websocket timeline. Remaining alarm/gallery failures are now mostly local no-input handling and STT/recognition quality, not decode failures.
- Cloud version, good night, good morning, time, joke, radio, news, Word of the Day, and several expressive/personality paths were still reachable. `good day` routed to date, which is acceptable unless a distinct good-day behavior is chosen later.
- Backup-in-progress still appears at the start of every live session and blocks the settings update menu. The Test 25 HTTP capture again did not show `Backup_*` calls; observed cloud traffic was mostly log upload, ASR binary upload, and one `Update_20160301.GetUpdateFrom`. Treat this as a robot-local backup scheduler/status or log/upload load issue until a capture proves a hosted backup API path is involved.
- Timer and alarm remain the riskiest current-release paths. Test 25 showed a timer prompt accepting partial speech, a stale 10-second timer state after reset, a `9:02` alarm being interpreted as the next local PM occurrence, and voice delete reporting success while the robot menu still retained an alarm. Current source fixes some prompt-tail and context handling issues from this run, but alarm set/delete/menu agreement still needs another focused live pass.
- Photo/gallery blue-ring cleanup improved compared with earlier tests, but gallery yes/no could still hang. Test 25 showed `open the photogal` as the observed transcript and active `shared/yes_no` prompts under `@be/gallery`; current source now recognizes the gallery alias and keeps active local prompts live instead of treating that gallery context as passive noise.
- Stop and volume first-pass behavior mostly reached the stock local paths, but exposed cleanup bugs. `Never mind.` included punctuation and mapped to generic chat; current source normalizes punctuation before stop/cancel matching. `Set Volume 2-6.` could pick the wrong number; current source treats that homophone shape as level `6`. `show volume controls` opened settings, then a passive local audio tail caused generic `I heard you`; current source treats `@be/settings` and `settings/volume_control` as local cleanup paths.
- No dominant `ffmpeg` / `whisper.cpp` decode failure emerged from Test 25. Remaining short-answer failures should be split between cloud turn-state bugs, robot-local backup/load interference, and true STT misses.
## Release Rhythm
@@ -72,7 +72,9 @@ Current websocket scope:
- local whisper only attempts external decoding when buffered audio contains an Opus identification header
- auto-finalize thresholds for buffered audio after a real listen phase
- late-audio ignore windows after completed turns
- no-input local completion for constrained prompts, clock value prompts, and gallery preview prompts
- passive local context cleanup for gallery/create/settings contexts after stock local skills take ownership
- no-input local completion for constrained prompts, clock value prompts, gallery preview prompts, and settings volume-control prompts
- active local prompt preservation so `shared/yes_no`, clock, gallery, and settings prompts can still consume transcript-bearing short replies even when the stock skill reports a local context
- unknown inbound websocket types dropped silently instead of echoing stock-OS-unknown OpenJibo events
- file telemetry and fixture export for HTTP, websocket, and turn captures
@@ -92,7 +94,9 @@ The following behavior is present in source and covered by focused tests:
- radio voice launch supports `open the radio` and genre launch such as `play country music`, using local `@be/radio` `menu` payloads, `SKILL_REDIRECT`, and silent completion
- news has a first Nimbus-shaped cloud path using `match.cloudSkill = news` and a `news` `SKILL_ACTION` with synthetic briefing content
- stop commands such as `stop that` and `never mind` emit stock `global_commands` `stop` NLU plus a local `@be/idle` redirect, without generic chat speech
- stop and cancel phrase matching tolerates stock ASR punctuation such as `Never mind.`
- volume commands emit stock `global_commands` volume intents: `volumeUp`, `volumeDown`, and `volumeToValue` with `volumeLevel`; `show volume controls` redirects to `@be/settings` `volumeQuery`
- volume-to-value parsing handles the observed stock ASR homophone shape `Set Volume 2-6.` as level `6`
- stock-shaped clock handoffs cover time, date, day, clock open, timer/alarm menu, timer/alarm value, timer/alarm clarification, and timer/alarm delete
- alarm parsing covers forms such as `7:30 am`, `830`, `8 30`, `7, 44`, `10-25`, `10:25 pm`, and `10 25 p m`
- ambiguous alarm times can prefer the next local occurrence when the robot context includes `runtime.location.iso`
@@ -100,13 +104,14 @@ The following behavior is present in source and covered by focused tests:
- `CLIENT_NLU intent=set` with only `domain=alarm` stays on the local clock clarification path instead of defaulting to a fabricated time
- `CLIENT_NLU intent=cancel` on `clock/alarm_timer_query_menu` can reuse the last active clock domain
- `CLIENT_NLU intent=cancel` on `clock/alarm_set_value` / `clock/timer_set_value` maps to local clock `cancel` instead of re-asking for a value
- photo flows route `open photo gallery` to `@be/gallery`, `snap a picture` to `@be/create/createOnePhoto`, and `open photobooth` to `@be/create/createSomePhotos`
- passive gallery/create context does not reopen a stale cloud turn
- photo flows route `open photo gallery`, observed `open photogal`, `snap a picture`, and `open photobooth` to the matching gallery/create local skills
- passive gallery/create/settings context does not reopen a stale cloud turn
- active local prompts under gallery/settings context are preserved so short `yes`/`no` answers can finalize the prompt instead of being suppressed as passive context
- media metadata persists across store recreation and `/media/{path}` can serve the current text-body placeholder payload
- constrained yes/no handling covers `clock/alarm_timer_change`, `clock/alarm_timer_none_set`, `create/is_it_a_keeper`, `shared/yes_no`, `settings/download_now_later`, `surprises-date/offer_date_fact`, `surprises-ota/want_to_download_now`, and `$YESNO` hints
- outbound constrained yes/no responses strip unrelated `globals/*` rules so stock OS stays local
- no-input fallback for constrained yes/no prompts emits local `LISTEN`/`EOS` instead of relaunching generic Nimbus speech, including `shared/yes_no` after STT failure
- no-input fallback for clock value prompts and `gallery/gallery_preview` emits local `LISTEN`/`EOS` instead of generic `I heard you` Nimbus speech
- no-input fallback for clock value prompts, `gallery/gallery_preview`, and `settings/volume_control` emits local `LISTEN`/`EOS` instead of generic `I heard you` Nimbus speech
- repeated empty `create/is_it_a_keeper` replies redirect to `@be/idle` after the second miss so the photo/create flow can settle instead of leaving a stale listening state
- local whisper skips buffered audio turns that do not contain `OpusHead`, preventing a known `ffmpeg` failure path from becoming the noisy failure mode
- Word of the Day launch, spoken guesses, structured `CLIENT_NLU` guesses, hint-order guesses, fuzzy hint matching, right-word cleanup, and late audio cleanup are covered in the websocket layer
@@ -145,14 +150,15 @@ Before calling `1.0.18` complete, prove or explicitly defer these:
- Run the focused `.NET` cloud test suite after the last feature slice.
- Run the current-release live checklist in [regression-test-plan.md](regression-test-plan.md).
- Confirm the running robot build reports cloud version `1.0.18`.
- Regression test alarm flows again after the `jibo test 24` fixes: set with explicit time, set with compact/spoken/comma-separated time, clarify missing time, replace an existing alarm, cancel/delete by voice, cancel out of a value prompt, and verify the menu agrees.
- Regression test photo/gallery flows again after the `jibo test 24` fixes: open gallery, answer the stock `shared/yes_no` prompt with a transcript-bearing `yes`, hand into create, take one photo, keep it, and avoid blue-ring or `I heard you` stale turns.
- Regression test alarm flows again after the `jibo test 25` fixes: set with explicit time, set with compact/spoken/comma-separated time, clarify missing time, replace an existing alarm, cancel/delete by voice, cancel out of a value prompt, and verify the menu agrees.
- Regression test timer flows after the Test 25 stale-timer observation: set a 10-second timer, let it fire, reset by gesture only after recording state, and verify a new timer prompt does not see an already-expired timer as still active.
- Regression test photo/gallery flows again after the `jibo test 25` fixes: open gallery, answer the stock `shared/yes_no` prompt with a transcript-bearing `yes`, hand into create, take one photo, keep it, and avoid blue-ring or `I heard you` stale turns.
- Live-test radio launch: `open the radio` passed in `jibo test 22`; re-run `play country music` if that exact phrase was not captured.
- Treat basic news as live-proven by `jibo test 23`; defer provider-backed or category-expanded news unless it is chosen as an optional feature slice.
- Regression test the added stop and volume slices: `stop that`, `never mind`, `turn it up`, `turn it down`, `set volume to six`, and `show volume controls`.
- Regression test the added stop and volume slices after the Test 25 fixes: `stop that`, `never mind`, `turn it up`, `turn it down`, `set volume to six`, `set volume to 6`, and `show volume controls`.
- Recheck constrained yes/no prompts for update/backup/share/gallery/alarm replacement without leaking global rules.
- Recheck that stock OS no longer logs OpenJibo-only websocket events such as synthetic pending/context/ack packets from the current build.
- Recheck backup/update behavior with explicit attention to robot-local `jibo.scheduler.backupStatus`, CPU/load, and whether the deployed cloud is involved at all.
- Recheck backup/update behavior with explicit attention to robot-local `jibo.scheduler.backupStatus`, CPU/load, log/upload activity, and whether the deployed cloud is involved at all.
- Treat remaining empty-ASR, `ffmpeg`, or `whisper.cpp` transcript failures as STT work unless the capture proves a separate turn-routing regression.
## Known Gaps
@@ -163,9 +169,10 @@ These are not blockers for calling `1.0.18` complete unless the live test shows
- media upload/body handling is not binary-safe enough for final gallery originals and thumbnails
- state persistence is local JSON, not Azure SQL / Blob Storage
- update, backup, and restore are not end-to-end proven, and the `jibo test 22` sluggishness appears tied to robot-local backup status/load
- Test 25 still showed repeated backup-in-progress behavior and update-menu blockage without corresponding `Backup_*` HTTP traffic
- deployed-build verification needs to prove that synthetic OpenJibo websocket events are gone from the hosted artifact, not just from source
- news content is synthetic; `jibo test 23` proved the path but not live provider-backed headlines
- gallery `shared/yes_no` still needs a successful transcript-bearing live `yes` pass
- gallery `shared/yes_no`, settings volume-control cleanup, and punctuated `never mind` still need successful live proof after the Test 25 source fixes
- weather, calendar, commute, personal report, identity, memory, and proactivity are still mostly discovery or placeholder content paths
- stop and volume are implemented but still need live stock-OS proof; robot age and command-versus-question personality routing are not implemented yet

View File

@@ -42,6 +42,7 @@ Current release theme:
- `jibo test 22` validated radio, exposed backup/load interference, exposed a shared yes/no no-input gap, exposed repeated create keeper prompts after photo handoff, and showed local whisper `ffmpeg` failures on unusable buffered audio
- `jibo test 23` validated basic news, proved one alarm set/fire path at `7:43 AM`, exposed comma-separated/short alarm follow-up parsing risk, showed stock alarm replacement yes/no rules that needed cloud handling, and showed photo gallery still failing when `shared/yes_no` ASR came back empty
- `jibo test 24` showed alarm replacement yes/no working, but exposed empty `clock/alarm_set_value` and `gallery/gallery_preview` turns falling into generic `I heard you` fallback speech; it also showed `CLIENT_NLU cancel` inside `clock/alarm_set_value` re-asking for an alarm value instead of closing the prompt
- `jibo test 25` proved a broader regression path but exposed repeated backup-in-progress/update-menu blockage, timer/alarm stale state and delete/menu disagreement, gallery `shared/yes_no` hangs under `@be/gallery`, punctuated `Never mind.` falling through to chat, volume homophone parsing (`Set Volume 2-6.`), and settings volume-control cleanup falling into `I heard you`
## Immediate `1.0.18` Queue
@@ -99,10 +100,11 @@ Current release theme:
- repeated empty `create/is_it_a_keeper` replies redirect to `@be/idle` after the second miss
- Latest evidence:
- `jibo test 22` did not show `Backup_*` HTTP traffic during the backup complaint
- `jibo test 25` again showed backup-in-progress/update-menu blockage without `Backup_*` HTTP traffic; observed cloud traffic was log upload, ASR binary upload, and update check traffic
- stock `@be/surprises-ota` drives the backup notification from robot-local `jibo.scheduler.backupStatus`
- original `surprises-ota` tests make backup and OTA notifications contextual-priority prompts, with repeat suppression through last-notification timestamps
- a spoken `take a backup` command currently routes as generic chat and is not the same as proving the local backup scheduler path
- `jibo test 23` again showed backup-in-progress sluggishness and update-menu blockage while backups were active; explicit backup voice launch remains unwired
- `jibo test 23` and `jibo test 25` showed backup-in-progress sluggishness and update-menu blockage while backups were active; explicit backup voice launch remains unwired
- Exit criteria:
- spoken `yes` and `no` work on update, backup, share/offer, and gallery/create prompts
- empty or missed short replies retry locally instead of relaunching Nimbus or generic chat
@@ -125,7 +127,8 @@ Current release theme:
- stock alarm replacement/no-alarm prompts use the constrained yes/no path
- gallery opens as `@be/gallery`; snapshot and photobooth open through `@be/create`
- empty `gallery/gallery_preview` turns complete locally as no-input instead of relaunching Nimbus fallback speech
- passive gallery/create context no longer reopens stale cloud turns
- passive gallery/create/settings context no longer reopens stale cloud turns
- active local prompts under gallery/settings contexts are preserved so real short replies are not suppressed as passive context
- `shared/yes_no` no-input fallback and repeated create keeper cleanup were added after `jibo test 22`
- Latest evidence:
- gallery opened and handed into create, but repeated `create/is_it_a_keeper` prompts could leave the blue ring/listening state
@@ -135,6 +138,8 @@ Current release theme:
- `jibo test 23` photo gallery got stuck on `shared/yes_no` turns with empty ASR, not on a transcript-bearing `yes` that the cloud mapped incorrectly
- `jibo test 24` recognized `Yes.` for `clock/alarm_timer_change`, but empty `clock/alarm_set_value` produced `I heard you`; current source now keeps that as local no-input
- `jibo test 24` showed photo/gallery blue-ring cleanup improved and create keeper completion working, but empty `gallery/gallery_preview` produced `I heard you`; current source now keeps that as local no-input
- `jibo test 25` showed gallery launching from the observed phrase `open the photogal`, but active `shared/yes_no` prompts under `@be/gallery` could hang; current source now recognizes the alias and preserves active gallery prompts even while ignoring passive gallery tails
- `jibo test 25` showed timer/alarm still needs live follow-up for stale timer state, alarm replacement/PM ambiguity, and voice delete versus robot menu agreement
- original clock tests confirm cancel inside the alarm value prompt must close without scheduling, existing-alarm `keep` must preserve KB/scheduler state, and existing-alarm `delete` or `cancel` must clear it
- original gallery tests confirm empty-gallery `yes` redirects to `@be/create`, empty-gallery `no` exits, media-load failure exits, and delete confirmation only deletes on a positive `yes`
- Exit criteria:
@@ -158,6 +163,9 @@ Current release theme:
- `turn it up` and `turn it down` emit stock `global_commands` `volumeUp` / `volumeDown` with `volumeLevel = null` and no cloud speech
- `set volume to six` emits stock `global_commands` `volumeToValue` with `volumeLevel = 6` and no cloud speech
- `show volume controls` redirects into `@be/settings` with `volumeQuery`
- stop/cancel matching now normalizes stock ASR punctuation, so `Never mind.` is still a stop command
- absolute volume parsing now treats the observed homophone shape `Set Volume 2-6.` as level `6`
- passive settings context and `settings/volume_control` no-input cleanup now avoid post-panel `I heard you` fallback speech
- Evidence:
- Pegasus `globals/global_commands_launch.rule` defines `stop`, `volumeUp`, `volumeDown`, and `volumeToValue`
- stock Jibo `VolumePlugin` subscribes to global volume events and uses the same intent/entity names
@@ -167,6 +175,7 @@ Current release theme:
- live volume up/down audibly changes volume or logs a local volume event
- live volume-to-value changes the setting to the requested value or logs the expected stock local handling
- live volume controls opens the settings volume panel
- live volume controls settles after the panel opens without a trailing `I heard you`
## Implemented In Current Source
@@ -216,6 +225,7 @@ Current release theme:
- Follow-up:
- live regression remains in the immediate queue
- add fixture coverage for original clock-test branches that are not yet mirrored in `.NET`: no-alarm query `yes`/`no`, existing-alarm `keep` versus `delete`, and cross-domain `OtherSet` behavior
- Test 25 still requires a focused live check for timer stale state and alarm voice delete versus menu state
### Photo / Gallery / Create Family
@@ -223,9 +233,11 @@ Current release theme:
- Tags: `protocol`, `storage`
- Result:
- gallery, snapshot, and photobooth voice paths route to the correct local skills
- the observed `open photogal` transcript routes to gallery
- media metadata persists locally
- `/media/{path}` serves the current text-body placeholder payload
- empty `gallery/gallery_preview` turns produce local no-input instead of generic Nimbus fallback speech
- active `shared/yes_no` prompts under `@be/gallery` stay active instead of being suppressed as passive local context
- repeated empty `create/is_it_a_keeper` turns redirect to `@be/idle` after the second miss
- Follow-up:
- live regression remains in the immediate queue
@@ -263,9 +275,11 @@ Current release theme:
- Tags: `protocol`
- Result:
- global stop commands emit stock `global_commands` `stop` and redirect to `@be/idle`
- stop/cancel command matching tolerates punctuation from stock ASR
- relative volume commands emit stock `global_commands` `volumeUp` / `volumeDown`
- absolute volume commands emit `volumeToValue` with a `volumeLevel` entity
- absolute volume commands emit `volumeToValue` with a `volumeLevel` entity, including the observed `Set Volume 2-6.` shape
- volume controls launch redirects to `@be/settings` `volumeQuery`
- passive settings context and `settings/volume_control` no-input cleanup avoid stale generic speech after the settings panel opens
- websocket responses avoid generic chat speech for these local/global command paths
- Follow-up:
- live validation remains in the immediate queue because volume depends on stock robot local global-command handling
@@ -337,6 +351,7 @@ Current release theme:
- `@be/restore` waits for a UGC key, runs restore, and reboots
- original OTA surprise tests treat backup/download status as robot-local scheduler state, not as a direct cloud backup command path
- no-op update fabrication has been removed from `.NET`
- Test 25 still showed repeated backup-in-progress/update-menu blockage without `Backup_*` HTTP traffic
- Exit criteria:
- no phantom "always has updates" behavior
- one controlled update can be staged and delivered
@@ -353,6 +368,7 @@ Current release theme:
- `jibo test 22` showed `ffmpeg` and `whisper.cpp` failures
- `jibo test 23` did not show the same decode failure pattern, but gallery yes/no turns still produced empty ASR
- `jibo test 24` still had collapsed or empty transcripts in alarm/gallery paths, including `Sudden alarm.`, `I'm setting alarm for seven.`, empty clock value input, and empty gallery preview input
- `jibo test 25` still had short-answer failures, but several were cloud turn-state issues now patched rather than pure STT failures
- current source now skips local whisper when buffered audio does not contain an Opus identification header
- yes/no and alarm flows are especially sensitive to short or collapsed transcripts
- Implementation notes:

View File

@@ -15,6 +15,7 @@ Run this plan:
- after the last code change before calling a release complete
- after any fix that touches websocket turn finalization, local skill redirects, constrained yes/no, or STT
- before moving from `1.0.18` bug-fix closeout into `1.0.19` feature work
- after the Test 25 fixes, run at least the focused alarm/timer, photo/gallery, stop, and volume sections before deciding whether `1.0.18` is ready to freeze
For small feature slices, run the automated `.NET` tests plus the smoke checks and only the live sections that share the same machinery. Before release closeout, run the full current-release suite.
@@ -91,6 +92,7 @@ Goal: prove constrained yes/no prompts stay local and do not leak global launch
- Exercise any available share/date/offer yes-no prompt and answer both `yes` and `no` across runs when practical.
- Observe backup-in-progress behavior separately from explicit voice commands.
- Do not treat a spoken `take a backup` failure as proof of the backup scheduler path; that command is not currently wired as a hosted-cloud voice feature.
- If the update menu reports backup-in-progress, record whether HTTP captures include any `Backup_*` targets; current evidence points to robot-local scheduler/status or log/upload load unless those calls appear.
- Expected: short `yes`/`no` replies map locally, empty replies complete locally as no-input, and backup/download notifications are not repeatedly re-announced once acknowledged.
- Capture check: active rule remains the constrained rule such as `surprises-ota/want_to_download_now`, `settings/download_now_later`, `shared/yes_no`, or another stock prompt rule.
@@ -109,6 +111,7 @@ Test these paths:
- value-prompt cancel: `set an alarm`, then say `cancel`
- voice delete: `delete my alarm` or `cancel alarm`
- no-input cleanup: allow one value prompt to miss or time out when practical
- timer sanity: `set a timer for 10 seconds`, let it fire or record the exact remaining state, then verify a second timer request does not report a stale already-running timer
Expected:
@@ -116,6 +119,7 @@ Expected:
- replacement prompt answer changes or preserves the alarm consistently with the robot's question
- `cancel` inside the value prompt closes without scheduling
- voice delete clears the robot menu state
- timer state agrees with what just happened on the robot; a reset gesture should not leave a phantom active timer in the next prompt
- empty value prompt turns complete locally instead of producing generic `I heard you` speech
Capture check:
@@ -133,6 +137,7 @@ Test these paths:
- `open photo gallery`
- if gallery is empty, answer `yes` to the offer to take a picture
- if the robot hears `open photogal` or another close gallery alias, verify it still launches gallery
- take one photo and answer the keeper prompt with `yes`
- repeat a gallery empty prompt or create keeper prompt with a missed/empty answer when practical
- if using disposable test photos, test delete confirmation once with `no` and once with `yes`
@@ -142,6 +147,7 @@ Expected:
- empty gallery `yes` redirects to `@be/create`
- empty gallery `no` exits cleanly when tested
- keeper `yes` completes and Jibo settles without a stale blue ring
- transcript-bearing `yes` under gallery `shared/yes_no` is consumed even when the robot reports `@be/gallery` context
- empty `shared/yes_no`, `create/is_it_a_keeper`, and `gallery/gallery_preview` turns complete locally as no-input instead of producing generic `I heard you` speech
- delete confirmation only deletes on a positive `yes`
@@ -150,6 +156,7 @@ Capture check:
- gallery launch redirects to `@be/gallery`
- create photo redirects to `@be/create/createOnePhoto`
- local no-input replies keep the active constrained rule and strip unrelated global launch rules
- active `shared/yes_no` is not suppressed merely because the current context is `@be/gallery`
### STT And Audio Quality
@@ -177,9 +184,11 @@ Test these phrases:
- `stop`
- `stop that`
- `never mind`
- `never mind.` or any punctuated transcript form observed in the capture
- `turn it up`
- `turn it down`
- `set volume to six`
- `set volume to 6`
- `show volume controls`
Expected:
@@ -188,13 +197,16 @@ Expected:
- `turn it up` and `turn it down` adjust volume or at least produce the stock local volume event/log
- `set volume to six` sets or attempts to set the local volume level to `6`
- `show volume controls` opens the settings volume panel
- after `show volume controls`, the robot settles without a trailing `I heard you`
Capture check:
- stop emits `nlu.intent = stop`, `nlu.domain = global_commands`, then redirects to `@be/idle`
- punctuated `Never mind.` still maps to global stop, not generic chat
- relative volume emits `nlu.intent = volumeUp` or `volumeDown`, `nlu.domain = global_commands`, and `entities.volumeLevel = null`, with no `SKILL_ACTION` cloud speech
- absolute volume emits `nlu.intent = volumeToValue` and `entities.volumeLevel` matching the requested value, with no `SKILL_ACTION` cloud speech
- absolute volume emits `nlu.intent = volumeToValue` and `entities.volumeLevel` matching the requested value, including the observed `Set Volume 2-6.` homophone shape, with no `SKILL_ACTION` cloud speech
- volume controls redirects to `@be/settings` with `nlu.intent = volumeQuery`
- passive `@be/settings` / `settings/volume_control` audio tails complete locally and do not reopen Nimbus fallback speech
## Optional Feature Slice Checks

View File

@@ -425,6 +425,8 @@ public sealed class JiboInteractionService(
if (MatchesAny(
loweredTranscript,
"photo gallery",
"photogal",
"photo gal",
"open the gallery",
"open photo gallery",
"show my photos",
@@ -1172,9 +1174,10 @@ public sealed class JiboInteractionService(
/// <summary>
/// Decides whether the current turn should be treated as a cancel request.
/// </summary>
/// <param name="clientIntent">Intent reported by the client NLU, if any; compared case-insensitively against "cancel" and "stop".</param>
/// <param name="loweredTranscript">Transcript text; it is passed through <c>NormalizeCommandPhrase</c> before phrase comparison.</param>
/// <returns>
/// <c>true</c> when the client intent is "cancel" or "stop", or when the normalized transcript is exactly
/// "cancel", "stop", "never mind", or "nevermind"; otherwise <c>false</c>.
/// </returns>
private static bool IsCancelRequest(string? clientIntent, string loweredTranscript)
{
    // Compare against the normalized transcript only, so punctuated forms such as "never mind."
    // still match. The earlier comparison on the raw transcript is removed: it duplicated this
    // check without normalization and terminated the return expression early.
    var normalizedTranscript = NormalizeCommandPhrase(loweredTranscript);

    return string.Equals(clientIntent, "cancel", StringComparison.OrdinalIgnoreCase) ||
           string.Equals(clientIntent, "stop", StringComparison.OrdinalIgnoreCase) ||
           normalizedTranscript is "cancel" or "stop" or "never mind" or "nevermind";
}
private static bool IsGlobalStopRequest(
@@ -1188,8 +1191,9 @@ public sealed class JiboInteractionService(
return true;
}
return loweredTranscript is "stop" or "stop it" or "stop that" or "stop talking" or "be quiet" or "never mind" or "nevermind" or "forget it" ||
MatchesAny(loweredTranscript, "that s enough", "that's enough", "that will do", "that ll do", "that'll do", "cut it out", "cut that out");
var normalizedTranscript = NormalizeCommandPhrase(loweredTranscript);
return normalizedTranscript is "stop" or "stop it" or "stop that" or "stop talking" or "be quiet" or "never mind" or "nevermind" or "forget it" ||
MatchesAny(normalizedTranscript, "that s enough", "that will do", "that ll do", "cut it out", "cut that out");
}
private static bool IsVolumeQueryRequest(string loweredTranscript)
@@ -1287,10 +1291,26 @@ public sealed class JiboInteractionService(
return "1";
}
var match = VolumeLevelPattern.Match(loweredTranscript);
var normalizedTranscript = NormalizeCommandPhrase(loweredTranscript);
var homophoneMatch = VolumeToValueHomophonePattern.Match(normalizedTranscript);
if (homophoneMatch.Success &&
TryNormalizeVolumeLevel(homophoneMatch.Groups["value"].Value) is { } homophoneLevel)
{
return homophoneLevel;
}
var match = VolumeLevelPattern.Match(normalizedTranscript);
return !match.Success ? null : TryNormalizeVolumeLevel(match.Groups["value"].Value);
}
/// <summary>
/// Produces the canonical form of a spoken command used for phrase comparison.
/// The input is trimmed and lower-cased (invariant culture), every match of
/// CommandPhrasePattern is replaced by a single space, every match of
/// CommandWhitespacePattern is collapsed to a single space, and the result is trimmed again.
/// </summary>
/// <param name="value">Raw or already lower-cased transcript text.</param>
/// <returns>The normalized phrase.</returns>
private static string NormalizeCommandPhrase(string value)
{
    var lowered = value.Trim().ToLowerInvariant();

    // Punctuation becomes a space rather than being dropped, so "2-6" turns into "2 6".
    var withoutPunctuation = CommandPhrasePattern.Replace(lowered, " ");
    var singleSpaced = CommandWhitespacePattern.Replace(withoutPunctuation, " ");

    return singleSpaced.Trim();
}
private static string? TryNormalizeVolumeLevel(string token)
{
if (string.Equals(token, "null", StringComparison.OrdinalIgnoreCase))
@@ -1489,6 +1509,18 @@ public sealed class JiboInteractionService(
@"\b(?:volume|loudness)\s*(?:to|at|level|is)?\s*(?<value>10|\d|one|two|three|four|five|six|seven|eight|nine|ten)\b|\b(?:set|change|make|turn)\s+(?:the\s+|your\s+)?(?:volume|loudness)\s*(?:to|at)?\s*(?<value>10|\d|one|two|three|four|five|six|seven|eight|nine|ten)\b",
RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);
// Matches "volume"/"loudness", then one of "2", "two" or "to", then the level captured in
// the "value" group (10, a single digit, or the words one..ten). This covers transcripts
// where the word "to" was heard as a number, e.g. "set volume 2 6" yields value = "6".
private static readonly Regex VolumeToValueHomophonePattern = new(
@"\b(?:volume|loudness)\s+(?:2|two|to)\s+(?<value>10|\d|one|two|three|four|five|six|seven|eight|nine|ten)\b",
RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);
// Matches any single character that is neither a word character nor whitespace
// (i.e. punctuation and symbols); NormalizeCommandPhrase replaces each match with a space.
private static readonly Regex CommandPhrasePattern = new(
@"[^\w\s]",
RegexOptions.CultureInvariant | RegexOptions.Compiled);
// Matches a run of one or more whitespace characters; NormalizeCommandPhrase collapses
// each run to a single space.
private static readonly Regex CommandWhitespacePattern = new(
@"\s+",
RegexOptions.CultureInvariant | RegexOptions.Compiled);
private static readonly (string Phrase, string Station)[] RadioGenreAliases =
[
("country music", "Country"),

View File

@@ -87,6 +87,7 @@ public sealed partial class WebSocketTurnFinalizationService(
turnState.AwaitingTurnCompletion = false;
turnState.IgnoreAdditionalAudioUntilUtc = DateTimeOffset.UtcNow.Add(WebSocketTurnState.DefaultLateAudioIgnoreWindow);
ResetBufferedAudio(session);
turnState.SawListen = false;
turnState.SawContext = false;
return [];
}
@@ -550,9 +551,24 @@ public sealed partial class WebSocketTurnFinalizationService(
return false;
}
if (HasCloudHandledLocalPromptOpen(session.TurnState))
{
return false;
}
var skillId = TryReadContextSkillId(text);
return string.Equals(skillId, "@be/gallery", StringComparison.OrdinalIgnoreCase) ||
string.Equals(skillId, "@be/create", StringComparison.OrdinalIgnoreCase);
string.Equals(skillId, "@be/create", StringComparison.OrdinalIgnoreCase) ||
string.Equals(skillId, "@be/settings", StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Reports whether the turn is still awaiting completion, has seen a listen message,
/// and carries at least one listen rule classified as a clock value rule, a gallery
/// preview rule, or a constrained yes/no rule.
/// </summary>
/// <param name="turnState">Turn state to inspect.</param>
/// <returns>True only when all of the conditions above hold.</returns>
private static bool HasCloudHandledLocalPromptOpen(WebSocketTurnState turnState)
{
    // Both flags must be set before the listen rules are consulted at all.
    if (turnState is not { AwaitingTurnCompletion: true, SawListen: true })
    {
        return false;
    }

    return turnState.ListenRules.Any(listenRule =>
        IsClockValueRule(listenRule) ||
        IsGalleryPreviewRule(listenRule) ||
        IsConstrainedYesNoRule(listenRule));
}
private static string? ExtractDataPayload(string? text)
@@ -781,6 +797,7 @@ public sealed partial class WebSocketTurnFinalizationService(
private static bool IsLocalNoInputRule(string rule)
{
return string.Equals(rule, "clock/alarm_timer_okay", StringComparison.OrdinalIgnoreCase) ||
string.Equals(rule, "settings/volume_control", StringComparison.OrdinalIgnoreCase) ||
IsClockValueRule(rule) ||
IsGalleryPreviewRule(rule) ||
IsConstrainedYesNoRule(rule);

View File

@@ -275,6 +275,21 @@ public sealed class JiboInteractionServiceTests
Assert.Equal("global_commands", decision.SkillPayload["nluDomain"]);
}
/// <summary>
/// "Never mind." with trailing punctuation must resolve to the stop intent on @be/idle.
/// </summary>
[Fact]
public async Task BuildDecisionAsync_NeverMindWithPunctuation_MapsToIdleStopCommand()
{
    // Arrange
    var sut = CreateService();
    var turn = new TurnContext
    {
        RawTranscript = "Never mind.",
        NormalizedTranscript = "Never mind."
    };

    // Act
    var decision = await sut.BuildDecisionAsync(turn);

    // Assert
    Assert.Equal("stop", decision.IntentName);
    Assert.Equal("@be/idle", decision.SkillName);
}
[Fact]
public async Task BuildDecisionAsync_TurnItUp_MapsToGlobalVolumeUpCommand()
{
@@ -309,6 +324,22 @@ public sealed class JiboInteractionServiceTests
Assert.Equal("6", decision.SkillPayload["volumeLevel"]);
}
/// <summary>
/// The transcript "Set Volume 2-6." must resolve to an absolute volume request
/// whose level is the trailing number ("6").
/// </summary>
[Fact]
public async Task BuildDecisionAsync_SetVolumeTwoSix_UsesTrailingHomophoneLevel()
{
    // Arrange
    var sut = CreateService();
    var turn = new TurnContext
    {
        RawTranscript = "Set Volume 2-6.",
        NormalizedTranscript = "Set Volume 2-6."
    };

    // Act
    var decision = await sut.BuildDecisionAsync(turn);

    // Assert
    Assert.Equal("volume_to_value", decision.IntentName);
    var payload = decision.SkillPayload!;
    Assert.Equal("volumeToValue", payload["globalIntent"]);
    Assert.Equal("6", payload["volumeLevel"]);
}
[Fact]
public async Task BuildDecisionAsync_ShowVolumeControls_MapsToSettingsVolumeQuery()
{
@@ -325,6 +356,21 @@ public sealed class JiboInteractionServiceTests
Assert.Equal("volumeQuery", decision.SkillPayload!["localIntent"]);
}
/// <summary>
/// The transcript "open the photogal" must resolve to the photo_gallery intent on @be/gallery.
/// </summary>
[Fact]
public async Task BuildDecisionAsync_OpenPhotogal_MapsToGalleryLaunch()
{
    // Arrange
    var sut = CreateService();
    var turn = new TurnContext
    {
        RawTranscript = "open the photogal",
        NormalizedTranscript = "open the photogal"
    };

    // Act
    var decision = await sut.BuildDecisionAsync(turn);

    // Assert
    Assert.Equal("photo_gallery", decision.IntentName);
    Assert.Equal("@be/gallery", decision.SkillName);
}
[Fact]
public async Task BuildDecisionAsync_OpenTimer_MapsToLocalClockTimerMenu()
{

View File

@@ -932,6 +932,111 @@ public sealed class JiboWebSocketServiceTests
Assert.Equal(0, session.TurnState.BufferedAudioChunkCount);
}
/// <summary>
/// Sends a LISTEN constrained to shared/yes_no, then a CONTEXT from @be/gallery carrying a
/// "yes" transcript hint, then audio frames, and verifies that the turn still finalizes
/// with a "yes" answer matched against shared/yes_no.
/// </summary>
[Fact]
public async Task Context_FromGalleryYesNoPrompt_DoesNotSuppressYesAnswer()
{
// Arrange: open a listen turn whose first rule is shared/yes_no.
await _service.HandleMessageAsync(new WebSocketMessageEnvelope
{
HostName = "neo-hub.jibo.com",
Path = "/listen",
Kind = "neo-hub-listen",
Token = "hub-gallery-yesno-context-token",
Text = """{"type":"LISTEN","transID":"trans-gallery-yesno-context","data":{"rules":["shared/yes_no","globals/gui_nav","globals/mim_repeat","globals/global_commands_launch"],"asr":{"hints":["$YESNO"]}}}"""
});
// CONTEXT for the same transaction, reporting the gallery skill and a "yes" transcript hint.
await _service.HandleMessageAsync(new WebSocketMessageEnvelope
{
HostName = "neo-hub.jibo.com",
Path = "/listen",
Kind = "neo-hub-listen",
Token = "hub-gallery-yesno-context-token",
Text = """{"type":"CONTEXT","transID":"trans-gallery-yesno-context","data":{"audioTranscriptHint":"yes","skill":{"id":"@be/gallery"}}}"""
});
// The first four 3000-byte audio frames must not produce any reply yet.
for (var index = 0; index < 4; index += 1)
{
var interimReplies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope
{
HostName = "neo-hub.jibo.com",
Path = "/listen",
Kind = "neo-hub-listen",
Token = "hub-gallery-yesno-context-token",
Binary = new byte[3000]
});
Assert.Empty(interimReplies);
}
var session = _store.FindSessionByToken("hub-gallery-yesno-context-token");
Assert.NotNull(session);
// Backdate the first-audio timestamp by two seconds.
// NOTE(review): presumably this makes the next frame cross the finalization threshold - confirm.
session.TurnState.FirstAudioReceivedUtc = DateTimeOffset.UtcNow - TimeSpan.FromSeconds(2);
// Act: one more audio frame.
var replies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope
{
HostName = "neo-hub.jibo.com",
Path = "/listen",
Kind = "neo-hub-listen",
Token = "hub-gallery-yesno-context-token",
Binary = new byte[3000]
});
// Assert: three replies, the first of which carries the "yes" result for shared/yes_no.
Assert.Equal(3, replies.Count);
using var listenPayload = JsonDocument.Parse(replies[0].Text!);
Assert.Equal("yes", listenPayload.RootElement.GetProperty("data").GetProperty("asr").GetProperty("text").GetString());
Assert.Equal("yes", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("intent").GetString());
Assert.Equal("shared/yes_no", listenPayload.RootElement.GetProperty("data").GetProperty("nlu").GetProperty("rules")[0].GetString());
Assert.Equal("shared/yes_no", listenPayload.RootElement.GetProperty("data").GetProperty("match").GetProperty("rule").GetString());
}
/// <summary>
/// Sends a LISTEN with the settings/volume_control rule followed by a CONTEXT from
/// @be/settings, and verifies that the turn is closed without replies and that later
/// audio frames neither produce replies nor accumulate in the audio buffer.
/// </summary>
[Fact]
public async Task Context_FromSettingsVolumeControl_IgnoresPassiveLocalAudioTail()
{
// Arrange: open a listen turn whose first rule is settings/volume_control.
await _service.HandleMessageAsync(new WebSocketMessageEnvelope
{
HostName = "neo-hub.jibo.com",
Path = "/listen",
Kind = "neo-hub-listen",
Token = "hub-settings-volume-tail-token",
Text = """{"type":"LISTEN","transID":"trans-settings-volume-tail","data":{"rules":["settings/volume_control","globals/gui_nav","globals/global_commands_launch"]}}"""
});
// Act: CONTEXT for the same transaction, reporting the settings skill.
var contextReplies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope
{
HostName = "neo-hub.jibo.com",
Path = "/listen",
Kind = "neo-hub-listen",
Token = "hub-settings-volume-tail-token",
Text = """{"type":"CONTEXT","transID":"trans-settings-volume-tail","data":{"skill":{"id":"@be/settings"}}}"""
});
// Assert: the CONTEXT yields no replies and leaves the turn closed with the listen flag cleared.
Assert.Empty(contextReplies);
var session = _store.FindSessionByToken("hub-settings-volume-tail-token");
Assert.NotNull(session);
Assert.False(session.TurnState.AwaitingTurnCompletion);
Assert.False(session.TurnState.SawListen);
// Move the ignore-until timestamp one second into the past.
// NOTE(review): presumably so the frames below are not dropped merely by the late-audio ignore window - confirm.
session.TurnState.IgnoreAdditionalAudioUntilUtc = DateTimeOffset.UtcNow - TimeSpan.FromSeconds(1);
// Five trailing 3000-byte audio frames must each produce no reply.
for (var index = 0; index < 5; index += 1)
{
var replies = await _service.HandleMessageAsync(new WebSocketMessageEnvelope
{
HostName = "neo-hub.jibo.com",
Path = "/listen",
Kind = "neo-hub-listen",
Token = "hub-settings-volume-tail-token",
Binary = new byte[3000]
});
Assert.Empty(replies);
}
// The turn stays closed and none of the trailing audio was buffered.
Assert.False(session.TurnState.AwaitingTurnCompletion);
Assert.Equal(0, session.TurnState.BufferedAudioBytes);
Assert.Equal(0, session.TurnState.BufferedAudioChunkCount);
}
[Fact]
public async Task ClientAsr_AlarmTimerOkayEmptyReply_MapsToLocalNoInputInsteadOfFallback()
{