From bf81fadd627615761492a055fd8f2c870ecf6cbd Mon Sep 17 00:00:00 2001 From: Kevin Date: Sat, 23 May 2026 01:20:55 +0300 Subject: [PATCH] More original server design and communications documentation --- .../DesignDoc/additional-sections-design.md | 1249 +++++++++++++++++ .../docs/DesignDoc/communication-design.md | 1011 +++++++++++++ OpenJibo/docs/DesignDoc/hub-service-design.md | 777 ++++++++++ .../docs/DesignDoc/skill-framework-design.md | 874 ++++++++++++ 4 files changed, 3911 insertions(+) create mode 100644 OpenJibo/docs/DesignDoc/additional-sections-design.md create mode 100644 OpenJibo/docs/DesignDoc/communication-design.md create mode 100644 OpenJibo/docs/DesignDoc/hub-service-design.md create mode 100644 OpenJibo/docs/DesignDoc/skill-framework-design.md diff --git a/OpenJibo/docs/DesignDoc/additional-sections-design.md b/OpenJibo/docs/DesignDoc/additional-sections-design.md new file mode 100644 index 0000000..6041370 --- /dev/null +++ b/OpenJibo/docs/DesignDoc/additional-sections-design.md @@ -0,0 +1,1249 @@ +# Additional Sections Design Document + +## Overview + +This document covers deployment strategies, data flow examples, error handling, monitoring and logging, and skill development guidelines for the Jibo cloud system. + +## Deployment + +### Docker Compose (Local Development) + +**Location:** `docker-compose.yml` + +**Purpose:** Provides local development environment with all services running in Docker containers. 
+ +#### Base Configuration + +**YAML Anchor:** +```yaml +x-pegasus-defaults: &pegasus-defaults + image: pegasus_base + networks: + - pegasus-nw + volumes: + - ./:/pegasus:consistent # Live code editing + command: yarn run start:debug + environment: + - ETCO_server_logLevel=debug + - ETCO_server_env=local + - ETCO_server_structuredLogs=false +``` + +**Key Features:** +- Shared base image for all services +- Volume mounting for live code editing +- Debug mode enabled +- Consistent environment variables + +#### Services + +**Hub Service:** +```yaml +hub: + container_name: hub + working_dir: /pegasus/packages/hub + environment: + - ETCO_hub_skillsConfig=skills-local.json + - ETCO_server_hubTokenSecret=dev-hub-token-secret + - NET_parser=parser:8080 + - NET_history=history:8080 + ports: + - 9000:8080 # HTTP + - 5850:5850 # Node debugging + build: + context: ./ +``` + +**Parser Service:** +```yaml +parser: + container_name: parser + working_dir: /pegasus/packages/parser + environment: + - ETCO_parser_dialogflow_key=bca3ddc410a54274ac55bd678bff6747 + ports: + - 9005:8080 + - 5851:5851 +``` + +**History Service:** +```yaml +history: + container_name: history + working_dir: /pegasus/packages/history + environment: + - ETCO_history_skillLaunch_mongo=history_mongos:27017 + - ETCO_history_speechHistory_mongo=history_mongos:27017 + depends_on: + - history_cluster + ports: + - 9006:8080 + - 5852:5852 +``` + +**Skills:** +```yaml +chitchat-skill: + container_name: chitchat-skill + working_dir: /pegasus/packages/chitchat-skill + ports: + - 9004:8080 + - 5853:5853 + +report-skill: + container_name: report-skill + working_dir: /pegasus/packages/report-skill + environment: + - NET_lasso=lasso:8080 + - NET_settings=settings.jibo.aws + ports: + - 9003:8080 + - 5854:5854 +``` + +**Lasso Service:** +```yaml +lasso: + container_name: lasso + working_dir: /pegasus/packages/lasso + environment: + - ETCO_lasso_darkSkyKey=d87d094ee8b8cec48b69c1149823c0fa + - 
ETCO_lasso_googleMapsKey=Ri2CIo95Sa7dlwft5tQPixUtnPo= + - ETCO_lasso_apNewsKey=@Pwf$$103103 + - NET_redis=redis:6379 + - ETCO_lasso_credentials_mongo=mongo_lasso:27017 + depends_on: + - redis + - mongo_lasso + ports: + - 9007:8080 + - 5855:5855 +``` + +**Infrastructure Services:** +```yaml +redis: + image: redis:3 + ports: + - 6379:6379 + +mongo_lasso: + image: mongo:3.6.0 + ports: + - 27017:27017 +``` + +#### Network + +```yaml +networks: + pegasus-nw: +``` + +All services communicate over the `pegasus-nw` Docker network. + +#### Commands + +**Start all services:** +```bash +docker-compose up +``` + +**Start specific service:** +```bash +docker-compose up hub +``` + +**Build and start:** +```bash +docker-compose up --build +``` + +**Stop all services:** +```bash +docker-compose down +``` + +### Dockerfile + +**Location:** `Dockerfile` + +**Base Image:** +```dockerfile +FROM node:8.16.0-slim +``` + +**Key Steps:** +1. Configure Debian archive (stretch is old, uses archive.debian.org) +2. Install apt-transport-https +3. Add Yarn repository and install Yarn 1.5.1-1 +4. Install git, unzip, gnupg2, python3 +5. Link Yarn to /usr/local/bin +6. Set PATH for node_modules/.bin +7. Set WORKDIR to /pegasus + +**Environment Variables:** +```dockerfile +ENV PATH="/pegasus/node_modules/.bin:${PATH}" +WORKDIR /pegasus +``` + +### Build Process + +**Location:** `scripts/quickbuild.sh` + +**Purpose:** Parallel build of packages using Lerna. 
+ +**Build Scopes:** +- `core` - interfaces, utils, test-utils, history-client, hub-client +- `skills` - baseskill, chitchat-skill, example-skill, report-skill +- `services` - hub, lasso, parser, history, hub-client-cli +- `all` - All packages + +**Parallelism:** +```bash +CONCURRENCY="$(get-lerna-concurrency.sh)" +PARALLEL="--parallel --concurrency=$CONCURRENCY" +``` + +**Usage:** +```bash +./scripts/quickbuild.sh [scope] [nodocker] +``` + +**Examples:** +```bash +./scripts/quickbuild.sh all +./scripts/quickbuild.sh core +./scripts/quickbuild.sh skills nodocker +``` + +### AWS ECS (Production) + +**Architecture:** + +**ECS (Elastic Container Service):** +- Container orchestration for production +- Task definitions for each service +- Service auto-scaling based on load +- Load balancer for traffic distribution + +**ECR (Elastic Container Registry):** +- Docker image storage +- Versioned image tags +- CI/CD pipeline integration + +**Application Load Balancer:** +- TLS termination +- Health checks +- Route to ECS tasks +- Sticky sessions if needed + +**MongoDB Atlas:** +- Managed MongoDB service +- Automatic backups +- Global distribution +- High availability + +**ElastiCache (Redis):** +- Managed Redis service +- Cluster mode for scaling +- Automatic failover +- Persistence options + +**CloudWatch:** +- Log aggregation +- Metrics collection +- Alarm configuration +- Dashboard creation + +**Deployment Pipeline:** +1. Code pushed to Git +2. CI builds Docker image +3. Image pushed to ECR +4. ECS task definition updated +5. New tasks deployed +6. Load balancer health checks pass +7. 
Old tasks terminated + +**Environment Variables (Production):** +- `ETCO_server_hubTokenSecret` - JWT secret (from Secrets Manager) +- `ETCO_hub_skillsConfig` - S3 URL for skill config +- `ETCO_hub_recordSpeechLogBucket` - S3 bucket for speech logs +- `NET_parser` - Parser service URL +- `NET_history` - History service URL +- `ETCO_parser_dialogflow_key` - Dialogflow API key +- `ETCO_history_skillLaunch_mongo` - MongoDB connection string +- `ETCO_history_speechHistory_mongo` - MongoDB connection string + +## Data Flow Examples + +### Example 1: User Says "Tell Me a Joke" + +**Step 1: Robot Initiates Listen** + +**Robot → Hub (WebSocket):** +```typescript +{ + type: "LISTEN", + msgID: "uuid-1", + ts: 1234567890, + data: { + mode: "default", + lang: "en-US", + hotphrase: true, + rules: ["launch"], + asr: { + sosTimeout: 5000, + maxSpeechTimeout: 60000, + hints: ["tell me a joke", "say something funny"] + } + } +} +``` + +**Step 2: Audio Streaming** + +**Robot → Hub (WebSocket):** +- Binary audio packets streamed +- Hub buffers in AudioBuffer +- Hub streams to Google Cloud Speech API + +**Step 3: Speech Detected** + +**Hub → Robot (WebSocket):** +```typescript +{ + type: "SOS", + msgID: "uuid-2", + ts: 1234567895, + data: null, + timings: { total: 5000 } +} +``` + +**Step 4: Context Sent** + +**Robot → Hub (WebSocket):** +```typescript +{ + type: "CONTEXT", + msgID: "uuid-3", + ts: 1234567896, + data: { + general: { + accountID: "account-123", + robotID: "jibo-001", + lang: "en-US", + release: "1.2.3" + }, + runtime: { + character: { emotion: { name: "happy" } }, + location: { city: "Boston" }, + loop: { users: [], jibo: { id: "jibo-001" } }, + perception: { speaker: null, peoplePresent: [] }, + dialog: {} + }, + skill: {} + } +} +``` + +**Step 5: Speech Ended** + +**Hub → Robot (WebSocket):** +```typescript +{ + type: "EOS", + msgID: "uuid-4", + ts: 1234567920, + data: null, + timings: { total: 25000 } +} +``` + +**Step 6: ASR Complete** + +**Hub internal:** +- 
Google Cloud Speech API returns: "tell me a joke" +- ASR result: `{ text: "tell me a joke", confidence: 0.95 }` + +**Step 7: NLU Processing** + +**Hub → Parser (HTTP):** +```typescript +POST http://parser:8080/v1/parse +{ + text: "tell me a joke", + rules: ["launch"], + external: [], + loop: { users: [] } +} +``` + +**Parser → Hub (HTTP):** +```typescript +{ + intent: "joke_tell", + entities: {}, + rules: ["launch"] +} +``` + +**Step 8: Intent Routing** + +**Hub internal:** +- IntentRouter matches "joke_tell" to "joke-skill" +- DecisionMediator confirms no external factors +- Selected skill: "joke-skill" (cloud skill) + +**Step 9: Listen Result** + +**Hub → Robot (WebSocket):** +```typescript +{ + type: "LISTEN", + msgID: "uuid-5", + ts: 1234567930, + data: { + asr: { text: "tell me a joke", confidence: 0.95 }, + nlu: { intent: "joke_tell", entities: {}, rules: ["launch"] }, + match: { skillID: "joke-skill", launch: true, onRobot: false } + }, + final: false, + timings: { total: 40000, asr: 25000, nlu: 10000 } +} +``` + +**Step 10: Skill Launch** + +**Hub → Joke Skill (HTTP):** +```typescript +POST http://joke-skill:8080/ +Authorization: Bearer <token> +x-jibo-transid: uuid-1 +x-jibo-robotid: jibo-001 +{ + type: "LISTEN_LAUNCH", + msgID: "uuid-6", + ts: 1234567930, + data: { + general: { accountID: "account-123", robotID: "jibo-001", lang: "en-US", release: "1.2.3" }, + runtime: { character, location, loop, perception, dialog }, + skill: { id: "joke-skill" }, + nlu: { intent: "joke_tell", entities: {}, rules: ["launch"] }, + asr: { text: "tell me a joke", confidence: 0.95 } + } +} +``` + +**Step 11: Skill Response** + +**Joke Skill → Hub (HTTP):** +```typescript +{ + type: "SKILL_ACTION", + msgID: "uuid-7", + ts: 1234567935, + data: { + action: { + type: "JCP", + config: { + version: "1.0.0", + jcp: SayTextBehavior("Why did the chicken cross the road?
To get to the other side!") + } + }, + analytics: { "joke-skill": [{ event: "JokeSelected", properties: { category: "classic" } }] }, + final: false, + fireAndForget: false + } +} +``` + +**Step 12: Skill Action** + +**Hub → Robot (WebSocket):** +```typescript +{ + type: "SKILL_ACTION", + msgID: "uuid-8", + ts: 1234567935, + data: { + action: { + type: "JCP", + config: { + version: "1.0.0", + jcp: SayTextBehavior("Why did the chicken cross the road? To get to the other side!") + } + }, + analytics: { "joke-skill": [...] }, + final: false, + fireAndForget: false + }, + timings: { total: 5000, skill: 5000 } +} +``` + +**Step 13: Robot Executes** + +**Robot internal:** +- Executes SayText behavior +- Speaks the joke +- Sends result back + +**Robot → Hub (WebSocket):** +```typescript +{ + type: "CMD_RESULT", + msgID: "uuid-9", + ts: 1234567950, + data: { + result: { success: true, duration: 3000 } + } +} +``` + +**Step 14: Skill Update** + +**Hub → Joke Skill (HTTP):** +```typescript +POST http://joke-skill:8080/ +{ + type: "LISTEN_UPDATE", + msgID: "uuid-10", + ts: 1234567950, + data: { + general: { ... }, + runtime: { ... }, + skill: { id: "joke-skill", session: { id: "session-1", nodeID: 2, data: {}, trace: [...] } }, + result: { success: true, duration: 3000 } + } +} +``` + +**Step 15: Final Response** + +**Joke Skill → Hub (HTTP):** +```typescript +{ + type: "SKILL_ACTION", + msgID: "uuid-11", + ts: 1234567952, + data: { + action: null, + analytics: { "joke-skill": [...] }, + final: true, + fireAndForget: true + } +} +``` + +**Hub → Robot (WebSocket):** +```typescript +{ + type: "SKILL_ACTION", + msgID: "uuid-12", + ts: 1234567952, + data: { + action: null, + analytics: { "joke-skill": [...] 
}, + final: true, + fireAndForget: true + }, + timings: { total: 22000, skill: 17000 } +} +``` + +**Transaction Complete** + +### Example 2: Proactive Greeting + +**Step 1: Robot Detects Person** + +**Robot internal:** +- Vision system detects person entering room +- Triggers proactive event + +**Step 2: Proactive Trigger** + +**Robot → Hub (WebSocket):** +```typescript +{ + type: "TRIGGER", + msgID: "uuid-1", + ts: 1234567890, + data: { + triggerData: { + triggerType: "person_entered", + looperID: "user-123" + }, + triggerSource: "SURPRISE" + } +} +``` + +**Step 3: Context Sent** + +**Robot → Hub (WebSocket):** +```typescript +{ + type: "CONTEXT", + msgID: "uuid-2", + ts: 1234567891, + data: { + general: { accountID: "account-123", robotID: "jibo-001", lang: "en-US", release: "1.2.3" }, + runtime: { + character: { emotion: { name: "happy" } }, + location: { city: "Boston" }, + loop: { users: [{ id: "user-123", firstName: "John", lastName: "Doe" }], jibo: { id: "jibo-001" } }, + perception: { speaker: "user-123", peoplePresent: [{ id: "user-123", type: "IDENTIFIED" }] }, + dialog: {} + }, + skill: {} + } +} +``` + +**Step 4: Proactive Selection** + +**Hub internal:** +- Get all proactive skill configurations +- Filter by context (time, location, people present) +- Filter by history (last greeting time > 1 hour ago) +- Filter by settings (user has greetings enabled) +- Randomly select "greeting-skill" + +**Step 5: Proactive Match** + +**Hub → Robot (WebSocket):** +```typescript +{ + type: "PROACTIVE", + msgID: "uuid-3", + ts: 1234567910, + data: { + match: { + skillID: "greeting-skill", + onRobot: false, + isProactive: true, + launch: true, + skipSurprises: true + } + }, + final: false +} +``` + +**Step 6: Skill Launch** + +**Hub → Greeting Skill (HTTP):** +```typescript +POST http://greeting-skill:8080/ +{ + type: "PROACTIVE_LAUNCH", + msgID: "uuid-4", + ts: 1234567910, + data: { + general: { accountID: "account-123", robotID: "jibo-001", lang: "en-US", release: 
"1.2.3" }, + runtime: { character, location, loop, perception, dialog }, + skill: { id: "greeting-skill" }, + memo: { triggerType: "person_entered", looperID: "user-123" } + } +} +``` + +**Step 7: Skill Response** + +**Greeting Skill → Hub (HTTP):** +```typescript +{ + type: "SKILL_ACTION", + msgID: "uuid-5", + ts: 1234567915, + data: { + action: { + type: "JCP", + config: { + version: "1.0.0", + jcp: Sequence([ + LookAtBehavior("user-123"), + SayTextBehavior("Hello John! Good to see you.") + ]) + } + }, + analytics: { "greeting-skill": [{ event: "Greeting", properties: { person: "user-123" } }] }, + final: true, + fireAndForget: false + } +} +``` + +**Step 8: Skill Action** + +**Hub → Robot (WebSocket):** +```typescript +{ + type: "SKILL_ACTION", + msgID: "uuid-6", + ts: 1234567915, + data: { + action: { + type: "JCP", + config: { + version: "1.0.0", + jcp: Sequence([ + LookAtBehavior("user-123"), + SayTextBehavior("Hello John! Good to see you.") + ]) + } + }, + analytics: { "greeting-skill": [...] 
}, + final: true, + fireAndForget: false + }, + timings: { total: 5000, skill: 5000 } +} +``` + +**Transaction Complete** + +## Error Handling and Timeouts + +### Timeout Configuration + +**Listen Transaction Timeouts:** +```typescript +TIMEOUT_ASR = 40 * 1000; // 40 seconds +TIMEOUT_PARSER = 10 * 1000; // 10 seconds +TIMEOUT_CONTEXT = 5 * 1000; // 5 seconds +TIMEOUT_SKILL = 10 * 1000; // 10 seconds +DEFAULT_TRANSACTION_TIME = 60 * 1000; // 60 seconds +``` + +**WebSocket Timeouts:** +```typescript +TIMEOUT_MAX_DURATION = 3 * 60 * 1000; // 3 minutes +TIMEOUT_CLOSE_AFTER_FINAL = 2 * 1000; // 2 seconds +``` + +**ASR Timeouts:** +```typescript +sosTimeout: number // Time to wait for speech start (configurable) +maxSpeechTimeout: number // Maximum speech duration (default 60 seconds) +``` + +### Error Types + +**Hub Error Codes:** +- `TIMEOUT_ASR` - ASR timeout (40 seconds) +- `TIMEOUT_PARSER` - Parser timeout (10 seconds) +- `TIMEOUT_CONTEXT` - Context timeout (5 seconds) +- `TIMEOUT_SKILL` - Skill timeout (10 seconds) +- `PARSER` - Parser error +- `ASR` - ASR error + +**Skill Request Errors:** +- `SKILL_NOT_FOUND` - Skill does not exist or is on-robot +- `TIMEOUT` - Skill request timeout + +### Error Response Format + +**Standard Error Response:** +```typescript +{ + type: "ERROR", + msgID: "uuid", + ts: 1234567890, + data: { + message: "Error description", + code?: "ERROR_CODE" + }, + final: true, + timings: { + total: number + } +} +``` + +### Error Handling Flow + +**WebSocket Errors:** +1. Error occurs in handler +2. Handler catches error +3. If `error.hasBeenHandled`, log and continue +4. Otherwise, send ERROR message to robot +5. Close WebSocket connection + +**HTTP Errors:** +1. Error occurs in handler +2. Express error middleware catches +3. Returns 500 status with ERROR JSON +4. Logs error with details + +**Skill Errors:** +1. Skill throws error +2. BaseSkill catches in POST handler +3. Calls `buildErrorResponse()` +4. Returns ERROR response to Hub +5. 
Hub forwards to robot + +### Timeout Handling + +**ASR Timeout:** +- If SOS timeout reached: Returns empty text with SOS_TIMEOUT annotation +- If max speech timeout reached: Returns last incremental with MAX_SPEECH_TIMEOUT annotation +- Hub skips NLU and returns no match + +**Parser Timeout:** +- Hub waits 10 seconds for Parser response +- If timeout: Throws HubError with TIMEOUT_PARSER code +- Hub returns ERROR to robot + +**Context Timeout:** +- Hub waits 5 seconds for CONTEXT message +- If timeout: Throws HubError with TIMEOUT_CONTEXT code +- Hub returns ERROR to robot + +**Skill Timeout:** +- Hub waits 10 seconds for Skill response +- If timeout: Throws HubError with TIMEOUT_SKILL code +- Hub returns ERROR to robot + +**Transaction Timeout:** +- Overall transaction timeout of 60 seconds +- If exceeded: Transaction rejected +- WebSocket closed + +## Monitoring and Logging + +### New Relic Integration + +**Location:** `packages/utils/src/service/NewRelic.ts` + +**Purpose:** Application performance monitoring and error tracking. + +**Initialization:** +- New Relic agent loaded via `@jibo/utils/common/init-newrelic.js` +- Global flag `_newRelicLoaded` set when loaded +- Lazy loading of newrelic module + +**Web Transaction Wrapping:** +```typescript +NewRelic.wrapWebTransaction(name: string, handler: PromiseFunction): Promise +``` + +**Usage:** +```typescript +NewRelic.wrapWebTransaction(`ws:${req.url}`, () => handler.handler.handleSocket(ws)) +``` + +**Error Tracking:** +```typescript +NewRelic.newrelic.noticeError(error, error.nrAttributes); +``` + +**Custom Attributes:** +- `transID` - Transaction ID +- `robotID` - Robot ID + +**Transaction Names:** +- WebSocket: `ws:/listen`, `ws:/proactive` +- HTTP: Based on endpoint path + +### jibo-log Integration + +**Location:** `packages/utils/src/logging/` + +**Purpose:** Structured logging with namespace support and dynamic configuration. 
+ +**Log Instance Creation:** +```typescript +req.log = new Log(this.logNamespace); +req.log.transID = req.jibo.transID; +req.log.robotID = req.jibo.robotID; +req.log.outputPerNamespace = parseLoggingConfigHeader(req.jibo.loggingConfig); +``` + +**Log Levels:** +- `debug` +- `info` +- `warn` +- `error` + +**Dynamic Configuration:** +- Per-namespace log levels via `x-jibo-logging-config` header +- Format: `{ "Hub": "debug", "Parser": "info" }` +- Converted to `{ "Hub": { pegasus: "debug" }, "Parser": { pegasus: "info" } }` + +**Log Methods:** +```typescript +log.debug(message, ...args) +log.info(message, ...args) +log.warn(message, ...args) +log.error(message, ...args) +``` + +**Child Loggers:** +```typescript +const childLog = parentLog.createChild('ChildName'); +``` + +**Structured Logs:** +- JSON format when `ETCO_server_structuredLogs=true` +- Plain text when false + +### CloudWatch Integration + +**Log Aggregation:** +- All service logs sent to CloudWatch Logs +- Log groups per service +- Log streams per container instance + +**Metrics:** +- Custom metrics via CloudWatch Metrics +- HTTP request counts and latencies +- WebSocket connection counts +- Error rates + +**Alarms:** +- High error rate +- High latency +- Low connection count +- Service health check failures + +### Health Checks + +**Endpoint:** `/healthcheck` + +**Method:** GET + +**Response:** +``` +200 OK +"ok" +``` + +**Custom Health Checks:** +Services can override `getHealthcheckResponse()` to return custom health data: + +```typescript +protected async getHealthcheckResponse(): Promise<{ statusCode: number; body: string }> { + // Check database connections + // Check external service availability + return { statusCode: 200, body: 'ok' }; +} +``` + +## Skill Development Guide + +### Creating a Simple Skill + +**Step 1: Create Package** + +```bash +cd packages +mkdir my-skill +cd my-skill +yarn init +``` + +**Step 2: Add Dependencies** + +```json +{ + "dependencies": { + "@jibo/baseskill": "^1.0.0", + "@jibo/interfaces": "^1.0.0", +
"@jibo/utils": "^1.0.0" + } +} +``` + +**Step 3: Create Skill Class** + +```typescript +import { BaseSkill } from '@jibo/baseskill'; +import { skill } from '@jibo/interfaces'; +import { generateJCPAction } from '@jibo/baseskill/src/graph/Utils'; + +export class MySkill extends BaseSkill { + constructor() { + super('my-skill'); + } + + protected async handle(req: PegasusRequest): Promise { + const data = req.body.data; + const text = data.nlu.entities.text || "Hello!"; + + const action = generateJCPAction(SayTextBehavior(text)); + + return { + type: skill.response.ResponseType.SKILL_ACTION, + data: { + action: action, + final: true, + fireAndForget: true + }, + ts: Date.now(), + msgID: getUUID() + }; + } +} +``` + +**Step 4: Create Service Entry Point** + +```typescript +import { SkillService } from '@jibo/baseskill'; +import { MySkill } from './MySkill'; + +const skill = new MySkill(); +const service = new SkillService(skill); + +service.init(8080).catch(err => { + console.error(err); + process.exit(1); +}); +``` + +**Step 5: Build and Run** + +```bash +yarn build +yarn start +``` + +### Creating a Graph Skill + +**Step 1: Define Transitions** + +```typescript +enum Transition { + Done = 'Done', + Retry = 'Retry' +} +``` + +**Step 2: Create Custom Nodes** + +```typescript +import { Node, Data, EnterResponse, ExitResponse } from '@jibo/baseskill'; + +class StartNode extends Node { + constructor() { + super('Start', [Transition.Done]); + } + + async enter(data: Data): Promise<EnterResponse> { + const action = generateJCPAction(SayTextBehavior("Hello!")); + return { action }; + } + + async exit(data: Data): Promise<ExitResponse> { + return { transition: Transition.Done }; + } +} +``` + +**Step 3: Create Graph Skill** + +```typescript +import { GraphSkill, graph } from '@jibo/baseskill'; + +export class MyGraphSkill extends GraphSkill { + constructor() { + super('my-graph-skill'); + } + + createGraph(): graph.Graph { + const g = new graph.Graph('My Skill',
graph.utils.generateTransitions(Transition)); + + const startNode = new StartNode(); + const endNode = new graph.nodes.dn.DefaultNode('End'); + + g.addNode(startNode, [[Transition.Done, endNode]]); + g.addNode(endNode, [[graph.nodes.dn.Transition.Done, Transition.Done]]); + + g.finalize(); + return g; + } +} +``` + +### Skill Configuration + +**Create Manifest:** + +```json +{ + "id": "my-skill", + "intents": [ + { + "name": "my_intent", + "entities": [], + "memo": null + } + ], + "proactives": [], + "onRobot": false, + "settings": {} +} +``` + +**Register with Hub:** + +Add to `skills-local.json` or environment configuration: + +```json +{ + "my-skill": { + "id": "my-skill", + "URL": "http://my-skill:8080", + "intents": [...], + "onRobot": false + } +} +``` + +### Skill Best Practices + +**Error Handling:** +```typescript +try { + // Skill logic +} catch (error) { + this.track(data, 'Error', { error: error.message }); + throw error; +} +``` + +**Analytics Tracking:** +```typescript +this.track(data, 'CustomEvent', { key: value }); +``` + +**Supplemental Behaviors:** +```typescript +this.addParallelBehavior(data, SetPresentPersonBehavior); +this.addSequenceBehavior(data, LookAtBehavior); +``` + +**Speaker Override:** +```typescript +this.overrideSpeaker(data, userId); +``` + +**Session Data:** +```typescript +// Store data in session +data.skill.session.data.myKey = myValue; + +// Retrieve data +const myValue = data.skill.session.data.myKey; +``` + +**Local Data:** +```typescript +// Store temporary data +data.local.myTemp = tempValue; +``` + +### Testing Skills + +**Unit Tests:** +```typescript +import { MySkill } from './MySkill'; + +describe('MySkill', () => { + it('should return action', async () => { + const skill = new MySkill(); + const req = createMockRequest(); + const response = await skill.handle(req); + expect(response.type).toBe('SKILL_ACTION'); + }); +}); +``` + +**Integration Tests:** +```typescript +import axios from 'axios'; + +describe('MySkill 
Integration', () => { + it('should handle launch request', async () => { + const response = await axios.post('http://localhost:8080/', { + type: 'LISTEN_LAUNCH', + data: { ... } + }); + expect(response.data.type).toBe('SKILL_ACTION'); + }); +}); +``` + +### Debugging + +**Node Debugging:** +```bash +# Start with debug port +node --inspect=5850 dist/index.js + +# Connect with Chrome DevTools +# chrome://inspect +``` + +**Docker Debugging:** +```yaml +ports: + - 5850:5850 # Debug port +``` + +**Logging:** +```typescript +req.log.debug('Debug message'); +req.log.info('Info message'); +req.log.warn('Warning message'); +req.log.error('Error message'); +``` + +**Graph Visualization:** +```typescript +g.writeDotFile('/tmp/my-graph.dot'); +// Then convert to PNG from a shell (requires Graphviz): +// dot -Tpng /tmp/my-graph.dot -o /tmp/my-graph.png +``` + +### Deployment + +**Dockerfile:** +```dockerfile +FROM pegasus_base:latest +WORKDIR /pegasus/packages/my-skill +COPY package.json yarn.lock ./ +RUN yarn install +COPY . . +RUN yarn build +CMD ["yarn", "start"] +``` + +**Docker Compose:** +```yaml +my-skill: + image: my-skill:latest + ports: + - 9008:8080 + environment: + - ETCO_server_logLevel=debug +``` + +**ECS Task Definition:** +```json +{ + "containerDefinitions": [ + { + "name": "my-skill", + "image": "my-ecr-repo/my-skill:latest", + "portMappings": [{ "containerPort": 8080 }], + "environment": [...] + } + ] +} +``` diff --git a/OpenJibo/docs/DesignDoc/communication-design.md b/OpenJibo/docs/DesignDoc/communication-design.md new file mode 100644 index 0000000..3ec120d --- /dev/null +++ b/OpenJibo/docs/DesignDoc/communication-design.md @@ -0,0 +1,1011 @@ +# Communication Design Document + +## Overview + +The Jibo cloud system uses two primary communication protocols: WebSocket for real-time bidirectional communication between the robot and cloud services, and HTTP for service-to-service communication (Hub to skills, Hub to parser, etc.).
All communication is secured using JWT (JSON Web Token) authentication with Bearer tokens. + +## Location + +- WebSocket implementation: `packages/utils/src/service/BaseService.ts` +- HTTP implementation: `packages/utils/src/service/BaseService.ts` +- Authentication: `packages/utils/src/service/BaseService.ts` +- Headers: `packages/utils/src/service/JiboHeaders.ts` + +## WebSocket Protocol + +### Connection Establishment + +**WebSocket Server Setup:** + +The WebSocket server is created within `BaseService.init()`: + +```typescript +this.wsServer = new WebSocket.Server({ + server: this.server, + verifyClient: (info, callback) => { + // Authentication verification + // Handler existence check + callback(true, 200, ''); + } +}); +``` + +**Connection Flow:** + +1. Robot initiates WebSocket connection to Hub +2. Hub's `verifyClient` callback is invoked before connection is accepted +3. Hub verifies JWT token in Authorization header +4. Hub checks if a handler exists for the requested URL +5. If both checks pass, connection is accepted +6. Hub creates `PegasusWebSocket` instance with enhanced properties +7. Hub calls handler's `handleSocket()` method + +### WebSocket URL Format + +**Listen Endpoint:** +``` +ws://hub:9000/listen +ws://hub:9000/v1/listen +``` + +**Proactive Endpoint:** +``` +ws://hub:9000/proactive +ws://hub:9000/v1/proactive +``` + +### Authentication + +**JWT Token Format:** + +The robot sends a Bearer token in the Authorization header: + +``` +Authorization: Bearer <token> +``` + +**Token Payload:** +```typescript +{ + id: string, // Account ID + accessKeyId: string, // Client ID + secretAccessKey: string, // Client Secret + friendlyId?: string // Robot name +} +``` + +**Verification Process:** + +```typescript +checkAuthentication(headers: any): { error?: string, auth?: IAuthDetails } +``` + +1. Check for Authorization header +2. Validate Bearer scheme +3. Extract token +4. Verify token using `jsonwebtoken.verify()` +5.
Use secret from `ETCO_server_hubTokenSecret` environment variable +6. Return auth details or error + +**Error Cases:** +- Missing Authorization header → "Authorization is required" +- Invalid scheme → "Only bearer scheme is supported" +- Missing secret → "No JWT secret set" +- Invalid token → JWT verification error (e.g., "JsonWebTokenError: invalid signature") + +**Authentication Storage:** + +After verification, auth details are stored on the WebSocket instance: + +```typescript +ws.auth = { + id: string, + accessKeyId: string, + secretAccessKey: string, + friendlyId?: string +} +``` + +### Jibo Headers + +**Location:** `packages/utils/src/service/JiboHeaders.ts` + +**Purpose:** Transmit trace information across services for logging and debugging. + +**Header Names:** +```typescript +Headers = { + transID: "x-jibo-transid", + robotID: "x-jibo-robotid", + loggingConfig: "x-jibo-logging-config" +} +``` + +**JiboHeaders Class:** + +```typescript +class JiboHeaders { + transID: string; + robotID?: string; + loggingConfig?: string; +} +``` + +**Parsing:** +```typescript +ws.jibo = new JiboHeaders(req.headers); +// transID defaults to 'unknown' +// robotID defaults to 'unknown' +// loggingConfig defaults to '{}' +``` + +**Logging Configuration:** + +The logging config header allows dynamic log level configuration per namespace: + +```json +{ + "Hub": "debug", + "Parser": "info", + "Skill": "warn" +} +``` + +**Format Conversion:** +The framework converts from `{[namespace]: LogLevel}` to `{[namespace]: {pegasus: LogLevel}}` for compatibility with jibo-log. + +### PegasusWebSocket + +**Location:** `packages/utils/src/service/PegasusWebSocket.ts` + +**Purpose:** Enhanced WebSocket class with Jibo-specific properties. 
+ +**Properties:** +```typescript +class PegasusWebSocket extends WebSocket { + jibo: JiboHeaders; // Parsed Jibo headers + auth?: IAuthDetails; // JWT auth details + remoteAddress?: string; // Client IP address + log?: Log; // Logger instance +} +``` + +**Remote Address Detection:** +1. Check `x-forwarded-for` header (from load balancer) +2. Fall back to `connection.remoteAddress` +3. Log warning if neither available + +### ResponseWrapper + +**Location:** `packages/utils/src/service/handlers/BaseWebsocketHandler.ts` + +**Purpose:** Manages WebSocket response lifecycle with timeout enforcement. + +**Timeouts:** +- `TIMEOUT_MAX_DURATION` = 3 minutes - Maximum connection duration +- `TIMEOUT_CLOSE_AFTER_FINAL` = 2 seconds - Close after final message + +**Methods:** + +**write(data):** +- Writes message to WebSocket +- Adds timing if not present +- If `final=true`, marks response as ended +- Closes socket after 2 seconds if final + +**writeFinal(data):** +- Sets `final=true` and calls `write()` + +**error(error, errorData):** +- Writes ERROR message +- Sets `final=true` + +**Lifecycle:** +1. Created when handler starts +2. Max duration timer starts (3 minutes) +3. Messages written via `write()` or `writeFinal()` +4. If final message sent, close timer starts (2 seconds) +5. Socket close triggers cleanup +6. Promise resolves when response ends + +### Message Format + +**Base Message Structure:** + +```typescript +{ + type: string, // Message type + msgID: string, // Unique message ID (UUID) + ts: number, // Timestamp (milliseconds since epoch) + data: any, // Message-specific data + final?: boolean, // Is this the final message? 
+ timings?: { // Timing information + total: number, + [key: string]: number + } +} +``` + +**Message Serialization:** + +All messages are serialized to JSON before sending: + +```typescript +socket.send(JSON.stringify(data)); +``` + +### Server-to-Robot Messages (WebSocket) + +The following messages are sent from the Hub (server) to the robot: + +#### SOS (Start of Speech) + +**Emitted when:** Speech is detected during ASR + +**Purpose:** Notify robot that speech has started + +**Format:** +```typescript +{ + type: "SOS", + msgID: "uuid", + ts: 1234567890, + data: null, + timings: { + total: number + } +} +``` + +#### EOS (End of Speech) + +**Emitted when:** Speech ends during ASR + +**Purpose:** Notify robot that speech has ended + +**Format:** +```typescript +{ + type: "EOS", + msgID: "uuid", + ts: 1234567890, + data: null, + timings: { + total: number + } +} +``` + +#### LISTEN Response + +**Emitted when:** ASR and NLU processing complete + +**Purpose:** Send ASR result, NLU result, and skill match to robot + +**Format:** +```typescript +{ + type: "LISTEN", + msgID: "uuid", + ts: 1234567890, + data: { + asr: { + text: string, + confidence: number, + annotation: "GARBAGE" | "SOS_TIMEOUT" | "MAX_SPEECH_TIMEOUT" + }, + nlu: { + intent: string, + entities: {}, + rules: [] + }, + match: { + skillID: string, + launch: boolean, + onRobot: boolean + } | null + }, + final: boolean, + timings: { + total: number, + asr: number, + nlu: number + } +} +``` + +**Final Flag:** +- `final: true` - No skill matched or on-robot skill, transaction complete +- `final: false` - Cloud skill matched, more messages coming + +#### SKILL_ACTION + +**Emitted when:** Cloud skill returns an action to execute + +**Purpose:** Send JCP behavior for robot to execute + +**Format:** +```typescript +{ + type: "SKILL_ACTION", + msgID: "uuid", + ts: 1234567890, + data: { + action: { + type: "JCP", + config: { + version: "1.0.0", + jcp: SupportedBehaviors // SLIM, Sequence, Parallel, SetPresentPerson, 
ImpactEmotion + } + }, + analytics: AnalyticsData, + final: boolean, + fireAndForget: boolean + }, + timings: { + total: number, + skill: number + } +} +``` + +**Final Flag:** +- `final: false` - Robot should execute and send CMD_RESULT back +- `final: true` - Transaction complete, no more actions expected + +**FireAndForget:** +- `true` - Robot executes but doesn't send result back +- `false` - Robot executes and sends result back + +#### SKILL_REDIRECT + +**Emitted when:** Skill redirects to another skill + +**Purpose:** Notify robot of skill redirection + +**Format:** +```typescript +{ + type: "SKILL_REDIRECT", + msgID: "uuid", + ts: 1234567890, + data: { + match: { + skillID: string, + launch: boolean, + onRobot: boolean + }, + nlu: NLUResult, + asr: ASRResult, + memo: any + }, + final: boolean +} +``` + +**Final Flag:** +- `final: true` - On-robot skill, robot handles it +- `final: false` - Cloud skill, Hub will send actions + +#### PROACTIVE Response + +**Emitted when:** Proactive action selected + +**Purpose:** Notify robot of proactive skill launch + +**Format:** +```typescript +{ + type: "PROACTIVE", + msgID: "uuid", + ts: 1234567890, + data: { + match: { + skillID: string, + onRobot: boolean, + isProactive: true, + launch: true, + skipSurprises: boolean + } + } | {}, + final: boolean +} +``` + +**Data:** +- With match data - Action selected +- Empty data - No action selected + +#### ERROR + +**Emitted when:** An error occurs during transaction + +**Purpose:** Notify robot of error + +**Format:** +```typescript +{ + type: "ERROR", + msgID: "uuid", + ts: 1234567890, + data: { + message: string + }, + final: true, + timings: { + total: number + } +} +``` + +### Robot-to-Server Messages (WebSocket) + +The following messages are sent from the robot to the Hub: + +#### LISTEN + +**Purpose:** Initiate listen transaction + +**Format:** +```typescript +{ + type: "LISTEN", + msgID: "uuid", + ts: 1234567890, + data: { + mode: "default" | "CLIENT_ASR" | "CLIENT_NLU", 
+ lang: "en-US", + hotphrase: boolean, + rules: string[], + asr: { + sosTimeout: number, + maxSpeechTimeout: number, + hints: string[], + earlyEOS: string[] + }, + agents: ExternalAgentRequest[] + } +} +``` + +#### Audio Packets + +**Purpose:** Stream audio data for ASR + +**Format:** Binary Buffer (not JSON) + +#### CONTEXT + +**Purpose:** Send runtime context from robot + +**Format:** +```typescript +{ + type: "CONTEXT", + msgID: "uuid", + ts: 1234567890, + data: { + general: { + accountID: string, + robotID: string, + lang: string, + release: string + }, + runtime: { + character: { emotion, motivation }, + location: { city, state, country, lat, lng }, + loop: { users, jibo, owner, loopId }, + perception: { speaker, peoplePresent }, + dialog: { referent } + }, + skill: { + id: string, + session: { id, nodeID, data, trace } + } + } +} +``` + +#### CLIENT_ASR + +**Purpose:** Provide ASR result (for menu clicks, etc.) + +**Format:** +```typescript +{ + type: "CLIENT_ASR", + msgID: "uuid", + ts: 1234567890, + data: { + text: string + } +} +``` + +#### CLIENT_NLU + +**Purpose:** Provide NLU result (for menu clicks, etc.) 
+ +**Format:** +```typescript +{ + type: "CLIENT_NLU", + msgID: "uuid", + ts: 1234567890, + data: { + intent: string, + entities: {}, + rules: [] + } +} +``` + +#### TRIGGER + +**Purpose:** Initiate proactive selection + +**Format:** +```typescript +{ + type: "TRIGGER", + msgID: "uuid", + ts: 1234567890, + data: { + triggerData: { + triggerType: string, + looperID?: string + }, + triggerSource: "SURPRISE" | "OTHER" + } +} +``` + +## HTTP Protocol + +### HTTP Server Setup + +**Express.js Application:** + +```typescript +this.app = express(); +this.app.use(bodyParser.urlencoded({ extended: true })); +this.app.use(bodyParser.json()); +``` + +**HTTP Server Creation:** + +```typescript +this.server = http.createServer(this.app); +this.server.listen(port, callback); +``` + +### HTTP Authentication + +**Middleware:** + +```typescript +checkRequestAuthentication(req, res, next) +``` + +**Process:** +1. Check Authorization header +2. Verify JWT token +3. If valid, call `next()` +4. If invalid, return 401 error + +**Protected Endpoints:** + +Endpoints with `authenticationRequired: true` are protected: + +```typescript +this.addHttpHandler('/path', { + handler: myHandler, + authenticationRequired: true +}); +``` + +### HTTP Headers + +**Jibo Headers (HTTP):** + +Same as WebSocket headers: +- `x-jibo-transid` - Transaction ID +- `x-jibo-robotid` - Robot ID +- `x-jibo-logging-config` - Log level configuration + +**Authorization Header:** +``` +Authorization: Bearer <token> +``` + +### Service-to-Service HTTP Requests + +#### Hub to Skill + +**Purpose:** Send skill launch/update requests + +**Method:** POST + +**URL:** `http://skill-host:port/` or `http://skill-host:port/v1/main` + +**Headers:** +``` +Authorization: Bearer <token> +x-jibo-transid: <transID> +x-jibo-robotid: <robotID> +Content-Type: application/json +``` + +**Request Body:** +```typescript +{ + type: "LISTEN_LAUNCH" | "LISTEN_UPDATE" | "PROACTIVE_LAUNCH", + msgID: "uuid", + ts: 1234567890, + data: { + general: { accountID, robotID, lang, release }, 
+ runtime: { character, location, loop, perception, dialog }, + skill: { id, session? }, + result?: any, + nlu?: NLUResult, + asr?: ASRResult, + memo?: any + } +} +``` + +**Response Body:** +```typescript +{ + type: "SKILL_ACTION" | "SKILL_REDIRECT" | "ERROR", + msgID: "uuid", + ts: 1234567890, + data: { ... }, + final?: boolean, + timings?: { total: number, skill: number } +} +``` + +**Timeout:** 10 seconds (configurable) + +#### Hub to Parser + +**Purpose:** Send NLU request + +**Method:** POST + +**URL:** `http://parser:8080/v1/parse` + +**Headers:** +``` +x-jibo-transid: <transID> +x-jibo-robotid: <robotID> +Content-Type: application/json +``` + +**Request Body:** +```typescript +{ + text: string, + rules: string[], + external: ExternalAgentRequest[], + loop: { + users: [{ firstName, lastName, id }] + } +} +``` + +**Response Body:** +```typescript +{ + intent: string, + entities: {}, + rules: [] +} +``` + +**Timeout:** 10 seconds + +#### Hub to History + +**Purpose:** Record skill launches or speech history + +**Method:** POST + +**URL:** +- `http://history:8080/v1/skill/launch` - Skill launch history +- `http://history:8080/v1/speech` - Speech history + +**Headers:** +``` +x-jibo-transid: <transID> +x-jibo-robotid: <robotID> +Content-Type: application/json +``` + +**Request Body (Skill Launch):** +```typescript +{ + robotID: string, + sessionID: string, + skillID: string, + intent: string, + personIDs: string[] +} +``` + +**Request Body (Speech History):** +```typescript +{ + robotID: string, + accountID: string, + transID: string, + timestamp: number, + audioFileURL?: string, + asr?: ASRResult, + nlu?: NLUResult, + match?: GlobalMatchResponseData, + skill?: SkillRequestOutput, + redirect?: RedirectData, + error?: Error +} +``` + +### Health Check Endpoint + +**URL:** `/healthcheck` + +**Method:** GET + +**Purpose:** Service health check + +**Response:** +``` +200 OK +``` + +**Body:** `"ok"` (default, can be overridden) + +## JWT Authentication + +### Token Generation + +**Token is generated by the 
robot (or authentication service) and sent to cloud services.** + +**Token Structure:** + +```typescript +{ + id: string, // Account ID + accessKeyId: string, // Client ID + secretAccessKey: string, // Client Secret + friendlyId?: string // Robot name (optional) +} +``` + +### Token Verification + +**Verification Function:** + +```typescript +jsonwebtoken.verify(token, secret) +``` + +**Secret Source:** `ETCO_server_hubTokenSecret` environment variable + +**Verification Process:** +1. Decode JWT token +2. Verify signature using secret +3. Check expiration (if present in token) +4. Return decoded payload + +### Authentication Flow + +**WebSocket Connection:** +1. Robot connects with `Authorization: Bearer <token>` +2. Hub's `verifyClient` callback verifies token +3. If valid, connection accepted and auth stored on WebSocket +4. If invalid, connection rejected with 401 + +**HTTP Request:** +1. Robot sends request with `Authorization: Bearer <token>` +2. Express middleware verifies token +3. If valid, request proceeds to handler +4. 
If invalid, returns 401 error + +### Authentication Bypass + +**Development Mode:** + +Services can disable authentication for development: + +```typescript +this.disableAuth = true; +``` + +**When disabled:** +- WebSocket connections accepted without token verification +- HTTP requests proceed without authentication middleware +- Auth details may be missing from request objects + +## Error Handling + +### WebSocket Errors + +**Connection Errors:** +- Authentication failure → 401, connection rejected +- No handler for URL → 404, connection rejected +- Network error → Connection closed + +**Message Errors:** +- Invalid JSON → Logged, connection may close +- Missing required fields → Handler-specific error +- Timeout → Socket closed after max duration + +**Error Message Format:** +```typescript +{ + type: "ERROR", + msgID: "uuid", + ts: 1234567890, + data: { + message: string + }, + final: true +} +``` + +### HTTP Errors + +**Status Codes:** +- 200 - Success +- 401 - Unauthorized (invalid token) +- 404 - Not found (invalid URL) +- 500 - Internal server error + +**Error Response Format:** +```typescript +{ + type: "ERROR", + msgID: "uuid", + ts: 1234567890, + data: { + message: string + }, + final: true +} +``` + +## Logging + +### Log Instance Creation + +**Per-Request Logging:** + +Each request (HTTP or WebSocket) gets a dedicated log instance: + +```typescript +req.log = new Log(this.logNamespace); +req.log.transID = req.jibo.transID; +req.log.robotID = req.jibo.robotID; +req.log.outputPerNamespace = parseLoggingConfigHeader(req.jibo.loggingConfig); +``` + +**WebSocket Logging:** + +```typescript +ws.log = new Log(this.logNamespace); +ws.log.transID = ws.jibo.transID; +ws.log.robotID = ws.jibo.robotID; +ws.log.outputPerNamespace = parseLoggingConfigHeader(ws.jibo.loggingConfig); +``` + +### Log Level Configuration + +**Dynamic Configuration:** + +Log levels can be configured per namespace via the `x-jibo-logging-config` header: + +```json +{ + "Hub": "debug", + 
"Parser": "info", + "Skill": "error" +} +``` + +**Supported Levels:** +- `debug` +- `info` +- `warn` +- `error` + +## Monitoring + +### New Relic Integration + +**WebSocket Transactions:** + +```typescript +NewRelic.wrapWebTransaction(`ws:${req.url}`, () => handler.handler.handleSocket(ws)) +``` + +**Error Tracking:** + +Errors are tracked with custom attributes: +- `transID` - Transaction ID +- `robotID` - Robot ID + +### Timing Information + +**All messages include timing:** + +```typescript +{ + timings: { + total: number, // Total time since start + asr?: number, // ASR processing time + nlu?: number, // NLU processing time + skill?: number // Skill processing time + } +} +``` + +## Security Considerations + +### TLS/SSL + +**Current Implementation:** +- WebSocket connections from load balancer may not be secure +- TLS termination at load balancer +- Services behind load balancer communicate over internal network + +**Future Considerations:** +- End-to-end encryption for sensitive data +- Certificate pinning for robot authentication + +### Token Security + +**Secret Management:** +- JWT secret stored in environment variable +- Secret should be rotated regularly +- Different secrets for different environments + +**Token Expiration:** +- Tokens should include expiration (`exp` claim) +- Short-lived tokens recommended +- Refresh token mechanism for long-lived sessions + +### IP Filtering + +**Remote Address Tracking:** +- Client IP address logged for all connections +- Can be used for IP-based filtering +- Load balancer sets `x-forwarded-for` header + +## Summary of Server-to-Robot Communication + +### WebSocket Messages (Server → Robot) + +1. **SOS** - Speech detected +2. **EOS** - Speech ended +3. **LISTEN** - ASR/NLU result with match +4. **SKILL_ACTION** - JCP behavior to execute +5. **SKILL_REDIRECT** - Skill redirect notification +6. **PROACTIVE** - Proactive match/no-action +7. 
**ERROR** - Error occurred + +### HTTP Messages (Server → Robot) + +HTTP is not used for direct server-to-robot communication. All server-to-robot communication happens over WebSocket. + +### Key Design Principles + +1. **Bidirectional** - WebSocket enables real-time bidirectional communication +2. **Binary Support** - WebSocket supports binary audio streaming +3. **Authentication** - JWT tokens secure all connections +4. **Traceability** - Transaction IDs and robot IDs in all messages +5. **Timeouts** - All operations have timeouts to prevent hanging +6. **Error Handling** - Standardized error format across all protocols +7. **Logging** - Per-request logging with dynamic configuration +8. **Monitoring** - New Relic integration for performance tracking diff --git a/OpenJibo/docs/DesignDoc/hub-service-design.md b/OpenJibo/docs/DesignDoc/hub-service-design.md new file mode 100644 index 0000000..bcaa7f0 --- /dev/null +++ b/OpenJibo/docs/DesignDoc/hub-service-design.md @@ -0,0 +1,777 @@ +# Hub Service Design Document + +## Overview + +The Hub Service is the central orchestrator of the Jibo cloud system. It coordinates all communication between the robot and cloud services, managing speech recognition, natural language understanding, skill routing, and proactive behaviors. The Hub exposes WebSocket endpoints for real-time bidirectional communication with the robot. + +## Location + +`packages/hub/src/` + +## Key Components + +### HubService (`HubService.ts`) + +Main service class extending `BaseService` from `@jibo/utils`. Initializes and manages all hub components. 
+ +**HubComponents** (dependency injection container): +- `parser: ParserClient` - NLU service client +- `skillConfigManager: SkillConfigManager` - Manages skill configurations +- `intentRouter: IntentRouter` - Routes intents to skills +- `skillRequestMaker: SkillRequestMaker` - Makes HTTP requests to skills +- `history: HistoryServiceClient` - History service client +- `hubSettings: HubSettings` - Hub configuration +- `settingsClient: SettingsClient` - Settings service client + +### WebSocket Handlers + +- **ListenHandler** (`listen/ListenHandler.ts`) - Handles `/listen` and `/v1/listen` endpoints +- **ProactiveSocketRequestHandler** (`proactive/ProactiveSocketRequestHandler.ts`) - Handles `/proactive` and `/v1/proactive` endpoints + +### Transaction Handlers + +- **ListenTransactionHandler** (`listen/ListenTransactionHandler.ts`) - State machine for listen transactions +- **ProactiveTransactionHandler** (`proactive/ProactiveTransactionHandler.ts`) - Handles proactive action selection + +## WebSocket Endpoints + +### Listen Endpoint + +**URL:** `ws://hub:9000/listen` or `ws://hub:9000/v1/listen` + +**Authentication:** Bearer JWT token in Authorization header + +**Headers:** +- `x-jibo-transid` - Transaction ID +- `x-jibo-robotid` - Robot ID +- `x-jibo-logging-config` - Log level configuration + +### Proactive Endpoint + +**URL:** `ws://hub:9000/proactive` or `ws://hub:9000/v1/proactive` + +**Authentication:** Same as listen endpoint + +## Listen Transaction Flow + +The listen transaction follows a state machine with the following states: + +``` +WAIT_LISTEN → ASR → NLU → ROUTE → DONE +WAIT_LISTEN → WAIT_CLIENT_ASR → NLU → ROUTE → DONE +WAIT_LISTEN → WAIT_CLIENT_NLU → ROUTE → DONE +``` + +### State Machine Implementation + +**File:** `packages/hub/src/listen/ListenTransactionHandler.ts` + +**States:** +- `WAIT_LISTEN` - Waiting for LISTEN message from robot +- `WAIT_CLIENT_ASR` - Waiting for client-provided ASR result +- `WAIT_CLIENT_NLU` - Waiting for 
client-provided NLU result +- `ASR` - Performing speech recognition +- `NLU` - Performing natural language understanding +- `ROUTE` - Routing to appropriate skill +- `DONE` - Transaction complete +- `STOP` - Transaction stopped + +**Timeouts:** +- ASR: 40 seconds (configurable via sosTimeout, maxSpeechTimeout) +- Parser: 10 seconds +- Context: 5 seconds +- Skill: 10 seconds +- Transaction: 60 seconds (default) + +### Robot-to-Hub Messages (Listen Flow) + +1. **LISTEN** - Initiates listen transaction + ```typescript + { + type: "LISTEN", + msgID: "uuid", + ts: 1234567890, + data: { + mode: "default" | "CLIENT_ASR" | "CLIENT_NLU", + lang: "en-US", + hotphrase: boolean, + rules: string[], + asr: { + sosTimeout: number, + maxSpeechTimeout: number, + hints: string[], + earlyEOS: string[] + }, + agents: ExternalAgentRequest[] + } + } + ``` + +2. **Audio Packets** - Binary audio data streamed after LISTEN + +3. **CONTEXT** - Runtime context from robot + ```typescript + { + type: "CONTEXT", + msgID: "uuid", + ts: 1234567890, + data: { + general: { + accountID: string, + robotID: string, + lang: string, + release: string + }, + runtime: { + character: { emotion, motivation }, + location: { city, state, country, lat, lng }, + loop: { users, jibo, owner, loopId }, + perception: { speaker, peoplePresent }, + dialog: { referent } + }, + skill: { + id: string, + session: { id, nodeID, data, trace } + } + } + } + ``` + +4. **CLIENT_ASR** - Client-provided ASR result (for menu clicks, etc.) + ```typescript + { + type: "CLIENT_ASR", + msgID: "uuid", + ts: 1234567890, + data: { + text: string + } + } + ``` + +5. **CLIENT_NLU** - Client-provided NLU result + ```typescript + { + type: "CLIENT_NLU", + msgID: "uuid", + ts: 1234567890, + data: { + intent: string, + entities: {}, + rules: [] + } + } + ``` + +### Hub-to-Robot Messages (Listen Flow) + +#### 1. 
SOS (Start of Speech) + +**Emitted when:** Speech is detected during ASR + +**Location:** `ListenTransactionHandler.emitSOS()` + +```typescript +{ + type: "SOS", + msgID: "uuid", + ts: 1234567890, + data: null, + timings: { + total: number + } +} +``` + +**Trigger conditions:** +- Google Cloud Speech API detects start of speech +- ASRSession calls `onStartOfSpeech` callback +- Clears SOS timeout timer + +#### 2. EOS (End of Speech) + +**Emitted when:** Speech ends during ASR + +**Location:** `ListenTransactionHandler.emitEOS()` + +```typescript +{ + type: "EOS", + msgID: "uuid", + ts: 1234567890, + data: null, + timings: { + total: number + } +} +``` + +**Trigger conditions:** +- Google Cloud Speech API detects end of speech +- ASRSession calls `onEndOfSpeech` callback +- Clears max speech timeout timer + +#### 3. LISTEN Response (ASR/NLU Result) + +**Emitted when:** ASR and NLU processing complete + +**Location:** `ListenTransactionHandler.emitListenResult()` + +```typescript +{ + type: "LISTEN", + msgID: "uuid", + ts: 1234567890, + data: { + asr: { + text: string, + confidence: number, + annotation: "GARBAGE" | "SOS_TIMEOUT" | "MAX_SPEECH_TIMEOUT" + }, + nlu: { + intent: string, + entities: {}, + rules: [] + }, + match: { + skillID: string, + launch: boolean, + onRobot: boolean + } | null + }, + final: boolean, + timings: { + total: number, + asr: number, + nlu: number + } +} +``` + +**Emission scenarios:** +- **No match:** `match: null, final: true` - No skill matched the NLU result +- **On-robot skill:** `match.onRobot: true, final: true` - Skill runs on robot, Hub done +- **Cloud skill:** `match.onRobot: false, final: false` - Skill runs in cloud, Hub will send skill actions + +#### 4. 
SKILL_ACTION + +**Emitted when:** Cloud skill returns an action to execute + +**Location:** `TransactionHandler.emitSkillResult()` + +```typescript +{ + type: "SKILL_ACTION", + msgID: "uuid", + ts: 1234567890, + data: { + action: { + type: "JCP", + config: { + version: "1.0.0", + jcp: SupportedBehaviors // SLIM, Sequence, Parallel, SetPresentPerson, ImpactEmotion + } + }, + analytics?: AnalyticsData, + fireAndForget?: boolean + }, + final: boolean, + timings: { + total: number, + skill: number + } +} +``` + +**JCP Behavior Types:** +- `SLIM` - Single behavior execution +- `Sequence` - Sequential behavior execution +- `Parallel` - Parallel behavior execution +- `SetPresentPerson` - Set focused person +- `ImpactEmotion` - Modify Jibo's emotional state + +**Emission scenarios:** +- **Non-final:** `final: false` - Robot should execute action and send CMD_RESULT back +- **Final:** `final: true` - Transaction complete, no more actions expected + +#### 5. SKILL_REDIRECT + +**Emitted when:** Skill redirects to another skill + +**Location:** `TransactionHandler.emitSkillRedirectNotification()` + +```typescript +{ + type: "SKILL_REDIRECT", + msgID: "uuid", + ts: 1234567890, + data: { + match: { + skillID: string, + launch: boolean, + onRobot: boolean + }, + nlu: NLUResult, + asr: ASRResult, + memo: any + }, + final: boolean +} +``` + +**Emission scenarios:** +- Skill returns `SKILL_REDIRECT` response +- Hub launches new skill with provided context +- Only one level of redirect supported (error on second redirect) + +#### 6. 
ERROR + +**Emitted when:** An error occurs during transaction + +**Location:** `TransactionHandler.emitSkillResult()` (error case) + +```typescript +{ + type: "ERROR", + msgID: "uuid", + ts: 1234567890, + data: { + message: string + }, + final: true, + timings: { + total: number + } +} +``` + +### Listen Transaction State Transitions + +#### WAIT_LISTEN → ASR + +**Trigger:** LISTEN message received with mode="default" + +**Actions:** +- Initialize ASRSession with Google Cloud Speech API +- Start audio streaming +- Set up SOS timeout (if configured) +- Set up max speech timeout (if configured) + +#### WAIT_LISTEN → WAIT_CLIENT_ASR + +**Trigger:** LISTEN message received with mode="CLIENT_ASR" + +**Actions:** +- Emit fake SOS (immediate) +- Wait for CLIENT_ASR message from robot + +#### WAIT_LISTEN → WAIT_CLIENT_NLU + +**Trigger:** LISTEN message received with mode="CLIENT_NLU" + +**Actions:** +- Emit fake SOS (immediate) +- Wait for CLIENT_NLU message from robot + +#### ASR → NLU + +**Trigger:** ASR completes successfully + +**Actions:** +- Stop ASR session +- Normalize ASR text +- Check for garbage annotation (skip NLU if garbage) +- Wait for CONTEXT message (5 second timeout) +- Send ASR text to Parser service + +#### WAIT_CLIENT_ASR → NLU + +**Trigger:** CLIENT_ASR message received + +**Actions:** +- Use provided ASR text +- Emit fake EOS +- Proceed to NLU + +#### WAIT_CLIENT_NLU → ROUTE + +**Trigger:** CLIENT_NLU message received + +**Actions:** +- Use provided NLU result +- Emit fake EOS +- Skip NLU, proceed to routing + +#### NLU → ROUTE + +**Trigger:** Parser returns NLU result + +**Actions:** +- Wait for CONTEXT message (5 second timeout) +- Call IntentRouter to match skill +- Apply DecisionMediator for external factors +- Route to matched skill or context skill + +#### ROUTE → DONE + +**Trigger:** Routing complete + +**Actions:** +- For on-robot skills: Emit LISTEN with match, transaction done +- For cloud skills: Get skill response, emit SKILL_ACTION, 
transaction done +- For no match: Emit LISTEN with match=null, transaction done + +## Intent Routing + +### IntentRouter (`intent/IntentRouter.ts`) + +Matches NLU results to registered cloud skills. + +**Routing Logic:** +1. Check if NLU has intent and 'launch' rule +2. Query all skill configurations +3. Match intent against skill intent configurations +4. Match entities against skill entity configurations +5. Return first matching skill decision + +**DecisionMediator** (`intent/DecisionMediator.ts`): +- Can alter routing decisions based on external factors +- Considers robot release version +- May redirect to different skill based on context + +**IRDecisionMaker** (`intent/IRDecisionMaker.ts`): +- Core matching algorithm +- Compares intent names and entity values +- Supports exact match and NOT match rules + +### Skill Request Maker (`skill/SkillRequestMaker.ts`) + +Makes HTTP requests to cloud skills. + +**Methods:** +- `skillLaunch(skillID, data, jiboHeaders, log)` - Launch new skill +- `skillLaunchOrUpdate(skillID, data, jiboHeaders, log, update)` - Launch or update skill +- `proactiveLaunch(skillID, data, jiboHeaders, log)` - Proactive launch + +**Request Format:** +```typescript +{ + type: "LISTEN_LAUNCH" | "LISTEN_UPDATE" | "PROACTIVE_LAUNCH", + msgID: "uuid", + ts: 1234567890, + data: { + general: { accountID, robotID, lang, release }, + runtime: { character, location, loop, perception, dialog }, + skill: { id, session? }, + result?: any, // For UPDATE + nlu: NLUResult, + asr: ASRResult, + memo?: any + } +} +``` + +**Timeout:** 10 seconds (configurable) + +**Error Handling:** +- `SKILL_NOT_FOUND` - Skill does not exist or is on-robot +- `TIMEOUT` - Skill request timeout + +## Proactive Flow + +### Proactive Transaction Handler (`proactive/ProactiveTransactionHandler.ts`) + +Handles proactive action selection based on context, history, and settings. + +### Robot-to-Hub Messages (Proactive Flow) + +1. 
**TRIGGER** - Initiates proactive selection + ```typescript + { + type: "TRIGGER", + msgID: "uuid", + ts: 1234567890, + data: { + triggerData: { + triggerType: string, + looperID?: string + }, + triggerSource: "SURPRISE" | "OTHER" + } + } + ``` + +2. **CONTEXT** - Runtime context (same as listen flow) + +### Hub-to-Robot Messages (Proactive Flow) + +#### PROACTIVE Match Response + +**Emitted when:** Proactive action selected + +**Location:** `ProactiveTransactionHandler.emitMatchResponse()` + +```typescript +{ + type: "PROACTIVE", + msgID: "uuid", + ts: 1234567890, + data: { + match: { + skillID: string, + onRobot: boolean, + isProactive: true, + launch: true, + skipSurprises: boolean + } + }, + final: boolean +} +``` + +**Emission scenarios:** +- **On-robot skill:** `final: true` - Robot handles skill, Hub done +- **Cloud skill:** `final: false` - Hub will send skill actions + +#### PROACTIVE No-Action Response + +**Emitted when:** No eligible proactive action found + +**Location:** `ProactiveTransactionHandler.emitNoActionResponse()` + +```typescript +{ + type: "PROACTIVE", + msgID: "uuid", + ts: 1234567890, + data: {}, + final: true +} +``` + +### Proactive Action Selection Algorithm + +**File:** `ProactiveTransactionHandler.getEligibleActions()` + +**Steps:** + +1. **Get all proactive skill configurations** + - Query SkillConfigManager for skills with proactive registrations + +2. **Gather transaction data** + - Extract focused person, present people, loop ID, robot ID + - Use ContextTools to extract context fields + +3. **Fetch user settings** (if focused person) + - Batch request to SettingsClient for all skill settings + - Consolidate into skill settings map + +4. **Filter by context rules** + - Check time-based rules (time of day, day of week) + - Check location rules + - Check people present rules + - Check robot state rules + +5. 
**Filter by interaction history rules** + - Query History service for past interactions + - Check frequency rules (e.g., "at most once per hour") + - Check recency rules (e.g., "not in last 10 minutes") + - Check sequence rules (e.g., "after greeting skill") + +6. **Filter by settings rules** + - Check user preferences for each skill + - Check enabled/disabled status + - Check custom parameters + +7. **Select action** + - Currently: Random selection from eligible actions + - Future: Heuristics based on context, engagement, topics + +### Context Tools (`proactive/tools/ContextTools.ts`) + +Helper functions for context rule evaluation: + +- `extractContextData(field, context, requestData, log)` - Extract specific context field +- `checkContextRules(registration, context, requestData, log)` - Evaluate all context rules + +### History Rules Checker (`proactive/tools/IHRulesChecker.ts`) + +Evaluates interaction history rules: + +- `checkIHRules(registrations, IHQueries, data, log)` - Filter by history rules +- Queries History service for past skill launches +- Applies frequency, recency, and sequence constraints + +### Settings Rules Checker (`proactive/tools/SettingsRulesChecker.ts`) + +Evaluates user settings: + +- `getSkillSettingsMap(skillConfigs, accountID, loopID, transID)` - Batch fetch settings +- `checkSettingsRegistrations(registrations, skillSettingsMap)` - Filter by settings + +## Skill Interaction Flow (Cloud Skills) + +### Initial Launch + +1. Hub sends LISTEN_LAUNCH request to skill +2. Skill processes request, returns SKILL_ACTION +3. Hub sends SKILL_ACTION to robot +4. Robot executes action, sends CMD_RESULT to Hub +5. Hub sends LISTEN_UPDATE request to skill with action result +6. Skill processes result, returns next SKILL_ACTION or final=true +7. Repeat steps 3-6 until skill returns final=true + +### Skill Redirect + +1. Skill returns SKILL_REDIRECT response +2. Hub emits SKILL_REDIRECT notification to robot +3. 
Hub sends launch request to new skill +4. New skill proceeds with normal flow +5. Error if second redirect attempted + +## Message Timing + +### Listen Transaction Timing + +**Timings tracked:** +- `total` - Total transaction time +- `asr` - ASR processing time +- `nlu` - NLU processing time +- `skill` - Skill processing time + +**Timing emission:** +- SOS/EOS include timing from start +- LISTEN response includes ASR and NLU timings +- SKILL_ACTION includes skill timing + +### Proactive Transaction Timing + +**Timings tracked:** +- `total` - Total transaction time +- `skill` - Skill processing time + +## Error Handling + +### Hub Error Codes (`HubErrorCode.ts`) + +- `TIMEOUT_ASR` - ASR timeout (40 seconds) +- `TIMEOUT_PARSER` - Parser timeout (10 seconds) +- `TIMEOUT_CONTEXT` - Context timeout (5 seconds) +- `TIMEOUT_SKILL` - Skill timeout (10 seconds) +- `PARSER` - Parser error +- `ASR` - ASR error + +### Error Response Format + +```typescript +{ + type: "ERROR", + msgID: "uuid", + ts: 1234567890, + data: { + message: string, + code?: string + }, + final: true, + timings: { + total: number + } +} +``` + +## Speech History Recording + +### Optional Features + +**Configuration:** +- `ETCO_hub_recordLaunchHistory` - Record skill launches to MongoDB +- `ETCO_hub_recordSpeechHistory` - Record speech interactions to MongoDB +- `ETCO_hub_recordSpeechLogBucket` - Upload speech logs to S3 + +### Speech History Record + +**Data recorded:** +- Robot ID, account ID, transaction ID +- Timestamp +- ASR result +- NLU result +- Match data +- Skill response +- Redirect data +- Error (if any) + +### S3 Upload + +**Format:** JSON with audio as base64 + +**Path:** `{robotID}/year={year}/month={month}/day={day}/{timestamp}-{transID}.json` + +## Hub Configuration + +### Environment Variables + +**Hub Settings:** +- `ETCO_hub_recordLaunchHistory` - Enable launch history +- `ETCO_hub_recordSpeechHistory` - Enable speech history +- `ETCO_hub_recordSpeechLogBucket` - S3 bucket for speech 
logs + +**Authentication:** +- `ETCO_server_hubTokenSecret` - JWT secret for token verification + +### Skill Configuration + +**Sources:** +- `skills-local.json` - Local development configuration +- Environment variables - Production configuration +- Settings service - Dynamic configuration + +**Skill Config Structure:** +```typescript +{ + id: string, + intents: [{ + name: string, + entities?: [{ name, value, matchRule }], + memo?: any + }], + proactives?: [{ + triggerType: string, + contextRules?: ContextRule[], + IHRules?: IHRule[], + settingsRules?: SettingsRule[], + memo?: any + }], + IHQueries?: IHQueryDefinitions, + onRobot?: boolean, + URL: string, + settings?: ManifestSettings +} +``` + +## Summary of Server-to-Robot Communication + +### Listen Flow + +1. **SOS** - Speech detected +2. **EOS** - Speech ended +3. **LISTEN** - ASR/NLU result with match data +4. **SKILL_ACTION** - JCP action to execute (repeated for multi-turn) +5. **SKILL_REDIRECT** - Skill redirect notification +6. **ERROR** - Error occurred + +### Proactive Flow + +1. **PROACTIVE** - Match or no-action response +2. **SKILL_ACTION** - JCP action to execute (if cloud skill) +3. **SKILL_REDIRECT** - Skill redirect notification +4. **ERROR** - Error occurred + +### Key Design Principles + +1. **State Machine** - Clear state transitions with validation +2. **Timeouts** - Every operation has a timeout to prevent hanging +3. **Error Handling** - Errors propagate to robot with clear messages +4. **Timing** - All operations are timed for monitoring +5. **History** - All interactions are recorded for analysis +6. **Flexibility** - Supports on-robot and cloud skills +7. 
**Proactivity** - Context-aware action selection diff --git a/OpenJibo/docs/DesignDoc/skill-framework-design.md b/OpenJibo/docs/DesignDoc/skill-framework-design.md new file mode 100644 index 0000000..19ab02e --- /dev/null +++ b/OpenJibo/docs/DesignDoc/skill-framework-design.md @@ -0,0 +1,874 @@ +# Skill Framework Design Document + +## Overview + +The Skill Framework provides the foundation for building cloud-based skills for the Jibo robot. It consists of a base class for all skills, a graph-based state machine for complex conversational flows, and a system for generating JCP (Jibo Command Protocol) actions that are sent to the robot. + +## Location + +`packages/baseskill/src/` + +## Core Components + +### BaseSkill (`BaseSkill.ts`) + +Abstract base class that all cloud skills must extend. + +**Purpose:** Provides common HTTP handling and error handling for all skills. + +**Key Features:** +- Extends `BaseHttpHandler` from `@jibo/utils` +- Registers POST handler at `/` endpoint +- Validates request structure +- Tracks timing for each request +- Provides error response builder + +**Constructor:** +```typescript +constructor(public name: string) +``` + +**Abstract Method:** +```typescript +protected abstract handle(request: PegasusRequest): Promise; +``` + +**Lifecycle Methods:** +- `init(): Promise` - Override to initialize resources (load files, connect to services) +- `buildErrorResponse(err: Error): ErrorResponse` - Builds standardized error response + +**HTTP Handler:** +- Accepts POST requests at `/` +- Logs request type +- Calls `handle()` method +- Adds timing information +- Catches errors and returns error response + +### GraphSkill (`GraphSkill.ts`) + +Extends BaseSkill with a graph-based state machine for complex conversational flows. + +**Purpose:** Enables skills to define their logic as a series of interconnected nodes (states) with transitions. 
+ +**Key Features:** +- Implements `GraphFactory` interface +- Manages graph execution via `GraphManager` singleton +- Supports skill redirects +- Tracks analytics events +- Supports supplemental behaviors (parallel/sequence) +- Handles both launch and update requests + +**Constructor:** +```typescript +constructor(name: string) +``` + +**Abstract Method:** +```typescript +abstract createGraph(): Graph +``` + +**Request Handling:** + +**Launch Requests** (LISTEN_LAUNCH or PROACTIVE_LAUNCH): +1. Validates request data (accountID, robotID, skill ID) +2. Initializes skill session data +3. Tracks SKILL_ENTRY analytics event +4. Calls `GraphManager.instance.start(graph, data)` to begin graph execution +5. Returns SKILL_ACTION or SKILL_REDIRECT response + +**Update Requests** (LISTEN_UPDATE): +1. Validates request data +2. Calls `GraphManager.instance.exitNode(data)` to process action results +3. Returns next SKILL_ACTION or final response + +**Response Types:** + +1. **SKILL_REDIRECT** - Redirects to another skill + ```typescript + { + type: "SKILL_REDIRECT", + msgID: "uuid", + ts: 1234567890, + data: { + skillID: string, + nlu?: NLUResult, + asr?: ASRResult, + memo?: any + } + } + ``` + +2. **SKILL_ACTION** - Returns JCP action for robot to execute + ```typescript + { + type: "SKILL_ACTION", + msgID: "uuid", + ts: 1234567890, + data: { + action: JCPAction, + analytics: AnalyticsData, + final: boolean, + fireAndForget: boolean + } + } + ``` + +3. 
**Final Response** - No action, transaction complete + ```typescript + { + type: "SKILL_ACTION", + msgID: "uuid", + ts: 1234567890, + data: { + action: null, + analytics: AnalyticsData, + final: true, + fireAndForget: true + } + } + ``` + +**Convenience Methods:** + +- `track(data, event, properties)` - Track analytics event +- `overrideSpeaker(data, id)` - Override current speaker in context +- `addParallelBehavior(data, behavior)` - Add behavior to execute in parallel +- `addSequenceBehavior(data, behavior)` - Add behavior to execute in sequence + +**Supplemental Behaviors Injection:** + +When a skill returns a JCP action, the framework injects any supplemental behaviors that were added during execution: + +1. If sequence behaviors exist, wraps main action in a Sequence +2. If parallel behaviors exist, wraps result in a Parallel +3. Final JCP action is sent to robot + +**Example:** +```typescript +// Skill adds parallel behavior +this.addParallelBehavior(data, SetPresentPersonBehavior); + +// Skill returns main action +return { action: SayTextBehavior }; + +// Framework injects: Parallel([SetPresentPersonBehavior, SayTextBehavior]) +``` + +### Graph System + +#### Graph (`graph/Graph.ts`) + +Represents a directed graph of connected nodes (states). + +**Purpose:** Defines the structure of a skill's conversation flow. 
+ +**Key Properties:** +- `name: string` - Graph name +- `initial: Node` - Starting node +- `nodes: Set` - All nodes in graph +- `exitTransitions: Map` - Exit transition mappings + +**Constructor:** +```typescript +constructor(name: string, exitTransitionNames: ExitTransition[]) +``` + +**Methods:** + +- `setInitialNode(node)` - Sets the starting node +- `addNode(node, transitionMapping)` - Adds a node and connects its transitions +- `addSubGraph(subGraph, transitionMapping)` - Adds a subgraph and connects its exits +- `finalize()` - Validates graph and locks it for execution +- `writeDotFile(filePath)` - Generates GraphViz dot file for visualization + +**Transition Mapping:** +```typescript +[ + [TransitionName, DestinationNode], // Transition to another node + [TransitionName, ExitTransition] // Exit from graph +] +``` + +**Validation (in finalize):** +- All nodes must be reachable from initial node +- All exit transitions must be connected +- All transitions must have valid destinations +- No duplicate transition names + +**Subgraphs:** +- Graphs can be nested within other graphs +- Subgraph exit transitions connect to parent graph nodes +- Enables hierarchical organization of complex flows +- Nodes can belong to multiple graphs (for subgraph sharing) + +**GraphViz Visualization:** +- Generates .dot files for graph visualization +- Color-codes initial node, regular nodes, and exit states +- Shows hierarchical structure with clusters +- Labels transitions with their names + +#### GraphManager (`graph/GraphManager.ts`) + +Singleton that manages graph execution and skill sessions. + +**Purpose:** Coordinates node execution and maintains session state. 
+ +**Singleton Pattern:** +```typescript +GraphManager.instance // Access singleton +``` + +**Key Responsibilities:** +- Assigns unique IDs to all nodes +- Maps node IDs to node instances +- Manages skill session lifecycle +- Executes node enter/exit lifecycle +- Handles transitions between nodes + +**Session Structure:** +```typescript +{ + id: string, // Session UUID + nodeID: number, // Current node ID + data: any, // Skill-specific session data + trace: [ // History of transitions + { nodeID: number, transition: string } + ] +} +``` + +**Execution Flow:** + +**Start Graph** (launch request): +```typescript +start(graph, data) + → Creates new session + → Sets initial node + → Calls enterNode() +``` + +**Enter Node:** +```typescript +enterNode(data) + → Fetches current node + → Calls node.enter(data) + → Updates trace + → If action returned: return action + → Else: call exitNode() +``` + +**Exit Node:** +```typescript +exitNode(data) + → Fetches current node + → Calls node.exit(data) + → If transition returned: executeTransition() + → Else: return (terminal) +``` + +**Execute Transition:** +```typescript +executeTransition(node, result, data) + → Validates transition exists + → Updates trace with transition name + → If terminal: return null + → Else: update nodeID, call enterNode() +``` + +**Node ID Assignment:** +- Counter starts at 0, increments for each node +- Bidirectional mapping: node ↔ ID +- Enables serialization of session state + +#### Node (`graph/nodes/Node.ts`) + +Abstract base class for all graph nodes. + +**Purpose:** Defines a state in the skill's conversation flow. 
+ +**Key Properties:** +- `id: number` - Unique ID assigned by GraphManager +- `name: string` - Node name +- `transitionNames: Transition[]` - Valid exit transitions +- `graphs: Graph[]` - Graphs this node belongs to +- `transitions: Map` - Transition destinations + +**Constructor:** +```typescript +constructor(name: string, transitionNames: Transition[]) +``` + +**Abstract Methods:** + +```typescript +abstract async enter(data: Data): Promise<EnterResponse> +``` +- Called when node is entered +- Returns action to execute, redirect, or nothing + +```typescript +abstract async exit(data: Data): Promise<ExitResponse> +``` +- Called with action results (if action was issued) +- Returns next transition or nothing (terminal) + +**Data Structure:** +```typescript +Data = { + // From request + general: { accountID, robotID, lang, release }, + runtime: { character, location, loop, perception, dialog }, + skill: { id, session }, + result?: any, // Action results for UPDATE + + // Added by framework + req: PegasusRequest, + log: Log, + local: any, // Skill-local data + analytics: {}, // Analytics events + behaviors: { // Supplemental behaviors + parallel: [], + sequence: [] + } +} +``` + +**Response Types:** + +**EnterResponse:** +```typescript +{ + action?: Action, // JCP action to execute + redirect?: RedirectData, // Redirect to another skill + final?: boolean // Is this the final response? +} +``` + +**ExitResponse:** +```typescript +{ + transition?: string, // Next transition to take + result?: any, // Result to pass to next node + redirect?: RedirectData +} +``` + +**Built-in Node Types:** + +1. **DefaultNode** - Simple terminal node + - Returns no action + - Transitions to Done + +2. **NoOpNode** - No operation node + - Returns no action + - Can have custom transitions + +3. **JCPNode** - Returns a JCP action + - Returns specified JCP behavior + - Can be terminal or continue + +4. **TrueFalseNode** - Conditional branching + - Evaluates condition + - Transitions based on true/false + +5.
**SetLooperIDNode** - Sets speaker ID + - Updates perception.speaker in context + - Useful for multi-turn conversations + +**Node Traversal:** +- `forEachDescendent(handler)` - BFS traversal of all descendant nodes +- Used for graph validation and analysis + +### Skill Request/Response Protocol + +#### Skill Request Types + +**Location:** `packages/interfaces/src/skill/request.ts` + +**MessageType:** +- `LISTEN_LAUNCH` - Launch skill from listen interaction +- `LISTEN_UPDATE` - Update skill with action results +- `PROACTIVE_LAUNCH` - Launch skill proactively + +**Request Structure:** +```typescript +{ + type: MessageType, + msgID: "uuid", + ts: 1234567890, + data: { + general: { + accountID: string, + robotID: string, + lang: string, + release: string + }, + runtime: { + character: { emotion, motivation }, + location: { city, state, country, lat, lng }, + loop: { users, jibo, owner, loopId }, + perception: { speaker, peoplePresent }, + dialog: { referent } + }, + skill: { + id: string, + session?: { + id: string, + nodeID: number, + data: any, + trace: [{ nodeID, transition }] + } + }, + result?: any, // Action results for UPDATE + nlu?: NLUResult, + asr?: ASRResult, + memo?: any + } +} +``` + +#### Skill Response Types + +**Location:** `packages/interfaces/src/skill/response.ts` + +**ResponseType:** +- `SKILL_ACTION` - Returns action to execute +- `SKILL_REDIRECT` - Redirects to another skill +- `ERROR` - Error response + +**SKILL_ACTION Response:** +```typescript +{ + type: "SKILL_ACTION", + msgID: "uuid", + ts: 1234567890, + data: { + action: JCPAction | null, + analytics: AnalyticsData, + final: boolean, + fireAndForget: boolean + } +} +``` + +**SKILL_REDIRECT Response:** +```typescript +{ + type: "SKILL_REDIRECT", + msgID: "uuid", + ts: 1234567890, + data: { + skillID: string, + nlu?: NLUResult, + asr?: ASRResult, + memo?: any + } +} +``` + +**ERROR Response:** +```typescript +{ + type: "ERROR", + msgID: "uuid", + ts: 1234567890, + data: { + message: string, + 
skill: { id: string } + } +} +``` + +### JCP Actions + +**Location:** `packages/interfaces/src/skill/action.ts` + +**Purpose:** Defines behaviors that the robot should execute. + +**ActionType:** +- `JCP` - Jibo Command Protocol action + +**JCPAction Structure:** +```typescript +{ + type: "JCP", + config: { + version: "1.0.0", + jcp: SupportedBehaviors + } +} +``` + +**SupportedBehaviors:** +- `SLIM` - Single behavior execution +- `Sequence` - Sequential behavior execution +- `Parallel` - Parallel behavior execution +- `SetPresentPerson` - Set focused person +- `ImpactEmotion` - Modify Jibo's emotional state + +**Helper Function:** +```typescript +generateJCPAction(behavior): JCPAction +``` +Wraps a behavior as a JCP action (`type: "JCP"`) with `config.version` set to `"1.0.0"`, matching the JCPAction structure above. + +### MIM (Motion Interaction Model) System + +**Location:** `packages/baseskill/src/graph/mims/` + +**Purpose:** Provides pre-built graph structures for playing MIM animations. + +**MIM Files:** +- `.mim` files contain animation definitions +- Organized in directories: + - `scripted-responses` - Pre-scripted responses + - `emotion-responses` - Emotion-based responses + - `core-responses` - Fallback responses + +**MIM Factories:** + +**ANFactory** - Animation Node Factory +- Creates graph for playing a single MIM +- Supports prompt data injection +- Can be final or continue + +**MANFactory** - Multiple Animation Node Factory +- Creates graph for playing multiple MIMs +- Supports random selection +- Can be final or continue + +**MIMFactory** - General MIM Factory +- Creates graph for MIM playback +- Supports semi-specific responses +- Handles category-based selection + +**QNFactory** - Question Node Factory +- Creates graph for asking questions +- Supports opt-in flows +- Handles user responses + +**OptInFactory** - Opt-In Node Factory +- Creates graph for opt-in offers +- Tracks user acceptance/rejection +- Handles analytics + +**MIM Factory Options:** +```typescript +{ + mimDataProvider: (data) => string[], // Function to get
MIM paths + promptDataProvider?: (data) => any, // Function to get prompt data + final: boolean // Is this the final action? +} +``` + +**Example Usage (Chitchat Skill):** +```typescript +const doMIMOptions: MimFactoryOptions = { + mimDataProvider: (data) => data.local.path, + promptDataProvider: (data) => data.local.promptData, + final: true +}; +const doMIM = new ANFactory('Do MIM', doMIMOptions).createGraph(); +``` + +**Semi-Specific Responses:** +- MIMs with `_SS_` suffix are semi-specific +- Match specific categories (e.g., time, weather) +- CSV files define category members +- Enables context-aware responses + +### SkillService (`SkillService.ts`) + +Service wrapper that hosts a skill as an HTTP service. + +**Purpose:** Provides the service infrastructure for running a skill. + +**Constructor:** +```typescript +constructor(private skillV1: BaseSkill) +``` + +**HTTP Handler:** +- Registers skill at `/v1/main` endpoint +- No authentication required (handled by Hub) + +**Initialization:** +```typescript +async init(port: number) + → Starts HTTP server + → Calls skill.init() +``` + +### Analytics + +**Location:** `packages/interfaces/src/skill/analytics.ts` + +**Purpose:** Track skill events for analysis. + +**AnalyticsData Structure:** +```typescript +{ + [skillName: string]: [ + { + event: string, + properties: any + } + ] +} +``` + +**Built-in Events:** +- `SKILL_ENTRY` - Skill launched +- `SKILL_OFFER` - Opt-in offer presented + +**Skill Entry Analytics:** +```typescript +{ + initial_intent: string, + domain: string, + was_hey_jibo_launch: boolean, + user_initiated: boolean, + last_skill: string +} +``` + +**Tracking:** +```typescript +this.track(data, 'CustomEvent', { key: value }); +``` + +Events are automatically included in SKILL_ACTION responses. + +## Server-to-Robot Communication Flow + +### Skill Response to Hub + +When a skill returns a response, the Hub forwards it to the robot: + +**SKILL_ACTION Response:** +1. 
Skill returns SKILL_ACTION with JCP behavior +2. Hub adds timing information +3. Hub sends SKILL_ACTION to robot via WebSocket +4. Robot executes JCP behavior +5. Robot sends CMD_RESULT back to Hub +6. Hub sends LISTEN_UPDATE to skill +7. Skill processes result, returns next action + +**Final SKILL_ACTION:** +1. Skill returns SKILL_ACTION with `final: true` +2. Hub sends to robot +3. Robot executes (if action present) +4. Transaction complete + +**SKILL_REDIRECT:** +1. Skill returns SKILL_REDIRECT +2. Hub emits SKILL_REDIRECT notification to robot +3. Hub launches new skill +4. New skill proceeds normally + +### JCP Action Execution + +**Single Behavior (SLIM):** +```typescript +{ + type: "JCP", + config: { + version: "1.0.0", + jcp: SayTextBehavior + } +} +``` +Robot executes single behavior immediately. + +**Sequence Behavior:** +```typescript +{ + type: "JCP", + config: { + version: "1.0.0", + jcp: Sequence([ + LookAtBehavior, + SayTextBehavior, + GestureBehavior + ]) + } +} +``` +Robot executes behaviors in order. + +**Parallel Behavior:** +```typescript +{ + type: "JCP", + config: { + version: "1.0.0", + jcp: Parallel([ + SetPresentPersonBehavior, + SayTextBehavior + ]) + } +} +``` +Robot executes behaviors simultaneously. 
+ +### Supplemental Behaviors + +Skills can add behaviors that execute alongside the main action: + +**Parallel Supplemental:** +```typescript +this.addParallelBehavior(data, SetPresentPersonBehavior); +// Main action: SayTextBehavior +// Result: Parallel([SetPresentPersonBehavior, SayTextBehavior]) +``` + +**Sequence Supplemental:** +```typescript +this.addSequenceBehavior(data, LookAtBehavior); +// Main action: SayTextBehavior +// Result: Sequence([LookAtBehavior, SayTextBehavior]) +``` + +**Combined:** +```typescript +this.addSequenceBehavior(data, LookAtBehavior); +this.addParallelBehavior(data, SetPresentPersonBehavior); +// Result: Parallel([SetPresentPersonBehavior, Sequence([LookAtBehavior, SayTextBehavior])]) +``` + +## Example Skill Implementation + +### Chitchat Skill + +**Location:** `packages/chitchat-skill/src/Chitchat.ts` + +**Purpose:** Handles conversational interactions with the robot. + +**Graph Structure:** +1. **IntentSplitNode** - Splits based on intent type +2. **ProcessQueryNode** - Processes user query, selects response +3. **DoMIM (ANFactory)** - Plays selected MIM animation +4. 
**Complete (DefaultNode)** - Terminates skill + +**Initialization:** +- Loads MIM files from directories +- Builds semi-specific mappings +- Reads category CSV files + +**Response Selection:** +- Scripted responses for common queries +- Emotion responses for emotional queries +- Semi-specific responses for context-aware queries +- Fallback responses for unknown queries + +**MIM Selection:** +- Based on intent and entities +- Considers semi-specific categories +- Falls back to core responses + +## Skill Development Guide + +### Creating a Simple Skill + +```typescript +import { BaseSkill } from '@jibo/baseskill'; +import { skill } from '@jibo/interfaces'; + +export class MySkill extends BaseSkill { + constructor() { + super('my-skill'); + } + + protected async handle(req: PegasusRequest): Promise { + const data = req.body.data; + + // Process request + const action = generateJCPAction(SayTextBehavior("Hello!")); + + return { + type: skill.response.ResponseType.SKILL_ACTION, + data: { + action: action, + final: true, + fireAndForget: true + }, + ts: Date.now(), + msgID: getUUID() + }; + } +} +``` + +### Creating a Graph Skill + +```typescript +import { GraphSkill, graph } from '@jibo/baseskill'; + +enum Transition { + Done = 'Done', + Retry = 'Retry' +} + +export class MyGraphSkill extends GraphSkill { + constructor() { + super('my-graph-skill'); + } + + createGraph(): graph.Graph { + const g = new graph.Graph('My Skill', generateTransitions(Transition)); + + const startNode = new MyStartNode('Start'); + const endNode = new graph.nodes.dn.DefaultNode('End'); + + g.addNode(startNode, [[Transition.Done, endNode]]); + g.addNode(endNode, [[graph.nodes.dn.Transition.Done, Transition.Done]]); + + g.finalize(); + return g; + } +} +``` + +### Creating a Custom Node + +```typescript +import { Node, Data, EnterResponse, ExitResponse } from '@jibo/baseskill'; + +enum MyTransition { + Success = 'Success', + Failure = 'Failure' +} + +class MyNode extends Node { + constructor() { 
+ super('MyNode', [MyTransition.Success, MyTransition.Failure]); + } + + async enter(data: Data): Promise<EnterResponse> { + // Perform logic + const action = generateJCPAction(SayTextBehavior("Processing...")); + return { action }; + } + + async exit(data: Data): Promise<ExitResponse> { + // Process action results + if (data.result.success) { + return { transition: MyTransition.Success }; + } else { + return { transition: MyTransition.Failure }; + } + } +} +``` + +## Key Design Principles + +1. **State Machine** - Graph-based state machine for complex flows +2. **Single Responsibility** - Each node handles one piece of logic +3. **Reusability** - Subgraphs and node types can be reused +4. **Testability** - Nodes can be tested independently +5. **Visualization** - GraphViz generation for debugging +6. **Analytics** - Built-in event tracking +7. **Flexibility** - Supports both simple and complex skills +8. **Supplemental Behaviors** - Easy to add parallel/sequence actions