Merge branch 'main' of https://kevinblog.sytes.net/Code/Jibo-Revival-Group/JiboExperiments
This commit is contained in:
1249
OpenJibo/docs/DesignDoc/additional-sections-design.md
Normal file
1249
OpenJibo/docs/DesignDoc/additional-sections-design.md
Normal file
File diff suppressed because it is too large
Load Diff
1011
OpenJibo/docs/DesignDoc/communication-design.md
Normal file
1011
OpenJibo/docs/DesignDoc/communication-design.md
Normal file
File diff suppressed because it is too large
Load Diff
777
OpenJibo/docs/DesignDoc/hub-service-design.md
Normal file
777
OpenJibo/docs/DesignDoc/hub-service-design.md
Normal file
@@ -0,0 +1,777 @@
|
||||
# Hub Service Design Document
|
||||
|
||||
## Overview
|
||||
|
||||
The Hub Service is the central orchestrator of the Jibo cloud system. It coordinates all communication between the robot and cloud services, managing speech recognition, natural language understanding, skill routing, and proactive behaviors. The Hub exposes WebSocket endpoints for real-time bidirectional communication with the robot.
|
||||
|
||||
## Location
|
||||
|
||||
`packages/hub/src/`
|
||||
|
||||
## Key Components
|
||||
|
||||
### HubService (`HubService.ts`)
|
||||
|
||||
Main service class extending `BaseService` from `@jibo/utils`. Initializes and manages all hub components.
|
||||
|
||||
**HubComponents** (dependency injection container):
|
||||
- `parser: ParserClient` - NLU service client
|
||||
- `skillConfigManager: SkillConfigManager` - Manages skill configurations
|
||||
- `intentRouter: IntentRouter` - Routes intents to skills
|
||||
- `skillRequestMaker: SkillRequestMaker` - Makes HTTP requests to skills
|
||||
- `history: HistoryServiceClient` - History service client
|
||||
- `hubSettings: HubSettings` - Hub configuration
|
||||
- `settingsClient: SettingsClient` - Settings service client
|
||||
|
||||
### WebSocket Handlers
|
||||
|
||||
- **ListenHandler** (`listen/ListenHandler.ts`) - Handles `/listen` and `/v1/listen` endpoints
|
||||
- **ProactiveSocketRequestHandler** (`proactive/ProactiveSocketRequestHandler.ts`) - Handles `/proactive` and `/v1/proactive` endpoints
|
||||
|
||||
### Transaction Handlers
|
||||
|
||||
- **ListenTransactionHandler** (`listen/ListenTransactionHandler.ts`) - State machine for listen transactions
|
||||
- **ProactiveTransactionHandler** (`proactive/ProactiveTransactionHandler.ts`) - Handles proactive action selection
|
||||
|
||||
## WebSocket Endpoints
|
||||
|
||||
### Listen Endpoint
|
||||
|
||||
**URL:** `ws://hub:9000/listen` or `ws://hub:9000/v1/listen`
|
||||
|
||||
**Authentication:** Bearer JWT in the `Authorization` header
|
||||
|
||||
**Headers:**
|
||||
- `x-jibo-transid` - Transaction ID
|
||||
- `x-jibo-robotid` - Robot ID
|
||||
- `x-jibo-logging-config` - Log level configuration
|
||||
|
||||
### Proactive Endpoint
|
||||
|
||||
**URL:** `ws://hub:9000/proactive` or `ws://hub:9000/v1/proactive`
|
||||
|
||||
**Authentication:** Same as listen endpoint
|
||||
|
||||
## Listen Transaction Flow
|
||||
|
||||
The listen transaction follows a state machine with the following states:
|
||||
|
||||
```
|
||||
WAIT_LISTEN → ASR → NLU → ROUTE → DONE
|
||||
WAIT_LISTEN → WAIT_CLIENT_ASR → NLU → ROUTE → DONE
|
||||
WAIT_LISTEN → WAIT_CLIENT_NLU → ROUTE → DONE
|
||||
```
|
||||
|
||||
### State Machine Implementation
|
||||
|
||||
**File:** `packages/hub/src/listen/ListenTransactionHandler.ts`
|
||||
|
||||
**States:**
|
||||
- `WAIT_LISTEN` - Waiting for LISTEN message from robot
|
||||
- `WAIT_CLIENT_ASR` - Waiting for client-provided ASR result
|
||||
- `WAIT_CLIENT_NLU` - Waiting for client-provided NLU result
|
||||
- `ASR` - Performing speech recognition
|
||||
- `NLU` - Performing natural language understanding
|
||||
- `ROUTE` - Routing to appropriate skill
|
||||
- `DONE` - Transaction complete
|
||||
- `STOP` - Transaction stopped
|
||||
|
||||
**Timeouts:**
|
||||
- ASR: 40 seconds (configurable via `sosTimeout` and `maxSpeechTimeout` in the LISTEN message's `asr` block)
|
||||
- Parser: 10 seconds
|
||||
- Context: 5 seconds
|
||||
- Skill: 10 seconds
|
||||
- Transaction: 60 seconds (default)
|
||||
|
||||
### Robot-to-Hub Messages (Listen Flow)
|
||||
|
||||
1. **LISTEN** - Initiates listen transaction
|
||||
```typescript
|
||||
{
|
||||
type: "LISTEN",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
mode: "default" | "CLIENT_ASR" | "CLIENT_NLU",
|
||||
lang: "en-US",
|
||||
hotphrase: boolean,
|
||||
rules: string[],
|
||||
asr: {
|
||||
sosTimeout: number,
|
||||
maxSpeechTimeout: number,
|
||||
hints: string[],
|
||||
earlyEOS: string[]
|
||||
},
|
||||
agents: ExternalAgentRequest[]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
2. **Audio Packets** - Binary audio data streamed after LISTEN
|
||||
|
||||
3. **CONTEXT** - Runtime context from robot
|
||||
```typescript
|
||||
{
|
||||
type: "CONTEXT",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
general: {
|
||||
accountID: string,
|
||||
robotID: string,
|
||||
lang: string,
|
||||
release: string
|
||||
},
|
||||
runtime: {
|
||||
character: { emotion, motivation },
|
||||
location: { city, state, country, lat, lng },
|
||||
loop: { users, jibo, owner, loopId },
|
||||
perception: { speaker, peoplePresent },
|
||||
dialog: { referent }
|
||||
},
|
||||
skill: {
|
||||
id: string,
|
||||
session: { id, nodeID, data, trace }
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
4. **CLIENT_ASR** - Client-provided ASR result (for menu clicks, etc.)
|
||||
```typescript
|
||||
{
|
||||
type: "CLIENT_ASR",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
text: string
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
5. **CLIENT_NLU** - Client-provided NLU result
|
||||
```typescript
|
||||
{
|
||||
type: "CLIENT_NLU",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
intent: string,
|
||||
entities: {},
|
||||
rules: []
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Hub-to-Robot Messages (Listen Flow)
|
||||
|
||||
#### 1. SOS (Start of Speech)
|
||||
|
||||
**Emitted when:** Speech is detected during ASR
|
||||
|
||||
**Location:** `ListenTransactionHandler.emitSOS()`
|
||||
|
||||
```typescript
|
||||
{
|
||||
type: "SOS",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: null,
|
||||
timings: {
|
||||
total: number
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Trigger conditions:**
|
||||
- Google Cloud Speech API detects start of speech
|
||||
- ASRSession calls `onStartOfSpeech` callback
|
||||
- Clears SOS timeout timer
|
||||
|
||||
#### 2. EOS (End of Speech)
|
||||
|
||||
**Emitted when:** Speech ends during ASR
|
||||
|
||||
**Location:** `ListenTransactionHandler.emitEOS()`
|
||||
|
||||
```typescript
|
||||
{
|
||||
type: "EOS",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: null,
|
||||
timings: {
|
||||
total: number
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Trigger conditions:**
|
||||
- Google Cloud Speech API detects end of speech
|
||||
- ASRSession calls `onEndOfSpeech` callback
|
||||
- Clears max speech timeout timer
|
||||
|
||||
#### 3. LISTEN Response (ASR/NLU Result)
|
||||
|
||||
**Emitted when:** ASR and NLU processing complete
|
||||
|
||||
**Location:** `ListenTransactionHandler.emitListenResult()`
|
||||
|
||||
```typescript
|
||||
{
|
||||
type: "LISTEN",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
asr: {
|
||||
text: string,
|
||||
confidence: number,
|
||||
annotation: "GARBAGE" | "SOS_TIMEOUT" | "MAX_SPEECH_TIMEOUT"
|
||||
},
|
||||
nlu: {
|
||||
intent: string,
|
||||
entities: {},
|
||||
rules: []
|
||||
},
|
||||
match: {
|
||||
skillID: string,
|
||||
launch: boolean,
|
||||
onRobot: boolean
|
||||
} | null
|
||||
},
|
||||
final: boolean,
|
||||
timings: {
|
||||
total: number,
|
||||
asr: number,
|
||||
nlu: number
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Emission scenarios:**
|
||||
- **No match:** `match: null, final: true` - No skill matched the NLU result
|
||||
- **On-robot skill:** `match.onRobot: true, final: true` - Skill runs on robot, Hub done
|
||||
- **Cloud skill:** `match.onRobot: false, final: false` - Skill runs in cloud, Hub will send skill actions
|
||||
|
||||
#### 4. SKILL_ACTION
|
||||
|
||||
**Emitted when:** Cloud skill returns an action to execute
|
||||
|
||||
**Location:** `TransactionHandler.emitSkillResult()`
|
||||
|
||||
```typescript
|
||||
{
|
||||
type: "SKILL_ACTION",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
action: {
|
||||
type: "JCP",
|
||||
config: {
|
||||
version: "1.0.0",
|
||||
jcp: SupportedBehaviors // SLIM, Sequence, Parallel, SetPresentPerson, ImpactEmotion
|
||||
}
|
||||
},
|
||||
analytics?: AnalyticsData,
|
||||
fireAndForget?: boolean
|
||||
},
|
||||
final: boolean,
|
||||
timings: {
|
||||
total: number,
|
||||
skill: number
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**JCP Behavior Types:**
|
||||
- `SLIM` - Single behavior execution
|
||||
- `Sequence` - Sequential behavior execution
|
||||
- `Parallel` - Parallel behavior execution
|
||||
- `SetPresentPerson` - Set focused person
|
||||
- `ImpactEmotion` - Modify Jibo's emotional state
|
||||
|
||||
**Emission scenarios:**
|
||||
- **Non-final:** `final: false` - Robot should execute action and send CMD_RESULT back
|
||||
- **Final:** `final: true` - Transaction complete, no more actions expected
|
||||
|
||||
#### 5. SKILL_REDIRECT
|
||||
|
||||
**Emitted when:** Skill redirects to another skill
|
||||
|
||||
**Location:** `TransactionHandler.emitSkillRedirectNotification()`
|
||||
|
||||
```typescript
|
||||
{
|
||||
type: "SKILL_REDIRECT",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
match: {
|
||||
skillID: string,
|
||||
launch: boolean,
|
||||
onRobot: boolean
|
||||
},
|
||||
nlu: NLUResult,
|
||||
asr: ASRResult,
|
||||
memo: any
|
||||
},
|
||||
final: boolean
|
||||
}
|
||||
```
|
||||
|
||||
**Emission scenarios:**
|
||||
- Skill returns `SKILL_REDIRECT` response
|
||||
- Hub launches new skill with provided context
|
||||
- Only one level of redirect supported (error on second redirect)
|
||||
|
||||
#### 6. ERROR
|
||||
|
||||
**Emitted when:** An error occurs during transaction
|
||||
|
||||
**Location:** `TransactionHandler.emitSkillResult()` (error case)
|
||||
|
||||
```typescript
|
||||
{
|
||||
type: "ERROR",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
message: string
|
||||
},
|
||||
final: true,
|
||||
timings: {
|
||||
total: number
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Listen Transaction State Transitions
|
||||
|
||||
#### WAIT_LISTEN → ASR
|
||||
|
||||
**Trigger:** LISTEN message received with mode="default"
|
||||
|
||||
**Actions:**
|
||||
- Initialize ASRSession with Google Cloud Speech API
|
||||
- Start audio streaming
|
||||
- Set up SOS timeout (if configured)
|
||||
- Set up max speech timeout (if configured)
|
||||
|
||||
#### WAIT_LISTEN → WAIT_CLIENT_ASR
|
||||
|
||||
**Trigger:** LISTEN message received with mode="CLIENT_ASR"
|
||||
|
||||
**Actions:**
|
||||
- Emit fake SOS (immediate)
|
||||
- Wait for CLIENT_ASR message from robot
|
||||
|
||||
#### WAIT_LISTEN → WAIT_CLIENT_NLU
|
||||
|
||||
**Trigger:** LISTEN message received with mode="CLIENT_NLU"
|
||||
|
||||
**Actions:**
|
||||
- Emit fake SOS (immediate)
|
||||
- Wait for CLIENT_NLU message from robot
|
||||
|
||||
#### ASR → NLU
|
||||
|
||||
**Trigger:** ASR completes successfully
|
||||
|
||||
**Actions:**
|
||||
- Stop ASR session
|
||||
- Normalize ASR text
|
||||
- Check for garbage annotation (skip NLU if garbage)
|
||||
- Wait for CONTEXT message (5 second timeout)
|
||||
- Send ASR text to Parser service
|
||||
|
||||
#### WAIT_CLIENT_ASR → NLU
|
||||
|
||||
**Trigger:** CLIENT_ASR message received
|
||||
|
||||
**Actions:**
|
||||
- Use provided ASR text
|
||||
- Emit fake EOS
|
||||
- Proceed to NLU
|
||||
|
||||
#### WAIT_CLIENT_NLU → ROUTE
|
||||
|
||||
**Trigger:** CLIENT_NLU message received
|
||||
|
||||
**Actions:**
|
||||
- Use provided NLU result
|
||||
- Emit fake EOS
|
||||
- Skip NLU, proceed to routing
|
||||
|
||||
#### NLU → ROUTE
|
||||
|
||||
**Trigger:** Parser returns NLU result
|
||||
|
||||
**Actions:**
|
||||
- Wait for CONTEXT message (5 second timeout)
|
||||
- Call IntentRouter to match skill
|
||||
- Apply DecisionMediator for external factors
|
||||
- Route to matched skill or context skill
|
||||
|
||||
#### ROUTE → DONE
|
||||
|
||||
**Trigger:** Routing complete
|
||||
|
||||
**Actions:**
|
||||
- For on-robot skills: Emit LISTEN with match, transaction done
|
||||
- For cloud skills: Emit LISTEN with match (`final: false`), get skill response, emit SKILL_ACTION; transaction done once a `final: true` action is sent
|
||||
- For no match: Emit LISTEN with match=null, transaction done
|
||||
|
||||
## Intent Routing
|
||||
|
||||
### IntentRouter (`intent/IntentRouter.ts`)
|
||||
|
||||
Matches NLU results to registered cloud skills.
|
||||
|
||||
**Routing Logic:**
|
||||
1. Check if NLU has intent and 'launch' rule
|
||||
2. Query all skill configurations
|
||||
3. Match intent against skill intent configurations
|
||||
4. Match entities against skill entity configurations
|
||||
5. Return first matching skill decision
|
||||
|
||||
**DecisionMediator** (`intent/DecisionMediator.ts`):
|
||||
- Can alter routing decisions based on external factors
|
||||
- Considers robot release version
|
||||
- May redirect to different skill based on context
|
||||
|
||||
**IRDecisionMaker** (`intent/IRDecisionMaker.ts`):
|
||||
- Core matching algorithm
|
||||
- Compares intent names and entity values
|
||||
- Supports exact match and NOT match rules
|
||||
|
||||
### Skill Request Maker (`skill/SkillRequestMaker.ts`)
|
||||
|
||||
Makes HTTP requests to cloud skills.
|
||||
|
||||
**Methods:**
|
||||
- `skillLaunch(skillID, data, jiboHeaders, log)` - Launch new skill
|
||||
- `skillLaunchOrUpdate(skillID, data, jiboHeaders, log, update)` - Launch or update skill
|
||||
- `proactiveLaunch(skillID, data, jiboHeaders, log)` - Proactive launch
|
||||
|
||||
**Request Format:**
|
||||
```typescript
|
||||
{
|
||||
type: "LISTEN_LAUNCH" | "LISTEN_UPDATE" | "PROACTIVE_LAUNCH",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
general: { accountID, robotID, lang, release },
|
||||
runtime: { character, location, loop, perception, dialog },
|
||||
skill: { id, session? },
|
||||
result?: any, // For UPDATE
|
||||
nlu: NLUResult,
|
||||
asr: ASRResult,
|
||||
memo?: any
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Timeout:** 10 seconds (configurable)
|
||||
|
||||
**Error Handling:**
|
||||
- `SKILL_NOT_FOUND` - Skill does not exist or is on-robot
|
||||
- `TIMEOUT` - Skill request timeout
|
||||
|
||||
## Proactive Flow
|
||||
|
||||
### Proactive Transaction Handler (`proactive/ProactiveTransactionHandler.ts`)
|
||||
|
||||
Handles proactive action selection based on context, history, and settings.
|
||||
|
||||
### Robot-to-Hub Messages (Proactive Flow)
|
||||
|
||||
1. **TRIGGER** - Initiates proactive selection
|
||||
```typescript
|
||||
{
|
||||
type: "TRIGGER",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
triggerData: {
|
||||
triggerType: string,
|
||||
looperID?: string
|
||||
},
|
||||
triggerSource: "SURPRISE" | "OTHER"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
2. **CONTEXT** - Runtime context (same as listen flow)
|
||||
|
||||
### Hub-to-Robot Messages (Proactive Flow)
|
||||
|
||||
#### PROACTIVE Match Response
|
||||
|
||||
**Emitted when:** Proactive action selected
|
||||
|
||||
**Location:** `ProactiveTransactionHandler.emitMatchResponse()`
|
||||
|
||||
```typescript
|
||||
{
|
||||
type: "PROACTIVE",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
match: {
|
||||
skillID: string,
|
||||
onRobot: boolean,
|
||||
isProactive: true,
|
||||
launch: true,
|
||||
skipSurprises: boolean
|
||||
}
|
||||
},
|
||||
final: boolean
|
||||
}
|
||||
```
|
||||
|
||||
**Emission scenarios:**
|
||||
- **On-robot skill:** `final: true` - Robot handles skill, Hub done
|
||||
- **Cloud skill:** `final: false` - Hub will send skill actions
|
||||
|
||||
#### PROACTIVE No-Action Response
|
||||
|
||||
**Emitted when:** No eligible proactive action found
|
||||
|
||||
**Location:** `ProactiveTransactionHandler.emitNoActionResponse()`
|
||||
|
||||
```typescript
|
||||
{
|
||||
type: "PROACTIVE",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {},
|
||||
final: true
|
||||
}
|
||||
```
|
||||
|
||||
### Proactive Action Selection Algorithm
|
||||
|
||||
**Location:** `ProactiveTransactionHandler.getEligibleActions()`
|
||||
|
||||
**Steps:**
|
||||
|
||||
1. **Get all proactive skill configurations**
|
||||
- Query SkillConfigManager for skills with proactive registrations
|
||||
|
||||
2. **Gather transaction data**
|
||||
- Extract focused person, present people, loop ID, robot ID
|
||||
- Use ContextTools to extract context fields
|
||||
|
||||
3. **Fetch user settings** (if focused person)
|
||||
- Batch request to SettingsClient for all skill settings
|
||||
- Consolidate into skill settings map
|
||||
|
||||
4. **Filter by context rules**
|
||||
- Check time-based rules (time of day, day of week)
|
||||
- Check location rules
|
||||
- Check people present rules
|
||||
- Check robot state rules
|
||||
|
||||
5. **Filter by interaction history rules**
|
||||
- Query History service for past interactions
|
||||
- Check frequency rules (e.g., "at most once per hour")
|
||||
- Check recency rules (e.g., "not in last 10 minutes")
|
||||
- Check sequence rules (e.g., "after greeting skill")
|
||||
|
||||
6. **Filter by settings rules**
|
||||
- Check user preferences for each skill
|
||||
- Check enabled/disabled status
|
||||
- Check custom parameters
|
||||
|
||||
7. **Select action**
|
||||
- Currently: Random selection from eligible actions
|
||||
- Future: Heuristics based on context, engagement, topics
|
||||
|
||||
### Context Tools (`proactive/tools/ContextTools.ts`)
|
||||
|
||||
Helper functions for context rule evaluation:
|
||||
|
||||
- `extractContextData(field, context, requestData, log)` - Extract specific context field
|
||||
- `checkContextRules(registration, context, requestData, log)` - Evaluate all context rules
|
||||
|
||||
### History Rules Checker (`proactive/tools/IHRulesChecker.ts`)
|
||||
|
||||
Evaluates interaction history rules:
|
||||
|
||||
- `checkIHRules(registrations, IHQueries, data, log)` - Filter by history rules
|
||||
- Queries History service for past skill launches
|
||||
- Applies frequency, recency, and sequence constraints
|
||||
|
||||
### Settings Rules Checker (`proactive/tools/SettingsRulesChecker.ts`)
|
||||
|
||||
Evaluates user settings:
|
||||
|
||||
- `getSkillSettingsMap(skillConfigs, accountID, loopID, transID)` - Batch fetch settings
|
||||
- `checkSettingsRegistrations(registrations, skillSettingsMap)` - Filter by settings
|
||||
|
||||
## Skill Interaction Flow (Cloud Skills)
|
||||
|
||||
### Initial Launch
|
||||
|
||||
1. Hub sends LISTEN_LAUNCH request to skill
|
||||
2. Skill processes request, returns SKILL_ACTION
|
||||
3. Hub sends SKILL_ACTION to robot
|
||||
4. Robot executes action, sends CMD_RESULT to Hub
|
||||
5. Hub sends LISTEN_UPDATE request to skill with action result
|
||||
6. Skill processes result, returns next SKILL_ACTION or final=true
|
||||
7. Repeat steps 3-6 until skill returns final=true
|
||||
|
||||
### Skill Redirect
|
||||
|
||||
1. Skill returns SKILL_REDIRECT response
|
||||
2. Hub emits SKILL_REDIRECT notification to robot
|
||||
3. Hub sends launch request to new skill
|
||||
4. New skill proceeds with normal flow
|
||||
5. Error if second redirect attempted
|
||||
|
||||
## Message Timing
|
||||
|
||||
### Listen Transaction Timing
|
||||
|
||||
**Timings tracked:**
|
||||
- `total` - Total transaction time
|
||||
- `asr` - ASR processing time
|
||||
- `nlu` - NLU processing time
|
||||
- `skill` - Skill processing time
|
||||
|
||||
**Timing emission:**
|
||||
- SOS/EOS include timing from start
|
||||
- LISTEN response includes ASR and NLU timings
|
||||
- SKILL_ACTION includes skill timing
|
||||
|
||||
### Proactive Transaction Timing
|
||||
|
||||
**Timings tracked:**
|
||||
- `total` - Total transaction time
|
||||
- `skill` - Skill processing time
|
||||
|
||||
## Error Handling
|
||||
|
||||
### Hub Error Codes (`HubErrorCode.ts`)
|
||||
|
||||
- `TIMEOUT_ASR` - ASR timeout (40 seconds)
|
||||
- `TIMEOUT_PARSER` - Parser timeout (10 seconds)
|
||||
- `TIMEOUT_CONTEXT` - Context timeout (5 seconds)
|
||||
- `TIMEOUT_SKILL` - Skill timeout (10 seconds)
|
||||
- `PARSER` - Parser error
|
||||
- `ASR` - ASR error
|
||||
|
||||
### Error Response Format
|
||||
|
||||
```typescript
|
||||
{
|
||||
type: "ERROR",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
message: string,
|
||||
code?: string
|
||||
},
|
||||
final: true,
|
||||
timings: {
|
||||
total: number
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Speech History Recording
|
||||
|
||||
### Optional Features
|
||||
|
||||
**Configuration:**
|
||||
- `ETCO_hub_recordLaunchHistory` - Record skill launches to MongoDB
|
||||
- `ETCO_hub_recordSpeechHistory` - Record speech interactions to MongoDB
|
||||
- `ETCO_hub_recordSpeechLogBucket` - Upload speech logs to S3
|
||||
|
||||
### Speech History Record
|
||||
|
||||
**Data recorded:**
|
||||
- Robot ID, account ID, transaction ID
|
||||
- Timestamp
|
||||
- ASR result
|
||||
- NLU result
|
||||
- Match data
|
||||
- Skill response
|
||||
- Redirect data
|
||||
- Error (if any)
|
||||
|
||||
### S3 Upload
|
||||
|
||||
**Format:** JSON with audio as base64
|
||||
|
||||
**Path:** `{robotID}/year={year}/month={month}/day={day}/{timestamp}-{transID}.json`
|
||||
|
||||
## Hub Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
||||
**Hub Settings:**
|
||||
- `ETCO_hub_recordLaunchHistory` - Enable launch history
|
||||
- `ETCO_hub_recordSpeechHistory` - Enable speech history
|
||||
- `ETCO_hub_recordSpeechLogBucket` - S3 bucket for speech logs
|
||||
|
||||
**Authentication:**
|
||||
- `ETCO_server_hubTokenSecret` - JWT secret for token verification
|
||||
|
||||
### Skill Configuration
|
||||
|
||||
**Sources:**
|
||||
- `skills-local.json` - Local development configuration
|
||||
- Environment variables - Production configuration
|
||||
- Settings service - Dynamic configuration
|
||||
|
||||
**Skill Config Structure:**
|
||||
```typescript
|
||||
{
|
||||
id: string,
|
||||
intents: [{
|
||||
name: string,
|
||||
entities?: [{ name, value, matchRule }],
|
||||
memo?: any
|
||||
}],
|
||||
proactives?: [{
|
||||
triggerType: string,
|
||||
contextRules?: ContextRule[],
|
||||
IHRules?: IHRule[],
|
||||
settingsRules?: SettingsRule[],
|
||||
memo?: any
|
||||
}],
|
||||
IHQueries?: IHQueryDefinitions,
|
||||
onRobot?: boolean,
|
||||
URL: string,
|
||||
settings?: ManifestSettings
|
||||
}
|
||||
```
|
||||
|
||||
## Summary of Server-to-Robot Communication
|
||||
|
||||
### Listen Flow
|
||||
|
||||
1. **SOS** - Speech detected
|
||||
2. **EOS** - Speech ended
|
||||
3. **LISTEN** - ASR/NLU result with match data
|
||||
4. **SKILL_ACTION** - JCP action to execute (repeated for multi-turn)
|
||||
5. **SKILL_REDIRECT** - Skill redirect notification
|
||||
6. **ERROR** - Error occurred
|
||||
|
||||
### Proactive Flow
|
||||
|
||||
1. **PROACTIVE** - Match or no-action response
|
||||
2. **SKILL_ACTION** - JCP action to execute (if cloud skill)
|
||||
3. **SKILL_REDIRECT** - Skill redirect notification
|
||||
4. **ERROR** - Error occurred
|
||||
|
||||
### Key Design Principles
|
||||
|
||||
1. **State Machine** - Clear state transitions with validation
|
||||
2. **Timeouts** - Every operation has a timeout to prevent hanging
|
||||
3. **Error Handling** - Errors propagate to robot with clear messages
|
||||
4. **Timing** - All operations are timed for monitoring
|
||||
5. **History** - Interactions can be recorded for analysis (launch and speech history are opt-in via configuration)
|
||||
6. **Flexibility** - Supports on-robot and cloud skills
|
||||
7. **Proactivity** - Context-aware action selection
|
||||
792
OpenJibo/docs/DesignDoc/original-server-design.md
Normal file
792
OpenJibo/docs/DesignDoc/original-server-design.md
Normal file
@@ -0,0 +1,792 @@
|
||||
# Original Jibo Server (Pegasus) Design Document
|
||||
|
||||
## Executive Summary
|
||||
|
||||
The original Jibo server, codenamed "Pegasus" (formerly V1.X), is a cloud-based microservices architecture that powers the Jibo social robot's conversational AI capabilities. It is built as a Lerna monorepo using Node.js/TypeScript and deployed via Docker containers. The system processes speech, performs natural language understanding, routes to appropriate skills, and manages proactive behaviors.
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
### Monorepo Structure
|
||||
|
||||
The codebase is organized as a Lerna monorepo with the following main packages:
|
||||
|
||||
- **packages/hub** - Central orchestration service
|
||||
- **packages/parser** - NLU (Natural Language Understanding) service
|
||||
- **packages/history** - Data persistence service (MongoDB)
|
||||
- **packages/baseskill** - Base class and framework for cloud skills
|
||||
- **packages/interfaces** - TypeScript interfaces and API contracts
|
||||
- **packages/utils** - Shared utility libraries
|
||||
- **packages/chitchat-skill** - Example conversational skill
|
||||
- **packages/report-skill** - Reporting skill
|
||||
- **packages/lasso** - External data integration service
|
||||
- **packages/hub-client** - Client library for hub communication
|
||||
- **packages/history-client** - Client library for history service
|
||||
- **packages/test-utils** - Testing utilities
|
||||
|
||||
### Technology Stack
|
||||
|
||||
- **Language**: TypeScript 2.5.3
|
||||
- **Runtime**: Node.js 8.9.4
|
||||
- **Package Manager**: Yarn 1.7.0
|
||||
- **Containerization**: Docker
|
||||
- **Orchestration**: Docker Compose (local), AWS ECS (production)
|
||||
- **Database**: MongoDB 3.6.0
|
||||
- **Cache**: Redis 3
|
||||
- **NLU**: Dialogflow (formerly API.ai)
|
||||
- **ASR**: Google Cloud Speech API
|
||||
- **WebSocket**: ws library
|
||||
- **HTTP**: Express.js
|
||||
- **Authentication**: JWT (jsonwebtoken)
|
||||
|
||||
## Core Services
|
||||
|
||||
### 1. Hub Service (`packages/hub`)
|
||||
|
||||
The Hub is the central orchestrator that coordinates all interactions between the robot and cloud services.
|
||||
|
||||
#### Key Components
|
||||
|
||||
**HubService** (`HubService.ts`)
|
||||
- Main service class extending `BaseService`
|
||||
- Initializes and manages all hub components
|
||||
- Registers WebSocket and HTTP handlers
|
||||
|
||||
**HubComponents** - Dependency injection container:
|
||||
- `parser: ParserClient` - NLU service client
|
||||
- `skillConfigManager: SkillConfigManager` - Manages skill configurations
|
||||
- `intentRouter: IntentRouter` - Routes intents to skills
|
||||
- `skillRequestMaker: SkillRequestMaker` - Makes HTTP requests to skills
|
||||
- `history: HistoryServiceClient` - History service client
|
||||
- `hubSettings: HubSettings` - Hub configuration
|
||||
- `settingsClient: SettingsClient` - Settings service client
|
||||
|
||||
#### Endpoints
|
||||
|
||||
**WebSocket Endpoints:**
|
||||
- `/listen` and `/v1/listen` - Handles speech recognition and NLU
|
||||
- `/proactive` and `/v1/proactive` - Handles proactive triggers
|
||||
|
||||
**HTTP Endpoints:**
|
||||
- `/skills` and `/v1/skills` - Lists available skills
|
||||
- `/healthcheck` - Service health check
|
||||
|
||||
#### Listen Flow
|
||||
|
||||
The listen transaction follows a state machine implemented in `ListenTransactionHandler`:
|
||||
|
||||
```
|
||||
States:
|
||||
WAIT_LISTEN → ASR → NLU → ROUTE → DONE
|
||||
WAIT_LISTEN → WAIT_CLIENT_ASR → NLU → ROUTE → DONE
|
||||
WAIT_LISTEN → WAIT_CLIENT_NLU → ROUTE → DONE
|
||||
```
|
||||
|
||||
**State Transitions:**
|
||||
|
||||
1. **WAIT_LISTEN** - Receives LISTEN message from robot
2. **ASR** - Performs Automatic Speech Recognition using Google Cloud Speech API
   - Streams audio packets
   - Emits SOS (Start of Speech) when speech detected
   - Emits EOS (End of Speech) when speech ends
   - Handles timeouts (SOS timeout, max speech timeout)
3. **NLU** - Sends ASR text to Parser service for intent recognition
   - Includes context (loop users, perception, etc.)
   - Supports external Dialogflow agents
4. **ROUTE** - Intent Router determines which skill to launch
   - Matches NLU result against skill intent configurations
   - Decision Mediator can alter decisions based on external factors
   - Routes to on-robot skills or cloud skills
5. **DONE** - Transaction complete
|
||||
|
||||
**Listen Transaction Handler** (`ListenTransactionHandler.ts`):
|
||||
- Manages audio streaming via `AudioBuffer`
|
||||
- Creates `ASRSession` for speech recognition
|
||||
- Handles timeouts (ASR: 40s, Parser: 10s, Context: 5s, Skill: 10s)
|
||||
- Records speech history to MongoDB and optionally S3
|
||||
- Supports client-provided ASR/NLU (for menu clicks, etc.)
|
||||
- Handles skill redirects
|
||||
|
||||
#### Proactive Flow
|
||||
|
||||
The proactive system allows Jibo to initiate conversations based on context, history, and triggers.
|
||||
|
||||
**Proactive Transaction Handler** (`ProactiveTransactionHandler.ts`):
|
||||
|
||||
1. Receives TRIGGER message from robot
2. Waits for CONTEXT message (robot state)
3. **Action Selection**:
   - Gets all proactive skill configurations
   - Filters by context rules (time, location, people present, etc.)
   - Filters by interaction history rules (frequency, recency)
   - Filters by user settings
   - Randomly selects from eligible actions
4. Launches selected skill (on-robot or cloud)
5. Returns match response or no-action response
|
||||
|
||||
**Proactive Registration**:
|
||||
Skills register proactive behaviors with:
|
||||
- Trigger types (time-based, event-based, surprise)
|
||||
- Context rules (when this can trigger)
|
||||
- Interaction history rules (how often it can trigger)
|
||||
- Settings rules (user preferences)
|
||||
|
||||
### 2. Parser Service (`packages/parser`)
|
||||
|
||||
The Parser service performs Natural Language Understanding using Dialogflow.
|
||||
|
||||
**ParserService** (`ParserService.ts`):
|
||||
- Starts RobustParser process on port 8787 (optional)
|
||||
- Initializes Dialogflow client
|
||||
- Initializes Robust Parser client
|
||||
- Handles POST requests to `/v1/parse`
|
||||
- Exposes state at `/state` endpoint
|
||||
|
||||
**NLU Pipeline:**
|
||||
1. Receives text, rules, and context
|
||||
2. Queries Dialogflow with configured agents
|
||||
3. Optionally queries Robust Parser (custom NLU)
|
||||
4. Returns intent, entities, and rules
|
||||
|
||||
**Configuration:**
|
||||
- Dialogflow API key
|
||||
- Robust Parser enable/disable
|
||||
- Multiple external agents support
|
||||
|
||||
### 3. History Service (`packages/history`)
|
||||
|
||||
The History service persists interaction data to MongoDB.
|
||||
|
||||
**HistoryService** (`HistoryService.ts`):
|
||||
- Two database clients:
  - `SkillLaunchDBClient` - Records skill launches
  - `SpeechHistoryDBClient` - Records speech interactions (optional)
- HTTP endpoints:
  - `/v1/skill/launch` - Skill launch history
  - `/v1/speech` - Speech history (if enabled)
|
||||
- Health check endpoint
|
||||
|
||||
**Data Stored:**
|
||||
- Skill launches (skill ID, intent, timestamp, robot ID, account ID)
|
||||
- Speech interactions (ASR result, NLU result, audio file URL, error tracking)
|
||||
|
||||
### 4. Lasso Service (`packages/lasso`)
|
||||
|
||||
Lasso provides external data integration for skills.
|
||||
|
||||
**Features:**
|
||||
- OAuth2 credential management
|
||||
- Calendar client integration
|
||||
- Weather data (Dark Sky API)
|
||||
- Maps data (Google Maps API)
|
||||
- News data (AP News)
|
||||
- MongoDB for credential storage
|
||||
- Redis for caching
|
||||
|
||||
**LassoService** (`LassoService.ts`):
|
||||
- Manages OAuth2 flows
|
||||
- Provides relay endpoints for external APIs
|
||||
- Caches responses in Redis
|
||||
|
||||
## Skill Framework
|
||||
|
||||
### BaseSkill (`packages/baseskill`)
|
||||
|
||||
**BaseSkill** (`BaseSkill.ts`):
|
||||
- Abstract base class for all cloud skills
|
||||
- Extends `BaseHttpHandler`
|
||||
- Handles POST requests to `/`
|
||||
- Provides error handling
|
||||
- Tracks timing
|
||||
|
||||
**GraphSkill** (`GraphSkill.ts`):
|
||||
- Extends BaseSkill with graph-based state machine
|
||||
- Implements node-based conversation flow
|
||||
- Supports skill redirects
|
||||
- Tracks analytics events
|
||||
- Supports supplemental behaviors (parallel/sequence)
|
||||
|
||||
### Graph System
|
||||
|
||||
The graph system provides a state machine framework for skills.
|
||||
|
||||
**Graph** (`Graph.ts`):
|
||||
- Directed graph of connected nodes
|
||||
- Supports subgraphs (hierarchical)
|
||||
- Exit transitions for graph termination
|
||||
- Validation (reachability, transition completeness)
|
||||
- GraphViz dot file generation
|
||||
|
||||
**GraphManager** (`GraphManager.ts`):
|
||||
- Singleton per skill
|
||||
- Manages node IDs and mappings
|
||||
- Executes graph:
  - `start()` - Creates session, enters initial node
  - `enterNode()` - Calls node's enter method
  - `exitNode()` - Calls node's exit method with action results
  - `executeTransition()` - Moves to next node
|
||||
- Maintains session state (node ID, data, trace)
|
||||
|
||||
**Node** (`Node.ts`):
|
||||
- Abstract base class for graph nodes
|
||||
- Has transition names and destinations
|
||||
- Two lifecycle methods:
  - `enter(data)` - Called when node is entered, returns action or redirect
  - `exit(data)` - Called with action results, returns next transition
|
||||
- Supports graph traversal (BFS)
|
||||
|
||||
**Built-in Node Types:**
|
||||
- `DefaultNode` - Simple terminal node
|
||||
- `JCPNode` - Returns JCP action
|
||||
- `NoOpNode` - No operation
|
||||
- `TrueFalseNode` - Conditional branching
|
||||
- `SetLooperIDNode` - Sets speaker ID
|
||||
|
||||
**MIM (Multimodal Interaction Module) System:**
|
||||
- `ANFactory` - Creates graph for playing MIM animations
|
||||
- Supports scripted responses, emotion responses, fallback responses
|
||||
- Semi-specific responses (context-aware)
|
||||
|
||||
### Skill Request/Response Protocol
|
||||
|
||||
**Skill Request Types** (`skill/request.ts`):
|
||||
- `LISTEN_LAUNCH` - Launch skill from listen interaction
|
||||
- `LISTEN_UPDATE` - Update skill with action results
|
||||
- `PROACTIVE_LAUNCH` - Launch skill proactively
|
||||
|
||||
**Skill Request Data:**
|
||||
```typescript
{
  type: MessageType,
  msgID: UUID,
  ts: number,
  data: {
    general: { accountID, robotID, lang, release },
    runtime: { character, location, loop, perception, dialog },
    skill: { id, session? },
    result: any, // Action results for UPDATE
    nlu: NLUResult,
    asr: ASRResult,
    memo?: any
  }
}
```
|
||||
|
||||
**Skill Response Types** (`skill/response.ts`):
|
||||
- `SKILL_ACTION` - Returns action to execute
|
||||
- `SKILL_REDIRECT` - Redirects to another skill
|
||||
- `ERROR` - Error response
|
||||
|
||||
**Skill Action Data:**
|
||||
```typescript
{
  action: JCPAction, // JCP protocol behavior
  analytics?: AnalyticsData,
  final?: boolean, // Is this the final response?
  fireAndForget?: boolean
}
```
|
||||
|
||||
**JCP Action** (`skill/action.ts`):
|
||||
```typescript
{
  type: ActionType.JCP,
  config: {
    version: "1.0.0",
    jcp: SupportedBehaviors // SLIM, Sequence, Parallel, SetPresentPerson, ImpactEmotion
  }
}
```
|
||||
|
||||
### Skill Configuration
|
||||
|
||||
**SkillConfig** (`skill/config.ts`):
|
||||
```typescript
{
  id: SkillID,
  intents: [{
    name: IntentName,
    entities?: EntityConfig[],
    memo?: any
  }],
  proactives?: ProactiveRegistration[],
  IHQueries?: IHQueryDefinitions,
  onRobot?: boolean,
  URL: string,
  settings?: ManifestSettings
}
```
|
||||
|
||||
**Entity Config**:
|
||||
- `name` - Entity name
|
||||
- `value` - Expected value
|
||||
- `matchRule` - 'EXACT' or 'NOT'
|
||||
|
||||
**Proactive Registration**:
|
||||
- Trigger type and conditions
|
||||
- Context rules
|
||||
- Interaction history rules
|
||||
- Settings rules
|
||||
|
||||
## Interfaces Package
|
||||
|
||||
The `interfaces` package defines all TypeScript interfaces for communication between services.
|
||||
|
||||
### Key Interface Modules
|
||||
|
||||
**service.ts** - Base message types:
|
||||
- `BaseMessage<T, D>` - Generic message with type, msgID, timestamp, data
|
||||
- `BaseResponse<T, D>` - Response with final flag and timings
|
||||
- `IAuthDetails` - Authentication details (account ID, access keys)
|
||||
|
||||
**hub/** - Hub-specific interfaces:
|
||||
- `request.ts` - LISTEN, CONTEXT, CLIENT_ASR, CLIENT_NLU messages
|
||||
- `response.ts` - ASR, NLU, LISTEN, SKILL_REDIRECT, ERROR responses
|
||||
- `MessageType.ts` - Message type enums
|
||||
- `HubErrorCode.ts` - Error code enums
|
||||
|
||||
**skill/** - Skill-specific interfaces:
|
||||
- `request.ts` - LISTEN_LAUNCH, LISTEN_UPDATE, PROACTIVE_LAUNCH
|
||||
- `response.ts` - SKILL_ACTION, SKILL_REDIRECT, ERROR
|
||||
- `action.ts` - JCP action types
|
||||
- `config.ts` - Skill configuration
|
||||
- `behaviors.ts` - Supported JCP behaviors
|
||||
- `analytics.ts` - Analytics event types
|
||||
|
||||
**nlu.ts** - NLU interfaces:
|
||||
- `NLURequestData` - Text, rules, loop users, external agents
|
||||
- `NLUResult` - Intent, entities, rules
|
||||
- `ExternalAgentRequest` - External Dialogflow agent config
|
||||
|
||||
**asr.ts** - ASR interfaces:
|
||||
- `ASRResult` - Text, confidence, annotation
|
||||
- `ASRConfig` - Language, hints, timeouts
|
||||
|
||||
**jibo/** - Jibo-specific data:
|
||||
- `data.ts` - GeneralData (account, robot, language), SkillData (session, trace)
|
||||
- `runtime.ts` - RuntimeContext (character, location, loop, perception, dialog)
|
||||
|
||||
**proactive/** - Proactive interfaces:
|
||||
- Context field definitions
|
||||
- History rules
|
||||
- Settings rules
|
||||
- Proactive trigger/request/response
|
||||
|
||||
**history/** - History interfaces:
|
||||
- Skill launch data
|
||||
- Speech history data
|
||||
|
||||
## Utils Package
|
||||
|
||||
The `utils` package provides shared functionality.
|
||||
|
||||
### BaseService (`utils/service/BaseService.ts`)
|
||||
|
||||
Base class for all Pegasus services:
|
||||
|
||||
**Features:**
|
||||
- Express.js HTTP server
|
||||
- WebSocket server (ws library)
|
||||
- JWT authentication
|
||||
- Request/response logging with jibo-log
|
||||
- New Relic monitoring
|
||||
- Health check endpoint
|
||||
- Error handling middleware
|
||||
|
||||
**Methods:**
|
||||
- `addSocketHandler(path, handler)` - Register WebSocket handler
|
||||
- `addHttpHandler(path, handler)` - Register HTTP handler
|
||||
- `init(port)` - Start server
|
||||
- `close()` - Stop server
|
||||
|
||||
**Authentication:**
|
||||
- JWT token verification
|
||||
- Bearer token scheme
|
||||
- Configurable secret via `ETCO_server_hubTokenSecret`
|
||||
|
||||
**Logging:**
|
||||
- Per-request log instances
|
||||
- Transaction ID tracking
|
||||
- Robot ID tracking
|
||||
- Configurable log levels per namespace
|
||||
|
||||
### Other Utils
|
||||
|
||||
- `PegasusRequest` - Enhanced Express request with Jibo headers
|
||||
- `PegasusWebSocket` - Enhanced WebSocket with auth and logging
|
||||
- `JiboHeaders` - Parses Jibo-specific headers (transID, robotID, logging config)
|
||||
- `ResponseWrapper` - Wraps WebSocket responses
|
||||
- `HttpError` - HTTP error with status code
|
||||
|
||||
## Communication Protocols
|
||||
|
||||
### WebSocket Protocol
|
||||
|
||||
**Connection:**
|
||||
- URL: `ws://hub:9000/listen` or `ws://hub:9000/proactive`
|
||||
- Authentication: Bearer token in Authorization header
|
||||
- Headers: `x-jibo-transid`, `x-jibo-robotid`, `x-jibo-logging-config`
|
||||
|
||||
**Message Format:**
|
||||
```json
{
  "type": "MESSAGE_TYPE",
  "msgID": "uuid",
  "ts": 1234567890,
  "data": { ... }
}
```
|
||||
|
||||
**Listen Flow Messages:**
|
||||
1. Robot → Hub: LISTEN (with ASR config, rules, language)
|
||||
2. Robot → Hub: Audio packets (binary)
|
||||
3. Hub → Robot: SOS (Start of Speech)
|
||||
4. Robot → Hub: CONTEXT (runtime context)
|
||||
5. Hub → Robot: EOS (End of Speech)
|
||||
6. Hub → Robot: LISTEN (with ASR result, NLU result, match)
|
||||
7. Hub → Robot: SKILL_ACTION (if cloud skill)
|
||||
8. Robot → Hub: CMD_RESULT (action results)
|
||||
9. Hub → Robot: SKILL_ACTION (next action) or final
|
||||
|
||||
**Proactive Flow Messages:**
|
||||
1. Robot → Hub: TRIGGER (trigger data)
|
||||
2. Robot → Hub: CONTEXT (runtime context)
|
||||
3. Hub → Robot: PROACTIVE (match or no-action)
|
||||
4. Hub → Robot: SKILL_ACTION (if cloud skill)
|
||||
|
||||
### HTTP Protocol
|
||||
|
||||
**Skill Request:**
|
||||
- Method: POST
|
||||
- URL: `http://skill-host:port/`
|
||||
- Headers: Authorization, x-jibo-transid, x-jibo-robotid
|
||||
- Body: SkillRequest JSON
|
||||
|
||||
**Parser Request:**
|
||||
- Method: POST
|
||||
- URL: `http://parser:8080/v1/parse`
|
||||
- Body: NLURequestData JSON
|
||||
|
||||
## Authentication & Security
|
||||
|
||||
### JWT Authentication
|
||||
|
||||
**Token Format:**
|
||||
```json
{
  "id": "account-id",
  "accessKeyId": "client-id",
  "secretAccessKey": "client-secret",
  "friendlyId": "robot-name"
}
```
|
||||
|
||||
**Verification:**
|
||||
- Secret: `ETCO_server_hubTokenSecret` environment variable
|
||||
- Scheme: Bearer
|
||||
- Applied to WebSocket connections and HTTP endpoints
|
||||
|
||||
### Network Security
|
||||
|
||||
- All services run in Docker containers
|
||||
- Services communicate via Docker network (pegasus-nw)
|
||||
- External access via load balancer
|
||||
- TLS termination at load balancer
|
||||
|
||||
## Deployment
|
||||
|
||||
### Docker Compose (Local Development)
|
||||
|
||||
**Services:**
|
||||
- `hub` - Hub service (port 9000)
|
||||
- `parser` - Parser service (port 9005)
|
||||
- `history` - History service (port 9006)
|
||||
- `chitchat-skill` - Chitchat skill (port 9004)
|
||||
- `report-skill` - Report skill (port 9003)
|
||||
- `lasso` - Lasso service (port 9007)
|
||||
- `redis` - Redis cache (port 6379)
|
||||
- `mongo_lasso` - MongoDB for Lasso (port 27017)
|
||||
- `history_cluster` - MongoDB for History (from docker-compose-history-db.yml)
|
||||
|
||||
**Configuration:**
|
||||
- Environment variables prefixed with `ETCO_` (ETCO = Environment TO Configuration)
|
||||
- Volume mounting: `./:/pegasus:consistent` for live code editing
|
||||
- Debug ports: 5850-5855 for Node.js debugging
|
||||
|
||||
### Build Process
|
||||
|
||||
**Commands:**
|
||||
```bash
|
||||
docker build -t pegasus_base:latest .
|
||||
yarn docker:bootstrap
|
||||
yarn docker:build
|
||||
./pegasus.js build-docker-image --services hub
|
||||
```
|
||||
|
||||
**CLI Tool** (`cli/`):
|
||||
- `bootstrap` - Install dependencies
|
||||
- `build` - Build TypeScript
|
||||
- `test` - Run tests
|
||||
- `docker-run` - Run commands in Docker
|
||||
- `build-docker-image` - Build Docker images for services
|
||||
|
||||
### Production Deployment
|
||||
|
||||
- AWS ECS (Elastic Container Service)
|
||||
- ECR (Elastic Container Registry) for Docker images
|
||||
- Application Load Balancer
|
||||
- MongoDB Atlas for production databases
|
||||
- ElastiCache for Redis
|
||||
- CloudWatch for logging
|
||||
- New Relic for monitoring
|
||||
|
||||
## Data Flow Examples
|
||||
|
||||
### Example 1: User Says "Tell Me a Joke"
|
||||
|
||||
1. **Robot → Hub**: LISTEN message with ASR config
|
||||
2. **Robot → Hub**: Audio stream
|
||||
3. **Hub**: Detects SOS, emits SOS message
|
||||
4. **Hub**: Streams audio to Google Cloud Speech API
|
||||
5. **Hub**: Detects EOS, emits EOS message
|
||||
6. **Robot → Hub**: CONTEXT message (runtime state)
|
||||
7. **Hub → Parser**: POST /v1/parse with text "tell me a joke"
|
||||
8. **Parser → Dialogflow**: Query with "joke" intent rules
|
||||
9. **Dialogflow → Parser**: Intent="joke_tell", entities={}
|
||||
10. **Parser → Hub**: NLU result
|
||||
11. **Hub → IntentRouter**: Match intent to "joke-skill"
|
||||
12. **Hub → joke-skill**: POST LISTEN_LAUNCH request
|
||||
13. **joke-skill**: Executes graph, selects joke
|
||||
14. **joke-skill → Hub**: SKILL_ACTION with JCP behavior (SayText)
|
||||
15. **Hub → Robot**: SKILL_ACTION message
|
||||
16. **Robot**: Executes behavior, speaks joke
|
||||
17. **Robot → Hub**: CMD_RESULT with action result
|
||||
18. **Hub → joke-skill**: POST LISTEN_UPDATE request
|
||||
19. **joke-skill**: Returns final=true
|
||||
20. **Hub → Robot**: Final SKILL_ACTION
|
||||
|
||||
### Example 2: Proactive Greeting
|
||||
|
||||
1. **Robot**: Detects person entering room
|
||||
2. **Robot → Hub**: TRIGGER message with trigger data
|
||||
3. **Robot → Hub**: CONTEXT message (runtime state)
|
||||
4. **Hub**: Queries all proactive skill configs
|
||||
5. **Hub**: Filters by context (time, people present)
|
||||
6. **Hub**: Filters by history (last greeting time)
|
||||
7. **Hub**: Filters by settings (user greeting preference)
|
||||
8. **Hub**: Selects "greeting-skill"
|
||||
9. **Hub → greeting-skill**: POST PROACTIVE_LAUNCH request
|
||||
10. **greeting-skill → Hub**: SKILL_ACTION with greeting behavior
|
||||
11. **Hub → Robot**: PROACTIVE response with match
|
||||
12. **Hub → Robot**: SKILL_ACTION message
|
||||
13. **Robot**: Executes greeting
|
||||
|
||||
## Error Handling
|
||||
|
||||
### Error Types
|
||||
|
||||
**Hub Error Codes** (`HubErrorCode.ts`):
|
||||
- `TIMEOUT_ASR` - ASR timeout
|
||||
- `TIMEOUT_PARSER` - Parser timeout
|
||||
- `TIMEOUT_CONTEXT` - Context timeout
|
||||
- `TIMEOUT_SKILL` - Skill timeout
|
||||
- `PARSER` - Parser error
|
||||
- `ASR` - ASR error
|
||||
|
||||
**Skill Request Errors** (`SkillRequestError`):
|
||||
- `SKILL_NOT_FOUND` - Skill does not exist
|
||||
- `TIMEOUT` - Skill request timeout
|
||||
|
||||
### Error Response Format
|
||||
|
||||
```json
{
  "type": "ERROR",
  "msgID": "uuid",
  "ts": 1234567890,
  "final": true,
  "data": {
    "message": "Error description",
    "code": "ERROR_CODE"
  },
  "timings": {
    "total": 1234
  }
}
```
|
||||
|
||||
### Timeout Handling
|
||||
|
||||
- ASR: 40 seconds (configurable via sosTimeout, maxSpeechTimeout)
|
||||
- Parser: 10 seconds
|
||||
- Context: 5 seconds
|
||||
- Skill: 10 seconds
|
||||
- Transaction: 60 seconds (configurable)
|
||||
|
||||
## Monitoring & Logging
|
||||
|
||||
### Logging
|
||||
|
||||
**jibo-log Integration:**
|
||||
- Per-namespace log levels
|
||||
- Transaction ID correlation
|
||||
- Robot ID tracking
|
||||
- Structured logging support
|
||||
|
||||
**Log Levels:**
|
||||
- Configured via `x-jibo-logging-config` header
|
||||
- Per-namespace granularity
|
||||
- Environment variable: `ETCO_server_logLevel`
|
||||
|
||||
### Monitoring
|
||||
|
||||
**New Relic:**
|
||||
- HTTP request tracking
|
||||
- WebSocket transaction tracking
|
||||
- Error tracking
|
||||
- Custom attributes (transID, robotID)
|
||||
|
||||
**Health Checks:**
|
||||
- `/healthcheck` endpoint on all services
|
||||
- Returns service-specific health data
|
||||
- Database connection status
|
||||
|
||||
### Speech History Recording
|
||||
|
||||
**Optional Features:**
|
||||
- Record skill launches to MongoDB
|
||||
- Record speech interactions to MongoDB
|
||||
- Upload speech logs to S3 (JSON with audio base64)
|
||||
|
||||
**Configuration:**
|
||||
- `ETCO_hub_recordLaunchHistory` - Enable launch history
|
||||
- `ETCO_hub_recordSpeechHistory` - Enable speech history
|
||||
- `ETCO_hub_recordSpeechLogBucket` - S3 bucket for speech logs
|
||||
|
||||
## Skill Development Guide
|
||||
|
||||
### Creating a New Skill
|
||||
|
||||
1. **Extend GraphSkill:**
|
||||
   ```typescript
   export class MySkill extends GraphSkill<Transition> {
     constructor() {
       super('my-skill');
     }

     createGraph(): Graph<Transition> {
       const g = new Graph('My Skill', generateTransitions<Transition>(Transition));
       // Add nodes and transitions
       g.finalize();
       return g;
     }
   }
   ```
|
||||
|
||||
2. **Define Transitions:**
|
||||
   ```typescript
   enum Transition {
     Done = 'Done',
     Retry = 'Retry'
   }
   ```
|
||||
|
||||
3. **Create Nodes:**
|
||||
   ```typescript
   class MyNode extends Node<Transition> {
     async enter(data: Data): Promise<EnterResponse> {
       // Return action or redirect
       return { action: myJCPAction };
     }

     async exit(data: Data): Promise<ExitResponse> {
       // Return next transition
       return { transition: Transition.Done };
     }
   }
   ```
|
||||
|
||||
4. **Create Skill Manifest:**
|
||||
   ```json
   {
     "id": "my-skill",
     "intents": [
       {
         "name": "my_intent",
         "entities": []
       }
     ],
     "onRobot": false
   }
   ```
|
||||
|
||||
5. **Register with Hub:**
|
||||
   - Add skill config to `skills-local.json` or environment
   - Deploy skill service
   - Hub will load configuration
|
||||
|
||||
### Skill Best Practices
|
||||
|
||||
- Use graph for complex flows, direct responses for simple ones
|
||||
- Track analytics events for monitoring
|
||||
- Handle errors gracefully with try-catch
|
||||
- Use supplemental behaviors for parallel actions
|
||||
- Set appropriate timeouts
|
||||
- Log important events
|
||||
- Test with both LISTEN_LAUNCH and PROACTIVE_LAUNCH
|
||||
|
||||
## Key Design Decisions
|
||||
|
||||
### Why Graph-Based Skills?
|
||||
|
||||
- **State Management**: Explicit state machine with session tracking
|
||||
- **Visualization**: GraphViz generation for debugging
|
||||
- **Reusability**: Subgraphs for common patterns
|
||||
- **Testability**: Isolated node testing
|
||||
- **Maintainability**: Clear flow structure
|
||||
|
||||
### Why WebSocket for Robot Communication?
|
||||
|
||||
- **Low Latency**: Real-time bidirectional communication
|
||||
- **Audio Streaming**: Binary message support for audio
|
||||
- **Stateful**: Single connection per transaction
|
||||
- **Efficiency**: No HTTP overhead for each message
|
||||
|
||||
### Why Separate Services?
|
||||
|
||||
- **Scalability**: Scale each service independently
|
||||
- **Isolation**: Failure in one service doesn't affect others
|
||||
- **Technology**: Different services can use different tech stacks
|
||||
- **Deployment**: Independent deployment cycles
|
||||
|
||||
### Why Lerna Monorepo?
|
||||
|
||||
- **Code Sharing**: Easy to share interfaces and utils
|
||||
- **Versioning**: Linked versioning for interdependent packages
|
||||
- **Development**: Single repository for all services
|
||||
- **Testing**: Integration tests across packages
|
||||
|
||||
## Limitations & Known Issues
|
||||
|
||||
1. **Single Graph Manager**: Skills cannot have concurrent sessions (singleton pattern)
|
||||
2. **Sequential Skill Redirects**: Only one level of redirect supported
|
||||
3. **No Skill-to-Skill Communication**: Skills must go through hub
|
||||
4. **Fixed Timeouts**: Hardcoded timeouts in some places
|
||||
5. **No Skill Hot-Reload**: Requires container rebuild for skill changes
|
||||
6. **Limited NLU**: Dialogflow dependency, no custom model training
|
||||
7. **No Skill Versioning**: Skills identified by ID only
|
||||
8. **Synchronous Skill Requests**: Hub waits for skill response (no async)
|
||||
|
||||
## Future Considerations
|
||||
|
||||
1. **Skill Versioning**: Support multiple versions of same skill
|
||||
2. **Skill-to-Skill Direct Communication**: Allow skills to call each other
|
||||
3. **Async Skill Responses**: Long-running skills with callback pattern
|
||||
4. **Custom NLU Models**: Support for custom trained models
|
||||
5. **Skill Hot-Reload**: Dynamic skill loading without restart
|
||||
6. **Multi-Session Skills**: Support concurrent skill sessions
|
||||
7. **Skill Marketplace**: Third-party skill distribution
|
||||
8. **A/B Testing**: Framework for testing skill variations
|
||||
|
||||
## Conclusion
|
||||
|
||||
The original Jibo server (Pegasus) is a well-architected microservices system that provides a robust foundation for conversational AI on the Jibo robot. The graph-based skill framework offers flexibility and maintainability, while the separation of concerns enables independent scaling and development. The system successfully handles real-time speech processing, natural language understanding, skill routing, and proactive behaviors in a distributed cloud environment.
|
||||
874
OpenJibo/docs/DesignDoc/skill-framework-design.md
Normal file
874
OpenJibo/docs/DesignDoc/skill-framework-design.md
Normal file
@@ -0,0 +1,874 @@
|
||||
# Skill Framework Design Document
|
||||
|
||||
## Overview
|
||||
|
||||
The Skill Framework provides the foundation for building cloud-based skills for the Jibo robot. It consists of a base class for all skills, a graph-based state machine for complex conversational flows, and a system for generating JCP (Jibo Command Protocol) actions that are sent to the robot.
|
||||
|
||||
## Location
|
||||
|
||||
`packages/baseskill/src/`
|
||||
|
||||
## Core Components
|
||||
|
||||
### BaseSkill (`BaseSkill.ts`)
|
||||
|
||||
Abstract base class that all cloud skills must extend.
|
||||
|
||||
**Purpose:** Provides common HTTP handling and error handling for all skills.
|
||||
|
||||
**Key Features:**
|
||||
- Extends `BaseHttpHandler` from `@jibo/utils`
|
||||
- Registers POST handler at `/` endpoint
|
||||
- Validates request structure
|
||||
- Tracks timing for each request
|
||||
- Provides error response builder
|
||||
|
||||
**Constructor:**
|
||||
```typescript
|
||||
constructor(public name: string)
|
||||
```
|
||||
|
||||
**Abstract Method:**
|
||||
```typescript
|
||||
protected abstract handle(request: PegasusRequest<SkillRequest>): Promise<SkillResponse>;
|
||||
```
|
||||
|
||||
**Lifecycle Methods:**
|
||||
- `init(): Promise<void>` - Override to initialize resources (load files, connect to services)
|
||||
- `buildErrorResponse(err: Error): ErrorResponse` - Builds standardized error response
|
||||
|
||||
**HTTP Handler:**
|
||||
- Accepts POST requests at `/`
|
||||
- Logs request type
|
||||
- Calls `handle()` method
|
||||
- Adds timing information
|
||||
- Catches errors and returns error response
|
||||
|
||||
### GraphSkill (`GraphSkill.ts`)
|
||||
|
||||
Extends BaseSkill with a graph-based state machine for complex conversational flows.
|
||||
|
||||
**Purpose:** Enables skills to define their logic as a series of interconnected nodes (states) with transitions.
|
||||
|
||||
**Key Features:**
|
||||
- Implements `GraphFactory` interface
|
||||
- Manages graph execution via `GraphManager` singleton
|
||||
- Supports skill redirects
|
||||
- Tracks analytics events
|
||||
- Supports supplemental behaviors (parallel/sequence)
|
||||
- Handles both launch and update requests
|
||||
|
||||
**Constructor:**
|
||||
```typescript
|
||||
constructor(name: string)
|
||||
```
|
||||
|
||||
**Abstract Method:**
|
||||
```typescript
|
||||
abstract createGraph(): Graph<ExitTransition>
|
||||
```
|
||||
|
||||
**Request Handling:**
|
||||
|
||||
**Launch Requests** (LISTEN_LAUNCH or PROACTIVE_LAUNCH):
|
||||
1. Validates request data (accountID, robotID, skill ID)
|
||||
2. Initializes skill session data
|
||||
3. Tracks SKILL_ENTRY analytics event
|
||||
4. Calls `GraphManager.instance.start(graph, data)` to begin graph execution
|
||||
5. Returns SKILL_ACTION or SKILL_REDIRECT response
|
||||
|
||||
**Update Requests** (LISTEN_UPDATE):
|
||||
1. Validates request data
|
||||
2. Calls `GraphManager.instance.exitNode(data)` to process action results
|
||||
3. Returns next SKILL_ACTION or final response
|
||||
|
||||
**Response Types:**
|
||||
|
||||
1. **SKILL_REDIRECT** - Redirects to another skill
|
||||
```typescript
|
||||
{
|
||||
type: "SKILL_REDIRECT",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
skillID: string,
|
||||
nlu?: NLUResult,
|
||||
asr?: ASRResult,
|
||||
memo?: any
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
2. **SKILL_ACTION** - Returns JCP action for robot to execute
|
||||
```typescript
|
||||
{
|
||||
type: "SKILL_ACTION",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
action: JCPAction,
|
||||
analytics: AnalyticsData,
|
||||
final: boolean,
|
||||
fireAndForget: boolean
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
3. **Final Response** - No action, transaction complete
|
||||
```typescript
|
||||
{
|
||||
type: "SKILL_ACTION",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
action: null,
|
||||
analytics: AnalyticsData,
|
||||
final: true,
|
||||
fireAndForget: true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Convenience Methods:**
|
||||
|
||||
- `track(data, event, properties)` - Track analytics event
|
||||
- `overrideSpeaker(data, id)` - Override current speaker in context
|
||||
- `addParallelBehavior(data, behavior)` - Add behavior to execute in parallel
|
||||
- `addSequenceBehavior(data, behavior)` - Add behavior to execute in sequence
|
||||
|
||||
**Supplemental Behaviors Injection:**
|
||||
|
||||
When a skill returns a JCP action, the framework injects any supplemental behaviors that were added during execution:
|
||||
|
||||
1. If sequence behaviors exist, wraps main action in a Sequence
|
||||
2. If parallel behaviors exist, wraps result in a Parallel
|
||||
3. Final JCP action is sent to robot
|
||||
|
||||
**Example:**
|
||||
```typescript
|
||||
// Skill adds parallel behavior
|
||||
this.addParallelBehavior(data, SetPresentPersonBehavior);
|
||||
|
||||
// Skill returns main action
|
||||
return { action: SayTextBehavior };
|
||||
|
||||
// Framework injects: Parallel([SetPresentPersonBehavior, SayTextBehavior])
|
||||
```
|
||||
|
||||
### Graph System
|
||||
|
||||
#### Graph (`graph/Graph.ts`)
|
||||
|
||||
Represents a directed graph of connected nodes (states).
|
||||
|
||||
**Purpose:** Defines the structure of a skill's conversation flow.
|
||||
|
||||
**Key Properties:**
|
||||
- `name: string` - Graph name
|
||||
- `initial: Node` - Starting node
|
||||
- `nodes: Set<Node>` - All nodes in graph
|
||||
- `exitTransitions: Map<ExitTransition, TransitionContainer[]>` - Exit transition mappings
|
||||
|
||||
**Constructor:**
|
||||
```typescript
|
||||
constructor(name: string, exitTransitionNames: ExitTransition[])
|
||||
```
|
||||
|
||||
**Methods:**
|
||||
|
||||
- `setInitialNode(node)` - Sets the starting node
|
||||
- `addNode(node, transitionMapping)` - Adds a node and connects its transitions
|
||||
- `addSubGraph(subGraph, transitionMapping)` - Adds a subgraph and connects its exits
|
||||
- `finalize()` - Validates graph and locks it for execution
|
||||
- `writeDotFile(filePath)` - Generates GraphViz dot file for visualization
|
||||
|
||||
**Transition Mapping:**
|
||||
```typescript
|
||||
[
|
||||
[TransitionName, DestinationNode], // Transition to another node
|
||||
[TransitionName, ExitTransition] // Exit from graph
|
||||
]
|
||||
```
|
||||
|
||||
**Validation (in finalize):**
|
||||
- All nodes must be reachable from initial node
|
||||
- All exit transitions must be connected
|
||||
- All transitions must have valid destinations
|
||||
- No duplicate transition names
|
||||
|
||||
**Subgraphs:**
|
||||
- Graphs can be nested within other graphs
|
||||
- Subgraph exit transitions connect to parent graph nodes
|
||||
- Enables hierarchical organization of complex flows
|
||||
- Nodes can belong to multiple graphs (for subgraph sharing)
|
||||
|
||||
**GraphViz Visualization:**
|
||||
- Generates .dot files for graph visualization
|
||||
- Color-codes initial node, regular nodes, and exit states
|
||||
- Shows hierarchical structure with clusters
|
||||
- Labels transitions with their names
|
||||
|
||||
#### GraphManager (`graph/GraphManager.ts`)
|
||||
|
||||
Singleton that manages graph execution and skill sessions.
|
||||
|
||||
**Purpose:** Coordinates node execution and maintains session state.
|
||||
|
||||
**Singleton Pattern:**
|
||||
```typescript
|
||||
GraphManager.instance // Access singleton
|
||||
```
|
||||
|
||||
**Key Responsibilities:**
|
||||
- Assigns unique IDs to all nodes
|
||||
- Maps node IDs to node instances
|
||||
- Manages skill session lifecycle
|
||||
- Executes node enter/exit lifecycle
|
||||
- Handles transitions between nodes
|
||||
|
||||
**Session Structure:**
|
||||
```typescript
|
||||
{
|
||||
id: string, // Session UUID
|
||||
nodeID: number, // Current node ID
|
||||
data: any, // Skill-specific session data
|
||||
trace: [ // History of transitions
|
||||
{ nodeID: number, transition: string }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**Execution Flow:**
|
||||
|
||||
**Start Graph** (launch request):
|
||||
```typescript
|
||||
start(graph, data)
|
||||
→ Creates new session
|
||||
→ Sets initial node
|
||||
→ Calls enterNode()
|
||||
```
|
||||
|
||||
**Enter Node:**
|
||||
```typescript
|
||||
enterNode(data)
|
||||
→ Fetches current node
|
||||
→ Calls node.enter(data)
|
||||
→ Updates trace
|
||||
→ If action returned: return action
|
||||
→ Else: call exitNode()
|
||||
```
|
||||
|
||||
**Exit Node:**
|
||||
```typescript
|
||||
exitNode(data)
|
||||
→ Fetches current node
|
||||
→ Calls node.exit(data)
|
||||
→ If transition returned: executeTransition()
|
||||
→ Else: return (terminal)
|
||||
```
|
||||
|
||||
**Execute Transition:**
|
||||
```typescript
|
||||
executeTransition(node, result, data)
|
||||
→ Validates transition exists
|
||||
→ Updates trace with transition name
|
||||
→ If terminal: return null
|
||||
→ Else: update nodeID, call enterNode()
|
||||
```
|
||||
|
||||
**Node ID Assignment:**
|
||||
- Counter starts at 0, increments for each node
|
||||
- Bidirectional mapping: node ↔ ID
|
||||
- Enables serialization of session state
|
||||
|
||||
#### Node (`graph/nodes/Node.ts`)
|
||||
|
||||
Abstract base class for all graph nodes.
|
||||
|
||||
**Purpose:** Defines a state in the skill's conversation flow.
|
||||
|
||||
**Key Properties:**
|
||||
- `id: number` - Unique ID assigned by GraphManager
|
||||
- `name: string` - Node name
|
||||
- `transitionNames: Transition[]` - Valid exit transitions
|
||||
- `graphs: Graph[]` - Graphs this node belongs to
|
||||
- `transitions: Map<Transition, TransitionContainer>` - Transition destinations
|
||||
|
||||
**Constructor:**
|
||||
```typescript
|
||||
constructor(name: string, transitionNames: Transition[])
|
||||
```
|
||||
|
||||
**Abstract Methods:**
|
||||
|
||||
```typescript
|
||||
abstract enter(data: Data): Promise<EnterResponse>
|
||||
```
|
||||
- Called when node is entered
|
||||
- Returns action to execute, redirect, or nothing
|
||||
|
||||
```typescript
|
||||
abstract exit(data: Data): Promise<ExitResponse>
|
||||
```
|
||||
- Called with action results (if action was issued)
|
||||
- Returns next transition or nothing (terminal)
|
||||
|
||||
**Data Structure:**
|
||||
```typescript
|
||||
Data = {
|
||||
// From request
|
||||
general: { accountID, robotID, lang, release },
|
||||
runtime: { character, location, loop, perception, dialog },
|
||||
skill: { id, session },
|
||||
result?: any, // Action results for UPDATE
|
||||
|
||||
// Added by framework
|
||||
req: PegasusRequest,
|
||||
log: Log,
|
||||
local: any, // Skill-local data
|
||||
analytics: {}, // Analytics events
|
||||
behaviors: { // Supplemental behaviors
|
||||
parallel: [],
|
||||
sequence: []
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Response Types:**
|
||||
|
||||
**EnterResponse:**
|
||||
```typescript
|
||||
{
|
||||
action?: Action, // JCP action to execute
|
||||
redirect?: RedirectData, // Redirect to another skill
|
||||
final?: boolean // Is this the final response?
|
||||
}
|
||||
```
|
||||
|
||||
**ExitResponse:**
|
||||
```typescript
|
||||
{
|
||||
transition?: string, // Next transition to take
|
||||
result?: any, // Result to pass to next node
|
||||
redirect?: RedirectData
|
||||
}
|
||||
```
|
||||
|
||||
**Built-in Node Types:**
|
||||
|
||||
1. **DefaultNode** - Simple terminal node
|
||||
- Returns no action
|
||||
- Transitions to Done
|
||||
|
||||
2. **NoOpNode** - No operation node
|
||||
- Returns no action
|
||||
- Can have custom transitions
|
||||
|
||||
3. **JCPNode** - Returns a JCP action
|
||||
- Returns specified JCP behavior
|
||||
- Can be terminal or continue
|
||||
|
||||
4. **TrueFalseNode** - Conditional branching
|
||||
- Evaluates condition
|
||||
- Transitions based on true/false
|
||||
|
||||
5. **SetLooperIDNode** - Sets speaker ID
|
||||
- Updates perception.speaker in context
|
||||
- Useful for multi-turn conversations
|
||||
|
||||
**Node Traversal:**
|
||||
- `forEachDescendent(handler)` - BFS traversal of all descendant nodes
|
||||
- Used for graph validation and analysis
|
||||
|
||||
### Skill Request/Response Protocol
|
||||
|
||||
#### Skill Request Types
|
||||
|
||||
**Location:** `packages/interfaces/src/skill/request.ts`
|
||||
|
||||
**MessageType:**
|
||||
- `LISTEN_LAUNCH` - Launch skill from listen interaction
|
||||
- `LISTEN_UPDATE` - Update skill with action results
|
||||
- `PROACTIVE_LAUNCH` - Launch skill proactively
|
||||
|
||||
**Request Structure:**
|
||||
```typescript
|
||||
{
|
||||
type: MessageType,
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
general: {
|
||||
accountID: string,
|
||||
robotID: string,
|
||||
lang: string,
|
||||
release: string
|
||||
},
|
||||
runtime: {
|
||||
character: { emotion, motivation },
|
||||
location: { city, state, country, lat, lng },
|
||||
loop: { users, jibo, owner, loopId },
|
||||
perception: { speaker, peoplePresent },
|
||||
dialog: { referent }
|
||||
},
|
||||
skill: {
|
||||
id: string,
|
||||
session?: {
|
||||
id: string,
|
||||
nodeID: number,
|
||||
data: any,
|
||||
trace: [{ nodeID, transition }]
|
||||
}
|
||||
},
|
||||
result?: any, // Action results for UPDATE
|
||||
nlu?: NLUResult,
|
||||
asr?: ASRResult,
|
||||
memo?: any
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Skill Response Types
|
||||
|
||||
**Location:** `packages/interfaces/src/skill/response.ts`
|
||||
|
||||
**ResponseType:**
|
||||
- `SKILL_ACTION` - Returns action to execute
|
||||
- `SKILL_REDIRECT` - Redirects to another skill
|
||||
- `ERROR` - Error response
|
||||
|
||||
**SKILL_ACTION Response:**
|
||||
```typescript
|
||||
{
|
||||
type: "SKILL_ACTION",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
action: JCPAction | null,
|
||||
analytics: AnalyticsData,
|
||||
final: boolean,
|
||||
fireAndForget: boolean
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**SKILL_REDIRECT Response:**
|
||||
```typescript
|
||||
{
|
||||
type: "SKILL_REDIRECT",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
skillID: string,
|
||||
nlu?: NLUResult,
|
||||
asr?: ASRResult,
|
||||
memo?: any
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**ERROR Response:**
|
||||
```typescript
|
||||
{
|
||||
type: "ERROR",
|
||||
msgID: "uuid",
|
||||
ts: 1234567890,
|
||||
data: {
|
||||
message: string,
|
||||
skill: { id: string }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### JCP Actions
|
||||
|
||||
**Location:** `packages/interfaces/src/skill/action.ts`
|
||||
|
||||
**Purpose:** Defines behaviors that the robot should execute.
|
||||
|
||||
**ActionType:**
|
||||
- `JCP` - Jibo Command Protocol action
|
||||
|
||||
**JCPAction Structure:**
|
||||
```typescript
|
||||
{
|
||||
type: "JCP",
|
||||
config: {
|
||||
version: "1.0.0",
|
||||
jcp: SupportedBehaviors
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**SupportedBehaviors:**
|
||||
- `SLIM` - Single behavior execution
|
||||
- `Sequence` - Sequential behavior execution
|
||||
- `Parallel` - Parallel behavior execution
|
||||
- `SetPresentPerson` - Set focused person
|
||||
- `ImpactEmotion` - Modify Jibo's emotional state
|
||||
|
||||
**Helper Function:**
|
||||
```typescript
|
||||
generateJCPAction(behavior): JCPAction
|
||||
```
|
||||
Wraps a behavior as a JCP action with version 1.0.0.
|
||||
|
||||
### MIM (Multimodal Interaction Module) System
|
||||
|
||||
**Location:** `packages/baseskill/src/graph/mims/`
|
||||
|
||||
**Purpose:** Provides pre-built graph structures for playing MIM animations.
|
||||
|
||||
**MIM Files:**
|
||||
- `.mim` files contain animation definitions
|
||||
- Organized in directories:
|
||||
- `scripted-responses` - Pre-scripted responses
|
||||
- `emotion-responses` - Emotion-based responses
|
||||
- `core-responses` - Fallback responses
|
||||
|
||||
**MIM Factories:**
|
||||
|
||||
**ANFactory** - Animation Node Factory
|
||||
- Creates graph for playing a single MIM
|
||||
- Supports prompt data injection
|
||||
- Can be final or continue
|
||||
|
||||
**MANFactory** - Multiple Animation Node Factory
|
||||
- Creates graph for playing multiple MIMs
|
||||
- Supports random selection
|
||||
- Can be final or continue
|
||||
|
||||
**MIMFactory** - General MIM Factory
|
||||
- Creates graph for MIM playback
|
||||
- Supports semi-specific responses
|
||||
- Handles category-based selection
|
||||
|
||||
**QNFactory** - Question Node Factory
|
||||
- Creates graph for asking questions
|
||||
- Supports opt-in flows
|
||||
- Handles user responses
|
||||
|
||||
**OptInFactory** - Opt-In Node Factory
|
||||
- Creates graph for opt-in offers
|
||||
- Tracks user acceptance/rejection
|
||||
- Handles analytics
|
||||
|
||||
**MIM Factory Options:**
|
||||
```typescript
|
||||
{
|
||||
mimDataProvider: (data) => string[], // Function to get MIM paths
|
||||
promptDataProvider?: (data) => any, // Function to get prompt data
|
||||
final: boolean // Is this the final action?
|
||||
}
|
||||
```
|
||||
|
||||
**Example Usage (Chitchat Skill):**
|
||||
```typescript
|
||||
const doMIMOptions: MimFactoryOptions = {
|
||||
mimDataProvider: (data) => data.local.path,
|
||||
promptDataProvider: (data) => data.local.promptData,
|
||||
final: true
|
||||
};
|
||||
const doMIM = new ANFactory('Do MIM', doMIMOptions).createGraph();
|
||||
```
|
||||
|
||||
**Semi-Specific Responses:**
|
||||
- MIMs with `_SS_` suffix are semi-specific
|
||||
- Match specific categories (e.g., time, weather)
|
||||
- CSV files define category members
|
||||
- Enables context-aware responses
|
||||
|
||||
### SkillService (`SkillService.ts`)
|
||||
|
||||
Service wrapper that hosts a skill as an HTTP service.
|
||||
|
||||
**Purpose:** Provides the service infrastructure for running a skill.
|
||||
|
||||
**Constructor:**
|
||||
```typescript
|
||||
constructor(private skillV1: BaseSkill)
|
||||
```
|
||||
|
||||
**HTTP Handler:**
|
||||
- Registers skill at `/v1/main` endpoint
|
||||
- No authentication required (handled by Hub)
|
||||
|
||||
**Initialization:**
|
||||
```typescript
|
||||
async init(port: number)
|
||||
→ Starts HTTP server
|
||||
→ Calls skill.init()
|
||||
```
|
||||
|
||||
### Analytics
|
||||
|
||||
**Location:** `packages/interfaces/src/skill/analytics.ts`
|
||||
|
||||
**Purpose:** Track skill events for analysis.
|
||||
|
||||
**AnalyticsData Structure:**
|
||||
```typescript
|
||||
{
|
||||
[skillName: string]: [
|
||||
{
|
||||
event: string,
|
||||
properties: any
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**Built-in Events:**
|
||||
- `SKILL_ENTRY` - Skill launched
|
||||
- `SKILL_OFFER` - Opt-in offer presented
|
||||
|
||||
**Skill Entry Analytics:**
|
||||
```typescript
|
||||
{
|
||||
initial_intent: string,
|
||||
domain: string,
|
||||
was_hey_jibo_launch: boolean,
|
||||
user_initiated: boolean,
|
||||
last_skill: string
|
||||
}
|
||||
```
|
||||
|
||||
**Tracking:**
|
||||
```typescript
|
||||
this.track(data, 'CustomEvent', { key: value });
|
||||
```
|
||||
|
||||
Events are automatically included in SKILL_ACTION responses.
|
||||
|
||||
## Server-to-Robot Communication Flow
|
||||
|
||||
### Skill Response to Hub
|
||||
|
||||
When a skill returns a response, the Hub forwards it to the robot:
|
||||
|
||||
**SKILL_ACTION Response:**
|
||||
1. Skill returns SKILL_ACTION with JCP behavior
|
||||
2. Hub adds timing information
|
||||
3. Hub sends SKILL_ACTION to robot via WebSocket
|
||||
4. Robot executes JCP behavior
|
||||
5. Robot sends CMD_RESULT back to Hub
|
||||
6. Hub sends LISTEN_UPDATE to skill
|
||||
7. Skill processes result, returns next action
|
||||
|
||||
**Final SKILL_ACTION:**
|
||||
1. Skill returns SKILL_ACTION with `final: true`
|
||||
2. Hub sends to robot
|
||||
3. Robot executes (if action present)
|
||||
4. Transaction complete
|
||||
|
||||
**SKILL_REDIRECT:**
|
||||
1. Skill returns SKILL_REDIRECT
|
||||
2. Hub emits SKILL_REDIRECT notification to robot
|
||||
3. Hub launches new skill
|
||||
4. New skill proceeds normally
|
||||
|
||||
### JCP Action Execution
|
||||
|
||||
**Single Behavior (SLIM):**
|
||||
```typescript
|
||||
{
|
||||
type: "JCP",
|
||||
config: {
|
||||
version: "1.0.0",
|
||||
jcp: SayTextBehavior
|
||||
}
|
||||
}
|
||||
```
|
||||
Robot executes single behavior immediately.
|
||||
|
||||
**Sequence Behavior:**
|
||||
```typescript
|
||||
{
|
||||
type: "JCP",
|
||||
config: {
|
||||
version: "1.0.0",
|
||||
jcp: Sequence([
|
||||
LookAtBehavior,
|
||||
SayTextBehavior,
|
||||
GestureBehavior
|
||||
])
|
||||
}
|
||||
}
|
||||
```
|
||||
Robot executes behaviors in order.
|
||||
|
||||
**Parallel Behavior:**
|
||||
```typescript
|
||||
{
|
||||
type: "JCP",
|
||||
config: {
|
||||
version: "1.0.0",
|
||||
jcp: Parallel([
|
||||
SetPresentPersonBehavior,
|
||||
SayTextBehavior
|
||||
])
|
||||
}
|
||||
}
|
||||
```
|
||||
Robot executes behaviors simultaneously.
|
||||
|
||||
### Supplemental Behaviors
|
||||
|
||||
Skills can add behaviors that execute alongside the main action:
|
||||
|
||||
**Parallel Supplemental:**
|
||||
```typescript
|
||||
this.addParallelBehavior(data, SetPresentPersonBehavior);
|
||||
// Main action: SayTextBehavior
|
||||
// Result: Parallel([SetPresentPersonBehavior, SayTextBehavior])
|
||||
```
|
||||
|
||||
**Sequence Supplemental:**
|
||||
```typescript
|
||||
this.addSequenceBehavior(data, LookAtBehavior);
|
||||
// Main action: SayTextBehavior
|
||||
// Result: Sequence([LookAtBehavior, SayTextBehavior])
|
||||
```
|
||||
|
||||
**Combined:**
|
||||
```typescript
|
||||
this.addSequenceBehavior(data, LookAtBehavior);
|
||||
this.addParallelBehavior(data, SetPresentPersonBehavior);
|
||||
// Result: Parallel([SetPresentPersonBehavior, Sequence([LookAtBehavior, SayTextBehavior])])
|
||||
```
|
||||
|
||||
## Example Skill Implementation
|
||||
|
||||
### Chitchat Skill
|
||||
|
||||
**Location:** `packages/chitchat-skill/src/Chitchat.ts`
|
||||
|
||||
**Purpose:** Handles conversational interactions with the robot.
|
||||
|
||||
**Graph Structure:**
|
||||
1. **IntentSplitNode** - Splits based on intent type
|
||||
2. **ProcessQueryNode** - Processes user query, selects response
|
||||
3. **DoMIM (ANFactory)** - Plays selected MIM animation
|
||||
4. **Complete (DefaultNode)** - Terminates skill
|
||||
|
||||
**Initialization:**
|
||||
- Loads MIM files from directories
|
||||
- Builds semi-specific mappings
|
||||
- Reads category CSV files
|
||||
|
||||
**Response Selection:**
|
||||
- Scripted responses for common queries
|
||||
- Emotion responses for emotional queries
|
||||
- Semi-specific responses for context-aware queries
|
||||
- Fallback responses for unknown queries
|
||||
|
||||
**MIM Selection:**
|
||||
- Based on intent and entities
|
||||
- Considers semi-specific categories
|
||||
- Falls back to core responses
|
||||
|
||||
## Skill Development Guide
|
||||
|
||||
### Creating a Simple Skill
|
||||
|
||||
```typescript
|
||||
import { BaseSkill } from '@jibo/baseskill';
|
||||
import { skill } from '@jibo/interfaces';
|
||||
|
||||
export class MySkill extends BaseSkill {
|
||||
constructor() {
|
||||
super('my-skill');
|
||||
}
|
||||
|
||||
protected async handle(req: PegasusRequest<SkillRequest>): Promise<SkillResponse> {
|
||||
const data = req.body.data;
|
||||
|
||||
// Process request
|
||||
const action = generateJCPAction(SayTextBehavior("Hello!"));
|
||||
|
||||
return {
|
||||
type: skill.response.ResponseType.SKILL_ACTION,
|
||||
data: {
|
||||
action: action,
|
||||
final: true,
|
||||
fireAndForget: true
|
||||
},
|
||||
ts: Date.now(),
|
||||
msgID: getUUID()
|
||||
};
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Creating a Graph Skill
|
||||
|
||||
```typescript
|
||||
import { GraphSkill, graph } from '@jibo/baseskill';
|
||||
|
||||
enum Transition {
|
||||
Done = 'Done',
|
||||
Retry = 'Retry'
|
||||
}
|
||||
|
||||
export class MyGraphSkill extends GraphSkill<Transition> {
|
||||
constructor() {
|
||||
super('my-graph-skill');
|
||||
}
|
||||
|
||||
createGraph(): graph.Graph<Transition> {
|
||||
const g = new graph.Graph('My Skill', generateTransitions(Transition));
|
||||
|
||||
const startNode = new MyStartNode('Start');
|
||||
const endNode = new graph.nodes.dn.DefaultNode('End');
|
||||
|
||||
g.addNode(startNode, [[Transition.Done, endNode]]);
|
||||
g.addNode(endNode, [[graph.nodes.dn.Transition.Done, Transition.Done]]);
|
||||
|
||||
g.finalize();
|
||||
return g;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Creating a Custom Node
|
||||
|
||||
```typescript
|
||||
import { Node, Data, EnterResponse, ExitResponse } from '@jibo/baseskill';
|
||||
|
||||
enum MyTransition {
|
||||
Success = 'Success',
|
||||
Failure = 'Failure'
|
||||
}
|
||||
|
||||
class MyNode extends Node<MyTransition> {
|
||||
constructor() {
|
||||
super('MyNode', [MyTransition.Success, MyTransition.Failure]);
|
||||
}
|
||||
|
||||
async enter(data: Data): Promise<EnterResponse> {
|
||||
// Perform logic
|
||||
const action = generateJCPAction(SayTextBehavior("Processing..."));
|
||||
return { action };
|
||||
}
|
||||
|
||||
async exit(data: Data): Promise<ExitResponse> {
|
||||
// Process action results
|
||||
if (data.result.success) {
|
||||
return { transition: MyTransition.Success };
|
||||
} else {
|
||||
return { transition: MyTransition.Failure };
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Key Design Principles
|
||||
|
||||
1. **State Machine** - Graph-based state machine for complex flows
|
||||
2. **Single Responsibility** - Each node handles one piece of logic
|
||||
3. **Reusability** - Subgraphs and node types can be reused
|
||||
4. **Testability** - Nodes can be tested independently
|
||||
5. **Visualization** - GraphViz generation for debugging
|
||||
6. **Analytics** - Built-in event tracking
|
||||
7. **Flexibility** - Supports both simple and complex skills
|
||||
8. **Supplemental Behaviors** - Easy to add parallel/sequence actions
|
||||
Reference in New Issue
Block a user