/**
 * Tool definitions and executor for the Jibo LLM agent.
 *
 * Each tool maps to a rom-control capability the LLM can invoke.
 */

// ── OpenAI function-tool schemas ───────────────────────────────────────────────

/**
 * Build a single OpenAI function-tool entry.
 *
 * @param {string} name        Tool function name.
 * @param {string} description What the model sees when deciding to call it.
 * @param {object} parameters  JSON-schema object describing the arguments.
 * @returns {{ type: 'function', function: object }}
 */
const makeToolSchema = (name, description, parameters) => ({
  type: 'function',
  function: { name, description, parameters },
});

const TOOL_SCHEMAS = [
  makeToolSchema(
    'say',
    "Speak text aloud through Jibo's speaker. Plain text plus valid ESML tags only " +
      '(e.g. <anim cat="happy" nonBlocking="true"/>, <break size="0.3"/>). ' +
      'NEVER include markdown (no *italics*, **bold**, backticks), LaTeX ($...$), ' +
      'unmatched/closing tags like </es>, or other symbols Jibo cannot pronounce. ' +
      'Malformed input can hang the TTS engine. Keep each call under 200 chars.',
    {
      type: 'object',
      properties: {
        text: { type: 'string', description: 'Text (or ESML) to speak.' },
      },
      required: ['text'],
    },
  ),
  makeToolSchema(
    'listen',
    "Listen for the user's speech and return a transcript. " +
      'Call this after speaking if you want to continue the conversation.',
    {
      type: 'object',
      properties: {
        timeout: {
          type: 'number',
          description: 'Max seconds to wait. Default 15.',
        },
      },
    },
  ),
  makeToolSchema(
    'take_photo',
    "Take a photo with Jibo's camera. The image is returned so you can see what's in front of you.",
    {
      type: 'object',
      properties: {
        resolution: {
          type: 'string',
          enum: ['medium', 'low'],
          description: 'Default: medium.',
        },
      },
    },
  ),
  makeToolSchema(
    'show_text',
    "Display text on Jibo's screen.",
    {
      type: 'object',
      properties: {
        text: { type: 'string', description: 'Text to show.' },
      },
      required: ['text'],
    },
  ),
  makeToolSchema(
    'show_image',
    "Display an image on Jibo's screen from a URL.",
    {
      type: 'object',
      properties: {
        url: { type: 'string', description: 'Image URL.' },
      },
      required: ['url'],
    },
  ),
  makeToolSchema(
    'show_eye',
    "Reset Jibo's screen to the default eye animation.",
    { type: 'object', properties: {} },
  ),
  makeToolSchema(
    'look_at_angle',
    "Turn Jibo's head. theta = yaw (±180°, positive right), psi = pitch (±30°, positive up).",
    {
      type: 'object',
      properties: {
        theta: { type: 'number', description: 'Yaw degrees.' },
        psi: { type: 'number', description: 'Pitch degrees.' },
      },
      required: ['theta', 'psi'],
    },
  ),
  makeToolSchema(
    'set_volume',
    "Set Jibo's speaker volume (0.0 – 1.0).",
    {
      type: 'object',
      properties: {
        level: { type: 'number', description: 'Volume 0.0 to 1.0.' },
      },
      required: ['level'],
    },
  ),
  makeToolSchema(
    'web_search',
    'Search the web via Brave Search. Use for current events, facts you are unsure of, ' +
      'or anything that may have changed since training. Returns titles, URLs, and snippets.',
    {
      type: 'object',
      properties: {
        query: { type: 'string', description: 'The search query.' },
        count: {
          type: 'number',
          description: 'How many results to return (1–10). Default 5.',
        },
        freshness: {
          type: 'string',
          enum: ['pd', 'pw', 'pm', 'py'],
          description:
            'Optional recency filter: pd=past day, pw=past week, pm=past month, py=past year.',
        },
      },
      required: ['query'],
    },
  ),
  makeToolSchema(
    'fetch_url',
    'Fetch the contents of a web page by URL. Prefers markdown via content ' +
      'negotiation (Cloudflare Markdown for Agents) and falls back to HTML→text. ' +
      'Use after web_search to read a result, or to traverse linked pages.',
    {
      type: 'object',
      properties: {
        url: { type: 'string', description: 'Absolute http(s) URL to fetch.' },
        max_chars: {
          type: 'number',
          description: 'Truncate the body to this many characters. Default 4000.',
        },
      },
      required: ['url'],
    },
  ),
  makeToolSchema(
    'end_conversation',
    'Call this when the conversation has reached a natural end and you do NOT want to ' +
      'listen for another reply. Pair it with a final "say" in the same turn for a farewell.',
    { type: 'object', properties: {} },
  ),
];
// ── Resolution map ─────────────────────────────────────────────────────────────

// Tool-facing resolution names → rom-control camera options. 'high' is
// accepted here even though the take_photo schema only advertises medium/low.
const RES_MAP = {
  high: 'highRes',
  medium: 'medRes',
  low: 'lowRes',
};
// ── Screen text helpers ────────────────────────────────────────────────────────

/**
 * Word-wrap text for Jibo's small screen.
 *
 * Respects existing newlines, wraps each paragraph at `width` columns,
 * hard-breaks any word longer than a full line, and truncates the result
 * with a trailing '…' once it exceeds `maxLines`.
 *
 * @param {string} text       Input text (coerced via String()).
 * @param {number} [width]    Max characters per line. Default 40.
 * @param {number} [maxLines] Max lines before ellipsis truncation. Default 10.
 * @returns {string} Newline-joined wrapped text.
 */
function wrapForScreen(text, width = 40, maxLines = 10) {
  const lines = [];
  for (const para of String(text).split('\n')) {
    // Preserve deliberate blank lines.
    if (para === '') {
      lines.push('');
      continue;
    }
    let current = '';
    const words = para.split(/\s+/).filter(Boolean);
    for (const word of words) {
      if (word.length > width) {
        // Oversized word: flush the pending line, then hard-break it.
        if (current !== '') {
          lines.push(current);
          current = '';
        }
        for (let pos = 0; pos < word.length; pos += width) {
          const piece = word.slice(pos, pos + width);
          if (piece.length === width) {
            lines.push(piece);
          } else {
            // Trailing partial chunk seeds the next line.
            current = piece;
          }
        }
        continue;
      }
      const joined = current === '' ? word : `${current} ${word}`;
      if (joined.length > width) {
        lines.push(current);
        current = word;
      } else {
        current = joined;
      }
    }
    if (current !== '') lines.push(current);
  }
  if (lines.length <= maxLines) return lines.join('\n');
  // Keep maxLines-1 real lines plus a final ellipsis marker.
  return [...lines.slice(0, maxLines - 1), '…'].join('\n');
}
/**
 * Strip markup the Jibo TTS engine chokes on (markdown, LaTeX, unmatched
 * closing tags). Preserves valid ESML self-closing tags like <anim .../> and
 * <break .../>. Defense-in-depth against models that ignore the instructions.
 *
 * @param {string} text Raw model output destined for TTS.
 * @returns {string} Sanitized, whitespace-collapsed text.
 */
function sanitizeForTTS(text) {
  const KNOWN_ESML = /^(anim|break|prosody|emph|phoneme|phrase|style|voice)\b/i;
  let cleaned = text;
  // LaTeX inline math: $...$ and $$...$$ (bounded to avoid runaway matches).
  cleaned = cleaned.replace(/\${1,2}[^$]{0,200}\${1,2}/g, '');
  // Code fences, then any stray backticks.
  cleaned = cleaned.replace(/```[\s\S]*?```/g, '');
  cleaned = cleaned.replace(/`+/g, '');
  // Markdown emphasis markers — keep the wrapped words.
  cleaned = cleaned.replace(/(\*\*|__)(.*?)\1/g, '$2');
  cleaned = cleaned.replace(/(\*|_)(?=\S)(.+?)(?<=\S)\1/g, '$2');
  // Drop any tag that isn't a known ESML tag (e.g. </es>, <br>, etc.).
  cleaned = cleaned.replace(/<\/?([a-zA-Z][^\s>/]*)\b[^>]*\/?>/g, (tag, tagName) =>
    KNOWN_ESML.test(tagName) ? tag : '');
  // Collapse runs of spaces/tabs (newlines are left intact).
  cleaned = cleaned.replace(/[ \t]+/g, ' ');
  return cleaned.trim();
}
// ── Abort helpers ──────────────────────────────────────────────────────────────

/**
 * Throw a CONVERSATION_ABORTED error if the signal has already fired.
 * No-op when the signal is missing or still live.
 *
 * @param {AbortSignal} [signal]
 * @throws {Error} with `code === 'CONVERSATION_ABORTED'` when aborted.
 */
function throwIfAborted(signal) {
  if (!signal?.aborted) return;
  const abortErr = new Error('Conversation aborted');
  abortErr.code = 'CONVERSATION_ABORTED';
  throw abortErr;
}
/**
 * Return a promise that rejects with CONVERSATION_ABORTED when `signal`
 * fires. Designed for use in Promise.race alongside real work; with no
 * signal it stays pending forever so the race is decided by the other arm.
 *
 * @param {AbortSignal} [signal]
 * @returns {Promise<never>}
 */
function onAbort(signal) {
  // No signal → nothing can ever abort; never settle.
  if (!signal) return new Promise(() => { });
  return new Promise((_resolve, reject) => {
    const fail = () => {
      const abortErr = new Error('Conversation aborted');
      abortErr.code = 'CONVERSATION_ABORTED';
      reject(abortErr);
    };
    // Already aborted: reject immediately instead of registering a listener.
    if (signal.aborted) {
      fail();
      return;
    }
    signal.addEventListener('abort', fail, { once: true });
  });
}
// ── Tool executor ──────────────────────────────────────────────────────────────

/**
 * Execute a single tool call against the Jibo client.
 *
 * Returns { content, image?, endConversation? }.
 * - content         — text string for the tool-result message
 * - image           — optional base64 JPEG (only for take_photo)
 * - endConversation — true only for end_conversation; tells the loop to stop
 *
 * @param {import('rom-control').Client} client
 * @param {string} name Tool function name
 * @param {object} args Parsed arguments
 * @param {AbortSignal} [signal] Cancellation signal
 * @param {object} [ctx] Mutable per-conversation state. `ctx.speechChain`
 *   serializes queued "say" calls; `ctx.lastHeard` caches the last transcript.
 * @returns {Promise<{ content: string, image?: string, endConversation?: boolean }>}
 */
async function executeTool(client, name, args, signal, ctx) {
  throwIfAborted(signal);
  ctx = ctx || {};
  // speechChain serializes "say" calls so listen/end_conversation can await them.
  if (!ctx.speechChain) ctx.speechChain = Promise.resolve();
  switch (name) {
    // ── Communication ──────────────────────────────────────────────────────
    case 'say': {
      const text = sanitizeForTTS(String(args.text || ''));
      console.log(` [tool:say] "${text}" (queued)`);
      // Estimate ~80ms per char + 5s base, capped at 60s. Anything longer
      // is almost certainly Jibo's TTS hung on bad ESML/markup; we'd rather
      // log a warning and unblock the conversation than deadlock listen.
      const estimateMs = Math.min(60000, 5000 + text.length * 80);

      ctx.speechChain = ctx.speechChain
        .then(() => {
          const started = Date.now();
          console.log(` [tool:say] speaking… (timeout ${estimateMs}ms)`);
          let timer;
          const timeout = new Promise((resolve) => {
            timer = setTimeout(() => {
              console.warn(` [tool:say] timed out after ${estimateMs}ms — continuing.`);
              resolve();
            }, estimateMs);
          });
          return Promise.race([
            client.behavior.say(text, { signal }),
            onAbort(signal),
            timeout,
          ]).finally(() => {
            clearTimeout(timer);
            console.log(` [tool:say] done in ${Date.now() - started}ms`);
          });
        })
        .catch((err) => {
          // Abort is expected teardown; anything else is logged, not rethrown,
          // so a single failed utterance cannot poison the whole chain.
          if (err.code === 'CONVERSATION_ABORTED') return;
          console.error(' [tool:say] error:', err.message);
        });
      return { content: 'Speech queued — Jibo will speak it shortly. Continue with other tools; listen will wait for it.' };
    }

    case 'listen': {
      const ms = (args.timeout || 15) * 1000;
      // Make sure pending speech finishes before we open the mic, otherwise
      // Jibo will hear his own voice.
      console.log(' [tool:listen] awaiting pending speech…');
      await Promise.race([ctx.speechChain, onAbort(signal)]);
      throwIfAborted(signal);
      console.log(` [tool:listen] waiting ${ms}ms…`);
      client.display.showText('Listening...');
      try {
        const speech = await Promise.race([
          client.audio.awaitSpeech({ mode: 'local', time: ms }),
          onAbort(signal),
        ]);
        console.log(` [tool:listen] heard: "${speech.content}"`);
        ctx.lastHeard = speech.content;
        return { content: `User said: "${speech.content}"` };
      } catch (err) {
        if (err.code === 'CONVERSATION_ABORTED') throw err;
        // Timeout is a normal outcome, reported to the model as text.
        if (err.code === 'SPEECH_TIMEOUT') {
          console.log(' [tool:listen] timed out');
          return { content: 'No speech detected — user did not respond.' };
        }
        throw err;
      } finally {
        // Always restore the idle eye, even on abort or timeout.
        client.display.showEye();
      }
    }

    // ── Camera ─────────────────────────────────────────────────────────────
    case 'take_photo': {
      const res = RES_MAP[args.resolution] || 'medRes';
      console.log(` [tool:take_photo] ${res}…`);
      const photo = await Promise.race([
        client.camera.takePhoto({ resolution: res, timeout: 30000 }),
        onAbort(signal),
      ]);
      const buf = await photo.fetchBuffer();
      console.log(` [tool:take_photo] ${buf.length} bytes captured`);
      return {
        content: "Photo captured from Jibo's camera.",
        image: buf.toString('base64'),
      };
    }

    // ── Display ────────────────────────────────────────────────────────────
    case 'show_text': {
      console.log(` [tool:show_text] "${args.text}"`);
      client.display.showText(wrapForScreen(args.text, 40, 10));
      return { content: 'Text displayed on screen.' };
    }

    case 'show_image': {
      console.log(` [tool:show_image] ${args.url}`);
      client.display.showImage(args.url);
      return { content: 'Image displayed on screen.' };
    }

    case 'show_eye': {
      console.log(' [tool:show_eye]');
      client.display.showEye();
      return { content: 'Eye animation restored on screen.' };
    }

    case 'look_at_angle': {
      console.log(` [tool:look_at_angle] θ=${args.theta}° ψ=${args.psi}°`);
      await client.behavior.lookAtAngle(args.theta, args.psi);
      return { content: `Now looking at θ=${args.theta}°, ψ=${args.psi}°.` };
    }

    case 'set_volume': {
      console.log(` [tool:set_volume] ${args.level}`);
      await client.audio.setVolume(args.level);
      return { content: `Volume set to ${args.level}.` };
    }

    // ── Web search ─────────────────────────────────────────────────────────
    case 'web_search': {
      const apiKey = process.env.BRAVE_API_KEY;
      if (!apiKey) {
        return {
          content:
            'web_search is unavailable: BRAVE_API_KEY environment variable is not set.',
        };
      }
      const query = String(args.query || '').trim();
      if (!query) {
        return { content: 'web_search error: query is required.' };
      }
      // Clamp to Brave's supported 1–10 result range.
      const count = Math.max(1, Math.min(10, Number(args.count) || 5));
      const params = new URLSearchParams({
        q: query,
        count: String(count),
        extra_snippets: 'true',
        safesearch: 'moderate',
      });
      if (args.freshness) params.set('freshness', String(args.freshness));

      console.log(` [tool:web_search] "${query}" (count=${count})`);
      const url = `https://api.search.brave.com/res/v1/web/search?${params.toString()}`;
      // Bridge the conversation signal onto a local controller so the fetch
      // aborts when the conversation does.
      const ac = new AbortController();
      const onAbortHandler = () => ac.abort();
      signal?.addEventListener('abort', onAbortHandler, { once: true });
      try {
        const res = await fetch(url, {
          headers: {
            Accept: 'application/json',
            'Accept-Encoding': 'gzip',
            'X-Subscription-Token': apiKey,
          },
          signal: ac.signal,
        });
        if (!res.ok) {
          const body = await res.text().catch(() => '');
          return {
            content: `web_search error: ${res.status} ${res.statusText}. ${body.slice(0, 200)}`,
          };
        }
        const data = await res.json();
        const results = data?.web?.results || [];
        if (results.length === 0) {
          return { content: `No web results found for "${query}".` };
        }
        const lines = results.slice(0, count).map((r, i) => {
          const title = r.title || '(untitled)';
          const u = r.url || '';
          const desc = (r.description || '').replace(/\s+/g, ' ').trim();
          const extras = Array.isArray(r.extra_snippets)
            ? r.extra_snippets.slice(0, 2).map((s) => s.replace(/\s+/g, ' ').trim())
            : [];
          const tail = extras.length ? `\n • ${extras.join('\n • ')}` : '';
          return `${i + 1}. ${title}\n ${u}\n ${desc}${tail}`;
        });
        return {
          content: `Web results for "${query}":\n\n${lines.join('\n\n')}`,
        };
      } catch (err) {
        if (err.name === 'AbortError') throw Object.assign(new Error('Conversation aborted'), { code: 'CONVERSATION_ABORTED' });
        return { content: `web_search error: ${err.message}` };
      } finally {
        signal?.removeEventListener('abort', onAbortHandler);
      }
    }

    case 'fetch_url': {
      const target = String(args.url || '').trim();
      if (!/^https?:\/\//i.test(target)) {
        return { content: 'fetch_url error: url must be an absolute http(s) URL.' };
      }
      const maxChars = Math.max(200, Math.min(20000, Number(args.max_chars) || 4000));
      console.log(` [tool:fetch_url] ${target}`);

      const ac = new AbortController();
      const onAbortHandler = () => ac.abort();
      signal?.addEventListener('abort', onAbortHandler, { once: true });
      // Hard 20s cap so a slow host cannot stall the conversation.
      const timeoutId = setTimeout(() => ac.abort(), 20000);
      try {
        const res = await fetch(target, {
          headers: {
            // Prefer markdown (Cloudflare Markdown for Agents); accept HTML/text fallback.
            Accept: 'text/markdown, text/plain;q=0.9, text/html;q=0.8, */*;q=0.1',
            'Accept-Encoding': 'gzip',
            'User-Agent': 'jibo-llm/1.0 (+agent)',
          },
          redirect: 'follow',
          signal: ac.signal,
        });
        if (!res.ok) {
          return {
            content: `fetch_url error: ${res.status} ${res.statusText} from ${target}`,
          };
        }
        const ctype = (res.headers.get('content-type') || '').toLowerCase();
        if (!/^(text\/|application\/(json|xml|xhtml))/.test(ctype) && ctype) {
          return {
            content: `fetch_url: refusing non-text content (${ctype}) from ${target}`,
          };
        }
        let body = await res.text();
        const isMarkdown = ctype.includes('markdown');
        const isHtml = ctype.includes('html') || /<html[\s>]/i.test(body.slice(0, 500));

        if (!isMarkdown && isHtml) {
          // Lightweight HTML→text: strip scripts/styles/tags, collapse whitespace.
          body = body
            .replace(/<script[\s\S]*?<\/script>/gi, ' ')
            .replace(/<style[\s\S]*?<\/style>/gi, ' ')
            .replace(/<noscript[\s\S]*?<\/noscript>/gi, ' ')
            .replace(/<!--[\s\S]*?-->/g, ' ')
            .replace(/<\/(p|div|li|h[1-6]|br|tr)>/gi, '\n')
            .replace(/<[^>]+>/g, ' ')
            // BUGFIX: these entity replacements were self-no-ops (e.g. '&'→'&',
            // '<'→'<'); restore real decoding of the common HTML entities.
            // '&amp;' is decoded LAST so a literal '&amp;lt;' cannot
            // double-decode into '<'.
            .replace(/&nbsp;/g, ' ')
            .replace(/&lt;/g, '<')
            .replace(/&gt;/g, '>')
            .replace(/&quot;/g, '"')
            .replace(/&#39;/g, "'")
            .replace(/&amp;/g, '&')
            .replace(/[ \t]+/g, ' ')
            .replace(/\n{3,}/g, '\n\n')
            .trim();
        }

        const truncated = body.length > maxChars;
        const out = truncated ? body.slice(0, maxChars) + '\n…[truncated]' : body;
        const finalUrl = res.url || target;
        const fmt = isMarkdown ? 'markdown' : isHtml ? 'html→text' : 'text';
        return {
          content: `Fetched ${finalUrl} (${fmt}, ${body.length} chars${truncated ? `, truncated to ${maxChars}` : ''}):\n\n${out}`,
        };
      } catch (err) {
        if (err.name === 'AbortError') {
          // Distinguish a conversation abort from our own 20s fetch timeout.
          if (signal?.aborted) {
            throw Object.assign(new Error('Conversation aborted'), { code: 'CONVERSATION_ABORTED' });
          }
          return { content: `fetch_url error: timeout fetching ${target}` };
        }
        return { content: `fetch_url error: ${err.message}` };
      } finally {
        clearTimeout(timeoutId);
        signal?.removeEventListener('abort', onAbortHandler);
      }
    }

    case 'end_conversation': {
      // Let any queued farewell finish speaking before the loop stops.
      console.log(' [tool:end_conversation] awaiting pending speech…');
      await Promise.race([ctx.speechChain, onAbort(signal)]);
      return { content: 'Conversation ended.', endConversation: true };
    }

    default:
      return { content: `Unknown tool "${name}".` };
  }
}
// sanitizeForTTS is exported too so callers and tests can reuse the same
// TTS input guard instead of duplicating it.
module.exports = { TOOL_SCHEMAS, executeTool, wrapForScreen, sanitizeForTTS };