From 4985bfc000d61b8bfe14dd77bcf3011c48abc62d Mon Sep 17 00:00:00 2001 From: tanzhenxin Date: Mon, 8 Sep 2025 20:01:49 +0800 Subject: [PATCH] feat: subagent runtime & CLI display - wip --- .../components/messages/ToolMessage.test.tsx | 42 ++ .../ui/components/messages/ToolMessage.tsx | 38 +- .../subagents/SubagentExecutionDisplay.tsx | 381 +++++++++++ .../cli/src/ui/components/subagents/index.ts | 3 + packages/cli/src/ui/types.ts | 2 +- packages/core/src/config/config.ts | 8 +- .../core/__snapshots__/prompts.test.ts.snap | 9 + packages/core/src/core/coreToolScheduler.ts | 6 +- packages/core/src/core/prompts.ts | 2 + packages/core/src/subagents/index.ts | 17 +- .../core/src/subagents/subagent-events.ts | 89 +++ packages/core/src/subagents/subagent-hooks.ts | 33 + .../src/subagents/subagent-manager.test.ts | 44 +- .../core/src/subagents/subagent-manager.ts | 106 +++- .../src/subagents/subagent-result-format.ts | 186 ++++++ .../core/src/subagents/subagent-statistics.ts | 105 +++ .../src/{core => subagents}/subagent.test.ts | 186 ++---- .../core/src/{core => subagents}/subagent.ts | 597 +++++++++++++----- packages/core/src/subagents/types.ts | 2 +- .../core/src/subagents/validation.test.ts | 26 +- packages/core/src/subagents/validation.ts | 15 +- packages/core/src/telemetry/constants.ts | 3 + packages/core/src/telemetry/loggers.ts | 31 + packages/core/src/telemetry/metrics.ts | 34 + .../src/telemetry/qwen-logger/qwen-logger.ts | 15 + packages/core/src/telemetry/types.ts | 31 +- packages/core/src/tools/shell.ts | 3 +- packages/core/src/tools/task.test.ts | 507 +++++++++++++++ packages/core/src/tools/task.ts | 498 +++++++++++++++ packages/core/src/tools/tool-registry.ts | 3 +- packages/core/src/tools/tools.ts | 32 +- 31 files changed, 2664 insertions(+), 390 deletions(-) create mode 100644 packages/cli/src/ui/components/subagents/SubagentExecutionDisplay.tsx create mode 100644 packages/core/src/subagents/subagent-events.ts create mode 100644 
packages/core/src/subagents/subagent-hooks.ts create mode 100644 packages/core/src/subagents/subagent-result-format.ts create mode 100644 packages/core/src/subagents/subagent-statistics.ts rename packages/core/src/{core => subagents}/subagent.test.ts (81%) rename packages/core/src/{core => subagents}/subagent.ts (52%) create mode 100644 packages/core/src/tools/task.test.ts create mode 100644 packages/core/src/tools/task.ts diff --git a/packages/cli/src/ui/components/messages/ToolMessage.test.tsx b/packages/cli/src/ui/components/messages/ToolMessage.test.tsx index c9bed003..94a17e6d 100644 --- a/packages/cli/src/ui/components/messages/ToolMessage.test.tsx +++ b/packages/cli/src/ui/components/messages/ToolMessage.test.tsx @@ -39,6 +39,19 @@ vi.mock('../../utils/MarkdownDisplay.js', () => ({ return MockMarkdown:{text}; }, })); +vi.mock('../subagents/index.js', () => ({ + SubagentExecutionDisplay: function MockSubagentExecutionDisplay({ + data, + }: { + data: { subagentName: string; taskDescription: string }; + }) { + return ( + + 🤖 {data.subagentName} • Task: {data.taskDescription} + + ); + }, +})); // Helper to render with context const renderWithContext = ( @@ -180,4 +193,33 @@ describe('', () => { // We can at least ensure it doesn't have the high emphasis indicator. 
expect(lowEmphasisFrame()).not.toContain('←'); }); + + it('shows subagent execution display for task tool with proper result display', () => { + const subagentResultDisplay = { + type: 'subagent_execution' as const, + subagentName: 'file-search', + taskDescription: 'Search for files matching pattern', + status: 'running' as const, + }; + + const props: ToolMessageProps = { + name: 'task', + description: 'Delegate task to subagent', + resultDisplay: subagentResultDisplay, + status: ToolCallStatus.Executing, + terminalWidth: 80, + callId: 'test-call-id-2', + confirmationDetails: undefined, + }; + + const { lastFrame } = renderWithContext( + , + StreamingState.Responding, + ); + + const output = lastFrame(); + expect(output).toContain('🤖'); // Subagent execution display should show + expect(output).toContain('file-search'); // Actual subagent name + expect(output).toContain('Search for files matching pattern'); // Actual task description + }); }); diff --git a/packages/cli/src/ui/components/messages/ToolMessage.tsx b/packages/cli/src/ui/components/messages/ToolMessage.tsx index e7e58e9a..70d2d6a7 100644 --- a/packages/cli/src/ui/components/messages/ToolMessage.tsx +++ b/packages/cli/src/ui/components/messages/ToolMessage.tsx @@ -13,7 +13,11 @@ import { MarkdownDisplay } from '../../utils/MarkdownDisplay.js'; import { GeminiRespondingSpinner } from '../GeminiRespondingSpinner.js'; import { MaxSizedBox } from '../shared/MaxSizedBox.js'; import { TodoDisplay } from '../TodoDisplay.js'; -import { TodoResultDisplay } from '@qwen-code/qwen-code-core'; +import { + TodoResultDisplay, + TaskResultDisplay, +} from '@qwen-code/qwen-code-core'; +import { SubagentExecutionDisplay } from '../subagents/index.js'; const STATIC_HEIGHT = 1; const RESERVED_LINE_COUNT = 5; // for tool name, status, padding etc. 
@@ -29,7 +33,8 @@ type DisplayRendererResult = | { type: 'none' } | { type: 'todo'; data: TodoResultDisplay } | { type: 'string'; data: string } - | { type: 'diff'; data: { fileDiff: string; fileName: string } }; + | { type: 'diff'; data: { fileDiff: string; fileName: string } } + | { type: 'subagent_execution'; data: TaskResultDisplay }; /** * Custom hook to determine the type of result display and return appropriate rendering info @@ -55,6 +60,19 @@ const useResultDisplayRenderer = ( }; } + // Check for SubagentExecutionResultDisplay (for non-task tools) + if ( + typeof resultDisplay === 'object' && + resultDisplay !== null && + 'type' in resultDisplay && + resultDisplay.type === 'subagent_execution' + ) { + return { + type: 'subagent_execution', + data: resultDisplay as TaskResultDisplay, + }; + } + // Check for FileDiff if ( typeof resultDisplay === 'object' && @@ -81,6 +99,15 @@ const TodoResultRenderer: React.FC<{ data: TodoResultDisplay }> = ({ data, }) => ; +/** + * Component to render subagent execution results + */ +const SubagentExecutionRenderer: React.FC<{ + data: TaskResultDisplay; + availableHeight?: number; + childWidth: number; +}> = ({ data }) => ; + /** * Component to render string results (markdown or plain text) */ @@ -189,6 +216,13 @@ export const ToolMessage: React.FC = ({ {displayRenderer.type === 'todo' && ( )} + {displayRenderer.type === 'subagent_execution' && ( + + )} {displayRenderer.type === 'string' && ( = ({ data }) => ( + + {/* Header with subagent name and status */} + + + + {data.subagentName} + + • + + + + {/* Task description */} + + Task: + {data.taskDescription} + + + {/* Progress section for running tasks */} + {data.status === 'running' && ( + + )} + + {/* Results section for completed/failed tasks */} + {(data.status === 'completed' || data.status === 'failed') && ( + + )} + +); + +/** + * Status dot component with similar height as text + */ +const StatusDot: React.FC<{ + status: TaskResultDisplay['status']; +}> = ({ 
status }) => { + const color = React.useMemo(() => { + switch (status) { + case 'running': + return Colors.AccentYellow; + case 'completed': + return Colors.AccentGreen; + case 'failed': + return Colors.AccentRed; + default: + return Colors.Gray; + } + }, [status]); + + return ( + + ● + + ); +}; + +/** + * Status indicator component + */ +const StatusIndicator: React.FC<{ + status: TaskResultDisplay['status']; +}> = ({ status }) => { + switch (status) { + case 'running': + return Running; + case 'completed': + return Completed; + case 'failed': + return Failed; + default: + return Unknown; + } +}; + +/** + * Progress section for running executions + */ +const ProgressSection: React.FC<{ + progress: { + toolCalls?: Array<{ + name: string; + status: 'executing' | 'success' | 'failed'; + error?: string; + args?: Record; + result?: string; + returnDisplay?: string; + }>; + }; +}> = ({ progress }) => ( + + {progress.toolCalls && progress.toolCalls.length > 0 && ( + + )} + +); + +/** + * Clean tool calls list - format consistent with ToolInfo in ToolMessage.tsx + */ +const CleanToolCallsList: React.FC<{ + toolCalls: Array<{ + name: string; + status: 'executing' | 'success' | 'failed'; + error?: string; + args?: Record; + result?: string; + returnDisplay?: string; + }>; +}> = ({ toolCalls }) => ( + + + Tools: + + {toolCalls.map((toolCall, index) => ( + + ))} + +); + +/** + * Individual tool call item - consistent with ToolInfo format + */ +const CleanToolCallItem: React.FC<{ + toolCall: { + name: string; + status: 'executing' | 'success' | 'failed'; + error?: string; + args?: Record; + result?: string; + returnDisplay?: string; + }; +}> = ({ toolCall }) => { + const STATUS_INDICATOR_WIDTH = 3; + + // Map subagent status to ToolCallStatus-like display + const statusIcon = React.useMemo(() => { + switch (toolCall.status) { + case 'executing': + return ⊷; // Using same as ToolMessage + case 'success': + return ✔; + case 'failed': + return ( + + x + + ); + default: + 
return o; + } + }, [toolCall.status]); + + const description = getToolDescription(toolCall); + + // Get first line of returnDisplay for truncated output + const truncatedOutput = React.useMemo(() => { + if (!toolCall.returnDisplay) return ''; + const firstLine = toolCall.returnDisplay.split('\n')[0]; + return firstLine.length > 80 + ? firstLine.substring(0, 80) + '...' + : firstLine; + }, [toolCall.returnDisplay]); + + return ( + + {/* First line: status icon + tool name + description (consistent with ToolInfo) */} + + {statusIcon} + + + {toolCall.name} + {' '} + {description} + {toolCall.error && ( + - {toolCall.error} + )} + + + + {/* Second line: truncated returnDisplay output */} + {truncatedOutput && ( + + {truncatedOutput} + + )} + + ); +}; + +/** + * Helper function to get tool description from args + */ +const getToolDescription = (toolCall: { + name: string; + args?: Record; +}): string => { + if (!toolCall.args) return ''; + + // Handle common tool patterns + if (toolCall.name === 'Glob' && toolCall.args['glob_pattern']) { + return `"${toolCall.args['glob_pattern']}"`; + } + if (toolCall.name === 'ReadFile' && toolCall.args['target_file']) { + const path = toolCall.args['target_file'] as string; + return path.split('/').pop() || path; + } + if (toolCall.name === 'SearchFileContent' && toolCall.args['pattern']) { + return `"${toolCall.args['pattern']}"`; + } + + // Generic fallback + const firstArg = Object.values(toolCall.args)[0]; + if (typeof firstArg === 'string' && firstArg.length < 50) { + return firstArg; + } + + return ''; +}; + +/** + * Execution summary details component + */ +const ExecutionSummaryDetails: React.FC<{ + data: TaskResultDisplay; +}> = ({ data }) => { + // Parse execution summary for structured data + const summaryData = React.useMemo(() => { + if (!data.executionSummary) return null; + + // Try to extract structured data from execution summary + const durationMatch = data.executionSummary.match(/Duration:\s*([^\n]+)/i); + const 
roundsMatch = data.executionSummary.match(/Rounds:\s*(\d+)/i); + const tokensMatch = data.executionSummary.match(/Tokens:\s*([\d,]+)/i); + + return { + duration: durationMatch?.[1] || 'N/A', + rounds: roundsMatch?.[1] || 'N/A', + tokens: tokensMatch?.[1] || 'N/A', + }; + }, [data.executionSummary]); + + if (!summaryData) { + return ( + + • No summary available + + ); + } + + return ( + + + • Duration: {summaryData.duration} + + + • Rounds: {summaryData.rounds} + + + • Tokens: {summaryData.tokens} + + + ); +}; + +/** + * Tool usage statistics component + */ +const ToolUsageStats: React.FC<{ + toolCalls: Array<{ + name: string; + status: 'executing' | 'success' | 'failed'; + error?: string; + args?: Record; + result?: string; + returnDisplay?: string; + }>; +}> = ({ toolCalls }) => { + const stats = React.useMemo(() => { + const total = toolCalls.length; + const successful = toolCalls.filter( + (call) => call.status === 'success', + ).length; + const failed = toolCalls.filter((call) => call.status === 'failed').length; + const successRate = + total > 0 ? 
((successful / total) * 100).toFixed(1) : '0.0'; + + return { total, successful, failed, successRate }; + }, [toolCalls]); + + return ( + + + • Total Calls: {stats.total} + + + • Success Rate:{' '} + {stats.successRate}% ( + {stats.successful} success,{' '} + {stats.failed} failed) + + + ); +}; + +/** + * Results section for completed executions - matches the clean layout from the image + */ +const ResultsSection: React.FC<{ + data: TaskResultDisplay; +}> = ({ data }) => ( + + {/* Tool calls section - clean list format */} + {data.progress?.toolCalls && data.progress.toolCalls.length > 0 && ( + + )} + + {/* Task Completed section */} + + 📄 + Task Completed: + {data.taskDescription} + + + {/* Execution Summary section */} + + + 📊 + + Execution Summary: + + + + + + {/* Tool Usage section */} + {data.progress?.toolCalls && data.progress.toolCalls.length > 0 && ( + + + 🔧 + + Tool Usage: + + + + + )} + + {/* Error reason for failed tasks */} + {data.status === 'failed' && data.terminateReason && ( + + ❌ Failed: + {data.terminateReason} + + )} + +); diff --git a/packages/cli/src/ui/components/subagents/index.ts b/packages/cli/src/ui/components/subagents/index.ts index e77e8562..8e794c65 100644 --- a/packages/cli/src/ui/components/subagents/index.ts +++ b/packages/cli/src/ui/components/subagents/index.ts @@ -20,6 +20,9 @@ export { ActionSelectionStep } from './ActionSelectionStep.js'; export { AgentViewerStep } from './AgentViewerStep.js'; export { AgentDeleteStep } from './AgentDeleteStep.js'; +// Execution Display Components +export { SubagentExecutionDisplay } from './SubagentExecutionDisplay.js'; + // Creation Wizard Types and State export type { CreationWizardState, diff --git a/packages/cli/src/ui/types.ts b/packages/cli/src/ui/types.ts index c8568da8..efc61a54 100644 --- a/packages/cli/src/ui/types.ts +++ b/packages/cli/src/ui/types.ts @@ -46,7 +46,7 @@ export interface IndividualToolCallDisplay { callId: string; name: string; description: string; - 
resultDisplay: ToolResultDisplay | undefined; + resultDisplay: ToolResultDisplay | string | object | undefined; status: ToolCallStatus; confirmationDetails: ToolCallConfirmationDetails | undefined; renderOutputAsMarkdown?: boolean; diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index b0db59c8..35dde76b 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -28,6 +28,7 @@ import { GEMINI_CONFIG_DIR as GEMINI_DIR, } from '../tools/memoryTool.js'; import { TodoWriteTool } from '../tools/todoWrite.js'; +import { TaskTool } from '../tools/task.js'; import { WebSearchTool } from '../tools/web-search.js'; import { GeminiClient } from '../core/client.js'; import { FileDiscoveryService } from '../services/fileDiscoveryService.js'; @@ -237,6 +238,7 @@ export interface ConfigParameters { export class Config { private toolRegistry!: ToolRegistry; private promptRegistry!: PromptRegistry; + private subagentManager!: SubagentManager; private sessionId: string; private fileSystemService: FileSystemService; private contentGeneratorConfig!: ContentGeneratorConfig; @@ -317,7 +319,6 @@ export class Config { private readonly shouldUseNodePtyShell: boolean; private readonly skipNextSpeakerCheck: boolean; private initialized: boolean = false; - private subagentManager: SubagentManager | null = null; constructor(params: ConfigParameters) { this.sessionId = params.sessionId; @@ -427,6 +428,7 @@ export class Config { await this.getGitService(); } this.promptRegistry = new PromptRegistry(); + this.subagentManager = new SubagentManager(this); this.toolRegistry = await this.createToolRegistry(); } @@ -868,9 +870,6 @@ export class Config { } getSubagentManager(): SubagentManager { - if (!this.subagentManager) { - this.subagentManager = new SubagentManager(this.targetDir); - } return this.subagentManager; } @@ -910,6 +909,7 @@ export class Config { } }; + registerCoreTool(TaskTool, this); registerCoreTool(LSTool, this); 
registerCoreTool(ReadFileTool, this); registerCoreTool(GrepTool, this); diff --git a/packages/core/src/core/__snapshots__/prompts.test.ts.snap b/packages/core/src/core/__snapshots__/prompts.test.ts.snap index d8c33ef5..43c61351 100644 --- a/packages/core/src/core/__snapshots__/prompts.test.ts.snap +++ b/packages/core/src/core/__snapshots__/prompts.test.ts.snap @@ -121,6 +121,7 @@ IMPORTANT: Always use the todo_write tool to plan and track tasks throughout the - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). Use non-interactive versions of commands (e.g. \`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until canceled by the user. - **Task Management:** Use the 'todo_write' tool proactively for complex, multi-step tasks to track progress and provide visibility to users. This tool helps organize work systematically and ensures no requirements are missed. +- **Subagent Delegation:** When doing file search, prefer to use the 'task' tool in order to reduce context usage. You should proactively use the 'task' tool with specialized agents when the task at hand matches the agent's description. - **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. 
If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. @@ -414,6 +415,7 @@ IMPORTANT: Always use the todo_write tool to plan and track tasks throughout the - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). Use non-interactive versions of commands (e.g. \`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until canceled by the user. - **Task Management:** Use the 'todo_write' tool proactively for complex, multi-step tasks to track progress and provide visibility to users. This tool helps organize work systematically and ensures no requirements are missed. +- **Subagent Delegation:** When doing file search, prefer to use the 'task' tool in order to reduce context usage. You should proactively use the 'task' tool with specialized agents when the task at hand matches the agent's description. 
- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. @@ -717,6 +719,7 @@ IMPORTANT: Always use the todo_write tool to plan and track tasks throughout the - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). Use non-interactive versions of commands (e.g. \`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until canceled by the user. - **Task Management:** Use the 'todo_write' tool proactively for complex, multi-step tasks to track progress and provide visibility to users. 
This tool helps organize work systematically and ensures no requirements are missed. +- **Subagent Delegation:** When doing file search, prefer to use the 'task' tool in order to reduce context usage. You should proactively use the 'task' tool with specialized agents when the task at hand matches the agent's description. - **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. @@ -1005,6 +1008,7 @@ IMPORTANT: Always use the todo_write tool to plan and track tasks throughout the - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). Use non-interactive versions of commands (e.g. 
\`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until canceled by the user. - **Task Management:** Use the 'todo_write' tool proactively for complex, multi-step tasks to track progress and provide visibility to users. This tool helps organize work systematically and ensures no requirements are missed. +- **Subagent Delegation:** When doing file search, prefer to use the 'task' tool in order to reduce context usage. You should proactively use the 'task' tool with specialized agents when the task at hand matches the agent's description. - **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. 
@@ -1293,6 +1297,7 @@ IMPORTANT: Always use the todo_write tool to plan and track tasks throughout the - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). Use non-interactive versions of commands (e.g. \`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until canceled by the user. - **Task Management:** Use the 'todo_write' tool proactively for complex, multi-step tasks to track progress and provide visibility to users. This tool helps organize work systematically and ensures no requirements are missed. +- **Subagent Delegation:** When doing file search, prefer to use the 'task' tool in order to reduce context usage. You should proactively use the 'task' tool with specialized agents when the task at hand matches the agent's description. - **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. 
It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. @@ -1581,6 +1586,7 @@ IMPORTANT: Always use the todo_write tool to plan and track tasks throughout the - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). Use non-interactive versions of commands (e.g. \`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until canceled by the user. - **Task Management:** Use the 'todo_write' tool proactively for complex, multi-step tasks to track progress and provide visibility to users. This tool helps organize work systematically and ensures no requirements are missed. +- **Subagent Delegation:** When doing file search, prefer to use the 'task' tool in order to reduce context usage. You should proactively use the 'task' tool with specialized agents when the task at hand matches the agent's description. - **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" 
- **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. @@ -1869,6 +1875,7 @@ IMPORTANT: Always use the todo_write tool to plan and track tasks throughout the - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). Use non-interactive versions of commands (e.g. \`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until canceled by the user. - **Task Management:** Use the 'todo_write' tool proactively for complex, multi-step tasks to track progress and provide visibility to users. This tool helps organize work systematically and ensures no requirements are missed. +- **Subagent Delegation:** When doing file search, prefer to use the 'task' tool in order to reduce context usage. You should proactively use the 'task' tool with specialized agents when the task at hand matches the agent's description. 
- **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. @@ -2157,6 +2164,7 @@ IMPORTANT: Always use the todo_write tool to plan and track tasks throughout the - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). Use non-interactive versions of commands (e.g. \`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until canceled by the user. - **Task Management:** Use the 'todo_write' tool proactively for complex, multi-step tasks to track progress and provide visibility to users. 
This tool helps organize work systematically and ensures no requirements are missed. +- **Subagent Delegation:** When doing file search, prefer to use the 'task' tool in order to reduce context usage. You should proactively use the 'task' tool with specialized agents when the task at hand matches the agent's description. - **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. @@ -2445,6 +2453,7 @@ IMPORTANT: Always use the todo_write tool to plan and track tasks throughout the - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). Use non-interactive versions of commands (e.g. 
\`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until canceled by the user. - **Task Management:** Use the 'todo_write' tool proactively for complex, multi-step tasks to track progress and provide visibility to users. This tool helps organize work systematically and ensures no requirements are missed. +- **Subagent Delegation:** When doing file search, prefer to use the 'task' tool in order to reduce context usage. You should proactively use the 'task' tool with specialized agents when the task at hand matches the agent's description. - **Remembering Facts:** Use the 'save_memory' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. 
diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts index 5a2bb85d..adf7086f 100644 --- a/packages/core/src/core/coreToolScheduler.ts +++ b/packages/core/src/core/coreToolScheduler.ts @@ -73,7 +73,7 @@ export type ExecutingToolCall = { request: ToolCallRequestInfo; tool: AnyDeclarativeTool; invocation: AnyToolInvocation; - liveOutput?: string; + liveOutput?: ToolResultDisplay; startTime?: number; outcome?: ToolConfirmationOutcome; }; @@ -120,7 +120,7 @@ export type ConfirmHandler = ( export type OutputUpdateHandler = ( toolCallId: string, - outputChunk: string, + outputChunk: ToolResultDisplay, ) => void; export type AllToolCallsCompleteHandler = ( @@ -818,7 +818,7 @@ export class CoreToolScheduler { const liveOutputCallback = scheduledCall.tool.canUpdateOutput && this.outputUpdateHandler - ? (outputChunk: string) => { + ? (outputChunk: ToolResultDisplay) => { if (this.outputUpdateHandler) { this.outputUpdateHandler(callId, outputChunk); } diff --git a/packages/core/src/core/prompts.ts b/packages/core/src/core/prompts.ts index 60769eef..8b0a0aea 100644 --- a/packages/core/src/core/prompts.ts +++ b/packages/core/src/core/prompts.ts @@ -18,6 +18,7 @@ import process from 'node:process'; import { isGitRepository } from '../utils/gitUtils.js'; import { MemoryTool, GEMINI_CONFIG_DIR } from '../tools/memoryTool.js'; import { TodoWriteTool } from '../tools/todoWrite.js'; +import { TaskTool } from '../tools/task.js'; import { GenerateContentConfig } from '@google/genai'; export interface ModelTemplateMapping { @@ -284,6 +285,7 @@ IMPORTANT: Always use the ${TodoWriteTool.Name} tool to plan and track tasks thr - **Background Processes:** Use background processes (via \`&\`) for commands that are unlikely to stop on their own, e.g. \`node server.js &\`. If unsure, ask the user. - **Interactive Commands:** Try to avoid shell commands that are likely to require user interaction (e.g. \`git rebase -i\`). 
Use non-interactive versions of commands (e.g. \`npm init -y\` instead of \`npm init\`) when available, and otherwise remind the user that interactive shell commands are not supported and may cause hangs until canceled by the user. - **Task Management:** Use the '${TodoWriteTool.Name}' tool proactively for complex, multi-step tasks to track progress and provide visibility to users. This tool helps organize work systematically and ensures no requirements are missed. +- **Subagent Delegation:** When doing file search, prefer to use the '${TaskTool.Name}' tool in order to reduce context usage. You should proactively use the '${TaskTool.Name}' tool with specialized agents when the task at hand matches the agent's description. - **Remembering Facts:** Use the '${MemoryTool.Name}' tool to remember specific, *user-related* facts or preferences when the user explicitly asks, or when they state a clear, concise piece of information that would help personalize or streamline *your future interactions with them* (e.g., preferred coding style, common project paths they use, personal tool aliases). This tool is for user-specific information that should persist across sessions. Do *not* use it for general project context or information. If unsure whether to save something, you can ask the user, "Should I remember that for you?" - **Respect User Confirmations:** Most tool calls (also denoted as 'function calls') will first require confirmation from the user, where they will either approve or cancel the function call. If a user cancels a function call, respect their choice and do _not_ try to make the function call again. It is okay to request the tool call again _only_ if the user requests that same tool call on a subsequent prompt. When a user cancels a function call, assume best intentions from the user and consider inquiring if they prefer any alternative paths forward. 
diff --git a/packages/core/src/subagents/index.ts b/packages/core/src/subagents/index.ts index 16ff4bc1..d427b9fa 100644 --- a/packages/core/src/subagents/index.ts +++ b/packages/core/src/subagents/index.ts @@ -47,6 +47,19 @@ export type { ToolConfig, SubagentTerminateMode, OutputObject, -} from '../core/subagent.js'; +} from './subagent.js'; -export { SubAgentScope } from '../core/subagent.js'; +export { SubAgentScope } from './subagent.js'; + +// Event system for UI integration +export type { + SubAgentEvent, + SubAgentStartEvent, + SubAgentFinishEvent, + SubAgentRoundEvent, + SubAgentToolCallEvent, + SubAgentToolResultEvent, + SubAgentModelTextEvent, +} from './subagent-events.js'; + +export { SubAgentEventEmitter } from './subagent-events.js'; diff --git a/packages/core/src/subagents/subagent-events.ts b/packages/core/src/subagents/subagent-events.ts new file mode 100644 index 00000000..520b8bd1 --- /dev/null +++ b/packages/core/src/subagents/subagent-events.ts @@ -0,0 +1,89 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { EventEmitter } from 'events'; + +export type SubAgentEvent = + | 'start' + | 'round_start' + | 'round_end' + | 'model_text' + | 'tool_call' + | 'tool_result' + | 'finish' + | 'error'; + +export interface SubAgentModelTextEvent { + subagentId: string; + round: number; + text: string; + timestamp: number; +} + +export interface SubAgentStartEvent { + subagentId: string; + name: string; + model?: string; + tools: string[]; + timestamp: number; +} + +export interface SubAgentRoundEvent { + subagentId: string; + round: number; + promptId: string; + timestamp: number; +} + +export interface SubAgentToolCallEvent { + subagentId: string; + round: number; + callId: string; + name: string; + args: Record<string, unknown>; + timestamp: number; +} + +export interface SubAgentToolResultEvent { + subagentId: string; + round: number; + callId: string; + name: string; + success: boolean; + error?: string; +
durationMs?: number; + timestamp: number; +} + +export interface SubAgentFinishEvent { + subagentId: string; + terminate_reason: string; + timestamp: number; + rounds?: number; + totalDurationMs?: number; + totalToolCalls?: number; + successfulToolCalls?: number; + failedToolCalls?: number; + inputTokens?: number; + outputTokens?: number; + totalTokens?: number; +} + +export class SubAgentEventEmitter { + private ee = new EventEmitter(); + + on(event: SubAgentEvent, listener: (...args: unknown[]) => void) { + this.ee.on(event, listener); + } + + off(event: SubAgentEvent, listener: (...args: unknown[]) => void) { + this.ee.off(event, listener); + } + + emit(event: SubAgentEvent, payload: unknown) { + this.ee.emit(event, payload); + } +} diff --git a/packages/core/src/subagents/subagent-hooks.ts b/packages/core/src/subagents/subagent-hooks.ts new file mode 100644 index 00000000..6971b279 --- /dev/null +++ b/packages/core/src/subagents/subagent-hooks.ts @@ -0,0 +1,33 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +export interface PreToolUsePayload { + subagentId: string; + name: string; // subagent name + toolName: string; + args: Record<string, unknown>; + timestamp: number; +} + +export interface PostToolUsePayload extends PreToolUsePayload { + success: boolean; + durationMs: number; + errorMessage?: string; +} + +export interface SubagentStopPayload { + subagentId: string; + name: string; // subagent name + terminateReason: string; + summary: Record<string, unknown>; + timestamp: number; +} + +export interface SubagentHooks { + preToolUse?(payload: PreToolUsePayload): Promise<void> | void; + postToolUse?(payload: PostToolUsePayload): Promise<void> | void; + onStop?(payload: SubagentStopPayload): Promise<void> | void; +} diff --git a/packages/core/src/subagents/subagent-manager.test.ts b/packages/core/src/subagents/subagent-manager.test.ts index c40b3541..cb285124 100644 --- a/packages/core/src/subagents/subagent-manager.test.ts +++
b/packages/core/src/subagents/subagent-manager.test.ts @@ -11,6 +11,8 @@ import * as os from 'os'; import { SubagentManager } from './subagent-manager.js'; import { SubagentConfig, SubagentError } from './types.js'; import { ToolRegistry } from '../tools/tool-registry.js'; +import { Config } from '../config/config.js'; +import { makeFakeConfig } from '../test-utils/config.js'; // Mock file system operations vi.mock('fs/promises'); @@ -36,15 +38,30 @@ vi.mock('./validation.js', () => ({ }, })); -vi.mock('../core/subagent.js'); +vi.mock('./subagent.js'); describe('SubagentManager', () => { let manager: SubagentManager; let mockToolRegistry: ToolRegistry; - const projectRoot = '/test/project'; + let mockConfig: Config; beforeEach(() => { - mockToolRegistry = {} as ToolRegistry; + mockToolRegistry = { + getAllTools: vi.fn().mockReturnValue([ + { name: 'read_file', displayName: 'Read File' }, + { name: 'write_file', displayName: 'Write File' }, + { name: 'grep', displayName: 'Search Files' }, + ]), + } as unknown as ToolRegistry; + + // Create mock Config object using test utility + mockConfig = makeFakeConfig({ + sessionId: 'test-session-id', + }); + + // Mock the tool registry and project root methods + vi.spyOn(mockConfig, 'getToolRegistry').mockReturnValue(mockToolRegistry); + vi.spyOn(mockConfig, 'getProjectRoot').mockReturnValue('/test/project'); // Mock os.homedir vi.mocked(os.homedir).mockReturnValue('/home/user'); @@ -134,7 +151,7 @@ describe('SubagentManager', () => { return yaml.trim(); }); - manager = new SubagentManager(projectRoot, mockToolRegistry); + manager = new SubagentManager(mockConfig); }); afterEach(() => { @@ -739,6 +756,25 @@ System prompt 3`); ]); }); + it('should transform display names to tool names in tool configuration', () => { + const configWithDisplayNames: SubagentConfig = { + ...validConfig, + tools: ['Read File', 'write_file', 'Search Files', 'unknown_tool'], + }; + + const runtimeConfig = manager.convertToRuntimeConfig( + 
configWithDisplayNames, + ); + + expect(runtimeConfig.toolConfig).toBeDefined(); + expect(runtimeConfig.toolConfig!.tools).toEqual([ + 'read_file', // 'Read File' -> 'read_file' (display name match) + 'write_file', // 'write_file' -> 'write_file' (exact name match) + 'grep', // 'Search Files' -> 'grep' (display name match) + 'unknown_tool', // 'unknown_tool' -> 'unknown_tool' (preserved as-is) + ]); + }); + it('should merge custom model and run configurations', () => { const configWithCustom: SubagentConfig = { ...validConfig, diff --git a/packages/core/src/subagents/subagent-manager.ts b/packages/core/src/subagents/subagent-manager.ts index 8bb4ac3c..9970289f 100644 --- a/packages/core/src/subagents/subagent-manager.ts +++ b/packages/core/src/subagents/subagent-manager.ts @@ -29,9 +29,8 @@ import { ModelConfig, RunConfig, ToolConfig, -} from '../core/subagent.js'; +} from './subagent.js'; import { Config } from '../config/config.js'; -import { ToolRegistry } from '../tools/tool-registry.js'; const QWEN_CONFIG_DIR = '.qwen'; const AGENT_CONFIG_DIR = 'agents'; @@ -43,11 +42,8 @@ const AGENT_CONFIG_DIR = 'agents'; export class SubagentManager { private readonly validator: SubagentValidator; - constructor( - private readonly projectRoot: string, - private readonly toolRegistry?: ToolRegistry, - ) { - this.validator = new SubagentValidator(toolRegistry); + constructor(private readonly config: Config) { + this.validator = new SubagentValidator(); } /** @@ -61,7 +57,6 @@ export class SubagentManager { config: SubagentConfig, options: CreateSubagentOptions, ): Promise { - // Validate the configuration this.validator.validateOrThrow(config); // Determine file path @@ -381,7 +376,7 @@ export class SubagentManager { // Determine level from file path // Project level paths contain the project root, user level paths are in home directory const isProjectLevel = - filePath.includes(this.projectRoot) && + filePath.includes(this.config.getProjectRoot()) && 
filePath.includes(`/${QWEN_CONFIG_DIR}/${AGENT_CONFIG_DIR}/`); const level: SubagentLevel = isProjectLevel ? 'project' : 'user'; @@ -393,11 +388,9 @@ export class SubagentManager { level, filePath, modelConfig: modelConfig as Partial< - import('../core/subagent.js').ModelConfig - >, - runConfig: runConfig as Partial< - import('../core/subagent.js').RunConfig + import('./subagent.js').ModelConfig >, + runConfig: runConfig as Partial<import('./subagent.js').RunConfig>, backgroundColor, }; @@ -433,6 +426,8 @@ export class SubagentManager { frontmatter['tools'] = config.tools; } + // No outputs section + if (config.modelConfig) { frontmatter['modelConfig'] = config.modelConfig; } @@ -465,6 +460,10 @@ export class SubagentManager { async createSubagentScope( config: SubagentConfig, runtimeContext: Config, + options?: { + eventEmitter?: import('./subagent-events.js').SubAgentEventEmitter; + hooks?: import('./subagent-hooks.js').SubagentHooks; + }, ): Promise { try { const runtimeConfig = this.convertToRuntimeConfig(config); @@ -476,6 +475,8 @@ export class SubagentManager { runtimeConfig.modelConfig, runtimeConfig.runConfig, runtimeConfig.toolConfig, + options?.eventEmitter, + options?.hooks, ); } catch (error) { if (error instanceof Error) { @@ -515,8 +516,10 @@ export class SubagentManager { // Build tool configuration if tools are specified let toolConfig: ToolConfig | undefined; if (config.tools && config.tools.length > 0) { + // Transform tools array to ensure all entries are tool names (not display names) + const toolNames = this.transformToToolNames(config.tools); toolConfig = { - tools: config.tools, + tools: toolNames, }; } @@ -528,6 +531,53 @@ export class SubagentManager { }; } + /** + * Transforms a tools array that may contain tool names or display names + * into an array containing only tool names.
+ * + * @param tools - Array of tool names or display names + * @returns Array of tool names + * @private + */ + private transformToToolNames(tools: string[]): string[] { + const toolRegistry = this.config.getToolRegistry(); + if (!toolRegistry) { + return tools; + } + + const allTools = toolRegistry.getAllTools(); + + const result: string[] = []; + for (const toolIdentifier of tools) { + // First, try to find an exact match by tool name (highest priority) + const exactNameMatch = allTools.find( + (tool) => tool.name === toolIdentifier, + ); + if (exactNameMatch) { + result.push(exactNameMatch.name); + continue; + } + + // If no exact name match, try to find by display name + const displayNameMatch = allTools.find( + (tool) => tool.displayName === toolIdentifier, + ); + if (displayNameMatch) { + result.push(displayNameMatch.name); + continue; + } + + // If no match found, preserve the original identifier as-is + // This allows for tools that might not be registered yet or custom tools + result.push(toolIdentifier); + console.warn( + `Tool "${toolIdentifier}" not found in tool registry, preserving as-is`, + ); + } + + return result; + } + /** * Merges partial configurations with defaults, useful for updating * existing configurations. @@ -563,7 +613,11 @@ export class SubagentManager { getSubagentPath(name: string, level: SubagentLevel): string { const baseDir = level === 'project' - ? path.join(this.projectRoot, QWEN_CONFIG_DIR, AGENT_CONFIG_DIR) + ? path.join( + this.config.getProjectRoot(), + QWEN_CONFIG_DIR, + AGENT_CONFIG_DIR, + ) : path.join(os.homedir(), QWEN_CONFIG_DIR, AGENT_CONFIG_DIR); return path.join(baseDir, `${name}.md`); @@ -580,7 +634,11 @@ export class SubagentManager { ): Promise { const baseDir = level === 'project' - ? path.join(this.projectRoot, QWEN_CONFIG_DIR, AGENT_CONFIG_DIR) + ? 
path.join( + this.config.getProjectRoot(), + QWEN_CONFIG_DIR, + AGENT_CONFIG_DIR, + ) : path.join(os.homedir(), QWEN_CONFIG_DIR, AGENT_CONFIG_DIR); try { @@ -630,20 +688,4 @@ export class SubagentManager { return false; // Name is already in use } - - /** - * Gets available tools from the tool registry. - * Useful for validation and UI purposes. - * - * @returns Array of available tool names - */ - getAvailableTools(): string[] { - if (!this.toolRegistry) { - return []; - } - - // This would need to be implemented in ToolRegistry - // For now, return empty array - return []; - } } diff --git a/packages/core/src/subagents/subagent-result-format.ts b/packages/core/src/subagents/subagent-result-format.ts new file mode 100644 index 00000000..213e0924 --- /dev/null +++ b/packages/core/src/subagents/subagent-result-format.ts @@ -0,0 +1,186 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +export interface SubAgentBasicStats { + rounds: number; + totalDurationMs: number; + totalToolCalls: number; + successfulToolCalls: number; + failedToolCalls: number; + successRate?: number; + inputTokens?: number; + outputTokens?: number; + totalTokens?: number; +} + +function fmtDuration(ms: number): string { + if (ms < 1000) return `${Math.round(ms)}ms`; + if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`; + if (ms < 3600000) { + const m = Math.floor(ms / 60000); + const s = Math.floor((ms % 60000) / 1000); + return `${m}m ${s}s`; + } + const h = Math.floor(ms / 3600000); + const m = Math.floor((ms % 3600000) / 60000); + return `${h}h ${m}m`; +} + +export function formatCompact( + stats: SubAgentBasicStats, + taskDesc: string, +): string { + const sr = + stats.totalToolCalls > 0 + ? (stats.successRate ?? + (stats.successfulToolCalls / stats.totalToolCalls) * 100) + : 0; + const lines = [ + `📋 Task Completed: ${taskDesc}`, + `🔧 Tool Usage: ${stats.totalToolCalls} calls${stats.totalToolCalls ?
`, ${sr.toFixed(1)}% success` : ''}`, + `⏱️ Duration: ${fmtDuration(stats.totalDurationMs)} | 🔁 Rounds: ${stats.rounds}`, + ]; + if (typeof stats.totalTokens === 'number') { + lines.push( + `🔢 Tokens: ${stats.totalTokens.toLocaleString()}${stats.inputTokens || stats.outputTokens ? ` (in ${stats.inputTokens ?? 0}, out ${stats.outputTokens ?? 0})` : ''}`, + ); + } + return lines.join('\n'); +} + +export function formatDetailed( + stats: SubAgentBasicStats & { + toolUsage?: Array<{ + name: string; + count: number; + success: number; + failure: number; + lastError?: string; + averageDurationMs?: number; + }>; + }, + taskDesc: string, +): string { + const sr = + stats.totalToolCalls > 0 + ? (stats.successRate ?? + (stats.successfulToolCalls / stats.totalToolCalls) * 100) + : 0; + const lines: string[] = []; + lines.push(`📋 Task Completed: ${taskDesc}`); + lines.push( + `⏱️ Duration: ${fmtDuration(stats.totalDurationMs)} | 🔁 Rounds: ${stats.rounds}`, + ); + // Quality indicator + let quality = 'Poor execution'; + if (sr >= 95) quality = 'Excellent execution'; + else if (sr >= 85) quality = 'Good execution'; + else if (sr >= 70) quality = 'Fair execution'; + lines.push(`✅ Quality: ${quality} (${sr.toFixed(1)}% tool success)`); + // Speed category + const d = stats.totalDurationMs; + let speed = 'Long execution - consider breaking down tasks'; + if (d < 10_000) speed = 'Fast completion - under 10 seconds'; + else if (d < 60_000) speed = 'Good speed - under a minute'; + else if (d < 300_000) speed = 'Moderate duration - a few minutes'; + lines.push(`🚀 Speed: ${speed}`); + lines.push( + `🔧 Tools: ${stats.totalToolCalls} calls, ${sr.toFixed(1)}% success (${stats.successfulToolCalls} ok, ${stats.failedToolCalls} failed)`, + ); + if (typeof stats.totalTokens === 'number') { + lines.push( + `🔢 Tokens: ${stats.totalTokens.toLocaleString()} (in ${stats.inputTokens ?? 0}, out ${stats.outputTokens ??
0})`, + ); + } + if (stats.toolUsage && stats.toolUsage.length) { + const sorted = [...stats.toolUsage] + .sort((a, b) => b.count - a.count) + .slice(0, 5); + lines.push('\nTop tools:'); + for (const t of sorted) { + const avg = + typeof t.averageDurationMs === 'number' + ? `, avg ${fmtDuration(Math.round(t.averageDurationMs))}` + : ''; + lines.push( + ` - ${t.name}: ${t.count} calls (${t.success} ok, ${t.failure} fail${avg}${t.lastError ? `, last error: ${t.lastError}` : ''})`, + ); + } + } + const tips = generatePerformanceTips(stats); + if (tips.length) { + lines.push('\n💡 Performance Insights:'); + for (const tip of tips.slice(0, 3)) lines.push(` - ${tip}`); + } + return lines.join('\n'); +} + +export function generatePerformanceTips( + stats: SubAgentBasicStats & { + toolUsage?: Array<{ + name: string; + count: number; + success: number; + failure: number; + lastError?: string; + averageDurationMs?: number; + }>; + }, +): string[] { + const tips: string[] = []; + const totalCalls = stats.totalToolCalls; + const sr = + stats.totalToolCalls > 0 + ? (stats.successRate ??
+ (stats.successfulToolCalls / stats.totalToolCalls) * 100) + : 0; + + // High failure rate + if (sr < 80) + tips.push('Low tool success rate - review inputs and error messages'); + + // Long duration + if (stats.totalDurationMs > 60_000) + tips.push('Long execution time - consider breaking down complex tasks'); + + // Token usage + if (typeof stats.totalTokens === 'number' && stats.totalTokens > 100_000) { + tips.push( + 'High token usage - consider optimizing prompts or narrowing scope', + ); + } + if (typeof stats.totalTokens === 'number' && totalCalls > 0) { + const avgTokPerCall = stats.totalTokens / totalCalls; + if (avgTokPerCall > 5_000) + tips.push( + `High token usage per tool call (~${Math.round(avgTokPerCall)} tokens/call)`, + ); + } + + // Network failures + const isNetworkTool = (name: string) => /web|fetch|search/i.test(name); + const hadNetworkFailure = (stats.toolUsage || []).some( + (t) => + isNetworkTool(t.name) && + t.lastError && + /timeout|network/i.test(t.lastError), + ); + if (hadNetworkFailure) + tips.push( + 'Network operations had failures - consider increasing timeout or checking connectivity', + ); + + // Slow tools + const slow = (stats.toolUsage || []) + .filter((t) => (t.averageDurationMs ?? 0) > 10_000) + .sort((a, b) => (b.averageDurationMs ?? 0) - (a.averageDurationMs ?? 
0)); + if (slow.length) + tips.push( + `Consider optimizing ${slow[0].name} operations (avg ${fmtDuration(Math.round(slow[0].averageDurationMs!))})`, + ); + + return tips; +} diff --git a/packages/core/src/subagents/subagent-statistics.ts b/packages/core/src/subagents/subagent-statistics.ts new file mode 100644 index 00000000..35529bb9 --- /dev/null +++ b/packages/core/src/subagents/subagent-statistics.ts @@ -0,0 +1,105 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +export interface ToolUsageStats { + name: string; + count: number; + success: number; + failure: number; + lastError?: string; + totalDurationMs: number; + averageDurationMs: number; +} + +export interface SubagentSummary { + rounds: number; + totalDurationMs: number; + totalToolCalls: number; + successfulToolCalls: number; + failedToolCalls: number; + successRate: number; + inputTokens: number; + outputTokens: number; + totalTokens: number; + estimatedCost: number; + toolUsage: ToolUsageStats[]; +} + +export class SubagentStatistics { + private startTimeMs = 0; + private rounds = 0; + private totalToolCalls = 0; + private successfulToolCalls = 0; + private failedToolCalls = 0; + private inputTokens = 0; + private outputTokens = 0; + private toolUsage = new Map(); + + start(now = Date.now()) { + this.startTimeMs = now; + } + + setRounds(rounds: number) { + this.rounds = rounds; + } + + recordToolCall( + name: string, + success: boolean, + durationMs: number, + lastError?: string, + ) { + this.totalToolCalls += 1; + if (success) this.successfulToolCalls += 1; + else this.failedToolCalls += 1; + + const tu = this.toolUsage.get(name) || { + name, + count: 0, + success: 0, + failure: 0, + lastError: undefined, + totalDurationMs: 0, + averageDurationMs: 0, + }; + tu.count += 1; + if (success) tu.success += 1; + else tu.failure += 1; + if (lastError) tu.lastError = lastError; + tu.totalDurationMs += Math.max(0, durationMs || 0); + tu.averageDurationMs = 
tu.count > 0 ? tu.totalDurationMs / tu.count : 0; + this.toolUsage.set(name, tu); + } + + recordTokens(input: number, output: number) { + this.inputTokens += Math.max(0, input || 0); + this.outputTokens += Math.max(0, output || 0); + } + + getSummary(now = Date.now()): SubagentSummary { + const totalDurationMs = this.startTimeMs ? now - this.startTimeMs : 0; + const totalToolCalls = this.totalToolCalls; + const successRate = + totalToolCalls > 0 + ? (this.successfulToolCalls / totalToolCalls) * 100 + : 0; + const totalTokens = this.inputTokens + this.outputTokens; + const estimatedCost = this.inputTokens * 3e-5 + this.outputTokens * 6e-5; + return { + rounds: this.rounds, + totalDurationMs, + totalToolCalls, + successfulToolCalls: this.successfulToolCalls, + failedToolCalls: this.failedToolCalls, + successRate, + inputTokens: this.inputTokens, + outputTokens: this.outputTokens, + totalTokens, + estimatedCost, + toolUsage: Array.from(this.toolUsage.values()), + }; + } +} diff --git a/packages/core/src/core/subagent.test.ts b/packages/core/src/subagents/subagent.test.ts similarity index 81% rename from packages/core/src/core/subagent.test.ts rename to packages/core/src/subagents/subagent.test.ts index 978a686b..1e253fae 100644 --- a/packages/core/src/core/subagent.test.ts +++ b/packages/core/src/subagents/subagent.test.ts @@ -12,14 +12,13 @@ import { PromptConfig, ModelConfig, RunConfig, - OutputConfig, ToolConfig, } from './subagent.js'; import { Config, ConfigParameters } from '../config/config.js'; -import { GeminiChat } from './geminiChat.js'; -import { createContentGenerator } from './contentGenerator.js'; +import { GeminiChat } from '../core/geminiChat.js'; +import { createContentGenerator } from '../core/contentGenerator.js'; import { getEnvironmentContext } from '../utils/environmentContext.js'; -import { executeToolCall } from './nonInteractiveToolExecutor.js'; +import { executeToolCall } from '../core/nonInteractiveToolExecutor.js'; import { ToolRegistry } 
from '../tools/tool-registry.js'; import { DEFAULT_GEMINI_MODEL } from '../config/models.js'; import { @@ -31,10 +30,10 @@ import { } from '@google/genai'; import { ToolErrorType } from '../tools/tool-error.js'; -vi.mock('./geminiChat.js'); -vi.mock('./contentGenerator.js'); +vi.mock('../core/geminiChat.js'); +vi.mock('../core/contentGenerator.js'); vi.mock('../utils/environmentContext.js'); -vi.mock('./nonInteractiveToolExecutor.js'); +vi.mock('../core/nonInteractiveToolExecutor.js'); vi.mock('../ide/ide-client.js'); async function createMockConfig( @@ -55,6 +54,7 @@ async function createMockConfig( // Mock ToolRegistry const mockToolRegistry = { getTool: vi.fn(), + getFunctionDeclarations: vi.fn().mockReturnValue([]), getFunctionDeclarationsFiltered: vi.fn().mockReturnValue([]), ...toolRegistryMocks, } as unknown as ToolRegistry; @@ -74,11 +74,27 @@ const createMockStream = ( return (async function* () { if (response === 'stop') { // When stopping, the model might return text, but the subagent logic primarily cares about the absence of functionCalls. - yield { text: 'Done.' }; + yield { + candidates: [ + { + content: { + parts: [{ text: 'Done.' }], + }, + }, + ], + }; } else if (response.length > 0) { yield { functionCalls: response }; } else { - yield { text: 'Done.' }; // Handle empty array also as stop + yield { + candidates: [ + { + content: { + parts: [{ text: 'Done.' 
}], + }, + }, + ], + }; // Handle empty array also as stop } })(); }); @@ -134,6 +150,15 @@ describe('subagent.ts', () => { sendMessageStream: mockSendMessageStream, }) as unknown as GeminiChat, ); + + // Default mock for executeToolCall + vi.mocked(executeToolCall).mockResolvedValue({ + callId: 'default-call', + responseParts: 'default response', + resultDisplay: 'Default tool result', + error: undefined, + errorType: undefined, + }); }); afterEach(() => { @@ -329,45 +354,6 @@ describe('subagent.ts', () => { ]); }); - it('should include output instructions in the system prompt when outputs are defined', async () => { - const { config } = await createMockConfig(); - vi.mocked(GeminiChat).mockClear(); - - const promptConfig: PromptConfig = { systemPrompt: 'Do the task.' }; - const outputConfig: OutputConfig = { - outputs: { - result1: 'The first result', - }, - }; - const context = new ContextState(); - - // Model stops immediately - mockSendMessageStream.mockImplementation(createMockStream(['stop'])); - - const scope = await SubAgentScope.create( - 'test-agent', - config, - promptConfig, - defaultModelConfig, - defaultRunConfig, - undefined, // ToolConfig - outputConfig, - ); - - await scope.runNonInteractive(context); - - const generationConfig = getGenerationConfigFromMock(); - const systemInstruction = generationConfig.systemInstruction as string; - - expect(systemInstruction).toContain('Do the task.'); - expect(systemInstruction).toContain( - 'you MUST emit the required output variables', - ); - expect(systemInstruction).toContain( - "Use 'self.emitvalue' to emit the 'result1' key", - ); - }); - it('should use initialMessages instead of systemPrompt if provided', async () => { const { config } = await createMockConfig(); vi.mocked(GeminiChat).mockClear(); @@ -473,7 +459,7 @@ describe('subagent.ts', () => { await scope.runNonInteractive(new ContextState()); expect(scope.output.terminate_reason).toBe(SubagentTerminateMode.GOAL); - 
expect(scope.output.emitted_vars).toEqual({}); + expect(scope.output.result).toBe('Done.'); expect(mockSendMessageStream).toHaveBeenCalledTimes(1); // Check the initial message expect(mockSendMessageStream.mock.calls[0][0].message).toEqual([ @@ -481,28 +467,11 @@ describe('subagent.ts', () => { ]); }); - it('should handle self.emitvalue and terminate with GOAL when outputs are met', async () => { + it('should terminate with GOAL when model provides final text', async () => { const { config } = await createMockConfig(); - const outputConfig: OutputConfig = { - outputs: { result: 'The final result' }, - }; - // Turn 1: Model responds with emitvalue call - // Turn 2: Model stops after receiving the tool response - mockSendMessageStream.mockImplementation( - createMockStream([ - [ - { - name: 'self.emitvalue', - args: { - emit_variable_name: 'result', - emit_variable_value: 'Success!', - }, - }, - ], - 'stop', - ]), - ); + // Model stops immediately with text response + mockSendMessageStream.mockImplementation(createMockStream(['stop'])); const scope = await SubAgentScope.create( 'test-agent', @@ -510,21 +479,13 @@ describe('subagent.ts', () => { promptConfig, defaultModelConfig, defaultRunConfig, - undefined, - outputConfig, ); await scope.runNonInteractive(new ContextState()); expect(scope.output.terminate_reason).toBe(SubagentTerminateMode.GOAL); - expect(scope.output.emitted_vars).toEqual({ result: 'Success!' 
}); - expect(mockSendMessageStream).toHaveBeenCalledTimes(2); - - // Check the tool response sent back in the second call - const secondCallArgs = mockSendMessageStream.mock.calls[1][0]; - expect(secondCallArgs.message).toEqual([ - { text: 'Emitted variable result successfully' }, - ]); + expect(scope.output.result).toBe('Done.'); + expect(mockSendMessageStream).toHaveBeenCalledTimes(1); }); it('should execute external tools and provide the response to the model', async () => { @@ -640,59 +601,6 @@ describe('subagent.ts', () => { }, ]); }); - - it('should nudge the model if it stops before emitting all required variables', async () => { - const { config } = await createMockConfig(); - const outputConfig: OutputConfig = { - outputs: { required_var: 'Must be present' }, - }; - - // Turn 1: Model stops prematurely - // Turn 2: Model responds to the nudge and emits the variable - // Turn 3: Model stops - mockSendMessageStream.mockImplementation( - createMockStream([ - 'stop', - [ - { - name: 'self.emitvalue', - args: { - emit_variable_name: 'required_var', - emit_variable_value: 'Here it is', - }, - }, - ], - 'stop', - ]), - ); - - const scope = await SubAgentScope.create( - 'test-agent', - config, - promptConfig, - defaultModelConfig, - defaultRunConfig, - undefined, - outputConfig, - ); - - await scope.runNonInteractive(new ContextState()); - - // Check the nudge message sent in Turn 2 - const secondCallArgs = mockSendMessageStream.mock.calls[1][0]; - - // We check that the message contains the required variable name and the nudge phrasing. 
- expect(secondCallArgs.message[0].text).toContain('required_var'); - expect(secondCallArgs.message[0].text).toContain( - 'You have stopped calling tools', - ); - - expect(scope.output.terminate_reason).toBe(SubagentTerminateMode.GOAL); - expect(scope.output.emitted_vars).toEqual({ - required_var: 'Here it is', - }); - expect(mockSendMessageStream).toHaveBeenCalledTimes(3); - }); }); describe('runNonInteractive - Termination and Recovery', () => { @@ -702,26 +610,26 @@ describe('subagent.ts', () => { const { config } = await createMockConfig(); const runConfig: RunConfig = { ...defaultRunConfig, max_turns: 2 }; - // Model keeps looping by calling emitvalue repeatedly + // Model keeps calling tools repeatedly mockSendMessageStream.mockImplementation( createMockStream([ [ { - name: 'self.emitvalue', - args: { emit_variable_name: 'loop', emit_variable_value: 'v1' }, + name: 'list_files', + args: { path: '/test' }, }, ], [ { - name: 'self.emitvalue', - args: { emit_variable_name: 'loop', emit_variable_value: 'v2' }, + name: 'list_files', + args: { path: '/test2' }, }, ], // This turn should not happen [ { - name: 'self.emitvalue', - args: { emit_variable_name: 'loop', emit_variable_value: 'v3' }, + name: 'list_files', + args: { path: '/test3' }, }, ], ]), diff --git a/packages/core/src/core/subagent.ts b/packages/core/src/subagents/subagent.ts similarity index 52% rename from packages/core/src/core/subagent.ts rename to packages/core/src/subagents/subagent.ts index cd9d9181..5d3ac257 100644 --- a/packages/core/src/core/subagent.ts +++ b/packages/core/src/subagents/subagent.ts @@ -6,9 +6,9 @@ import { reportError } from '../utils/errorReporting.js'; import { Config } from '../config/config.js'; -import { ToolCallRequestInfo } from './turn.js'; -import { executeToolCall } from './nonInteractiveToolExecutor.js'; -import { createContentGenerator } from './contentGenerator.js'; +import { ToolCallRequestInfo } from '../core/turn.js'; +import { executeToolCall } from 
'../core/nonInteractiveToolExecutor.js'; +import { createContentGenerator } from '../core/contentGenerator.js'; import { getEnvironmentContext } from '../utils/environmentContext.js'; import { Content, @@ -16,9 +16,15 @@ import { FunctionCall, GenerateContentConfig, FunctionDeclaration, - Type, + GenerateContentResponseUsageMetadata, } from '@google/genai'; -import { GeminiChat } from './geminiChat.js'; +import { GeminiChat } from '../core/geminiChat.js'; +import { SubAgentEventEmitter } from './subagent-events.js'; +import { formatCompact, formatDetailed } from './subagent-result-format.js'; +import { SubagentStatistics } from './subagent-statistics.js'; +import { SubagentHooks } from './subagent-hooks.js'; +import { logSubagentExecution } from '../telemetry/loggers.js'; +import { SubagentExecutionEvent } from '../telemetry/types.js'; /** * @fileoverview Defines the configuration interfaces for a subagent. @@ -27,6 +33,19 @@ import { GeminiChat } from './geminiChat.js'; * the model parameters, and the execution settings. */ +interface ExecutionStats { + startTimeMs: number; + totalDurationMs: number; + rounds: number; + totalToolCalls: number; + successfulToolCalls: number; + failedToolCalls: number; + inputTokens?: number; + outputTokens?: number; + totalTokens?: number; + estimatedCost?: number; +} + /** * Describes the possible termination modes for a subagent. * This enum provides a clear indication of why a subagent's execution might have ended. @@ -53,14 +72,14 @@ export enum SubagentTerminateMode { /** * Represents the output structure of a subagent's execution. * This interface defines the data that a subagent will return upon completion, - * including any emitted variables and the reason for its termination. + * including the final result and the reason for its termination. */ export interface OutputObject { /** - * A record of key-value pairs representing variables emitted by the subagent - * during its execution. 
These variables can be used by the calling agent. + * The final result text returned by the subagent upon completion. + * This contains the direct output from the model's final response. */ - emitted_vars: Record; + result: string; /** * The reason for the subagent's termination, indicating whether it completed * successfully, timed out, or encountered an error. @@ -96,17 +115,6 @@ export interface ToolConfig { tools: Array; } -/** - * Configures the expected outputs for the subagent. - */ -export interface OutputConfig { - /** - * A record describing the variables the subagent is expected to emit. - * The subagent will be prompted to generate these values before terminating. - */ - outputs: Record; -} - /** * Configures the generative model parameters for the subagent. * This interface specifies the model to be used and its associated generation settings, @@ -232,8 +240,35 @@ function templateString(template: string, context: ContextState): string { export class SubAgentScope { output: OutputObject = { terminate_reason: SubagentTerminateMode.ERROR, - emitted_vars: {}, + result: '', }; + executionStats: ExecutionStats = { + startTimeMs: 0, + totalDurationMs: 0, + rounds: 0, + totalToolCalls: 0, + successfulToolCalls: 0, + failedToolCalls: 0, + inputTokens: 0, + outputTokens: 0, + totalTokens: 0, + estimatedCost: 0, + }; + private toolUsage = new Map< + string, + { + count: number; + success: number; + failure: number; + lastError?: string; + totalDurationMs?: number; + averageDurationMs?: number; + } + >(); + private eventEmitter?: SubAgentEventEmitter; + private finalText: string = ''; + private readonly stats = new SubagentStatistics(); + private hooks?: SubagentHooks; private readonly subagentId: string; /** @@ -244,7 +279,6 @@ export class SubAgentScope { * @param modelConfig - Configuration for the generative model parameters. * @param runConfig - Configuration for the subagent's execution environment. 
* @param toolConfig - Optional configuration for tools available to the subagent. - * @param outputConfig - Optional configuration for the subagent's expected outputs. */ private constructor( readonly name: string, @@ -253,10 +287,13 @@ export class SubAgentScope { private readonly modelConfig: ModelConfig, private readonly runConfig: RunConfig, private readonly toolConfig?: ToolConfig, - private readonly outputConfig?: OutputConfig, + eventEmitter?: SubAgentEventEmitter, + hooks?: SubagentHooks, ) { const randomPart = Math.random().toString(36).slice(2, 8); this.subagentId = `${this.name}-${randomPart}`; + this.eventEmitter = eventEmitter; + this.hooks = hooks; } /** @@ -269,7 +306,6 @@ export class SubAgentScope { * @param {ModelConfig} modelConfig - Configuration for the generative model parameters. * @param {RunConfig} runConfig - Configuration for the subagent's execution environment. * @param {ToolConfig} [toolConfig] - Optional configuration for tools. - * @param {OutputConfig} [outputConfig] - Optional configuration for expected outputs. * @returns {Promise} A promise that resolves to a valid SubAgentScope instance. * @throws {Error} If any tool requires user confirmation. 
*/ @@ -280,43 +316,60 @@ export class SubAgentScope { modelConfig: ModelConfig, runConfig: RunConfig, toolConfig?: ToolConfig, - outputConfig?: OutputConfig, + eventEmitter?: SubAgentEventEmitter, + hooks?: SubagentHooks, ): Promise { - if (toolConfig) { + // Validate tools for non-interactive use + if (toolConfig?.tools) { const toolRegistry = runtimeContext.getToolRegistry(); - const toolsToLoad: string[] = []; - for (const tool of toolConfig.tools) { - if (typeof tool === 'string') { - toolsToLoad.push(tool); + + for (const toolItem of toolConfig.tools) { + if (typeof toolItem !== 'string') { + continue; // Skip inline function declarations + } + const tool = toolRegistry.getTool(toolItem); + if (!tool) { + continue; // Skip unknown tools } - } - for (const toolName of toolsToLoad) { - const tool = toolRegistry.getTool(toolName); - if (tool) { - const requiredParams = tool.schema.parameters?.required ?? []; - if (requiredParams.length > 0) { - // This check is imperfect. A tool might require parameters but still - // be interactive (e.g., `delete_file(path)`). However, we cannot - // build a generic invocation without knowing what dummy parameters - // to provide. Crashing here because `build({})` fails is worse - // than allowing a potential hang later if an interactive tool is - // used. This is a best-effort check. - console.warn( - `Cannot check tool "${toolName}" for interactivity because it requires parameters. 
Assuming it is safe for non-interactive use.`, - ); - continue; - } + // Check if tool has required parameters + const hasRequiredParams = + tool.schema?.parameters?.required && + Array.isArray(tool.schema.parameters.required) && + tool.schema.parameters.required.length > 0; - const invocation = tool.build({}); - const confirmationDetails = await invocation.shouldConfirmExecute( + if (hasRequiredParams) { + // Can't check interactivity without parameters, log warning and continue + console.warn( + `Cannot check tool "${toolItem}" for interactivity because it requires parameters. Assuming it is safe for non-interactive use.`, + ); + continue; + } + + // Try to build the tool to check if it requires confirmation + try { + const toolInstance = tool.build({}); + const confirmationDetails = await toolInstance.shouldConfirmExecute( new AbortController().signal, ); + if (confirmationDetails) { throw new Error( - `Tool "${toolName}" requires user confirmation and cannot be used in a non-interactive subagent.`, + `Tool "${toolItem}" requires user confirmation and cannot be used in a non-interactive subagent.`, ); } + } catch (error) { + // If we can't build the tool, assume it's safe + if ( + error instanceof Error && + error.message.includes('requires user confirmation') + ) { + throw error; // Re-throw confirmation errors + } + // For other build errors, log warning and continue + console.warn( + `Cannot check tool "${toolItem}" for interactivity because it requires parameters. Assuming it is safe for non-interactive use.`, + ); } } } @@ -328,7 +381,8 @@ export class SubAgentScope { modelConfig, runConfig, toolConfig, - outputConfig, + eventEmitter, + hooks, ); } @@ -339,7 +393,10 @@ export class SubAgentScope { * @param {ContextState} context - The current context state containing variables for prompt templating. * @returns {Promise} A promise that resolves when the subagent has completed its execution. 
*/ - async runNonInteractive(context: ContextState): Promise { + async runNonInteractive( + context: ContextState, + externalSignal?: AbortSignal, + ): Promise { const chat = await this.createChatObject(context); if (!chat) { @@ -348,35 +405,64 @@ export class SubAgentScope { } const abortController = new AbortController(); + const onAbort = () => abortController.abort(); + if (externalSignal) { + if (externalSignal.aborted) abortController.abort(); + externalSignal.addEventListener('abort', onAbort, { once: true }); + } const toolRegistry = this.runtimeContext.getToolRegistry(); // Prepare the list of tools available to the subagent. + // If no explicit toolConfig or it contains "*" or is empty, inherit all tools. const toolsList: FunctionDeclaration[] = []; if (this.toolConfig) { - const toolsToLoad: string[] = []; - for (const tool of this.toolConfig.tools) { - if (typeof tool === 'string') { - toolsToLoad.push(tool); - } else { - toolsList.push(tool); - } - } - toolsList.push( - ...toolRegistry.getFunctionDeclarationsFiltered(toolsToLoad), + const asStrings = this.toolConfig.tools.filter( + (t): t is string => typeof t === 'string', ); - } - // Add local scope functions if outputs are expected. - if (this.outputConfig && this.outputConfig.outputs) { - toolsList.push(...this.getScopeLocalFuncDefs()); + const hasWildcard = asStrings.includes('*'); + const onlyInlineDecls = this.toolConfig.tools.filter( + (t): t is FunctionDeclaration => typeof t !== 'string', + ); + + if (hasWildcard || asStrings.length === 0) { + toolsList.push(...toolRegistry.getFunctionDeclarations()); + } else { + toolsList.push( + ...toolRegistry.getFunctionDeclarationsFiltered(asStrings), + ); + } + toolsList.push(...onlyInlineDecls); + } else { + // Inherit all available tools by default when not specified. + toolsList.push(...toolRegistry.getFunctionDeclarations()); } + const initialTaskText = String( + (context.get('task_prompt') as string) ?? 
'Get Started!', + ); let currentMessages: Content[] = [ - { role: 'user', parts: [{ text: 'Get Started!' }] }, + { role: 'user', parts: [{ text: initialTaskText }] }, ]; const startTime = Date.now(); + this.executionStats.startTimeMs = startTime; + this.stats.start(startTime); let turnCounter = 0; try { + // Emit start event + this.eventEmitter?.emit('start', { + subagentId: this.subagentId, + name: this.name, + model: this.modelConfig.model, + tools: (this.toolConfig?.tools || ['*']).map((t) => + typeof t === 'string' ? t : t.name, + ), + timestamp: Date.now(), + }); + + // Log telemetry for subagent start + const startEvent = new SubagentExecutionEvent(this.name, 'started'); + logSubagentExecution(this.runtimeContext, startEvent); while (true) { // Check termination conditions. if ( @@ -408,12 +494,37 @@ export class SubAgentScope { messageParams, promptId, ); + this.eventEmitter?.emit('round_start', { + subagentId: this.subagentId, + round: turnCounter, + promptId, + timestamp: Date.now(), + }); const functionCalls: FunctionCall[] = []; + let roundText = ''; + let lastUsage: GenerateContentResponseUsageMetadata | undefined = + undefined; for await (const resp of responseStream) { if (abortController.signal.aborted) return; if (resp.functionCalls) functionCalls.push(...resp.functionCalls); + const content = resp.candidates?.[0]?.content; + const parts = content?.parts || []; + for (const p of parts) { + const txt = (p as Part & { text?: string }).text; + if (txt) roundText += txt; + if (txt) + this.eventEmitter?.emit('model_text', { + subagentId: this.subagentId, + round: turnCounter, + text: txt, + timestamp: Date.now(), + }); + } + if (resp.usageMetadata) lastUsage = resp.usageMetadata; } + this.executionStats.rounds = turnCounter; + this.stats.setRounds(turnCounter); durationMin = (Date.now() - startTime) / (1000 * 60); if ( @@ -424,6 +535,31 @@ export class SubAgentScope { break; } + // Update token usage if available + if (lastUsage) { + const inTok = 
Number(lastUsage.promptTokenCount || 0); + const outTok = Number(lastUsage.candidatesTokenCount || 0); + if (isFinite(inTok) || isFinite(outTok)) { + this.stats.recordTokens( + isFinite(inTok) ? inTok : 0, + isFinite(outTok) ? outTok : 0, + ); + // mirror legacy fields for compatibility + this.executionStats.inputTokens = + (this.executionStats.inputTokens || 0) + + (isFinite(inTok) ? inTok : 0); + this.executionStats.outputTokens = + (this.executionStats.outputTokens || 0) + + (isFinite(outTok) ? outTok : 0); + this.executionStats.totalTokens = + (this.executionStats.inputTokens || 0) + + (this.executionStats.outputTokens || 0); + this.executionStats.estimatedCost = + (this.executionStats.inputTokens || 0) * 3e-5 + + (this.executionStats.outputTokens || 0) * 6e-5; + } + } + if (functionCalls.length > 0) { currentMessages = await this.processFunctionCalls( functionCalls, @@ -431,42 +567,90 @@ export class SubAgentScope { promptId, ); } else { - // Model stopped calling tools. Check if goal is met. - if ( - !this.outputConfig || - Object.keys(this.outputConfig.outputs).length === 0 - ) { + // No tool calls — treat this as the model's final answer. + if (roundText && roundText.trim().length > 0) { + this.finalText = roundText.trim(); + this.output.result = this.finalText; this.output.terminate_reason = SubagentTerminateMode.GOAL; break; } - - const remainingVars = Object.keys(this.outputConfig.outputs).filter( - (key) => !(key in this.output.emitted_vars), - ); - - if (remainingVars.length === 0) { - this.output.terminate_reason = SubagentTerminateMode.GOAL; - break; - } - - const nudgeMessage = `You have stopped calling tools but have not emitted the following required variables: ${remainingVars.join( - ', ', - )}. Please use the 'self.emitvalue' tool to emit them now, or continue working if necessary.`; - - console.debug(nudgeMessage); - + // Otherwise, nudge the model to finalize a result. 
currentMessages = [ { role: 'user', - parts: [{ text: nudgeMessage }], + parts: [ + { + text: 'Please provide the final result now and stop calling tools.', + }, + ], }, ]; } + this.eventEmitter?.emit('round_end', { + subagentId: this.subagentId, + round: turnCounter, + promptId, + timestamp: Date.now(), + }); } } catch (error) { console.error('Error during subagent execution:', error); this.output.terminate_reason = SubagentTerminateMode.ERROR; + this.eventEmitter?.emit('error', { + subagentId: this.subagentId, + error: error instanceof Error ? error.message : String(error), + timestamp: Date.now(), + }); + + // Log telemetry for subagent error + const errorEvent = new SubagentExecutionEvent(this.name, 'failed', { + terminate_reason: SubagentTerminateMode.ERROR, + result: error instanceof Error ? error.message : String(error), + }); + logSubagentExecution(this.runtimeContext, errorEvent); + throw error; + } finally { + if (externalSignal) externalSignal.removeEventListener('abort', onAbort); + this.executionStats.totalDurationMs = Date.now() - startTime; + const summary = this.stats.getSummary(Date.now()); + this.eventEmitter?.emit('finish', { + subagentId: this.subagentId, + terminate_reason: this.output.terminate_reason, + timestamp: Date.now(), + rounds: summary.rounds, + totalDurationMs: summary.totalDurationMs, + totalToolCalls: summary.totalToolCalls, + successfulToolCalls: summary.successfulToolCalls, + failedToolCalls: summary.failedToolCalls, + inputTokens: summary.inputTokens, + outputTokens: summary.outputTokens, + totalTokens: summary.totalTokens, + }); + + // Log telemetry for subagent completion + const completionEvent = new SubagentExecutionEvent( + this.name, + this.output.terminate_reason === SubagentTerminateMode.GOAL + ? 
'completed' + : 'failed', + { + terminate_reason: this.output.terminate_reason, + result: this.finalText, + execution_summary: this.formatCompactResult( + 'Subagent execution completed', + ), + }, + ); + logSubagentExecution(this.runtimeContext, completionEvent); + + await this.hooks?.onStop?.({ + subagentId: this.subagentId, + name: this.name, + terminateReason: this.output.terminate_reason, + summary: summary as unknown as Record, + timestamp: Date.now(), + }); } } @@ -489,6 +673,7 @@ export class SubAgentScope { const toolResponseParts: Part[] = []; for (const functionCall of functionCalls) { + const toolName = String(functionCall.name || 'unknown'); const callId = functionCall.id ?? `${functionCall.name}-${Date.now()}`; const requestInfo: ToolCallRequestInfo = { callId, @@ -498,28 +683,112 @@ export class SubAgentScope { prompt_id: promptId, }; - let toolResponse; - - // Handle scope-local tools first. - if (functionCall.name === 'self.emitvalue') { - const valName = String(requestInfo.args['emit_variable_name']); - const valVal = String(requestInfo.args['emit_variable_value']); - this.output.emitted_vars[valName] = valVal; - - toolResponse = { - callId, - responseParts: `Emitted variable ${valName} successfully`, - resultDisplay: `Emitted variable ${valName} successfully`, - error: undefined, - }; + // Execute tools with timing and hooks + const start = Date.now(); + await this.hooks?.preToolUse?.({ + subagentId: this.subagentId, + name: this.name, + toolName, + args: requestInfo.args, + timestamp: Date.now(), + }); + const toolResponse = await executeToolCall( + this.runtimeContext, + requestInfo, + abortController.signal, + ); + const duration = Date.now() - start; + // Update tool call stats + this.executionStats.totalToolCalls += 1; + if (toolResponse.error) { + this.executionStats.failedToolCalls += 1; } else { - toolResponse = await executeToolCall( - this.runtimeContext, - requestInfo, - abortController.signal, - ); + 
this.executionStats.successfulToolCalls += 1; } + // Update per-tool usage + const tu = this.toolUsage.get(toolName) || { + count: 0, + success: 0, + failure: 0, + totalDurationMs: 0, + averageDurationMs: 0, + }; + tu.count += 1; + if (toolResponse?.error) { + tu.failure += 1; + const disp = + typeof toolResponse.resultDisplay === 'string' + ? toolResponse.resultDisplay + : toolResponse.resultDisplay + ? JSON.stringify(toolResponse.resultDisplay) + : undefined; + tu.lastError = disp || toolResponse.error?.message || 'Unknown error'; + } else { + tu.success += 1; + } + if (typeof tu.totalDurationMs === 'number') { + tu.totalDurationMs += duration; + tu.averageDurationMs = + tu.count > 0 ? tu.totalDurationMs / tu.count : tu.totalDurationMs; + } else { + tu.totalDurationMs = duration; + tu.averageDurationMs = duration; + } + this.toolUsage.set(toolName, tu); + + // Emit tool call/result events + this.eventEmitter?.emit('tool_call', { + subagentId: this.subagentId, + round: this.executionStats.rounds, + callId, + name: toolName, + args: requestInfo.args, + timestamp: Date.now(), + }); + this.eventEmitter?.emit('tool_result', { + subagentId: this.subagentId, + round: this.executionStats.rounds, + callId, + name: toolName, + success: !toolResponse?.error, + error: toolResponse?.error + ? typeof toolResponse.resultDisplay === 'string' + ? toolResponse.resultDisplay + : toolResponse.resultDisplay + ? JSON.stringify(toolResponse.resultDisplay) + : toolResponse.error.message + : undefined, + durationMs: duration, + timestamp: Date.now(), + }); + + // Update statistics service + this.stats.recordToolCall( + toolName, + !toolResponse?.error, + duration, + this.toolUsage.get(toolName)?.lastError, + ); + + // post-tool hook + await this.hooks?.postToolUse?.({ + subagentId: this.subagentId, + name: this.name, + toolName, + args: requestInfo.args, + success: !toolResponse?.error, + durationMs: duration, + errorMessage: toolResponse?.error + ? 
typeof toolResponse.resultDisplay === 'string' + ? toolResponse.resultDisplay + : toolResponse.resultDisplay + ? JSON.stringify(toolResponse.resultDisplay) + : toolResponse.error.message + : undefined, + timestamp: Date.now(), + }); + if (toolResponse.error) { console.error( `Error executing tool ${functionCall.name}: ${toolResponse.resultDisplay || toolResponse.error.message}`, @@ -549,6 +818,65 @@ export class SubAgentScope { return [{ role: 'user', parts: toolResponseParts }]; } + getEventEmitter() { + return this.eventEmitter; + } + + getStatistics() { + const total = this.executionStats.totalToolCalls; + const successRate = + total > 0 ? (this.executionStats.successfulToolCalls / total) * 100 : 0; + return { + ...this.executionStats, + successRate, + toolUsage: Array.from(this.toolUsage.entries()).map(([name, v]) => ({ + name, + ...v, + })), + }; + } + + formatCompactResult(taskDesc: string, _useColors = false) { + const stats = this.getStatistics(); + return formatCompact( + { + rounds: stats.rounds, + totalDurationMs: stats.totalDurationMs, + totalToolCalls: stats.totalToolCalls, + successfulToolCalls: stats.successfulToolCalls, + failedToolCalls: stats.failedToolCalls, + successRate: stats.successRate, + inputTokens: this.executionStats.inputTokens, + outputTokens: this.executionStats.outputTokens, + totalTokens: this.executionStats.totalTokens, + }, + taskDesc, + ); + } + + getFinalText(): string { + return this.finalText; + } + + formatDetailedResult(taskDesc: string) { + const stats = this.getStatistics(); + return formatDetailed( + { + rounds: stats.rounds, + totalDurationMs: stats.totalDurationMs, + totalToolCalls: stats.totalToolCalls, + successfulToolCalls: stats.successfulToolCalls, + failedToolCalls: stats.failedToolCalls, + successRate: stats.successRate, + inputTokens: this.executionStats.inputTokens, + outputTokens: this.executionStats.outputTokens, + totalTokens: this.executionStats.totalTokens, + toolUsage: stats.toolUsage, + }, + taskDesc, + 
); + } + private async createChatObject(context: ContextState) { if (!this.promptConfig.systemPrompt && !this.promptConfig.initialMessages) { throw new Error( @@ -616,43 +944,6 @@ export class SubAgentScope { } } - /** - * Returns an array of FunctionDeclaration objects for tools that are local to the subagent's scope. - * Currently, this includes the `self.emitvalue` tool for emitting variables. - * @returns An array of `FunctionDeclaration` objects. - */ - private getScopeLocalFuncDefs() { - const emitValueTool: FunctionDeclaration = { - name: 'self.emitvalue', - description: `* This tool emits A SINGLE return value from this execution, such that it can be collected and presented to the calling function. - * You can only emit ONE VALUE each time you call this tool. You are expected to call this tool MULTIPLE TIMES if you have MULTIPLE OUTPUTS.`, - parameters: { - type: Type.OBJECT, - properties: { - emit_variable_name: { - description: 'This is the name of the variable to be returned.', - type: Type.STRING, - }, - emit_variable_value: { - description: - 'This is the _value_ to be returned for this variable.', - type: Type.STRING, - }, - }, - required: ['emit_variable_name', 'emit_variable_value'], - }, - }; - - return [emitValueTool]; - } - - /** - * Builds the system prompt for the chat based on the provided configurations. - * It templates the base system prompt and appends instructions for emitting - * variables if an `OutputConfig` is provided. - * @param {ContextState} context - The context for templating. - * @returns {string} The complete system prompt. - */ private buildChatSystemPrompt(context: ContextState): string { if (!this.promptConfig.systemPrompt) { // This should ideally be caught in createChatObject, but serves as a safeguard. @@ -661,23 +952,13 @@ export class SubAgentScope { let finalPrompt = templateString(this.promptConfig.systemPrompt, context); - // Add instructions for emitting variables if needed. 
- if (this.outputConfig && this.outputConfig.outputs) { - let outputInstructions = - '\n\nAfter you have achieved all other goals, you MUST emit the required output variables. For each expected output, make one final call to the `self.emitvalue` tool.'; - - for (const [key, value] of Object.entries(this.outputConfig.outputs)) { - outputInstructions += `\n* Use 'self.emitvalue' to emit the '${key}' key, with a value described as: '${value}'`; - } - finalPrompt += outputInstructions; - } - // Add general non-interactive instructions. finalPrompt += ` Important Rules: - * You are running in a non-interactive mode. You CANNOT ask the user for input or clarification. You must proceed with the information you have. - * Once you believe all goals have been met and all required outputs have been emitted, stop calling tools.`; + - You operate in non-interactive mode: do not ask the user questions; proceed with available context. + - Use tools only when necessary to obtain facts or make changes. + - When the task is complete, return the final result as a normal model response (not a tool call) and stop.`; return finalPrompt; } diff --git a/packages/core/src/subagents/types.ts b/packages/core/src/subagents/types.ts index 3220b849..5236b766 100644 --- a/packages/core/src/subagents/types.ts +++ b/packages/core/src/subagents/types.ts @@ -9,7 +9,7 @@ import { ModelConfig, RunConfig, ToolConfig, -} from '../core/subagent.js'; +} from './subagent.js'; /** * Represents the storage level for a subagent configuration. 
diff --git a/packages/core/src/subagents/validation.test.ts b/packages/core/src/subagents/validation.test.ts index 7d01564d..e6739a0e 100644 --- a/packages/core/src/subagents/validation.test.ts +++ b/packages/core/src/subagents/validation.test.ts @@ -4,21 +4,15 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { vi, describe, it, expect, beforeEach } from 'vitest'; +import { describe, it, expect, beforeEach } from 'vitest'; import { SubagentValidator } from './validation.js'; import { SubagentConfig, SubagentError } from './types.js'; -import { ToolRegistry } from '../tools/tool-registry.js'; describe('SubagentValidator', () => { let validator: SubagentValidator; - let mockToolRegistry: ToolRegistry; beforeEach(() => { - mockToolRegistry = { - getTool: vi.fn(), - } as unknown as ToolRegistry; - - validator = new SubagentValidator(mockToolRegistry); + validator = new SubagentValidator(); }); describe('validateName', () => { @@ -191,9 +185,6 @@ describe('SubagentValidator', () => { describe('validateTools', () => { it('should accept valid tool arrays', () => { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - vi.mocked(mockToolRegistry.getTool).mockReturnValue({} as any); - const result = validator.validateTools(['read_file', 'write_file']); expect(result.isValid).toBe(true); expect(result.errors).toHaveLength(0); @@ -215,9 +206,6 @@ describe('SubagentValidator', () => { }); it('should warn about duplicate tools', () => { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - vi.mocked(mockToolRegistry.getTool).mockReturnValue({} as any); - const result = validator.validateTools([ 'read_file', 'read_file', @@ -243,16 +231,6 @@ describe('SubagentValidator', () => { expect(result.isValid).toBe(false); expect(result.errors).toContain('Tool name cannot be empty'); }); - - it('should reject unknown tools when registry is available', () => { - vi.mocked(mockToolRegistry.getTool).mockReturnValue(undefined); - - const result = 
validator.validateTools(['unknown_tool']); - expect(result.isValid).toBe(false); - expect(result.errors).toContain( - 'Tool "unknown_tool" not found in tool registry', - ); - }); }); describe('validateModelConfig', () => { diff --git a/packages/core/src/subagents/validation.ts b/packages/core/src/subagents/validation.ts index b271d1d7..8c7ace3c 100644 --- a/packages/core/src/subagents/validation.ts +++ b/packages/core/src/subagents/validation.ts @@ -10,15 +10,12 @@ import { SubagentError, SubagentErrorCode, } from './types.js'; -import { ToolRegistry } from '../tools/tool-registry.js'; /** * Validates subagent configurations to ensure they are well-formed * and compatible with the runtime system. */ export class SubagentValidator { - constructor(private readonly toolRegistry?: ToolRegistry) {} - /** * Validates a complete subagent configuration. * @@ -238,14 +235,6 @@ export class SubagentValidator { errors.push('Tool name cannot be empty'); continue; } - - // Check if tool exists in registry (if available) - if (this.toolRegistry) { - const toolInstance = this.toolRegistry.getTool(tool); - if (!toolInstance) { - errors.push(`Tool "${tool}" not found in tool registry`); - } - } } return { @@ -262,7 +251,7 @@ export class SubagentValidator { * @returns ValidationResult */ validateModelConfig( - modelConfig: Partial, + modelConfig: Partial, ): ValidationResult { const errors: string[] = []; const warnings: string[] = []; @@ -310,7 +299,7 @@ export class SubagentValidator { * @returns ValidationResult */ validateRunConfig( - runConfig: Partial, + runConfig: Partial, ): ValidationResult { const errors: string[] = []; const warnings: string[] = []; diff --git a/packages/core/src/telemetry/constants.ts b/packages/core/src/telemetry/constants.ts index 372e52dd..e4e5d7f6 100644 --- a/packages/core/src/telemetry/constants.ts +++ b/packages/core/src/telemetry/constants.ts @@ -21,6 +21,7 @@ export const EVENT_INVALID_CHUNK = 'qwen-code.chat.invalid_chunk'; export const 
EVENT_CONTENT_RETRY = 'qwen-code.chat.content_retry'; export const EVENT_CONTENT_RETRY_FAILURE = 'qwen-code.chat.content_retry_failure'; +export const EVENT_SUBAGENT_EXECUTION = 'qwen-code.subagent_execution'; export const METRIC_TOOL_CALL_COUNT = 'qwen-code.tool.call.count'; export const METRIC_TOOL_CALL_LATENCY = 'qwen-code.tool.call.latency'; @@ -33,3 +34,5 @@ export const METRIC_INVALID_CHUNK_COUNT = 'qwen-code.chat.invalid_chunk.count'; export const METRIC_CONTENT_RETRY_COUNT = 'qwen-code.chat.content_retry.count'; export const METRIC_CONTENT_RETRY_FAILURE_COUNT = 'qwen-code.chat.content_retry_failure.count'; +export const METRIC_SUBAGENT_EXECUTION_COUNT = + 'qwen-code.subagent.execution.count'; diff --git a/packages/core/src/telemetry/loggers.ts b/packages/core/src/telemetry/loggers.ts index 223ea78b..63964e73 100644 --- a/packages/core/src/telemetry/loggers.ts +++ b/packages/core/src/telemetry/loggers.ts @@ -23,6 +23,7 @@ import { EVENT_INVALID_CHUNK, EVENT_CONTENT_RETRY, EVENT_CONTENT_RETRY_FAILURE, + EVENT_SUBAGENT_EXECUTION, } from './constants.js'; import { ApiErrorEvent, @@ -41,6 +42,7 @@ import { InvalidChunkEvent, ContentRetryEvent, ContentRetryFailureEvent, + SubagentExecutionEvent, } from './types.js'; import { recordApiErrorMetrics, @@ -51,6 +53,7 @@ import { recordInvalidChunk, recordContentRetry, recordContentRetryFailure, + recordSubagentExecutionMetrics, } from './metrics.js'; import { QwenLogger } from './qwen-logger/qwen-logger.js'; import { isTelemetrySdkInitialized } from './sdk.js'; @@ -504,3 +507,31 @@ export function logContentRetryFailure( logger.emit(logRecord); recordContentRetryFailure(config); } + +export function logSubagentExecution( + config: Config, + event: SubagentExecutionEvent, +): void { + QwenLogger.getInstance(config)?.logSubagentExecutionEvent(event); + if (!isTelemetrySdkInitialized()) return; + + const attributes: LogAttributes = { + ...getCommonAttributes(config), + ...event, + 'event.name': EVENT_SUBAGENT_EXECUTION, 
+ 'event.timestamp': new Date().toISOString(), + }; + + const logger = logs.getLogger(SERVICE_NAME); + const logRecord: LogRecord = { + body: `Subagent execution: ${event.subagent_name}.`, + attributes, + }; + logger.emit(logRecord); + recordSubagentExecutionMetrics( + config, + event.subagent_name, + event.status, + event.terminate_reason, + ); +} diff --git a/packages/core/src/telemetry/metrics.ts b/packages/core/src/telemetry/metrics.ts index a84abbff..23ec5802 100644 --- a/packages/core/src/telemetry/metrics.ts +++ b/packages/core/src/telemetry/metrics.ts @@ -25,6 +25,7 @@ import { METRIC_INVALID_CHUNK_COUNT, METRIC_CONTENT_RETRY_COUNT, METRIC_CONTENT_RETRY_FAILURE_COUNT, + METRIC_SUBAGENT_EXECUTION_COUNT, } from './constants.js'; import { Config } from '../config/config.js'; import { DiffStat } from '../tools/tools.js'; @@ -46,6 +47,7 @@ let chatCompressionCounter: Counter | undefined; let invalidChunkCounter: Counter | undefined; let contentRetryCounter: Counter | undefined; let contentRetryFailureCounter: Counter | undefined; +let subagentExecutionCounter: Counter | undefined; let isMetricsInitialized = false; function getCommonAttributes(config: Config): Attributes { @@ -117,6 +119,14 @@ export function initializeMetrics(config: Config): void { valueType: ValueType.INT, }, ); + subagentExecutionCounter = meter.createCounter( + METRIC_SUBAGENT_EXECUTION_COUNT, + { + description: + 'Counts subagent execution events, tagged by status and subagent name.', + valueType: ValueType.INT, + }, + ); const sessionCounter = meter.createCounter(METRIC_SESSION_COUNT, { description: 'Count of CLI sessions started.', @@ -277,3 +287,27 @@ export function recordContentRetryFailure(config: Config): void { if (!contentRetryFailureCounter || !isMetricsInitialized) return; contentRetryFailureCounter.add(1, getCommonAttributes(config)); } + +/** + * Records a metric for subagent execution events. 
+ */ +export function recordSubagentExecutionMetrics( + config: Config, + subagentName: string, + status: 'started' | 'progress' | 'completed' | 'failed', + terminateReason?: string, +): void { + if (!subagentExecutionCounter || !isMetricsInitialized) return; + + const attributes: Attributes = { + ...getCommonAttributes(config), + subagent_name: subagentName, + status, + }; + + if (terminateReason) { + attributes['terminate_reason'] = terminateReason; + } + + subagentExecutionCounter.add(1, attributes); +} diff --git a/packages/core/src/telemetry/qwen-logger/qwen-logger.ts b/packages/core/src/telemetry/qwen-logger/qwen-logger.ts index 18590f0e..04c67322 100644 --- a/packages/core/src/telemetry/qwen-logger/qwen-logger.ts +++ b/packages/core/src/telemetry/qwen-logger/qwen-logger.ts @@ -27,6 +27,7 @@ import { InvalidChunkEvent, ContentRetryEvent, ContentRetryFailureEvent, + SubagentExecutionEvent, } from '../types.js'; import { RumEvent, @@ -628,6 +629,20 @@ export class QwenLogger { this.flushIfNeeded(); } + logSubagentExecutionEvent(event: SubagentExecutionEvent): void { + const rumEvent = this.createActionEvent('subagent', 'subagent_execution', { + snapshots: JSON.stringify({ + subagent_name: event.subagent_name, + status: event.status, + terminate_reason: event.terminate_reason, + execution_summary: event.execution_summary, + }), + }); + + this.enqueueLogEvent(rumEvent); + this.flushIfNeeded(); + } + logEndSessionEvent(_event: EndSessionEvent): void { const applicationEvent = this.createViewEvent('session', 'session_end', {}); diff --git a/packages/core/src/telemetry/types.ts b/packages/core/src/telemetry/types.ts index 338b087f..b5c0e051 100644 --- a/packages/core/src/telemetry/types.ts +++ b/packages/core/src/telemetry/types.ts @@ -444,6 +444,34 @@ export class ContentRetryFailureEvent implements BaseTelemetryEvent { } } +export class SubagentExecutionEvent implements BaseTelemetryEvent { + 'event.name': 'subagent_execution'; + 'event.timestamp': string; + 
subagent_name: string; + status: 'started' | 'progress' | 'completed' | 'failed'; + terminate_reason?: string; + result?: string; + execution_summary?: string; + + constructor( + subagent_name: string, + status: 'started' | 'progress' | 'completed' | 'failed', + options?: { + terminate_reason?: string; + result?: string; + execution_summary?: string; + }, + ) { + this['event.name'] = 'subagent_execution'; + this['event.timestamp'] = new Date().toISOString(); + this.subagent_name = subagent_name; + this.status = status; + this.terminate_reason = options?.terminate_reason; + this.result = options?.result; + this.execution_summary = options?.execution_summary; + } +} + export type TelemetryEvent = | StartSessionEvent | EndSessionEvent @@ -461,4 +489,5 @@ export type TelemetryEvent = | SlashCommandEvent | InvalidChunkEvent | ContentRetryEvent - | ContentRetryFailureEvent; + | ContentRetryFailureEvent + | SubagentExecutionEvent; diff --git a/packages/core/src/tools/shell.ts b/packages/core/src/tools/shell.ts index a7e20e56..5707b1ea 100644 --- a/packages/core/src/tools/shell.ts +++ b/packages/core/src/tools/shell.ts @@ -14,6 +14,7 @@ import { BaseToolInvocation, ToolInvocation, ToolResult, + ToolResultDisplay, ToolCallConfirmationDetails, ToolExecuteConfirmationDetails, ToolConfirmationOutcome, @@ -100,7 +101,7 @@ class ShellToolInvocation extends BaseToolInvocation< async execute( signal: AbortSignal, - updateOutput?: (output: string) => void, + updateOutput?: (output: ToolResultDisplay) => void, terminalColumns?: number, terminalRows?: number, ): Promise { diff --git a/packages/core/src/tools/task.test.ts b/packages/core/src/tools/task.test.ts new file mode 100644 index 00000000..8760b24c --- /dev/null +++ b/packages/core/src/tools/task.test.ts @@ -0,0 +1,507 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { TaskTool, TaskParams } from 
'./task.js'; +import { Config } from '../config/config.js'; +import { SubagentManager } from '../subagents/subagent-manager.js'; +import { SubagentConfig } from '../subagents/types.js'; +import { + SubAgentScope, + ContextState, + SubagentTerminateMode, +} from '../subagents/subagent.js'; +import { partToString } from '../utils/partUtils.js'; + +// Type for accessing protected methods in tests +type TaskToolWithProtectedMethods = TaskTool & { + createInvocation: (params: TaskParams) => { + execute: ( + signal?: AbortSignal, + liveOutputCallback?: (chunk: string) => void, + ) => Promise<{ + llmContent: string; + returnDisplay: unknown; + }>; + getDescription: () => string; + shouldConfirmExecute: () => Promise; + }; +}; + +// Mock dependencies +vi.mock('../subagents/subagent-manager.js'); +vi.mock('../subagents/subagent.js'); + +const MockedSubagentManager = vi.mocked(SubagentManager); +const MockedContextState = vi.mocked(ContextState); + +describe('TaskTool', () => { + let config: Config; + let taskTool: TaskTool; + let mockSubagentManager: SubagentManager; + + const mockSubagents: SubagentConfig[] = [ + { + name: 'file-search', + description: 'Specialized agent for searching and analyzing files', + systemPrompt: 'You are a file search specialist.', + level: 'project', + filePath: '/project/.qwen/agents/file-search.md', + }, + { + name: 'code-review', + description: 'Agent for reviewing code quality and best practices', + systemPrompt: 'You are a code review specialist.', + level: 'user', + filePath: '/home/user/.qwen/agents/code-review.md', + }, + ]; + + beforeEach(async () => { + // Setup fake timers + vi.useFakeTimers(); + + // Create mock config + config = { + getProjectRoot: vi.fn().mockReturnValue('/test/project'), + getSessionId: vi.fn().mockReturnValue('test-session-id'), + getSubagentManager: vi.fn(), + } as unknown as Config; + + // Setup SubagentManager mock + mockSubagentManager = { + listSubagents: vi.fn().mockResolvedValue(mockSubagents), + 
loadSubagent: vi.fn(), + createSubagentScope: vi.fn(), + } as unknown as SubagentManager; + + MockedSubagentManager.mockImplementation(() => mockSubagentManager); + + // Make config return the mock SubagentManager + vi.mocked(config.getSubagentManager).mockReturnValue(mockSubagentManager); + + // Create TaskTool instance + taskTool = new TaskTool(config); + + // Allow async initialization to complete + await vi.runAllTimersAsync(); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + describe('initialization', () => { + it('should initialize with correct name and properties', () => { + expect(taskTool.name).toBe('task'); + expect(taskTool.displayName).toBe('Task'); + expect(taskTool.kind).toBe('execute'); + }); + + it('should load available subagents during initialization', () => { + expect(mockSubagentManager.listSubagents).toHaveBeenCalled(); + }); + + it('should update description with available subagents', () => { + expect(taskTool.description).toContain('file-search'); + expect(taskTool.description).toContain( + 'Specialized agent for searching and analyzing files', + ); + expect(taskTool.description).toContain('code-review'); + expect(taskTool.description).toContain( + 'Agent for reviewing code quality and best practices', + ); + }); + + it('should handle empty subagents list gracefully', async () => { + vi.mocked(mockSubagentManager.listSubagents).mockResolvedValue([]); + + const emptyTaskTool = new TaskTool(config); + await vi.runAllTimersAsync(); + + expect(emptyTaskTool.description).toContain( + 'No subagents are currently configured', + ); + }); + + it('should handle subagent loading errors gracefully', async () => { + vi.mocked(mockSubagentManager.listSubagents).mockRejectedValue( + new Error('Loading failed'), + ); + + const consoleSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + + new TaskTool(config); + await vi.runAllTimersAsync(); + + expect(consoleSpy).toHaveBeenCalledWith( + 'Failed to load subagents for Task tool:', + 
expect.any(Error), + ); + consoleSpy.mockRestore(); + }); + }); + + describe('schema generation', () => { + it('should generate schema with subagent names as enum', () => { + const schema = taskTool.schema; + const properties = schema.parametersJsonSchema as { + properties: { + subagent_type: { + enum?: string[]; + }; + }; + }; + expect(properties.properties.subagent_type.enum).toEqual([ + 'file-search', + 'code-review', + ]); + }); + + it('should generate schema without enum when no subagents available', async () => { + vi.mocked(mockSubagentManager.listSubagents).mockResolvedValue([]); + + const emptyTaskTool = new TaskTool(config); + await vi.runAllTimersAsync(); + + const schema = emptyTaskTool.schema; + const properties = schema.parametersJsonSchema as { + properties: { + subagent_type: { + enum?: string[]; + }; + }; + }; + expect(properties.properties.subagent_type.enum).toBeUndefined(); + }); + }); + + describe('validateToolParams', () => { + const validParams: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files in the project', + subagent_type: 'file-search', + }; + + it('should validate valid parameters', async () => { + const result = taskTool.validateToolParams(validParams); + expect(result).toBeNull(); + }); + + it('should reject empty description', async () => { + const result = taskTool.validateToolParams({ + ...validParams, + description: '', + }); + expect(result).toBe( + 'Parameter "description" must be a non-empty string.', + ); + }); + + it('should reject empty prompt', async () => { + const result = taskTool.validateToolParams({ + ...validParams, + prompt: '', + }); + expect(result).toBe('Parameter "prompt" must be a non-empty string.'); + }); + + it('should reject empty subagent_type', async () => { + const result = taskTool.validateToolParams({ + ...validParams, + subagent_type: '', + }); + expect(result).toBe( + 'Parameter "subagent_type" must be a non-empty string.', + ); + }); + + it('should reject 
non-existent subagent', async () => { + const result = taskTool.validateToolParams({ + ...validParams, + subagent_type: 'non-existent', + }); + expect(result).toBe( + 'Subagent "non-existent" not found. Available subagents: file-search, code-review', + ); + }); + }); + + describe('refreshSubagents', () => { + it('should refresh available subagents and update description', async () => { + const newSubagents: SubagentConfig[] = [ + { + name: 'test-agent', + description: 'A test agent', + systemPrompt: 'Test prompt', + level: 'project', + filePath: '/project/.qwen/agents/test-agent.md', + }, + ]; + + vi.mocked(mockSubagentManager.listSubagents).mockResolvedValue( + newSubagents, + ); + + await taskTool.refreshSubagents(); + + expect(taskTool.description).toContain('test-agent'); + expect(taskTool.description).toContain('A test agent'); + }); + }); + + describe('TaskToolInvocation', () => { + let mockSubagentScope: SubAgentScope; + let mockContextState: ContextState; + + beforeEach(() => { + mockSubagentScope = { + runNonInteractive: vi.fn().mockResolvedValue(undefined), + output: { + result: 'Task completed successfully', + terminate_reason: SubagentTerminateMode.GOAL, + }, + getFinalText: vi.fn().mockReturnValue('Task completed successfully'), + formatCompactResult: vi + .fn() + .mockReturnValue( + 'āœ… Success: Search files completed with GOAL termination', + ), + getStatistics: vi.fn().mockReturnValue({ + rounds: 2, + totalDurationMs: 1500, + totalToolCalls: 3, + successfulToolCalls: 3, + failedToolCalls: 0, + }), + } as unknown as SubAgentScope; + + mockContextState = { + set: vi.fn(), + } as unknown as ContextState; + + MockedContextState.mockImplementation(() => mockContextState); + + vi.mocked(mockSubagentManager.loadSubagent).mockResolvedValue( + mockSubagents[0], + ); + vi.mocked(mockSubagentManager.createSubagentScope).mockResolvedValue( + mockSubagentScope, + ); + }); + + it('should execute subagent successfully', async () => { + const params: TaskParams = 
{ + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + const result = await invocation.execute(); + + expect(mockSubagentManager.loadSubagent).toHaveBeenCalledWith( + 'file-search', + ); + expect(mockSubagentManager.createSubagentScope).toHaveBeenCalledWith( + mockSubagents[0], + config, + expect.any(Object), // eventEmitter parameter + ); + expect(mockSubagentScope.runNonInteractive).toHaveBeenCalledWith( + mockContextState, + undefined, // signal parameter (undefined when not provided) + ); + + const llmText = partToString(result.llmContent); + const parsedResult = JSON.parse(llmText) as { + success: boolean; + subagent_name?: string; + error?: string; + }; + expect(parsedResult.success).toBe(true); + expect(parsedResult.subagent_name).toBe('file-search'); + }); + + it('should handle subagent not found error', async () => { + vi.mocked(mockSubagentManager.loadSubagent).mockResolvedValue(null); + + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'non-existent', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + const result = await invocation.execute(); + + const llmText = partToString(result.llmContent); + const parsedResult = JSON.parse(llmText) as { + success: boolean; + subagent_name?: string; + error?: string; + }; + expect(parsedResult.success).toBe(false); + expect(parsedResult.error).toContain('Subagent "non-existent" not found'); + }); + + it('should handle subagent execution failure', async () => { + mockSubagentScope.output.terminate_reason = SubagentTerminateMode.ERROR; + + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + 
).createInvocation(params); + const result = await invocation.execute(); + + const llmText = partToString(result.llmContent); + const parsedResult = JSON.parse(llmText) as { + success: boolean; + subagent_name?: string; + error?: string; + }; + expect(parsedResult.success).toBe(false); + expect(parsedResult.error).toContain( + 'Task did not complete successfully', + ); + }); + + it('should handle execution errors gracefully', async () => { + vi.mocked(mockSubagentManager.createSubagentScope).mockRejectedValue( + new Error('Creation failed'), + ); + + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + const result = await invocation.execute(); + + const llmText = partToString(result.llmContent); + const parsedResult = JSON.parse(llmText) as { + success: boolean; + subagent_name?: string; + error?: string; + }; + expect(parsedResult.success).toBe(false); + expect(parsedResult.error).toContain('Failed to start subagent'); + }); + + it('should execute subagent without live output callback', async () => { + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + const result = await invocation.execute(); + + // Verify that the task completed successfully + expect(result.llmContent).toBeDefined(); + expect(result.returnDisplay).toBeDefined(); + + // Verify the result has the expected structure + const llmContent = Array.isArray(result.llmContent) + ? 
result.llmContent + : [result.llmContent]; + const parsedResult = JSON.parse((llmContent[0] as { text: string }).text); + expect(parsedResult.success).toBe(true); + expect(parsedResult.subagent_name).toBe('file-search'); + }); + + it('should set context variables correctly', async () => { + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + await invocation.execute(); + + expect(mockContextState.set).toHaveBeenCalledWith( + 'task_prompt', + 'Find all TypeScript files', + ); + }); + + it('should return structured display object', async () => { + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + const result = await invocation.execute(); + + expect(typeof result.returnDisplay).toBe('object'); + expect(result.returnDisplay).toHaveProperty('type', 'subagent_execution'); + expect(result.returnDisplay).toHaveProperty( + 'subagentName', + 'file-search', + ); + expect(result.returnDisplay).toHaveProperty( + 'taskDescription', + 'Search files', + ); + expect(result.returnDisplay).toHaveProperty('status', 'completed'); + }); + + it('should not require confirmation', async () => { + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + }; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + const shouldConfirm = await invocation.shouldConfirmExecute(); + + expect(shouldConfirm).toBe(false); + }); + + it('should provide correct description', async () => { + const params: TaskParams = { + description: 'Search files', + prompt: 'Find all TypeScript files', + subagent_type: 'file-search', + 
}; + + const invocation = ( + taskTool as TaskToolWithProtectedMethods + ).createInvocation(params); + const description = invocation.getDescription(); + + expect(description).toBe( + 'file-search subagent: "Search files"', + ); + }); + }); +}); diff --git a/packages/core/src/tools/task.ts b/packages/core/src/tools/task.ts new file mode 100644 index 00000000..d3bc4abb --- /dev/null +++ b/packages/core/src/tools/task.ts @@ -0,0 +1,498 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + BaseDeclarativeTool, + BaseToolInvocation, + Kind, + ToolResult, + ToolResultDisplay, + TaskResultDisplay, +} from './tools.js'; +import { Config } from '../config/config.js'; +import { SubagentManager } from '../subagents/subagent-manager.js'; +import { SubagentConfig } from '../subagents/types.js'; +import { ContextState } from '../subagents/subagent.js'; +import { + SubAgentEventEmitter, + SubAgentToolCallEvent, + SubAgentToolResultEvent, + SubAgentFinishEvent, +} from '../subagents/subagent-events.js'; +import { ChatRecordingService } from '../services/chatRecordingService.js'; + +export interface TaskParams { + description: string; + prompt: string; + subagent_type: string; +} + +export interface TaskResult { + success: boolean; + output?: string; + error?: string; + subagent_name?: string; + execution_summary?: string; +} + +/** + * Task tool that enables primary agents to delegate tasks to specialized subagents. + * The tool dynamically loads available subagents and includes them in its description + * for the model to choose from. 
+ */ +export class TaskTool extends BaseDeclarativeTool<TaskParams, ToolResult> { + static readonly Name: string = 'task'; + + private subagentManager: SubagentManager; + private availableSubagents: SubagentConfig[] = []; + + constructor(private readonly config: Config) { + // Initialize with a basic schema first + const initialSchema = { + type: 'object', + properties: { + description: { + type: 'string', + description: 'A short (3-5 word) description of the task', + }, + prompt: { + type: 'string', + description: 'The task for the agent to perform', + }, + subagent_type: { + type: 'string', + description: 'The type of specialized agent to use for this task', + }, + }, + required: ['description', 'prompt', 'subagent_type'], + additionalProperties: false, + $schema: 'http://json-schema.org/draft-07/schema#', + }; + + super( + TaskTool.Name, + 'Task', + 'Delegate tasks to specialized subagents. Loading available subagents...', // Initial description + Kind.Execute, + initialSchema, + true, // isOutputMarkdown + true, // canUpdateOutput - Enable live output updates for real-time progress + ); + + this.subagentManager = config.getSubagentManager(); + + // Start loading subagents without blocking construction; initializeAsync handles its own errors + void this.initializeAsync(); + } + + /** + * Asynchronously initializes the tool by loading available subagents + * and updating the description and schema. + */ + private async initializeAsync(): Promise<void> { + try { + this.availableSubagents = await this.subagentManager.listSubagents(); + this.updateDescriptionAndSchema(); + } catch (error) { + console.warn('Failed to load subagents for Task tool:', error); + this.availableSubagents = []; + this.updateDescriptionAndSchema(); + } + } + + /** + * Updates the tool's description and schema based on available subagents. + */ + private updateDescriptionAndSchema(): void { + // Generate dynamic description + const baseDescription = `Delegate tasks to specialized subagents.
This tool allows you to offload specific tasks to agents optimized for particular domains, reducing context usage and improving task completion. + +## When to Use This Tool + +Use this tool proactively when: +- The task matches a specialized agent's description +- You want to reduce context usage for file searches or analysis +- The task requires domain-specific expertise +- You need to perform focused work that doesn't require the full conversation context + +## Available Subagents + +`; + + let subagentDescriptions = ''; + if (this.availableSubagents.length === 0) { + subagentDescriptions = + 'No subagents are currently configured. You can create subagents using the /agents command.'; + } else { + subagentDescriptions = this.availableSubagents + .map((subagent) => `- **${subagent.name}**: ${subagent.description}`) + .join('\n'); + } + + // Update description using object property assignment since it's readonly + (this as { description: string }).description = + baseDescription + subagentDescriptions; + + // Generate dynamic schema with enum of available subagent names + const subagentNames = this.availableSubagents.map((s) => s.name); + + // Update the parameter schema by modifying the existing object + const schema = this.parameterSchema as { + properties?: { + subagent_type?: { + enum?: string[]; + }; + }; + }; + if (schema.properties && schema.properties.subagent_type) { + if (subagentNames.length > 0) { + schema.properties.subagent_type.enum = subagentNames; + } else { + delete schema.properties.subagent_type.enum; + } + } + } + + /** + * Refreshes the available subagents and updates the tool description. + * This can be called when subagents are added or removed.
+ */ + async refreshSubagents(): Promise<void> { + await this.initializeAsync(); + } + + override validateToolParams(params: TaskParams): string | null { + // Validate required fields; each failure returns an error message, success returns null + if ( + !params.description || + typeof params.description !== 'string' || + params.description.trim() === '' + ) { + return 'Parameter "description" must be a non-empty string.'; + } + + if ( + !params.prompt || + typeof params.prompt !== 'string' || + params.prompt.trim() === '' + ) { + return 'Parameter "prompt" must be a non-empty string.'; + } + + if ( + !params.subagent_type || + typeof params.subagent_type !== 'string' || + params.subagent_type.trim() === '' + ) { + return 'Parameter "subagent_type" must be a non-empty string.'; + } + + // Validate that the subagent exists in the list cached by the last load/refresh + const subagentExists = this.availableSubagents.some( + (subagent) => subagent.name === params.subagent_type, + ); + + if (!subagentExists) { + const availableNames = this.availableSubagents.map((s) => s.name); + return `Subagent "${params.subagent_type}" not found.
Available subagents: ${availableNames.join(', ')}`; + } + + return null; + } + + protected createInvocation(params: TaskParams) { + return new TaskToolInvocation(this.config, this.subagentManager, params); + } +} + +class TaskToolInvocation extends BaseToolInvocation<TaskParams, ToolResult> { + private readonly _eventEmitter: SubAgentEventEmitter; + private currentDisplay: TaskResultDisplay | null = null; + private currentToolCalls: Array<{ + name: string; + status: 'executing' | 'success' | 'failed'; + error?: string; + args?: Record<string, unknown>; + result?: string; + returnDisplay?: string; + }> = []; + + constructor( + private readonly config: Config, + private readonly subagentManager: SubagentManager, + params: TaskParams, + ) { + super(params); + this._eventEmitter = new SubAgentEventEmitter(); + } + + get eventEmitter(): SubAgentEventEmitter { + return this._eventEmitter; + } + + /** + * Merges updates into the current display state and calls updateOutput if provided; a no-op until execute() has initialized the display + */ + private updateDisplay( + updates: Partial<TaskResultDisplay>, + updateOutput?: (output: ToolResultDisplay) => void, + ): void { + if (!this.currentDisplay) return; + + this.currentDisplay = { + ...this.currentDisplay, + ...updates, + }; + + if (updateOutput) { + updateOutput(this.currentDisplay); + } + } + + /** + * Sets up event listeners for real-time subagent progress updates + */ + private setupEventListeners( + updateOutput?: (output: ToolResultDisplay) => void, + ): void { + this.eventEmitter.on('start', () => { + this.updateDisplay({ status: 'running' }, updateOutput); + }); + + this.eventEmitter.on('model_text', (..._args: unknown[]) => { + // Model text events are no longer displayed as currentStep + // Keep the listener for potential future use + }); + + this.eventEmitter.on('tool_call', (...args: unknown[]) => { + const event = args[0] as SubAgentToolCallEvent; + const newToolCall = { + name: event.name, + status: 'executing' as const, + args: event.args, + }; + this.currentToolCalls.push(newToolCall); + + this.updateDisplay( + { + progress: {
+ toolCalls: [...this.currentToolCalls], + }, + }, + updateOutput, + ); + }); + + this.eventEmitter.on('tool_result', (...args: unknown[]) => { + const event = args[0] as SubAgentToolResultEvent; + const toolCallIndex = this.currentToolCalls.findIndex( + (call) => call.name === event.name && call.status === 'executing', // match only a call still awaiting its result, so repeated calls to the same tool resolve in order + ); + if (toolCallIndex >= 0) { + this.currentToolCalls[toolCallIndex] = { + ...this.currentToolCalls[toolCallIndex], + status: event.success ? 'success' : 'failed', + error: event.error, + // Note: result would need to be added to SubAgentToolResultEvent to be captured + }; + + this.updateDisplay( + { + progress: { + toolCalls: [...this.currentToolCalls], + }, + }, + updateOutput, + ); + } + }); + + this.eventEmitter.on('finish', (...args: unknown[]) => { + const event = args[0] as SubAgentFinishEvent; + this.updateDisplay( + { + status: event.terminate_reason === 'GOAL' ? 'completed' : 'failed', + terminateReason: event.terminate_reason, + // Keep progress data including tool calls for final display + }, + updateOutput, + ); + }); + + this.eventEmitter.on('error', () => { + this.updateDisplay({ status: 'failed' }, updateOutput); + }); + } + + getDescription(): string { + return `${this.params.subagent_type} subagent: "${this.params.description}"`; + } + + override async shouldConfirmExecute(): Promise<false> { + // Task delegation should execute automatically without user confirmation + return false; + } + + async execute( + signal?: AbortSignal, + updateOutput?: (output: ToolResultDisplay) => void, + ): Promise<ToolResult> { + try { + // Load the subagent configuration + const subagentConfig = await this.subagentManager.loadSubagent( + this.params.subagent_type, + ); + + if (!subagentConfig) { + const errorDisplay = { + type: 'subagent_execution' as const, + subagentName: this.params.subagent_type, + taskDescription: this.params.description, + status: 'failed' as const, + terminateReason: 'ERROR', + result: `Subagent "${this.params.subagent_type}" not found`, + }; + + return { + llmContent: [
+ { + text: JSON.stringify({ + success: false, + error: `Subagent "${this.params.subagent_type}" not found`, + }), + }, + ], + returnDisplay: errorDisplay, + }; + } + + // Initialize the current display state + this.currentDisplay = { + type: 'subagent_execution' as const, + subagentName: subagentConfig.name, + taskDescription: this.params.description, + status: 'running' as const, + }; + + // Set up event listeners for real-time updates + this.setupEventListeners(updateOutput); + + // Send initial display + if (updateOutput) { + updateOutput(this.currentDisplay); + } + const chatRecorder = new ChatRecordingService(this.config); + try { + chatRecorder.initialize(); + } catch { + // Initialization failed, continue without recording + } + const subagentScope = await this.subagentManager.createSubagentScope( + subagentConfig, + this.config, + { eventEmitter: this.eventEmitter }, + ); + + // Set up basic event listeners for chat recording + this.eventEmitter.on('start', () => { + chatRecorder.recordMessage({ + type: 'user', + content: `Subagent(${this.params.subagent_type}) Task: ${this.params.description}\n\n${this.params.prompt}`, + }); + }); + + this.eventEmitter.on('finish', (e) => { + const finishEvent = e as { + inputTokens?: number; + outputTokens?: number; + }; + const text = subagentScope.getFinalText() || ''; + chatRecorder.recordMessage({ type: 'gemini', content: text }); + const input = finishEvent.inputTokens ?? 0; + const output = finishEvent.outputTokens ??
0; + chatRecorder.recordMessageTokens({ + input, + output, + cached: 0, + total: input + output, + }); + }); + + // Create context state with the task prompt + const contextState = new ContextState(); + contextState.set('task_prompt', this.params.prompt); + + // Execute the subagent (blocking) + await subagentScope.runNonInteractive(contextState, signal); + + // Get the results + const finalText = subagentScope.getFinalText(); + const terminateReason = subagentScope.output.terminate_reason; + const success = terminateReason === 'GOAL'; + + // Use the detailed summary when the description mentions stats/statistics/detailed, otherwise the compact one + const wantDetailed = /\b(stats|statistics|detailed)\b/i.test( + this.params.description, + ); + const executionSummary = wantDetailed + ? subagentScope.formatDetailedResult(this.params.description) + : subagentScope.formatCompactResult(this.params.description); + + const result: TaskResult = { + success, + output: finalText, + subagent_name: subagentConfig.name, + execution_summary: executionSummary, + }; + + if (!success) { + result.error = `Task did not complete successfully. Termination reason: ${terminateReason}`; + } + + // Update the final display state + this.updateDisplay( + { + status: success ? 'completed' : 'failed', + terminateReason, + result: finalText, + executionSummary, + // Keep progress data including tool calls for final display + }, + updateOutput, + ); + + return { + llmContent: [{ text: JSON.stringify(result) }], + returnDisplay: this.currentDisplay!, + }; + } catch (error) { + const errorMessage = + error instanceof Error ?
error.message : String(error); + console.error(`[TaskTool] Error starting subagent: ${errorMessage}`); + + const errorDisplay = { + type: 'subagent_execution' as const, + subagentName: this.params.subagent_type, + taskDescription: this.params.description, + status: 'failed' as const, + terminateReason: 'ERROR', + result: `Failed to start subagent: ${errorMessage}`, + }; + + return { + llmContent: [ + { + text: JSON.stringify({ + success: false, + error: `Failed to start subagent: ${errorMessage}`, + }), + }, + ], + returnDisplay: errorDisplay, + }; + } + } +} diff --git a/packages/core/src/tools/tool-registry.ts b/packages/core/src/tools/tool-registry.ts index 90531742..9e430802 100644 --- a/packages/core/src/tools/tool-registry.ts +++ b/packages/core/src/tools/tool-registry.ts @@ -9,6 +9,7 @@ import { AnyDeclarativeTool, Kind, ToolResult, + ToolResultDisplay, BaseDeclarativeTool, BaseToolInvocation, ToolInvocation, @@ -41,7 +42,7 @@ class DiscoveredToolInvocation extends BaseToolInvocation< async execute( _signal: AbortSignal, - _updateOutput?: (output: string) => void, + _updateOutput?: (output: ToolResultDisplay) => void, ): Promise { const callCommand = this.config.getToolCallCommand()!; const child = spawn(callCommand, [this.toolName]); diff --git a/packages/core/src/tools/tools.ts b/packages/core/src/tools/tools.ts index c11631b1..cf7cfb98 100644 --- a/packages/core/src/tools/tools.ts +++ b/packages/core/src/tools/tools.ts @@ -50,7 +50,7 @@ export interface ToolInvocation< */ execute( signal: AbortSignal, - updateOutput?: (output: string) => void, + updateOutput?: (output: ToolResultDisplay) => void, ): Promise; } @@ -78,7 +78,7 @@ export abstract class BaseToolInvocation< abstract execute( signal: AbortSignal, - updateOutput?: (output: string) => void, + updateOutput?: (output: ToolResultDisplay) => void, ): Promise; } @@ -196,7 +196,7 @@ export abstract class DeclarativeTool< async buildAndExecute( params: TParams, signal: AbortSignal, - updateOutput?:
(output: string) => void, + updateOutput?: (output: ToolResultDisplay) => void, ): Promise { const invocation = this.build(params); return invocation.execute(signal, updateOutput); @@ -421,7 +421,31 @@ export function hasCycleInSchema(schema: object): boolean { return traverse(schema, new Set(), new Set()); } -export type ToolResultDisplay = string | FileDiff | TodoResultDisplay; +export interface TaskResultDisplay { + type: 'subagent_execution'; // literal tag identifying this variant of ToolResultDisplay + subagentName: string; + taskDescription: string; + status: 'running' | 'completed' | 'failed'; + terminateReason?: string; + result?: string; + executionSummary?: string; + progress?: { + toolCalls?: Array<{ + name: string; + status: 'executing' | 'success' | 'failed'; // per-call state, separate from the overall status above + error?: string; + args?: Record<string, unknown>; + result?: string; + returnDisplay?: string; + }>; + }; +} + +export type ToolResultDisplay = + | string + | FileDiff + | TodoResultDisplay + | TaskResultDisplay; export interface FileDiff { fileDiff: string;