feat: subagent runtime & CLI display - wip

tanzhenxin
2025-09-08 20:01:49 +08:00
parent 1f8ea7ab7a
commit 4985bfc000
31 changed files with 2664 additions and 390 deletions


@@ -0,0 +1,721 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { vi, describe, it, expect, beforeEach, Mock, afterEach } from 'vitest';
import {
  ContextState,
  SubAgentScope,
  SubagentTerminateMode,
  PromptConfig,
  ModelConfig,
  RunConfig,
  ToolConfig,
} from './subagent.js';
import { Config, ConfigParameters } from '../config/config.js';
import { GeminiChat } from '../core/geminiChat.js';
import { createContentGenerator } from '../core/contentGenerator.js';
import { getEnvironmentContext } from '../utils/environmentContext.js';
import { executeToolCall } from '../core/nonInteractiveToolExecutor.js';
import { ToolRegistry } from '../tools/tool-registry.js';
import { DEFAULT_GEMINI_MODEL } from '../config/models.js';
import {
  Content,
  FunctionCall,
  FunctionDeclaration,
  GenerateContentConfig,
  Type,
} from '@google/genai';
import { ToolErrorType } from '../tools/tool-error.js';

vi.mock('../core/geminiChat.js');
vi.mock('../core/contentGenerator.js');
vi.mock('../utils/environmentContext.js');
vi.mock('../core/nonInteractiveToolExecutor.js');
vi.mock('../ide/ide-client.js');

async function createMockConfig(
  toolRegistryMocks = {},
): Promise<{ config: Config; toolRegistry: ToolRegistry }> {
  const configParams: ConfigParameters = {
    sessionId: 'test-session',
    model: DEFAULT_GEMINI_MODEL,
    targetDir: '.',
    debugMode: false,
    cwd: process.cwd(),
  };
  const config = new Config(configParams);
  await config.initialize();
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  await config.refreshAuth('test-auth' as any);

  // Mock ToolRegistry
  const mockToolRegistry = {
    getTool: vi.fn(),
    getFunctionDeclarations: vi.fn().mockReturnValue([]),
    getFunctionDeclarationsFiltered: vi.fn().mockReturnValue([]),
    ...toolRegistryMocks,
  } as unknown as ToolRegistry;
  vi.spyOn(config, 'getToolRegistry').mockReturnValue(mockToolRegistry);

  return { config, toolRegistry: mockToolRegistry };
}

// Helper to simulate LLM responses (sequence of tool calls over multiple turns)
const createMockStream = (
  functionCallsList: Array<FunctionCall[] | 'stop'>,
) => {
  let index = 0;
  return vi.fn().mockImplementation(() => {
    const response = functionCallsList[index] || 'stop';
    index++;
    return (async function* () {
      if (response === 'stop') {
        // When stopping, the model might return text, but the subagent logic
        // primarily cares about the absence of functionCalls.
        yield {
          candidates: [
            {
              content: {
                parts: [{ text: 'Done.' }],
              },
            },
          ],
        };
      } else if (response.length > 0) {
        yield { functionCalls: response };
      } else {
        // An empty array is also treated as a stop.
        yield {
          candidates: [
            {
              content: {
                parts: [{ text: 'Done.' }],
              },
            },
          ],
        };
      }
    })();
  });
};

describe('subagent.ts', () => {
  describe('ContextState', () => {
    it('should set and get values correctly', () => {
      const context = new ContextState();
      context.set('key1', 'value1');
      context.set('key2', 123);

      expect(context.get('key1')).toBe('value1');
      expect(context.get('key2')).toBe(123);
      expect(context.get_keys()).toEqual(['key1', 'key2']);
    });

    it('should return undefined for missing keys', () => {
      const context = new ContextState();
      expect(context.get('missing')).toBeUndefined();
    });
  });

  describe('SubAgentScope', () => {
    let mockSendMessageStream: Mock;

    const defaultModelConfig: ModelConfig = {
      model: 'gemini-1.5-flash-latest',
      temp: 0.5, // Specific temp to test override
      top_p: 1,
    };
    const defaultRunConfig: RunConfig = {
      max_time_minutes: 5,
      max_turns: 10,
    };

    beforeEach(async () => {
      vi.clearAllMocks();

      vi.mocked(getEnvironmentContext).mockResolvedValue([
        { text: 'Env Context' },
      ]);
      vi.mocked(createContentGenerator).mockResolvedValue({
        getGenerativeModel: vi.fn(),
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
      } as any);

      mockSendMessageStream = vi.fn();
      // We mock the implementation of the constructor.
      vi.mocked(GeminiChat).mockImplementation(
        () =>
          ({
            sendMessageStream: mockSendMessageStream,
          }) as unknown as GeminiChat,
      );

      // Default mock for executeToolCall
      vi.mocked(executeToolCall).mockResolvedValue({
        callId: 'default-call',
        responseParts: 'default response',
        resultDisplay: 'Default tool result',
        error: undefined,
        errorType: undefined,
      });
    });

    afterEach(() => {
      vi.restoreAllMocks();
    });

    // Helper to safely access generationConfig from mock calls
    const getGenerationConfigFromMock = (
      callIndex = 0,
    ): GenerateContentConfig & { systemInstruction?: string | Content } => {
      const callArgs = vi.mocked(GeminiChat).mock.calls[callIndex];
      const generationConfig = callArgs?.[2];
      // Ensure it's defined before proceeding
      expect(generationConfig).toBeDefined();
      if (!generationConfig) throw new Error('generationConfig is undefined');
      return generationConfig as GenerateContentConfig & {
        systemInstruction?: string | Content;
      };
    };

    describe('create (Tool Validation)', () => {
      const promptConfig: PromptConfig = { systemPrompt: 'Test prompt' };

      it('should create a SubAgentScope successfully with minimal config', async () => {
        const { config } = await createMockConfig();
        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
        );
        expect(scope).toBeInstanceOf(SubAgentScope);
      });

      it('should throw an error if a tool requires confirmation', async () => {
        const mockTool = {
          schema: { parameters: { type: Type.OBJECT, properties: {} } },
          build: vi.fn().mockReturnValue({
            shouldConfirmExecute: vi.fn().mockResolvedValue({
              type: 'exec',
              title: 'Confirm',
              command: 'rm -rf /',
            }),
          }),
        };
        const { config } = await createMockConfig({
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          getTool: vi.fn().mockReturnValue(mockTool as any),
        });
        const toolConfig: ToolConfig = { tools: ['risky_tool'] };

        await expect(
          SubAgentScope.create(
            'test-agent',
            config,
            promptConfig,
            defaultModelConfig,
            defaultRunConfig,
            toolConfig,
          ),
        ).rejects.toThrow(
          'Tool "risky_tool" requires user confirmation and cannot be used in a non-interactive subagent.',
        );
      });

      it('should succeed if tools do not require confirmation', async () => {
        const mockTool = {
          schema: { parameters: { type: Type.OBJECT, properties: {} } },
          build: vi.fn().mockReturnValue({
            shouldConfirmExecute: vi.fn().mockResolvedValue(null),
          }),
        };
        const { config } = await createMockConfig({
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          getTool: vi.fn().mockReturnValue(mockTool as any),
        });
        const toolConfig: ToolConfig = { tools: ['safe_tool'] };

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
          toolConfig,
        );
        expect(scope).toBeInstanceOf(SubAgentScope);
      });

      it('should skip interactivity check and warn for tools with required parameters', async () => {
        const consoleWarnSpy = vi
          .spyOn(console, 'warn')
          .mockImplementation(() => {});

        const mockToolWithParams = {
          schema: {
            parameters: {
              type: Type.OBJECT,
              properties: {
                path: { type: Type.STRING },
              },
              required: ['path'],
            },
          },
          // build should not be called, but we mock it to be safe
          build: vi.fn(),
        };
        const { config } = await createMockConfig({
          getTool: vi.fn().mockReturnValue(mockToolWithParams),
        });
        const toolConfig: ToolConfig = { tools: ['tool_with_params'] };

        // The creation should succeed without throwing
        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
          toolConfig,
        );
        expect(scope).toBeInstanceOf(SubAgentScope);

        // Check that the warning was logged
        expect(consoleWarnSpy).toHaveBeenCalledWith(
          'Cannot check tool "tool_with_params" for interactivity because it requires parameters. Assuming it is safe for non-interactive use.',
        );
        // Ensure build was never called
        expect(mockToolWithParams.build).not.toHaveBeenCalled();

        consoleWarnSpy.mockRestore();
      });
    });

    describe('runNonInteractive - Initialization and Prompting', () => {
      it('should correctly template the system prompt and initialize GeminiChat', async () => {
        const { config } = await createMockConfig();
        vi.mocked(GeminiChat).mockClear();

        const promptConfig: PromptConfig = {
          systemPrompt: 'Hello ${name}, your task is ${task}.',
        };
        const context = new ContextState();
        context.set('name', 'Agent');
        context.set('task', 'Testing');

        // Model stops immediately
        mockSendMessageStream.mockImplementation(createMockStream(['stop']));

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
        );
        await scope.runNonInteractive(context);

        // Check if GeminiChat was initialized correctly by the subagent
        expect(GeminiChat).toHaveBeenCalledTimes(1);
        const callArgs = vi.mocked(GeminiChat).mock.calls[0];

        // Check Generation Config
        const generationConfig = getGenerationConfigFromMock();

        // Check temperature override
        expect(generationConfig.temperature).toBe(defaultModelConfig.temp);
        expect(generationConfig.systemInstruction).toContain(
          'Hello Agent, your task is Testing.',
        );
        expect(generationConfig.systemInstruction).toContain(
          'Important Rules:',
        );

        // Check History (should include environment context)
        const history = callArgs[3];
        expect(history).toEqual([
          { role: 'user', parts: [{ text: 'Env Context' }] },
          {
            role: 'model',
            parts: [{ text: 'Got it. Thanks for the context!' }],
          },
        ]);
      });

      it('should use initialMessages instead of systemPrompt if provided', async () => {
        const { config } = await createMockConfig();
        vi.mocked(GeminiChat).mockClear();

        const initialMessages: Content[] = [
          { role: 'user', parts: [{ text: 'Hi' }] },
        ];
        const promptConfig: PromptConfig = { initialMessages };
        const context = new ContextState();

        // Model stops immediately
        mockSendMessageStream.mockImplementation(createMockStream(['stop']));

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
        );
        await scope.runNonInteractive(context);

        const callArgs = vi.mocked(GeminiChat).mock.calls[0];
        const generationConfig = getGenerationConfigFromMock();
        const history = callArgs[3];

        expect(generationConfig.systemInstruction).toBeUndefined();
        expect(history).toEqual([
          { role: 'user', parts: [{ text: 'Env Context' }] },
          {
            role: 'model',
            parts: [{ text: 'Got it. Thanks for the context!' }],
          },
          ...initialMessages,
        ]);
      });

      it('should throw an error if template variables are missing', async () => {
        const { config } = await createMockConfig();
        const promptConfig: PromptConfig = {
          systemPrompt: 'Hello ${name}, you are missing ${missing}.',
        };
        const context = new ContextState();
        context.set('name', 'Agent');
        // 'missing' is not set

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
        );

        // The error from templating causes runNonInteractive to reject and the
        // terminate_reason to be ERROR.
        await expect(scope.runNonInteractive(context)).rejects.toThrow(
          'Missing context values for the following keys: missing',
        );
        expect(scope.output.terminate_reason).toBe(SubagentTerminateMode.ERROR);
      });

      it('should validate that systemPrompt and initialMessages are mutually exclusive', async () => {
        const { config } = await createMockConfig();
        const promptConfig: PromptConfig = {
          systemPrompt: 'System',
          initialMessages: [{ role: 'user', parts: [{ text: 'Hi' }] }],
        };
        const context = new ContextState();

        const agent = await SubAgentScope.create(
          'TestAgent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
        );

        await expect(agent.runNonInteractive(context)).rejects.toThrow(
          'PromptConfig cannot have both `systemPrompt` and `initialMessages` defined.',
        );
        expect(agent.output.terminate_reason).toBe(SubagentTerminateMode.ERROR);
      });
    });

    describe('runNonInteractive - Execution and Tool Use', () => {
      const promptConfig: PromptConfig = { systemPrompt: 'Execute task.' };

      it('should terminate with GOAL if no outputs are expected and model stops', async () => {
        const { config } = await createMockConfig();

        // Model stops immediately
        mockSendMessageStream.mockImplementation(createMockStream(['stop']));

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
          // No ToolConfig, No OutputConfig
        );
        await scope.runNonInteractive(new ContextState());

        expect(scope.output.terminate_reason).toBe(SubagentTerminateMode.GOAL);
        expect(scope.output.result).toBe('Done.');
        expect(mockSendMessageStream).toHaveBeenCalledTimes(1);
        // Check the initial message
        expect(mockSendMessageStream.mock.calls[0][0].message).toEqual([
          { text: 'Get Started!' },
        ]);
      });

      it('should terminate with GOAL when model provides final text', async () => {
        const { config } = await createMockConfig();

        // Model stops immediately with text response
        mockSendMessageStream.mockImplementation(createMockStream(['stop']));

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
        );
        await scope.runNonInteractive(new ContextState());

        expect(scope.output.terminate_reason).toBe(SubagentTerminateMode.GOAL);
        expect(scope.output.result).toBe('Done.');
        expect(mockSendMessageStream).toHaveBeenCalledTimes(1);
      });

      it('should execute external tools and provide the response to the model', async () => {
        const listFilesToolDef: FunctionDeclaration = {
          name: 'list_files',
          description: 'Lists files',
          parameters: { type: Type.OBJECT, properties: {} },
        };
        const { config } = await createMockConfig({
          getFunctionDeclarationsFiltered: vi
            .fn()
            .mockReturnValue([listFilesToolDef]),
        });
        const toolConfig: ToolConfig = { tools: ['list_files'] };

        // Turn 1: Model calls the external tool
        // Turn 2: Model stops
        mockSendMessageStream.mockImplementation(
          createMockStream([
            [
              {
                id: 'call_1',
                name: 'list_files',
                args: { path: '.' },
              },
            ],
            'stop',
          ]),
        );

        // Mock the tool execution result
        vi.mocked(executeToolCall).mockResolvedValue({
          callId: 'call_1',
          responseParts: 'file1.txt\nfile2.ts',
          resultDisplay: 'Listed 2 files',
          error: undefined,
          errorType: undefined, // Or ToolErrorType.NONE if available and appropriate
        });

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
          toolConfig,
        );
        await scope.runNonInteractive(new ContextState());

        // Check tool execution
        expect(executeToolCall).toHaveBeenCalledWith(
          config,
          expect.objectContaining({ name: 'list_files', args: { path: '.' } }),
          expect.any(AbortSignal),
        );

        // Check the response sent back to the model
        const secondCallArgs = mockSendMessageStream.mock.calls[1][0];
        expect(secondCallArgs.message).toEqual([
          { text: 'file1.txt\nfile2.ts' },
        ]);

        expect(scope.output.terminate_reason).toBe(SubagentTerminateMode.GOAL);
      });

      it('should provide specific tool error responses to the model', async () => {
        const { config } = await createMockConfig();
        const toolConfig: ToolConfig = { tools: ['failing_tool'] };

        // Turn 1: Model calls the failing tool
        // Turn 2: Model stops after receiving the error response
        mockSendMessageStream.mockImplementation(
          createMockStream([
            [
              {
                id: 'call_fail',
                name: 'failing_tool',
                args: {},
              },
            ],
            'stop',
          ]),
        );

        // Mock the tool execution failure.
        vi.mocked(executeToolCall).mockResolvedValue({
          callId: 'call_fail',
          responseParts: 'ERROR: Tool failed catastrophically', // This should be sent to the model
          resultDisplay: 'Tool failed catastrophically',
          error: new Error('Failure'),
          errorType: ToolErrorType.INVALID_TOOL_PARAMS,
        });

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
          toolConfig,
        );
        await scope.runNonInteractive(new ContextState());

        // The agent should send the specific error message from responseParts.
        const secondCallArgs = mockSendMessageStream.mock.calls[1][0];
        expect(secondCallArgs.message).toEqual([
          {
            text: 'ERROR: Tool failed catastrophically',
          },
        ]);
      });
    });

    describe('runNonInteractive - Termination and Recovery', () => {
      const promptConfig: PromptConfig = { systemPrompt: 'Execute task.' };

      it('should terminate with MAX_TURNS if the limit is reached', async () => {
        const { config } = await createMockConfig();
        const runConfig: RunConfig = { ...defaultRunConfig, max_turns: 2 };

        // Model keeps calling tools repeatedly
        mockSendMessageStream.mockImplementation(
          createMockStream([
            [
              {
                name: 'list_files',
                args: { path: '/test' },
              },
            ],
            [
              {
                name: 'list_files',
                args: { path: '/test2' },
              },
            ],
            // This turn should not happen
            [
              {
                name: 'list_files',
                args: { path: '/test3' },
              },
            ],
          ]),
        );

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          runConfig,
        );
        await scope.runNonInteractive(new ContextState());

        expect(mockSendMessageStream).toHaveBeenCalledTimes(2);
        expect(scope.output.terminate_reason).toBe(
          SubagentTerminateMode.MAX_TURNS,
        );
      });

      it('should terminate with TIMEOUT if the time limit is reached during an LLM call', async () => {
        // Use fake timers to reliably test timeouts
        vi.useFakeTimers();

        const { config } = await createMockConfig();
        const runConfig: RunConfig = { max_time_minutes: 5, max_turns: 100 };

        // We need to control the resolution of the sendMessageStream promise
        // to advance the timer during execution.
        let resolveStream: (
          value: AsyncGenerator<unknown, void, unknown>,
        ) => void;
        const streamPromise = new Promise<
          AsyncGenerator<unknown, void, unknown>
        >((resolve) => {
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          resolveStream = resolve as any;
        });

        // The LLM call will hang until we resolve the promise.
        mockSendMessageStream.mockReturnValue(streamPromise);

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          runConfig,
        );
        const runPromise = scope.runNonInteractive(new ContextState());

        // Advance time beyond the limit (6 minutes) while the agent is awaiting
        // the LLM response.
        await vi.advanceTimersByTimeAsync(6 * 60 * 1000);

        // Now resolve the stream. The model returns 'stop'.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        resolveStream!(createMockStream(['stop'])() as any);

        await runPromise;

        expect(scope.output.terminate_reason).toBe(
          SubagentTerminateMode.TIMEOUT,
        );
        expect(mockSendMessageStream).toHaveBeenCalledTimes(1);

        vi.useRealTimers();
      });

      it('should terminate with ERROR if the model call throws', async () => {
        const { config } = await createMockConfig();
        mockSendMessageStream.mockRejectedValue(new Error('API Failure'));

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
        );

        await expect(
          scope.runNonInteractive(new ContextState()),
        ).rejects.toThrow('API Failure');
        expect(scope.output.terminate_reason).toBe(SubagentTerminateMode.ERROR);
      });
    });
  });
});
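
For orientation, here is a minimal usage sketch of the SubAgentScope API that these tests exercise, reconstructed from the tests above rather than taken from the diff. The agent name, the 'list_files' tool, the relative import paths, and the pre-initialized Config instance are illustrative assumptions.

// Illustrative sketch only (not part of this commit): how the subagent runtime
// appears to be driven, based on the test cases above.
import {
  ContextState,
  SubAgentScope,
  SubagentTerminateMode,
} from './subagent.js';
import { Config } from '../config/config.js';

async function runExampleSubagent(config: Config): Promise<void> {
  // Values set on the context fill ${...} placeholders in the system prompt.
  const context = new ContextState();
  context.set('task', 'list the files in the workspace');

  const scope = await SubAgentScope.create(
    'example-agent', // assumed agent name
    config, // an already-initialized Config
    { systemPrompt: 'Your task is ${task}.' }, // PromptConfig (templated)
    { model: 'gemini-1.5-flash-latest', temp: 0.5, top_p: 1 }, // ModelConfig
    { max_time_minutes: 5, max_turns: 10 }, // RunConfig
    { tools: ['list_files'] }, // ToolConfig; tools must not require confirmation
  );

  await scope.runNonInteractive(context);

  // The run terminates with GOAL, MAX_TURNS, TIMEOUT, or ERROR.
  if (scope.output.terminate_reason === SubagentTerminateMode.GOAL) {
    console.log(scope.output.result); // final model text, e.g. 'Done.'
  }
}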