feat: subagent runtime & CLI display - wip

tanzhenxin
2025-09-08 20:01:49 +08:00
parent 1f8ea7ab7a
commit 4985bfc000
31 changed files with 2664 additions and 390 deletions


@@ -0,0 +1,721 @@
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { vi, describe, it, expect, beforeEach, Mock, afterEach } from 'vitest';
import {
  ContextState,
  SubAgentScope,
  SubagentTerminateMode,
  PromptConfig,
  ModelConfig,
  RunConfig,
  ToolConfig,
} from './subagent.js';
import { Config, ConfigParameters } from '../config/config.js';
import { GeminiChat } from '../core/geminiChat.js';
import { createContentGenerator } from '../core/contentGenerator.js';
import { getEnvironmentContext } from '../utils/environmentContext.js';
import { executeToolCall } from '../core/nonInteractiveToolExecutor.js';
import { ToolRegistry } from '../tools/tool-registry.js';
import { DEFAULT_GEMINI_MODEL } from '../config/models.js';
import {
  Content,
  FunctionCall,
  FunctionDeclaration,
  GenerateContentConfig,
  Type,
} from '@google/genai';
import { ToolErrorType } from '../tools/tool-error.js';

vi.mock('../core/geminiChat.js');
vi.mock('../core/contentGenerator.js');
vi.mock('../utils/environmentContext.js');
vi.mock('../core/nonInteractiveToolExecutor.js');
vi.mock('../ide/ide-client.js');

async function createMockConfig(
  toolRegistryMocks = {},
): Promise<{ config: Config; toolRegistry: ToolRegistry }> {
  const configParams: ConfigParameters = {
    sessionId: 'test-session',
    model: DEFAULT_GEMINI_MODEL,
    targetDir: '.',
    debugMode: false,
    cwd: process.cwd(),
  };
  const config = new Config(configParams);
  await config.initialize();
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  await config.refreshAuth('test-auth' as any);

  // Mock ToolRegistry
  const mockToolRegistry = {
    getTool: vi.fn(),
    getFunctionDeclarations: vi.fn().mockReturnValue([]),
    getFunctionDeclarationsFiltered: vi.fn().mockReturnValue([]),
    ...toolRegistryMocks,
  } as unknown as ToolRegistry;
  vi.spyOn(config, 'getToolRegistry').mockReturnValue(mockToolRegistry);

  return { config, toolRegistry: mockToolRegistry };
}

// Helper to simulate LLM responses (sequence of tool calls over multiple turns)
const createMockStream = (
  functionCallsList: Array<FunctionCall[] | 'stop'>,
) => {
  let index = 0;
  return vi.fn().mockImplementation(() => {
    const response = functionCallsList[index] || 'stop';
    index++;
    return (async function* () {
      if (response === 'stop') {
        // When stopping, the model might return text, but the subagent logic
        // primarily cares about the absence of functionCalls.
        yield {
          candidates: [
            {
              content: {
                parts: [{ text: 'Done.' }],
              },
            },
          ],
        };
      } else if (response.length > 0) {
        yield { functionCalls: response };
      } else {
        // An empty array is also treated as a stop.
        yield {
          candidates: [
            {
              content: {
                parts: [{ text: 'Done.' }],
              },
            },
          ],
        };
      }
    })();
  });
};

describe('subagent.ts', () => {
  describe('ContextState', () => {
    it('should set and get values correctly', () => {
      const context = new ContextState();
      context.set('key1', 'value1');
      context.set('key2', 123);

      expect(context.get('key1')).toBe('value1');
      expect(context.get('key2')).toBe(123);
      expect(context.get_keys()).toEqual(['key1', 'key2']);
    });

    it('should return undefined for missing keys', () => {
      const context = new ContextState();
      expect(context.get('missing')).toBeUndefined();
    });
  });

  describe('SubAgentScope', () => {
    let mockSendMessageStream: Mock;

    const defaultModelConfig: ModelConfig = {
      model: 'gemini-1.5-flash-latest',
      temp: 0.5, // Specific temp to test override
      top_p: 1,
    };
    const defaultRunConfig: RunConfig = {
      max_time_minutes: 5,
      max_turns: 10,
    };

    beforeEach(async () => {
      vi.clearAllMocks();

      vi.mocked(getEnvironmentContext).mockResolvedValue([
        { text: 'Env Context' },
      ]);
      vi.mocked(createContentGenerator).mockResolvedValue({
        getGenerativeModel: vi.fn(),
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
      } as any);

      mockSendMessageStream = vi.fn();
      // We mock the implementation of the constructor.
      vi.mocked(GeminiChat).mockImplementation(
        () =>
          ({
            sendMessageStream: mockSendMessageStream,
          }) as unknown as GeminiChat,
      );

      // Default mock for executeToolCall
      vi.mocked(executeToolCall).mockResolvedValue({
        callId: 'default-call',
        responseParts: 'default response',
        resultDisplay: 'Default tool result',
        error: undefined,
        errorType: undefined,
      });
    });

    afterEach(() => {
      vi.restoreAllMocks();
    });

    // Helper to safely access generationConfig from mock calls
    const getGenerationConfigFromMock = (
      callIndex = 0,
    ): GenerateContentConfig & { systemInstruction?: string | Content } => {
      const callArgs = vi.mocked(GeminiChat).mock.calls[callIndex];
      const generationConfig = callArgs?.[2];
      // Ensure it's defined before proceeding
      expect(generationConfig).toBeDefined();
      if (!generationConfig) throw new Error('generationConfig is undefined');
      return generationConfig as GenerateContentConfig & {
        systemInstruction?: string | Content;
      };
    };

    describe('create (Tool Validation)', () => {
      const promptConfig: PromptConfig = { systemPrompt: 'Test prompt' };

      it('should create a SubAgentScope successfully with minimal config', async () => {
        const { config } = await createMockConfig();
        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
        );
        expect(scope).toBeInstanceOf(SubAgentScope);
      });

      it('should throw an error if a tool requires confirmation', async () => {
        const mockTool = {
          schema: { parameters: { type: Type.OBJECT, properties: {} } },
          build: vi.fn().mockReturnValue({
            shouldConfirmExecute: vi.fn().mockResolvedValue({
              type: 'exec',
              title: 'Confirm',
              command: 'rm -rf /',
            }),
          }),
        };
        const { config } = await createMockConfig({
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          getTool: vi.fn().mockReturnValue(mockTool as any),
        });
        const toolConfig: ToolConfig = { tools: ['risky_tool'] };

        await expect(
          SubAgentScope.create(
            'test-agent',
            config,
            promptConfig,
            defaultModelConfig,
            defaultRunConfig,
            toolConfig,
          ),
        ).rejects.toThrow(
          'Tool "risky_tool" requires user confirmation and cannot be used in a non-interactive subagent.',
        );
      });

      it('should succeed if tools do not require confirmation', async () => {
        const mockTool = {
          schema: { parameters: { type: Type.OBJECT, properties: {} } },
          build: vi.fn().mockReturnValue({
            shouldConfirmExecute: vi.fn().mockResolvedValue(null),
          }),
        };
        const { config } = await createMockConfig({
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          getTool: vi.fn().mockReturnValue(mockTool as any),
        });
        const toolConfig: ToolConfig = { tools: ['safe_tool'] };

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
          toolConfig,
        );
        expect(scope).toBeInstanceOf(SubAgentScope);
      });

      it('should skip interactivity check and warn for tools with required parameters', async () => {
        const consoleWarnSpy = vi
          .spyOn(console, 'warn')
          .mockImplementation(() => {});

        const mockToolWithParams = {
          schema: {
            parameters: {
              type: Type.OBJECT,
              properties: {
                path: { type: Type.STRING },
              },
              required: ['path'],
            },
          },
          // build should not be called, but we mock it to be safe
          build: vi.fn(),
        };
        const { config } = await createMockConfig({
          getTool: vi.fn().mockReturnValue(mockToolWithParams),
        });
        const toolConfig: ToolConfig = { tools: ['tool_with_params'] };

        // The creation should succeed without throwing
        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
          toolConfig,
        );
        expect(scope).toBeInstanceOf(SubAgentScope);

        // Check that the warning was logged
        expect(consoleWarnSpy).toHaveBeenCalledWith(
          'Cannot check tool "tool_with_params" for interactivity because it requires parameters. Assuming it is safe for non-interactive use.',
        );
        // Ensure build was never called
        expect(mockToolWithParams.build).not.toHaveBeenCalled();

        consoleWarnSpy.mockRestore();
      });
    });

    describe('runNonInteractive - Initialization and Prompting', () => {
      it('should correctly template the system prompt and initialize GeminiChat', async () => {
        const { config } = await createMockConfig();
        vi.mocked(GeminiChat).mockClear();

        const promptConfig: PromptConfig = {
          systemPrompt: 'Hello ${name}, your task is ${task}.',
        };
        const context = new ContextState();
        context.set('name', 'Agent');
        context.set('task', 'Testing');

        // Model stops immediately
        mockSendMessageStream.mockImplementation(createMockStream(['stop']));

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
        );
        await scope.runNonInteractive(context);

        // Check if GeminiChat was initialized correctly by the subagent
        expect(GeminiChat).toHaveBeenCalledTimes(1);
        const callArgs = vi.mocked(GeminiChat).mock.calls[0];

        // Check Generation Config
        const generationConfig = getGenerationConfigFromMock();

        // Check temperature override
        expect(generationConfig.temperature).toBe(defaultModelConfig.temp);
        expect(generationConfig.systemInstruction).toContain(
          'Hello Agent, your task is Testing.',
        );
        expect(generationConfig.systemInstruction).toContain(
          'Important Rules:',
        );

        // Check History (should include environment context)
        const history = callArgs[3];
        expect(history).toEqual([
          { role: 'user', parts: [{ text: 'Env Context' }] },
          {
            role: 'model',
            parts: [{ text: 'Got it. Thanks for the context!' }],
          },
        ]);
      });

      it('should use initialMessages instead of systemPrompt if provided', async () => {
        const { config } = await createMockConfig();
        vi.mocked(GeminiChat).mockClear();

        const initialMessages: Content[] = [
          { role: 'user', parts: [{ text: 'Hi' }] },
        ];
        const promptConfig: PromptConfig = { initialMessages };
        const context = new ContextState();

        // Model stops immediately
        mockSendMessageStream.mockImplementation(createMockStream(['stop']));

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
        );
        await scope.runNonInteractive(context);

        const callArgs = vi.mocked(GeminiChat).mock.calls[0];
        const generationConfig = getGenerationConfigFromMock();
        const history = callArgs[3];

        expect(generationConfig.systemInstruction).toBeUndefined();
        expect(history).toEqual([
          { role: 'user', parts: [{ text: 'Env Context' }] },
          {
            role: 'model',
            parts: [{ text: 'Got it. Thanks for the context!' }],
          },
          ...initialMessages,
        ]);
      });

      it('should throw an error if template variables are missing', async () => {
        const { config } = await createMockConfig();
        const promptConfig: PromptConfig = {
          systemPrompt: 'Hello ${name}, you are missing ${missing}.',
        };
        const context = new ContextState();
        context.set('name', 'Agent');
        // 'missing' is not set

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
        );

        // The error from templating causes runNonInteractive to reject and the
        // terminate_reason to be ERROR.
        await expect(scope.runNonInteractive(context)).rejects.toThrow(
          'Missing context values for the following keys: missing',
        );
        expect(scope.output.terminate_reason).toBe(SubagentTerminateMode.ERROR);
      });

      it('should validate that systemPrompt and initialMessages are mutually exclusive', async () => {
        const { config } = await createMockConfig();
        const promptConfig: PromptConfig = {
          systemPrompt: 'System',
          initialMessages: [{ role: 'user', parts: [{ text: 'Hi' }] }],
        };
        const context = new ContextState();

        const agent = await SubAgentScope.create(
          'TestAgent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
        );

        await expect(agent.runNonInteractive(context)).rejects.toThrow(
          'PromptConfig cannot have both `systemPrompt` and `initialMessages` defined.',
        );
        expect(agent.output.terminate_reason).toBe(SubagentTerminateMode.ERROR);
      });
    });

    describe('runNonInteractive - Execution and Tool Use', () => {
      const promptConfig: PromptConfig = { systemPrompt: 'Execute task.' };

      it('should terminate with GOAL if no outputs are expected and model stops', async () => {
        const { config } = await createMockConfig();

        // Model stops immediately
        mockSendMessageStream.mockImplementation(createMockStream(['stop']));

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
          // No ToolConfig, No OutputConfig
        );
        await scope.runNonInteractive(new ContextState());

        expect(scope.output.terminate_reason).toBe(SubagentTerminateMode.GOAL);
        expect(scope.output.result).toBe('Done.');
        expect(mockSendMessageStream).toHaveBeenCalledTimes(1);
        // Check the initial message
        expect(mockSendMessageStream.mock.calls[0][0].message).toEqual([
          { text: 'Get Started!' },
        ]);
      });

      it('should terminate with GOAL when model provides final text', async () => {
        const { config } = await createMockConfig();

        // Model stops immediately with text response
        mockSendMessageStream.mockImplementation(createMockStream(['stop']));

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
        );
        await scope.runNonInteractive(new ContextState());

        expect(scope.output.terminate_reason).toBe(SubagentTerminateMode.GOAL);
        expect(scope.output.result).toBe('Done.');
        expect(mockSendMessageStream).toHaveBeenCalledTimes(1);
      });

      it('should execute external tools and provide the response to the model', async () => {
        const listFilesToolDef: FunctionDeclaration = {
          name: 'list_files',
          description: 'Lists files',
          parameters: { type: Type.OBJECT, properties: {} },
        };
        const { config } = await createMockConfig({
          getFunctionDeclarationsFiltered: vi
            .fn()
            .mockReturnValue([listFilesToolDef]),
        });
        const toolConfig: ToolConfig = { tools: ['list_files'] };

        // Turn 1: Model calls the external tool
        // Turn 2: Model stops
        mockSendMessageStream.mockImplementation(
          createMockStream([
            [
              {
                id: 'call_1',
                name: 'list_files',
                args: { path: '.' },
              },
            ],
            'stop',
          ]),
        );

        // Mock the tool execution result
        vi.mocked(executeToolCall).mockResolvedValue({
          callId: 'call_1',
          responseParts: 'file1.txt\nfile2.ts',
          resultDisplay: 'Listed 2 files',
          error: undefined,
          errorType: undefined, // Or ToolErrorType.NONE if available and appropriate
        });

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
          toolConfig,
        );
        await scope.runNonInteractive(new ContextState());

        // Check tool execution
        expect(executeToolCall).toHaveBeenCalledWith(
          config,
          expect.objectContaining({ name: 'list_files', args: { path: '.' } }),
          expect.any(AbortSignal),
        );

        // Check the response sent back to the model
        const secondCallArgs = mockSendMessageStream.mock.calls[1][0];
        expect(secondCallArgs.message).toEqual([
          { text: 'file1.txt\nfile2.ts' },
        ]);

        expect(scope.output.terminate_reason).toBe(SubagentTerminateMode.GOAL);
      });

      it('should provide specific tool error responses to the model', async () => {
        const { config } = await createMockConfig();
        const toolConfig: ToolConfig = { tools: ['failing_tool'] };

        // Turn 1: Model calls the failing tool
        // Turn 2: Model stops after receiving the error response
        mockSendMessageStream.mockImplementation(
          createMockStream([
            [
              {
                id: 'call_fail',
                name: 'failing_tool',
                args: {},
              },
            ],
            'stop',
          ]),
        );

        // Mock the tool execution failure.
        vi.mocked(executeToolCall).mockResolvedValue({
          callId: 'call_fail',
          responseParts: 'ERROR: Tool failed catastrophically', // This should be sent to the model
          resultDisplay: 'Tool failed catastrophically',
          error: new Error('Failure'),
          errorType: ToolErrorType.INVALID_TOOL_PARAMS,
        });

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
          toolConfig,
        );
        await scope.runNonInteractive(new ContextState());

        // The agent should send the specific error message from responseParts.
        const secondCallArgs = mockSendMessageStream.mock.calls[1][0];
        expect(secondCallArgs.message).toEqual([
          {
            text: 'ERROR: Tool failed catastrophically',
          },
        ]);
      });
    });

    describe('runNonInteractive - Termination and Recovery', () => {
      const promptConfig: PromptConfig = { systemPrompt: 'Execute task.' };

      it('should terminate with MAX_TURNS if the limit is reached', async () => {
        const { config } = await createMockConfig();
        const runConfig: RunConfig = { ...defaultRunConfig, max_turns: 2 };

        // Model keeps calling tools repeatedly
        mockSendMessageStream.mockImplementation(
          createMockStream([
            [
              {
                name: 'list_files',
                args: { path: '/test' },
              },
            ],
            [
              {
                name: 'list_files',
                args: { path: '/test2' },
              },
            ],
            // This turn should not happen
            [
              {
                name: 'list_files',
                args: { path: '/test3' },
              },
            ],
          ]),
        );

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          runConfig,
        );
        await scope.runNonInteractive(new ContextState());

        expect(mockSendMessageStream).toHaveBeenCalledTimes(2);
        expect(scope.output.terminate_reason).toBe(
          SubagentTerminateMode.MAX_TURNS,
        );
      });

      it('should terminate with TIMEOUT if the time limit is reached during an LLM call', async () => {
        // Use fake timers to reliably test timeouts
        vi.useFakeTimers();

        const { config } = await createMockConfig();
        const runConfig: RunConfig = { max_time_minutes: 5, max_turns: 100 };

        // We need to control the resolution of the sendMessageStream promise
        // to advance the timer during execution.
        let resolveStream: (
          value: AsyncGenerator<unknown, void, unknown>,
        ) => void;
        const streamPromise = new Promise<
          AsyncGenerator<unknown, void, unknown>
        >((resolve) => {
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          resolveStream = resolve as any;
        });

        // The LLM call will hang until we resolve the promise.
        mockSendMessageStream.mockReturnValue(streamPromise);

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          runConfig,
        );
        const runPromise = scope.runNonInteractive(new ContextState());

        // Advance time beyond the limit (6 minutes) while the agent is awaiting
        // the LLM response.
        await vi.advanceTimersByTimeAsync(6 * 60 * 1000);

        // Now resolve the stream. The model returns 'stop'.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        resolveStream!(createMockStream(['stop'])() as any);

        await runPromise;

        expect(scope.output.terminate_reason).toBe(
          SubagentTerminateMode.TIMEOUT,
        );
        expect(mockSendMessageStream).toHaveBeenCalledTimes(1);

        vi.useRealTimers();
      });

      it('should terminate with ERROR if the model call throws', async () => {
        const { config } = await createMockConfig();
        mockSendMessageStream.mockRejectedValue(new Error('API Failure'));

        const scope = await SubAgentScope.create(
          'test-agent',
          config,
          promptConfig,
          defaultModelConfig,
          defaultRunConfig,
        );

        await expect(
          scope.runNonInteractive(new ContextState()),
        ).rejects.toThrow('API Failure');
        expect(scope.output.terminate_reason).toBe(SubagentTerminateMode.ERROR);
      });
    });
  });
});
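
For orientation, here is a minimal usage sketch of the SubAgentScope API that these tests exercise, reconstructed from the tests above rather than taken from the diff. The agent name, the 'list_files' tool, the relative import paths, and the pre-initialized Config instance are illustrative assumptions.

// Illustrative sketch only (not part of this commit): how the subagent runtime
// appears to be driven, based on the test cases above.
import {
  ContextState,
  SubAgentScope,
  SubagentTerminateMode,
} from './subagent.js';
import { Config } from '../config/config.js';

async function runExampleSubagent(config: Config): Promise<void> {
  // Values set on the context fill ${...} placeholders in the system prompt.
  const context = new ContextState();
  context.set('task', 'list the files in the workspace');

  const scope = await SubAgentScope.create(
    'example-agent', // assumed agent name
    config, // an already-initialized Config
    { systemPrompt: 'Your task is ${task}.' }, // PromptConfig (templated)
    { model: 'gemini-1.5-flash-latest', temp: 0.5, top_p: 1 }, // ModelConfig
    { max_time_minutes: 5, max_turns: 10 }, // RunConfig
    { tools: ['list_files'] }, // ToolConfig; tools must not require confirmation
  );

  await scope.runNonInteractive(context);

  // The run terminates with GOAL, MAX_TURNS, TIMEOUT, or ERROR.
  if (scope.output.terminate_reason === SubagentTerminateMode.GOAL) {
    console.log(scope.output.result); // final model text, e.g. 'Done.'
  }
}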