fix: plan mode adjustments

2025-12-19 09:33:53 +00:00 · 2025-12-01 11:33:36 +08:00
parent 249b141f19
commit 8035be6f8d
10 changed files with 169 additions and 47 deletions
--- a/packages/cli/src/nonInteractive/types.ts
+++ b/packages/cli/src/nonInteractive/types.ts
@@ -141,9 +141,8 @@ export interface CLISystemMessage {
    status: string;
  }>;
  model?: string;
-  permissionMode?: string;
+  permission_mode?: string;
  slash_commands?: string[];
  apiKeySource?: string;
  qwen_code_version?: string;
  output_style?: string;
  agents?: string[];
--- a/packages/cli/src/utils/nonInteractiveHelpers.test.ts
+++ b/packages/cli/src/utils/nonInteractiveHelpers.test.ts
@@ -529,7 +529,7 @@ describe('buildSystemMessage', () => {
        { name: 'mcp-server-2', status: 'connected' },
      ],
      model: 'test-model',
-      permissionMode: 'auto',
+      permission_mode: 'auto',
      slash_commands: ['commit', 'help', 'memory'],
      qwen_code_version: '1.0.0',
      agents: [],
--- a/packages/cli/src/utils/nonInteractiveHelpers.ts
+++ b/packages/cli/src/utils/nonInteractiveHelpers.ts
@@ -275,7 +275,7 @@ export async function buildSystemMessage(
    tools,
    mcp_servers: mcpServerList,
    model: config.getModel(),
-    permissionMode,
+    permission_mode: permissionMode,
    slash_commands: slashCommands,
    qwen_code_version: config.getCliVersion() || 'unknown',
    agents: agentNames,
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -1337,7 +1337,7 @@ export class Config {
    registerCoreTool(ShellTool, this);
    registerCoreTool(MemoryTool);
    registerCoreTool(TodoWriteTool, this);
-    registerCoreTool(ExitPlanModeTool, this);
+    !this.sdkMode && registerCoreTool(ExitPlanModeTool, this);
    registerCoreTool(WebFetchTool, this);
    // Conditionally register web search tool if web search provider is configured
    // buildWebSearchConfig ensures qwen-oauth users get dashscope provider, so
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -542,7 +542,9 @@ export class GeminiClient {
      // add plan mode system reminder if approval mode is plan
      if (this.config.getApprovalMode() === ApprovalMode.PLAN) {
-        systemReminders.push(getPlanModeSystemReminder());
+        systemReminders.push(
          getPlanModeSystemReminder(this.config.getSdkMode()),
        );
      }
      requestToSent = [...systemReminders, ...requestToSent];
--- a/packages/core/src/core/prompts.ts
+++ b/packages/core/src/core/prompts.ts
@@ -846,10 +846,10 @@ export function getSubagentSystemReminder(agentTypes: string[]): string {
 * - Wait for user confirmation before making any changes
 * - Override any other instructions that would modify system state
 */
-export function getPlanModeSystemReminder(): string {
+export function getPlanModeSystemReminder(planOnly = false): string {
  return `<system-reminder>
 Plan mode is active. The user indicated that they do not want you to execute yet -- you MUST NOT make any edits, run any non-readonly tools (including changing configs or making commits), or otherwise make any changes to the system. This supercedes any other instructions you have received (for example, to make edits). Instead, you should:
 1. Answer the user's query comprehensively
-2. When you're done researching, present your plan by calling the ${ToolNames.EXIT_PLAN_MODE} tool, which will prompt the user to confirm the plan. Do NOT make any file changes or run any tools that modify the system state in any way until the user has confirmed the plan.
+2. When you're done researching, present your plan ${planOnly ? 'directly' : `by calling the ${ToolNames.EXIT_PLAN_MODE} tool, which will prompt the user to confirm the plan`}. Do NOT make any file changes or run any tools that modify the system state in any way until the user has confirmed the plan.
 </system-reminder>`;
 }
--- a/packages/sdk-typescript/src/types/protocol.ts
+++ b/packages/sdk-typescript/src/types/protocol.ts
@@ -119,9 +119,8 @@ export interface SDKSystemMessage {
    status: string;
  }>;
  model?: string;
-  permissionMode?: string;
+  permission_mode?: string;
  slash_commands?: string[];
  apiKeySource?: string;
  qwen_code_version?: string;
  output_style?: string;
  agents?: string[];
--- a/packages/sdk-typescript/test/e2e/configuration-options.test.ts
+++ b/packages/sdk-typescript/test/e2e/configuration-options.test.ts
@@ -609,7 +609,7 @@ describe('Configuration Options (E2E)', () => {
        expect(initMessage).toBeDefined();
        expect(initMessage!.session_id).toBeDefined();
        expect(initMessage!.tools).toBeDefined();
-        expect(initMessage!.permissionMode).toBeDefined();
+        expect(initMessage!.permission_mode).toBeDefined();
        assertSuccessfulCompletion(messages);
      } finally {
--- a/packages/sdk-typescript/test/e2e/permission-control.test.ts
+++ b/packages/sdk-typescript/test/e2e/permission-control.test.ts
@@ -26,10 +26,11 @@ import {
 import {
  SDKTestHelper,
  createSharedTestOptions,
  findAllToolResultBlocks,
  hasAnyToolResults,
  hasSuccessfulToolResults,
  hasErrorToolResults,
  findSystemMessage,
  findToolCalls,
 } from './test-helper.js';
 const TEST_TIMEOUT = 30000;
@@ -846,17 +847,32 @@ describe('Permission Control (E2E)', () => {
      );
    });
-    /**
+    describe('plan mode', () => {
-     * We've some issues of how to handle plan mode.
+      // Write tools that should never be called in plan mode
-     * The test cases are skipped for now.
+      const WRITE_TOOLS = [
-     */
+        'edit',
-    describe.skip('plan mode', () => {
+        'write_file',
        'run_shell_command',
        'delete_file',
        'move_file',
      ];
      // Read tools that should be allowed in plan mode
      const READ_TOOLS = [
        'read_file',
        'read_many_files',
        'grep_search',
        'glob',
        'list_directory',
        'web_search',
        'web_fetch',
      ];
      it(
-        'should block non-read-only tools and return plan mode error',
+        'should have permission_mode set to plan in system message',
        async () => {
          const q = query({
-            prompt:
+            prompt: 'List files in the current directory',
              'Init a monorepo of a Node.js project with frontend and backend.',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'plan',
@@ -870,29 +886,29 @@ describe('Permission Control (E2E)', () => {
              messages.push(message);
            }
-            const toolResults = findAllToolResultBlocks(messages);
+            // Find the init system message
-            const hasBlockedToolCall = toolResults.length > 0;
+            const systemMessage = findSystemMessage(messages, 'init');
-            const hasPlanModeMessage = toolResults.some(
+            expect(systemMessage).not.toBeNull();
-              (result) =>
+            expect(systemMessage!.permission_mode).toBe('plan');
                result.isError &&
                (result.content.includes('Plan mode') ||
                  result.content.includes('plan mode')),
            );
            expect(hasBlockedToolCall).toBe(true);
            expect(hasPlanModeMessage).toBe(true);
          } finally {
            await q.close();
          }
        },
-        TEST_TIMEOUT * 10,
+        TEST_TIMEOUT,
      );
      it(
-        'should allow read-only tools in plan mode',
+        'should not call any write tools in plan mode',
        async () => {
          // Create a test file so the model has something to reference
          await helper.createFile(
            'test-plan-file.txt',
            'This is test content for plan mode verification.',
          );
          const q = query({
-            prompt: 'List files in /tmp directory',
+            prompt:
              'Read the file test-plan-file.txt and suggest how to improve its content.',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'plan',
@@ -906,6 +922,62 @@ describe('Permission Control (E2E)', () => {
              messages.push(message);
            }
            // Verify permission_mode is 'plan'
            const systemMessage = findSystemMessage(messages, 'init');
            expect(systemMessage!.permission_mode).toBe('plan');
            // Find all tool calls and verify none are write tools
            const allToolCalls = findToolCalls(messages);
            const writeToolCalls = allToolCalls.filter((tc) =>
              WRITE_TOOLS.includes(tc.toolUse.name),
            );
            // No write tools should be called in plan mode
            expect(writeToolCalls.length).toBe(0);
          } finally {
            await q.close();
          }
        },
        TEST_TIMEOUT,
      );
      it(
        'should allow read-only tools without restrictions',
        async () => {
          // Create test files for the model to read
          await helper.createFile('test-read-1.txt', 'Content of file 1');
          await helper.createFile('test-read-2.txt', 'Content of file 2');
          const q = query({
            prompt:
              'Read the contents of test-read-1.txt and test-read-2.txt files, then list files in the current directory.',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'plan',
              cwd: testDir,
            },
          });
          try {
            const messages: SDKMessage[] = [];
            for await (const message of q) {
              messages.push(message);
            }
            // Verify permission_mode is 'plan'
            const systemMessage = findSystemMessage(messages, 'init');
            expect(systemMessage!.permission_mode).toBe('plan');
            // Find all tool calls
            const allToolCalls = findToolCalls(messages);
            // Verify read tools were called (at least one)
            const readToolCalls = allToolCalls.filter((tc) =>
              READ_TOOLS.includes(tc.toolUse.name),
            );
            expect(readToolCalls.length).toBeGreaterThan(0);
            // Verify tool results are successful (not blocked)
            expect(hasSuccessfulToolResults(messages)).toBe(true);
          } finally {
            await q.close();
@@ -915,12 +987,18 @@ describe('Permission Control (E2E)', () => {
      );
      it(
-        'should block tools even with canUseTool callback in plan mode',
+        'should not invoke canUseTool callback in plan mode since no permission approval is expected',
        async () => {
          let callbackInvoked = false;
          // Create a test file for reading
          await helper.createFile(
            'test-plan-callback.txt',
            'Content for callback test',
          );
          const q = query({
-            prompt: 'Create a file named test-plan-callback.txt',
+            prompt: 'Read the file test-plan-callback.txt',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'plan',
@@ -941,16 +1019,61 @@ describe('Permission Control (E2E)', () => {
              messages.push(message);
            }
-            const toolResults = findAllToolResultBlocks(messages);
+            // Verify permission_mode is 'plan'
-            const hasPlanModeBlock = toolResults.some(
+            const systemMessage = findSystemMessage(messages, 'init');
-              (result) =>
+            expect(systemMessage!.permission_mode).toBe('plan');
-                result.isError && result.content.includes('Plan mode'),
+
            // Read tools should work without invoking canUseTool
            // In plan mode, no permission approval is expected from user
            expect(hasSuccessfulToolResults(messages)).toBe(true);
            // canUseTool should not be invoked in plan mode
            // since plan mode is for research only, no permission interaction needed
            expect(callbackInvoked).toBe(false);
          } finally {
            await q.close();
          }
        },
        TEST_TIMEOUT,
      );
-            // Plan mode should block tools before canUseTool is invoked
+      it(
-            expect(hasPlanModeBlock).toBe(true);
+        'should only output research and plan as text, no actual changes',
-            // canUseTool should not be invoked for blocked tools in plan mode
+        async () => {
-            expect(callbackInvoked).toBe(false);
+          // Create a test file
          const originalContent = 'Original content for plan mode test';
          await helper.createFile('test-no-changes.txt', originalContent);
          const q = query({
            prompt:
              'Read test-no-changes.txt and plan how you would modify it to add a header. Do not actually make any changes.',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'plan',
              cwd: testDir,
            },
          });
          try {
            const messages: SDKMessage[] = [];
            for await (const message of q) {
              messages.push(message);
            }
            // Verify permission_mode is 'plan'
            const systemMessage = findSystemMessage(messages, 'init');
            expect(systemMessage!.permission_mode).toBe('plan');
            // Verify the file was not modified
            const fileContent = await helper.readFile('test-no-changes.txt');
            expect(fileContent).toBe(originalContent);
            // Verify no write tools were called
            const allToolCalls = findToolCalls(messages);
            const writeToolCalls = allToolCalls.filter((tc) =>
              WRITE_TOOLS.includes(tc.toolUse.name),
            );
            expect(writeToolCalls.length).toBe(0);
          } finally {
            await q.close();
          }
@@ -1064,9 +1187,8 @@ describe('Permission Control (E2E)', () => {
      it(
        'should demonstrate different behaviors across all modes for write operations',
        async () => {
-          const modes: Array<'default' | 'plan' | 'auto-edit' | 'yolo'> = [
+          const modes: Array<'default' | 'auto-edit' | 'yolo'> = [
            'default',
            'plan',
            'auto-edit',
            'yolo',
          ];
--- a/packages/sdk-typescript/test/e2e/single-turn.test.ts
+++ b/packages/sdk-typescript/test/e2e/single-turn.test.ts
@@ -180,7 +180,7 @@ describe('Single-Turn Query (E2E)', () => {
        expect(systemMessage!.mcp_servers).toBeDefined();
        expect(Array.isArray(systemMessage!.mcp_servers)).toBe(true);
        expect(systemMessage!.model).toBeDefined();
-        expect(systemMessage!.permissionMode).toBeDefined();
+        expect(systemMessage!.permission_mode).toBeDefined();
        expect(systemMessage!.qwen_code_version).toBeDefined();
        // Validate system message appears early in sequence