fix: plan mode adjustments

This commit is contained in:
mingholy.lmh
2025-12-01 11:33:36 +08:00
parent 249b141f19
commit 8035be6f8d
10 changed files with 169 additions and 47 deletions

View File

@@ -141,9 +141,8 @@ export interface CLISystemMessage {
status: string; status: string;
}>; }>;
model?: string; model?: string;
permissionMode?: string; permission_mode?: string;
slash_commands?: string[]; slash_commands?: string[];
apiKeySource?: string;
qwen_code_version?: string; qwen_code_version?: string;
output_style?: string; output_style?: string;
agents?: string[]; agents?: string[];

View File

@@ -529,7 +529,7 @@ describe('buildSystemMessage', () => {
{ name: 'mcp-server-2', status: 'connected' }, { name: 'mcp-server-2', status: 'connected' },
], ],
model: 'test-model', model: 'test-model',
permissionMode: 'auto', permission_mode: 'auto',
slash_commands: ['commit', 'help', 'memory'], slash_commands: ['commit', 'help', 'memory'],
qwen_code_version: '1.0.0', qwen_code_version: '1.0.0',
agents: [], agents: [],

View File

@@ -275,7 +275,7 @@ export async function buildSystemMessage(
tools, tools,
mcp_servers: mcpServerList, mcp_servers: mcpServerList,
model: config.getModel(), model: config.getModel(),
permissionMode, permission_mode: permissionMode,
slash_commands: slashCommands, slash_commands: slashCommands,
qwen_code_version: config.getCliVersion() || 'unknown', qwen_code_version: config.getCliVersion() || 'unknown',
agents: agentNames, agents: agentNames,

View File

@@ -1337,7 +1337,7 @@ export class Config {
registerCoreTool(ShellTool, this); registerCoreTool(ShellTool, this);
registerCoreTool(MemoryTool); registerCoreTool(MemoryTool);
registerCoreTool(TodoWriteTool, this); registerCoreTool(TodoWriteTool, this);
registerCoreTool(ExitPlanModeTool, this); !this.sdkMode && registerCoreTool(ExitPlanModeTool, this);
registerCoreTool(WebFetchTool, this); registerCoreTool(WebFetchTool, this);
// Conditionally register web search tool if web search provider is configured // Conditionally register web search tool if web search provider is configured
// buildWebSearchConfig ensures qwen-oauth users get dashscope provider, so // buildWebSearchConfig ensures qwen-oauth users get dashscope provider, so

View File

@@ -542,7 +542,9 @@ export class GeminiClient {
// add plan mode system reminder if approval mode is plan // add plan mode system reminder if approval mode is plan
if (this.config.getApprovalMode() === ApprovalMode.PLAN) { if (this.config.getApprovalMode() === ApprovalMode.PLAN) {
systemReminders.push(getPlanModeSystemReminder()); systemReminders.push(
getPlanModeSystemReminder(this.config.getSdkMode()),
);
} }
requestToSent = [...systemReminders, ...requestToSent]; requestToSent = [...systemReminders, ...requestToSent];

View File

@@ -846,10 +846,10 @@ export function getSubagentSystemReminder(agentTypes: string[]): string {
* - Wait for user confirmation before making any changes * - Wait for user confirmation before making any changes
* - Override any other instructions that would modify system state * - Override any other instructions that would modify system state
*/ */
export function getPlanModeSystemReminder(): string { export function getPlanModeSystemReminder(planOnly = false): string {
return `<system-reminder> return `<system-reminder>
Plan mode is active. The user indicated that they do not want you to execute yet -- you MUST NOT make any edits, run any non-readonly tools (including changing configs or making commits), or otherwise make any changes to the system. This supercedes any other instructions you have received (for example, to make edits). Instead, you should: Plan mode is active. The user indicated that they do not want you to execute yet -- you MUST NOT make any edits, run any non-readonly tools (including changing configs or making commits), or otherwise make any changes to the system. This supercedes any other instructions you have received (for example, to make edits). Instead, you should:
1. Answer the user's query comprehensively 1. Answer the user's query comprehensively
2. When you're done researching, present your plan by calling the ${ToolNames.EXIT_PLAN_MODE} tool, which will prompt the user to confirm the plan. Do NOT make any file changes or run any tools that modify the system state in any way until the user has confirmed the plan. 2. When you're done researching, present your plan ${planOnly ? 'directly' : `by calling the ${ToolNames.EXIT_PLAN_MODE} tool, which will prompt the user to confirm the plan`}. Do NOT make any file changes or run any tools that modify the system state in any way until the user has confirmed the plan.
</system-reminder>`; </system-reminder>`;
} }

View File

@@ -119,9 +119,8 @@ export interface SDKSystemMessage {
status: string; status: string;
}>; }>;
model?: string; model?: string;
permissionMode?: string; permission_mode?: string;
slash_commands?: string[]; slash_commands?: string[];
apiKeySource?: string;
qwen_code_version?: string; qwen_code_version?: string;
output_style?: string; output_style?: string;
agents?: string[]; agents?: string[];

View File

@@ -609,7 +609,7 @@ describe('Configuration Options (E2E)', () => {
expect(initMessage).toBeDefined(); expect(initMessage).toBeDefined();
expect(initMessage!.session_id).toBeDefined(); expect(initMessage!.session_id).toBeDefined();
expect(initMessage!.tools).toBeDefined(); expect(initMessage!.tools).toBeDefined();
expect(initMessage!.permissionMode).toBeDefined(); expect(initMessage!.permission_mode).toBeDefined();
assertSuccessfulCompletion(messages); assertSuccessfulCompletion(messages);
} finally { } finally {

View File

@@ -26,10 +26,11 @@ import {
import { import {
SDKTestHelper, SDKTestHelper,
createSharedTestOptions, createSharedTestOptions,
findAllToolResultBlocks,
hasAnyToolResults, hasAnyToolResults,
hasSuccessfulToolResults, hasSuccessfulToolResults,
hasErrorToolResults, hasErrorToolResults,
findSystemMessage,
findToolCalls,
} from './test-helper.js'; } from './test-helper.js';
const TEST_TIMEOUT = 30000; const TEST_TIMEOUT = 30000;
@@ -846,17 +847,32 @@ describe('Permission Control (E2E)', () => {
); );
}); });
/** describe('plan mode', () => {
* We've some issues of how to handle plan mode. // Write tools that should never be called in plan mode
* The test cases are skipped for now. const WRITE_TOOLS = [
*/ 'edit',
describe.skip('plan mode', () => { 'write_file',
'run_shell_command',
'delete_file',
'move_file',
];
// Read tools that should be allowed in plan mode
const READ_TOOLS = [
'read_file',
'read_many_files',
'grep_search',
'glob',
'list_directory',
'web_search',
'web_fetch',
];
it( it(
'should block non-read-only tools and return plan mode error', 'should have permission_mode set to plan in system message',
async () => { async () => {
const q = query({ const q = query({
prompt: prompt: 'List files in the current directory',
'Init a monorepo of a Node.js project with frontend and backend.',
options: { options: {
...SHARED_TEST_OPTIONS, ...SHARED_TEST_OPTIONS,
permissionMode: 'plan', permissionMode: 'plan',
@@ -870,29 +886,29 @@ describe('Permission Control (E2E)', () => {
messages.push(message); messages.push(message);
} }
const toolResults = findAllToolResultBlocks(messages); // Find the init system message
const hasBlockedToolCall = toolResults.length > 0; const systemMessage = findSystemMessage(messages, 'init');
const hasPlanModeMessage = toolResults.some( expect(systemMessage).not.toBeNull();
(result) => expect(systemMessage!.permission_mode).toBe('plan');
result.isError &&
(result.content.includes('Plan mode') ||
result.content.includes('plan mode')),
);
expect(hasBlockedToolCall).toBe(true);
expect(hasPlanModeMessage).toBe(true);
} finally { } finally {
await q.close(); await q.close();
} }
}, },
TEST_TIMEOUT * 10, TEST_TIMEOUT,
); );
it( it(
'should allow read-only tools in plan mode', 'should not call any write tools in plan mode',
async () => { async () => {
// Create a test file so the model has something to reference
await helper.createFile(
'test-plan-file.txt',
'This is test content for plan mode verification.',
);
const q = query({ const q = query({
prompt: 'List files in /tmp directory', prompt:
'Read the file test-plan-file.txt and suggest how to improve its content.',
options: { options: {
...SHARED_TEST_OPTIONS, ...SHARED_TEST_OPTIONS,
permissionMode: 'plan', permissionMode: 'plan',
@@ -906,6 +922,62 @@ describe('Permission Control (E2E)', () => {
messages.push(message); messages.push(message);
} }
// Verify permission_mode is 'plan'
const systemMessage = findSystemMessage(messages, 'init');
expect(systemMessage!.permission_mode).toBe('plan');
// Find all tool calls and verify none are write tools
const allToolCalls = findToolCalls(messages);
const writeToolCalls = allToolCalls.filter((tc) =>
WRITE_TOOLS.includes(tc.toolUse.name),
);
// No write tools should be called in plan mode
expect(writeToolCalls.length).toBe(0);
} finally {
await q.close();
}
},
TEST_TIMEOUT,
);
it(
'should allow read-only tools without restrictions',
async () => {
// Create test files for the model to read
await helper.createFile('test-read-1.txt', 'Content of file 1');
await helper.createFile('test-read-2.txt', 'Content of file 2');
const q = query({
prompt:
'Read the contents of test-read-1.txt and test-read-2.txt files, then list files in the current directory.',
options: {
...SHARED_TEST_OPTIONS,
permissionMode: 'plan',
cwd: testDir,
},
});
try {
const messages: SDKMessage[] = [];
for await (const message of q) {
messages.push(message);
}
// Verify permission_mode is 'plan'
const systemMessage = findSystemMessage(messages, 'init');
expect(systemMessage!.permission_mode).toBe('plan');
// Find all tool calls
const allToolCalls = findToolCalls(messages);
// Verify read tools were called (at least one)
const readToolCalls = allToolCalls.filter((tc) =>
READ_TOOLS.includes(tc.toolUse.name),
);
expect(readToolCalls.length).toBeGreaterThan(0);
// Verify tool results are successful (not blocked)
expect(hasSuccessfulToolResults(messages)).toBe(true); expect(hasSuccessfulToolResults(messages)).toBe(true);
} finally { } finally {
await q.close(); await q.close();
@@ -915,12 +987,18 @@ describe('Permission Control (E2E)', () => {
); );
it( it(
'should block tools even with canUseTool callback in plan mode', 'should not invoke canUseTool callback in plan mode since no permission approval is expected',
async () => { async () => {
let callbackInvoked = false; let callbackInvoked = false;
// Create a test file for reading
await helper.createFile(
'test-plan-callback.txt',
'Content for callback test',
);
const q = query({ const q = query({
prompt: 'Create a file named test-plan-callback.txt', prompt: 'Read the file test-plan-callback.txt',
options: { options: {
...SHARED_TEST_OPTIONS, ...SHARED_TEST_OPTIONS,
permissionMode: 'plan', permissionMode: 'plan',
@@ -941,16 +1019,61 @@ describe('Permission Control (E2E)', () => {
messages.push(message); messages.push(message);
} }
const toolResults = findAllToolResultBlocks(messages); // Verify permission_mode is 'plan'
const hasPlanModeBlock = toolResults.some( const systemMessage = findSystemMessage(messages, 'init');
(result) => expect(systemMessage!.permission_mode).toBe('plan');
result.isError && result.content.includes('Plan mode'),
// Read tools should work without invoking canUseTool
// In plan mode, no permission approval is expected from user
expect(hasSuccessfulToolResults(messages)).toBe(true);
// canUseTool should not be invoked in plan mode
// since plan mode is for research only, no permission interaction needed
expect(callbackInvoked).toBe(false);
} finally {
await q.close();
}
},
TEST_TIMEOUT,
); );
// Plan mode should block tools before canUseTool is invoked it(
expect(hasPlanModeBlock).toBe(true); 'should only output research and plan as text, no actual changes',
// canUseTool should not be invoked for blocked tools in plan mode async () => {
expect(callbackInvoked).toBe(false); // Create a test file
const originalContent = 'Original content for plan mode test';
await helper.createFile('test-no-changes.txt', originalContent);
const q = query({
prompt:
'Read test-no-changes.txt and plan how you would modify it to add a header. Do not actually make any changes.',
options: {
...SHARED_TEST_OPTIONS,
permissionMode: 'plan',
cwd: testDir,
},
});
try {
const messages: SDKMessage[] = [];
for await (const message of q) {
messages.push(message);
}
// Verify permission_mode is 'plan'
const systemMessage = findSystemMessage(messages, 'init');
expect(systemMessage!.permission_mode).toBe('plan');
// Verify the file was not modified
const fileContent = await helper.readFile('test-no-changes.txt');
expect(fileContent).toBe(originalContent);
// Verify no write tools were called
const allToolCalls = findToolCalls(messages);
const writeToolCalls = allToolCalls.filter((tc) =>
WRITE_TOOLS.includes(tc.toolUse.name),
);
expect(writeToolCalls.length).toBe(0);
} finally { } finally {
await q.close(); await q.close();
} }
@@ -1064,9 +1187,8 @@ describe('Permission Control (E2E)', () => {
it( it(
'should demonstrate different behaviors across all modes for write operations', 'should demonstrate different behaviors across all modes for write operations',
async () => { async () => {
const modes: Array<'default' | 'plan' | 'auto-edit' | 'yolo'> = [ const modes: Array<'default' | 'auto-edit' | 'yolo'> = [
'default', 'default',
'plan',
'auto-edit', 'auto-edit',
'yolo', 'yolo',
]; ];

View File

@@ -180,7 +180,7 @@ describe('Single-Turn Query (E2E)', () => {
expect(systemMessage!.mcp_servers).toBeDefined(); expect(systemMessage!.mcp_servers).toBeDefined();
expect(Array.isArray(systemMessage!.mcp_servers)).toBe(true); expect(Array.isArray(systemMessage!.mcp_servers)).toBe(true);
expect(systemMessage!.model).toBeDefined(); expect(systemMessage!.model).toBeDefined();
expect(systemMessage!.permissionMode).toBeDefined(); expect(systemMessage!.permission_mode).toBeDefined();
expect(systemMessage!.qwen_code_version).toBeDefined(); expect(systemMessage!.qwen_code_version).toBeDefined();
// Validate system message appears early in sequence // Validate system message appears early in sequence