feat: add allowedTools support

2025-12-19 09:33:53 +00:00 · 2025-11-27 11:44:57 +08:00
parent d76341b8d8
commit 638b7bb466
5 changed files with 828 additions and 69 deletions
--- a/packages/sdk-typescript/src/transport/ProcessTransport.ts
+++ b/packages/sdk-typescript/src/transport/ProcessTransport.ts
@@ -161,6 +161,10 @@ export class ProcessTransport implements Transport {
      args.push('--exclude-tools', this.options.excludeTools.join(','));
    }

+    if (this.options.allowedTools && this.options.allowedTools.length > 0) {
+      args.push('--allowed-tools', this.options.allowedTools.join(','));
+    }
+
    if (this.options.authType) {
      args.push('--auth-type', this.options.authType);
    }
--- a/packages/sdk-typescript/src/types/queryOptionsSchema.ts
+++ b/packages/sdk-typescript/src/types/queryOptionsSchema.ts
@@ -61,6 +61,7 @@ export const QueryOptionsSchema = z
    maxSessionTurns: z.number().optional(),
    coreTools: z.array(z.string()).optional(),
    excludeTools: z.array(z.string()).optional(),
+    allowedTools: z.array(z.string()).optional(),
    authType: z.enum(['openai', 'qwen-oauth']).optional(),
    agents: z
      .array(
--- a/packages/sdk-typescript/src/types/types.ts
+++ b/packages/sdk-typescript/src/types/types.ts
@@ -34,6 +34,7 @@ export type TransportOptions = {
  maxSessionTurns?: number;
  coreTools?: string[];
  excludeTools?: string[];
+  allowedTools?: string[];
  authType?: string;
  includePartialMessages?: boolean;
 };
@@ -125,22 +126,50 @@ export interface QueryOptions {
  env?: Record<string, string>;

  /**
-   * Alias for `approval-mode` command line argument.
-   * Behaves slightly differently from the command line argument.
-   * Permission mode controlling how the CLI handles tool usage and file operations **in non-interactive mode**.
-   * - 'default': Automatically deny all write-like tools(edit, write_file, etc.) and dangers commands.
-   * - 'plan': Shows a plan before executing operations
-   * - 'auto-edit': Automatically applies edits without confirmation
-   * - 'yolo': Executes all operations without prompting
+   * Permission mode controlling how the SDK handles tool execution approval.
+   *
+   * - 'default': Write tools are denied unless approved via the `canUseTool` callback or listed in `allowedTools`.
+   *   Read-only tools execute without confirmation.
+   * - 'plan': Blocks all write tools, instructing the AI to present a plan first.
+   *   Read-only tools execute normally.
+   * - 'auto-edit': Auto-approves edit tools (edit, write_file) while other tools require confirmation.
+   * - 'yolo': All tools execute automatically without confirmation.
+   *
+   * **Priority Chain (highest to lowest):**
+   * 1. `excludeTools` - Blocks tools completely (returns permission error)
+   * 2. `permissionMode: 'plan'` - Blocks non-read-only tools (except exit_plan_mode)
+   * 3. `permissionMode: 'yolo'` - Auto-approves all tools
+   * 4. `allowedTools` - Auto-approves matching tools
+   * 5. `canUseTool` callback - Custom approval logic
+   * 6. Default behavior - Auto-deny in SDK mode
+   *
   * @default 'default'
+   * @see canUseTool For custom permission handling
+   * @see allowedTools For auto-approving specific tools
+   * @see excludeTools For blocking specific tools
   */
  permissionMode?: 'default' | 'plan' | 'auto-edit' | 'yolo';

  /**
-   * Custom permission handler for tool usage.
-   * This function is called when the SDK needs to determine if a tool should be allowed.
-   * Use this with `permissionMode` to gain more control over the tool usage.
-   * TODO: For now we don't support modifying the input.
+   * Custom permission handler for tool execution approval.
+   *
+   * This callback is invoked when a tool requires confirmation and allows you to
+   * programmatically approve or deny execution. It acts as a fallback: it runs after
+   * the `allowedTools` check and before the default denial.
+   *
+   * **When is this called?**
+   * - Only for tools requiring confirmation (write operations, shell commands, etc.)
+   * - After `excludeTools` and `allowedTools` checks
+   * - Not called in 'yolo' mode or 'plan' mode
+   * - Not called for tools already in `allowedTools`
+   *
+   * **Usage with permissionMode:**
+   * - 'default': Invoked for all write tools not in `allowedTools`; if not provided, auto-denied.
+   * - 'auto-edit': Invoked for non-edit tools (edit/write_file auto-approved); if not provided, auto-denied.
+   * - 'plan': Not invoked; write tools are blocked by plan mode.
+   * - 'yolo': Not invoked; all tools auto-approved.
+   *
+   * @see allowedTools For auto-approving tools without callback
   */
  canUseTool?: CanUseTool;

@@ -197,11 +226,49 @@ export interface QueryOptions {
  /**
   * Equivalent to `tool.exclude` in settings.json.
   * List of tools to exclude from the session.
-   * These tools will not be available to the AI, even if they are core tools.
-   * @example ['run_terminal_cmd', 'delete_file']
+   *
+   * **Behavior:**
+   * - Excluded tools return a permission error immediately when invoked
+   * - Takes highest priority - overrides all other permission settings
+   * - Excluded tools are not available to the AI, even if they are listed in `coreTools` or `allowedTools`
+   *
+   * **Pattern matching:**
+   * - Tool name: `'write_file'`, `'run_shell_command'`
+   * - Tool class: `'WriteTool'`, `'ShellTool'`
+   * - Shell command prefix: `'ShellTool(git commit)'` (matches commands starting with "git commit")
+   *
+   * @example ['run_shell_command', 'write_file', 'ShellTool(rm )']
+   * @see allowedTools For allowing specific tools
   */
  excludeTools?: string[];

+  /**
+   * Equivalent to `tool.allowed` in settings.json.
+   * List of tools that are allowed to run without confirmation.
+   *
+   * **Behavior:**
+   * - Matching tools bypass `canUseTool` callback and execute automatically
+   * - Only applies when a tool requires confirmation (write operations, shell commands)
+   * - Checked after `excludeTools` but before `canUseTool` callback
+   * - Does not override `permissionMode: 'plan'` (plan mode blocks all write tools)
+   * - Has no effect in `permissionMode: 'yolo'` (already auto-approved)
+   *
+   * **Pattern matching:**
+   * - Tool name: `'write_file'`, `'run_shell_command'`
+   * - Tool class: `'WriteTool'`, `'ShellTool'`
+   * - Shell command prefix: `'ShellTool(git status)'` (matches commands starting with "git status")
+   *
+   * **Use cases:**
+   * - Auto-approve safe shell commands: `['ShellTool(git status)', 'ShellTool(ls)']`
+   * - Auto-approve specific tools: `['write_file', 'edit']`
+   * - Combine with `permissionMode: 'default'` to selectively auto-approve tools
+   *
+   * @example ['write_file', 'ShellTool(git status)', 'ShellTool(npm test)']
+   * @see canUseTool For custom approval logic
+   * @see excludeTools For blocking specific tools
+   */
+  allowedTools?: string[];
+
  /**
   * Authentication type for the AI service.
   * - 'openai': Use OpenAI-compatible authentication
--- a/packages/sdk-typescript/test/e2e/permission-control.test.ts
+++ b/packages/sdk-typescript/test/e2e/permission-control.test.ts
@@ -673,4 +673,640 @@ describe('Permission Control (E2E)', () => {
      }
    });
  });
+
+  describe('ApprovalMode behavior tests', () => {
+    describe('default mode', () => {
+      it(
+        'should auto-deny tools requiring confirmation without canUseTool callback',
+        async () => {
+          const q = query({
+            prompt:
+              'Create a file named test-default-deny.txt with content "hello"',
+            options: {
+              ...SHARED_TEST_OPTIONS,
+              permissionMode: 'default',
+              cwd: '/tmp',
+              // No canUseTool callback provided
+            },
+          });
+
+          try {
+            let hasToolResult = false;
+            let hasErrorInResult = false;
+
+            for await (const message of q) {
+              if (isCLIUserMessage(message)) {
+                if (Array.isArray(message.message.content)) {
+                  const toolResult = message.message.content.find(
+                    (block) => block.type === 'tool_result',
+                  );
+                  if (toolResult && 'tool_use_id' in toolResult) {
+                    hasToolResult = true;
+                    // Check if the result contains an error about permission
+                    if (
+                      'content' in toolResult &&
+                      typeof toolResult.content === 'string' &&
+                      (toolResult.content.includes('permission') ||
+                        toolResult.content.includes('declined'))
+                    ) {
+                      hasErrorInResult = true;
+                    }
+                  }
+                }
+              }
+            }
+
+            // In default mode without canUseTool, tools should be denied
+            expect(hasToolResult).toBe(true);
+            expect(hasErrorInResult).toBe(true);
+          } finally {
+            await q.close();
+          }
+        },
+        TEST_TIMEOUT,
+      );
+
+      it(
+        'should allow tools when canUseTool returns allow',
+        async () => {
+          let callbackInvoked = false;
+
+          const q = query({
+            prompt:
+              'Create a file named test-default-allow.txt with content "world"',
+            options: {
+              ...SHARED_TEST_OPTIONS,
+              permissionMode: 'default',
+              cwd: '/tmp',
+              canUseTool: async (toolName, input) => {
+                callbackInvoked = true;
+                return {
+                  behavior: 'allow',
+                  updatedInput: input,
+                };
+              },
+            },
+          });
+
+          try {
+            let hasSuccessfulToolResult = false;
+
+            for await (const message of q) {
+              if (isCLIUserMessage(message)) {
+                if (Array.isArray(message.message.content)) {
+                  const toolResult = message.message.content.find(
+                    (block) => block.type === 'tool_result',
+                  );
+                  if (toolResult && 'tool_use_id' in toolResult) {
+                    // Check if the result is successful (not an error)
+                    if (
+                      'content' in toolResult &&
+                      typeof toolResult.content === 'string' &&
+                      !toolResult.content.includes('permission') &&
+                      !toolResult.content.includes('declined')
+                    ) {
+                      hasSuccessfulToolResult = true;
+                    }
+                  }
+                }
+              }
+            }
+
+            expect(callbackInvoked).toBe(true);
+            expect(hasSuccessfulToolResult).toBe(true);
+          } finally {
+            await q.close();
+          }
+        },
+        TEST_TIMEOUT,
+      );
+
+      it(
+        'should execute read-only tools without confirmation',
+        async () => {
+          const q = query({
+            prompt: 'List files in the current directory',
+            options: {
+              ...SHARED_TEST_OPTIONS,
+              permissionMode: 'default',
+              cwd: '/tmp',
+              // No canUseTool callback - read-only tools should still work
+            },
+          });
+
+          try {
+            let hasToolResult = false;
+
+            for await (const message of q) {
+              if (isCLIUserMessage(message)) {
+                if (Array.isArray(message.message.content)) {
+                  const toolResult = message.message.content.find(
+                    (block) => block.type === 'tool_result',
+                  );
+                  if (toolResult) {
+                    hasToolResult = true;
+                  }
+                }
+              }
+            }
+
+            expect(hasToolResult).toBe(true);
+          } finally {
+            await q.close();
+          }
+        },
+        TEST_TIMEOUT,
+      );
+    });
+
+    describe('yolo mode', () => {
+      it(
+        'should auto-approve all tools without canUseTool callback',
+        async () => {
+          const q = query({
+            prompt:
+              'Create a file named test-yolo.txt with content "yolo mode"',
+            options: {
+              ...SHARED_TEST_OPTIONS,
+              permissionMode: 'yolo',
+              cwd: '/tmp',
+              // No canUseTool callback - tools should still execute
+            },
+          });
+
+          try {
+            let hasSuccessfulToolResult = false;
+
+            for await (const message of q) {
+              if (isCLIUserMessage(message)) {
+                if (Array.isArray(message.message.content)) {
+                  const toolResult = message.message.content.find(
+                    (block) => block.type === 'tool_result',
+                  );
+                  if (toolResult && 'tool_use_id' in toolResult) {
+                    // Check if the result is successful (not a permission error)
+                    if (
+                      'content' in toolResult &&
+                      typeof toolResult.content === 'string' &&
+                      !toolResult.content.includes('permission') &&
+                      !toolResult.content.includes('declined')
+                    ) {
+                      hasSuccessfulToolResult = true;
+                    }
+                  }
+                }
+              }
+            }
+
+            expect(hasSuccessfulToolResult).toBe(true);
+          } finally {
+            await q.close();
+          }
+        },
+        TEST_TIMEOUT,
+      );
+
+      it(
+        'should not invoke canUseTool callback in yolo mode',
+        async () => {
+          let callbackInvoked = false;
+
+          const q = query({
+            prompt: 'Create a file named test-yolo-no-callback.txt',
+            options: {
+              ...SHARED_TEST_OPTIONS,
+              permissionMode: 'yolo',
+              cwd: '/tmp',
+              canUseTool: async (toolName, input) => {
+                callbackInvoked = true;
+                return {
+                  behavior: 'allow',
+                  updatedInput: input,
+                };
+              },
+            },
+          });
+
+          try {
+            let hasToolResult = false;
+
+            for await (const message of q) {
+              if (isCLIUserMessage(message)) {
+                if (Array.isArray(message.message.content)) {
+                  const toolResult = message.message.content.find(
+                    (block) => block.type === 'tool_result',
+                  );
+                  if (toolResult) {
+                    hasToolResult = true;
+                  }
+                }
+              }
+            }
+
+            expect(hasToolResult).toBe(true);
+            // canUseTool should not be invoked in yolo mode
+            expect(callbackInvoked).toBe(false);
+          } finally {
+            await q.close();
+          }
+        },
+        TEST_TIMEOUT,
+      );
+
+      it(
+        'should execute dangerous commands without confirmation',
+        async () => {
+          const q = query({
+            prompt: 'Run command: echo "dangerous operation"',
+            options: {
+              ...SHARED_TEST_OPTIONS,
+              permissionMode: 'yolo',
+              cwd: '/tmp',
+            },
+          });
+
+          try {
+            let hasCommandResult = false;
+
+            for await (const message of q) {
+              if (isCLIUserMessage(message)) {
+                if (Array.isArray(message.message.content)) {
+                  const toolResult = message.message.content.find(
+                    (block) => block.type === 'tool_result',
+                  );
+                  if (toolResult && 'tool_use_id' in toolResult) {
+                    hasCommandResult = true;
+                  }
+                }
+              }
+            }
+
+            expect(hasCommandResult).toBe(true);
+          } finally {
+            await q.close();
+          }
+        },
+        TEST_TIMEOUT,
+      );
+    });
+
+    describe('plan mode', () => {
+      it(
+        'should block non-read-only tools and return plan mode error',
+        async () => {
+          const q = query({
+            prompt: 'Create a file named test-plan.txt',
+            options: {
+              ...SHARED_TEST_OPTIONS,
+              permissionMode: 'plan',
+              cwd: '/tmp',
+            },
+          });
+
+          try {
+            let hasBlockedToolCall = false;
+            let hasPlanModeMessage = false;
+
+            for await (const message of q) {
+              if (isCLIUserMessage(message)) {
+                if (Array.isArray(message.message.content)) {
+                  const toolResult = message.message.content.find(
+                    (block) => block.type === 'tool_result',
+                  );
+                  if (toolResult && 'tool_use_id' in toolResult) {
+                    hasBlockedToolCall = true;
+                    // Check for plan mode specific error message
+                    if (
+                      'content' in toolResult &&
+                      typeof toolResult.content === 'string' &&
+                      (toolResult.content.includes('Plan mode') ||
+                        toolResult.content.includes('plan mode'))
+                    ) {
+                      hasPlanModeMessage = true;
+                    }
+                  }
+                }
+              }
+            }
+
+            expect(hasBlockedToolCall).toBe(true);
+            expect(hasPlanModeMessage).toBe(true);
+          } finally {
+            await q.close();
+          }
+        },
+        TEST_TIMEOUT,
+      );
+
+      it(
+        'should allow read-only tools in plan mode',
+        async () => {
+          const q = query({
+            prompt: 'List files in /tmp directory',
+            options: {
+              ...SHARED_TEST_OPTIONS,
+              permissionMode: 'plan',
+              cwd: '/tmp',
+            },
+          });
+
+          try {
+            let hasSuccessfulToolResult = false;
+
+            for await (const message of q) {
+              if (isCLIUserMessage(message)) {
+                if (Array.isArray(message.message.content)) {
+                  const toolResult = message.message.content.find(
+                    (block) => block.type === 'tool_result',
+                  );
+                  if (toolResult && 'tool_use_id' in toolResult) {
+                    // Check if the result is successful (not blocked by plan mode)
+                    if (
+                      'content' in toolResult &&
+                      typeof toolResult.content === 'string' &&
+                      !toolResult.content.includes('Plan mode')
+                    ) {
+                      hasSuccessfulToolResult = true;
+                    }
+                  }
+                }
+              }
+            }
+
+            expect(hasSuccessfulToolResult).toBe(true);
+          } finally {
+            await q.close();
+          }
+        },
+        TEST_TIMEOUT,
+      );
+
+      it(
+        'should block tools even with canUseTool callback in plan mode',
+        async () => {
+          let callbackInvoked = false;
+
+          const q = query({
+            prompt: 'Create a file named test-plan-callback.txt',
+            options: {
+              ...SHARED_TEST_OPTIONS,
+              permissionMode: 'plan',
+              cwd: '/tmp',
+              canUseTool: async (toolName, input) => {
+                callbackInvoked = true;
+                return {
+                  behavior: 'allow',
+                  updatedInput: input,
+                };
+              },
+            },
+          });
+
+          try {
+            let hasPlanModeBlock = false;
+
+            for await (const message of q) {
+              if (isCLIUserMessage(message)) {
+                if (Array.isArray(message.message.content)) {
+                  const toolResult = message.message.content.find(
+                    (block) => block.type === 'tool_result',
+                  );
+                  if (
+                    toolResult &&
+                    'content' in toolResult &&
+                    typeof toolResult.content === 'string' &&
+                    toolResult.content.includes('Plan mode')
+                  ) {
+                    hasPlanModeBlock = true;
+                  }
+                }
+              }
+            }
+
+            // Plan mode should block tools before canUseTool is invoked
+            expect(hasPlanModeBlock).toBe(true);
+            // canUseTool should not be invoked for blocked tools in plan mode
+            expect(callbackInvoked).toBe(false);
+          } finally {
+            await q.close();
+          }
+        },
+        TEST_TIMEOUT,
+      );
+    });
+
+    describe('auto-edit mode', () => {
+      it(
+        'should behave like default mode without canUseTool callback',
+        async () => {
+          const q = query({
+            prompt: 'Create a file named test-auto-edit.txt',
+            options: {
+              ...SHARED_TEST_OPTIONS,
+              permissionMode: 'auto-edit',
+              cwd: '/tmp',
+              // No canUseTool callback
+            },
+          });
+
+          try {
+            let hasToolResult = false;
+            let hasDeniedTool = false;
+
+            for await (const message of q) {
+              if (isCLIUserMessage(message)) {
+                if (Array.isArray(message.message.content)) {
+                  const toolResult = message.message.content.find(
+                    (block) => block.type === 'tool_result',
+                  );
+                  if (toolResult && 'tool_use_id' in toolResult) {
+                    hasToolResult = true;
+                    // Check if the tool was denied
+                    if (
+                      'content' in toolResult &&
+                      typeof toolResult.content === 'string' &&
+                      (toolResult.content.includes('permission') ||
+                        toolResult.content.includes('declined'))
+                    ) {
+                      hasDeniedTool = true;
+                    }
+                  }
+                }
+              }
+            }
+
+            expect(hasToolResult).toBe(true);
+            expect(hasDeniedTool).toBe(true);
+          } finally {
+            await q.close();
+          }
+        },
+        TEST_TIMEOUT,
+      );
+
+      it(
+        'should allow tools when canUseTool returns allow',
+        async () => {
+          let callbackInvoked = false;
+
+          const q = query({
+            prompt: 'Create a file named test-auto-edit-allow.txt',
+            options: {
+              ...SHARED_TEST_OPTIONS,
+              permissionMode: 'auto-edit',
+              cwd: '/tmp',
+              canUseTool: async (toolName, input) => {
+                callbackInvoked = true;
+                return {
+                  behavior: 'allow',
+                  updatedInput: input,
+                };
+              },
+            },
+          });
+
+          try {
+            let hasSuccessfulToolResult = false;
+
+            for await (const message of q) {
+              if (isCLIUserMessage(message)) {
+                if (Array.isArray(message.message.content)) {
+                  const toolResult = message.message.content.find(
+                    (block) => block.type === 'tool_result',
+                  );
+                  if (toolResult && 'tool_use_id' in toolResult) {
+                    // Check if the result is successful
+                    if (
+                      'content' in toolResult &&
+                      typeof toolResult.content === 'string' &&
+                      !toolResult.content.includes('permission') &&
+                      !toolResult.content.includes('declined')
+                    ) {
+                      hasSuccessfulToolResult = true;
+                    }
+                  }
+                }
+              }
+            }
+
+            expect(callbackInvoked).toBe(true);
+            expect(hasSuccessfulToolResult).toBe(true);
+          } finally {
+            await q.close();
+          }
+        },
+        TEST_TIMEOUT,
+      );
+
+      it(
+        'should execute read-only tools without confirmation',
+        async () => {
+          const q = query({
+            prompt: 'Read the contents of /etc/hosts file',
+            options: {
+              ...SHARED_TEST_OPTIONS,
+              permissionMode: 'auto-edit',
+              // No canUseTool callback - read-only tools should still work
+            },
+          });
+
+          try {
+            let hasToolResult = false;
+
+            for await (const message of q) {
+              if (isCLIUserMessage(message)) {
+                if (Array.isArray(message.message.content)) {
+                  const toolResult = message.message.content.find(
+                    (block) => block.type === 'tool_result',
+                  );
+                  if (toolResult) {
+                    hasToolResult = true;
+                  }
+                }
+              }
+            }
+
+            expect(hasToolResult).toBe(true);
+          } finally {
+            await q.close();
+          }
+        },
+        TEST_TIMEOUT,
+      );
+    });
+
+    describe('mode comparison tests', () => {
+      it(
+        'should demonstrate different behaviors across all modes for write operations',
+        async () => {
+          const modes: Array<'default' | 'plan' | 'auto-edit' | 'yolo'> = [
+            'default',
+            'plan',
+            'auto-edit',
+            'yolo',
+          ];
+          const results: Record<string, boolean> = {};
+
+          for (const mode of modes) {
+            const q = query({
+              prompt: `Create a file named test-${mode}.txt`,
+              options: {
+                ...SHARED_TEST_OPTIONS,
+                permissionMode: mode,
+                cwd: '/tmp',
+                canUseTool:
+                  mode === 'yolo'
+                    ? undefined
+                    : async (toolName, input) => {
+                        return {
+                          behavior: 'allow',
+                          updatedInput: input,
+                        };
+                      },
+              },
+            });
+
+            try {
+              let toolExecuted = false;
+
+              for await (const message of q) {
+                if (isCLIUserMessage(message)) {
+                  if (Array.isArray(message.message.content)) {
+                    const toolResult = message.message.content.find(
+                      (block) => block.type === 'tool_result',
+                    );
+                    if (
+                      toolResult &&
+                      'content' in toolResult &&
+                      typeof toolResult.content === 'string'
+                    ) {
+                      // Check if tool executed successfully (not blocked or denied)
+                      if (
+                        !toolResult.content.includes('Plan mode') &&
+                        !toolResult.content.includes('permission') &&
+                        !toolResult.content.includes('declined')
+                      ) {
+                        toolExecuted = true;
+                      }
+                    }
+                  }
+                }
+              }
+
+              results[mode] = toolExecuted;
+            } finally {
+              await q.close();
+            }
+          }
+
+          // Verify expected behaviors
+          expect(results['default']).toBe(true); // Allowed via canUseTool
+          expect(results['plan']).toBe(false); // Blocked by plan mode
+          expect(results['auto-edit']).toBe(true); // Allowed via canUseTool
+          expect(results['yolo']).toBe(true); // Auto-approved
+        },
+        TEST_TIMEOUT * 4,
+      );
+    });
+  });
 });
--- a/packages/sdk-typescript/test/e2e/system-control.test.ts
+++ b/packages/sdk-typescript/test/e2e/system-control.test.ts
@@ -1,8 +1,12 @@
+/**
+ * E2E tests for system controller features:
+ * - setModel API for dynamic model switching
+ */
+
 import { describe, it, expect } from 'vitest';
 import { query } from '../../src/index.js';
 import {
  isCLIAssistantMessage,
-  isCLIResultMessage,
  isCLISystemMessage,
  type CLIUserMessage,
 } from '../../src/types/protocol.js';
@@ -16,7 +20,7 @@ const SHARED_TEST_OPTIONS = {
 /**
 * Factory function that creates a streaming input with a control point.
 * After the first message is yielded, the generator waits for a resume signal,
- * allowing the test code to call query instance methods like setModel or setPermissionMode.
+ * allowing the test code to call query instance methods like setModel.
 *
 * @param firstMessage - The first user message to send
 * @param secondMessage - The second user message to send after control operations
@@ -73,9 +77,9 @@ function createStreamingInputWithControlPoint(
  return { generator, resume };
 }

-describe('Control Request/Response (E2E)', () => {
-  describe('System Controller Scope', () => {
-    it('should set model via control request during streaming input', async () => {
+describe('System Control (E2E)', () => {
+  describe('setModel API', () => {
+    it('should change model dynamically during streaming input', async () => {
      const { generator, resume } = createStreamingInputWithControlPoint(
        'Tell me the model name.',
        'Tell me the model name now again.',
@@ -164,50 +168,77 @@ describe('Control Request/Response (E2E)', () => {
        await q.close();
      }
    });
-  });

-  describe('Permission Controller Scope', () => {
-    it('should set permission mode via control request during streaming input', async () => {
-      const { generator, resume } = createStreamingInputWithControlPoint(
-        'What is 1 + 1?',
-        'What is 2 + 2?',
-      );
+    it('should handle multiple model changes in sequence', async () => {
+      const sessionId = crypto.randomUUID();
+      let resumeResolve1: (() => void) | null = null;
+      let resumeResolve2: (() => void) | null = null;
+      const resumePromise1 = new Promise<void>((resolve) => {
+        resumeResolve1 = resolve;
+      });
+      const resumePromise2 = new Promise<void>((resolve) => {
+        resumeResolve2 = resolve;
+      });
+
+      const generator = (async function* () {
+        yield {
+          type: 'user',
+          session_id: sessionId,
+          message: { role: 'user', content: 'First message' },
+          parent_tool_use_id: null,
+        } as CLIUserMessage;
+
+        await new Promise((resolve) => setTimeout(resolve, 200));
+        await resumePromise1;
+        await new Promise((resolve) => setTimeout(resolve, 200));
+
+        yield {
+          type: 'user',
+          session_id: sessionId,
+          message: { role: 'user', content: 'Second message' },
+          parent_tool_use_id: null,
+        } as CLIUserMessage;
+
+        await new Promise((resolve) => setTimeout(resolve, 200));
+        await resumePromise2;
+        await new Promise((resolve) => setTimeout(resolve, 200));
+
+        yield {
+          type: 'user',
+          session_id: sessionId,
+          message: { role: 'user', content: 'Third message' },
+          parent_tool_use_id: null,
+        } as CLIUserMessage;
+      })();

      const q = query({
        prompt: generator,
        options: {
-          pathToQwenExecutable: TEST_CLI_PATH,
-          permissionMode: 'default',
+          ...SHARED_TEST_OPTIONS,
+          model: 'qwen3-max',
          debug: false,
        },
      });

      try {
-        const resolvers: {
-          first?: () => void;
-          second?: () => void;
-        } = {};
-        const firstResponsePromise = new Promise<void>((resolve) => {
-          resolvers.first = resolve;
-        });
-        const secondResponsePromise = new Promise<void>((resolve) => {
-          resolvers.second = resolve;
-        });
+        const systemMessages: Array<{ model?: string }> = [];
+        let responseCount = 0;
+        const resolvers: Array<() => void> = [];
+        const responsePromises = [
+          new Promise<void>((resolve) => resolvers.push(resolve)),
+          new Promise<void>((resolve) => resolvers.push(resolve)),
+          new Promise<void>((resolve) => resolvers.push(resolve)),
+        ];

-        let firstResponseReceived = false;
-        let permissionModeChanged = false;
-        let secondResponseReceived = false;
-
-        // Consume messages in a single loop
        (async () => {
          for await (const message of q) {
-            if (isCLIAssistantMessage(message) || isCLIResultMessage(message)) {
-              if (!firstResponseReceived) {
-                firstResponseReceived = true;
-                resolvers.first?.();
-              } else if (!secondResponseReceived) {
-                secondResponseReceived = true;
-                resolvers.second?.();
+            if (isCLISystemMessage(message)) {
+              systemMessages.push({ model: message.model });
+            }
+            if (isCLIAssistantMessage(message)) {
+              if (responseCount < resolvers.length) {
+                resolvers[responseCount]?.();
+                responseCount++;
              }
            }
          }
@@ -215,40 +246,60 @@ describe('Control Request/Response (E2E)', () => {

        // Wait for first response
        await Promise.race([
-          firstResponsePromise,
+          responsePromises[0],
          new Promise((_, reject) =>
-            setTimeout(
-              () => reject(new Error('Timeout waiting for first response')),
-              10000,
-            ),
+            setTimeout(() => reject(new Error('Timeout 1')), 10000),
          ),
        ]);

-        expect(firstResponseReceived).toBe(true);
-
-        // Perform control operation: set permission mode
-        await q.setPermissionMode('yolo');
-        permissionModeChanged = true;
-
-        // Resume the input stream
-        resume();
+        // First model change
+        await q.setModel('qwen3-turbo');
+        resumeResolve1?.();

        // Wait for second response
        await Promise.race([
-          secondResponsePromise,
+          responsePromises[1],
          new Promise((_, reject) =>
-            setTimeout(
-              () => reject(new Error('Timeout waiting for second response')),
-              10000,
-            ),
+            setTimeout(() => reject(new Error('Timeout 2')), 10000),
          ),
        ]);

-        expect(permissionModeChanged).toBe(true);
-        expect(secondResponseReceived).toBe(true);
+        // Second model change
+        await q.setModel('qwen3-vl-plus');
+        resumeResolve2?.();
+
+        // Wait for third response
+        await Promise.race([
+          responsePromises[2],
+          new Promise((_, reject) =>
+            setTimeout(() => reject(new Error('Timeout 3')), 10000),
+          ),
+        ]);
+
+        // Verify we received system messages for each model
+        expect(systemMessages.length).toBeGreaterThanOrEqual(3);
+        expect(systemMessages[0].model).toBeOneOf(['qwen3-max', 'coder-model']);
+        expect(systemMessages[1].model).toBe('qwen3-turbo');
+        expect(systemMessages[2].model).toBe('qwen3-vl-plus');
      } finally {
        await q.close();
      }
    });
+
+    it('should throw error when setModel is called on closed query', async () => {
+      // Build the query, then close it before sending any control request.
+      const closedQuery = query({
+        prompt: 'Hello',
+        options: { ...SHARED_TEST_OPTIONS, model: 'qwen3-max' },
+      });
+
+      await closedQuery.close();
+
+      // The model switch is issued only after close() has resolved; the
+      // promise it returns is asserted to reject with the message below.
+      const pendingSwitch = closedQuery.setModel('qwen3-turbo');
+
+      await expect(pendingSwitch).rejects.toThrow('Query is closed');
+    });
  });
 });