feat: add allowedTools support

2025-12-19 09:33:53 +00:00 · 2025-11-27 11:44:57 +08:00
parent d76341b8d8
commit 638b7bb466
5 changed files with 828 additions and 69 deletions
--- a/packages/sdk-typescript/src/transport/ProcessTransport.ts
+++ b/packages/sdk-typescript/src/transport/ProcessTransport.ts
@@ -161,6 +161,10 @@ export class ProcessTransport implements Transport {
      args.push('--exclude-tools', this.options.excludeTools.join(','));
    }
    if (this.options.allowedTools && this.options.allowedTools.length > 0) {
      args.push('--allowed-tools', this.options.allowedTools.join(','));
    }
    if (this.options.authType) {
      args.push('--auth-type', this.options.authType);
    }
--- a/packages/sdk-typescript/src/types/queryOptionsSchema.ts
+++ b/packages/sdk-typescript/src/types/queryOptionsSchema.ts
@@ -61,6 +61,7 @@ export const QueryOptionsSchema = z
    maxSessionTurns: z.number().optional(),
    coreTools: z.array(z.string()).optional(),
    excludeTools: z.array(z.string()).optional(),
    allowedTools: z.array(z.string()).optional(),
    authType: z.enum(['openai', 'qwen-oauth']).optional(),
    agents: z
      .array(
--- a/packages/sdk-typescript/src/types/types.ts
+++ b/packages/sdk-typescript/src/types/types.ts
@@ -34,6 +34,7 @@ export type TransportOptions = {
  maxSessionTurns?: number;
  coreTools?: string[];
  excludeTools?: string[];
  allowedTools?: string[];
  authType?: string;
  includePartialMessages?: boolean;
 };
@@ -125,22 +126,50 @@ export interface QueryOptions {
  env?: Record<string, string>;
  /**
-   * Alias for `approval-mode` command line argument.
+   * Permission mode controlling how the SDK handles tool execution approval.
-   * Behaves slightly differently from the command line argument.
+   *
-   * Permission mode controlling how the CLI handles tool usage and file operations **in non-interactive mode**.
+   * - 'default': Write tools are denied unless approved via `canUseTool` callback or in `allowedTools`.
-   * - 'default': Automatically deny all write-like tools(edit, write_file, etc.) and dangers commands.
+   *   Read-only tools execute without confirmation.
-   * - 'plan': Shows a plan before executing operations
+   * - 'plan': Blocks all write tools, instructing AI to present a plan first.
-   * - 'auto-edit': Automatically applies edits without confirmation
+   *   Read-only tools execute normally.
-   * - 'yolo': Executes all operations without prompting
+   * - 'auto-edit': Auto-approve edit tools (edit, write_file) while other tools require confirmation.
   * - 'yolo': All tools execute automatically without confirmation.
   *
   * **Priority Chain (highest to lowest):**
   * 1. `excludeTools` - Blocks tools completely (returns permission error)
   * 2. `permissionMode: 'plan'` - Blocks non-read-only tools (except exit_plan_mode)
   * 3. `permissionMode: 'yolo'` - Auto-approves all tools
   * 4. `allowedTools` - Auto-approves matching tools
   * 5. `canUseTool` callback - Custom approval logic
   * 6. Default behavior - Auto-deny in SDK mode
   *
   * @default 'default'
   * @see canUseTool For custom permission handling
   * @see allowedTools For auto-approving specific tools
   * @see excludeTools For blocking specific tools
   */
  permissionMode?: 'default' | 'plan' | 'auto-edit' | 'yolo';
  /**
-   * Custom permission handler for tool usage.
+   * Custom permission handler for tool execution approval.
-   * This function is called when the SDK needs to determine if a tool should be allowed.
+   *
-   * Use this with `permissionMode` to gain more control over the tool usage.
+   * This callback is invoked when a tool requires confirmation and allows you to
-   * TODO: For now we don't support modifying the input.
+   * programmatically approve or deny execution. It acts as a fallback after
   * `allowedTools` check but before default denial.
   *
   * **When is this called?**
   * - Only for tools requiring confirmation (write operations, shell commands, etc.)
   * - After `excludeTools` and `allowedTools` checks
   * - Not called in 'yolo' mode or 'plan' mode
   * - Not called for tools already in `allowedTools`
   *
   * **Usage with permissionMode:**
   * - 'default': Invoked for all write tools not in `allowedTools`; if not provided, auto-denied.
   * - 'auto-edit': Invoked for non-edit tools (edit/write_file auto-approved); if not provided, auto-denied.
   * - 'plan': Not invoked; write tools are blocked by plan mode.
   * - 'yolo': Not invoked; all tools auto-approved.
   *
   * @see allowedTools For auto-approving tools without callback
   */
  canUseTool?: CanUseTool;
@@ -197,11 +226,49 @@ export interface QueryOptions {
  /**
   * Equivalent to `tools.exclude` in settings.json.
   * List of tools to exclude from the session.
-   * These tools will not be available to the AI, even if they are core tools.
+   *
-   * @example ['run_terminal_cmd', 'delete_file']
+   * **Behavior:**
   * - Excluded tools return a permission error immediately when invoked
   * - Takes highest priority - overrides all other permission settings
   * - Tools will not be available to the AI, even if in `coreTools` or `allowedTools`
   *
   * **Pattern matching:**
   * - Tool name: `'write_file'`, `'run_shell_command'`
   * - Tool class: `'WriteTool'`, `'ShellTool'`
   * - Shell command prefix: `'ShellTool(git commit)'` (matches commands starting with "git commit")
   *
   * @example ['run_shell_command', 'write_file', 'ShellTool(rm )']
   * @see allowedTools For allowing specific tools
   */
  excludeTools?: string[];
  /**
   * Equivalent to `tools.allowed` in settings.json.
   * List of tools that are allowed to run without confirmation.
   *
   * **Behavior:**
   * - Matching tools bypass `canUseTool` callback and execute automatically
   * - Only applies when tool requires confirmation (write operations, shell commands)
   * - Checked after `excludeTools` but before `canUseTool` callback
   * - Does not override `permissionMode: 'plan'` (plan mode blocks all write tools)
   * - Has no effect in `permissionMode: 'yolo'` (already auto-approved)
   *
   * **Pattern matching:**
   * - Tool name: `'write_file'`, `'run_shell_command'`
   * - Tool class: `'WriteTool'`, `'ShellTool'`
   * - Shell command prefix: `'ShellTool(git status)'` (matches commands starting with "git status")
   *
   * **Use cases:**
   * - Auto-approve safe shell commands: `['ShellTool(git status)', 'ShellTool(ls)']`
   * - Auto-approve specific tools: `['write_file', 'edit']`
   * - Combine with `permissionMode: 'default'` to selectively auto-approve tools
   *
   * @example ['read_file', 'ShellTool(git status)', 'ShellTool(npm test)']
   * @see canUseTool For custom approval logic
   * @see excludeTools For blocking specific tools
   */
  allowedTools?: string[];
  /**
   * Authentication type for the AI service.
   * - 'openai': Use OpenAI-compatible authentication
--- a/packages/sdk-typescript/test/e2e/permission-control.test.ts
+++ b/packages/sdk-typescript/test/e2e/permission-control.test.ts
@@ -673,4 +673,640 @@ describe('Permission Control (E2E)', () => {
      }
    });
  });
  describe('ApprovalMode behavior tests', () => {
    describe('default mode', () => {
      // Default mode with no approval callback: a file-creation prompt must
      // produce a tool_result whose text mentions "permission" or "declined".
      it(
        'should auto-deny tools requiring confirmation without canUseTool callback',
        async () => {
          const session = query({
            prompt:
              'Create a file named test-default-deny.txt with content "hello"',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'default',
              cwd: '/tmp',
              // canUseTool is intentionally omitted
            },
          });

          let sawToolResult = false;
          let sawDenial = false;

          try {
            for await (const message of session) {
              if (!isCLIUserMessage(message)) continue;

              const blocks = message.message.content;
              if (!Array.isArray(blocks)) continue;

              // Only the first tool_result block of each user message is inspected.
              const first = blocks.find(
                (block) => block.type === 'tool_result',
              );
              if (!first || !('tool_use_id' in first)) continue;
              sawToolResult = true;

              const denied =
                'content' in first &&
                typeof first.content === 'string' &&
                (first.content.includes('permission') ||
                  first.content.includes('declined'));
              if (denied) {
                sawDenial = true;
              }
            }

            // Both a tool result and a denial text are required.
            expect(sawToolResult).toBe(true);
            expect(sawDenial).toBe(true);
          } finally {
            await session.close();
          }
        },
        TEST_TIMEOUT,
      );
      // Default mode with an approving callback: the callback must fire and a
      // string tool_result free of "permission"/"declined" must be observed.
      it(
        'should allow tools when canUseTool returns allow',
        async () => {
          let approverCalled = false;

          const session = query({
            prompt:
              'Create a file named test-default-allow.txt with content "world"',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'default',
              cwd: '/tmp',
              canUseTool: async (_toolName, input) => {
                approverCalled = true;
                return { behavior: 'allow', updatedInput: input };
              },
            },
          });

          let sawSuccess = false;

          try {
            for await (const message of session) {
              if (!isCLIUserMessage(message)) continue;

              const blocks = message.message.content;
              if (!Array.isArray(blocks)) continue;

              // Only the first tool_result block of each user message is inspected.
              const first = blocks.find(
                (block) => block.type === 'tool_result',
              );
              if (!first || !('tool_use_id' in first)) continue;

              // A string result mentioning neither keyword counts as success.
              if ('content' in first && typeof first.content === 'string') {
                const text = first.content;
                const denied =
                  text.includes('permission') || text.includes('declined');
                if (!denied) {
                  sawSuccess = true;
                }
              }
            }

            expect(approverCalled).toBe(true);
            expect(sawSuccess).toBe(true);
          } finally {
            await session.close();
          }
        },
        TEST_TIMEOUT,
      );
      // Default mode, no callback: a directory-listing prompt must still yield
      // at least one tool_result block.
      it(
        'should execute read-only tools without confirmation',
        async () => {
          const session = query({
            prompt: 'List files in the current directory',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'default',
              cwd: '/tmp',
              // canUseTool is intentionally omitted
            },
          });

          let sawToolResult = false;

          try {
            for await (const message of session) {
              if (!isCLIUserMessage(message)) continue;

              const blocks = message.message.content;
              if (!Array.isArray(blocks)) continue;

              // Any tool_result block in the message is enough.
              if (blocks.some((block) => block.type === 'tool_result')) {
                sawToolResult = true;
              }
            }

            expect(sawToolResult).toBe(true);
          } finally {
            await session.close();
          }
        },
        TEST_TIMEOUT,
      );
    });
    describe('yolo mode', () => {
      // Yolo mode, no callback: a file-creation prompt must produce a string
      // tool_result that mentions neither "permission" nor "declined".
      it(
        'should auto-approve all tools without canUseTool callback',
        async () => {
          const session = query({
            prompt:
              'Create a file named test-yolo.txt with content "yolo mode"',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'yolo',
              cwd: '/tmp',
              // canUseTool is intentionally omitted
            },
          });

          let sawSuccess = false;

          try {
            for await (const message of session) {
              if (!isCLIUserMessage(message)) continue;

              const blocks = message.message.content;
              if (!Array.isArray(blocks)) continue;

              // Only the first tool_result block of each user message is inspected.
              const first = blocks.find(
                (block) => block.type === 'tool_result',
              );
              if (!first || !('tool_use_id' in first)) continue;

              if ('content' in first && typeof first.content === 'string') {
                const text = first.content;
                const denied =
                  text.includes('permission') || text.includes('declined');
                if (!denied) {
                  sawSuccess = true;
                }
              }
            }

            expect(sawSuccess).toBe(true);
          } finally {
            await session.close();
          }
        },
        TEST_TIMEOUT,
      );
      // Yolo mode with a callback supplied: a tool_result must appear while the
      // callback stays untouched.
      it(
        'should not invoke canUseTool callback in yolo mode',
        async () => {
          let approverCalled = false;

          const session = query({
            prompt: 'Create a file named test-yolo-no-callback.txt',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'yolo',
              cwd: '/tmp',
              canUseTool: async (_toolName, input) => {
                approverCalled = true;
                return { behavior: 'allow', updatedInput: input };
              },
            },
          });

          let sawToolResult = false;

          try {
            for await (const message of session) {
              if (!isCLIUserMessage(message)) continue;

              const blocks = message.message.content;
              if (!Array.isArray(blocks)) continue;

              if (blocks.some((block) => block.type === 'tool_result')) {
                sawToolResult = true;
              }
            }

            expect(sawToolResult).toBe(true);
            // The approval callback must never have run.
            expect(approverCalled).toBe(false);
          } finally {
            await session.close();
          }
        },
        TEST_TIMEOUT,
      );
      // Yolo mode: a prompt asking to run a shell command must yield a
      // tool_result block that carries a tool_use_id.
      it(
        'should execute dangerous commands without confirmation',
        async () => {
          const session = query({
            prompt: 'Run command: echo "dangerous operation"',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'yolo',
              cwd: '/tmp',
            },
          });

          let sawCommandResult = false;

          try {
            for await (const message of session) {
              if (!isCLIUserMessage(message)) continue;

              const blocks = message.message.content;
              if (!Array.isArray(blocks)) continue;

              // Only the first tool_result block of each user message is inspected.
              const first = blocks.find(
                (block) => block.type === 'tool_result',
              );
              if (first && 'tool_use_id' in first) {
                sawCommandResult = true;
              }
            }

            expect(sawCommandResult).toBe(true);
          } finally {
            await session.close();
          }
        },
        TEST_TIMEOUT,
      );
    });
    describe('plan mode', () => {
      // Plan mode: a file-creation prompt must yield a tool_result whose text
      // contains "Plan mode" or "plan mode".
      it(
        'should block non-read-only tools and return plan mode error',
        async () => {
          const session = query({
            prompt: 'Create a file named test-plan.txt',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'plan',
              cwd: '/tmp',
            },
          });

          let sawToolResult = false;
          let sawPlanModeText = false;

          try {
            for await (const message of session) {
              if (!isCLIUserMessage(message)) continue;

              const blocks = message.message.content;
              if (!Array.isArray(blocks)) continue;

              // Only the first tool_result block of each user message is inspected.
              const first = blocks.find(
                (block) => block.type === 'tool_result',
              );
              if (!first || !('tool_use_id' in first)) continue;
              sawToolResult = true;

              // Either capitalisation of the plan-mode message is accepted.
              const mentionsPlanMode =
                'content' in first &&
                typeof first.content === 'string' &&
                (first.content.includes('Plan mode') ||
                  first.content.includes('plan mode'));
              if (mentionsPlanMode) {
                sawPlanModeText = true;
              }
            }

            expect(sawToolResult).toBe(true);
            expect(sawPlanModeText).toBe(true);
          } finally {
            await session.close();
          }
        },
        TEST_TIMEOUT,
      );
      // Plan mode: a directory-listing prompt must yield a string tool_result
      // that is not a plan-mode block message.
      it(
        'should allow read-only tools in plan mode',
        async () => {
          const q = query({
            prompt: 'List files in /tmp directory',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'plan',
              cwd: '/tmp',
            },
          });
          try {
            let hasSuccessfulToolResult = false;
            for await (const message of q) {
              if (isCLIUserMessage(message)) {
                if (Array.isArray(message.message.content)) {
                  // Only the first tool_result block of each user message is inspected.
                  const toolResult = message.message.content.find(
                    (block) => block.type === 'tool_result',
                  );
                  if (toolResult && 'tool_use_id' in toolResult) {
                    // Reject both capitalisations of the block message, matching
                    // the detection used by the "should block non-read-only
                    // tools" test; checking only 'Plan mode' would let a
                    // lower-case block message count as success.
                    if (
                      'content' in toolResult &&
                      typeof toolResult.content === 'string' &&
                      !toolResult.content.includes('Plan mode') &&
                      !toolResult.content.includes('plan mode')
                    ) {
                      hasSuccessfulToolResult = true;
                    }
                  }
                }
              }
            }
            expect(hasSuccessfulToolResult).toBe(true);
          } finally {
            await q.close();
          }
        },
        TEST_TIMEOUT,
      );
      // Plan mode with an approving callback supplied: the write request must
      // still be answered with a plan-mode block message, and the callback must
      // never run.
      it(
        'should block tools even with canUseTool callback in plan mode',
        async () => {
          let callbackInvoked = false;
          const q = query({
            prompt: 'Create a file named test-plan-callback.txt',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'plan',
              cwd: '/tmp',
              canUseTool: async (_toolName, input) => {
                callbackInvoked = true;
                return {
                  behavior: 'allow',
                  updatedInput: input,
                };
              },
            },
          });
          try {
            let hasPlanModeBlock = false;
            for await (const message of q) {
              if (isCLIUserMessage(message)) {
                if (Array.isArray(message.message.content)) {
                  // Only the first tool_result block of each user message is inspected.
                  const toolResult = message.message.content.find(
                    (block) => block.type === 'tool_result',
                  );
                  // Accept either capitalisation of the block message, matching
                  // the detection used by the "should block non-read-only
                  // tools" test in this suite.
                  if (
                    toolResult &&
                    'content' in toolResult &&
                    typeof toolResult.content === 'string' &&
                    (toolResult.content.includes('Plan mode') ||
                      toolResult.content.includes('plan mode'))
                  ) {
                    hasPlanModeBlock = true;
                  }
                }
              }
            }
            // Plan mode should block tools before canUseTool is invoked
            expect(hasPlanModeBlock).toBe(true);
            // canUseTool should not be invoked for blocked tools in plan mode
            expect(callbackInvoked).toBe(false);
          } finally {
            await q.close();
          }
        },
        TEST_TIMEOUT,
      );
    });
    describe('auto-edit mode', () => {
      // Auto-edit mode, no callback: expects a file-creation prompt to produce
      // a tool_result whose text mentions "permission" or "declined".
      // NOTE(review): the QueryOptions.permissionMode JSDoc describes
      // 'auto-edit' as auto-approving edit tools (edit, write_file), which
      // appears to conflict with the denial expected here - confirm which
      // behavior is intended.
      it(
        'should behave like default mode without canUseTool callback',
        async () => {
          const session = query({
            prompt: 'Create a file named test-auto-edit.txt',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'auto-edit',
              cwd: '/tmp',
              // canUseTool is intentionally omitted
            },
          });

          let sawToolResult = false;
          let sawDenial = false;

          try {
            for await (const message of session) {
              if (!isCLIUserMessage(message)) continue;

              const blocks = message.message.content;
              if (!Array.isArray(blocks)) continue;

              // Only the first tool_result block of each user message is inspected.
              const first = blocks.find(
                (block) => block.type === 'tool_result',
              );
              if (!first || !('tool_use_id' in first)) continue;
              sawToolResult = true;

              const denied =
                'content' in first &&
                typeof first.content === 'string' &&
                (first.content.includes('permission') ||
                  first.content.includes('declined'));
              if (denied) {
                sawDenial = true;
              }
            }

            expect(sawToolResult).toBe(true);
            expect(sawDenial).toBe(true);
          } finally {
            await session.close();
          }
        },
        TEST_TIMEOUT,
      );
      // Auto-edit mode with an approving callback: expects the callback to fire
      // and a string tool_result free of "permission"/"declined".
      // NOTE(review): the QueryOptions.canUseTool JSDoc says the callback is
      // invoked only for non-edit tools in 'auto-edit' (edit/write_file being
      // auto-approved); a file-creation prompt may therefore never reach the
      // callback - confirm the intended behavior.
      it(
        'should allow tools when canUseTool returns allow',
        async () => {
          let approverCalled = false;

          const session = query({
            prompt: 'Create a file named test-auto-edit-allow.txt',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'auto-edit',
              cwd: '/tmp',
              canUseTool: async (_toolName, input) => {
                approverCalled = true;
                return { behavior: 'allow', updatedInput: input };
              },
            },
          });

          let sawSuccess = false;

          try {
            for await (const message of session) {
              if (!isCLIUserMessage(message)) continue;

              const blocks = message.message.content;
              if (!Array.isArray(blocks)) continue;

              // Only the first tool_result block of each user message is inspected.
              const first = blocks.find(
                (block) => block.type === 'tool_result',
              );
              if (!first || !('tool_use_id' in first)) continue;

              if ('content' in first && typeof first.content === 'string') {
                const text = first.content;
                const denied =
                  text.includes('permission') || text.includes('declined');
                if (!denied) {
                  sawSuccess = true;
                }
              }
            }

            expect(approverCalled).toBe(true);
            expect(sawSuccess).toBe(true);
          } finally {
            await session.close();
          }
        },
        TEST_TIMEOUT,
      );
      // Auto-edit mode, no callback: a file-read prompt must still yield at
      // least one tool_result block. No `cwd` is set for this test.
      it(
        'should execute read-only tools without confirmation',
        async () => {
          const session = query({
            prompt: 'Read the contents of /etc/hosts file',
            options: {
              ...SHARED_TEST_OPTIONS,
              permissionMode: 'auto-edit',
              // canUseTool is intentionally omitted
            },
          });

          let sawToolResult = false;

          try {
            for await (const message of session) {
              if (!isCLIUserMessage(message)) continue;

              const blocks = message.message.content;
              if (!Array.isArray(blocks)) continue;

              if (blocks.some((block) => block.type === 'tool_result')) {
                sawToolResult = true;
              }
            }

            expect(sawToolResult).toBe(true);
          } finally {
            await session.close();
          }
        },
        TEST_TIMEOUT,
      );
    });
    describe('mode comparison tests', () => {
      // Runs the same file-creation prompt once per permission mode, one
      // session at a time, and records whether any inspected tool_result looked
      // like a successful execution. Every mode except 'yolo' gets an
      // always-allow canUseTool callback.
      it(
        'should demonstrate different behaviors across all modes for write operations',
        async () => {
          const modes: Array<'default' | 'plan' | 'auto-edit' | 'yolo'> = [
            'default',
            'plan',
            'auto-edit',
            'yolo',
          ];
          const results: Record<string, boolean> = {};
          for (const mode of modes) {
            const q = query({
              prompt: `Create a file named test-${mode}.txt`,
              options: {
                ...SHARED_TEST_OPTIONS,
                permissionMode: mode,
                cwd: '/tmp',
                canUseTool:
                  mode === 'yolo'
                    ? undefined
                    : async (_toolName, input) => {
                        return {
                          behavior: 'allow',
                          updatedInput: input,
                        };
                      },
              },
            });
            try {
              let toolExecuted = false;
              for await (const message of q) {
                if (isCLIUserMessage(message)) {
                  if (Array.isArray(message.message.content)) {
                    // Only the first tool_result block of each user message is inspected.
                    const toolResult = message.message.content.find(
                      (block) => block.type === 'tool_result',
                    );
                    if (
                      toolResult &&
                      'content' in toolResult &&
                      typeof toolResult.content === 'string'
                    ) {
                      // Executed means: not a plan-mode block (either
                      // capitalisation, matching the plan-mode tests in this
                      // suite) and not a permission denial.
                      if (
                        !toolResult.content.includes('Plan mode') &&
                        !toolResult.content.includes('plan mode') &&
                        !toolResult.content.includes('permission') &&
                        !toolResult.content.includes('declined')
                      ) {
                        toolExecuted = true;
                      }
                    }
                  }
                }
              }
              results[mode] = toolExecuted;
            } finally {
              // Close each session before starting the next mode.
              await q.close();
            }
          }
          // Verify expected behaviors
          expect(results['default']).toBe(true); // Allowed via canUseTool
          expect(results['plan']).toBe(false); // Blocked by plan mode
          expect(results['auto-edit']).toBe(true); // Allowed via canUseTool
          expect(results['yolo']).toBe(true); // Auto-approved
        },
        // Four sequential sessions share this single test, hence the larger budget.
        TEST_TIMEOUT * 4,
      );
    });
  });
 });
--- a/packages/sdk-typescript/test/e2e/system-control.test.ts
+++ b/packages/sdk-typescript/test/e2e/system-control.test.ts
@@ -1,8 +1,12 @@
 /**
 * E2E tests for system controller features:
 * - setModel API for dynamic model switching
 */
 import { describe, it, expect } from 'vitest';
 import { query } from '../../src/index.js';
 import {
  isCLIAssistantMessage,
  isCLIResultMessage,
  isCLISystemMessage,
  type CLIUserMessage,
 } from '../../src/types/protocol.js';
@@ -16,7 +20,7 @@ const SHARED_TEST_OPTIONS = {
 /**
 * Factory function that creates a streaming input with a control point.
 * After the first message is yielded, the generator waits for a resume signal,
- * allowing the test code to call query instance methods like setModel or setPermissionMode.
+ * allowing the test code to call query instance methods like setModel.
 *
 * @param firstMessage - The first user message to send
 * @param secondMessage - The second user message to send after control operations
@@ -73,9 +77,9 @@ function createStreamingInputWithControlPoint(
  return { generator, resume };
 }
-describe('Control Request/Response (E2E)', () => {
+describe('System Control (E2E)', () => {
-  describe('System Controller Scope', () => {
+  describe('setModel API', () => {
-    it('should set model via control request during streaming input', async () => {
+    it('should change model dynamically during streaming input', async () => {
      const { generator, resume } = createStreamingInputWithControlPoint(
        'Tell me the model name.',
        'Tell me the model name now again.',
@@ -164,50 +168,77 @@ describe('Control Request/Response (E2E)', () => {
        await q.close();
      }
    });
  });
-  describe('Permission Controller Scope', () => {
+    it('should handle multiple model changes in sequence', async () => {
-    it('should set permission mode via control request during streaming input', async () => {
+      const sessionId = crypto.randomUUID();
-      const { generator, resume } = createStreamingInputWithControlPoint(
+      let resumeResolve1: (() => void) | null = null;
-        'What is 1 + 1?',
+      let resumeResolve2: (() => void) | null = null;
-        'What is 2 + 2?',
+      const resumePromise1 = new Promise<void>((resolve) => {
-      );
+        resumeResolve1 = resolve;
      });
      const resumePromise2 = new Promise<void>((resolve) => {
        resumeResolve2 = resolve;
      });
      const generator = (async function* () {
        yield {
          type: 'user',
          session_id: sessionId,
          message: { role: 'user', content: 'First message' },
          parent_tool_use_id: null,
        } as CLIUserMessage;
        await new Promise((resolve) => setTimeout(resolve, 200));
        await resumePromise1;
        await new Promise((resolve) => setTimeout(resolve, 200));
        yield {
          type: 'user',
          session_id: sessionId,
          message: { role: 'user', content: 'Second message' },
          parent_tool_use_id: null,
        } as CLIUserMessage;
        await new Promise((resolve) => setTimeout(resolve, 200));
        await resumePromise2;
        await new Promise((resolve) => setTimeout(resolve, 200));
        yield {
          type: 'user',
          session_id: sessionId,
          message: { role: 'user', content: 'Third message' },
          parent_tool_use_id: null,
        } as CLIUserMessage;
      })();
      const q = query({
        prompt: generator,
        options: {
-          pathToQwenExecutable: TEST_CLI_PATH,
+          ...SHARED_TEST_OPTIONS,
-          permissionMode: 'default',
+          model: 'qwen3-max',
          debug: false,
        },
      });
      try {
-        const resolvers: {
+        const systemMessages: Array<{ model?: string }> = [];
-          first?: () => void;
+        let responseCount = 0;
-          second?: () => void;
+        const resolvers: Array<() => void> = [];
-        } = {};
+        const responsePromises = [
-        const firstResponsePromise = new Promise<void>((resolve) => {
+          new Promise<void>((resolve) => resolvers.push(resolve)),
-          resolvers.first = resolve;
+          new Promise<void>((resolve) => resolvers.push(resolve)),
-        });
+          new Promise<void>((resolve) => resolvers.push(resolve)),
-        const secondResponsePromise = new Promise<void>((resolve) => {
+        ];
          resolvers.second = resolve;
        });
        let firstResponseReceived = false;
        let permissionModeChanged = false;
        let secondResponseReceived = false;
        // Consume messages in a single loop
        (async () => {
          for await (const message of q) {
-            if (isCLIAssistantMessage(message) || isCLIResultMessage(message)) {
+            if (isCLISystemMessage(message)) {
-              if (!firstResponseReceived) {
+              systemMessages.push({ model: message.model });
-                firstResponseReceived = true;
+            }
-                resolvers.first?.();
+            if (isCLIAssistantMessage(message)) {
-              } else if (!secondResponseReceived) {
+              if (responseCount < resolvers.length) {
-                secondResponseReceived = true;
+                resolvers[responseCount]?.();
-                resolvers.second?.();
+                responseCount++;
              }
            }
          }
@@ -215,40 +246,60 @@ describe('Control Request/Response (E2E)', () => {
        // Wait for first response
        await Promise.race([
-          firstResponsePromise,
+          responsePromises[0],
          new Promise((_, reject) =>
-            setTimeout(
+            setTimeout(() => reject(new Error('Timeout 1')), 10000),
              () => reject(new Error('Timeout waiting for first response')),
              10000,
            ),
          ),
        ]);
-        expect(firstResponseReceived).toBe(true);
+        // First model change
-
+        await q.setModel('qwen3-turbo');
-        // Perform control operation: set permission mode
+        resumeResolve1?.();
        await q.setPermissionMode('yolo');
        permissionModeChanged = true;
        // Resume the input stream
        resume();
        // Wait for second response
        await Promise.race([
-          secondResponsePromise,
+          responsePromises[1],
          new Promise((_, reject) =>
-            setTimeout(
+            setTimeout(() => reject(new Error('Timeout 2')), 10000),
              () => reject(new Error('Timeout waiting for second response')),
              10000,
            ),
          ),
        ]);
-        expect(permissionModeChanged).toBe(true);
+        // Second model change
-        expect(secondResponseReceived).toBe(true);
+        await q.setModel('qwen3-vl-plus');
        resumeResolve2?.();
        // Wait for third response
        await Promise.race([
          responsePromises[2],
          new Promise((_, reject) =>
            setTimeout(() => reject(new Error('Timeout 3')), 10000),
          ),
        ]);
        // Verify we received system messages for each model
        expect(systemMessages.length).toBeGreaterThanOrEqual(3);
        expect(systemMessages[0].model).toBeOneOf(['qwen3-max', 'coder-model']);
        expect(systemMessages[1].model).toBe('qwen3-turbo');
        expect(systemMessages[2].model).toBe('qwen3-vl-plus');
      } finally {
        await q.close();
      }
    });
    it('should throw error when setModel is called on closed query', async () => {
      const q = query({
        prompt: 'Hello',
        options: {
          ...SHARED_TEST_OPTIONS,
          model: 'qwen3-max',
        },
      });
      await q.close();
      await expect(q.setModel('qwen3-turbo')).rejects.toThrow(
        'Query is closed',
      );
    });
  });
 });