fix: auth hang when selecting qwen-oauth

2025-12-24 18:49:13 +00:00 · 2025-09-22 22:32:27 +08:00
15 changed files with 120 additions and 425 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -101,6 +101,13 @@
      "env": {
        "GEMINI_SANDBOX": "false"
      }
+    },
+    {
+      "name": "Attach by Process ID",
+      "processId": "${command:PickProcess}",
+      "request": "attach",
+      "skipFiles": ["<node_internals>/**"],
+      "type": "node"
    }
  ],
  "inputs": [
@@ -115,6 +122,12 @@
      "type": "promptString",
      "description": "Enter your prompt for non-interactive mode",
      "default": "Explain this code"
+    },
+    {
+      "id": "debugPort",
+      "type": "promptString",
+      "description": "Enter the debug port number (default: 9229)",
+      "default": "9229"
    }
  ]
 }
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,25 +1,5 @@
 # Changelog

-## 0.0.12
-
- Added vision model support for Qwen-OAuth authentication.
- Synced upstream `gemini-cli` to v0.3.4 with numerous improvements and bug fixes.
- Enhanced subagent functionality with system reminders and improved user experience.
- Added tool call type coercion for better compatibility.
- Fixed arrow key navigation issues on Windows.
- Fixed missing tool call chunks for OpenAI logging.
- Fixed system prompt issues to avoid malformed tool calls.
- Fixed terminal flicker when subagent is executing.
- Fixed duplicate subagents configuration when running in home directory.
- Fixed Esc key unable to cancel subagent dialog.
- Added confirmation prompt for `/init` command when context file exists.
- Added `skipLoopDetection` configuration option.
- Fixed `is_background` parameter reset issues.
- Enhanced Windows compatibility with multi-line paste handling.
- Improved subagent documentation and branding consistency.
- Fixed various linting errors and improved code quality.
- Miscellaneous improvements and bug fixes.
-
 ## 0.0.11

 - Added subagents feature with file-based configuration system for specialized AI assistants.
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@qwen-code/qwen-code",
-  "version": "0.0.12",
+  "version": "0.0.11",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@qwen-code/qwen-code",
-      "version": "0.0.12",
+      "version": "0.0.11",
      "workspaces": [
        "packages/*"
      ],
@@ -13454,7 +13454,7 @@
    },
    "packages/cli": {
      "name": "@qwen-code/qwen-code",
-      "version": "0.0.12",
+      "version": "0.0.11",
      "dependencies": {
        "@google/genai": "1.9.0",
        "@iarna/toml": "^2.2.5",
@@ -13662,7 +13662,7 @@
    },
    "packages/core": {
      "name": "@qwen-code/qwen-code-core",
-      "version": "0.0.12",
+      "version": "0.0.11",
      "dependencies": {
        "@google/genai": "1.13.0",
        "@lvce-editor/ripgrep": "^1.6.0",
@@ -13788,7 +13788,7 @@
    },
    "packages/test-utils": {
      "name": "@qwen-code/qwen-code-test-utils",
-      "version": "0.0.12",
+      "version": "0.0.11",
      "dev": true,
      "license": "Apache-2.0",
      "devDependencies": {
@@ -13800,7 +13800,7 @@
    },
    "packages/vscode-ide-companion": {
      "name": "qwen-code-vscode-ide-companion",
-      "version": "0.0.12",
+      "version": "0.0.11",
      "license": "LICENSE",
      "dependencies": {
        "@modelcontextprotocol/sdk": "^1.15.1",
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@qwen-code/qwen-code",
-  "version": "0.0.12",
+  "version": "0.0.11",
  "engines": {
    "node": ">=20.0.0"
  },
@@ -13,7 +13,7 @@
    "url": "git+https://github.com/QwenLM/qwen-code.git"
  },
  "config": {
-    "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.0.12"
+    "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.0.11"
  },
  "scripts": {
    "start": "node scripts/start.js",
--- a/packages/cli/package.json
+++ b/packages/cli/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@qwen-code/qwen-code",
-  "version": "0.0.12",
+  "version": "0.0.11",
  "description": "Qwen Code",
  "repository": {
    "type": "git",
@@ -25,7 +25,7 @@
    "dist"
  ],
  "config": {
-    "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.0.12"
+    "sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.0.11"
  },
  "dependencies": {
    "@google/genai": "1.9.0",
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@qwen-code/qwen-code-core",
-  "version": "0.0.12",
+  "version": "0.0.11",
  "description": "Qwen Code Core",
  "repository": {
    "type": "git",
--- a/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts
+++ b/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts
@@ -560,146 +560,4 @@ describe('DashScopeOpenAICompatibleProvider', () => {
      ]);
    });
  });
-
-  describe('output token limits', () => {
-    it('should limit max_tokens when it exceeds model limit for qwen3-coder-plus', () => {
-      const request: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'qwen3-coder-plus',
-        messages: [{ role: 'user', content: 'Hello' }],
-        max_tokens: 100000, // Exceeds the 65536 limit
-      };
-
-      const result = provider.buildRequest(request, 'test-prompt-id');
-
-      expect(result.max_tokens).toBe(65536); // Should be limited to model's output limit
-    });
-
-    it('should limit max_tokens when it exceeds model limit for qwen-vl-max-latest', () => {
-      const request: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'qwen-vl-max-latest',
-        messages: [{ role: 'user', content: 'Hello' }],
-        max_tokens: 20000, // Exceeds the 8192 limit
-      };
-
-      const result = provider.buildRequest(request, 'test-prompt-id');
-
-      expect(result.max_tokens).toBe(8192); // Should be limited to model's output limit
-    });
-
-    it('should not modify max_tokens when it is within model limit', () => {
-      const request: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'qwen3-coder-plus',
-        messages: [{ role: 'user', content: 'Hello' }],
-        max_tokens: 1000, // Within the 65536 limit
-      };
-
-      const result = provider.buildRequest(request, 'test-prompt-id');
-
-      expect(result.max_tokens).toBe(1000); // Should remain unchanged
-    });
-
-    it('should not add max_tokens when not present in request', () => {
-      const request: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'qwen3-coder-plus',
-        messages: [{ role: 'user', content: 'Hello' }],
-        // No max_tokens parameter
-      };
-
-      const result = provider.buildRequest(request, 'test-prompt-id');
-
-      expect(result.max_tokens).toBeUndefined(); // Should remain undefined
-    });
-
-    it('should handle null max_tokens parameter', () => {
-      const request: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'qwen3-coder-plus',
-        messages: [{ role: 'user', content: 'Hello' }],
-        max_tokens: null,
-      };
-
-      const result = provider.buildRequest(request, 'test-prompt-id');
-
-      expect(result.max_tokens).toBeNull(); // Should remain null
-    });
-
-    it('should use default output limit for unknown models', () => {
-      const request: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'unknown-model',
-        messages: [{ role: 'user', content: 'Hello' }],
-        max_tokens: 10000, // Exceeds the default 4096 limit
-      };
-
-      const result = provider.buildRequest(request, 'test-prompt-id');
-
-      expect(result.max_tokens).toBe(4096); // Should be limited to default output limit
-    });
-
-    it('should preserve other request parameters when limiting max_tokens', () => {
-      const request: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'qwen3-coder-plus',
-        messages: [{ role: 'user', content: 'Hello' }],
-        max_tokens: 100000, // Will be limited
-        temperature: 0.8,
-        top_p: 0.9,
-        frequency_penalty: 0.1,
-        presence_penalty: 0.2,
-        stop: ['END'],
-        user: 'test-user',
-      };
-
-      const result = provider.buildRequest(request, 'test-prompt-id');
-
-      // max_tokens should be limited
-      expect(result.max_tokens).toBe(65536);
-
-      // Other parameters should be preserved
-      expect(result.temperature).toBe(0.8);
-      expect(result.top_p).toBe(0.9);
-      expect(result.frequency_penalty).toBe(0.1);
-      expect(result.presence_penalty).toBe(0.2);
-      expect(result.stop).toEqual(['END']);
-      expect(result.user).toBe('test-user');
-    });
-
-    it('should work with vision models and output token limits', () => {
-      const request: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'qwen-vl-max-latest',
-        messages: [
-          {
-            role: 'user',
-            content: [
-              { type: 'text', text: 'Look at this image:' },
-              {
-                type: 'image_url',
-                image_url: { url: 'https://example.com/image.jpg' },
-              },
-            ],
-          },
-        ],
-        max_tokens: 20000, // Exceeds the 8192 limit
-      };
-
-      const result = provider.buildRequest(request, 'test-prompt-id');
-
-      expect(result.max_tokens).toBe(8192); // Should be limited
-      expect(
-        (result as { vl_high_resolution_images?: boolean })
-          .vl_high_resolution_images,
-      ).toBe(true); // Vision-specific parameter should be preserved
-    });
-
-    it('should handle streaming requests with output token limits', () => {
-      const request: OpenAI.Chat.ChatCompletionCreateParams = {
-        model: 'qwen3-coder-plus',
-        messages: [{ role: 'user', content: 'Hello' }],
-        max_tokens: 100000, // Exceeds the 65536 limit
-        stream: true,
-      };
-
-      const result = provider.buildRequest(request, 'test-prompt-id');
-
-      expect(result.max_tokens).toBe(65536); // Should be limited
-      expect(result.stream).toBe(true); // Streaming should be preserved
-    });
-  });
 });
--- a/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts
+++ b/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts
@@ -3,7 +3,6 @@ import type { Config } from '../../../config/config.js';
 import type { ContentGeneratorConfig } from '../../contentGenerator.js';
 import { AuthType } from '../../contentGenerator.js';
 import { DEFAULT_TIMEOUT, DEFAULT_MAX_RETRIES } from '../constants.js';
-import { tokenLimit } from '../../tokenLimits.js';
 import type {
  OpenAICompatibleProvider,
  DashScopeRequestMetadata,
@@ -66,19 +65,6 @@ export class DashScopeOpenAICompatibleProvider
    });
  }

-  /**
-   * Build and configure the request for DashScope API.
-   *
-   * This method applies DashScope-specific configurations including:
-   * - Cache control for system and user messages
-   * - Output token limits based on model capabilities
-   * - Vision model specific parameters (vl_high_resolution_images)
-   * - Request metadata for session tracking
-   *
-   * @param request - The original chat completion request parameters
-   * @param userPromptId - Unique identifier for the user prompt for session tracking
-   * @returns Configured request with DashScope-specific parameters applied
-   */
  buildRequest(
    request: OpenAI.Chat.ChatCompletionCreateParams,
    userPromptId: string,
@@ -93,28 +79,21 @@ export class DashScopeOpenAICompatibleProvider
      messages = this.addDashScopeCacheControl(messages, cacheTarget);
    }

-    // Apply output token limits based on model capabilities
-    // This ensures max_tokens doesn't exceed the model's maximum output limit
-    const requestWithTokenLimits = this.applyOutputTokenLimit(
-      request,
-      request.model,
-    );
-
    if (request.model.startsWith('qwen-vl')) {
      return {
-        ...requestWithTokenLimits,
+        ...request,
        messages,
        ...(this.buildMetadata(userPromptId) || {}),
        /* @ts-expect-error dashscope exclusive */
        vl_high_resolution_images: true,
-      } as OpenAI.Chat.ChatCompletionCreateParams;
+      };
    }

    return {
-      ...requestWithTokenLimits, // Preserve all original parameters including sampling params and adjusted max_tokens
+      ...request, // Preserve all original parameters including sampling params
      messages,
      ...(this.buildMetadata(userPromptId) || {}),
-    } as OpenAI.Chat.ChatCompletionCreateParams;
+    };
  }

  buildMetadata(userPromptId: string): DashScopeRequestMetadata {
@@ -267,41 +246,6 @@ export class DashScopeOpenAICompatibleProvider
    return contentArray;
  }

-  /**
-   * Apply output token limit to a request's max_tokens parameter.
-   *
-   * Ensures that existing max_tokens parameters don't exceed the model's maximum output
-   * token limit. Only modifies max_tokens when already present in the request.
-   *
-   * @param request - The chat completion request parameters
-   * @param model - The model name to get the output token limit for
-   * @returns The request with max_tokens adjusted to respect the model's limits (if present)
-   */
-  private applyOutputTokenLimit<T extends { max_tokens?: number | null }>(
-    request: T,
-    model: string,
-  ): T {
-    const currentMaxTokens = request.max_tokens;
-
-    // Only process if max_tokens is already present in the request
-    if (currentMaxTokens === undefined || currentMaxTokens === null) {
-      return request; // No max_tokens parameter, return unchanged
-    }
-
-    const modelLimit = tokenLimit(model, 'output');
-
-    // If max_tokens exceeds the model limit, cap it to the model's limit
-    if (currentMaxTokens > modelLimit) {
-      return {
-        ...request,
-        max_tokens: modelLimit,
-      };
-    }
-
-    // If max_tokens is within the limit, return the request unchanged
-    return request;
-  }
-
  /**
   * Check if cache control should be disabled based on configuration.
   *
--- a/packages/core/src/core/tokenLimits.test.ts
+++ b/packages/core/src/core/tokenLimits.test.ts
@@ -1,10 +1,5 @@
 import { describe, it, expect } from 'vitest';
-import {
-  normalize,
-  tokenLimit,
-  DEFAULT_TOKEN_LIMIT,
-  DEFAULT_OUTPUT_TOKEN_LIMIT,
-} from './tokenLimits.js';
+import { normalize, tokenLimit, DEFAULT_TOKEN_LIMIT } from './tokenLimits.js';

 describe('normalize', () => {
  it('should lowercase and trim the model string', () => {
@@ -230,96 +225,3 @@ describe('tokenLimit', () => {
    expect(tokenLimit('CLAUDE-3.5-SONNET')).toBe(200000);
  });
 });
-
-describe('tokenLimit with output type', () => {
-  describe('Qwen models with output limits', () => {
-    it('should return the correct output limit for qwen3-coder-plus', () => {
-      expect(tokenLimit('qwen3-coder-plus', 'output')).toBe(65536);
-      expect(tokenLimit('qwen3-coder-plus-20250601', 'output')).toBe(65536);
-    });
-
-    it('should return the correct output limit for qwen-vl-max-latest', () => {
-      expect(tokenLimit('qwen-vl-max-latest', 'output')).toBe(8192);
-    });
-  });
-
-  describe('Default output limits', () => {
-    it('should return the default output limit for unknown models', () => {
-      expect(tokenLimit('unknown-model', 'output')).toBe(
-        DEFAULT_OUTPUT_TOKEN_LIMIT,
-      );
-      expect(tokenLimit('gpt-4', 'output')).toBe(DEFAULT_OUTPUT_TOKEN_LIMIT);
-      expect(tokenLimit('claude-3.5-sonnet', 'output')).toBe(
-        DEFAULT_OUTPUT_TOKEN_LIMIT,
-      );
-    });
-
-    it('should return the default output limit for models without specific output patterns', () => {
-      expect(tokenLimit('qwen3-coder-7b', 'output')).toBe(
-        DEFAULT_OUTPUT_TOKEN_LIMIT,
-      );
-      expect(tokenLimit('qwen-plus', 'output')).toBe(
-        DEFAULT_OUTPUT_TOKEN_LIMIT,
-      );
-      expect(tokenLimit('qwen-vl-max', 'output')).toBe(
-        DEFAULT_OUTPUT_TOKEN_LIMIT,
-      );
-    });
-  });
-
-  describe('Input vs Output limits comparison', () => {
-    it('should return different limits for input vs output for qwen3-coder-plus', () => {
-      expect(tokenLimit('qwen3-coder-plus', 'input')).toBe(1048576); // 1M input
-      expect(tokenLimit('qwen3-coder-plus', 'output')).toBe(65536); // 64K output
-    });
-
-    it('should return different limits for input vs output for qwen-vl-max-latest', () => {
-      expect(tokenLimit('qwen-vl-max-latest', 'input')).toBe(131072); // 128K input
-      expect(tokenLimit('qwen-vl-max-latest', 'output')).toBe(8192); // 8K output
-    });
-
-    it('should return same default limits for unknown models', () => {
-      expect(tokenLimit('unknown-model', 'input')).toBe(DEFAULT_TOKEN_LIMIT); // 128K input
-      expect(tokenLimit('unknown-model', 'output')).toBe(
-        DEFAULT_OUTPUT_TOKEN_LIMIT,
-      ); // 4K output
-    });
-  });
-
-  describe('Backward compatibility', () => {
-    it('should default to input type when no type is specified', () => {
-      expect(tokenLimit('qwen3-coder-plus')).toBe(1048576); // Should be input limit
-      expect(tokenLimit('qwen-vl-max-latest')).toBe(131072); // Should be input limit
-      expect(tokenLimit('unknown-model')).toBe(DEFAULT_TOKEN_LIMIT); // Should be input default
-    });
-
-    it('should work with explicit input type', () => {
-      expect(tokenLimit('qwen3-coder-plus', 'input')).toBe(1048576);
-      expect(tokenLimit('qwen-vl-max-latest', 'input')).toBe(131072);
-      expect(tokenLimit('unknown-model', 'input')).toBe(DEFAULT_TOKEN_LIMIT);
-    });
-  });
-
-  describe('Model normalization with output limits', () => {
-    it('should handle normalized model names for output limits', () => {
-      expect(tokenLimit('QWEN3-CODER-PLUS', 'output')).toBe(65536);
-      expect(tokenLimit('qwen3-coder-plus-20250601', 'output')).toBe(65536);
-      expect(tokenLimit('QWEN-VL-MAX-LATEST', 'output')).toBe(8192);
-    });
-
-    it('should handle complex model strings for output limits', () => {
-      expect(
-        tokenLimit(
-          '  a/b/c|QWEN3-CODER-PLUS:qwen3-coder-plus-2024-05-13  ',
-          'output',
-        ),
-      ).toBe(65536);
-      expect(
-        tokenLimit(
-          'provider/qwen-vl-max-latest:qwen-vl-max-latest-v1',
-          'output',
-        ),
-      ).toBe(8192);
-    });
-  });
-});
--- a/packages/core/src/core/tokenLimits.ts
+++ b/packages/core/src/core/tokenLimits.ts
@@ -1,15 +1,7 @@
 type Model = string;
 type TokenCount = number;

-/**
- * Token limit types for different use cases.
- * - 'input': Maximum input context window size
- * - 'output': Maximum output tokens that can be generated in a single response
- */
-export type TokenLimitType = 'input' | 'output';
-
 export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072; // 128K (power-of-two)
-export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 4_096; // 4K tokens

 /**
 * Accurate numeric limits:
@@ -26,10 +18,6 @@ const LIMITS = {
  '1m': 1_048_576,
  '2m': 2_097_152,
  '10m': 10_485_760, // 10 million tokens
-  // Output token limits (typically much smaller than input limits)
-  '4k': 4_096,
-  '8k': 8_192,
-  '16k': 16_384,
 } as const;

 /** Robust normalizer: strips provider prefixes, pipes/colons, date/version suffixes, etc. */
@@ -48,7 +36,7 @@ export function normalize(model: string): string {
  // - dates (e.g., -20250219), -v1, version numbers, 'latest', 'preview' etc.
  s = s.replace(/-preview/g, '');
  // Special handling for Qwen model names that include "-latest" as part of the model name
-  if (!s.match(/^qwen-(?:plus|flash|vl-max)-latest$/)) {
+  if (!s.match(/^qwen-(?:plus|flash)-latest$/)) {
    // \d{6,} - Match 6 or more digits (dates) like -20250219 (6+ digit dates)
    // \d+x\d+b - Match patterns like 4x8b, -7b, -70b
    // v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3
@@ -154,48 +142,16 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
  [/^mistral-large-2.*$/, LIMITS['128k']],
 ];

-/**
- * Output token limit patterns for specific model families.
- * These patterns define the maximum number of tokens that can be generated
- * in a single response for specific models.
- */
-const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
-  // -------------------
-  // Alibaba / Qwen - DashScope Models
-  // -------------------
-  // Qwen3-Coder-Plus: 65,536 max output tokens
-  [/^qwen3-coder-plus(-.*)?$/, LIMITS['64k']],
-
-  // Qwen-VL-Max-Latest: 8,192 max output tokens
-  [/^qwen-vl-max-latest$/, LIMITS['8k']],
-];
-
-/**
- * Return the token limit for a model string based on the specified type.
- *
- * This function determines the maximum number of tokens for either input context
- * or output generation based on the model and token type. It uses the same
- * normalization logic for consistency across both input and output limits.
- *
- * @param model - The model name to get the token limit for
- * @param type - The type of token limit ('input' for context window, 'output' for generation)
- * @returns The maximum number of tokens allowed for this model and type
- */
-export function tokenLimit(
-  model: Model,
-  type: TokenLimitType = 'input',
-): TokenCount {
+/** Return the token limit for a model string (uses normalize + ordered regex list). */
+export function tokenLimit(model: Model): TokenCount {
  const norm = normalize(model);

-  // Choose the appropriate patterns based on token type
-  const patterns = type === 'output' ? OUTPUT_PATTERNS : PATTERNS;
-
-  for (const [regex, limit] of patterns) {
+  for (const [regex, limit] of PATTERNS) {
    if (regex.test(norm)) {
      return limit;
    }
  }

-  // Return appropriate default based on token type
-  return type === 'output' ? DEFAULT_OUTPUT_TOKEN_LIMIT : DEFAULT_TOKEN_LIMIT;
+  // final fallback: DEFAULT_TOKEN_LIMIT (power-of-two 128K)
+  return DEFAULT_TOKEN_LIMIT;
 }
--- a/packages/core/src/qwen/qwenOAuth2.ts
+++ b/packages/core/src/qwen/qwenOAuth2.ts
@@ -712,8 +712,6 @@ async function authWithQwenDeviceFlow(
            `Polling... (attempt ${attempt + 1}/${maxAttempts})`,
          );

-          process.stdout.write('.');
-
          // Wait with cancellation check every 100ms
          await new Promise<void>((resolve) => {
            const checkInterval = 100; // Check every 100ms
--- a/packages/core/src/qwen/sharedTokenManager.test.ts
+++ b/packages/core/src/qwen/sharedTokenManager.test.ts
@@ -901,5 +901,37 @@ describe('SharedTokenManager', () => {
        );
      }
    });
+
+    it('should properly clean up timeout when file operation completes before timeout', async () => {
+      const tokenManager = SharedTokenManager.getInstance();
+      tokenManager.clearCache();
+
+      const mockClient = {
+        getCredentials: vi.fn().mockReturnValue(null),
+        setCredentials: vi.fn(),
+        getAccessToken: vi.fn(),
+        requestDeviceAuthorization: vi.fn(),
+        pollDeviceToken: vi.fn(),
+        refreshAccessToken: vi.fn(),
+      };
+
+      // Mock clearTimeout to verify it's called
+      const clearTimeoutSpy = vi.spyOn(global, 'clearTimeout');
+
+      // Mock file stat to resolve quickly (before timeout)
+      mockFs.stat.mockResolvedValue({ mtimeMs: 12345 } as Stats);
+
+      // Call checkAndReloadIfNeeded which uses withTimeout internally
+      const checkMethod = getPrivateProperty(
+        tokenManager,
+        'checkAndReloadIfNeeded',
+      ) as (client?: IQwenOAuth2Client) => Promise<void>;
+      await checkMethod.call(tokenManager, mockClient);
+
+      // Verify that clearTimeout was called to clean up the timer
+      expect(clearTimeoutSpy).toHaveBeenCalled();
+
+      clearTimeoutSpy.mockRestore();
+    });
  });
 });
--- a/packages/core/src/qwen/sharedTokenManager.ts
+++ b/packages/core/src/qwen/sharedTokenManager.ts
@@ -290,6 +290,36 @@ export class SharedTokenManager {
    }
  }

+  /**
+   * Utility method to add a timeout to any promise operation.
+   * Clears the timer once the wrapped promise settles (success or failure).
+   */
+  private withTimeout<T>(
+    promise: Promise<T>,
+    timeoutMs: number,
+    operationType = 'Operation',
+  ): Promise<T> {
+    let timeoutId: NodeJS.Timeout;
+
+    return Promise.race([
+      promise.finally(() => {
+        // Clear timeout when main promise completes (success or failure)
+        if (timeoutId) {
+          clearTimeout(timeoutId);
+        }
+      }),
+      new Promise<never>((_, reject) => {
+        timeoutId = setTimeout(
+          () =>
+            reject(
+              new Error(`${operationType} timed out after ${timeoutMs}ms`),
+            ),
+          timeoutMs,
+        );
+      }),
+    ]);
+  }
+
  /**
   * Perform the actual file check and reload operation
   * This is separated to enable proper promise-based synchronization
@@ -303,25 +333,12 @@ export class SharedTokenManager {

    try {
      const filePath = this.getCredentialFilePath();
-      // Add timeout to file stat operation
-      const withTimeout = async <T>(
-        promise: Promise<T>,
-        timeoutMs: number,
-      ): Promise<T> =>
-        Promise.race([
-          promise,
-          new Promise<never>((_, reject) =>
-            setTimeout(
-              () =>
-                reject(
-                  new Error(`File operation timed out after ${timeoutMs}ms`),
-                ),
-              timeoutMs,
-            ),
-          ),
-        ]);

-      const stats = await withTimeout(fs.stat(filePath), 3000);
+      const stats = await this.withTimeout(
+        fs.stat(filePath),
+        3000,
+        'File operation',
+      );
      const fileModTime = stats.mtimeMs;

      // Reload credentials if file has been modified since last cache
@@ -451,7 +468,7 @@ export class SharedTokenManager {
      // Check if we have a refresh token before attempting refresh
      const currentCredentials = qwenClient.getCredentials();
      if (!currentCredentials.refresh_token) {
-        console.debug('create a NO_REFRESH_TOKEN error');
+        // console.debug('create a NO_REFRESH_TOKEN error');
        throw new TokenManagerError(
          TokenError.NO_REFRESH_TOKEN,
          'No refresh token available for token refresh',
@@ -589,26 +606,12 @@ export class SharedTokenManager {
    const dirPath = path.dirname(filePath);
    const tempPath = `${filePath}.tmp.${randomUUID()}`;

-    // Add timeout wrapper for file operations
-    const withTimeout = async <T>(
-      promise: Promise<T>,
-      timeoutMs: number,
-    ): Promise<T> =>
-      Promise.race([
-        promise,
-        new Promise<never>((_, reject) =>
-          setTimeout(
-            () => reject(new Error(`Operation timed out after ${timeoutMs}ms`)),
-            timeoutMs,
-          ),
-        ),
-      ]);
-
    // Create directory with restricted permissions
    try {
-      await withTimeout(
+      await this.withTimeout(
        fs.mkdir(dirPath, { recursive: true, mode: 0o700 }),
        5000,
+        'File operation',
      );
    } catch (error) {
      throw new TokenManagerError(
@@ -622,21 +625,30 @@ export class SharedTokenManager {

    try {
      // Write to temporary file first with restricted permissions
-      await withTimeout(
+      await this.withTimeout(
        fs.writeFile(tempPath, credString, { mode: 0o600 }),
        5000,
+        'File operation',
      );

      // Atomic move to final location
-      await withTimeout(fs.rename(tempPath, filePath), 5000);
+      await this.withTimeout(
+        fs.rename(tempPath, filePath),
+        5000,
+        'File operation',
+      );

      // Update cached file modification time atomically after successful write
-      const stats = await withTimeout(fs.stat(filePath), 5000);
+      const stats = await this.withTimeout(
+        fs.stat(filePath),
+        5000,
+        'File operation',
+      );
      this.memoryCache.fileModTime = stats.mtimeMs;
    } catch (error) {
      // Clean up temp file if it exists
      try {
-        await withTimeout(fs.unlink(tempPath), 1000);
+        await this.withTimeout(fs.unlink(tempPath), 1000, 'File operation');
      } catch (_cleanupError) {
        // Ignore cleanup errors - temp file might not exist
      }
--- a/packages/test-utils/package.json
+++ b/packages/test-utils/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@qwen-code/qwen-code-test-utils",
-  "version": "0.0.12",
+  "version": "0.0.11",
  "private": true,
  "main": "src/index.ts",
  "license": "Apache-2.0",
--- a/packages/vscode-ide-companion/package.json
+++ b/packages/vscode-ide-companion/package.json
@@ -2,7 +2,7 @@
  "name": "qwen-code-vscode-ide-companion",
  "displayName": "Qwen Code Companion",
  "description": "Enable Qwen Code with direct access to your VS Code workspace.",
-  "version": "0.0.12",
+  "version": "0.0.11",
  "publisher": "qwenlm",
  "icon": "assets/icon.png",
  "repository": {