Compare commits

...

8 Commits

Author SHA1 Message Date
github-actions[bot]
5dca06ee6a chore(release): v0.0.15-nightly.7 2025-10-16 00:11:27 +00:00
zhutao100
9d664623f5 Fix and update the token limits handling (#754)
* fix: make token limits regex normalize e.g. `some-model-1.1` -> `some-model` while preserving e.g. `gpt-4.1` as-is.

* feat: update token limits regex for latest models `GLM-4.6`, `deepseek-v3.2-exp`.

* feat: add exact token limit number 202752 per the model config file for `GLM-4.6`.
2025-10-14 16:11:55 +08:00
Mingholy
a779d44b38 fix: unable to quit when auth dialog is opened (#804) 2025-10-14 15:55:38 +08:00
Mingholy
40810945e0 fix: add missing trace info and cancellation events (#791)
* fix: add missing trace info and cancellation events

* fix: re-organize tool/request cancellation logging
2025-10-14 15:41:30 +08:00
Mingholy
e28255edb6 fix: token limits for qwen3-max (#724) 2025-10-14 15:40:20 +08:00
Mingholy
ae3223a317 fix: remove unavailable options (#685) 2025-10-14 15:39:48 +08:00
tanzhenxin
270dda4aa7 fix: invalid tool_calls request due to improper cancellation (#790) 2025-10-13 09:25:31 +08:00
Fan
d4fa15dd53 remove topp default value 0.0 (#785) 2025-10-09 15:41:57 +08:00
30 changed files with 404 additions and 82 deletions

11
.vscode/launch.json vendored
View File

@@ -108,6 +108,17 @@
"request": "attach",
"skipFiles": ["<node_internals>/**"],
"type": "node"
},
{
"type": "node",
"request": "launch",
"name": "Debug Current TS File",
"runtimeExecutable": "npx",
"runtimeArgs": ["tsx", "${file}"],
"skipFiles": ["<node_internals>/**"],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"env": {}
}
],
"inputs": [

12
package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "@qwen-code/qwen-code",
"version": "0.0.14",
"version": "0.0.15-nightly.7",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@qwen-code/qwen-code",
"version": "0.0.14",
"version": "0.0.15-nightly.7",
"workspaces": [
"packages/*"
],
@@ -13454,7 +13454,7 @@
},
"packages/cli": {
"name": "@qwen-code/qwen-code",
"version": "0.0.14",
"version": "0.0.15-nightly.7",
"dependencies": {
"@google/genai": "1.9.0",
"@iarna/toml": "^2.2.5",
@@ -13662,7 +13662,7 @@
},
"packages/core": {
"name": "@qwen-code/qwen-code-core",
"version": "0.0.14",
"version": "0.0.15-nightly.7",
"dependencies": {
"@google/genai": "1.13.0",
"@lvce-editor/ripgrep": "^1.6.0",
@@ -13788,7 +13788,7 @@
},
"packages/test-utils": {
"name": "@qwen-code/qwen-code-test-utils",
"version": "0.0.14",
"version": "0.0.15-nightly.7",
"dev": true,
"license": "Apache-2.0",
"devDependencies": {
@@ -13800,7 +13800,7 @@
},
"packages/vscode-ide-companion": {
"name": "qwen-code-vscode-ide-companion",
"version": "0.0.14",
"version": "0.0.15-nightly.7",
"license": "LICENSE",
"dependencies": {
"@modelcontextprotocol/sdk": "^1.15.1",

View File

@@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code",
"version": "0.0.14",
"version": "0.0.15-nightly.7",
"engines": {
"node": ">=20.0.0"
},
@@ -13,7 +13,7 @@
"url": "git+https://github.com/QwenLM/qwen-code.git"
},
"config": {
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.0.14"
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.0.15-nightly.7"
},
"scripts": {
"start": "node scripts/start.js",

View File

@@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code",
"version": "0.0.14",
"version": "0.0.15-nightly.7",
"description": "Qwen Code",
"repository": {
"type": "git",
@@ -25,7 +25,7 @@
"dist"
],
"config": {
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.0.14"
"sandboxImageUri": "ghcr.io/qwenlm/qwen-code:0.0.15-nightly.7"
},
"dependencies": {
"@google/genai": "1.9.0",

View File

@@ -913,7 +913,21 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => {
return;
}
// 1. Close other dialogs (highest priority)
/**
* For AuthDialog it is required to complete the authentication process,
* otherwise user cannot proceed to the next step.
* So a quit on AuthDialog should go with normal two press quit
* and without quit-confirm dialog.
*/
if (isAuthDialogOpen) {
setPressedOnce(true);
timerRef.current = setTimeout(() => {
setPressedOnce(false);
}, 500);
return;
}
//1. Close other dialogs (highest priority)
if (closeAnyOpenDialog()) {
return; // Dialog closed, end processing
}
@@ -934,6 +948,7 @@ const App = ({ config, settings, startupWarnings = [], version }: AppProps) => {
handleSlashCommand('/quit-confirm');
},
[
isAuthDialogOpen,
handleSlashCommand,
quitConfirmationRequest,
closeAnyOpenDialog,

View File

@@ -81,7 +81,7 @@ describe('QwenOAuthProgress', () => {
const output = lastFrame();
expect(output).toContain('MockSpinner(dots)');
expect(output).toContain('Waiting for Qwen OAuth authentication...');
expect(output).toContain('(Press ESC to cancel)');
expect(output).toContain('(Press ESC or CTRL+C to cancel)');
});
it('should render loading state with gray border', () => {
@@ -105,7 +105,7 @@ describe('QwenOAuthProgress', () => {
expect(output).toContain('MockSpinner(dots)');
expect(output).toContain('Waiting for authorization');
expect(output).toContain('Time remaining: 5:00');
expect(output).toContain('(Press ESC to cancel)');
expect(output).toContain('(Press ESC or CTRL+C to cancel)');
});
it('should display correct URL in Static component when QR code is generated', async () => {

View File

@@ -110,7 +110,7 @@ function StatusDisplay({
<Text color={Colors.Gray}>
Time remaining: {formatTime(timeRemaining)}
</Text>
<Text color={Colors.AccentPurple}>(Press ESC to cancel)</Text>
<Text color={Colors.AccentPurple}>(Press ESC or CTRL+C to cancel)</Text>
</Box>
</Box>
);
@@ -132,7 +132,7 @@ export function QwenOAuthProgress({
if (authStatus === 'timeout') {
// Any key press in timeout state should trigger cancel to return to auth dialog
onCancel();
} else if (key.escape) {
} else if (key.escape || (key.ctrl && input === 'c')) {
onCancel();
}
});
@@ -250,7 +250,9 @@ export function QwenOAuthProgress({
Time remaining: {Math.floor(timeRemaining / 60)}:
{(timeRemaining % 60).toString().padStart(2, '0')}
</Text>
<Text color={Colors.AccentPurple}>(Press ESC to cancel)</Text>
<Text color={Colors.AccentPurple}>
(Press ESC or CTRL+C to cancel)
</Text>
</Box>
</Box>
);

View File

@@ -61,16 +61,6 @@ export function useDialogClose(options: DialogCloseOptions) {
return true;
}
if (options.isAuthDialogOpen) {
// Mimic ESC behavior: only close if already authenticated (same as AuthDialog ESC logic)
if (options.selectedAuthType !== undefined) {
// Note: We don't await this since we want non-blocking behavior like ESC
void options.handleAuthSelect(undefined, SettingScope.User);
}
// Note: AuthDialog prevents ESC exit if not authenticated, we follow same logic
return true;
}
if (options.isEditorDialogOpen) {
// Mimic ESC behavior: call onExit() directly
options.exitEditorDialog();

View File

@@ -54,7 +54,11 @@ const MockedGeminiClientClass = vi.hoisted(() =>
const MockedUserPromptEvent = vi.hoisted(() =>
vi.fn().mockImplementation(() => {}),
);
const MockedApiCancelEvent = vi.hoisted(() =>
vi.fn().mockImplementation(() => {}),
);
const mockParseAndFormatApiError = vi.hoisted(() => vi.fn());
const mockLogApiCancel = vi.hoisted(() => vi.fn());
// Vision auto-switch mocks (hoisted)
const mockHandleVisionSwitch = vi.hoisted(() =>
@@ -71,7 +75,9 @@ vi.mock('@qwen-code/qwen-code-core', async (importOriginal) => {
GitService: vi.fn(),
GeminiClient: MockedGeminiClientClass,
UserPromptEvent: MockedUserPromptEvent,
ApiCancelEvent: MockedApiCancelEvent,
parseAndFormatApiError: mockParseAndFormatApiError,
logApiCancel: mockLogApiCancel,
};
});

View File

@@ -31,6 +31,8 @@ import {
ConversationFinishedEvent,
ApprovalMode,
parseAndFormatApiError,
logApiCancel,
ApiCancelEvent,
} from '@qwen-code/qwen-code-core';
import { type Part, type PartListUnion, FinishReason } from '@google/genai';
import type {
@@ -223,6 +225,16 @@ export const useGeminiStream = (
turnCancelledRef.current = true;
isSubmittingQueryRef.current = false;
abortControllerRef.current?.abort();
// Log API cancellation
const prompt_id = config.getSessionId() + '########' + getPromptCount();
const cancellationEvent = new ApiCancelEvent(
config.getModel(),
prompt_id,
config.getContentGeneratorConfig()?.authType,
);
logApiCancel(config, cancellationEvent);
if (pendingHistoryItemRef.current) {
addItem(pendingHistoryItemRef.current, Date.now());
}
@@ -242,6 +254,8 @@ export const useGeminiStream = (
setPendingHistoryItem,
onCancelSubmit,
pendingHistoryItemRef,
config,
getPromptCount,
]);
useKeypress(
@@ -448,6 +462,7 @@ export const useGeminiStream = (
if (turnCancelledRef.current) {
return;
}
if (pendingHistoryItemRef.current) {
if (pendingHistoryItemRef.current.type === 'tool_group') {
const updatedTools = pendingHistoryItemRef.current.tools.map(

View File

@@ -81,22 +81,6 @@ class GeminiAgent {
): Promise<acp.InitializeResponse> {
this.clientCapabilities = args.clientCapabilities;
const authMethods = [
{
id: AuthType.LOGIN_WITH_GOOGLE,
name: 'Log in with Google',
description: null,
},
{
id: AuthType.USE_GEMINI,
name: 'Use Gemini API key',
description:
'Requires setting the `GEMINI_API_KEY` environment variable',
},
{
id: AuthType.USE_VERTEX_AI,
name: 'Vertex AI',
description: null,
},
{
id: AuthType.USE_OPENAI,
name: 'Use OpenAI API key',
@@ -365,6 +349,7 @@ class Session {
function_name: fc.name ?? '',
function_args: args,
duration_ms: durationMs,
status: 'error',
success: false,
error: error.message,
tool_type:
@@ -483,6 +468,7 @@ class Session {
function_name: fc.name,
function_args: args,
duration_ms: durationMs,
status: 'success',
success: true,
prompt_id: promptId,
tool_type:

View File

@@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code-core",
"version": "0.0.14",
"version": "0.0.15-nightly.7",
"description": "Qwen Code Core",
"repository": {
"type": "git",

View File

@@ -434,8 +434,6 @@ describe('Gemini Client (client.ts)', () => {
config: {
abortSignal,
systemInstruction: getCoreSystemPrompt(''),
temperature: 0,
topP: 1,
tools: [
{
functionDeclarations: [
@@ -486,7 +484,6 @@ describe('Gemini Client (client.ts)', () => {
abortSignal,
systemInstruction: getCoreSystemPrompt(''),
temperature: 0.9,
topP: 1, // from default
topK: 20,
tools: [
{
@@ -2461,7 +2458,6 @@ ${JSON.stringify(
abortSignal,
systemInstruction: getCoreSystemPrompt(''),
temperature: 0.5,
topP: 1,
},
contents,
},

View File

@@ -115,10 +115,7 @@ export class GeminiClient {
private chat?: GeminiChat;
private contentGenerator?: ContentGenerator;
private readonly embeddingModel: string;
private readonly generateContentConfig: GenerateContentConfig = {
temperature: 0,
topP: 1,
};
private readonly generateContentConfig: GenerateContentConfig = {};
private sessionTurnCount = 0;
private readonly loopDetector: LoopDetectionService;

View File

@@ -401,7 +401,7 @@ export class CoreToolScheduler {
}
}
return {
const cancelledCall = {
request: currentCall.request,
tool: toolInstance,
invocation,
@@ -426,6 +426,8 @@ export class CoreToolScheduler {
durationMs,
outcome,
} as CancelledToolCall;
return cancelledCall;
}
case 'validating':
return {

View File

@@ -161,6 +161,9 @@ describe('ContentGenerationPipeline', () => {
top_p: 0.9,
max_tokens: 1000,
}),
expect.objectContaining({
signal: undefined,
}),
);
expect(mockConverter.convertOpenAIResponseToGemini).toHaveBeenCalledWith(
mockOpenAIResponse,
@@ -238,6 +241,9 @@ describe('ContentGenerationPipeline', () => {
expect.objectContaining({
tools: mockTools,
}),
expect.objectContaining({
signal: undefined,
}),
);
});
@@ -274,6 +280,30 @@ describe('ContentGenerationPipeline', () => {
request,
);
});
it('should pass abort signal to OpenAI client when provided', async () => {
const abortController = new AbortController();
const request: GenerateContentParameters = {
model: 'test-model',
contents: [{ parts: [{ text: 'Hello' }], role: 'user' }],
config: { abortSignal: abortController.signal },
};
(mockConverter.convertGeminiRequestToOpenAI as Mock).mockReturnValue([]);
(mockConverter.convertOpenAIResponseToGemini as Mock).mockReturnValue(
new GenerateContentResponse(),
);
(mockClient.chat.completions.create as Mock).mockResolvedValue({
choices: [{ message: { content: 'response' } }],
});
await pipeline.execute(request, 'test-id');
expect(mockClient.chat.completions.create).toHaveBeenCalledWith(
expect.any(Object),
expect.objectContaining({ signal: abortController.signal }),
);
});
});
describe('executeStream', () => {
@@ -338,6 +368,9 @@ describe('ContentGenerationPipeline', () => {
stream: true,
stream_options: { include_usage: true },
}),
expect.objectContaining({
signal: undefined,
}),
);
expect(mockTelemetryService.logStreamingSuccess).toHaveBeenCalledWith(
expect.objectContaining({
@@ -470,6 +503,42 @@ describe('ContentGenerationPipeline', () => {
);
});
it('should pass abort signal to OpenAI client for streaming requests', async () => {
const abortController = new AbortController();
const request: GenerateContentParameters = {
model: 'test-model',
contents: [{ parts: [{ text: 'Hello' }], role: 'user' }],
config: { abortSignal: abortController.signal },
};
const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
id: 'chunk-1',
choices: [{ delta: { content: 'Hello' }, finish_reason: 'stop' }],
};
},
};
(mockConverter.convertGeminiRequestToOpenAI as Mock).mockReturnValue([]);
(mockConverter.convertOpenAIChunkToGemini as Mock).mockReturnValue(
new GenerateContentResponse(),
);
(mockClient.chat.completions.create as Mock).mockResolvedValue(
mockStream,
);
const resultGenerator = await pipeline.executeStream(request, 'test-id');
for await (const _result of resultGenerator) {
// Consume stream
}
expect(mockClient.chat.completions.create).toHaveBeenCalledWith(
expect.any(Object),
expect.objectContaining({ signal: abortController.signal }),
);
});
it('should merge finishReason and usageMetadata from separate chunks', async () => {
// Arrange
const request: GenerateContentParameters = {
@@ -924,6 +993,9 @@ describe('ContentGenerationPipeline', () => {
top_p: 0.9, // Config parameter used since request overrides are not being applied in current implementation
max_tokens: 1000, // Config parameter used since request overrides are not being applied in current implementation
}),
expect.objectContaining({
signal: undefined,
}),
);
});
@@ -960,6 +1032,9 @@ describe('ContentGenerationPipeline', () => {
top_p: 0.9, // From config
max_tokens: 1000, // From config
}),
expect.objectContaining({
signal: undefined,
}),
);
});
@@ -1009,6 +1084,9 @@ describe('ContentGenerationPipeline', () => {
expect.objectContaining({
metadata: { promptId: userPromptId },
}),
expect.objectContaining({
signal: undefined,
}),
);
});
});

View File

@@ -48,6 +48,9 @@ export class ContentGenerationPipeline {
async (openaiRequest, context) => {
const openaiResponse = (await this.client.chat.completions.create(
openaiRequest,
{
signal: request.config?.abortSignal,
},
)) as OpenAI.Chat.ChatCompletion;
const geminiResponse =
@@ -78,6 +81,9 @@ export class ContentGenerationPipeline {
// Stage 1: Create OpenAI stream
const stream = (await this.client.chat.completions.create(
openaiRequest,
{
signal: request.config?.abortSignal,
},
)) as AsyncIterable<OpenAI.Chat.ChatCompletionChunk>;
// Stage 2: Process stream with conversion and logging
@@ -221,6 +227,12 @@ export class ContentGenerationPipeline {
mergedResponse.usageMetadata = lastResponse.usageMetadata;
}
// Copy other essential properties from the current response
mergedResponse.responseId = response.responseId;
mergedResponse.createTime = response.createTime;
mergedResponse.modelVersion = response.modelVersion;
mergedResponse.promptFeedback = response.promptFeedback;
// Update the collected responses with the merged response
collectedGeminiResponses[collectedGeminiResponses.length - 1] =
mergedResponse;
@@ -302,9 +314,9 @@ export class ContentGenerationPipeline {
};
const params = {
// Parameters with request fallback and defaults
temperature: getParameterValue('temperature', 'temperature', 0.0),
top_p: getParameterValue('top_p', 'topP', 1.0),
// Parameters with request fallback but no defaults
...addParameterIfDefined('temperature', 'temperature', 'temperature'),
...addParameterIfDefined('top_p', 'top_p', 'topP'),
// Max tokens (special case: different property names)
...addParameterIfDefined('max_tokens', 'max_tokens', 'maxOutputTokens'),

View File

@@ -58,9 +58,29 @@ describe('normalize', () => {
expect(normalize('gemini-2.0-flash-preview')).toBe('gemini-2.0-flash');
});
it('should remove version numbers with dots when they are at the end', () => {
expect(normalize('gpt-4.1.1-latest')).toBe('gpt-4.1.1');
it('should not remove "-latest" from specific Qwen model names', () => {
expect(normalize('qwen-plus-latest')).toBe('qwen-plus-latest');
expect(normalize('qwen-flash-latest')).toBe('qwen-flash-latest');
expect(normalize('qwen-vl-max-latest')).toBe('qwen-vl-max-latest');
});
it('should remove date like suffixes', () => {
expect(normalize('deepseek-r1-0528')).toBe('deepseek-r1');
});
it('should remove literal "-latest" "-exp" suffixes', () => {
expect(normalize('gpt-4.1-latest')).toBe('gpt-4.1');
expect(normalize('deepseek-v3.2-exp')).toBe('deepseek-v3.2');
});
it('should remove suffix version numbers with "v" prefix', () => {
expect(normalize('model-test-v1.1')).toBe('model-test');
expect(normalize('model-v1.1')).toBe('model');
});
it('should remove suffix version numbers w/o "v" prefix only if they are preceded by another dash', () => {
expect(normalize('model-test-1.1')).toBe('model-test');
expect(normalize('gpt-4.1')).toBe('gpt-4.1');
});
});
@@ -188,6 +208,9 @@ describe('tokenLimit', () => {
it('should return the correct limit for glm-4.5', () => {
expect(tokenLimit('glm-4.5')).toBe(131072);
});
it('should return the correct limit for glm-4.6', () => {
expect(tokenLimit('glm-4.6')).toBe(202752);
});
});
describe('Other models', () => {
@@ -200,6 +223,9 @@ describe('tokenLimit', () => {
it('should return the correct limit for deepseek-v3.1', () => {
expect(tokenLimit('deepseek-v3.1')).toBe(131072);
});
it('should return the correct limit for deepseek-v3.2', () => {
expect(tokenLimit('deepseek-v3.2-exp')).toBe(131072);
});
it('should return the correct limit for kimi-k2-instruct', () => {
expect(tokenLimit('kimi-k2-instruct')).toBe(131072);
});

View File

@@ -20,7 +20,7 @@ const LIMITS = {
'32k': 32_768,
'64k': 65_536,
'128k': 131_072,
'200k': 200_000, // vendor-declared decimal (OpenAI / Anthropic use 200k)
'200k': 200_000, // vendor-declared decimal, used by OpenAI, Anthropic, GLM etc.
'256k': 262_144,
'512k': 524_288,
'1m': 1_048_576,
@@ -49,15 +49,18 @@ export function normalize(model: string): string {
s = s.replace(/-preview/g, '');
// Special handling for Qwen model names that include "-latest" as part of the model name
if (!s.match(/^qwen-(?:plus|flash|vl-max)-latest$/)) {
// \d{6,} - Match 6 or more digits (dates) like -20250219 (6+ digit dates)
// \d+x\d+b - Match patterns like 4x8b, -7b, -70b
// v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3
// -\d+(?:\.\d+)+ - Match version numbers with dots (that are preceded by a dash),
// like -1.1, -2.0.1 but only when they're suffixes, Example: model-test-1.1 → model-test;
// Note: this does NOT match 4.1 in gpt-4.1 because there's no dash before 4.1 in that context.
// latest - Match the literal string "latest"
// Regex breakdown:
// -(?:...)$ - Non-capturing group for suffixes at the end of the string
// The following patterns are matched within the group:
// \d{4,} - Match 4 or more digits (dates) like -20250219 -0528 (4+ digit dates)
// \d+x\d+b - Match patterns like 4x8b, -7b, -70b
// v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3
// (?<=-[^-]+-)\d+(?:\.\d+)+ - Match version numbers with dots that are preceded by another dash,
// like -1.1, -2.0.1 but only when they are preceded by another dash, Example: model-test-1.1 → model-test;
// Note: this does NOT match 4.1 in gpt-4.1 because there's no dash before -4.1 in that context.
// latest|exp - Match the literal string "latest" or "exp"
s = s.replace(
/-(?:\d{6,}|\d+x\d+b|v\d+(?:\.\d+)*|-\d+(?:\.\d+)+|latest)$/g,
/-(?:\d{4,}|\d+x\d+b|v\d+(?:\.\d+)*|(?<=-[^-]+-)\d+(?:\.\d+)+|latest|exp)$/g,
'',
);
}
@@ -115,7 +118,7 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
[/^coder-model$/, LIMITS['1m']],
// Commercial Qwen3-Max-Preview: 256K token context
[/^qwen3-max-preview(-.*)?$/, LIMITS['256k']], // catches "qwen3-max-preview" and date variants
[/^qwen3-max(-preview)?(-.*)?$/, LIMITS['256k']], // catches "qwen3-max" or "qwen3-max-preview" and date variants
// Open-source Qwen3-Coder variants: 256K native
[/^qwen3-coder-.*$/, LIMITS['256k']],
@@ -149,18 +152,24 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
// -------------------
// Zhipu GLM
// -------------------
[/^glm-4\.5v.*$/, LIMITS['64k']],
[/^glm-4\.5-air.*$/, LIMITS['128k']],
[/^glm-4\.5.*$/, LIMITS['128k']],
[/^glm-4\.5v(?:-.*)?$/, LIMITS['64k']],
[/^glm-4\.5-air(?:-.*)?$/, LIMITS['128k']],
[/^glm-4\.5(?:-.*)?$/, LIMITS['128k']],
[/^glm-4\.6(?:-.*)?$/, 202_752 as unknown as TokenCount], // exact limit from the model config file
// -------------------
// DeepSeek / GPT-OSS / Kimi / Llama & Mistral examples
// DeepSeek
// -------------------
[/^deepseek$/, LIMITS['128k']],
[/^deepseek-r1(?:-.*)?$/, LIMITS['128k']],
[/^deepseek-v3(?:\.\d+)?(?:-.*)?$/, LIMITS['128k']],
// -------------------
// GPT-OSS / Kimi / Llama & Mistral examples
// -------------------
[/^deepseek-r1.*$/, LIMITS['128k']],
[/^deepseek-v3(?:\.1)?.*$/, LIMITS['128k']],
[/^kimi-k2-instruct.*$/, LIMITS['128k']],
[/^gpt-oss.*$/, LIMITS['128k']],
[/^llama-4-scout.*$/, LIMITS['10m'] as unknown as TokenCount], // ultra-long variants - handle carefully
[/^llama-4-scout.*$/, LIMITS['10m']],
[/^mistral-large-2.*$/, LIMITS['128k']],
];
@@ -179,8 +188,8 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
// Generic coder-model: same as qwen3-coder-plus (64K max output tokens)
[/^coder-model$/, LIMITS['64k']],
// Qwen3-Max-Preview: 65,536 max output tokens
[/^qwen3-max-preview(-.*)?$/, LIMITS['64k']],
// Qwen3-Max: 65,536 max output tokens
[/^qwen3-max(-preview)?(-.*)?$/, LIMITS['64k']],
// Qwen-VL-Max-Latest: 8,192 max output tokens
[/^qwen-vl-max-latest$/, LIMITS['8k']],

View File

@@ -84,6 +84,7 @@ export interface ToolCallRequestInfo {
args: Record<string, unknown>;
isClientInitiated: boolean;
prompt_id: string;
response_id?: string;
}
export interface ToolCallResponseInfo {
@@ -202,6 +203,7 @@ export class Turn {
readonly pendingToolCalls: ToolCallRequestInfo[];
private debugResponses: GenerateContentResponse[];
finishReason: FinishReason | undefined;
private currentResponseId?: string;
constructor(
private readonly chat: GeminiChat,
@@ -247,6 +249,11 @@ export class Turn {
this.debugResponses.push(resp);
// Track the current response ID for tool call correlation
if (resp.responseId) {
this.currentResponseId = resp.responseId;
}
const thoughtPart = resp.candidates?.[0]?.content?.parts?.[0];
if (thoughtPart?.thought) {
// Thought always has a bold "subject" part enclosed in double asterisks
@@ -346,6 +353,7 @@ export class Turn {
args,
isClientInitiated: false,
prompt_id: this.prompt_id,
response_id: this.currentResponseId,
};
this.pendingToolCalls.push(toolCallRequest);

View File

@@ -381,6 +381,7 @@ export class SubAgentScope {
let roundText = '';
let lastUsage: GenerateContentResponseUsageMetadata | undefined =
undefined;
let currentResponseId: string | undefined = undefined;
for await (const streamEvent of responseStream) {
if (abortController.signal.aborted) {
this.terminateMode = SubagentTerminateMode.CANCELLED;
@@ -395,6 +396,10 @@ export class SubAgentScope {
// Handle chunk events
if (streamEvent.type === 'chunk') {
const resp = streamEvent.value;
// Track the response ID for tool call correlation
if (resp.responseId) {
currentResponseId = resp.responseId;
}
if (resp.functionCalls) functionCalls.push(...resp.functionCalls);
const content = resp.candidates?.[0]?.content;
const parts = content?.parts || [];
@@ -455,6 +460,7 @@ export class SubAgentScope {
abortController,
promptId,
turnCounter,
currentResponseId,
);
} else {
// No tool calls — treat this as the model's final answer.
@@ -543,6 +549,7 @@ export class SubAgentScope {
* @param {FunctionCall[]} functionCalls - An array of `FunctionCall` objects to process.
* @param {ToolRegistry} toolRegistry - The tool registry to look up and execute tools.
* @param {AbortController} abortController - An `AbortController` to signal cancellation of tool executions.
* @param {string} responseId - Optional API response ID for correlation with tool calls.
* @returns {Promise<Content[]>} A promise that resolves to an array of `Content` parts representing the tool responses,
* which are then used to update the chat history.
*/
@@ -551,6 +558,7 @@ export class SubAgentScope {
abortController: AbortController,
promptId: string,
currentRound: number,
responseId?: string,
): Promise<Content[]> {
const toolResponseParts: Part[] = [];
@@ -704,6 +712,7 @@ export class SubAgentScope {
args,
isClientInitiated: true,
prompt_id: promptId,
response_id: responseId,
};
const description = this.getToolDescription(toolName, args);

View File

@@ -10,6 +10,7 @@ export const EVENT_USER_PROMPT = 'qwen-code.user_prompt';
export const EVENT_TOOL_CALL = 'qwen-code.tool_call';
export const EVENT_API_REQUEST = 'qwen-code.api_request';
export const EVENT_API_ERROR = 'qwen-code.api_error';
export const EVENT_API_CANCEL = 'qwen-code.api_cancel';
export const EVENT_API_RESPONSE = 'qwen-code.api_response';
export const EVENT_CLI_CONFIG = 'qwen-code.config';
export const EVENT_FLASH_FALLBACK = 'qwen-code.flash_fallback';

View File

@@ -17,6 +17,7 @@ export { SpanStatusCode, ValueType } from '@opentelemetry/api';
export { SemanticAttributes } from '@opentelemetry/semantic-conventions';
export {
logApiError,
logApiCancel,
logApiRequest,
logApiResponse,
logChatCompression,
@@ -35,6 +36,7 @@ export {
} from './sdk.js';
export {
ApiErrorEvent,
ApiCancelEvent,
ApiRequestEvent,
ApiResponseEvent,
ConversationFinishedEvent,
@@ -54,4 +56,5 @@ export type {
TelemetryEvent,
} from './types.js';
export * from './uiTelemetry.js';
export { QwenLogger } from './qwen-logger/qwen-logger.js';
export { DEFAULT_OTLP_ENDPOINT, DEFAULT_TELEMETRY_TARGET };

View File

@@ -550,6 +550,7 @@ describe('loggers', () => {
2,
),
duration_ms: 100,
status: 'success',
success: true,
decision: ToolCallDecision.ACCEPT,
prompt_id: 'prompt-id-1',
@@ -619,6 +620,7 @@ describe('loggers', () => {
2,
),
duration_ms: 100,
status: 'error',
success: false,
decision: ToolCallDecision.REJECT,
prompt_id: 'prompt-id-2',
@@ -691,6 +693,7 @@ describe('loggers', () => {
2,
),
duration_ms: 100,
status: 'success',
success: true,
decision: ToolCallDecision.MODIFY,
prompt_id: 'prompt-id-3',
@@ -762,6 +765,7 @@ describe('loggers', () => {
2,
),
duration_ms: 100,
status: 'success',
success: true,
prompt_id: 'prompt-id-4',
tool_type: 'native',
@@ -834,6 +838,7 @@ describe('loggers', () => {
2,
),
duration_ms: 100,
status: 'error',
success: false,
error: 'test-error',
'error.message': 'test-error',

View File

@@ -12,6 +12,7 @@ import { safeJsonStringify } from '../utils/safeJsonStringify.js';
import { UserAccountManager } from '../utils/userAccountManager.js';
import {
EVENT_API_ERROR,
EVENT_API_CANCEL,
EVENT_API_REQUEST,
EVENT_API_RESPONSE,
EVENT_CHAT_COMPRESSION,
@@ -45,6 +46,7 @@ import { QwenLogger } from './qwen-logger/qwen-logger.js';
import { isTelemetrySdkInitialized } from './sdk.js';
import type {
ApiErrorEvent,
ApiCancelEvent,
ApiRequestEvent,
ApiResponseEvent,
ChatCompressionEvent,
@@ -282,6 +284,32 @@ export function logApiError(config: Config, event: ApiErrorEvent): void {
);
}
export function logApiCancel(config: Config, event: ApiCancelEvent): void {
const uiEvent = {
...event,
'event.name': EVENT_API_CANCEL,
'event.timestamp': new Date().toISOString(),
} as UiEvent;
uiTelemetryService.addEvent(uiEvent);
QwenLogger.getInstance(config)?.logApiCancelEvent(event);
if (!isTelemetrySdkInitialized()) return;
const attributes: LogAttributes = {
...getCommonAttributes(config),
...event,
'event.name': EVENT_API_CANCEL,
'event.timestamp': new Date().toISOString(),
model_name: event.model,
};
const logger = logs.getLogger(SERVICE_NAME);
const logRecord: LogRecord = {
body: `API request cancelled for ${event.model}.`,
attributes,
};
logger.emit(logRecord);
}
export function logApiResponse(config: Config, event: ApiResponseEvent): void {
const uiEvent = {
...event,

View File

@@ -15,6 +15,7 @@ import type {
ApiRequestEvent,
ApiResponseEvent,
ApiErrorEvent,
ApiCancelEvent,
FileOperationEvent,
FlashFallbackEvent,
LoopDetectedEvent,
@@ -411,6 +412,7 @@ export class QwenLogger {
{
properties: {
prompt_id: event.prompt_id,
response_id: event.response_id,
},
snapshots: JSON.stringify({
function_name: event.function_name,
@@ -427,6 +429,19 @@ export class QwenLogger {
this.flushIfNeeded();
}
logApiCancelEvent(event: ApiCancelEvent): void {
const rumEvent = this.createActionEvent('api', 'api_cancel', {
properties: {
model: event.model,
prompt_id: event.prompt_id,
auth_type: event.auth_type,
},
});
this.enqueueLogEvent(rumEvent);
this.flushIfNeeded();
}
logFileOperationEvent(event: FileOperationEvent): void {
const rumEvent = this.createActionEvent(
'file_operation',

View File

@@ -127,11 +127,13 @@ export class ToolCallEvent implements BaseTelemetryEvent {
function_name: string;
function_args: Record<string, unknown>;
duration_ms: number;
success: boolean;
status: 'success' | 'error' | 'cancelled';
success: boolean; // Keep for backward compatibility
decision?: ToolCallDecision;
error?: string;
error_type?: string;
prompt_id: string;
response_id?: string;
tool_type: 'native' | 'mcp';
// eslint-disable-next-line @typescript-eslint/no-explicit-any
metadata?: { [key: string]: any };
@@ -142,13 +144,15 @@ export class ToolCallEvent implements BaseTelemetryEvent {
this.function_name = call.request.name;
this.function_args = call.request.args;
this.duration_ms = call.durationMs ?? 0;
this.success = call.status === 'success';
this.status = call.status;
this.success = call.status === 'success'; // Keep for backward compatibility
this.decision = call.outcome
? getDecisionFromOutcome(call.outcome)
: undefined;
this.error = call.response.error?.message;
this.error_type = call.response.errorType;
this.prompt_id = call.request.prompt_id;
this.response_id = call.request.response_id;
this.tool_type =
typeof call.tool !== 'undefined' && call.tool instanceof DiscoveredMCPTool
? 'mcp'
@@ -224,6 +228,22 @@ export class ApiErrorEvent implements BaseTelemetryEvent {
}
}
export class ApiCancelEvent implements BaseTelemetryEvent {
'event.name': 'api_cancel';
'event.timestamp': string;
model: string;
prompt_id: string;
auth_type?: string;
constructor(model: string, prompt_id: string, auth_type?: string) {
this['event.name'] = 'api_cancel';
this['event.timestamp'] = new Date().toISOString();
this.model = model;
this.prompt_id = prompt_id;
this.auth_type = auth_type;
}
}
export class ApiResponseEvent implements BaseTelemetryEvent {
'event.name': 'api_response';
'event.timestamp': string; // ISO 8601
@@ -542,6 +562,7 @@ export type TelemetryEvent =
| ToolCallEvent
| ApiRequestEvent
| ApiErrorEvent
| ApiCancelEvent
| ApiResponseEvent
| FlashFallbackEvent
| LoopDetectedEvent

View File

@@ -15,6 +15,7 @@ import {
EVENT_TOOL_CALL,
} from './constants.js';
import type {
CancelledToolCall,
CompletedToolCall,
ErroredToolCall,
SuccessfulToolCall,
@@ -25,7 +26,7 @@ import { MockTool } from '../test-utils/tools.js';
const createFakeCompletedToolCall = (
name: string,
success: boolean,
success: boolean | 'cancelled',
duration = 100,
outcome?: ToolConfirmationOutcome,
error?: Error,
@@ -39,7 +40,7 @@ const createFakeCompletedToolCall = (
};
const tool = new MockTool(name);
if (success) {
if (success === true) {
return {
status: 'success',
request,
@@ -63,6 +64,30 @@ const createFakeCompletedToolCall = (
durationMs: duration,
outcome,
} as SuccessfulToolCall;
} else if (success === 'cancelled') {
return {
status: 'cancelled',
request,
tool,
invocation: tool.build({ param: 'test' }),
response: {
callId: request.callId,
responseParts: [
{
functionResponse: {
id: request.callId,
name,
response: { error: 'Tool cancelled' },
},
},
],
error: new Error('Tool cancelled'),
errorType: ToolErrorType.UNKNOWN,
resultDisplay: 'Cancelled!',
},
durationMs: duration,
outcome,
} as CancelledToolCall;
} else {
return {
status: 'error',
@@ -411,6 +436,40 @@ describe('UiTelemetryService', () => {
});
});
it('should process a single cancelled ToolCallEvent', () => {
const toolCall = createFakeCompletedToolCall(
'test_tool',
'cancelled',
180,
ToolConfirmationOutcome.Cancel,
);
service.addEvent({
...structuredClone(new ToolCallEvent(toolCall)),
'event.name': EVENT_TOOL_CALL,
} as ToolCallEvent & { 'event.name': typeof EVENT_TOOL_CALL });
const metrics = service.getMetrics();
const { tools } = metrics;
expect(tools.totalCalls).toBe(1);
expect(tools.totalSuccess).toBe(0);
expect(tools.totalFail).toBe(1);
expect(tools.totalDurationMs).toBe(180);
expect(tools.totalDecisions[ToolCallDecision.REJECT]).toBe(1);
expect(tools.byName['test_tool']).toEqual({
count: 1,
success: 0,
fail: 1,
durationMs: 180,
decisions: {
[ToolCallDecision.ACCEPT]: 0,
[ToolCallDecision.REJECT]: 1,
[ToolCallDecision.MODIFY]: 0,
[ToolCallDecision.AUTO_ACCEPT]: 0,
},
});
});
it('should process a ToolCallEvent with modify decision', () => {
const toolCall = createFakeCompletedToolCall(
'test_tool',
@@ -637,6 +696,34 @@ describe('UiTelemetryService', () => {
expect(service.getLastPromptTokenCount()).toBe(0);
expect(spy).toHaveBeenCalledOnce();
});
it('should correctly set status field for success/error/cancelled calls', () => {
const successCall = createFakeCompletedToolCall(
'success_tool',
true,
100,
);
const errorCall = createFakeCompletedToolCall('error_tool', false, 150);
const cancelledCall = createFakeCompletedToolCall(
'cancelled_tool',
'cancelled',
200,
);
const successEvent = new ToolCallEvent(successCall);
const errorEvent = new ToolCallEvent(errorCall);
const cancelledEvent = new ToolCallEvent(cancelledCall);
// Verify status field is correctly set
expect(successEvent.status).toBe('success');
expect(errorEvent.status).toBe('error');
expect(cancelledEvent.status).toBe('cancelled');
// Verify backward compatibility with success field
expect(successEvent.success).toBe(true);
expect(errorEvent.success).toBe(false);
expect(cancelledEvent.success).toBe(false);
});
});
describe('Tool Call Event with Line Count Metadata', () => {

View File

@@ -1,6 +1,6 @@
{
"name": "@qwen-code/qwen-code-test-utils",
"version": "0.0.14",
"version": "0.0.15-nightly.7",
"private": true,
"main": "src/index.ts",
"license": "Apache-2.0",

View File

@@ -2,7 +2,7 @@
"name": "qwen-code-vscode-ide-companion",
"displayName": "Qwen Code Companion",
"description": "Enable Qwen Code with direct access to your VS Code workspace.",
"version": "0.0.14",
"version": "0.0.15-nightly.7",
"publisher": "qwenlm",
"icon": "assets/icon.png",
"repository": {