Update: compress/hard constrained token usage (#136)

* setup truncation & folder structure * fix: xml in prompt; qwen code in stats page * fix: clear & continue logic * preflight * add maxSessionLimit in README
2025-12-19 09:33:53 +00:00 · 2025-07-30 18:14:24 +08:00
parent a08bcb2f41
commit df5c4e8079
29 changed files with 1117 additions and 386 deletions
--- a/README.md
+++ b/README.md
@@ -45,6 +45,17 @@ npm install
 npm install -g .
 ```

+Qwen Code supports a session token limit. To cap token usage, set `sessionTokenLimit` in your `.qwen/settings.json` file.
+For example, to set the session token limit to 32000:
+
+```json
+{
+  "sessionTokenLimit": 32000
+}
+```
+
+The session token limit is the maximum number of tokens that can be used in one chat (not the total usage accumulated across multiple tool-call rounds). If you reach the limit, use the `/compress` command to compress the history and continue, or use the `/clear` command to clear the history.
+
 ### API Configuration

 Set your Qwen API key (In Qwen Code project, you can also set your API key in `.env` file). the `.env` file should be placed in the root directory of your current project.
--- a/package-lock.json
+++ b/package-lock.json
@@ -10582,6 +10582,12 @@
        "tslib": "^2"
      }
    },
+    "node_modules/tiktoken": {
+      "version": "1.0.21",
+      "resolved": "https://registry.npmjs.org/tiktoken/-/tiktoken-1.0.21.tgz",
+      "integrity": "sha512-/kqtlepLMptX0OgbYD9aMYbM7EFrMZCL7EoHM8Psmg2FuhXoo/bH64KqOiZGGwa6oS9TPdSEDKBnV2LuB8+5vQ==",
+      "license": "MIT"
+    },
    "node_modules/tinybench": {
      "version": "2.9.0",
      "resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz",
@@ -12143,6 +12149,7 @@
        "shell-quote": "^1.8.3",
        "simple-git": "^3.28.0",
        "strip-ansi": "^7.1.0",
+        "tiktoken": "^1.0.21",
        "undici": "^7.10.0",
        "ws": "^8.18.0"
      },
--- a/packages/cli/src/config/config.ts
+++ b/packages/cli/src/config/config.ts
@@ -382,6 +382,8 @@ export async function loadCliConfig(
    model: argv.model!,
    extensionContextFilePaths,
    maxSessionTurns: settings.maxSessionTurns ?? -1,
+    sessionTokenLimit: settings.sessionTokenLimit ?? 32000,
+    maxFolderItems: settings.maxFolderItems ?? 20,
    listExtensions: argv.listExtensions || false,
    activeExtensions: activeExtensions.map((e) => ({
      name: e.config.name,
--- a/packages/cli/src/config/settings.ts
+++ b/packages/cli/src/config/settings.ts
@@ -85,6 +85,12 @@ export interface Settings {
  // Setting for setting maximum number of user/model/tool turns in a session.
  maxSessionTurns?: number;

+  // Setting for maximum token limit for conversation history before blocking requests
+  sessionTokenLimit?: number;
+
+  // Setting for maximum number of files and folders to show in folder structure
+  maxFolderItems?: number;
+
  // Sampling parameters for content generation
  sampling_params?: {
    top_p?: number;
--- a/packages/cli/src/test-utils/mockCommandContext.ts
+++ b/packages/cli/src/test-utils/mockCommandContext.ts
@@ -60,7 +60,9 @@ export const createMockCommandContext = (
            byName: {},
          },
        },
+        promptCount: 0,
      } as SessionStatsState,
+      resetSession: vi.fn(),
    },
  };

--- a/packages/cli/src/ui/commands/clearCommand.test.ts
+++ b/packages/cli/src/ui/commands/clearCommand.test.ts
@@ -43,17 +43,22 @@ describe('clearCommand', () => {

    expect(mockResetChat).toHaveBeenCalledTimes(1);

+    expect(mockContext.session.resetSession).toHaveBeenCalledTimes(1);
+
    expect(mockContext.ui.clear).toHaveBeenCalledTimes(1);

    // Check the order of operations.
    const setDebugMessageOrder = (mockContext.ui.setDebugMessage as Mock).mock
      .invocationCallOrder[0];
    const resetChatOrder = mockResetChat.mock.invocationCallOrder[0];
+    const resetSessionOrder = (mockContext.session.resetSession as Mock).mock
+      .invocationCallOrder[0];
    const clearOrder = (mockContext.ui.clear as Mock).mock
      .invocationCallOrder[0];

    expect(setDebugMessageOrder).toBeLessThan(resetChatOrder);
-    expect(resetChatOrder).toBeLessThan(clearOrder);
+    expect(resetChatOrder).toBeLessThan(resetSessionOrder);
+    expect(resetSessionOrder).toBeLessThan(clearOrder);
  });

  it('should not attempt to reset chat if config service is not available', async () => {
@@ -73,6 +78,7 @@ describe('clearCommand', () => {
      'Clearing terminal and resetting chat.',
    );
    expect(mockResetChat).not.toHaveBeenCalled();
+    expect(nullConfigContext.session.resetSession).toHaveBeenCalledTimes(1);
    expect(nullConfigContext.ui.clear).toHaveBeenCalledTimes(1);
  });
 });
--- a/packages/cli/src/ui/commands/clearCommand.ts
+++ b/packages/cli/src/ui/commands/clearCommand.ts
@@ -12,6 +12,7 @@ export const clearCommand: SlashCommand = {
  action: async (context, _args) => {
    context.ui.setDebugMessage('Clearing terminal and resetting chat.');
    await context.services.config?.getGeminiClient()?.resetChat();
+    context.session.resetSession();
    context.ui.clear();
  },
 };
--- a/packages/cli/src/ui/commands/types.ts
+++ b/packages/cli/src/ui/commands/types.ts
@@ -38,6 +38,7 @@ export interface CommandContext {
  // Session-specific data
  session: {
    stats: SessionStatsState;
+    resetSession: () => void;
  };
 }

--- a/packages/cli/src/ui/components/AboutBox.tsx
+++ b/packages/cli/src/ui/components/AboutBox.tsx
@@ -36,7 +36,7 @@ export const AboutBox: React.FC<AboutBoxProps> = ({
  >
    <Box marginBottom={1}>
      <Text bold color={Colors.AccentPurple}>
-        About Gemini CLI
+        About Qwen Code
      </Text>
    </Box>
    <Box flexDirection="row">
--- a/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx
+++ b/packages/cli/src/ui/components/HistoryItemDisplay.test.tsx
@@ -63,7 +63,7 @@ describe('<HistoryItemDisplay />', () => {
    const { lastFrame } = render(
      <HistoryItemDisplay {...baseItem} item={item} />,
    );
-    expect(lastFrame()).toContain('About Gemini CLI');
+    expect(lastFrame()).toContain('About Qwen Code');
  });

  it('renders ModelStatsDisplay for "model_stats" type', () => {
--- a/packages/cli/src/ui/contexts/SessionContext.tsx
+++ b/packages/cli/src/ui/contexts/SessionContext.tsx
@@ -50,6 +50,7 @@ interface SessionStatsContextValue {
  stats: SessionStatsState;
  startNewPrompt: () => void;
  getPromptCount: () => number;
+  resetSession: () => void;
 }

 // --- Context Definition ---
@@ -109,13 +110,23 @@ export const SessionStatsProvider: React.FC<{ children: React.ReactNode }> = ({
    [stats.promptCount],
  );

+  const resetSession = useCallback(() => {
+    setStats({
+      sessionStartTime: new Date(),
+      metrics: uiTelemetryService.getMetrics(),
+      lastPromptTokenCount: uiTelemetryService.getLastPromptTokenCount(),
+      promptCount: 0,
+    });
+  }, []);
+
  const value = useMemo(
    () => ({
      stats,
      startNewPrompt,
      getPromptCount,
+      resetSession,
    }),
-    [stats, startNewPrompt, getPromptCount],
+    [stats, startNewPrompt, getPromptCount, resetSession],
  );

  return (
--- a/packages/cli/src/ui/hooks/slashCommandProcessor.ts
+++ b/packages/cli/src/ui/hooks/slashCommandProcessor.ts
@@ -172,6 +172,7 @@ export const useSlashCommandProcessor = (
      },
      session: {
        stats: session.stats,
+        resetSession: session.resetSession,
      },
    }),
    [
@@ -183,6 +184,7 @@ export const useSlashCommandProcessor = (
      clearItems,
      refreshStatic,
      session.stats,
+      session.resetSession,
      onDebugMessage,
    ],
  );
--- a/packages/cli/src/ui/hooks/useGeminiStream.ts
+++ b/packages/cli/src/ui/hooks/useGeminiStream.ts
@@ -452,6 +452,23 @@ export const useGeminiStream = (
    [addItem, config],
  );

+  const handleSessionTokenLimitExceededEvent = useCallback(
+    (value: { currentTokens: number; limit: number; message: string }) =>
+      addItem(
+        {
+          type: 'error',
+          text:
+            `🚫 Session token limit exceeded: ${value.currentTokens.toLocaleString()} tokens > ${value.limit.toLocaleString()} limit.\n\n` +
+            `💡 Solutions:\n` +
+            `   • Start a new session: Use /clear command\n` +
+            `   • Increase limit: Add "sessionTokenLimit": (e.g., 128000) to your settings.json\n` +
+            `   • Compress history: Use /compress command to compress history`,
+        },
+        Date.now(),
+      ),
+    [addItem],
+  );
+
  const handleLoopDetectedEvent = useCallback(() => {
    addItem(
      {
@@ -501,6 +518,9 @@ export const useGeminiStream = (
          case ServerGeminiEventType.MaxSessionTurns:
            handleMaxSessionTurnsEvent();
            break;
+          case ServerGeminiEventType.SessionTokenLimitExceeded:
+            handleSessionTokenLimitExceededEvent(event.value);
+            break;
          case ServerGeminiEventType.LoopDetected:
            // handle later because we want to move pending history to history
            // before we add loop detected message to history
@@ -525,6 +545,7 @@ export const useGeminiStream = (
      scheduleToolCalls,
      handleChatCompressionEvent,
      handleMaxSessionTurnsEvent,
+      handleSessionTokenLimitExceededEvent,
    ],
  );

--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -44,6 +44,7 @@
    "shell-quote": "^1.8.3",
    "simple-git": "^3.28.0",
    "strip-ansi": "^7.1.0",
+    "tiktoken": "^1.0.21",
    "undici": "^7.10.0",
    "ws": "^8.18.0"
  },
--- a/packages/core/src/code_assist/server.ts
+++ b/packages/core/src/code_assist/server.ts
@@ -56,7 +56,7 @@ export interface HttpOptions {
  headers?: Record<string, string>;
 }

-export const CODE_ASSIST_ENDPOINT = 'https://cloudcode-pa.googleapis.com';
+export const CODE_ASSIST_ENDPOINT = 'https://localhost:0'; // Disable Google Code Assist API Request
 export const CODE_ASSIST_API_VERSION = 'v1internal';

 export class CodeAssistServer implements ContentGenerator {
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -140,6 +140,8 @@ export interface ConfigParameters {
  model: string;
  extensionContextFilePaths?: string[];
  maxSessionTurns?: number;
+  sessionTokenLimit?: number;
+  maxFolderItems?: number;
  listExtensions?: boolean;
  activeExtensions?: ActiveExtension[];
  noBrowser?: boolean;
@@ -216,6 +218,8 @@ export class Config {
  }>;
  private modelSwitchedDuringSession: boolean = false;
  private readonly maxSessionTurns: number;
+  private readonly sessionTokenLimit: number;
+  private readonly maxFolderItems: number;
  private readonly listExtensions: boolean;
  private readonly _activeExtensions: ActiveExtension[];
  flashFallbackHandler?: FlashFallbackHandler;
@@ -262,6 +266,8 @@ export class Config {
    this.model = params.model;
    this.extensionContextFilePaths = params.extensionContextFilePaths ?? [];
    this.maxSessionTurns = params.maxSessionTurns ?? -1;
+    this.sessionTokenLimit = params.sessionTokenLimit ?? 32000;
+    this.maxFolderItems = params.maxFolderItems ?? 20;
    this.listExtensions = params.listExtensions ?? false;
    this._activeExtensions = params.activeExtensions ?? [];
    this.noBrowser = params.noBrowser ?? false;
@@ -353,6 +359,14 @@ export class Config {
    return this.maxSessionTurns;
  }

+  getSessionTokenLimit(): number {
+    return this.sessionTokenLimit;
+  }
+
+  getMaxFolderItems(): number {
+    return this.maxFolderItems;
+  }
+
  setQuotaErrorOccurred(value: boolean): void {
    this.quotaErrorOccurred = value;
  }
@@ -516,7 +530,7 @@ export class Config {
  }

  getUsageStatisticsEnabled(): boolean {
-    return this.usageStatisticsEnabled;
+    return false; // Disable usage statistics (telemetry) to prevent network requests
  }

  getExtensionContextFilePaths(): string[] {
--- a/packages/core/src/core/snapshots/prompts.test.ts.snap
+++ b/packages/core/src/core/snapshots/prompts.test.ts.snap
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -195,6 +195,8 @@ describe('Gemini Client (client.ts)', () => {
        getWorkingDir: vi.fn().mockReturnValue('/test/dir'),
        getFileService: vi.fn().mockReturnValue(fileService),
        getMaxSessionTurns: vi.fn().mockReturnValue(0),
+        getSessionTokenLimit: vi.fn().mockReturnValue(32000),
+        getMaxFolderItems: vi.fn().mockReturnValue(20),
        getQuotaErrorOccurred: vi.fn().mockReturnValue(false),
        setQuotaErrorOccurred: vi.fn(),
        getNoBrowser: vi.fn().mockReturnValue(false),
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -167,6 +167,7 @@ export class GeminiClient {
    const platform = process.platform;
    const folderStructure = await getFolderStructure(cwd, {
      fileService: this.config.getFileService(),
+      maxItems: this.config.getMaxFolderItems(),
    });
    const context = `
  This is the Qwen Code. We are setting up the context for our chat.
@@ -306,6 +307,49 @@ export class GeminiClient {
    if (compressed) {
      yield { type: GeminiEventType.ChatCompressed, value: compressed };
    }
+
+    // Check session token limit after compression using accurate token counting
+    const sessionTokenLimit = this.config.getSessionTokenLimit();
+    if (sessionTokenLimit > 0) {
+      // Get all the content that would be sent in an API call
+      const currentHistory = this.getChat().getHistory(true);
+      const userMemory = this.config.getUserMemory();
+      const systemPrompt = getCoreSystemPrompt(userMemory);
+      const environment = await this.getEnvironment();
+
+      // Create a mock request content to count total tokens
+      const mockRequestContent = [
+        {
+          role: 'system' as const,
+          parts: [{ text: systemPrompt }, ...environment],
+        },
+        ...currentHistory,
+      ];
+
+      // Use the improved countTokens method for accurate counting
+      const { totalTokens: totalRequestTokens } =
+        await this.getContentGenerator().countTokens({
+          model: this.config.getModel(),
+          contents: mockRequestContent,
+        });
+
+      if (
+        totalRequestTokens !== undefined &&
+        totalRequestTokens > sessionTokenLimit
+      ) {
+        yield {
+          type: GeminiEventType.SessionTokenLimitExceeded,
+          value: {
+            currentTokens: totalRequestTokens,
+            limit: sessionTokenLimit,
+            message:
+              `Session token limit exceeded: ${totalRequestTokens} tokens > ${sessionTokenLimit} limit. ` +
+              'Please start a new session or increase the sessionTokenLimit in your settings.json.',
+          },
+        };
+        return new Turn(this.getChat(), prompt_id);
+      }
+    }
    const turn = new Turn(this.getChat(), prompt_id);
    const resultStream = turn.run(request, signal);
    for await (const event of resultStream) {
--- a/packages/core/src/core/modelCheck.ts
+++ b/packages/core/src/core/modelCheck.ts
@@ -4,10 +4,7 @@
 * SPDX-License-Identifier: Apache-2.0
 */

-import {
-  DEFAULT_GEMINI_MODEL,
-  DEFAULT_GEMINI_FLASH_MODEL,
-} from '../config/models.js';
+// Unused imports removed

 /**
 * Checks if the default "pro" model is rate-limited and returns a fallback "flash"
@@ -18,51 +15,9 @@ import {
 *          and the original model if a switch happened.
 */
 export async function getEffectiveModel(
-  apiKey: string,
+  _apiKey: string,
  currentConfiguredModel: string,
 ): Promise<string> {
-  if (currentConfiguredModel !== DEFAULT_GEMINI_MODEL) {
-    // Only check if the user is trying to use the specific pro model we want to fallback from.
+  // Disable Google API Model Check
  return currentConfiguredModel;
 }
-
-  const modelToTest = DEFAULT_GEMINI_MODEL;
-  const fallbackModel = DEFAULT_GEMINI_FLASH_MODEL;
-  const endpoint = `https://generativelanguage.googleapis.com/v1beta/models/${modelToTest}:generateContent?key=${apiKey}`;
-  const body = JSON.stringify({
-    contents: [{ parts: [{ text: 'test' }] }],
-    generationConfig: {
-      maxOutputTokens: 1,
-      temperature: 0,
-      topK: 1,
-      thinkingConfig: { thinkingBudget: 128, includeThoughts: false },
-    },
-  });
-
-  const controller = new AbortController();
-  const timeoutId = setTimeout(() => controller.abort(), 2000); // 500ms timeout for the request
-
-  try {
-    const response = await fetch(endpoint, {
-      method: 'POST',
-      headers: { 'Content-Type': 'application/json' },
-      body,
-      signal: controller.signal,
-    });
-
-    clearTimeout(timeoutId);
-
-    if (response.status === 429) {
-      console.log(
-        `[INFO] Your configured model (${modelToTest}) was temporarily unavailable. Switched to ${fallbackModel} for this session.`,
-      );
-      return fallbackModel;
-    }
-    // For any other case (success, other error codes), we stick to the original model.
-    return currentConfiguredModel;
-  } catch (_error) {
-    clearTimeout(timeoutId);
-    // On timeout or any other fetch error, stick to the original model.
-    return currentConfiguredModel;
-  }
-}
--- a/packages/core/src/core/openaiContentGenerator.ts
+++ b/packages/core/src/core/openaiContentGenerator.ts
@@ -578,14 +578,26 @@ export class OpenAIContentGenerator implements ContentGenerator {
  async countTokens(
    request: CountTokensParameters,
  ): Promise<CountTokensResponse> {
-    // OpenAI doesn't have a direct token counting endpoint
-    // We'll estimate based on the tiktoken library or a rough calculation
-    // For now, return a rough estimate
+    // Use tiktoken for accurate token counting
    const content = JSON.stringify(request.contents);
-    const estimatedTokens = Math.ceil(content.length / 4); // Rough estimate: 1 token ≈ 4 characters
+    let totalTokens = 0;
+
+    try {
+      const { get_encoding } = await import('tiktoken');
+      const encoding = get_encoding('cl100k_base'); // GPT-4 encoding; only an approximation for Qwen models
+      totalTokens = encoding.encode(content).length;
+      encoding.free();
+    } catch (error) {
+      console.warn(
+        'Failed to load tiktoken, falling back to character approximation:',
+        error,
+      );
+      // Fallback: rough approximation using character count
+      totalTokens = Math.ceil(content.length / 4); // Rough estimate: 1 token ≈ 4 characters
+    }

    return {
-      totalTokens: estimatedTokens,
+      totalTokens,
    };
  }

--- a/packages/core/src/core/prompts.test.ts
+++ b/packages/core/src/core/prompts.test.ts
@@ -32,7 +32,7 @@ describe('Core System Prompt (prompts.ts)', () => {
    vi.stubEnv('SANDBOX', undefined);
    const prompt = getCoreSystemPrompt();
    expect(prompt).not.toContain('---\n\n'); // Separator should not be present
-    expect(prompt).toContain('You are an interactive CLI agent'); // Check for core content
+    expect(prompt).toContain('You are Qwen Code, an interactive CLI agent'); // Check for core content
    expect(prompt).toMatchSnapshot(); // Use snapshot for base prompt structure
  });

@@ -40,7 +40,7 @@ describe('Core System Prompt (prompts.ts)', () => {
    vi.stubEnv('SANDBOX', undefined);
    const prompt = getCoreSystemPrompt('');
    expect(prompt).not.toContain('---\n\n');
-    expect(prompt).toContain('You are an interactive CLI agent');
+    expect(prompt).toContain('You are Qwen Code, an interactive CLI agent');
    expect(prompt).toMatchSnapshot();
  });

@@ -48,7 +48,7 @@ describe('Core System Prompt (prompts.ts)', () => {
    vi.stubEnv('SANDBOX', undefined);
    const prompt = getCoreSystemPrompt('   \n  \t ');
    expect(prompt).not.toContain('---\n\n');
-    expect(prompt).toContain('You are an interactive CLI agent');
+    expect(prompt).toContain('You are Qwen Code, an interactive CLI agent');
    expect(prompt).toMatchSnapshot();
  });

@@ -59,7 +59,7 @@ describe('Core System Prompt (prompts.ts)', () => {
    const prompt = getCoreSystemPrompt(memory);

    expect(prompt.endsWith(expectedSuffix)).toBe(true);
-    expect(prompt).toContain('You are an interactive CLI agent'); // Ensure base prompt follows
+    expect(prompt).toContain('You are Qwen Code, an interactive CLI agent'); // Ensure base prompt follows
    expect(prompt).toMatchSnapshot(); // Snapshot the combined prompt
  });

--- a/packages/core/src/core/prompts.ts
+++ b/packages/core/src/core/prompts.ts
@@ -6,7 +6,6 @@

 import path from 'node:path';
 import fs from 'node:fs';
-import { LSTool } from '../tools/ls.js';
 import { EditTool } from '../tools/edit.js';
 import { GlobTool } from '../tools/glob.js';
 import { GrepTool } from '../tools/grep.js';
@@ -112,7 +111,7 @@ export function getCoreSystemPrompt(
  const basePrompt = systemMdEnabled
    ? fs.readFileSync(systemMdPath, 'utf8')
    : `
-You are an interactive CLI agent specializing in software engineering tasks. Your primary goal is to help users safely and efficiently, adhering strictly to the following instructions and utilizing your available tools.
+You are Qwen Code, an interactive CLI agent developed by Alibaba Group, specializing in software engineering tasks. Your primary goal is to help users safely and efficiently, adhering strictly to the following instructions and utilizing your available tools.

 # Core Mandates

@@ -242,24 +241,56 @@ model: true

 <example>
 user: list files here.
-model: [tool_call: ${LSTool.Name} for path '/path/to/project']
+model: 
+<tool_call>
+<function=list_directory>
+<parameter=path>
+.
+</parameter>
+</function>
+</tool_call>
 </example>

 <example>
 user: start the server implemented in server.js
-model: [tool_call: ${ShellTool.Name} for 'node server.js &' because it must run in the background]
+model: 
+<tool_call>
+<function=run_shell_command>
+<parameter=command>
+node server.js &
+</parameter>
+</function>
+</tool_call>
 </example>

 <example>
 user: Refactor the auth logic in src/auth.py to use the requests library instead of urllib.
 model: Okay, I can refactor 'src/auth.py'.
 First, I'll analyze the code and check for a test safety net before planning any changes.
-[tool_call: ${GlobTool.Name} for path 'tests/test_auth.py']
-[tool_call: ${ReadFileTool.Name} for absolute_path '/path/to/tests/test_auth.py']
+<tool_call>
+<function=glob>
+<parameter=path>
+tests/test_auth.py
+</parameter>
+</function>
+</tool_call>
+<tool_call>
+<function=read_file>
+<parameter=path>
+/path/to/tests/test_auth.py
+</parameter>
+</function>
+</tool_call>
 (After analysis)
 Great, 'tests/test_auth.py' exists and covers the core authentication logic. With this safety net in place, I can safely plan the refactoring.
 I'll also confirm 'requests' is a dependency.
-[tool_call: ${ReadFileTool.Name} for absolute_path '/path/to/requirements.txt']
+<tool_call>
+<function=read_file>
+<parameter=path>
+/path/to/requirements.txt
+</parameter>
+</function>
+</tool_call>
 (After analysis)
 Looks good, 'requests' is available.

@@ -269,20 +300,30 @@ Here's the plan:
 3.  Remove the old 'urllib' import.
 4.  Run the project's linter and tests to verify the changes.

-Should I proceed?
-user: Yes
-model:
-[tool_call: ${WriteFileTool.Name} or ${EditTool.Name} to apply the refactoring to 'src/auth.py']
+<tool_call>
+<function=replace>
+<parameter=path>
+src/auth.py
+</parameter>
+<parameter=old_content>
+(old code content)
+</parameter>
+<parameter=new_content>
+(new code content)
+</parameter>
+</function>
+</tool_call>
 Refactoring complete. Running verification...
-[tool_call: ${ShellTool.Name} for 'ruff check src/auth.py && pytest']
+<tool_call>
+<function=run_shell_command>
+<parameter=command>
+ruff check src/auth.py && pytest
+</parameter>
+</function>
+</tool_call>
 (After verification passes)
 All checks passed. This is a stable checkpoint.
-${(function () {
-  if (isGitRepository(process.cwd())) {
-    return `Would you like me to write a commit message and commit these changes?`;
-  }
-  return '';
-})()}
+Would you like me to write a commit message and commit these changes?
 </example>

 <example>
@@ -293,32 +334,48 @@ model: I can run \`rm -rf /path/to/project/temp\`. This will permanently delete
 <example>
 user: Write tests for someFile.ts
 model:
-Okay, I can write those tests. First, I'll read \`someFile.ts\` to understand its functionality.
-[tool_call: ${ReadFileTool.Name} for absolute_path '/path/to/someFile.ts' or use ${GlobTool.Name} to find \`someFile.ts\` if its location is unknown]
+Okay, I can write those tests. First, I'll read someFile.ts to understand its functionality.
+<tool_call>
+<function=read_file>
+<parameter=path>
+/path/to/someFile.ts
+</parameter>
+</function>
+</tool_call>
 Now I'll look for existing or related test files to understand current testing conventions and dependencies.
-[tool_call: ${ReadManyFilesTool.Name} for paths ['**/*.test.ts', 'src/**/*.spec.ts'] assuming someFile.ts is in the src directory]
+<tool_call>
+<function>read_many_files for paths ['**/*.test.ts', 'src/**/*.spec.ts'] assuming someFile.ts is in the src directory]
+</tool_call>
 (After reviewing existing tests and the file content)
-[tool_call: ${WriteFileTool.Name} to create /path/to/someFile.test.ts with the test code]
+<tool_call>
+<function=write_file>
+<parameter=path>
+/path/to/someFile.test.ts
+</parameter>
+</function>
+</tool_call>
 I've written the tests. Now I'll run the project's test command to verify them.
-[tool_call: ${ShellTool.Name} for 'npm run test']
-</example>
-
-<example>
-user: How do I update the user's profile information in this system?
-model:
-I'm not immediately sure how user profile information is updated. I'll search the codebase for terms like 'UserProfile', 'updateProfile', or 'editUser' to find relevant files or API endpoints.
-[tool_call: ${GrepTool.Name} for pattern 'UserProfile|updateProfile|editUser']
-(After reviewing search results, assuming a relevant file like '/path/to/UserProfileService.java' was found)
-Okay, \`/path/to/UserProfileService.java\` seems like the most relevant file. I'll read its content to understand how updates are handled.
-[tool_call: ${ReadFileTool.Name} for absolute_path '/path/to/UserProfileService.java']
-(After reading the file)
-It appears the \`updateUserProfile\` method in \`UserProfileService.java\` is responsible for this. It expects a user ID and a \`UserProfileDTO\` object...
+<tool_call>
+<function=run_shell_command>
+<parameter=command>
+npm run test
+</parameter>
+</function>
+</tool_call>
+(After verification passes)
+All checks passed. This is a stable checkpoint.
 </example>

 <example>
 user: Where are all the 'app.config' files in this project? I need to check their settings.
 model:
-[tool_call: ${GlobTool.Name} for pattern '**/app.config']
+<tool_call>
+<function=glob>
+<parameter=pattern>
+./**/app.config
+</parameter>
+</function>
+</tool_call>
 (Assuming GlobTool returns a list of paths like ['/path/to/moduleA/app.config', '/path/to/moduleB/app.config'])
 I found the following 'app.config' files:
 - /path/to/moduleA/app.config
--- a/packages/core/src/core/turn.ts
+++ b/packages/core/src/core/turn.ts
@@ -49,6 +49,7 @@ export enum GeminiEventType {
  ChatCompressed = 'chat_compressed',
  Thought = 'thought',
  MaxSessionTurns = 'max_session_turns',
+  SessionTokenLimitExceeded = 'session_token_limit_exceeded',
  LoopDetected = 'loop_detected',
 }

@@ -61,6 +62,12 @@ export interface GeminiErrorEventValue {
  error: StructuredError;
 }

+export interface SessionTokenLimitExceededValue {
+  currentTokens: number;
+  limit: number;
+  message: string;
+}
+
 export interface ToolCallRequestInfo {
  callId: string;
  name: string;
@@ -134,6 +141,11 @@ export type ServerGeminiMaxSessionTurnsEvent = {
  type: GeminiEventType.MaxSessionTurns;
 };

+export type ServerGeminiSessionTokenLimitExceededEvent = {
+  type: GeminiEventType.SessionTokenLimitExceeded;
+  value: SessionTokenLimitExceededValue;
+};
+
 export type ServerGeminiLoopDetectedEvent = {
  type: GeminiEventType.LoopDetected;
 };
@@ -149,6 +161,7 @@ export type ServerGeminiStreamEvent =
  | ServerGeminiChatCompressedEvent
  | ServerGeminiThoughtEvent
  | ServerGeminiMaxSessionTurnsEvent
+  | ServerGeminiSessionTokenLimitExceededEvent
  | ServerGeminiLoopDetectedEvent;

 // A turn manages the agentic loop turn within the server context.
--- a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts
+++ b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts
@@ -54,13 +54,9 @@ export class ClearcutLogger {
    this.config = config;
  }

-  static getInstance(config?: Config): ClearcutLogger | undefined {
-    if (config === undefined || !config?.getUsageStatisticsEnabled())
+  static getInstance(_config?: Config): ClearcutLogger | undefined {
+    // Disable the Clearcut logger to avoid network requests
    return undefined;
-    if (!ClearcutLogger.instance) {
-      ClearcutLogger.instance = new ClearcutLogger(config);
-    }
-    return ClearcutLogger.instance;
  }

  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Clearcut expects this format.
--- a/packages/core/src/telemetry/loggers.test.ts
+++ b/packages/core/src/telemetry/loggers.test.ts
@@ -57,6 +57,7 @@ describe('loggers', () => {
  };

  beforeEach(() => {
+    vi.clearAllMocks(); // Clear mock calls recorded by previous tests
    vi.spyOn(sdk, 'isTelemetrySdkInitialized').mockReturnValue(true);
    vi.spyOn(logs, 'getLogger').mockReturnValue(mockLogger);
    vi.spyOn(uiTelemetry.uiTelemetryService, 'addEvent').mockImplementation(
@@ -146,7 +147,7 @@ describe('loggers', () => {
          'event.name': EVENT_USER_PROMPT,
          'event.timestamp': '2025-01-01T00:00:00.000Z',
          prompt_length: 11,
-          prompt: 'test-prompt',
+          // The prompt field is omitted because shouldLogUserPrompts now returns false
        },
      });
    });
--- a/packages/core/src/telemetry/loggers.ts
+++ b/packages/core/src/telemetry/loggers.ts
@@ -38,8 +38,7 @@ import { uiTelemetryService, UiEvent } from './uiTelemetry.js';
 import { ClearcutLogger } from './clearcut-logger/clearcut-logger.js';
 import { safeJsonStringify } from '../utils/safeJsonStringify.js';

-const shouldLogUserPrompts = (config: Config): boolean =>
-  config.getTelemetryLogPromptsEnabled();
+const shouldLogUserPrompts = (_config: Config): boolean => false; // Disable user prompt logging

 function getCommonAttributes(config: Config): LogAttributes {
  return {
--- a/packages/core/src/utils/getFolderStructure.test.ts
+++ b/packages/core/src/utils/getFolderStructure.test.ts
@@ -115,7 +115,7 @@ describe('getFolderStructure', () => {
  it('should return basic folder structure', async () => {
    const structure = await getFolderStructure('/testroot/subfolderA');
    const expected = `
-Showing up to 200 items (files + folders).
+Showing up to 20 items (files + folders).

 /testroot/subfolderA/
 ├───fileA1.ts
@@ -129,7 +129,7 @@ Showing up to 200 items (files + folders).
  it('should handle an empty folder', async () => {
    const structure = await getFolderStructure('/testroot/emptyFolder');
    const expected = `
-Showing up to 200 items (files + folders).
+Showing up to 20 items (files + folders).

 /testroot/emptyFolder/
 `.trim();
@@ -139,7 +139,7 @@ Showing up to 200 items (files + folders).
  it('should ignore folders specified in ignoredFolders (default)', async () => {
    const structure = await getFolderStructure('/testroot');
    const expected = `
-Showing up to 200 items (files + folders). Folders or files indicated with ... contain more items not shown, were ignored, or the display limit (200 items) was reached.
+Showing up to 20 items (files + folders). Folders or files indicated with ... contain more items not shown, were ignored, or the display limit (20 items) was reached.

 /testroot/
 ├───.hiddenfile
@@ -160,7 +160,7 @@ Showing up to 200 items (files + folders). Folders or files indicated with ... c
      ignoredFolders: new Set(['subfolderA', 'node_modules']),
    });
    const expected = `
-Showing up to 200 items (files + folders). Folders or files indicated with ... contain more items not shown, were ignored, or the display limit (200 items) was reached.
+Showing up to 20 items (files + folders). Folders or files indicated with ... contain more items not shown, were ignored, or the display limit (20 items) was reached.

 /testroot/
 ├───.hiddenfile
@@ -177,7 +177,7 @@ Showing up to 200 items (files + folders). Folders or files indicated with ... c
      fileIncludePattern: /\.ts$/,
    });
    const expected = `
-Showing up to 200 items (files + folders).
+Showing up to 20 items (files + folders).

 /testroot/subfolderA/
 ├───fileA1.ts
--- a/packages/core/src/utils/getFolderStructure.ts
+++ b/packages/core/src/utils/getFolderStructure.ts
@@ -10,7 +10,7 @@ import * as path from 'path';
 import { getErrorMessage, isNodeError } from './errors.js';
 import { FileDiscoveryService } from '../services/fileDiscoveryService.js';

-const MAX_ITEMS = 200;
+const MAX_ITEMS = 20;
 const TRUNCATION_INDICATOR = '...';
 const DEFAULT_IGNORED_FOLDERS = new Set(['node_modules', '.git', 'dist']);

@@ -18,7 +18,7 @@ const DEFAULT_IGNORED_FOLDERS = new Set(['node_modules', '.git', 'dist']);

 /** Options for customizing folder structure retrieval. */
 interface FolderStructureOptions {
-  /** Maximum number of files and folders combined to display. Defaults to 200. */
+  /** Maximum number of files and folders combined to display. Defaults to 20. */
  maxItems?: number;
  /** Set of folder names to ignore completely. Case-sensitive. */
  ignoredFolders?: Set<string>;