Metrics for Retries on Content Errors (#6870)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2025-12-19 09:33:53 +00:00 · 2025-08-22 19:06:29 -04:00
parent 33d49291ec
commit 5b5290146a
4 changed files with 95 additions and 6 deletions
--- a/packages/core/src/core/geminiChat.test.ts
+++ b/packages/core/src/core/geminiChat.test.ts
@@ -25,6 +25,22 @@ const mockModelsModule = {
  batchEmbedContents: vi.fn(),
 } as unknown as Models;
 const {
  mockRecordInvalidChunk,
  mockRecordContentRetry,
  mockRecordContentRetryFailure,
 } = vi.hoisted(() => ({
  mockRecordInvalidChunk: vi.fn(),
  mockRecordContentRetry: vi.fn(),
  mockRecordContentRetryFailure: vi.fn(),
 }));
 vi.mock('../telemetry/metrics.js', () => ({
  recordInvalidChunk: mockRecordInvalidChunk,
  recordContentRetry: mockRecordContentRetry,
  recordContentRetryFailure: mockRecordContentRetryFailure,
 }));
 describe('GeminiChat', () => {
  let chat: GeminiChat;
  let mockConfig: Config;
@@ -483,7 +499,7 @@ describe('GeminiChat', () => {
  });
  describe('sendMessageStream with retries', () => {
-    it('should retry on invalid content and succeed on the second attempt', async () => {
+    it('should retry on invalid content, succeed, and report metrics', async () => {
      // Use mockImplementationOnce to provide a fresh, promise-wrapped generator for each attempt.
      vi.mocked(mockModelsModule.generateContentStream)
        .mockImplementationOnce(async () =>
@@ -515,6 +531,9 @@ describe('GeminiChat', () => {
      }
      // Assertions
      expect(mockRecordInvalidChunk).toHaveBeenCalledTimes(1);
      expect(mockRecordContentRetry).toHaveBeenCalledTimes(1);
      expect(mockRecordContentRetryFailure).not.toHaveBeenCalled();
      expect(mockModelsModule.generateContentStream).toHaveBeenCalledTimes(2);
      expect(
        chunks.some(
@@ -537,7 +556,7 @@ describe('GeminiChat', () => {
      });
    });
-    it('should fail after all retries on persistent invalid content', async () => {
+    it('should fail after all retries on persistent invalid content and report metrics', async () => {
      vi.mocked(mockModelsModule.generateContentStream).mockImplementation(
        async () =>
          (async function* () {
@@ -571,6 +590,9 @@ describe('GeminiChat', () => {
      // Should be called 3 times (initial + 2 retries)
      expect(mockModelsModule.generateContentStream).toHaveBeenCalledTimes(3);
      expect(mockRecordInvalidChunk).toHaveBeenCalledTimes(3);
      expect(mockRecordContentRetry).toHaveBeenCalledTimes(2);
      expect(mockRecordContentRetryFailure).toHaveBeenCalledTimes(1);
      // History should be clean, as if the failed turn never happened.
      const history = chat.getHistory();
@@ -585,7 +607,7 @@ describe('GeminiChat', () => {
    ];
    chat.setHistory(initialHistory);
-    // 2. Mock the API
+    // 2. Mock the API to fail once with an empty stream, then succeed.
    vi.mocked(mockModelsModule.generateContentStream)
      .mockImplementationOnce(async () =>
        (async function* () {
@@ -595,6 +617,7 @@ describe('GeminiChat', () => {
        })(),
      )
      .mockImplementationOnce(async () =>
        // Second attempt succeeds
        (async function* () {
          yield {
            candidates: [{ content: { parts: [{ text: 'Second answer' }] } }],
@@ -611,10 +634,13 @@ describe('GeminiChat', () => {
      // consume stream
    }
-    // 4. Assert the final history
+    // 4. Assert the final history and metrics
    const history = chat.getHistory();
    expect(history.length).toBe(4);
    // Assert that the correct metrics were reported for one empty-stream retry
    expect(mockRecordContentRetry).toHaveBeenCalledTimes(1);
    // Explicitly verify the structure of each part to satisfy TypeScript
    const turn1 = history[0];
    if (!turn1?.parts?.[0] || !('text' in turn1.parts[0])) {
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -23,6 +23,11 @@ import { Config } from '../config/config.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
 import { hasCycleInSchema } from '../tools/tools.js';
 import { StructuredError } from './turn.js';
 import {
  recordContentRetry,
  recordContentRetryFailure,
  recordInvalidChunk,
 } from '../telemetry/metrics.js';
 /**
 * Options for retrying due to invalid content from the model.
@@ -38,7 +43,6 @@ const INVALID_CONTENT_RETRY_OPTIONS: ContentRetryOptions = {
  maxAttempts: 3, // 1 initial call + 2 retries
  initialDelayMs: 500,
 };
 /**
 * Returns true if the response is valid, false otherwise.
 */
@@ -349,7 +353,7 @@ export class GeminiChat {
        for (
          let attempt = 0;
-          attempt <= INVALID_CONTENT_RETRY_OPTIONS.maxAttempts;
+          attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts;
          attempt++
        ) {
          try {
@@ -373,6 +377,7 @@ export class GeminiChat {
            if (isContentError) {
              // Check if we have more attempts left.
              if (attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts - 1) {
                recordContentRetry(self.config);
                await new Promise((res) =>
                  setTimeout(
                    res,
@@ -388,6 +393,9 @@ export class GeminiChat {
        }
        if (lastError) {
          if (lastError instanceof EmptyStreamError) {
            recordContentRetryFailure(self.config);
          }
          // If the stream fails, remove the user message that was added.
          if (self.history[self.history.length - 1] === userContent) {
            self.history.pop();
@@ -545,6 +553,7 @@ export class GeminiChat {
          }
        }
      } else {
        recordInvalidChunk(this.config);
        isStreamInvalid = true;
      }
      yield chunk; // Yield every chunk to the UI immediately.
--- a/packages/core/src/telemetry/constants.ts
+++ b/packages/core/src/telemetry/constants.ts
@@ -24,3 +24,7 @@ export const METRIC_API_REQUEST_LATENCY = 'gemini_cli.api.request.latency';
 export const METRIC_TOKEN_USAGE = 'gemini_cli.token.usage';
 export const METRIC_SESSION_COUNT = 'gemini_cli.session.count';
 export const METRIC_FILE_OPERATION_COUNT = 'gemini_cli.file.operation.count';
 export const METRIC_INVALID_CHUNK_COUNT = 'gemini_cli.chat.invalid_chunk.count';
 export const METRIC_CONTENT_RETRY_COUNT = 'gemini_cli.chat.content_retry.count';
 export const METRIC_CONTENT_RETRY_FAILURE_COUNT =
  'gemini_cli.chat.content_retry_failure.count';
--- a/packages/core/src/telemetry/metrics.ts
+++ b/packages/core/src/telemetry/metrics.ts
@@ -22,6 +22,9 @@ import {
  METRIC_SESSION_COUNT,
  METRIC_FILE_OPERATION_COUNT,
  EVENT_CHAT_COMPRESSION,
  METRIC_INVALID_CHUNK_COUNT,
  METRIC_CONTENT_RETRY_COUNT,
  METRIC_CONTENT_RETRY_FAILURE_COUNT,
 } from './constants.js';
 import { Config } from '../config/config.js';
 import { DiffStat } from '../tools/tools.js';
@@ -40,6 +43,9 @@ let apiRequestLatencyHistogram: Histogram | undefined;
 let tokenUsageCounter: Counter | undefined;
 let fileOperationCounter: Counter | undefined;
 let chatCompressionCounter: Counter | undefined;
 let invalidChunkCounter: Counter | undefined;
 let contentRetryCounter: Counter | undefined;
 let contentRetryFailureCounter: Counter | undefined;
 let isMetricsInitialized = false;
 function getCommonAttributes(config: Config): Attributes {
@@ -94,6 +100,24 @@ export function initializeMetrics(config: Config): void {
    description: 'Counts chat compression events.',
    valueType: ValueType.INT,
  });
  // New counters for content errors
  invalidChunkCounter = meter.createCounter(METRIC_INVALID_CHUNK_COUNT, {
    description: 'Counts invalid chunks received from a stream.',
    valueType: ValueType.INT,
  });
  contentRetryCounter = meter.createCounter(METRIC_CONTENT_RETRY_COUNT, {
    description: 'Counts retries due to content errors (e.g., empty stream).',
    valueType: ValueType.INT,
  });
  contentRetryFailureCounter = meter.createCounter(
    METRIC_CONTENT_RETRY_FAILURE_COUNT,
    {
      description: 'Counts occurrences of all content retries failing.',
      valueType: ValueType.INT,
    },
  );
  const sessionCounter = meter.createCounter(METRIC_SESSION_COUNT, {
    description: 'Count of CLI sessions started.',
    valueType: ValueType.INT,
@@ -231,3 +255,29 @@ export function recordFileOperationMetric(
  }
  fileOperationCounter.add(1, attributes);
 }
 // --- New Metric Recording Functions ---
 /**
 * Records a metric for when an invalid chunk is received from a stream.
 */
 export function recordInvalidChunk(config: Config): void {
  if (!invalidChunkCounter || !isMetricsInitialized) return;
  invalidChunkCounter.add(1, getCommonAttributes(config));
 }
 /**
 * Records a metric for when a retry is triggered due to a content error.
 */
 export function recordContentRetry(config: Config): void {
  if (!contentRetryCounter || !isMetricsInitialized) return;
  contentRetryCounter.add(1, getCommonAttributes(config));
 }
 /**
 * Records a metric for when all content error retries have failed for a request.
 */
 export function recordContentRetryFailure(config: Config): void {
  if (!contentRetryFailureCounter || !isMetricsInitialized) return;
  contentRetryFailureCounter.add(1, getCommonAttributes(config));
 }