Metrics for Retries on Content Errors (#6870)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2025-12-19 09:33:53 +00:00 · 2025-08-22 19:06:29 -04:00
parent 33d49291ec
commit 5b5290146a
4 changed files with 95 additions and 6 deletions
--- a/packages/core/src/core/geminiChat.test.ts
+++ b/packages/core/src/core/geminiChat.test.ts
@@ -25,6 +25,22 @@ const mockModelsModule = {
  batchEmbedContents: vi.fn(),
 } as unknown as Models;

+const {
+  mockRecordInvalidChunk,
+  mockRecordContentRetry,
+  mockRecordContentRetryFailure,
+} = vi.hoisted(() => ({ // created via vi.hoisted so the hoisted vi.mock factory below can reference them
+  mockRecordInvalidChunk: vi.fn(),
+  mockRecordContentRetry: vi.fn(),
+  mockRecordContentRetryFailure: vi.fn(),
+}));
+
+vi.mock('../telemetry/metrics.js', () => ({ // stub the three content-error metric recorders with the spies above
+  recordInvalidChunk: mockRecordInvalidChunk,
+  recordContentRetry: mockRecordContentRetry,
+  recordContentRetryFailure: mockRecordContentRetryFailure,
+}));
+
 describe('GeminiChat', () => {
  let chat: GeminiChat;
  let mockConfig: Config;
@@ -483,7 +499,7 @@ describe('GeminiChat', () => {
  });

  describe('sendMessageStream with retries', () => {
-    it('should retry on invalid content and succeed on the second attempt', async () => {
+    it('should retry on invalid content, succeed, and report metrics', async () => {
      // Use mockImplementationOnce to provide a fresh, promise-wrapped generator for each attempt.
      vi.mocked(mockModelsModule.generateContentStream)
        .mockImplementationOnce(async () =>
@@ -515,6 +531,9 @@ describe('GeminiChat', () => {
      }

      // Assertions
+      expect(mockRecordInvalidChunk).toHaveBeenCalledTimes(1);
+      expect(mockRecordContentRetry).toHaveBeenCalledTimes(1);
+      expect(mockRecordContentRetryFailure).not.toHaveBeenCalled();
      expect(mockModelsModule.generateContentStream).toHaveBeenCalledTimes(2);
      expect(
        chunks.some(
@@ -537,7 +556,7 @@ describe('GeminiChat', () => {
      });
    });

-    it('should fail after all retries on persistent invalid content', async () => {
+    it('should fail after all retries on persistent invalid content and report metrics', async () => {
      vi.mocked(mockModelsModule.generateContentStream).mockImplementation(
        async () =>
          (async function* () {
@@ -571,6 +590,9 @@ describe('GeminiChat', () => {

      // Should be called 3 times (initial + 2 retries)
      expect(mockModelsModule.generateContentStream).toHaveBeenCalledTimes(3);
+      expect(mockRecordInvalidChunk).toHaveBeenCalledTimes(3);
+      expect(mockRecordContentRetry).toHaveBeenCalledTimes(2);
+      expect(mockRecordContentRetryFailure).toHaveBeenCalledTimes(1);

      // History should be clean, as if the failed turn never happened.
      const history = chat.getHistory();
@@ -585,7 +607,7 @@ describe('GeminiChat', () => {
    ];
    chat.setHistory(initialHistory);

-    // 2. Mock the API
+    // 2. Mock the API to fail once with an empty stream, then succeed.
    vi.mocked(mockModelsModule.generateContentStream)
      .mockImplementationOnce(async () =>
        (async function* () {
@@ -595,6 +617,7 @@ describe('GeminiChat', () => {
        })(),
      )
      .mockImplementationOnce(async () =>
+        // Second attempt succeeds
        (async function* () {
          yield {
            candidates: [{ content: { parts: [{ text: 'Second answer' }] } }],
@@ -611,10 +634,13 @@ describe('GeminiChat', () => {
      // consume stream
    }

-    // 4. Assert the final history
+    // 4. Assert the final history and metrics
    const history = chat.getHistory();
    expect(history.length).toBe(4);

+    // Assert that the correct metrics were reported for one empty-stream retry
+    expect(mockRecordContentRetry).toHaveBeenCalledTimes(1);
+
    // Explicitly verify the structure of each part to satisfy TypeScript
    const turn1 = history[0];
    if (!turn1?.parts?.[0] || !('text' in turn1.parts[0])) {
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -23,6 +23,11 @@ import { Config } from '../config/config.js';
 import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
 import { hasCycleInSchema } from '../tools/tools.js';
 import { StructuredError } from './turn.js';
+import {
+  recordContentRetry,
+  recordContentRetryFailure,
+  recordInvalidChunk,
+} from '../telemetry/metrics.js';

 /**
 * Options for retrying due to invalid content from the model.
@@ -38,7 +43,6 @@ const INVALID_CONTENT_RETRY_OPTIONS: ContentRetryOptions = {
  maxAttempts: 3, // 1 initial call + 2 retries
  initialDelayMs: 500,
 };
-
 /**
 * Returns true if the response is valid, false otherwise.
 */
@@ -349,7 +353,7 @@ export class GeminiChat {

        for (
          let attempt = 0;
-          attempt <= INVALID_CONTENT_RETRY_OPTIONS.maxAttempts;
+          attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts;
          attempt++
        ) {
          try {
@@ -373,6 +377,7 @@ export class GeminiChat {
            if (isContentError) {
              // Check if we have more attempts left.
              if (attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts - 1) {
+                recordContentRetry(self.config);
                await new Promise((res) =>
                  setTimeout(
                    res,
@@ -388,6 +393,9 @@ export class GeminiChat {
        }

        if (lastError) {
+          if (lastError instanceof EmptyStreamError) {
+            recordContentRetryFailure(self.config);
+          }
          // If the stream fails, remove the user message that was added.
          if (self.history[self.history.length - 1] === userContent) {
            self.history.pop();
@@ -545,6 +553,7 @@ export class GeminiChat {
          }
        }
      } else {
+        recordInvalidChunk(this.config);
        isStreamInvalid = true;
      }
      yield chunk; // Yield every chunk to the UI immediately.
--- a/packages/core/src/telemetry/constants.ts
+++ b/packages/core/src/telemetry/constants.ts
@@ -24,3 +24,7 @@ export const METRIC_API_REQUEST_LATENCY = 'gemini_cli.api.request.latency';
 export const METRIC_TOKEN_USAGE = 'gemini_cli.token.usage';
 export const METRIC_SESSION_COUNT = 'gemini_cli.session.count';
 export const METRIC_FILE_OPERATION_COUNT = 'gemini_cli.file.operation.count';
+export const METRIC_INVALID_CHUNK_COUNT = 'gemini_cli.chat.invalid_chunk.count';
+export const METRIC_CONTENT_RETRY_COUNT = 'gemini_cli.chat.content_retry.count';
+export const METRIC_CONTENT_RETRY_FAILURE_COUNT =
+  'gemini_cli.chat.content_retry_failure.count';
--- a/packages/core/src/telemetry/metrics.ts
+++ b/packages/core/src/telemetry/metrics.ts
@@ -22,6 +22,9 @@ import {
  METRIC_SESSION_COUNT,
  METRIC_FILE_OPERATION_COUNT,
  EVENT_CHAT_COMPRESSION,
+  METRIC_INVALID_CHUNK_COUNT,
+  METRIC_CONTENT_RETRY_COUNT,
+  METRIC_CONTENT_RETRY_FAILURE_COUNT,
 } from './constants.js';
 import { Config } from '../config/config.js';
 import { DiffStat } from '../tools/tools.js';
@@ -40,6 +43,9 @@ let apiRequestLatencyHistogram: Histogram | undefined;
 let tokenUsageCounter: Counter | undefined;
 let fileOperationCounter: Counter | undefined;
 let chatCompressionCounter: Counter | undefined;
+let invalidChunkCounter: Counter | undefined;
+let contentRetryCounter: Counter | undefined;
+let contentRetryFailureCounter: Counter | undefined;
 let isMetricsInitialized = false;

 function getCommonAttributes(config: Config): Attributes {
@@ -94,6 +100,24 @@ export function initializeMetrics(config: Config): void {
    description: 'Counts chat compression events.',
    valueType: ValueType.INT,
  });
+
+  // Counters for content errors (invalid chunks, retries, exhausted retries)
+  invalidChunkCounter = meter.createCounter(METRIC_INVALID_CHUNK_COUNT, {
+    description: 'Counts invalid chunks received from a stream.',
+    valueType: ValueType.INT,
+  });
+  contentRetryCounter = meter.createCounter(METRIC_CONTENT_RETRY_COUNT, {
+    description: 'Counts retries due to content errors (e.g., empty stream).',
+    valueType: ValueType.INT,
+  });
+  contentRetryFailureCounter = meter.createCounter(
+    METRIC_CONTENT_RETRY_FAILURE_COUNT,
+    {
+      description: 'Counts occurrences of all content retries failing.',
+      valueType: ValueType.INT,
+    },
+  );
+
  const sessionCounter = meter.createCounter(METRIC_SESSION_COUNT, {
    description: 'Count of CLI sessions started.',
    valueType: ValueType.INT,
@@ -231,3 +255,29 @@ export function recordFileOperationMetric(
  }
  fileOperationCounter.add(1, attributes);
 }
+
+// --- Content Error Metric Recording Functions ---
+
+/**
+ * Records one invalid chunk received from a stream, tagged with the common attributes for `config`.
+ */
+export function recordInvalidChunk(config: Config): void {
+  if (!invalidChunkCounter || !isMetricsInitialized) return; // no-op while metrics are uninitialized
+  invalidChunkCounter.add(1, getCommonAttributes(config));
+}
+
+/**
+ * Records one retry triggered by a content error (e.g., an empty stream), tagged with the common attributes for `config`.
+ */
+export function recordContentRetry(config: Config): void {
+  if (!contentRetryCounter || !isMetricsInitialized) return; // no-op while metrics are uninitialized
+  contentRetryCounter.add(1, getCommonAttributes(config));
+}
+
+/**
+ * Records one request for which all content-error retries failed, tagged with the common attributes for `config`.
+ */
+export function recordContentRetryFailure(config: Config): void {
+  if (!contentRetryFailureCounter || !isMetricsInitialized) return; // no-op while metrics are uninitialized
+  contentRetryFailureCounter.add(1, getCommonAttributes(config));
+}