Clearcut Logging of Content Error Metrics (#7099)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2025-12-19 09:33:53 +00:00 · 2025-08-26 15:37:18 -04:00
parent 142192ae59
commit 08bdd08412
7 changed files with 313 additions and 39 deletions
--- a/packages/core/src/core/geminiChat.test.ts
+++ b/packages/core/src/core/geminiChat.test.ts
@@ -25,20 +25,17 @@ const mockModelsModule = {
  batchEmbedContents: vi.fn(),
 } as unknown as Models;

-const {
-  mockRecordInvalidChunk,
-  mockRecordContentRetry,
-  mockRecordContentRetryFailure,
-} = vi.hoisted(() => ({
-  mockRecordInvalidChunk: vi.fn(),
-  mockRecordContentRetry: vi.fn(),
-  mockRecordContentRetryFailure: vi.fn(),
-}));
+const { mockLogInvalidChunk, mockLogContentRetry, mockLogContentRetryFailure } =
+  vi.hoisted(() => ({
+    mockLogInvalidChunk: vi.fn(),
+    mockLogContentRetry: vi.fn(),
+    mockLogContentRetryFailure: vi.fn(),
+  }));

-vi.mock('../telemetry/metrics.js', () => ({
-  recordInvalidChunk: mockRecordInvalidChunk,
-  recordContentRetry: mockRecordContentRetry,
-  recordContentRetryFailure: mockRecordContentRetryFailure,
+vi.mock('../telemetry/loggers.js', () => ({
+  logInvalidChunk: mockLogInvalidChunk,
+  logContentRetry: mockLogContentRetry,
+  logContentRetryFailure: mockLogContentRetryFailure,
 }));

 describe('GeminiChat', () => {
@@ -767,9 +764,9 @@ describe('GeminiChat', () => {
      }

      // Assertions
-      expect(mockRecordInvalidChunk).toHaveBeenCalledTimes(1);
-      expect(mockRecordContentRetry).toHaveBeenCalledTimes(1);
-      expect(mockRecordContentRetryFailure).not.toHaveBeenCalled();
+      expect(mockLogInvalidChunk).toHaveBeenCalledTimes(1);
+      expect(mockLogContentRetry).toHaveBeenCalledTimes(1);
+      expect(mockLogContentRetryFailure).not.toHaveBeenCalled();
      expect(mockModelsModule.generateContentStream).toHaveBeenCalledTimes(2);
      expect(
        chunks.some(
@@ -826,9 +823,9 @@ describe('GeminiChat', () => {

      // Should be called 3 times (initial + 2 retries)
      expect(mockModelsModule.generateContentStream).toHaveBeenCalledTimes(3);
-      expect(mockRecordInvalidChunk).toHaveBeenCalledTimes(3);
-      expect(mockRecordContentRetry).toHaveBeenCalledTimes(2);
-      expect(mockRecordContentRetryFailure).toHaveBeenCalledTimes(1);
+      expect(mockLogInvalidChunk).toHaveBeenCalledTimes(3);
+      expect(mockLogContentRetry).toHaveBeenCalledTimes(2);
+      expect(mockLogContentRetryFailure).toHaveBeenCalledTimes(1);

      // History should be clean, as if the failed turn never happened.
      const history = chat.getHistory();
@@ -875,7 +872,7 @@ describe('GeminiChat', () => {
    expect(history.length).toBe(4);

    // Assert that the correct metrics were reported for one empty-stream retry
-    expect(mockRecordContentRetry).toHaveBeenCalledTimes(1);
+    expect(mockLogContentRetry).toHaveBeenCalledTimes(1);

    // Explicitly verify the structure of each part to satisfy TypeScript
    const turn1 = history[0];
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -24,10 +24,15 @@ import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
 import { hasCycleInSchema } from '../tools/tools.js';
 import type { StructuredError } from './turn.js';
 import {
-  recordContentRetry,
-  recordContentRetryFailure,
-  recordInvalidChunk,
-} from '../telemetry/metrics.js';
+  logContentRetry,
+  logContentRetryFailure,
+  logInvalidChunk,
+} from '../telemetry/loggers.js';
+import {
+  ContentRetryEvent,
+  ContentRetryFailureEvent,
+  InvalidChunkEvent,
+} from '../telemetry/types.js';

 /**
 * Options for retrying due to invalid content from the model.
@@ -382,7 +387,14 @@ export class GeminiChat {
            if (isContentError) {
              // Check if we have more attempts left.
              if (attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts - 1) {
-                recordContentRetry(self.config);
+                logContentRetry(
+                  self.config,
+                  new ContentRetryEvent(
+                    attempt,
+                    'EmptyStreamError',
+                    INVALID_CONTENT_RETRY_OPTIONS.initialDelayMs,
+                  ),
+                );
                await new Promise((res) =>
                  setTimeout(
                    res,
@@ -399,7 +411,13 @@ export class GeminiChat {

        if (lastError) {
          if (lastError instanceof EmptyStreamError) {
-            recordContentRetryFailure(self.config);
+            logContentRetryFailure(
+              self.config,
+              new ContentRetryFailureEvent(
+                INVALID_CONTENT_RETRY_OPTIONS.maxAttempts,
+                'EmptyStreamError',
+              ),
+            );
          }
          // If the stream fails, remove the user message that was added.
          if (self.history[self.history.length - 1] === userContent) {
@@ -555,7 +573,10 @@ export class GeminiChat {
          modelResponseParts.push(...content.parts);
        }
      } else {
-        recordInvalidChunk(this.config);
+        logInvalidChunk(
+          this.config,
+          new InvalidChunkEvent('Invalid chunk received from stream.'),
+        );
        isStreamInvalid = true;
      }
      yield chunk;
--- a/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts
+++ b/packages/core/src/telemetry/clearcut-logger/clearcut-logger.ts
@@ -21,6 +21,9 @@ import type {
  KittySequenceOverflowEvent,
  ChatCompressionEvent,
  FileOperationEvent,
+  InvalidChunkEvent,
+  ContentRetryEvent,
+  ContentRetryFailureEvent,
 } from '../types.js';
 import { EventMetadataKey } from './event-metadata-key.js';
 import type { Config } from '../../config/config.js';
@@ -49,6 +52,9 @@ export enum EventNames {
  KITTY_SEQUENCE_OVERFLOW = 'kitty_sequence_overflow',
  CHAT_COMPRESSION = 'chat_compression',
  CONVERSATION_FINISHED = 'conversation_finished',
+  INVALID_CHUNK = 'invalid_chunk',
+  CONTENT_RETRY = 'content_retry',
+  CONTENT_RETRY_FAILURE = 'content_retry_failure',
 }

 export interface LogResponse {
@@ -778,6 +784,69 @@ export class ClearcutLogger {
    });
  }

+  logInvalidChunkEvent(event: InvalidChunkEvent): void {
+    const data: EventValue[] = [];
+
+    if (event.error_message) {
+      data.push({
+        gemini_cli_key: EventMetadataKey.GEMINI_CLI_INVALID_CHUNK_ERROR_MESSAGE,
+        value: event.error_message,
+      });
+    }
+
+    this.enqueueLogEvent(this.createLogEvent(EventNames.INVALID_CHUNK, data));
+    this.flushIfNeeded();
+  }
+
+  logContentRetryEvent(event: ContentRetryEvent): void {
+    const data: EventValue[] = [
+      {
+        gemini_cli_key:
+          EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_ATTEMPT_NUMBER,
+        value: String(event.attempt_number),
+      },
+      {
+        gemini_cli_key: EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_ERROR_TYPE,
+        value: event.error_type,
+      },
+      {
+        gemini_cli_key: EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_DELAY_MS,
+        value: String(event.retry_delay_ms),
+      },
+    ];
+
+    this.enqueueLogEvent(this.createLogEvent(EventNames.CONTENT_RETRY, data));
+    this.flushIfNeeded();
+  }
+
+  logContentRetryFailureEvent(event: ContentRetryFailureEvent): void {
+    const data: EventValue[] = [
+      {
+        gemini_cli_key:
+          EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_ATTEMPTS,
+        value: String(event.total_attempts),
+      },
+      {
+        gemini_cli_key:
+          EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_FAILURE_FINAL_ERROR_TYPE,
+        value: event.final_error_type,
+      },
+    ];
+
+    if (event.total_duration_ms) {
+      data.push({
+        gemini_cli_key:
+          EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_DURATION_MS,
+        value: String(event.total_duration_ms),
+      });
+    }
+
+    this.enqueueLogEvent(
+      this.createLogEvent(EventNames.CONTENT_RETRY_FAILURE, data),
+    );
+    this.flushIfNeeded();
+  }
+
  /**
   * Adds default fields to data, and returns a new data array.  This fields
   * should exist on all log events.
--- a/packages/core/src/telemetry/clearcut-logger/event-metadata-key.ts
+++ b/packages/core/src/telemetry/clearcut-logger/event-metadata-key.ts
@@ -219,9 +219,6 @@ export enum EventMetadataKey {
  // Logs user removed lines in edit/write tool response.
  GEMINI_CLI_USER_REMOVED_LINES = 50,

-  // Logs the programming language of the project.
-  GEMINI_CLI_PROGRAMMING_LANGUAGE = 56,
-
  // ==========================================================================
  // Kitty Sequence Overflow Event Keys
  // ===========================================================================
@@ -236,11 +233,11 @@ export enum EventMetadataKey {
  // Conversation Finished Event Keys
  // ===========================================================================

-  // Logs the approval mode
-  GEMINI_CLI_APPROVAL_MODE = 70,
+  // Logs the approval mode of the session.
+  GEMINI_CLI_APPROVAL_MODE = 58,

  // Logs the number of turns
-  GEMINI_CLI_CONVERSATION_TURN_COUNT = 71,
+  GEMINI_CLI_CONVERSATION_TURN_COUNT = 59,

  // Logs the number of tokens before context window compression.
  GEMINI_CLI_COMPRESSION_TOKENS_BEFORE = 60,
@@ -257,22 +254,72 @@ export enum EventMetadataKey {
  // Logs count of MCP tools in Start Session Event
  GEMINI_CLI_START_SESSION_MCP_TOOLS_COUNT = 64,

-  // Logs name of MCP tools as comma seperated string
+  // Logs name of MCP tools as comma separated string
  GEMINI_CLI_START_SESSION_MCP_TOOLS = 65,

+  // ==========================================================================
+  // Research Event Keys
+  // ===========================================================================
+
+  // Logs the research opt-in status (true/false)
+  GEMINI_CLI_RESEARCH_OPT_IN_STATUS = 66,
+
+  // Logs the contact email for research participation
+  GEMINI_CLI_RESEARCH_CONTACT_EMAIL = 67,
+
+  // Logs the user ID for research events
+  GEMINI_CLI_RESEARCH_USER_ID = 68,
+
+  // Logs the type of research feedback
+  GEMINI_CLI_RESEARCH_FEEDBACK_TYPE = 69,
+
+  // Logs the content of research feedback
+  GEMINI_CLI_RESEARCH_FEEDBACK_CONTENT = 70,
+
+  // Logs survey responses for research feedback (JSON stringified)
+  GEMINI_CLI_RESEARCH_SURVEY_RESPONSES = 71,
+
  // ==========================================================================
  // File Operation Event Keys
  // ===========================================================================

+  // Logs the programming language of the project.
+  GEMINI_CLI_PROGRAMMING_LANGUAGE = 56,
+
  // Logs the operation type of the file operation.
-  GEMINI_CLI_FILE_OPERATION_TYPE = 66,
+  GEMINI_CLI_FILE_OPERATION_TYPE = 57,

  // Logs the number of lines in the file operation.
-  GEMINI_CLI_FILE_OPERATION_LINES = 67,
+  GEMINI_CLI_FILE_OPERATION_LINES = 72,

  // Logs the mimetype of the file in the file operation.
-  GEMINI_CLI_FILE_OPERATION_MIMETYPE = 68,
+  GEMINI_CLI_FILE_OPERATION_MIMETYPE = 73,

  // Logs the extension of the file in the file operation.
-  GEMINI_CLI_FILE_OPERATION_EXTENSION = 69,
+  GEMINI_CLI_FILE_OPERATION_EXTENSION = 74,
+
+  // ==========================================================================
+  // Content Streaming Event Keys
+  // ===========================================================================
+
+  // Logs the error message for an invalid chunk.
+  GEMINI_CLI_INVALID_CHUNK_ERROR_MESSAGE = 75,
+
+  // Logs the attempt number for a content retry.
+  GEMINI_CLI_CONTENT_RETRY_ATTEMPT_NUMBER = 76,
+
+  // Logs the error type for a content retry.
+  GEMINI_CLI_CONTENT_RETRY_ERROR_TYPE = 77,
+
+  // Logs the delay in milliseconds for a content retry.
+  GEMINI_CLI_CONTENT_RETRY_DELAY_MS = 78,
+
+  // Logs the total number of attempts for a content retry failure.
+  GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_ATTEMPTS = 79,
+
+  // Logs the final error type for a content retry failure.
+  GEMINI_CLI_CONTENT_RETRY_FAILURE_FINAL_ERROR_TYPE = 80,
+
+  // Logs the total duration in milliseconds for a content retry failure.
+  GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_DURATION_MS = 81,
 }
--- a/packages/core/src/telemetry/constants.ts
+++ b/packages/core/src/telemetry/constants.ts
@@ -20,6 +20,11 @@ export const EVENT_CONVERSATION_FINISHED = 'gemini_cli.conversation_finished';
 export const EVENT_CHAT_COMPRESSION = 'gemini_cli.chat_compression';
 export const EVENT_MALFORMED_JSON_RESPONSE =
  'gemini_cli.malformed_json_response';
+export const EVENT_INVALID_CHUNK = 'gemini_cli.chat.invalid_chunk';
+export const EVENT_CONTENT_RETRY = 'gemini_cli.chat.content_retry';
+export const EVENT_CONTENT_RETRY_FAILURE =
+  'gemini_cli.chat.content_retry_failure';
+
 export const METRIC_TOOL_CALL_COUNT = 'gemini_cli.tool.call.count';
 export const METRIC_TOOL_CALL_LATENCY = 'gemini_cli.tool.call.latency';
 export const METRIC_API_REQUEST_COUNT = 'gemini_cli.api.request.count';
--- a/packages/core/src/telemetry/loggers.ts
+++ b/packages/core/src/telemetry/loggers.ts
@@ -23,6 +23,9 @@ import {
  EVENT_CONVERSATION_FINISHED,
  EVENT_CHAT_COMPRESSION,
  EVENT_MALFORMED_JSON_RESPONSE,
+  EVENT_INVALID_CHUNK,
+  EVENT_CONTENT_RETRY,
+  EVENT_CONTENT_RETRY_FAILURE,
 } from './constants.js';
 import type {
  ApiErrorEvent,
@@ -41,6 +44,9 @@ import type {
  KittySequenceOverflowEvent,
  ChatCompressionEvent,
  MalformedJsonResponseEvent,
+  InvalidChunkEvent,
+  ContentRetryEvent,
+  ContentRetryFailureEvent,
 } from './types.js';
 import {
  recordApiErrorMetrics,
@@ -49,6 +55,9 @@ import {
  recordToolCallMetrics,
  recordChatCompressionMetrics,
  recordFileOperationMetric,
+  recordInvalidChunk,
+  recordContentRetry,
+  recordContentRetryFailure,
 } from './metrics.js';
 import { isTelemetrySdkInitialized } from './sdk.js';
 import type { UiEvent } from './uiTelemetry.js';
@@ -501,3 +510,73 @@ export function logMalformedJsonResponse(
  };
  logger.emit(logRecord);
 }
+
+export function logInvalidChunk(
+  config: Config,
+  event: InvalidChunkEvent,
+): void {
+  ClearcutLogger.getInstance(config)?.logInvalidChunkEvent(event);
+  if (!isTelemetrySdkInitialized()) return;
+
+  const attributes: LogAttributes = {
+    ...getCommonAttributes(config),
+    'event.name': EVENT_INVALID_CHUNK,
+    'event.timestamp': event['event.timestamp'],
+  };
+
+  if (event.error_message) {
+    attributes['error.message'] = event.error_message;
+  }
+
+  const logger = logs.getLogger(SERVICE_NAME);
+  const logRecord: LogRecord = {
+    body: `Invalid chunk received from stream.`,
+    attributes,
+  };
+  logger.emit(logRecord);
+  recordInvalidChunk(config);
+}
+
+export function logContentRetry(
+  config: Config,
+  event: ContentRetryEvent,
+): void {
+  ClearcutLogger.getInstance(config)?.logContentRetryEvent(event);
+  if (!isTelemetrySdkInitialized()) return;
+
+  const attributes: LogAttributes = {
+    ...getCommonAttributes(config),
+    ...event,
+    'event.name': EVENT_CONTENT_RETRY,
+  };
+
+  const logger = logs.getLogger(SERVICE_NAME);
+  const logRecord: LogRecord = {
+    body: `Content retry attempt ${event.attempt_number} due to ${event.error_type}.`,
+    attributes,
+  };
+  logger.emit(logRecord);
+  recordContentRetry(config);
+}
+
+export function logContentRetryFailure(
+  config: Config,
+  event: ContentRetryFailureEvent,
+): void {
+  ClearcutLogger.getInstance(config)?.logContentRetryFailureEvent(event);
+  if (!isTelemetrySdkInitialized()) return;
+
+  const attributes: LogAttributes = {
+    ...getCommonAttributes(config),
+    ...event,
+    'event.name': EVENT_CONTENT_RETRY_FAILURE,
+  };
+
+  const logger = logs.getLogger(SERVICE_NAME);
+  const logRecord: LogRecord = {
+    body: `All content retries failed after ${event.total_attempts} attempts.`,
+    attributes,
+  };
+  logger.emit(logRecord);
+  recordContentRetryFailure(config);
+}
--- a/packages/core/src/telemetry/types.ts
+++ b/packages/core/src/telemetry/types.ts
@@ -448,6 +448,59 @@ export class FileOperationEvent implements BaseTelemetryEvent {
  }
 }

+// Add these new event interfaces
+export class InvalidChunkEvent implements BaseTelemetryEvent {
+  'event.name': 'invalid_chunk';
+  'event.timestamp': string;
+  error_message?: string; // Optional: validation error details
+
+  constructor(error_message?: string) {
+    this['event.name'] = 'invalid_chunk';
+    this['event.timestamp'] = new Date().toISOString();
+    this.error_message = error_message;
+  }
+}
+
+export class ContentRetryEvent implements BaseTelemetryEvent {
+  'event.name': 'content_retry';
+  'event.timestamp': string;
+  attempt_number: number;
+  error_type: string; // e.g., 'EmptyStreamError'
+  retry_delay_ms: number;
+
+  constructor(
+    attempt_number: number,
+    error_type: string,
+    retry_delay_ms: number,
+  ) {
+    this['event.name'] = 'content_retry';
+    this['event.timestamp'] = new Date().toISOString();
+    this.attempt_number = attempt_number;
+    this.error_type = error_type;
+    this.retry_delay_ms = retry_delay_ms;
+  }
+}
+
+export class ContentRetryFailureEvent implements BaseTelemetryEvent {
+  'event.name': 'content_retry_failure';
+  'event.timestamp': string;
+  total_attempts: number;
+  final_error_type: string;
+  total_duration_ms?: number; // Optional: total time spent retrying
+
+  constructor(
+    total_attempts: number,
+    final_error_type: string,
+    total_duration_ms?: number,
+  ) {
+    this['event.name'] = 'content_retry_failure';
+    this['event.timestamp'] = new Date().toISOString();
+    this.total_attempts = total_attempts;
+    this.final_error_type = final_error_type;
+    this.total_duration_ms = total_duration_ms;
+  }
+}
+
 export type TelemetryEvent =
  | StartSessionEvent
  | EndSessionEvent
@@ -464,4 +517,7 @@ export type TelemetryEvent =
  | IdeConnectionEvent
  | ConversationFinishedEvent
  | SlashCommandEvent
-  | FileOperationEvent;
+  | FileOperationEvent
+  | InvalidChunkEvent
+  | ContentRetryEvent
+  | ContentRetryFailureEvent;