Clearcut Logging of Content Error Metrics (#7099)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Victor May
2025-08-26 15:37:18 -04:00
committed by GitHub
parent 142192ae59
commit 08bdd08412
7 changed files with 313 additions and 39 deletions

View File

@@ -25,20 +25,17 @@ const mockModelsModule = {
batchEmbedContents: vi.fn(),
} as unknown as Models;
const {
mockRecordInvalidChunk,
mockRecordContentRetry,
mockRecordContentRetryFailure,
} = vi.hoisted(() => ({
mockRecordInvalidChunk: vi.fn(),
mockRecordContentRetry: vi.fn(),
mockRecordContentRetryFailure: vi.fn(),
}));
const { mockLogInvalidChunk, mockLogContentRetry, mockLogContentRetryFailure } =
vi.hoisted(() => ({
mockLogInvalidChunk: vi.fn(),
mockLogContentRetry: vi.fn(),
mockLogContentRetryFailure: vi.fn(),
}));
vi.mock('../telemetry/metrics.js', () => ({
recordInvalidChunk: mockRecordInvalidChunk,
recordContentRetry: mockRecordContentRetry,
recordContentRetryFailure: mockRecordContentRetryFailure,
vi.mock('../telemetry/loggers.js', () => ({
logInvalidChunk: mockLogInvalidChunk,
logContentRetry: mockLogContentRetry,
logContentRetryFailure: mockLogContentRetryFailure,
}));
describe('GeminiChat', () => {
@@ -767,9 +764,9 @@ describe('GeminiChat', () => {
}
// Assertions
expect(mockRecordInvalidChunk).toHaveBeenCalledTimes(1);
expect(mockRecordContentRetry).toHaveBeenCalledTimes(1);
expect(mockRecordContentRetryFailure).not.toHaveBeenCalled();
expect(mockLogInvalidChunk).toHaveBeenCalledTimes(1);
expect(mockLogContentRetry).toHaveBeenCalledTimes(1);
expect(mockLogContentRetryFailure).not.toHaveBeenCalled();
expect(mockModelsModule.generateContentStream).toHaveBeenCalledTimes(2);
expect(
chunks.some(
@@ -826,9 +823,9 @@ describe('GeminiChat', () => {
// Should be called 3 times (initial + 2 retries)
expect(mockModelsModule.generateContentStream).toHaveBeenCalledTimes(3);
expect(mockRecordInvalidChunk).toHaveBeenCalledTimes(3);
expect(mockRecordContentRetry).toHaveBeenCalledTimes(2);
expect(mockRecordContentRetryFailure).toHaveBeenCalledTimes(1);
expect(mockLogInvalidChunk).toHaveBeenCalledTimes(3);
expect(mockLogContentRetry).toHaveBeenCalledTimes(2);
expect(mockLogContentRetryFailure).toHaveBeenCalledTimes(1);
// History should be clean, as if the failed turn never happened.
const history = chat.getHistory();
@@ -875,7 +872,7 @@ describe('GeminiChat', () => {
expect(history.length).toBe(4);
// Assert that the correct metrics were reported for one empty-stream retry
expect(mockRecordContentRetry).toHaveBeenCalledTimes(1);
expect(mockLogContentRetry).toHaveBeenCalledTimes(1);
// Explicitly verify the structure of each part to satisfy TypeScript
const turn1 = history[0];

View File

@@ -24,10 +24,15 @@ import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
import { hasCycleInSchema } from '../tools/tools.js';
import type { StructuredError } from './turn.js';
import {
recordContentRetry,
recordContentRetryFailure,
recordInvalidChunk,
} from '../telemetry/metrics.js';
logContentRetry,
logContentRetryFailure,
logInvalidChunk,
} from '../telemetry/loggers.js';
import {
ContentRetryEvent,
ContentRetryFailureEvent,
InvalidChunkEvent,
} from '../telemetry/types.js';
/**
* Options for retrying due to invalid content from the model.
@@ -382,7 +387,14 @@ export class GeminiChat {
if (isContentError) {
// Check if we have more attempts left.
if (attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts - 1) {
recordContentRetry(self.config);
logContentRetry(
self.config,
new ContentRetryEvent(
attempt,
'EmptyStreamError',
INVALID_CONTENT_RETRY_OPTIONS.initialDelayMs,
),
);
await new Promise((res) =>
setTimeout(
res,
@@ -399,7 +411,13 @@ export class GeminiChat {
if (lastError) {
if (lastError instanceof EmptyStreamError) {
recordContentRetryFailure(self.config);
logContentRetryFailure(
self.config,
new ContentRetryFailureEvent(
INVALID_CONTENT_RETRY_OPTIONS.maxAttempts,
'EmptyStreamError',
),
);
}
// If the stream fails, remove the user message that was added.
if (self.history[self.history.length - 1] === userContent) {
@@ -555,7 +573,10 @@ export class GeminiChat {
modelResponseParts.push(...content.parts);
}
} else {
recordInvalidChunk(this.config);
logInvalidChunk(
this.config,
new InvalidChunkEvent('Invalid chunk received from stream.'),
);
isStreamInvalid = true;
}
yield chunk;

View File

@@ -21,6 +21,9 @@ import type {
KittySequenceOverflowEvent,
ChatCompressionEvent,
FileOperationEvent,
InvalidChunkEvent,
ContentRetryEvent,
ContentRetryFailureEvent,
} from '../types.js';
import { EventMetadataKey } from './event-metadata-key.js';
import type { Config } from '../../config/config.js';
@@ -49,6 +52,9 @@ export enum EventNames {
KITTY_SEQUENCE_OVERFLOW = 'kitty_sequence_overflow',
CHAT_COMPRESSION = 'chat_compression',
CONVERSATION_FINISHED = 'conversation_finished',
INVALID_CHUNK = 'invalid_chunk',
CONTENT_RETRY = 'content_retry',
CONTENT_RETRY_FAILURE = 'content_retry_failure',
}
export interface LogResponse {
@@ -778,6 +784,69 @@ export class ClearcutLogger {
});
}
logInvalidChunkEvent(event: InvalidChunkEvent): void {
const data: EventValue[] = [];
if (event.error_message) {
data.push({
gemini_cli_key: EventMetadataKey.GEMINI_CLI_INVALID_CHUNK_ERROR_MESSAGE,
value: event.error_message,
});
}
this.enqueueLogEvent(this.createLogEvent(EventNames.INVALID_CHUNK, data));
this.flushIfNeeded();
}
logContentRetryEvent(event: ContentRetryEvent): void {
const data: EventValue[] = [
{
gemini_cli_key:
EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_ATTEMPT_NUMBER,
value: String(event.attempt_number),
},
{
gemini_cli_key: EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_ERROR_TYPE,
value: event.error_type,
},
{
gemini_cli_key: EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_DELAY_MS,
value: String(event.retry_delay_ms),
},
];
this.enqueueLogEvent(this.createLogEvent(EventNames.CONTENT_RETRY, data));
this.flushIfNeeded();
}
logContentRetryFailureEvent(event: ContentRetryFailureEvent): void {
const data: EventValue[] = [
{
gemini_cli_key:
EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_ATTEMPTS,
value: String(event.total_attempts),
},
{
gemini_cli_key:
EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_FAILURE_FINAL_ERROR_TYPE,
value: event.final_error_type,
},
];
if (event.total_duration_ms) {
data.push({
gemini_cli_key:
EventMetadataKey.GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_DURATION_MS,
value: String(event.total_duration_ms),
});
}
this.enqueueLogEvent(
this.createLogEvent(EventNames.CONTENT_RETRY_FAILURE, data),
);
this.flushIfNeeded();
}
/**
* Adds default fields to data, and returns a new data array. This fields
* should exist on all log events.

View File

@@ -219,9 +219,6 @@ export enum EventMetadataKey {
// Logs user removed lines in edit/write tool response.
GEMINI_CLI_USER_REMOVED_LINES = 50,
// Logs the programming language of the project.
GEMINI_CLI_PROGRAMMING_LANGUAGE = 56,
// ==========================================================================
// Kitty Sequence Overflow Event Keys
// ===========================================================================
@@ -236,11 +233,11 @@ export enum EventMetadataKey {
// Conversation Finished Event Keys
// ===========================================================================
// Logs the approval mode
GEMINI_CLI_APPROVAL_MODE = 70,
// Logs the approval mode of the session.
GEMINI_CLI_APPROVAL_MODE = 58,
// Logs the number of turns
GEMINI_CLI_CONVERSATION_TURN_COUNT = 71,
GEMINI_CLI_CONVERSATION_TURN_COUNT = 59,
// Logs the number of tokens before context window compression.
GEMINI_CLI_COMPRESSION_TOKENS_BEFORE = 60,
@@ -257,22 +254,72 @@ export enum EventMetadataKey {
// Logs count of MCP tools in Start Session Event
GEMINI_CLI_START_SESSION_MCP_TOOLS_COUNT = 64,
// Logs name of MCP tools as comma seperated string
// Logs name of MCP tools as comma separated string
GEMINI_CLI_START_SESSION_MCP_TOOLS = 65,
// ==========================================================================
// Research Event Keys
// ===========================================================================
// Logs the research opt-in status (true/false)
GEMINI_CLI_RESEARCH_OPT_IN_STATUS = 66,
// Logs the contact email for research participation
GEMINI_CLI_RESEARCH_CONTACT_EMAIL = 67,
// Logs the user ID for research events
GEMINI_CLI_RESEARCH_USER_ID = 68,
// Logs the type of research feedback
GEMINI_CLI_RESEARCH_FEEDBACK_TYPE = 69,
// Logs the content of research feedback
GEMINI_CLI_RESEARCH_FEEDBACK_CONTENT = 70,
// Logs survey responses for research feedback (JSON stringified)
GEMINI_CLI_RESEARCH_SURVEY_RESPONSES = 71,
// ==========================================================================
// File Operation Event Keys
// ===========================================================================
// Logs the programming language of the project.
GEMINI_CLI_PROGRAMMING_LANGUAGE = 56,
// Logs the operation type of the file operation.
GEMINI_CLI_FILE_OPERATION_TYPE = 66,
GEMINI_CLI_FILE_OPERATION_TYPE = 57,
// Logs the number of lines in the file operation.
GEMINI_CLI_FILE_OPERATION_LINES = 67,
GEMINI_CLI_FILE_OPERATION_LINES = 72,
// Logs the mimetype of the file in the file operation.
GEMINI_CLI_FILE_OPERATION_MIMETYPE = 68,
GEMINI_CLI_FILE_OPERATION_MIMETYPE = 73,
// Logs the extension of the file in the file operation.
GEMINI_CLI_FILE_OPERATION_EXTENSION = 69,
GEMINI_CLI_FILE_OPERATION_EXTENSION = 74,
// ==========================================================================
// Content Streaming Event Keys
// ===========================================================================
// Logs the error message for an invalid chunk.
GEMINI_CLI_INVALID_CHUNK_ERROR_MESSAGE = 75,
// Logs the attempt number for a content retry.
GEMINI_CLI_CONTENT_RETRY_ATTEMPT_NUMBER = 76,
// Logs the error type for a content retry.
GEMINI_CLI_CONTENT_RETRY_ERROR_TYPE = 77,
// Logs the delay in milliseconds for a content retry.
GEMINI_CLI_CONTENT_RETRY_DELAY_MS = 78,
// Logs the total number of attempts for a content retry failure.
GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_ATTEMPTS = 79,
// Logs the final error type for a content retry failure.
GEMINI_CLI_CONTENT_RETRY_FAILURE_FINAL_ERROR_TYPE = 80,
// Logs the total duration in milliseconds for a content retry failure.
GEMINI_CLI_CONTENT_RETRY_FAILURE_TOTAL_DURATION_MS = 81,
}

View File

@@ -20,6 +20,11 @@ export const EVENT_CONVERSATION_FINISHED = 'gemini_cli.conversation_finished';
export const EVENT_CHAT_COMPRESSION = 'gemini_cli.chat_compression';
export const EVENT_MALFORMED_JSON_RESPONSE =
'gemini_cli.malformed_json_response';
export const EVENT_INVALID_CHUNK = 'gemini_cli.chat.invalid_chunk';
export const EVENT_CONTENT_RETRY = 'gemini_cli.chat.content_retry';
export const EVENT_CONTENT_RETRY_FAILURE =
'gemini_cli.chat.content_retry_failure';
export const METRIC_TOOL_CALL_COUNT = 'gemini_cli.tool.call.count';
export const METRIC_TOOL_CALL_LATENCY = 'gemini_cli.tool.call.latency';
export const METRIC_API_REQUEST_COUNT = 'gemini_cli.api.request.count';

View File

@@ -23,6 +23,9 @@ import {
EVENT_CONVERSATION_FINISHED,
EVENT_CHAT_COMPRESSION,
EVENT_MALFORMED_JSON_RESPONSE,
EVENT_INVALID_CHUNK,
EVENT_CONTENT_RETRY,
EVENT_CONTENT_RETRY_FAILURE,
} from './constants.js';
import type {
ApiErrorEvent,
@@ -41,6 +44,9 @@ import type {
KittySequenceOverflowEvent,
ChatCompressionEvent,
MalformedJsonResponseEvent,
InvalidChunkEvent,
ContentRetryEvent,
ContentRetryFailureEvent,
} from './types.js';
import {
recordApiErrorMetrics,
@@ -49,6 +55,9 @@ import {
recordToolCallMetrics,
recordChatCompressionMetrics,
recordFileOperationMetric,
recordInvalidChunk,
recordContentRetry,
recordContentRetryFailure,
} from './metrics.js';
import { isTelemetrySdkInitialized } from './sdk.js';
import type { UiEvent } from './uiTelemetry.js';
@@ -501,3 +510,73 @@ export function logMalformedJsonResponse(
};
logger.emit(logRecord);
}
export function logInvalidChunk(
config: Config,
event: InvalidChunkEvent,
): void {
ClearcutLogger.getInstance(config)?.logInvalidChunkEvent(event);
if (!isTelemetrySdkInitialized()) return;
const attributes: LogAttributes = {
...getCommonAttributes(config),
'event.name': EVENT_INVALID_CHUNK,
'event.timestamp': event['event.timestamp'],
};
if (event.error_message) {
attributes['error.message'] = event.error_message;
}
const logger = logs.getLogger(SERVICE_NAME);
const logRecord: LogRecord = {
body: `Invalid chunk received from stream.`,
attributes,
};
logger.emit(logRecord);
recordInvalidChunk(config);
}
export function logContentRetry(
config: Config,
event: ContentRetryEvent,
): void {
ClearcutLogger.getInstance(config)?.logContentRetryEvent(event);
if (!isTelemetrySdkInitialized()) return;
const attributes: LogAttributes = {
...getCommonAttributes(config),
...event,
'event.name': EVENT_CONTENT_RETRY,
};
const logger = logs.getLogger(SERVICE_NAME);
const logRecord: LogRecord = {
body: `Content retry attempt ${event.attempt_number} due to ${event.error_type}.`,
attributes,
};
logger.emit(logRecord);
recordContentRetry(config);
}
export function logContentRetryFailure(
config: Config,
event: ContentRetryFailureEvent,
): void {
ClearcutLogger.getInstance(config)?.logContentRetryFailureEvent(event);
if (!isTelemetrySdkInitialized()) return;
const attributes: LogAttributes = {
...getCommonAttributes(config),
...event,
'event.name': EVENT_CONTENT_RETRY_FAILURE,
};
const logger = logs.getLogger(SERVICE_NAME);
const logRecord: LogRecord = {
body: `All content retries failed after ${event.total_attempts} attempts.`,
attributes,
};
logger.emit(logRecord);
recordContentRetryFailure(config);
}

View File

@@ -448,6 +448,59 @@ export class FileOperationEvent implements BaseTelemetryEvent {
}
}
// Add these new event interfaces
export class InvalidChunkEvent implements BaseTelemetryEvent {
'event.name': 'invalid_chunk';
'event.timestamp': string;
error_message?: string; // Optional: validation error details
constructor(error_message?: string) {
this['event.name'] = 'invalid_chunk';
this['event.timestamp'] = new Date().toISOString();
this.error_message = error_message;
}
}
export class ContentRetryEvent implements BaseTelemetryEvent {
'event.name': 'content_retry';
'event.timestamp': string;
attempt_number: number;
error_type: string; // e.g., 'EmptyStreamError'
retry_delay_ms: number;
constructor(
attempt_number: number,
error_type: string,
retry_delay_ms: number,
) {
this['event.name'] = 'content_retry';
this['event.timestamp'] = new Date().toISOString();
this.attempt_number = attempt_number;
this.error_type = error_type;
this.retry_delay_ms = retry_delay_ms;
}
}
export class ContentRetryFailureEvent implements BaseTelemetryEvent {
'event.name': 'content_retry_failure';
'event.timestamp': string;
total_attempts: number;
final_error_type: string;
total_duration_ms?: number; // Optional: total time spent retrying
constructor(
total_attempts: number,
final_error_type: string,
total_duration_ms?: number,
) {
this['event.name'] = 'content_retry_failure';
this['event.timestamp'] = new Date().toISOString();
this.total_attempts = total_attempts;
this.final_error_type = final_error_type;
this.total_duration_ms = total_duration_ms;
}
}
export type TelemetryEvent =
| StartSessionEvent
| EndSessionEvent
@@ -464,4 +517,7 @@ export type TelemetryEvent =
| IdeConnectionEvent
| ConversationFinishedEvent
| SlashCommandEvent
| FileOperationEvent;
| FileOperationEvent
| InvalidChunkEvent
| ContentRetryEvent
| ContentRetryFailureEvent;