diff --git a/packages/core/src/core/geminiChat.test.ts b/packages/core/src/core/geminiChat.test.ts index c4fb7f0f..0bb4059d 100644 --- a/packages/core/src/core/geminiChat.test.ts +++ b/packages/core/src/core/geminiChat.test.ts @@ -25,6 +25,22 @@ const mockModelsModule = { batchEmbedContents: vi.fn(), } as unknown as Models; +const { + mockRecordInvalidChunk, + mockRecordContentRetry, + mockRecordContentRetryFailure, +} = vi.hoisted(() => ({ + mockRecordInvalidChunk: vi.fn(), + mockRecordContentRetry: vi.fn(), + mockRecordContentRetryFailure: vi.fn(), +})); + +vi.mock('../telemetry/metrics.js', () => ({ + recordInvalidChunk: mockRecordInvalidChunk, + recordContentRetry: mockRecordContentRetry, + recordContentRetryFailure: mockRecordContentRetryFailure, +})); + describe('GeminiChat', () => { let chat: GeminiChat; let mockConfig: Config; @@ -483,7 +499,7 @@ describe('GeminiChat', () => { }); describe('sendMessageStream with retries', () => { - it('should retry on invalid content and succeed on the second attempt', async () => { + it('should retry on invalid content, succeed, and report metrics', async () => { // Use mockImplementationOnce to provide a fresh, promise-wrapped generator for each attempt. vi.mocked(mockModelsModule.generateContentStream) .mockImplementationOnce(async () => @@ -515,6 +531,9 @@ describe('GeminiChat', () => { } // Assertions + expect(mockRecordInvalidChunk).toHaveBeenCalledTimes(1); + expect(mockRecordContentRetry).toHaveBeenCalledTimes(1); + expect(mockRecordContentRetryFailure).not.toHaveBeenCalled(); expect(mockModelsModule.generateContentStream).toHaveBeenCalledTimes(2); expect( chunks.some( @@ -537,7 +556,7 @@ describe('GeminiChat', () => { }); }); - it('should fail after all retries on persistent invalid content', async () => { + it('should fail after all retries on persistent invalid content and report metrics', async () => { vi.mocked(mockModelsModule.generateContentStream).mockImplementation( async () => (async function* () { @@ -571,6 +590,9 @@ describe('GeminiChat', () => { // Should be called 3 times (initial + 2 retries) expect(mockModelsModule.generateContentStream).toHaveBeenCalledTimes(3); + expect(mockRecordInvalidChunk).toHaveBeenCalledTimes(3); + expect(mockRecordContentRetry).toHaveBeenCalledTimes(2); + expect(mockRecordContentRetryFailure).toHaveBeenCalledTimes(1); // History should be clean, as if the failed turn never happened. const history = chat.getHistory(); @@ -585,7 +607,7 @@ describe('GeminiChat', () => { ]; chat.setHistory(initialHistory); - // 2. Mock the API + // 2. Mock the API to fail once with an empty stream, then succeed. vi.mocked(mockModelsModule.generateContentStream) .mockImplementationOnce(async () => (async function* () { @@ -595,6 +617,7 @@ describe('GeminiChat', () => { })(), ) .mockImplementationOnce(async () => + // Second attempt succeeds (async function* () { yield { candidates: [{ content: { parts: [{ text: 'Second answer' }] } }], @@ -611,10 +634,13 @@ describe('GeminiChat', () => { // consume stream } - // 4. Assert the final history + // 4. Assert the final history and metrics const history = chat.getHistory(); expect(history.length).toBe(4); + // Assert that the correct metrics were reported for one empty-stream retry + expect(mockRecordContentRetry).toHaveBeenCalledTimes(1); + // Explicitly verify the structure of each part to satisfy TypeScript const turn1 = history[0]; if (!turn1?.parts?.[0] || !('text' in turn1.parts[0])) { diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index 93428684..52833f82 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -23,6 +23,11 @@ import { Config } from '../config/config.js'; import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js'; import { hasCycleInSchema } from '../tools/tools.js'; import { StructuredError } from './turn.js'; +import { + recordContentRetry, + recordContentRetryFailure, + recordInvalidChunk, +} from '../telemetry/metrics.js'; /** * Options for retrying due to invalid content from the model. @@ -38,7 +43,6 @@ const INVALID_CONTENT_RETRY_OPTIONS: ContentRetryOptions = { maxAttempts: 3, // 1 initial call + 2 retries initialDelayMs: 500, }; - /** * Returns true if the response is valid, false otherwise. */ @@ -349,7 +353,7 @@ export class GeminiChat { for ( let attempt = 0; - attempt <= INVALID_CONTENT_RETRY_OPTIONS.maxAttempts; + attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts; attempt++ ) { try { @@ -373,6 +377,7 @@ export class GeminiChat { if (isContentError) { // Check if we have more attempts left. if (attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts - 1) { + recordContentRetry(self.config); await new Promise((res) => setTimeout( res, @@ -388,6 +393,9 @@ export class GeminiChat { } if (lastError) { + if (lastError instanceof EmptyStreamError) { + recordContentRetryFailure(self.config); + } // If the stream fails, remove the user message that was added. if (self.history[self.history.length - 1] === userContent) { self.history.pop(); @@ -545,6 +553,7 @@ export class GeminiChat { } } } else { + recordInvalidChunk(this.config); isStreamInvalid = true; } yield chunk; // Yield every chunk to the UI immediately. diff --git a/packages/core/src/telemetry/constants.ts b/packages/core/src/telemetry/constants.ts index 0d978fb2..a058734b 100644 --- a/packages/core/src/telemetry/constants.ts +++ b/packages/core/src/telemetry/constants.ts @@ -24,3 +24,7 @@ export const METRIC_API_REQUEST_LATENCY = 'gemini_cli.api.request.latency'; export const METRIC_TOKEN_USAGE = 'gemini_cli.token.usage'; export const METRIC_SESSION_COUNT = 'gemini_cli.session.count'; export const METRIC_FILE_OPERATION_COUNT = 'gemini_cli.file.operation.count'; +export const METRIC_INVALID_CHUNK_COUNT = 'gemini_cli.chat.invalid_chunk.count'; +export const METRIC_CONTENT_RETRY_COUNT = 'gemini_cli.chat.content_retry.count'; +export const METRIC_CONTENT_RETRY_FAILURE_COUNT = + 'gemini_cli.chat.content_retry_failure.count'; diff --git a/packages/core/src/telemetry/metrics.ts b/packages/core/src/telemetry/metrics.ts index d3c4ea9c..5dd04682 100644 --- a/packages/core/src/telemetry/metrics.ts +++ b/packages/core/src/telemetry/metrics.ts @@ -22,6 +22,9 @@ import { METRIC_SESSION_COUNT, METRIC_FILE_OPERATION_COUNT, EVENT_CHAT_COMPRESSION, + METRIC_INVALID_CHUNK_COUNT, + METRIC_CONTENT_RETRY_COUNT, + METRIC_CONTENT_RETRY_FAILURE_COUNT, } from './constants.js'; import { Config } from '../config/config.js'; import { DiffStat } from '../tools/tools.js'; @@ -40,6 +43,9 @@ let apiRequestLatencyHistogram: Histogram | undefined; let tokenUsageCounter: Counter | undefined; let fileOperationCounter: Counter | undefined; let chatCompressionCounter: Counter | undefined; +let invalidChunkCounter: Counter | undefined; +let contentRetryCounter: Counter | undefined; +let contentRetryFailureCounter: Counter | undefined; let isMetricsInitialized = false; function getCommonAttributes(config: Config): Attributes { @@ -94,6 +100,24 @@ export function initializeMetrics(config: Config): void { description: 'Counts chat compression events.', valueType: ValueType.INT, }); + + // New counters for content errors + invalidChunkCounter = meter.createCounter(METRIC_INVALID_CHUNK_COUNT, { + description: 'Counts invalid chunks received from a stream.', + valueType: ValueType.INT, + }); + contentRetryCounter = meter.createCounter(METRIC_CONTENT_RETRY_COUNT, { + description: 'Counts retries due to content errors (e.g., empty stream).', + valueType: ValueType.INT, + }); + contentRetryFailureCounter = meter.createCounter( + METRIC_CONTENT_RETRY_FAILURE_COUNT, + { + description: 'Counts occurrences of all content retries failing.', + valueType: ValueType.INT, + }, + ); + const sessionCounter = meter.createCounter(METRIC_SESSION_COUNT, { description: 'Count of CLI sessions started.', valueType: ValueType.INT, @@ -231,3 +255,29 @@ export function recordFileOperationMetric( } fileOperationCounter.add(1, attributes); } + +// --- New Metric Recording Functions --- + +/** + * Records a metric for when an invalid chunk is received from a stream. + */ +export function recordInvalidChunk(config: Config): void { + if (!invalidChunkCounter || !isMetricsInitialized) return; + invalidChunkCounter.add(1, getCommonAttributes(config)); +} + +/** + * Records a metric for when a retry is triggered due to a content error. + */ +export function recordContentRetry(config: Config): void { + if (!contentRetryCounter || !isMetricsInitialized) return; + contentRetryCounter.add(1, getCommonAttributes(config)); +} + +/** + * Records a metric for when all content error retries have failed for a request. + */ +export function recordContentRetryFailure(config: Config): void { + if (!contentRetryFailureCounter || !isMetricsInitialized) return; + contentRetryFailureCounter.add(1, getCommonAttributes(config)); +}