mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-20 16:57:46 +00:00
feat(telemetry): Prevent memory leak in ClearcutLogger (#5734)
This commit is contained in:
@@ -30,8 +30,8 @@ import {
|
||||
getCachedGoogleAccount,
|
||||
getLifetimeGoogleAccounts,
|
||||
} from '../../utils/user_account.js';
|
||||
import { HttpError, retryWithBackoff } from '../../utils/retry.js';
|
||||
import { getInstallationId } from '../../utils/user_id.js';
|
||||
import { FixedDeque } from 'mnemonist';
|
||||
|
||||
const start_session_event_name = 'start_session';
|
||||
const new_prompt_event_name = 'new_prompt';
|
||||
@@ -51,6 +51,25 @@ export interface LogResponse {
|
||||
nextRequestWaitMs?: number;
|
||||
}
|
||||
|
||||
export interface LogEventEntry {
|
||||
event_time_ms: number;
|
||||
source_extension_json: string;
|
||||
}
|
||||
|
||||
export type EventValue = {
|
||||
gemini_cli_key: EventMetadataKey | string;
|
||||
value: string;
|
||||
};
|
||||
|
||||
export type LogEvent = {
|
||||
console_type: string;
|
||||
application: number;
|
||||
event_name: string;
|
||||
event_metadata: EventValue[][];
|
||||
client_email?: string;
|
||||
client_install_id?: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* Determine the surface that the user is currently using. Surface is effectively the
|
||||
* distribution channel in which the user is using Gemini CLI. Gemini CLI comes bundled
|
||||
@@ -75,13 +94,17 @@ function determineSurface(): string {
|
||||
export class ClearcutLogger {
|
||||
private static instance: ClearcutLogger;
|
||||
private config?: Config;
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Clearcut expects this format.
|
||||
private readonly events: any = [];
|
||||
private readonly events: FixedDeque<LogEventEntry[]>;
|
||||
private last_flush_time: number = Date.now();
|
||||
private flush_interval_ms: number = 1000 * 60; // Wait at least a minute before flushing events.
|
||||
private readonly max_events: number = 1000; // Maximum events to keep in memory
|
||||
private readonly max_retry_events: number = 100; // Maximum failed events to retry
|
||||
private flushing: boolean = false; // Prevent concurrent flush operations
|
||||
private pendingFlush: boolean = false; // Track if a flush was requested during an ongoing flush
|
||||
|
||||
private constructor(config?: Config) {
|
||||
this.config = config;
|
||||
this.events = new FixedDeque<LogEventEntry[]>(Array, this.max_events);
|
||||
}
|
||||
|
||||
static getInstance(config?: Config): ClearcutLogger | undefined {
|
||||
@@ -93,17 +116,41 @@ export class ClearcutLogger {
|
||||
return ClearcutLogger.instance;
|
||||
}
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Clearcut expects this format.
|
||||
enqueueLogEvent(event: any): void {
|
||||
this.events.push([
|
||||
{
|
||||
event_time_ms: Date.now(),
|
||||
source_extension_json: safeJsonStringify(event),
|
||||
},
|
||||
]);
|
||||
/** For testing purposes only. */
|
||||
static clearInstance(): void {
|
||||
// @ts-expect-error - ClearcutLogger is a singleton, but we need to clear it for tests.
|
||||
ClearcutLogger.instance = undefined;
|
||||
}
|
||||
|
||||
createLogEvent(name: string, data: object[]): object {
|
||||
enqueueLogEvent(event: object): void {
|
||||
try {
|
||||
// Manually handle overflow for FixedDeque, which throws when full.
|
||||
const wasAtCapacity = this.events.size >= this.max_events;
|
||||
|
||||
if (wasAtCapacity) {
|
||||
this.events.shift(); // Evict oldest element to make space.
|
||||
}
|
||||
|
||||
this.events.push([
|
||||
{
|
||||
event_time_ms: Date.now(),
|
||||
source_extension_json: safeJsonStringify(event),
|
||||
},
|
||||
]);
|
||||
|
||||
if (wasAtCapacity && this.config?.getDebugMode()) {
|
||||
console.debug(
|
||||
`ClearcutLogger: Dropped old event to prevent memory leak (queue size: ${this.events.size})`,
|
||||
);
|
||||
}
|
||||
} catch (error) {
|
||||
if (this.config?.getDebugMode()) {
|
||||
console.error('ClearcutLogger: Failed to enqueue log event.', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
createLogEvent(name: string, data: EventValue[]): LogEvent {
|
||||
const email = getCachedGoogleAccount();
|
||||
const totalAccounts = getLifetimeGoogleAccounts();
|
||||
data.push({
|
||||
@@ -111,12 +158,11 @@ export class ClearcutLogger {
|
||||
value: totalAccounts.toString(),
|
||||
});
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const logEvent: any = {
|
||||
const logEvent: LogEvent = {
|
||||
console_type: 'GEMINI_CLI',
|
||||
application: 102,
|
||||
event_name: name,
|
||||
event_metadata: [data] as object[],
|
||||
event_metadata: [data],
|
||||
};
|
||||
|
||||
// Should log either email or install ID, not both. See go/cloudmill-1p-oss-instrumentation#define-sessionable-id
|
||||
@@ -140,16 +186,25 @@ export class ClearcutLogger {
|
||||
}
|
||||
|
||||
async flushToClearcut(): Promise<LogResponse> {
|
||||
if (this.flushing) {
|
||||
if (this.config?.getDebugMode()) {
|
||||
console.debug(
|
||||
'ClearcutLogger: Flush already in progress, marking pending flush.',
|
||||
);
|
||||
}
|
||||
this.pendingFlush = true;
|
||||
return Promise.resolve({});
|
||||
}
|
||||
this.flushing = true;
|
||||
|
||||
if (this.config?.getDebugMode()) {
|
||||
console.log('Flushing log events to Clearcut.');
|
||||
}
|
||||
const eventsToSend = [...this.events];
|
||||
if (eventsToSend.length === 0) {
|
||||
return {};
|
||||
}
|
||||
const eventsToSend = this.events.toArray() as LogEventEntry[][];
|
||||
this.events.clear();
|
||||
|
||||
const flushFn = () =>
|
||||
new Promise<Buffer>((resolve, reject) => {
|
||||
return new Promise<{ buffer: Buffer; statusCode?: number }>(
|
||||
(resolve, reject) => {
|
||||
const request = [
|
||||
{
|
||||
log_source_name: 'CONCORD',
|
||||
@@ -163,6 +218,7 @@ export class ClearcutLogger {
|
||||
path: '/log',
|
||||
method: 'POST',
|
||||
headers: { 'Content-Length': Buffer.byteLength(body) },
|
||||
timeout: 30000, // 30-second timeout
|
||||
};
|
||||
const bufs: Buffer[] = [];
|
||||
const req = https.request(
|
||||
@@ -171,49 +227,77 @@ export class ClearcutLogger {
|
||||
agent: this.getProxyAgent(),
|
||||
},
|
||||
(res) => {
|
||||
if (
|
||||
res.statusCode &&
|
||||
(res.statusCode < 200 || res.statusCode >= 300)
|
||||
) {
|
||||
const err: HttpError = new Error(
|
||||
`Request failed with status ${res.statusCode}`,
|
||||
);
|
||||
err.status = res.statusCode;
|
||||
res.resume();
|
||||
return reject(err);
|
||||
}
|
||||
res.on('error', reject); // Handle stream errors
|
||||
res.on('data', (buf) => bufs.push(buf));
|
||||
res.on('end', () => resolve(Buffer.concat(bufs)));
|
||||
res.on('end', () => {
|
||||
try {
|
||||
const buffer = Buffer.concat(bufs);
|
||||
// Check if we got a successful response
|
||||
if (
|
||||
res.statusCode &&
|
||||
res.statusCode >= 200 &&
|
||||
res.statusCode < 300
|
||||
) {
|
||||
resolve({ buffer, statusCode: res.statusCode });
|
||||
} else {
|
||||
// HTTP error - reject with status code for retry handling
|
||||
reject(
|
||||
new Error(`HTTP ${res.statusCode}: ${res.statusMessage}`),
|
||||
);
|
||||
}
|
||||
} catch (e) {
|
||||
reject(e);
|
||||
}
|
||||
});
|
||||
},
|
||||
);
|
||||
req.on('error', reject);
|
||||
req.on('error', (e) => {
|
||||
// Network-level error
|
||||
reject(e);
|
||||
});
|
||||
req.on('timeout', () => {
|
||||
if (!req.destroyed) {
|
||||
req.destroy(new Error('Request timeout after 30 seconds'));
|
||||
}
|
||||
});
|
||||
req.end(body);
|
||||
},
|
||||
)
|
||||
.then(({ buffer }) => {
|
||||
try {
|
||||
this.last_flush_time = Date.now();
|
||||
return this.decodeLogResponse(buffer) || {};
|
||||
} catch (error: unknown) {
|
||||
console.error('Error decoding log response:', error);
|
||||
return {};
|
||||
}
|
||||
})
|
||||
.catch((error: unknown) => {
|
||||
// Handle both network-level and HTTP-level errors
|
||||
if (this.config?.getDebugMode()) {
|
||||
console.error('Error flushing log events:', error);
|
||||
}
|
||||
|
||||
// Re-queue failed events for retry
|
||||
this.requeueFailedEvents(eventsToSend);
|
||||
|
||||
// Return empty response to maintain the Promise<LogResponse> contract
|
||||
return {};
|
||||
})
|
||||
.finally(() => {
|
||||
this.flushing = false;
|
||||
|
||||
// If a flush was requested while we were flushing, flush again
|
||||
if (this.pendingFlush) {
|
||||
this.pendingFlush = false;
|
||||
// Fire and forget the pending flush
|
||||
this.flushToClearcut().catch((error) => {
|
||||
if (this.config?.getDebugMode()) {
|
||||
console.debug('Error in pending flush to Clearcut:', error);
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
try {
|
||||
const responseBuffer = await retryWithBackoff(flushFn, {
|
||||
maxAttempts: 3,
|
||||
initialDelayMs: 200,
|
||||
shouldRetry: (err: unknown) => {
|
||||
if (!(err instanceof Error)) return false;
|
||||
const status = (err as HttpError).status as number | undefined;
|
||||
// If status is not available, it's likely a network error
|
||||
if (status === undefined) return true;
|
||||
|
||||
// Retry on 429 (Too many Requests) and 5xx server errors.
|
||||
return status === 429 || (status >= 500 && status < 600);
|
||||
},
|
||||
});
|
||||
|
||||
this.events.splice(0, eventsToSend.length);
|
||||
this.last_flush_time = Date.now();
|
||||
return this.decodeLogResponse(responseBuffer) || {};
|
||||
} catch (error) {
|
||||
if (this.config?.getDebugMode()) {
|
||||
console.error('Clearcut flush failed after multiple retries.', error);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
// Visible for testing. Decodes protobuf-encoded response from Clearcut server.
|
||||
@@ -258,7 +342,7 @@ export class ClearcutLogger {
|
||||
logStartSessionEvent(event: StartSessionEvent): void {
|
||||
const surface = determineSurface();
|
||||
|
||||
const data = [
|
||||
const data: EventValue[] = [
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_START_SESSION_MODEL,
|
||||
value: event.model,
|
||||
@@ -337,7 +421,7 @@ export class ClearcutLogger {
|
||||
}
|
||||
|
||||
logNewPromptEvent(event: UserPromptEvent): void {
|
||||
const data = [
|
||||
const data: EventValue[] = [
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_USER_PROMPT_LENGTH,
|
||||
value: JSON.stringify(event.prompt_length),
|
||||
@@ -361,7 +445,7 @@ export class ClearcutLogger {
|
||||
}
|
||||
|
||||
logToolCallEvent(event: ToolCallEvent): void {
|
||||
const data = [
|
||||
const data: EventValue[] = [
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_TOOL_CALL_NAME,
|
||||
value: JSON.stringify(event.function_name),
|
||||
@@ -398,7 +482,7 @@ export class ClearcutLogger {
|
||||
}
|
||||
|
||||
logApiRequestEvent(event: ApiRequestEvent): void {
|
||||
const data = [
|
||||
const data: EventValue[] = [
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_API_REQUEST_MODEL,
|
||||
value: JSON.stringify(event.model),
|
||||
@@ -414,7 +498,7 @@ export class ClearcutLogger {
|
||||
}
|
||||
|
||||
logApiResponseEvent(event: ApiResponseEvent): void {
|
||||
const data = [
|
||||
const data: EventValue[] = [
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_API_RESPONSE_MODEL,
|
||||
value: JSON.stringify(event.model),
|
||||
@@ -471,7 +555,7 @@ export class ClearcutLogger {
|
||||
}
|
||||
|
||||
logApiErrorEvent(event: ApiErrorEvent): void {
|
||||
const data = [
|
||||
const data: EventValue[] = [
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_API_ERROR_MODEL,
|
||||
value: JSON.stringify(event.model),
|
||||
@@ -503,7 +587,7 @@ export class ClearcutLogger {
|
||||
}
|
||||
|
||||
logFlashFallbackEvent(event: FlashFallbackEvent): void {
|
||||
const data = [
|
||||
const data: EventValue[] = [
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_AUTH_TYPE,
|
||||
value: JSON.stringify(event.auth_type),
|
||||
@@ -521,7 +605,7 @@ export class ClearcutLogger {
|
||||
}
|
||||
|
||||
logLoopDetectedEvent(event: LoopDetectedEvent): void {
|
||||
const data = [
|
||||
const data: EventValue[] = [
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_PROMPT_ID,
|
||||
value: JSON.stringify(event.prompt_id),
|
||||
@@ -537,7 +621,7 @@ export class ClearcutLogger {
|
||||
}
|
||||
|
||||
logNextSpeakerCheck(event: NextSpeakerCheckEvent): void {
|
||||
const data = [
|
||||
const data: EventValue[] = [
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_PROMPT_ID,
|
||||
value: JSON.stringify(event.prompt_id),
|
||||
@@ -563,7 +647,7 @@ export class ClearcutLogger {
|
||||
}
|
||||
|
||||
logSlashCommandEvent(event: SlashCommandEvent): void {
|
||||
const data = [
|
||||
const data: EventValue[] = [
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_SLASH_COMMAND_NAME,
|
||||
value: JSON.stringify(event.command),
|
||||
@@ -582,7 +666,7 @@ export class ClearcutLogger {
|
||||
}
|
||||
|
||||
logMalformedJsonResponseEvent(event: MalformedJsonResponseEvent): void {
|
||||
const data = [
|
||||
const data: EventValue[] = [
|
||||
{
|
||||
gemini_cli_key:
|
||||
EventMetadataKey.GEMINI_CLI_MALFORMED_JSON_RESPONSE_MODEL,
|
||||
@@ -597,7 +681,7 @@ export class ClearcutLogger {
|
||||
}
|
||||
|
||||
logIdeConnectionEvent(event: IdeConnectionEvent): void {
|
||||
const data = [
|
||||
const data: EventValue[] = [
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_IDE_CONNECTION_TYPE,
|
||||
value: JSON.stringify(event.connection_type),
|
||||
@@ -609,7 +693,7 @@ export class ClearcutLogger {
|
||||
}
|
||||
|
||||
logEndSessionEvent(event: EndSessionEvent): void {
|
||||
const data = [
|
||||
const data: EventValue[] = [
|
||||
{
|
||||
gemini_cli_key: EventMetadataKey.GEMINI_CLI_SESSION_ID,
|
||||
value: event?.session_id?.toString() ?? '',
|
||||
@@ -639,4 +723,57 @@ export class ClearcutLogger {
|
||||
const event = new EndSessionEvent(this.config);
|
||||
this.logEndSessionEvent(event);
|
||||
}
|
||||
|
||||
private requeueFailedEvents(eventsToSend: LogEventEntry[][]): void {
|
||||
// Add the events back to the front of the queue to be retried, but limit retry queue size
|
||||
const eventsToRetry = eventsToSend.slice(-this.max_retry_events); // Keep only the most recent events
|
||||
|
||||
// Log a warning if we're dropping events
|
||||
if (
|
||||
eventsToSend.length > this.max_retry_events &&
|
||||
this.config?.getDebugMode()
|
||||
) {
|
||||
console.warn(
|
||||
`ClearcutLogger: Dropping ${
|
||||
eventsToSend.length - this.max_retry_events
|
||||
} events due to retry queue limit. Total events: ${
|
||||
eventsToSend.length
|
||||
}, keeping: ${this.max_retry_events}`,
|
||||
);
|
||||
}
|
||||
|
||||
// Determine how many events can be re-queued
|
||||
const availableSpace = this.max_events - this.events.size;
|
||||
const numEventsToRequeue = Math.min(eventsToRetry.length, availableSpace);
|
||||
|
||||
if (numEventsToRequeue === 0) {
|
||||
if (this.config?.getDebugMode()) {
|
||||
console.debug(
|
||||
`ClearcutLogger: No events re-queued (queue size: ${this.events.size})`,
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Get the most recent events to re-queue
|
||||
const eventsToRequeue = eventsToRetry.slice(
|
||||
eventsToRetry.length - numEventsToRequeue,
|
||||
);
|
||||
|
||||
// Prepend events to the front of the deque to be retried first.
|
||||
// We iterate backwards to maintain the original order of the failed events.
|
||||
for (let i = eventsToRequeue.length - 1; i >= 0; i--) {
|
||||
this.events.unshift(eventsToRequeue[i]);
|
||||
}
|
||||
// Clear any potential overflow
|
||||
while (this.events.size > this.max_events) {
|
||||
this.events.pop();
|
||||
}
|
||||
|
||||
if (this.config?.getDebugMode()) {
|
||||
console.debug(
|
||||
`ClearcutLogger: Re-queued ${numEventsToRequeue} events for retry (queue size: ${this.events.size})`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user