telemetry: include user decisions in tool call logs (#966)

Add the user's decision (accept, reject, modify) to tool-call telemetry to better understand user intent. The decision provides crucial context for the `success` metric, because a user can reject a call that would have succeeded or accept one that then fails. Also pretty-print the arguments JSON (2-space indentation). Example: ![image](https://github.com/user-attachments/assets/251cb9fc-ceaa-4cdd-929c-8de47031aca8) #750
2025-12-19 09:33:53 +00:00 · 2025-06-12 16:48:10 -04:00
parent f8863f4d00
commit 6723c72fa5
8 changed files with 339 additions and 29 deletions
--- a/packages/core/src/core/coreToolScheduler.test.ts
+++ b/packages/core/src/core/coreToolScheduler.test.ts
@@ -16,6 +16,7 @@ import {
  ToolCallConfirmationDetails,
  ToolConfirmationOutcome,
  ToolResult,
+  Config,
 } from '../index.js';
 import { Part, PartListUnion } from '@google/genai';
 import { convertToFunctionResponse } from './coreToolScheduler.js';
@@ -74,7 +75,12 @@ describe('CoreToolScheduler', () => {
    const onAllToolCallsComplete = vi.fn();
    const onToolCallsUpdate = vi.fn();

+    const mockConfig = {
+      getSessionId: () => 'test-session-id',
+    } as Config;
+
    const scheduler = new CoreToolScheduler({
+      config: mockConfig,
      toolRegistry: Promise.resolve(toolRegistry as any),
      onAllToolCallsComplete,
      onToolCallsUpdate,
--- a/packages/core/src/core/coreToolScheduler.ts
+++ b/packages/core/src/core/coreToolScheduler.ts
@@ -16,6 +16,8 @@ import {
  EditTool,
  EditToolParams,
  EditorType,
+  Config,
+  logToolCall,
 } from '../index.js';
 import { Part, PartListUnion } from '@google/genai';
 import { getResponseTextFromParts } from '../utils/generateContentResponseUtilities.js';
@@ -25,6 +27,7 @@ export type ValidatingToolCall = {
  request: ToolCallRequestInfo;
  tool: Tool;
  startTime?: number;
+  outcome?: ToolConfirmationOutcome;
 };

 export type ScheduledToolCall = {
@@ -32,6 +35,7 @@ export type ScheduledToolCall = {
  request: ToolCallRequestInfo;
  tool: Tool;
  startTime?: number;
+  outcome?: ToolConfirmationOutcome;
 };

 export type ErroredToolCall = {
@@ -39,6 +43,7 @@ export type ErroredToolCall = {
  request: ToolCallRequestInfo;
  response: ToolCallResponseInfo;
  durationMs?: number;
+  outcome?: ToolConfirmationOutcome;
 };

 export type SuccessfulToolCall = {
@@ -47,6 +52,7 @@ export type SuccessfulToolCall = {
  tool: Tool;
  response: ToolCallResponseInfo;
  durationMs?: number;
+  outcome?: ToolConfirmationOutcome;
 };

 export type ExecutingToolCall = {
@@ -55,6 +61,7 @@ export type ExecutingToolCall = {
  tool: Tool;
  liveOutput?: string;
  startTime?: number;
+  outcome?: ToolConfirmationOutcome;
 };

 export type CancelledToolCall = {
@@ -63,6 +70,7 @@ export type CancelledToolCall = {
  response: ToolCallResponseInfo;
  tool: Tool;
  durationMs?: number;
+  outcome?: ToolConfirmationOutcome;
 };

 export type WaitingToolCall = {
@@ -71,6 +79,7 @@ export type WaitingToolCall = {
  tool: Tool;
  confirmationDetails: ToolCallConfirmationDetails;
  startTime?: number;
+  outcome?: ToolConfirmationOutcome;
 };

 export type Status = ToolCall['status'];
@@ -205,6 +214,7 @@ interface CoreToolSchedulerOptions {
  onToolCallsUpdate?: ToolCallsUpdateHandler;
  approvalMode?: ApprovalMode;
  getPreferredEditor: () => EditorType | undefined;
+  config: Config;
 }

 export class CoreToolScheduler {
@@ -215,8 +225,10 @@ export class CoreToolScheduler {
  private onToolCallsUpdate?: ToolCallsUpdateHandler;
  private approvalMode: ApprovalMode;
  private getPreferredEditor: () => EditorType | undefined;
+  private config: Config;

  constructor(options: CoreToolSchedulerOptions) {
+    this.config = options.config;
    this.toolRegistry = options.toolRegistry;
    this.outputUpdateHandler = options.outputUpdateHandler;
    this.onAllToolCallsComplete = options.onAllToolCallsComplete;
@@ -274,6 +286,14 @@ export class CoreToolScheduler {
          | WaitingToolCall
      ).tool;

+      const outcome = (
+        currentCall as
+          | ValidatingToolCall
+          | ScheduledToolCall
+          | ExecutingToolCall
+          | WaitingToolCall
+      ).outcome;
+
      switch (newStatus) {
        case 'success': {
          const durationMs = existingStartTime
@@ -285,6 +305,7 @@ export class CoreToolScheduler {
            status: 'success',
            response: auxiliaryData as ToolCallResponseInfo,
            durationMs,
+            outcome,
          } as SuccessfulToolCall;
        }
        case 'error': {
@@ -296,6 +317,7 @@ export class CoreToolScheduler {
            status: 'error',
            response: auxiliaryData as ToolCallResponseInfo,
            durationMs,
+            outcome,
          } as ErroredToolCall;
        }
        case 'awaiting_approval':
@@ -305,6 +327,7 @@ export class CoreToolScheduler {
            status: 'awaiting_approval',
            confirmationDetails: auxiliaryData as ToolCallConfirmationDetails,
            startTime: existingStartTime,
+            outcome,
          } as WaitingToolCall;
        case 'scheduled':
          return {
@@ -312,6 +335,7 @@ export class CoreToolScheduler {
            tool: toolInstance,
            status: 'scheduled',
            startTime: existingStartTime,
+            outcome,
          } as ScheduledToolCall;
        case 'cancelled': {
          const durationMs = existingStartTime
@@ -336,6 +360,7 @@ export class CoreToolScheduler {
              error: undefined,
            },
            durationMs,
+            outcome,
          } as CancelledToolCall;
        }
        case 'validating':
@@ -344,6 +369,7 @@ export class CoreToolScheduler {
            tool: toolInstance,
            status: 'validating',
            startTime: existingStartTime,
+            outcome,
          } as ValidatingToolCall;
        case 'executing':
          return {
@@ -351,6 +377,7 @@ export class CoreToolScheduler {
            tool: toolInstance,
            status: 'executing',
            startTime: existingStartTime,
+            outcome,
          } as ExecutingToolCall;
        default: {
          const exhaustiveCheck: never = newStatus;
@@ -482,6 +509,14 @@ export class CoreToolScheduler {
      await originalOnConfirm(outcome);
    }

+    this.toolCalls = this.toolCalls.map((call) => {
+      if (call.request.callId !== callId) return call;
+      return {
+        ...call,
+        outcome,
+      };
+    });
+
    if (outcome === ToolConfirmationOutcome.Cancel || signal.aborted) {
      this.setStatusInternal(
        callId,
@@ -613,6 +648,23 @@ export class CoreToolScheduler {
      const completedCalls = [...this.toolCalls] as CompletedToolCall[];
      this.toolCalls = [];

+      for (const call of completedCalls) {
+        logToolCall(
+          this.config,
+          {
+            function_name: call.request.name,
+            function_args: call.request.args,
+            duration_ms: call.durationMs ?? 0,
+            success: call.status === 'success',
+            error:
+              call.status === 'error'
+                ? call.response.error?.message
+                : undefined,
+          },
+          call.outcome,
+        );
+      }
+
      if (this.onAllToolCallsComplete) {
        this.onAllToolCallsComplete(completedCalls);
      }
--- a/packages/core/src/telemetry/loggers.test.ts
+++ b/packages/core/src/telemetry/loggers.test.ts
@@ -4,6 +4,7 @@
 * SPDX-License-Identifier: Apache-2.0
 */

+import { ToolConfirmationOutcome } from '../index.js';
 import { logs } from '@opentelemetry/api-logs';
 import { SemanticAttributes } from '@opentelemetry/semantic-conventions';
 import { Config } from '../config/config.js';
@@ -12,6 +13,8 @@ import {
  logApiResponse,
  logCliConfiguration,
  logUserPrompt,
+  logToolCall,
+  ToolCallDecision,
 } from './loggers.js';
 import * as metrics from './metrics.js';
 import * as sdk from './sdk.js';
@@ -236,4 +239,239 @@ describe('loggers', () => {
      });
    });
  });
+
+  describe('logToolCall', () => {
+    const mockConfig = {
+      getSessionId: () => 'test-session-id',
+    } as Config;
+
+    const mockMetrics = {
+      recordToolCallMetrics: vi.fn(),
+    };
+
+    beforeEach(() => {
+      vi.spyOn(metrics, 'recordToolCallMetrics').mockImplementation(
+        mockMetrics.recordToolCallMetrics,
+      );
+      mockLogger.emit.mockReset();
+    });
+
+    it('should log a tool call with all fields', () => {
+      const event = {
+        function_name: 'test-function',
+        function_args: {
+          arg1: 'value1',
+          arg2: 2,
+        },
+        duration_ms: 100,
+        success: true,
+      };
+
+      logToolCall(mockConfig, event, ToolConfirmationOutcome.ProceedOnce);
+
+      expect(mockLogger.emit).toHaveBeenCalledWith({
+        body: 'Tool call: test-function. Decision: accept. Success: true. Duration: 100ms.',
+        attributes: {
+          'session.id': 'test-session-id',
+          'event.name': 'gemini_cli.tool_call',
+          'event.timestamp': '2025-01-01T00:00:00.000Z',
+          function_name: 'test-function',
+          function_args: JSON.stringify(
+            {
+              arg1: 'value1',
+              arg2: 2,
+            },
+            null,
+            2,
+          ),
+          duration_ms: 100,
+          success: true,
+          decision: ToolCallDecision.ACCEPT,
+        },
+      });
+
+      expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith(
+        mockConfig,
+        'test-function',
+        100,
+        true,
+        ToolCallDecision.ACCEPT,
+      );
+    });
+    it('should log a tool call with a reject decision', () => {
+      const event = {
+        function_name: 'test-function',
+        function_args: {
+          arg1: 'value1',
+          arg2: 2,
+        },
+        duration_ms: 100,
+        success: false,
+      };
+
+      logToolCall(mockConfig, event, ToolConfirmationOutcome.Cancel);
+
+      expect(mockLogger.emit).toHaveBeenCalledWith({
+        body: 'Tool call: test-function. Decision: reject. Success: false. Duration: 100ms.',
+        attributes: {
+          'session.id': 'test-session-id',
+          'event.name': 'gemini_cli.tool_call',
+          'event.timestamp': '2025-01-01T00:00:00.000Z',
+          function_name: 'test-function',
+          function_args: JSON.stringify(
+            {
+              arg1: 'value1',
+              arg2: 2,
+            },
+            null,
+            2,
+          ),
+          duration_ms: 100,
+          success: false,
+          decision: ToolCallDecision.REJECT,
+        },
+      });
+
+      expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith(
+        mockConfig,
+        'test-function',
+        100,
+        false,
+        ToolCallDecision.REJECT,
+      );
+    });
+
+    it('should log a tool call with a modify decision', () => {
+      const event = {
+        function_name: 'test-function',
+        function_args: {
+          arg1: 'value1',
+          arg2: 2,
+        },
+        duration_ms: 100,
+        success: true,
+      };
+
+      logToolCall(mockConfig, event, ToolConfirmationOutcome.ModifyWithEditor);
+
+      expect(mockLogger.emit).toHaveBeenCalledWith({
+        body: 'Tool call: test-function. Decision: modify. Success: true. Duration: 100ms.',
+        attributes: {
+          'session.id': 'test-session-id',
+          'event.name': 'gemini_cli.tool_call',
+          'event.timestamp': '2025-01-01T00:00:00.000Z',
+          function_name: 'test-function',
+          function_args: JSON.stringify(
+            {
+              arg1: 'value1',
+              arg2: 2,
+            },
+            null,
+            2,
+          ),
+          duration_ms: 100,
+          success: true,
+          decision: ToolCallDecision.MODIFY,
+        },
+      });
+
+      expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith(
+        mockConfig,
+        'test-function',
+        100,
+        true,
+        ToolCallDecision.MODIFY,
+      );
+    });
+
+    it('should log a tool call without a decision', () => {
+      const event = {
+        function_name: 'test-function',
+        function_args: {
+          arg1: 'value1',
+          arg2: 2,
+        },
+        duration_ms: 100,
+        success: true,
+      };
+
+      logToolCall(mockConfig, event);
+
+      expect(mockLogger.emit).toHaveBeenCalledWith({
+        body: 'Tool call: test-function. Success: true. Duration: 100ms.',
+        attributes: {
+          'session.id': 'test-session-id',
+          'event.name': 'gemini_cli.tool_call',
+          'event.timestamp': '2025-01-01T00:00:00.000Z',
+          function_name: 'test-function',
+          function_args: JSON.stringify(
+            {
+              arg1: 'value1',
+              arg2: 2,
+            },
+            null,
+            2,
+          ),
+          duration_ms: 100,
+          success: true,
+        },
+      });
+
+      expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith(
+        mockConfig,
+        'test-function',
+        100,
+        true,
+        undefined,
+      );
+    });
+
+    it('should log a failed tool call with an error', () => {
+      const event = {
+        function_name: 'test-function',
+        function_args: {
+          arg1: 'value1',
+          arg2: 2,
+        },
+        duration_ms: 100,
+        success: false,
+        error: 'test-error',
+        error_type: 'test-error-type',
+      };
+
+      logToolCall(mockConfig, event);
+
+      expect(mockLogger.emit).toHaveBeenCalledWith({
+        body: 'Tool call: test-function. Success: false. Duration: 100ms.',
+        attributes: {
+          'session.id': 'test-session-id',
+          'event.name': 'gemini_cli.tool_call',
+          'event.timestamp': '2025-01-01T00:00:00.000Z',
+          function_name: 'test-function',
+          function_args: JSON.stringify(
+            {
+              arg1: 'value1',
+              arg2: 2,
+            },
+            null,
+            2,
+          ),
+          duration_ms: 100,
+          success: false,
+          error: 'test-error',
+          'error.message': 'test-error',
+          error_type: 'test-error-type',
+          'error.type': 'test-error-type',
+        },
+      });
+
+      expect(mockMetrics.recordToolCallMetrics).toHaveBeenCalledWith(
+        mockConfig,
+        'test-function',
+        100,
+        false,
+        undefined,
+      );
+    });
+  });
 });
--- a/packages/core/src/telemetry/loggers.ts
+++ b/packages/core/src/telemetry/loggers.ts
@@ -30,6 +30,7 @@ import {
  recordToolCallMetrics,
 } from './metrics.js';
 import { isTelemetrySdkInitialized } from './sdk.js';
+import { ToolConfirmationOutcome } from '../index.js';

 const shouldLogUserPrompts = (config: Config): boolean =>
  config.getTelemetryLogUserPromptsEnabled() ?? false;
@@ -40,6 +41,29 @@ function getCommonAttributes(config: Config): LogAttributes {
  };
 }

+export enum ToolCallDecision {
+  ACCEPT = 'accept',
+  REJECT = 'reject',
+  MODIFY = 'modify',
+}
+
+export function getDecisionFromOutcome(
+  outcome: ToolConfirmationOutcome,
+): ToolCallDecision {
+  switch (outcome) {
+    case ToolConfirmationOutcome.ProceedOnce:
+    case ToolConfirmationOutcome.ProceedAlways:
+    case ToolConfirmationOutcome.ProceedAlwaysServer:
+    case ToolConfirmationOutcome.ProceedAlwaysTool:
+      return ToolCallDecision.ACCEPT;
+    case ToolConfirmationOutcome.ModifyWithEditor:
+      return ToolCallDecision.MODIFY;
+    case ToolConfirmationOutcome.Cancel:
+    default:
+      return ToolCallDecision.REJECT;
+  }
+}
+
 export function logCliConfiguration(config: Config): void {
  if (!isTelemetrySdkInitialized()) return;

@@ -103,15 +127,20 @@ export function logUserPrompt(

 export function logToolCall(
  config: Config,
-  event: Omit<ToolCallEvent, 'event.name' | 'event.timestamp'>,
+  event: Omit<ToolCallEvent, 'event.name' | 'event.timestamp' | 'decision'>,
+  outcome?: ToolConfirmationOutcome,
 ): void {
  if (!isTelemetrySdkInitialized()) return;
+
+  const decision = outcome ? getDecisionFromOutcome(outcome) : undefined;
+
  const attributes: LogAttributes = {
    ...getCommonAttributes(config),
    ...event,
    'event.name': EVENT_TOOL_CALL,
    'event.timestamp': new Date().toISOString(),
-    function_args: JSON.stringify(event.function_args),
+    function_args: JSON.stringify(event.function_args, null, 2),
+    decision,
  };
  if (event.error) {
    attributes['error.message'] = event.error;
@@ -121,7 +150,7 @@ export function logToolCall(
  }
  const logger = logs.getLogger(SERVICE_NAME);
  const logRecord: LogRecord = {
-    body: `Tool call: ${event.function_name}. Success: ${event.success}. Duration: ${event.duration_ms}ms.`,
+    body: `Tool call: ${event.function_name}${decision ? `. Decision: ${decision}` : ''}. Success: ${event.success}. Duration: ${event.duration_ms}ms.`,
    attributes,
  };
  logger.emit(logRecord);
@@ -130,6 +159,7 @@ export function logToolCall(
    event.function_name,
    event.duration_ms,
    event.success,
+    decision,
  );
 }

--- a/packages/core/src/telemetry/metrics.ts
+++ b/packages/core/src/telemetry/metrics.ts
@@ -89,6 +89,7 @@ export function recordToolCallMetrics(
  functionName: string,
  durationMs: number,
  success: boolean,
+  decision?: 'accept' | 'reject' | 'modify',
 ): void {
  if (!toolCallCounter || !toolCallLatencyHistogram || !isMetricsInitialized)
    return;
@@ -97,6 +98,7 @@ export function recordToolCallMetrics(
    ...getCommonAttributes(config),
    function_name: functionName,
    success,
+    decision,
  };
  toolCallCounter.add(1, metricAttributes);
  toolCallLatencyHistogram.record(durationMs, {
--- a/packages/core/src/telemetry/types.ts
+++ b/packages/core/src/telemetry/types.ts
@@ -4,6 +4,8 @@
 * SPDX-License-Identifier: Apache-2.0
 */

+import { ToolCallDecision } from './loggers.js';
+
 export interface UserPromptEvent {
  'event.name': 'user_prompt';
  'event.timestamp': string; // ISO 8601
@@ -18,6 +20,7 @@ export interface ToolCallEvent {
  function_args: Record<string, unknown>;
  duration_ms: number;
  success: boolean;
+  decision?: ToolCallDecision;
  error?: string;
  error_type?: string;
 }