Improve quota- and resource-related 429 error handling, also taking Code Assist customer tiers into consideration (#3609)

2025-12-20 08:47:44 +00:00 · 2025-07-09 10:18:15 -04:00
parent 8f2da86aa5
commit b0cce95286
14 changed files with 611 additions and 63 deletions
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -103,6 +103,7 @@ export interface SandboxConfig {
 export type FlashFallbackHandler = (
  currentModel: string,
  fallbackModel: string,
+  error?: unknown,
 ) => Promise<boolean>;

 export interface ConfigParameters {
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -845,6 +845,7 @@ describe('Gemini Client (client.ts)', () => {
      expect(mockFallbackHandler).toHaveBeenCalledWith(
        currentModel,
        fallbackModel,
+        undefined,
      );
    });
  });
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -323,8 +323,8 @@ export class GeminiClient {
        });

      const result = await retryWithBackoff(apiCall, {
-        onPersistent429: async (authType?: string) =>
-          await this.handleFlashFallback(authType),
+        onPersistent429: async (authType?: string, error?: unknown) =>
+          await this.handleFlashFallback(authType, error),
        authType: this.config.getContentGeneratorConfig()?.authType,
      });

@@ -411,8 +411,8 @@ export class GeminiClient {
        });

      const result = await retryWithBackoff(apiCall, {
-        onPersistent429: async (authType?: string) =>
-          await this.handleFlashFallback(authType),
+        onPersistent429: async (authType?: string, error?: unknown) =>
+          await this.handleFlashFallback(authType, error),
        authType: this.config.getContentGeneratorConfig()?.authType,
      });
      return result;
@@ -559,7 +559,10 @@ export class GeminiClient {
   * Handles fallback to Flash model when persistent 429 errors occur for OAuth users.
   * Uses a fallback handler if provided by the config, otherwise returns null.
   */
-  private async handleFlashFallback(authType?: string): Promise<string | null> {
+  private async handleFlashFallback(
+    authType?: string,
+    error?: unknown,
+  ): Promise<string | null> {
    // Only handle fallback for OAuth users
    if (authType !== AuthType.LOGIN_WITH_GOOGLE) {
      return null;
@@ -577,7 +580,11 @@ export class GeminiClient {
    const fallbackHandler = this.config.flashFallbackHandler;
    if (typeof fallbackHandler === 'function') {
      try {
-        const accepted = await fallbackHandler(currentModel, fallbackModel);
+        const accepted = await fallbackHandler(
+          currentModel,
+          fallbackModel,
+          error,
+        );
        if (accepted) {
          this.config.setModel(fallbackModel);
          return fallbackModel;
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -191,7 +191,10 @@ export class GeminiChat {
   * Handles fallback to Flash model when persistent 429 errors occur for OAuth users.
   * Uses a fallback handler if provided by the config, otherwise returns null.
   */
-  private async handleFlashFallback(authType?: string): Promise<string | null> {
+  private async handleFlashFallback(
+    authType?: string,
+    error?: unknown,
+  ): Promise<string | null> {
    // Only handle fallback for OAuth users
    if (authType !== AuthType.LOGIN_WITH_GOOGLE) {
      return null;
@@ -209,7 +212,11 @@ export class GeminiChat {
    const fallbackHandler = this.config.flashFallbackHandler;
    if (typeof fallbackHandler === 'function') {
      try {
-        const accepted = await fallbackHandler(currentModel, fallbackModel);
+        const accepted = await fallbackHandler(
+          currentModel,
+          fallbackModel,
+          error,
+        );
        if (accepted) {
          this.config.setModel(fallbackModel);
          return fallbackModel;
@@ -270,8 +277,8 @@ export class GeminiChat {
          }
          return false;
        },
-        onPersistent429: async (authType?: string) =>
-          await this.handleFlashFallback(authType),
+        onPersistent429: async (authType?: string, error?: unknown) =>
+          await this.handleFlashFallback(authType, error),
        authType: this.config.getContentGeneratorConfig()?.authType,
      });
      const durationMs = Date.now() - startTime;
@@ -367,8 +374,8 @@ export class GeminiChat {
          }
          return false; // Don't retry other errors by default
        },
-        onPersistent429: async (authType?: string) =>
-          await this.handleFlashFallback(authType),
+        onPersistent429: async (authType?: string, error?: unknown) =>
+          await this.handleFlashFallback(authType, error),
        authType: this.config.getContentGeneratorConfig()?.authType,
      });

--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -32,6 +32,7 @@ export * from './utils/getFolderStructure.js';
 export * from './utils/memoryDiscovery.js';
 export * from './utils/gitIgnoreParser.js';
 export * from './utils/editor.js';
+export * from './utils/quotaErrorDetection.js';

 // Export services
 export * from './services/fileDiscoveryService.js';
--- a/packages/core/src/utils/flashFallback.integration.test.ts
+++ b/packages/core/src/utils/flashFallback.integration.test.ts
@@ -86,6 +86,7 @@ describe('Flash Fallback Integration', () => {
    expect(fallbackModel).toBe(DEFAULT_GEMINI_FLASH_MODEL);
    expect(mockFallbackHandler).toHaveBeenCalledWith(
      AuthType.LOGIN_WITH_GOOGLE,
+      expect.any(Error),
    );
    expect(result).toBe('success after fallback');
    // Should have: 2 failures, then fallback triggered, then 1 success after retry reset
--- a/packages/core/src/utils/quotaErrorDetection.ts
+++ b/packages/core/src/utils/quotaErrorDetection.ts
@@ -0,0 +1,82 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+export interface ApiError {
+  error: {
+    code: number; // not inspected by isApiError
+    message: string; // text scanned by the quota-error predicates in this file
+    status: string; // not inspected by isApiError
+    details: unknown[]; // not inspected by isApiError
+  };
+}
+
+interface StructuredError {
+  message: string; // the only field isStructuredError verifies (must be a string)
+  status?: number; // optional; never read in this file — presumably an HTTP status, TODO confirm
+}
+
+/** Guard: `error` is an object whose non-null `error` has a `message` key. */
+export function isApiError(error: unknown): error is ApiError {
+  if (typeof error !== 'object' || error === null || !('error' in error)) {
+    return false;
+  }
+  const inner = (error as { error: unknown }).error;
+  // `typeof null` is 'object'; exclude null so `in` cannot throw a TypeError.
+  return typeof inner === 'object' && inner !== null && 'message' in inner;
+}
+
+/** Type guard: a non-null object whose `message` property is a string. */
+export function isStructuredError(error: unknown): error is StructuredError {
+  if (typeof error !== 'object' || error === null) {
+    return false;
+  }
+  const candidate = error as { message?: unknown };
+  return 'message' in error && typeof candidate.message === 'string';
+}
+
+/**
+ * Detects a Pro-model quota error: the error text must contain both
+ * fragments defined below, which matches messages such as
+ * - "Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'"
+ * - "Quota exceeded for quota metric 'Gemini 1.5-preview Pro Requests'"
+ * - "Quota exceeded for quota metric 'Gemini beta-3.0 Pro Requests'"
+ * - "Quota exceeded for quota metric 'Gemini experimental-v2 Pro Requests'"
+ * Accepts a raw string, an object with a string `message`, or an ApiError.
+ * Substring checks are used instead of a regex to avoid ReDoS concerns.
+ */
+export function isProQuotaExceededError(error: unknown): boolean {
+  const prefix = "Quota exceeded for quota metric 'Gemini";
+  const suffix = "Pro Requests'";
+  const mentionsProQuota = (text: string): boolean =>
+    text.includes(prefix) && text.includes(suffix);
+
+  if (typeof error === 'string') {
+    return mentionsProQuota(error);
+  }
+  if (isStructuredError(error)) {
+    return mentionsProQuota(error.message);
+  }
+  if (isApiError(error)) {
+    return mentionsProQuota(error.error.message);
+  }
+  return false;
+}
+
+/**
+ * True when the error text contains the generic quota-exceeded phrase.
+ */
+export function isGenericQuotaExceededError(error: unknown): boolean {
+  const hasMarker = (text: string): boolean =>
+    text.includes('Quota exceeded for quota metric');
+
+  if (typeof error === 'string') {
+    return hasMarker(error);
+  }
+  if (isStructuredError(error)) {
+    return hasMarker(error.message);
+  }
+  return isApiError(error) ? hasMarker(error.error.message) : false;
+}
--- a/packages/core/src/utils/retry.test.ts
+++ b/packages/core/src/utils/retry.test.ts
@@ -357,7 +357,10 @@ describe('retryWithBackoff', () => {
      // Should fail with original error when fallback is rejected
      expect(result).toBeInstanceOf(Error);
      expect(result.message).toBe('Rate limit exceeded');
-      expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
+      expect(fallbackCallback).toHaveBeenCalledWith(
+        'oauth-personal',
+        expect.any(Error),
+      );
    });

    it('should handle mixed error types (only count consecutive 429s)', async () => {
--- a/packages/core/src/utils/retry.ts
+++ b/packages/core/src/utils/retry.ts
@@ -5,13 +5,20 @@
 */

 import { AuthType } from '../core/contentGenerator.js';
+import {
+  isProQuotaExceededError,
+  isGenericQuotaExceededError,
+} from './quotaErrorDetection.js';

 export interface RetryOptions {
  maxAttempts: number;
  initialDelayMs: number;
  maxDelayMs: number;
  shouldRetry: (error: Error) => boolean;
-  onPersistent429?: (authType?: string) => Promise<string | null>;
+  onPersistent429?: (
+    authType?: string,
+    error?: unknown,
+  ) => Promise<string | null>;
  authType?: string;
 }

@@ -86,6 +93,53 @@ export async function retryWithBackoff<T>(
    } catch (error) {
      const errorStatus = getErrorStatus(error);

+      // Check for Pro quota exceeded error first - immediate fallback for OAuth users
+      if (
+        errorStatus === 429 &&
+        authType === AuthType.LOGIN_WITH_GOOGLE &&
+        isProQuotaExceededError(error) &&
+        onPersistent429
+      ) {
+        try {
+          const fallbackModel = await onPersistent429(authType, error);
+          if (fallbackModel) {
+            // Reset attempt counter and try with new model
+            attempt = 0;
+            consecutive429Count = 0;
+            currentDelay = initialDelayMs;
+            // With the model updated, we continue to the next attempt
+            continue;
+          }
+        } catch (fallbackError) {
+          // If fallback fails, continue with original error
+          console.warn('Fallback to Flash model failed:', fallbackError);
+        }
+      }
+
+      // Check for generic quota exceeded error (but not Pro, which was handled above) - immediate fallback for OAuth users
+      if (
+        errorStatus === 429 &&
+        authType === AuthType.LOGIN_WITH_GOOGLE &&
+        !isProQuotaExceededError(error) &&
+        isGenericQuotaExceededError(error) &&
+        onPersistent429
+      ) {
+        try {
+          const fallbackModel = await onPersistent429(authType, error);
+          if (fallbackModel) {
+            // Reset attempt counter and try with new model
+            attempt = 0;
+            consecutive429Count = 0;
+            currentDelay = initialDelayMs;
+            // With the model updated, we continue to the next attempt
+            continue;
+          }
+        } catch (fallbackError) {
+          // If fallback fails, continue with original error
+          console.warn('Fallback to Flash model failed:', fallbackError);
+        }
+      }
+
      // Track consecutive 429 errors
      if (errorStatus === 429) {
        consecutive429Count++;
@@ -100,7 +154,7 @@ export async function retryWithBackoff<T>(
        authType === AuthType.LOGIN_WITH_GOOGLE
      ) {
        try {
-          const fallbackModel = await onPersistent429(authType);
+          const fallbackModel = await onPersistent429(authType, error);
          if (fallbackModel) {
            // Reset attempt counter and try with new model
            attempt = 0;