[JUNE 25] Permanent failover to Flash model for OAuth users after persistent 429 errors (#1376)

Co-authored-by: Scott Densmore <scottdensmore@mac.com>
2025-12-20 16:57:46 +00:00 · 2025-06-24 18:48:55 -04:00
parent 4bf18da2b0
commit e356949d3f
16 changed files with 837 additions and 12 deletions
--- a/packages/core/src/utils/flashFallback.integration.test.ts
+++ b/packages/core/src/utils/flashFallback.integration.test.ts
@@ -0,0 +1,144 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+import { Config } from '../config/config.js';
+import {
+  setSimulate429,
+  disableSimulationAfterFallback,
+  shouldSimulate429,
+  createSimulated429Error,
+  resetRequestCounter,
+} from './testUtils.js';
+import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
+import { retryWithBackoff } from './retry.js';
+import { AuthType } from '../core/contentGenerator.js';
+
+describe('Flash Fallback Integration', () => {
+  let config: Config;
+
+  beforeEach(() => {
+    config = new Config({
+      sessionId: 'test-session',
+      targetDir: '/test',
+      debugMode: false,
+      cwd: '/test',
+      model: 'gemini-2.5-pro',
+    });
+
+    // Reset simulation state for each test
+    setSimulate429(false);
+    resetRequestCounter();
+  });
+
+  it('should automatically accept fallback', async () => {
+    // Set up a minimal flash fallback handler for testing
+    const flashFallbackHandler = async (): Promise<boolean> => true;
+
+    config.setFlashFallbackHandler(flashFallbackHandler);
+
+    // Call the handler directly to test
+    const result = await config.flashFallbackHandler!(
+      'gemini-2.5-pro',
+      DEFAULT_GEMINI_FLASH_MODEL,
+    );
+
+    // Verify it automatically accepts
+    expect(result).toBe(true);
+  });
+
+  it('should trigger fallback after 3 consecutive 429 errors for OAuth users', async () => {
+    let fallbackCalled = false;
+    let fallbackModel = '';
+
+    // Mock function that simulates exactly 3 429 errors, then succeeds after fallback
+    const mockApiCall = vi
+      .fn()
+      .mockRejectedValueOnce(createSimulated429Error())
+      .mockRejectedValueOnce(createSimulated429Error())
+      .mockRejectedValueOnce(createSimulated429Error())
+      .mockResolvedValueOnce('success after fallback');
+
+    // Mock fallback handler
+    const mockFallbackHandler = vi.fn(async (_authType?: string) => {
+      fallbackCalled = true;
+      fallbackModel = DEFAULT_GEMINI_FLASH_MODEL;
+      return fallbackModel;
+    });
+
+    // Test with OAuth personal auth type, with maxAttempts = 3 to ensure fallback triggers
+    const result = await retryWithBackoff(mockApiCall, {
+      maxAttempts: 3,
+      initialDelayMs: 1,
+      maxDelayMs: 10,
+      shouldRetry: (error: Error) => {
+        const status = (error as Error & { status?: number }).status;
+        return status === 429;
+      },
+      onPersistent429: mockFallbackHandler,
+      authType: AuthType.LOGIN_WITH_GOOGLE_PERSONAL,
+    });
+
+    // Verify fallback was triggered
+    expect(fallbackCalled).toBe(true);
+    expect(fallbackModel).toBe(DEFAULT_GEMINI_FLASH_MODEL);
+    expect(mockFallbackHandler).toHaveBeenCalledWith(
+      AuthType.LOGIN_WITH_GOOGLE_PERSONAL,
+    );
+    expect(result).toBe('success after fallback');
+    // Should have: 3 failures, then fallback triggered, then 1 success after retry reset
+    expect(mockApiCall).toHaveBeenCalledTimes(4);
+  });
+
+  it('should not trigger fallback for API key users', async () => {
+    let fallbackCalled = false;
+
+    // Mock function that simulates 429 errors
+    const mockApiCall = vi.fn().mockRejectedValue(createSimulated429Error());
+
+    // Mock fallback handler
+    const mockFallbackHandler = vi.fn(async () => {
+      fallbackCalled = true;
+      return DEFAULT_GEMINI_FLASH_MODEL;
+    });
+
+    // Test with API key auth type - should not trigger fallback
+    try {
+      await retryWithBackoff(mockApiCall, {
+        maxAttempts: 5,
+        initialDelayMs: 10,
+        maxDelayMs: 100,
+        shouldRetry: (error: Error) => {
+          const status = (error as Error & { status?: number }).status;
+          return status === 429;
+        },
+        onPersistent429: mockFallbackHandler,
+        authType: AuthType.USE_GEMINI, // API key auth type
+      });
+    } catch (error) {
+      // Expected to throw after max attempts
+      expect((error as Error).message).toContain('Rate limit exceeded');
+    }
+
+    // Verify fallback was NOT triggered for API key users
+    expect(fallbackCalled).toBe(false);
+    expect(mockFallbackHandler).not.toHaveBeenCalled();
+  });
+
+  it('should properly disable simulation state after fallback', () => {
+    // Enable simulation
+    setSimulate429(true);
+
+    // Verify simulation is enabled
+    expect(shouldSimulate429()).toBe(true);
+
+    // Disable simulation after fallback
+    disableSimulationAfterFallback();
+
+    // Verify simulation is now disabled
+    expect(shouldSimulate429()).toBe(false);
+  });
+});
--- a/packages/core/src/utils/retry.test.ts
+++ b/packages/core/src/utils/retry.test.ts
@@ -7,6 +7,7 @@
 /* eslint-disable @typescript-eslint/no-explicit-any */
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
 import { retryWithBackoff } from './retry.js';
+import { setSimulate429 } from './testUtils.js';

 // Define an interface for the error with a status property
 interface HttpError extends Error {
@@ -42,10 +43,15 @@ class NonRetryableError extends Error {
 describe('retryWithBackoff', () => {
  beforeEach(() => {
    vi.useFakeTimers();
+    // Disable 429 simulation for tests
+    setSimulate429(false);
+    // Suppress unhandled promise rejection warnings for tests that expect errors
+    console.warn = vi.fn();
  });

  afterEach(() => {
    vi.restoreAllMocks();
+    vi.useRealTimers();
  });

  it('should return the result on the first attempt if successful', async () => {
@@ -231,4 +237,197 @@ describe('retryWithBackoff', () => {
      expect(d).toBeLessThanOrEqual(100 * 1.3);
    });
  });
+
+  describe('Flash model fallback for OAuth users', () => {
+    it('should trigger fallback for OAuth personal users after persistent 429 errors', async () => {
+      const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');
+
+      let fallbackOccurred = false;
+      const mockFn = vi.fn().mockImplementation(async () => {
+        if (!fallbackOccurred) {
+          const error: HttpError = new Error('Rate limit exceeded');
+          error.status = 429;
+          throw error;
+        }
+        return 'success';
+      });
+
+      const promise = retryWithBackoff(mockFn, {
+        maxAttempts: 3,
+        initialDelayMs: 100,
+        onPersistent429: async (authType?: string) => {
+          fallbackOccurred = true;
+          return await fallbackCallback(authType);
+        },
+        authType: 'oauth-personal',
+      });
+
+      // Advance all timers to complete retries
+      await vi.runAllTimersAsync();
+
+      // Should succeed after fallback
+      await expect(promise).resolves.toBe('success');
+
+      // Verify callback was called with correct auth type
+      expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
+
+      // Should retry again after fallback
+      expect(mockFn).toHaveBeenCalledTimes(4); // 3 initial attempts + 1 after fallback
+    });
+
+    it('should trigger fallback for OAuth enterprise users after persistent 429 errors', async () => {
+      const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');
+
+      let fallbackOccurred = false;
+      const mockFn = vi.fn().mockImplementation(async () => {
+        if (!fallbackOccurred) {
+          const error: HttpError = new Error('Rate limit exceeded');
+          error.status = 429;
+          throw error;
+        }
+        return 'success';
+      });
+
+      const promise = retryWithBackoff(mockFn, {
+        maxAttempts: 3,
+        initialDelayMs: 100,
+        onPersistent429: async (authType?: string) => {
+          fallbackOccurred = true;
+          return await fallbackCallback(authType);
+        },
+        authType: 'oauth-enterprise',
+      });
+
+      await vi.runAllTimersAsync();
+
+      await expect(promise).resolves.toBe('success');
+      expect(fallbackCallback).toHaveBeenCalledWith('oauth-enterprise');
+    });
+
+    it('should NOT trigger fallback for API key users', async () => {
+      const fallbackCallback = vi.fn();
+
+      const mockFn = vi.fn(async () => {
+        const error: HttpError = new Error('Rate limit exceeded');
+        error.status = 429;
+        throw error;
+      });
+
+      const promise = retryWithBackoff(mockFn, {
+        maxAttempts: 3,
+        initialDelayMs: 100,
+        onPersistent429: fallbackCallback,
+        authType: 'gemini-api-key',
+      });
+
+      // Handle the promise properly to avoid unhandled rejections
+      const resultPromise = promise.catch((error) => error);
+      await vi.runAllTimersAsync();
+      const result = await resultPromise;
+
+      // Should fail after all retries without fallback
+      expect(result).toBeInstanceOf(Error);
+      expect(result.message).toBe('Rate limit exceeded');
+
+      // Callback should not be called for API key users
+      expect(fallbackCallback).not.toHaveBeenCalled();
+    });
+
+    it('should reset attempt counter and continue after successful fallback', async () => {
+      let fallbackCalled = false;
+      const fallbackCallback = vi.fn().mockImplementation(async () => {
+        fallbackCalled = true;
+        return 'gemini-2.5-flash';
+      });
+
+      const mockFn = vi.fn().mockImplementation(async () => {
+        if (!fallbackCalled) {
+          const error: HttpError = new Error('Rate limit exceeded');
+          error.status = 429;
+          throw error;
+        }
+        return 'success';
+      });
+
+      const promise = retryWithBackoff(mockFn, {
+        maxAttempts: 3,
+        initialDelayMs: 100,
+        onPersistent429: fallbackCallback,
+        authType: 'oauth-personal',
+      });
+
+      await vi.runAllTimersAsync();
+
+      await expect(promise).resolves.toBe('success');
+      expect(fallbackCallback).toHaveBeenCalledOnce();
+    });
+
+    it('should continue with original error if fallback is rejected', async () => {
+      const fallbackCallback = vi.fn().mockResolvedValue(null); // User rejected fallback
+
+      const mockFn = vi.fn(async () => {
+        const error: HttpError = new Error('Rate limit exceeded');
+        error.status = 429;
+        throw error;
+      });
+
+      const promise = retryWithBackoff(mockFn, {
+        maxAttempts: 3,
+        initialDelayMs: 100,
+        onPersistent429: fallbackCallback,
+        authType: 'oauth-personal',
+      });
+
+      // Handle the promise properly to avoid unhandled rejections
+      const resultPromise = promise.catch((error) => error);
+      await vi.runAllTimersAsync();
+      const result = await resultPromise;
+
+      // Should fail with original error when fallback is rejected
+      expect(result).toBeInstanceOf(Error);
+      expect(result.message).toBe('Rate limit exceeded');
+      expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
+    });
+
+    it('should handle mixed error types (only count consecutive 429s)', async () => {
+      const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');
+      let attempts = 0;
+      let fallbackOccurred = false;
+
+      const mockFn = vi.fn().mockImplementation(async () => {
+        attempts++;
+        if (fallbackOccurred) {
+          return 'success';
+        }
+        if (attempts === 1) {
+          // First attempt: 500 error (resets consecutive count)
+          const error: HttpError = new Error('Server error');
+          error.status = 500;
+          throw error;
+        } else {
+          // Remaining attempts: 429 errors
+          const error: HttpError = new Error('Rate limit exceeded');
+          error.status = 429;
+          throw error;
+        }
+      });
+
+      const promise = retryWithBackoff(mockFn, {
+        maxAttempts: 5,
+        initialDelayMs: 100,
+        onPersistent429: async (authType?: string) => {
+          fallbackOccurred = true;
+          return await fallbackCallback(authType);
+        },
+        authType: 'oauth-personal',
+      });
+
+      await vi.runAllTimersAsync();
+
+      await expect(promise).resolves.toBe('success');
+
+      // Should trigger fallback after 4 consecutive 429s (attempts 2-5)
+      expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
+    });
+  });
 });
--- a/packages/core/src/utils/retry.ts
+++ b/packages/core/src/utils/retry.ts
@@ -4,11 +4,15 @@
 * SPDX-License-Identifier: Apache-2.0
 */

+import { AuthType } from '../core/contentGenerator.js';
+
 export interface RetryOptions {
  maxAttempts: number;
  initialDelayMs: number;
  maxDelayMs: number;
  shouldRetry: (error: Error) => boolean;
+  onPersistent429?: (authType?: string) => Promise<string | null>;
+  authType?: string;
 }

 const DEFAULT_RETRY_OPTIONS: RetryOptions = {
@@ -59,29 +63,69 @@ export async function retryWithBackoff<T>(
  fn: () => Promise<T>,
  options?: Partial<RetryOptions>,
 ): Promise<T> {
-  const { maxAttempts, initialDelayMs, maxDelayMs, shouldRetry } = {
+  const {
+    maxAttempts,
+    initialDelayMs,
+    maxDelayMs,
+    shouldRetry,
+    onPersistent429,
+    authType,
+  } = {
    ...DEFAULT_RETRY_OPTIONS,
    ...options,
  };

  let attempt = 0;
  let currentDelay = initialDelayMs;
+  let consecutive429Count = 0;

  while (attempt < maxAttempts) {
    attempt++;
    try {
      return await fn();
    } catch (error) {
+      const errorStatus = getErrorStatus(error);
+
+      // Track consecutive 429 errors
+      if (errorStatus === 429) {
+        consecutive429Count++;
+      } else {
+        consecutive429Count = 0;
+      }
+
+      // Check if we've exhausted retries or shouldn't retry
      if (attempt >= maxAttempts || !shouldRetry(error as Error)) {
+        // If we have persistent 429s and a fallback callback for OAuth
+        if (
+          consecutive429Count >= 3 &&
+          onPersistent429 &&
+          (authType === AuthType.LOGIN_WITH_GOOGLE_PERSONAL ||
+            authType === AuthType.LOGIN_WITH_GOOGLE_ENTERPRISE)
+        ) {
+          try {
+            const fallbackModel = await onPersistent429(authType);
+            if (fallbackModel) {
+              // Reset attempt counter and try with new model
+              attempt = 0;
+              consecutive429Count = 0;
+              currentDelay = initialDelayMs;
+              continue;
+            }
+          } catch (fallbackError) {
+            // If fallback fails, continue with original error
+            console.warn('Fallback to Flash model failed:', fallbackError);
+          }
+        }
        throw error;
      }

-      const { delayDurationMs, errorStatus } = getDelayDurationAndStatus(error);
+      const { delayDurationMs, errorStatus: delayErrorStatus } =
+        getDelayDurationAndStatus(error);

      if (delayDurationMs > 0) {
        // Respect Retry-After header if present and parsed
        console.warn(
-          `Attempt ${attempt} failed with status ${errorStatus ?? 'unknown'}. Retrying after explicit delay of ${delayDurationMs}ms...`,
+          `Attempt ${attempt} failed with status ${delayErrorStatus ?? 'unknown'}. Retrying after explicit delay of ${delayDurationMs}ms...`,
          error,
        );
        await delay(delayDurationMs);
--- a/packages/core/src/utils/testUtils.ts
+++ b/packages/core/src/utils/testUtils.ts
@@ -0,0 +1,87 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * Testing utilities for simulating 429 errors in unit tests
+ */
+
+let requestCounter = 0;
+let simulate429Enabled = false;
+let simulate429AfterRequests = 0;
+let simulate429ForAuthType: string | undefined;
+let fallbackOccurred = false;
+
+/**
+ * Check if we should simulate a 429 error for the current request
+ */
+export function shouldSimulate429(authType?: string): boolean {
+  if (!simulate429Enabled || fallbackOccurred) {
+    return false;
+  }
+
+  // If auth type filter is set, only simulate for that auth type
+  if (simulate429ForAuthType && authType !== simulate429ForAuthType) {
+    return false;
+  }
+
+  requestCounter++;
+
+  // If afterRequests is set, only simulate after that many requests
+  if (simulate429AfterRequests > 0) {
+    return requestCounter > simulate429AfterRequests;
+  }
+
+  // Otherwise, simulate for every request
+  return true;
+}
+
+/**
+ * Reset the request counter (useful for tests)
+ */
+export function resetRequestCounter(): void {
+  requestCounter = 0;
+}
+
+/**
+ * Disable 429 simulation after successful fallback
+ */
+export function disableSimulationAfterFallback(): void {
+  fallbackOccurred = true;
+}
+
+/**
+ * Create a simulated 429 error response
+ */
+export function createSimulated429Error(): Error {
+  const error = new Error('Rate limit exceeded (simulated)') as Error & {
+    status: number;
+  };
+  error.status = 429;
+  return error;
+}
+
+/**
+ * Reset simulation state when switching auth methods
+ */
+export function resetSimulationState(): void {
+  fallbackOccurred = false;
+  resetRequestCounter();
+}
+
+/**
+ * Enable/disable 429 simulation programmatically (for tests)
+ */
+export function setSimulate429(
+  enabled: boolean,
+  afterRequests = 0,
+  forAuthType?: string,
+): void {
+  simulate429Enabled = enabled;
+  simulate429AfterRequests = afterRequests;
+  simulate429ForAuthType = forAuthType;
+  fallbackOccurred = false; // Reset fallback state when simulation is re-enabled
+  resetRequestCounter();
+}