[JUNE 25] Permanent failover to Flash model for OAuth users after persistent 429 errors (#1376)

Co-authored-by: Scott Densmore <scottdensmore@mac.com>
This commit is contained in:
Bryan Morgan
2025-06-24 18:48:55 -04:00
committed by GitHub
parent 4bf18da2b0
commit e356949d3f
16 changed files with 837 additions and 12 deletions

View File

@@ -0,0 +1,144 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeEach, vi } from 'vitest';
import { Config } from '../config/config.js';
import {
setSimulate429,
disableSimulationAfterFallback,
shouldSimulate429,
createSimulated429Error,
resetRequestCounter,
} from './testUtils.js';
import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
import { retryWithBackoff } from './retry.js';
import { AuthType } from '../core/contentGenerator.js';
describe('Flash Fallback Integration', () => {
let config: Config;
beforeEach(() => {
config = new Config({
sessionId: 'test-session',
targetDir: '/test',
debugMode: false,
cwd: '/test',
model: 'gemini-2.5-pro',
});
// Reset simulation state for each test
setSimulate429(false);
resetRequestCounter();
});
it('should automatically accept fallback', async () => {
// Set up a minimal flash fallback handler for testing
const flashFallbackHandler = async (): Promise<boolean> => true;
config.setFlashFallbackHandler(flashFallbackHandler);
// Call the handler directly to test
const result = await config.flashFallbackHandler!(
'gemini-2.5-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
// Verify it automatically accepts
expect(result).toBe(true);
});
it('should trigger fallback after 3 consecutive 429 errors for OAuth users', async () => {
let fallbackCalled = false;
let fallbackModel = '';
// Mock function that simulates exactly 3 429 errors, then succeeds after fallback
const mockApiCall = vi
.fn()
.mockRejectedValueOnce(createSimulated429Error())
.mockRejectedValueOnce(createSimulated429Error())
.mockRejectedValueOnce(createSimulated429Error())
.mockResolvedValueOnce('success after fallback');
// Mock fallback handler
const mockFallbackHandler = vi.fn(async (_authType?: string) => {
fallbackCalled = true;
fallbackModel = DEFAULT_GEMINI_FLASH_MODEL;
return fallbackModel;
});
// Test with OAuth personal auth type, with maxAttempts = 3 to ensure fallback triggers
const result = await retryWithBackoff(mockApiCall, {
maxAttempts: 3,
initialDelayMs: 1,
maxDelayMs: 10,
shouldRetry: (error: Error) => {
const status = (error as Error & { status?: number }).status;
return status === 429;
},
onPersistent429: mockFallbackHandler,
authType: AuthType.LOGIN_WITH_GOOGLE_PERSONAL,
});
// Verify fallback was triggered
expect(fallbackCalled).toBe(true);
expect(fallbackModel).toBe(DEFAULT_GEMINI_FLASH_MODEL);
expect(mockFallbackHandler).toHaveBeenCalledWith(
AuthType.LOGIN_WITH_GOOGLE_PERSONAL,
);
expect(result).toBe('success after fallback');
// Should have: 3 failures, then fallback triggered, then 1 success after retry reset
expect(mockApiCall).toHaveBeenCalledTimes(4);
});
it('should not trigger fallback for API key users', async () => {
let fallbackCalled = false;
// Mock function that simulates 429 errors
const mockApiCall = vi.fn().mockRejectedValue(createSimulated429Error());
// Mock fallback handler
const mockFallbackHandler = vi.fn(async () => {
fallbackCalled = true;
return DEFAULT_GEMINI_FLASH_MODEL;
});
// Test with API key auth type - should not trigger fallback
try {
await retryWithBackoff(mockApiCall, {
maxAttempts: 5,
initialDelayMs: 10,
maxDelayMs: 100,
shouldRetry: (error: Error) => {
const status = (error as Error & { status?: number }).status;
return status === 429;
},
onPersistent429: mockFallbackHandler,
authType: AuthType.USE_GEMINI, // API key auth type
});
} catch (error) {
// Expected to throw after max attempts
expect((error as Error).message).toContain('Rate limit exceeded');
}
// Verify fallback was NOT triggered for API key users
expect(fallbackCalled).toBe(false);
expect(mockFallbackHandler).not.toHaveBeenCalled();
});
it('should properly disable simulation state after fallback', () => {
// Enable simulation
setSimulate429(true);
// Verify simulation is enabled
expect(shouldSimulate429()).toBe(true);
// Disable simulation after fallback
disableSimulationAfterFallback();
// Verify simulation is now disabled
expect(shouldSimulate429()).toBe(false);
});
});

View File

@@ -7,6 +7,7 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { retryWithBackoff } from './retry.js';
import { setSimulate429 } from './testUtils.js';
// Define an interface for the error with a status property
interface HttpError extends Error {
@@ -42,10 +43,15 @@ class NonRetryableError extends Error {
describe('retryWithBackoff', () => {
beforeEach(() => {
vi.useFakeTimers();
// Disable 429 simulation for tests
setSimulate429(false);
// Suppress unhandled promise rejection warnings for tests that expect errors
console.warn = vi.fn();
});
afterEach(() => {
vi.restoreAllMocks();
vi.useRealTimers();
});
it('should return the result on the first attempt if successful', async () => {
@@ -231,4 +237,197 @@ describe('retryWithBackoff', () => {
expect(d).toBeLessThanOrEqual(100 * 1.3);
});
});
describe('Flash model fallback for OAuth users', () => {
it('should trigger fallback for OAuth personal users after persistent 429 errors', async () => {
const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');
let fallbackOccurred = false;
const mockFn = vi.fn().mockImplementation(async () => {
if (!fallbackOccurred) {
const error: HttpError = new Error('Rate limit exceeded');
error.status = 429;
throw error;
}
return 'success';
});
const promise = retryWithBackoff(mockFn, {
maxAttempts: 3,
initialDelayMs: 100,
onPersistent429: async (authType?: string) => {
fallbackOccurred = true;
return await fallbackCallback(authType);
},
authType: 'oauth-personal',
});
// Advance all timers to complete retries
await vi.runAllTimersAsync();
// Should succeed after fallback
await expect(promise).resolves.toBe('success');
// Verify callback was called with correct auth type
expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
// Should retry again after fallback
expect(mockFn).toHaveBeenCalledTimes(4); // 3 initial attempts + 1 after fallback
});
it('should trigger fallback for OAuth enterprise users after persistent 429 errors', async () => {
const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');
let fallbackOccurred = false;
const mockFn = vi.fn().mockImplementation(async () => {
if (!fallbackOccurred) {
const error: HttpError = new Error('Rate limit exceeded');
error.status = 429;
throw error;
}
return 'success';
});
const promise = retryWithBackoff(mockFn, {
maxAttempts: 3,
initialDelayMs: 100,
onPersistent429: async (authType?: string) => {
fallbackOccurred = true;
return await fallbackCallback(authType);
},
authType: 'oauth-enterprise',
});
await vi.runAllTimersAsync();
await expect(promise).resolves.toBe('success');
expect(fallbackCallback).toHaveBeenCalledWith('oauth-enterprise');
});
it('should NOT trigger fallback for API key users', async () => {
const fallbackCallback = vi.fn();
const mockFn = vi.fn(async () => {
const error: HttpError = new Error('Rate limit exceeded');
error.status = 429;
throw error;
});
const promise = retryWithBackoff(mockFn, {
maxAttempts: 3,
initialDelayMs: 100,
onPersistent429: fallbackCallback,
authType: 'gemini-api-key',
});
// Handle the promise properly to avoid unhandled rejections
const resultPromise = promise.catch((error) => error);
await vi.runAllTimersAsync();
const result = await resultPromise;
// Should fail after all retries without fallback
expect(result).toBeInstanceOf(Error);
expect(result.message).toBe('Rate limit exceeded');
// Callback should not be called for API key users
expect(fallbackCallback).not.toHaveBeenCalled();
});
it('should reset attempt counter and continue after successful fallback', async () => {
let fallbackCalled = false;
const fallbackCallback = vi.fn().mockImplementation(async () => {
fallbackCalled = true;
return 'gemini-2.5-flash';
});
const mockFn = vi.fn().mockImplementation(async () => {
if (!fallbackCalled) {
const error: HttpError = new Error('Rate limit exceeded');
error.status = 429;
throw error;
}
return 'success';
});
const promise = retryWithBackoff(mockFn, {
maxAttempts: 3,
initialDelayMs: 100,
onPersistent429: fallbackCallback,
authType: 'oauth-personal',
});
await vi.runAllTimersAsync();
await expect(promise).resolves.toBe('success');
expect(fallbackCallback).toHaveBeenCalledOnce();
});
it('should continue with original error if fallback is rejected', async () => {
const fallbackCallback = vi.fn().mockResolvedValue(null); // User rejected fallback
const mockFn = vi.fn(async () => {
const error: HttpError = new Error('Rate limit exceeded');
error.status = 429;
throw error;
});
const promise = retryWithBackoff(mockFn, {
maxAttempts: 3,
initialDelayMs: 100,
onPersistent429: fallbackCallback,
authType: 'oauth-personal',
});
// Handle the promise properly to avoid unhandled rejections
const resultPromise = promise.catch((error) => error);
await vi.runAllTimersAsync();
const result = await resultPromise;
// Should fail with original error when fallback is rejected
expect(result).toBeInstanceOf(Error);
expect(result.message).toBe('Rate limit exceeded');
expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
});
it('should handle mixed error types (only count consecutive 429s)', async () => {
const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');
let attempts = 0;
let fallbackOccurred = false;
const mockFn = vi.fn().mockImplementation(async () => {
attempts++;
if (fallbackOccurred) {
return 'success';
}
if (attempts === 1) {
// First attempt: 500 error (resets consecutive count)
const error: HttpError = new Error('Server error');
error.status = 500;
throw error;
} else {
// Remaining attempts: 429 errors
const error: HttpError = new Error('Rate limit exceeded');
error.status = 429;
throw error;
}
});
const promise = retryWithBackoff(mockFn, {
maxAttempts: 5,
initialDelayMs: 100,
onPersistent429: async (authType?: string) => {
fallbackOccurred = true;
return await fallbackCallback(authType);
},
authType: 'oauth-personal',
});
await vi.runAllTimersAsync();
await expect(promise).resolves.toBe('success');
// Should trigger fallback after 4 consecutive 429s (attempts 2-5)
expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
});
});
});

View File

@@ -4,11 +4,15 @@
* SPDX-License-Identifier: Apache-2.0
*/
import { AuthType } from '../core/contentGenerator.js';
export interface RetryOptions {
maxAttempts: number;
initialDelayMs: number;
maxDelayMs: number;
shouldRetry: (error: Error) => boolean;
onPersistent429?: (authType?: string) => Promise<string | null>;
authType?: string;
}
const DEFAULT_RETRY_OPTIONS: RetryOptions = {
@@ -59,29 +63,69 @@ export async function retryWithBackoff<T>(
fn: () => Promise<T>,
options?: Partial<RetryOptions>,
): Promise<T> {
const { maxAttempts, initialDelayMs, maxDelayMs, shouldRetry } = {
const {
maxAttempts,
initialDelayMs,
maxDelayMs,
shouldRetry,
onPersistent429,
authType,
} = {
...DEFAULT_RETRY_OPTIONS,
...options,
};
let attempt = 0;
let currentDelay = initialDelayMs;
let consecutive429Count = 0;
while (attempt < maxAttempts) {
attempt++;
try {
return await fn();
} catch (error) {
const errorStatus = getErrorStatus(error);
// Track consecutive 429 errors
if (errorStatus === 429) {
consecutive429Count++;
} else {
consecutive429Count = 0;
}
// Check if we've exhausted retries or shouldn't retry
if (attempt >= maxAttempts || !shouldRetry(error as Error)) {
// If we have persistent 429s and a fallback callback for OAuth
if (
consecutive429Count >= 3 &&
onPersistent429 &&
(authType === AuthType.LOGIN_WITH_GOOGLE_PERSONAL ||
authType === AuthType.LOGIN_WITH_GOOGLE_ENTERPRISE)
) {
try {
const fallbackModel = await onPersistent429(authType);
if (fallbackModel) {
// Reset attempt counter and try with new model
attempt = 0;
consecutive429Count = 0;
currentDelay = initialDelayMs;
continue;
}
} catch (fallbackError) {
// If fallback fails, continue with original error
console.warn('Fallback to Flash model failed:', fallbackError);
}
}
throw error;
}
const { delayDurationMs, errorStatus } = getDelayDurationAndStatus(error);
const { delayDurationMs, errorStatus: delayErrorStatus } =
getDelayDurationAndStatus(error);
if (delayDurationMs > 0) {
// Respect Retry-After header if present and parsed
console.warn(
`Attempt ${attempt} failed with status ${errorStatus ?? 'unknown'}. Retrying after explicit delay of ${delayDurationMs}ms...`,
`Attempt ${attempt} failed with status ${delayErrorStatus ?? 'unknown'}. Retrying after explicit delay of ${delayDurationMs}ms...`,
error,
);
await delay(delayDurationMs);

View File

@@ -0,0 +1,87 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
* Testing utilities for simulating 429 errors in unit tests
*/
let requestCounter = 0;
let simulate429Enabled = false;
let simulate429AfterRequests = 0;
let simulate429ForAuthType: string | undefined;
let fallbackOccurred = false;
/**
* Check if we should simulate a 429 error for the current request
*/
export function shouldSimulate429(authType?: string): boolean {
if (!simulate429Enabled || fallbackOccurred) {
return false;
}
// If auth type filter is set, only simulate for that auth type
if (simulate429ForAuthType && authType !== simulate429ForAuthType) {
return false;
}
requestCounter++;
// If afterRequests is set, only simulate after that many requests
if (simulate429AfterRequests > 0) {
return requestCounter > simulate429AfterRequests;
}
// Otherwise, simulate for every request
return true;
}
/**
* Reset the request counter (useful for tests)
*/
export function resetRequestCounter(): void {
requestCounter = 0;
}
/**
* Disable 429 simulation after successful fallback
*/
export function disableSimulationAfterFallback(): void {
fallbackOccurred = true;
}
/**
* Create a simulated 429 error response
*/
export function createSimulated429Error(): Error {
const error = new Error('Rate limit exceeded (simulated)') as Error & {
status: number;
};
error.status = 429;
return error;
}
/**
* Reset simulation state when switching auth methods
*/
export function resetSimulationState(): void {
fallbackOccurred = false;
resetRequestCounter();
}
/**
* Enable/disable 429 simulation programmatically (for tests)
*/
export function setSimulate429(
enabled: boolean,
afterRequests = 0,
forAuthType?: string,
): void {
simulate429Enabled = enabled;
simulate429AfterRequests = afterRequests;
simulate429ForAuthType = forAuthType;
fallbackOccurred = false; // Reset fallback state when simulation is re-enabled
resetRequestCounter();
}