mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-20 16:57:46 +00:00
[JUNE 25] Permanent failover to Flash model for OAuth users after persistent 429 errors (#1376)
Co-authored-by: Scott Densmore <scottdensmore@mac.com>
This commit is contained in:
144
packages/core/src/utils/flashFallback.integration.test.ts
Normal file
144
packages/core/src/utils/flashFallback.integration.test.ts
Normal file
@@ -0,0 +1,144 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, vi } from 'vitest';
|
||||
import { Config } from '../config/config.js';
|
||||
import {
|
||||
setSimulate429,
|
||||
disableSimulationAfterFallback,
|
||||
shouldSimulate429,
|
||||
createSimulated429Error,
|
||||
resetRequestCounter,
|
||||
} from './testUtils.js';
|
||||
import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
|
||||
import { retryWithBackoff } from './retry.js';
|
||||
import { AuthType } from '../core/contentGenerator.js';
|
||||
|
||||
describe('Flash Fallback Integration', () => {
|
||||
let config: Config;
|
||||
|
||||
beforeEach(() => {
|
||||
config = new Config({
|
||||
sessionId: 'test-session',
|
||||
targetDir: '/test',
|
||||
debugMode: false,
|
||||
cwd: '/test',
|
||||
model: 'gemini-2.5-pro',
|
||||
});
|
||||
|
||||
// Reset simulation state for each test
|
||||
setSimulate429(false);
|
||||
resetRequestCounter();
|
||||
});
|
||||
|
||||
it('should automatically accept fallback', async () => {
|
||||
// Set up a minimal flash fallback handler for testing
|
||||
const flashFallbackHandler = async (): Promise<boolean> => true;
|
||||
|
||||
config.setFlashFallbackHandler(flashFallbackHandler);
|
||||
|
||||
// Call the handler directly to test
|
||||
const result = await config.flashFallbackHandler!(
|
||||
'gemini-2.5-pro',
|
||||
DEFAULT_GEMINI_FLASH_MODEL,
|
||||
);
|
||||
|
||||
// Verify it automatically accepts
|
||||
expect(result).toBe(true);
|
||||
});
|
||||
|
||||
it('should trigger fallback after 3 consecutive 429 errors for OAuth users', async () => {
|
||||
let fallbackCalled = false;
|
||||
let fallbackModel = '';
|
||||
|
||||
// Mock function that simulates exactly 3 429 errors, then succeeds after fallback
|
||||
const mockApiCall = vi
|
||||
.fn()
|
||||
.mockRejectedValueOnce(createSimulated429Error())
|
||||
.mockRejectedValueOnce(createSimulated429Error())
|
||||
.mockRejectedValueOnce(createSimulated429Error())
|
||||
.mockResolvedValueOnce('success after fallback');
|
||||
|
||||
// Mock fallback handler
|
||||
const mockFallbackHandler = vi.fn(async (_authType?: string) => {
|
||||
fallbackCalled = true;
|
||||
fallbackModel = DEFAULT_GEMINI_FLASH_MODEL;
|
||||
return fallbackModel;
|
||||
});
|
||||
|
||||
// Test with OAuth personal auth type, with maxAttempts = 3 to ensure fallback triggers
|
||||
const result = await retryWithBackoff(mockApiCall, {
|
||||
maxAttempts: 3,
|
||||
initialDelayMs: 1,
|
||||
maxDelayMs: 10,
|
||||
shouldRetry: (error: Error) => {
|
||||
const status = (error as Error & { status?: number }).status;
|
||||
return status === 429;
|
||||
},
|
||||
onPersistent429: mockFallbackHandler,
|
||||
authType: AuthType.LOGIN_WITH_GOOGLE_PERSONAL,
|
||||
});
|
||||
|
||||
// Verify fallback was triggered
|
||||
expect(fallbackCalled).toBe(true);
|
||||
expect(fallbackModel).toBe(DEFAULT_GEMINI_FLASH_MODEL);
|
||||
expect(mockFallbackHandler).toHaveBeenCalledWith(
|
||||
AuthType.LOGIN_WITH_GOOGLE_PERSONAL,
|
||||
);
|
||||
expect(result).toBe('success after fallback');
|
||||
// Should have: 3 failures, then fallback triggered, then 1 success after retry reset
|
||||
expect(mockApiCall).toHaveBeenCalledTimes(4);
|
||||
});
|
||||
|
||||
it('should not trigger fallback for API key users', async () => {
|
||||
let fallbackCalled = false;
|
||||
|
||||
// Mock function that simulates 429 errors
|
||||
const mockApiCall = vi.fn().mockRejectedValue(createSimulated429Error());
|
||||
|
||||
// Mock fallback handler
|
||||
const mockFallbackHandler = vi.fn(async () => {
|
||||
fallbackCalled = true;
|
||||
return DEFAULT_GEMINI_FLASH_MODEL;
|
||||
});
|
||||
|
||||
// Test with API key auth type - should not trigger fallback
|
||||
try {
|
||||
await retryWithBackoff(mockApiCall, {
|
||||
maxAttempts: 5,
|
||||
initialDelayMs: 10,
|
||||
maxDelayMs: 100,
|
||||
shouldRetry: (error: Error) => {
|
||||
const status = (error as Error & { status?: number }).status;
|
||||
return status === 429;
|
||||
},
|
||||
onPersistent429: mockFallbackHandler,
|
||||
authType: AuthType.USE_GEMINI, // API key auth type
|
||||
});
|
||||
} catch (error) {
|
||||
// Expected to throw after max attempts
|
||||
expect((error as Error).message).toContain('Rate limit exceeded');
|
||||
}
|
||||
|
||||
// Verify fallback was NOT triggered for API key users
|
||||
expect(fallbackCalled).toBe(false);
|
||||
expect(mockFallbackHandler).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should properly disable simulation state after fallback', () => {
|
||||
// Enable simulation
|
||||
setSimulate429(true);
|
||||
|
||||
// Verify simulation is enabled
|
||||
expect(shouldSimulate429()).toBe(true);
|
||||
|
||||
// Disable simulation after fallback
|
||||
disableSimulationAfterFallback();
|
||||
|
||||
// Verify simulation is now disabled
|
||||
expect(shouldSimulate429()).toBe(false);
|
||||
});
|
||||
});
|
||||
@@ -7,6 +7,7 @@
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { retryWithBackoff } from './retry.js';
|
||||
import { setSimulate429 } from './testUtils.js';
|
||||
|
||||
// Define an interface for the error with a status property
|
||||
interface HttpError extends Error {
|
||||
@@ -42,10 +43,15 @@ class NonRetryableError extends Error {
|
||||
describe('retryWithBackoff', () => {
|
||||
beforeEach(() => {
|
||||
vi.useFakeTimers();
|
||||
// Disable 429 simulation for tests
|
||||
setSimulate429(false);
|
||||
// Suppress unhandled promise rejection warnings for tests that expect errors
|
||||
console.warn = vi.fn();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
it('should return the result on the first attempt if successful', async () => {
|
||||
@@ -231,4 +237,197 @@ describe('retryWithBackoff', () => {
|
||||
expect(d).toBeLessThanOrEqual(100 * 1.3);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Flash model fallback for OAuth users', () => {
|
||||
it('should trigger fallback for OAuth personal users after persistent 429 errors', async () => {
|
||||
const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');
|
||||
|
||||
let fallbackOccurred = false;
|
||||
const mockFn = vi.fn().mockImplementation(async () => {
|
||||
if (!fallbackOccurred) {
|
||||
const error: HttpError = new Error('Rate limit exceeded');
|
||||
error.status = 429;
|
||||
throw error;
|
||||
}
|
||||
return 'success';
|
||||
});
|
||||
|
||||
const promise = retryWithBackoff(mockFn, {
|
||||
maxAttempts: 3,
|
||||
initialDelayMs: 100,
|
||||
onPersistent429: async (authType?: string) => {
|
||||
fallbackOccurred = true;
|
||||
return await fallbackCallback(authType);
|
||||
},
|
||||
authType: 'oauth-personal',
|
||||
});
|
||||
|
||||
// Advance all timers to complete retries
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
// Should succeed after fallback
|
||||
await expect(promise).resolves.toBe('success');
|
||||
|
||||
// Verify callback was called with correct auth type
|
||||
expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
|
||||
|
||||
// Should retry again after fallback
|
||||
expect(mockFn).toHaveBeenCalledTimes(4); // 3 initial attempts + 1 after fallback
|
||||
});
|
||||
|
||||
it('should trigger fallback for OAuth enterprise users after persistent 429 errors', async () => {
|
||||
const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');
|
||||
|
||||
let fallbackOccurred = false;
|
||||
const mockFn = vi.fn().mockImplementation(async () => {
|
||||
if (!fallbackOccurred) {
|
||||
const error: HttpError = new Error('Rate limit exceeded');
|
||||
error.status = 429;
|
||||
throw error;
|
||||
}
|
||||
return 'success';
|
||||
});
|
||||
|
||||
const promise = retryWithBackoff(mockFn, {
|
||||
maxAttempts: 3,
|
||||
initialDelayMs: 100,
|
||||
onPersistent429: async (authType?: string) => {
|
||||
fallbackOccurred = true;
|
||||
return await fallbackCallback(authType);
|
||||
},
|
||||
authType: 'oauth-enterprise',
|
||||
});
|
||||
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
await expect(promise).resolves.toBe('success');
|
||||
expect(fallbackCallback).toHaveBeenCalledWith('oauth-enterprise');
|
||||
});
|
||||
|
||||
it('should NOT trigger fallback for API key users', async () => {
|
||||
const fallbackCallback = vi.fn();
|
||||
|
||||
const mockFn = vi.fn(async () => {
|
||||
const error: HttpError = new Error('Rate limit exceeded');
|
||||
error.status = 429;
|
||||
throw error;
|
||||
});
|
||||
|
||||
const promise = retryWithBackoff(mockFn, {
|
||||
maxAttempts: 3,
|
||||
initialDelayMs: 100,
|
||||
onPersistent429: fallbackCallback,
|
||||
authType: 'gemini-api-key',
|
||||
});
|
||||
|
||||
// Handle the promise properly to avoid unhandled rejections
|
||||
const resultPromise = promise.catch((error) => error);
|
||||
await vi.runAllTimersAsync();
|
||||
const result = await resultPromise;
|
||||
|
||||
// Should fail after all retries without fallback
|
||||
expect(result).toBeInstanceOf(Error);
|
||||
expect(result.message).toBe('Rate limit exceeded');
|
||||
|
||||
// Callback should not be called for API key users
|
||||
expect(fallbackCallback).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should reset attempt counter and continue after successful fallback', async () => {
|
||||
let fallbackCalled = false;
|
||||
const fallbackCallback = vi.fn().mockImplementation(async () => {
|
||||
fallbackCalled = true;
|
||||
return 'gemini-2.5-flash';
|
||||
});
|
||||
|
||||
const mockFn = vi.fn().mockImplementation(async () => {
|
||||
if (!fallbackCalled) {
|
||||
const error: HttpError = new Error('Rate limit exceeded');
|
||||
error.status = 429;
|
||||
throw error;
|
||||
}
|
||||
return 'success';
|
||||
});
|
||||
|
||||
const promise = retryWithBackoff(mockFn, {
|
||||
maxAttempts: 3,
|
||||
initialDelayMs: 100,
|
||||
onPersistent429: fallbackCallback,
|
||||
authType: 'oauth-personal',
|
||||
});
|
||||
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
await expect(promise).resolves.toBe('success');
|
||||
expect(fallbackCallback).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it('should continue with original error if fallback is rejected', async () => {
|
||||
const fallbackCallback = vi.fn().mockResolvedValue(null); // User rejected fallback
|
||||
|
||||
const mockFn = vi.fn(async () => {
|
||||
const error: HttpError = new Error('Rate limit exceeded');
|
||||
error.status = 429;
|
||||
throw error;
|
||||
});
|
||||
|
||||
const promise = retryWithBackoff(mockFn, {
|
||||
maxAttempts: 3,
|
||||
initialDelayMs: 100,
|
||||
onPersistent429: fallbackCallback,
|
||||
authType: 'oauth-personal',
|
||||
});
|
||||
|
||||
// Handle the promise properly to avoid unhandled rejections
|
||||
const resultPromise = promise.catch((error) => error);
|
||||
await vi.runAllTimersAsync();
|
||||
const result = await resultPromise;
|
||||
|
||||
// Should fail with original error when fallback is rejected
|
||||
expect(result).toBeInstanceOf(Error);
|
||||
expect(result.message).toBe('Rate limit exceeded');
|
||||
expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
|
||||
});
|
||||
|
||||
it('should handle mixed error types (only count consecutive 429s)', async () => {
|
||||
const fallbackCallback = vi.fn().mockResolvedValue('gemini-2.5-flash');
|
||||
let attempts = 0;
|
||||
let fallbackOccurred = false;
|
||||
|
||||
const mockFn = vi.fn().mockImplementation(async () => {
|
||||
attempts++;
|
||||
if (fallbackOccurred) {
|
||||
return 'success';
|
||||
}
|
||||
if (attempts === 1) {
|
||||
// First attempt: 500 error (resets consecutive count)
|
||||
const error: HttpError = new Error('Server error');
|
||||
error.status = 500;
|
||||
throw error;
|
||||
} else {
|
||||
// Remaining attempts: 429 errors
|
||||
const error: HttpError = new Error('Rate limit exceeded');
|
||||
error.status = 429;
|
||||
throw error;
|
||||
}
|
||||
});
|
||||
|
||||
const promise = retryWithBackoff(mockFn, {
|
||||
maxAttempts: 5,
|
||||
initialDelayMs: 100,
|
||||
onPersistent429: async (authType?: string) => {
|
||||
fallbackOccurred = true;
|
||||
return await fallbackCallback(authType);
|
||||
},
|
||||
authType: 'oauth-personal',
|
||||
});
|
||||
|
||||
await vi.runAllTimersAsync();
|
||||
|
||||
await expect(promise).resolves.toBe('success');
|
||||
|
||||
// Should trigger fallback after 4 consecutive 429s (attempts 2-5)
|
||||
expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -4,11 +4,15 @@
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { AuthType } from '../core/contentGenerator.js';
|
||||
|
||||
export interface RetryOptions {
|
||||
maxAttempts: number;
|
||||
initialDelayMs: number;
|
||||
maxDelayMs: number;
|
||||
shouldRetry: (error: Error) => boolean;
|
||||
onPersistent429?: (authType?: string) => Promise<string | null>;
|
||||
authType?: string;
|
||||
}
|
||||
|
||||
const DEFAULT_RETRY_OPTIONS: RetryOptions = {
|
||||
@@ -59,29 +63,69 @@ export async function retryWithBackoff<T>(
|
||||
fn: () => Promise<T>,
|
||||
options?: Partial<RetryOptions>,
|
||||
): Promise<T> {
|
||||
const { maxAttempts, initialDelayMs, maxDelayMs, shouldRetry } = {
|
||||
const {
|
||||
maxAttempts,
|
||||
initialDelayMs,
|
||||
maxDelayMs,
|
||||
shouldRetry,
|
||||
onPersistent429,
|
||||
authType,
|
||||
} = {
|
||||
...DEFAULT_RETRY_OPTIONS,
|
||||
...options,
|
||||
};
|
||||
|
||||
let attempt = 0;
|
||||
let currentDelay = initialDelayMs;
|
||||
let consecutive429Count = 0;
|
||||
|
||||
while (attempt < maxAttempts) {
|
||||
attempt++;
|
||||
try {
|
||||
return await fn();
|
||||
} catch (error) {
|
||||
const errorStatus = getErrorStatus(error);
|
||||
|
||||
// Track consecutive 429 errors
|
||||
if (errorStatus === 429) {
|
||||
consecutive429Count++;
|
||||
} else {
|
||||
consecutive429Count = 0;
|
||||
}
|
||||
|
||||
// Check if we've exhausted retries or shouldn't retry
|
||||
if (attempt >= maxAttempts || !shouldRetry(error as Error)) {
|
||||
// If we have persistent 429s and a fallback callback for OAuth
|
||||
if (
|
||||
consecutive429Count >= 3 &&
|
||||
onPersistent429 &&
|
||||
(authType === AuthType.LOGIN_WITH_GOOGLE_PERSONAL ||
|
||||
authType === AuthType.LOGIN_WITH_GOOGLE_ENTERPRISE)
|
||||
) {
|
||||
try {
|
||||
const fallbackModel = await onPersistent429(authType);
|
||||
if (fallbackModel) {
|
||||
// Reset attempt counter and try with new model
|
||||
attempt = 0;
|
||||
consecutive429Count = 0;
|
||||
currentDelay = initialDelayMs;
|
||||
continue;
|
||||
}
|
||||
} catch (fallbackError) {
|
||||
// If fallback fails, continue with original error
|
||||
console.warn('Fallback to Flash model failed:', fallbackError);
|
||||
}
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
|
||||
const { delayDurationMs, errorStatus } = getDelayDurationAndStatus(error);
|
||||
const { delayDurationMs, errorStatus: delayErrorStatus } =
|
||||
getDelayDurationAndStatus(error);
|
||||
|
||||
if (delayDurationMs > 0) {
|
||||
// Respect Retry-After header if present and parsed
|
||||
console.warn(
|
||||
`Attempt ${attempt} failed with status ${errorStatus ?? 'unknown'}. Retrying after explicit delay of ${delayDurationMs}ms...`,
|
||||
`Attempt ${attempt} failed with status ${delayErrorStatus ?? 'unknown'}. Retrying after explicit delay of ${delayDurationMs}ms...`,
|
||||
error,
|
||||
);
|
||||
await delay(delayDurationMs);
|
||||
|
||||
87
packages/core/src/utils/testUtils.ts
Normal file
87
packages/core/src/utils/testUtils.ts
Normal file
@@ -0,0 +1,87 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* Testing utilities for simulating 429 errors in unit tests
|
||||
*/
|
||||
|
||||
let requestCounter = 0;
|
||||
let simulate429Enabled = false;
|
||||
let simulate429AfterRequests = 0;
|
||||
let simulate429ForAuthType: string | undefined;
|
||||
let fallbackOccurred = false;
|
||||
|
||||
/**
|
||||
* Check if we should simulate a 429 error for the current request
|
||||
*/
|
||||
export function shouldSimulate429(authType?: string): boolean {
|
||||
if (!simulate429Enabled || fallbackOccurred) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// If auth type filter is set, only simulate for that auth type
|
||||
if (simulate429ForAuthType && authType !== simulate429ForAuthType) {
|
||||
return false;
|
||||
}
|
||||
|
||||
requestCounter++;
|
||||
|
||||
// If afterRequests is set, only simulate after that many requests
|
||||
if (simulate429AfterRequests > 0) {
|
||||
return requestCounter > simulate429AfterRequests;
|
||||
}
|
||||
|
||||
// Otherwise, simulate for every request
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset the request counter (useful for tests)
|
||||
*/
|
||||
export function resetRequestCounter(): void {
|
||||
requestCounter = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Disable 429 simulation after successful fallback
|
||||
*/
|
||||
export function disableSimulationAfterFallback(): void {
|
||||
fallbackOccurred = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a simulated 429 error response
|
||||
*/
|
||||
export function createSimulated429Error(): Error {
|
||||
const error = new Error('Rate limit exceeded (simulated)') as Error & {
|
||||
status: number;
|
||||
};
|
||||
error.status = 429;
|
||||
return error;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset simulation state when switching auth methods
|
||||
*/
|
||||
export function resetSimulationState(): void {
|
||||
fallbackOccurred = false;
|
||||
resetRequestCounter();
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable/disable 429 simulation programmatically (for tests)
|
||||
*/
|
||||
export function setSimulate429(
|
||||
enabled: boolean,
|
||||
afterRequests = 0,
|
||||
forAuthType?: string,
|
||||
): void {
|
||||
simulate429Enabled = enabled;
|
||||
simulate429AfterRequests = afterRequests;
|
||||
simulate429ForAuthType = forAuthType;
|
||||
fallbackOccurred = false; // Reset fallback state when simulation is re-enabled
|
||||
resetRequestCounter();
|
||||
}
|
||||
Reference in New Issue
Block a user