Improve quota- and resource-related 429 error handling, also taking Code Assist customer tiers into consideration (#3609)

This commit is contained in:
Bryan Morgan
2025-07-09 10:18:15 -04:00
committed by GitHub
parent 8f2da86aa5
commit b0cce95286
14 changed files with 611 additions and 63 deletions

View File

@@ -86,6 +86,7 @@ describe('Flash Fallback Integration', () => {
expect(fallbackModel).toBe(DEFAULT_GEMINI_FLASH_MODEL);
expect(mockFallbackHandler).toHaveBeenCalledWith(
AuthType.LOGIN_WITH_GOOGLE,
expect.any(Error),
);
expect(result).toBe('success after fallback');
// Should have: 2 failures, then fallback triggered, then 1 success after retry reset

View File

@@ -0,0 +1,82 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
 * Error payload whose details are nested under a top-level `error` property.
 *
 * NOTE(review): this looks like the JSON error envelope returned by the
 * backing API — confirm against the service's documented error format.
 */
export interface ApiError {
error: {
// Numeric error code. Presumably mirrors the HTTP status (e.g. 429) — TODO confirm.
code: number;
// Human-readable description; the quota detectors in this file search this text.
message: string;
// Textual status identifier. Its vocabulary is not visible from here — TODO confirm.
status: string;
// Additional detail entries of unspecified shape, hence `unknown[]`.
details: unknown[];
};
}
/**
 * Minimal error-like shape: any object carrying a string `message`, optionally
 * with a numeric `status`. Not exported; it is the narrowing target of
 * `isStructuredError`.
 */
interface StructuredError {
// Human-readable error text.
message: string;
// Optional numeric status. NOTE(review): presumably an HTTP status code — confirm.
status?: number;
}
/**
 * Type guard for the nested `{ error: { message, ... } }` payload shape.
 *
 * Only the parts of the shape that callers in this file rely on are verified:
 * `error` must be a non-null object and `error.message` must be a string.
 * The remaining `ApiError` fields (`code`, `status`, `details`) are not checked.
 *
 * @param error - Any value, typically something caught from a failed request.
 * @returns `true` when `error.error.message` exists and is a string.
 */
export function isApiError(error: unknown): error is ApiError {
  if (typeof error !== 'object' || error === null || !('error' in error)) {
    return false;
  }

  const inner: unknown = (error as { error: unknown }).error;

  // `typeof null` is 'object', so null has to be ruled out explicitly;
  // otherwise the `in` operator below throws a TypeError on `{ error: null }`.
  if (typeof inner !== 'object' || inner === null) {
    return false;
  }

  // The predicate promises `message: string`, so verify the type rather than
  // mere presence of the key — callers invoke string methods on it directly.
  return (
    'message' in inner &&
    typeof (inner as { message: unknown }).message === 'string'
  );
}
/**
 * Type guard for error-like objects that expose a string `message`.
 *
 * Native `Error` instances satisfy this, as does any plain object with a
 * string `message` property. The optional `status` field is not inspected.
 *
 * @param error - Any value, typically something caught from a failed request.
 * @returns `true` when `error` is a non-null object whose `message` is a string.
 */
export function isStructuredError(error: unknown): error is StructuredError {
  // Primitives and null cannot carry a `message` property.
  if (error === null || typeof error !== 'object') {
    return false;
  }
  if (!('message' in error)) {
    return false;
  }
  const candidate: unknown = (error as { message: unknown }).message;
  return typeof candidate === 'string';
}
/**
 * Reports whether `error` describes an exhausted Gemini "Pro" request quota.
 *
 * The message text is taken from, in order: the value itself when it is a
 * string, `error.message` for error-like objects, or `error.error.message`
 * for nested API payloads. Anything else yields `false`.
 *
 * @param error - Any value, typically something caught from a failed request.
 * @returns `true` when the extracted message matches the Pro quota pattern.
 */
export function isProQuotaExceededError(error: unknown): boolean {
  // Check for Pro quota exceeded errors by looking for the specific pattern.
  // This will match patterns like:
  // - "Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'"
  // - "Quota exceeded for quota metric 'Gemini 1.5-preview Pro Requests'"
  // - "Quota exceeded for quota metric 'Gemini beta-3.0 Pro Requests'"
  // - "Quota exceeded for quota metric 'Gemini experimental-v2 Pro Requests'"
  // We use string methods instead of regex to avoid ReDoS vulnerabilities.
  //
  // The matcher accepts `unknown` and verifies the type itself: `isApiError`
  // is a shape check on untrusted data, so a nested `message` that is not a
  // string must produce `false` instead of a TypeError from `.includes`.
  const checkMessage = (message: unknown): boolean =>
    typeof message === 'string' &&
    message.includes("Quota exceeded for quota metric 'Gemini") &&
    message.includes("Pro Requests'");

  if (typeof error === 'string') {
    return checkMessage(error);
  }
  if (isStructuredError(error)) {
    return checkMessage(error.message);
  }
  if (isApiError(error)) {
    return checkMessage(error.error.message);
  }
  return false;
}
/**
 * Reports whether `error` describes any "Quota exceeded for quota metric"
 * failure.
 *
 * The message text is taken from, in order: the value itself when it is a
 * string, `error.message` for error-like objects, or `error.error.message`
 * for nested API payloads. Anything else yields `false`.
 *
 * Note that Pro-quota messages contain the same prefix, so this also returns
 * `true` for them; callers that need to tell the two apart must test
 * `isProQuotaExceededError` as well.
 *
 * @param error - Any value, typically something caught from a failed request.
 * @returns `true` when the extracted message contains the quota-exceeded text.
 */
export function isGenericQuotaExceededError(error: unknown): boolean {
  // Single matcher so the needle is written once. It accepts `unknown` and
  // checks the type itself, so a nested API `message` that is not a string
  // produces `false` instead of a TypeError from `.includes`.
  const checkMessage = (message: unknown): boolean =>
    typeof message === 'string' &&
    message.includes('Quota exceeded for quota metric');

  if (typeof error === 'string') {
    return checkMessage(error);
  }
  if (isStructuredError(error)) {
    return checkMessage(error.message);
  }
  if (isApiError(error)) {
    return checkMessage(error.error.message);
  }
  return false;
}

View File

@@ -357,7 +357,10 @@ describe('retryWithBackoff', () => {
// Should fail with original error when fallback is rejected
expect(result).toBeInstanceOf(Error);
expect(result.message).toBe('Rate limit exceeded');
expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
expect(fallbackCallback).toHaveBeenCalledWith(
'oauth-personal',
expect.any(Error),
);
});
it('should handle mixed error types (only count consecutive 429s)', async () => {

View File

@@ -5,13 +5,20 @@
*/
import { AuthType } from '../core/contentGenerator.js';
import {
isProQuotaExceededError,
isGenericQuotaExceededError,
} from './quotaErrorDetection.js';
/**
 * Options accepted by `retryWithBackoff`.
 *
 * NOTE(review): this text comes from a commit diff view, so both the previous
 * and the updated `onPersistent429` signatures appear below — confirm which
 * one the checked-in file actually contains.
 */
export interface RetryOptions {
// Presumably the upper bound on attempts before giving up — TODO confirm against retryWithBackoff.
maxAttempts: number;
// Delay the retry loop resets to after a successful model fallback.
initialDelayMs: number;
// Presumably the cap applied to the growing backoff delay — TODO confirm.
maxDelayMs: number;
// Predicate deciding whether a given error is worth another attempt.
shouldRetry: (error: Error) => boolean;
// Earlier signature: the fallback callback received only the auth type.
onPersistent429?: (authType?: string) => Promise<string | null>;
// Updated signature: the triggering error is passed as well. The retry loop
// treats a truthy result as the fallback model, resets its counters and retries;
// a rejection is caught and logged with console.warn.
onPersistent429?: (
authType?: string,
error?: unknown,
) => Promise<string | null>;
// Auth mode of the caller; the quota fallbacks only run when it equals AuthType.LOGIN_WITH_GOOGLE.
authType?: string;
}
@@ -86,6 +93,53 @@ export async function retryWithBackoff<T>(
} catch (error) {
const errorStatus = getErrorStatus(error);
// Check for Pro quota exceeded error first - immediate fallback for OAuth users
if (
errorStatus === 429 &&
authType === AuthType.LOGIN_WITH_GOOGLE &&
isProQuotaExceededError(error) &&
onPersistent429
) {
try {
const fallbackModel = await onPersistent429(authType, error);
if (fallbackModel) {
// Reset attempt counter and try with new model
attempt = 0;
consecutive429Count = 0;
currentDelay = initialDelayMs;
// With the model updated, we continue to the next attempt
continue;
}
} catch (fallbackError) {
// If fallback fails, continue with original error
console.warn('Fallback to Flash model failed:', fallbackError);
}
}
// Check for generic quota exceeded error (but not Pro, which was handled above) - immediate fallback for OAuth users
if (
errorStatus === 429 &&
authType === AuthType.LOGIN_WITH_GOOGLE &&
!isProQuotaExceededError(error) &&
isGenericQuotaExceededError(error) &&
onPersistent429
) {
try {
const fallbackModel = await onPersistent429(authType, error);
if (fallbackModel) {
// Reset attempt counter and try with new model
attempt = 0;
consecutive429Count = 0;
currentDelay = initialDelayMs;
// With the model updated, we continue to the next attempt
continue;
}
} catch (fallbackError) {
// If fallback fails, continue with original error
console.warn('Fallback to Flash model failed:', fallbackError);
}
}
// Track consecutive 429 errors
if (errorStatus === 429) {
consecutive429Count++;
@@ -100,7 +154,7 @@ export async function retryWithBackoff<T>(
authType === AuthType.LOGIN_WITH_GOOGLE
) {
try {
const fallbackModel = await onPersistent429(authType);
const fallbackModel = await onPersistent429(authType, error);
if (fallbackModel) {
// Reset attempt counter and try with new model
attempt = 0;