mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-20 08:47:44 +00:00
Improve quota- and resource-related 429 error handling, also taking Code Assist customer tiers into consideration (#3609)
This commit is contained in:
@@ -103,6 +103,7 @@ export interface SandboxConfig {
|
||||
/**
 * Callback consulted before switching from the current model to a fallback
 * model.
 *
 * @param currentModel - The model in use when the fallback was triggered.
 * @param fallbackModel - The model that would be switched to.
 * @param error - The error that triggered the fallback, when the caller has
 *   one to forward; may be omitted.
 * @returns A promise resolving to `true` when the switch to `fallbackModel`
 *   is accepted, `false` otherwise.
 */
export type FlashFallbackHandler = (
  currentModel: string,
  fallbackModel: string,
  error?: unknown,
) => Promise<boolean>;
|
||||
|
||||
export interface ConfigParameters {
|
||||
|
||||
@@ -845,6 +845,7 @@ describe('Gemini Client (client.ts)', () => {
|
||||
expect(mockFallbackHandler).toHaveBeenCalledWith(
|
||||
currentModel,
|
||||
fallbackModel,
|
||||
undefined,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -323,8 +323,8 @@ export class GeminiClient {
|
||||
});
|
||||
|
||||
const result = await retryWithBackoff(apiCall, {
|
||||
onPersistent429: async (authType?: string) =>
|
||||
await this.handleFlashFallback(authType),
|
||||
onPersistent429: async (authType?: string, error?: unknown) =>
|
||||
await this.handleFlashFallback(authType, error),
|
||||
authType: this.config.getContentGeneratorConfig()?.authType,
|
||||
});
|
||||
|
||||
@@ -411,8 +411,8 @@ export class GeminiClient {
|
||||
});
|
||||
|
||||
const result = await retryWithBackoff(apiCall, {
|
||||
onPersistent429: async (authType?: string) =>
|
||||
await this.handleFlashFallback(authType),
|
||||
onPersistent429: async (authType?: string, error?: unknown) =>
|
||||
await this.handleFlashFallback(authType, error),
|
||||
authType: this.config.getContentGeneratorConfig()?.authType,
|
||||
});
|
||||
return result;
|
||||
@@ -559,7 +559,10 @@ export class GeminiClient {
|
||||
* Handles fallback to Flash model when persistent 429 errors occur for OAuth users.
|
||||
* Uses a fallback handler if provided by the config, otherwise returns null.
|
||||
*/
|
||||
private async handleFlashFallback(authType?: string): Promise<string | null> {
|
||||
private async handleFlashFallback(
|
||||
authType?: string,
|
||||
error?: unknown,
|
||||
): Promise<string | null> {
|
||||
// Only handle fallback for OAuth users
|
||||
if (authType !== AuthType.LOGIN_WITH_GOOGLE) {
|
||||
return null;
|
||||
@@ -577,7 +580,11 @@ export class GeminiClient {
|
||||
const fallbackHandler = this.config.flashFallbackHandler;
|
||||
if (typeof fallbackHandler === 'function') {
|
||||
try {
|
||||
const accepted = await fallbackHandler(currentModel, fallbackModel);
|
||||
const accepted = await fallbackHandler(
|
||||
currentModel,
|
||||
fallbackModel,
|
||||
error,
|
||||
);
|
||||
if (accepted) {
|
||||
this.config.setModel(fallbackModel);
|
||||
return fallbackModel;
|
||||
|
||||
@@ -191,7 +191,10 @@ export class GeminiChat {
|
||||
* Handles fallback to Flash model when persistent 429 errors occur for OAuth users.
|
||||
* Uses a fallback handler if provided by the config, otherwise returns null.
|
||||
*/
|
||||
private async handleFlashFallback(authType?: string): Promise<string | null> {
|
||||
private async handleFlashFallback(
|
||||
authType?: string,
|
||||
error?: unknown,
|
||||
): Promise<string | null> {
|
||||
// Only handle fallback for OAuth users
|
||||
if (authType !== AuthType.LOGIN_WITH_GOOGLE) {
|
||||
return null;
|
||||
@@ -209,7 +212,11 @@ export class GeminiChat {
|
||||
const fallbackHandler = this.config.flashFallbackHandler;
|
||||
if (typeof fallbackHandler === 'function') {
|
||||
try {
|
||||
const accepted = await fallbackHandler(currentModel, fallbackModel);
|
||||
const accepted = await fallbackHandler(
|
||||
currentModel,
|
||||
fallbackModel,
|
||||
error,
|
||||
);
|
||||
if (accepted) {
|
||||
this.config.setModel(fallbackModel);
|
||||
return fallbackModel;
|
||||
@@ -270,8 +277,8 @@ export class GeminiChat {
|
||||
}
|
||||
return false;
|
||||
},
|
||||
onPersistent429: async (authType?: string) =>
|
||||
await this.handleFlashFallback(authType),
|
||||
onPersistent429: async (authType?: string, error?: unknown) =>
|
||||
await this.handleFlashFallback(authType, error),
|
||||
authType: this.config.getContentGeneratorConfig()?.authType,
|
||||
});
|
||||
const durationMs = Date.now() - startTime;
|
||||
@@ -367,8 +374,8 @@ export class GeminiChat {
|
||||
}
|
||||
return false; // Don't retry other errors by default
|
||||
},
|
||||
onPersistent429: async (authType?: string) =>
|
||||
await this.handleFlashFallback(authType),
|
||||
onPersistent429: async (authType?: string, error?: unknown) =>
|
||||
await this.handleFlashFallback(authType, error),
|
||||
authType: this.config.getContentGeneratorConfig()?.authType,
|
||||
});
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@ export * from './utils/getFolderStructure.js';
|
||||
export * from './utils/memoryDiscovery.js';
|
||||
export * from './utils/gitIgnoreParser.js';
|
||||
export * from './utils/editor.js';
|
||||
export * from './utils/quotaErrorDetection.js';
|
||||
|
||||
// Export services
|
||||
export * from './services/fileDiscoveryService.js';
|
||||
|
||||
@@ -86,6 +86,7 @@ describe('Flash Fallback Integration', () => {
|
||||
expect(fallbackModel).toBe(DEFAULT_GEMINI_FLASH_MODEL);
|
||||
expect(mockFallbackHandler).toHaveBeenCalledWith(
|
||||
AuthType.LOGIN_WITH_GOOGLE,
|
||||
expect.any(Error),
|
||||
);
|
||||
expect(result).toBe('success after fallback');
|
||||
// Should have: 2 failures, then fallback triggered, then 1 success after retry reset
|
||||
|
||||
82
packages/core/src/utils/quotaErrorDetection.ts
Normal file
82
packages/core/src/utils/quotaErrorDetection.ts
Normal file
@@ -0,0 +1,82 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/**
 * Error payload that wraps its details in a nested `error` object.
 *
 * NOTE(review): presumably mirrors the JSON error body returned by the remote
 * API — confirm against the service's error format.
 */
export interface ApiError {
  error: {
    /** Numeric error code (presumably the HTTP status, e.g. 429 — confirm). */
    code: number;
    /** Human-readable description of the failure. */
    message: string;
    /** Status string reported alongside the code. */
    status: string;
    /** Additional detail entries; their shape is not constrained here. */
    details: unknown[];
  };
}
|
||||
|
||||
/**
 * Minimal error shape: a string `message` plus an optional numeric `status`.
 */
interface StructuredError {
  /** Human-readable description of the failure. */
  message: string;
  /** Optional numeric status (presumably an HTTP status code — confirm). */
  status?: number;
}
|
||||
|
||||
/**
 * Type guard for API-style error payloads: a non-null object whose `error`
 * property is itself a non-null object carrying a string `message`.
 *
 * Only `error.message` is validated. `code`, `status` and `details` are not
 * inspected, so callers must check them before relying on their values.
 *
 * @param error - Any caught or received value.
 * @returns `true` when `error.error.message` exists and is a string.
 */
export function isApiError(error: unknown): error is ApiError {
  if (typeof error !== 'object' || error === null || !('error' in error)) {
    return false;
  }

  const payload = (error as { error: unknown }).error;

  // `typeof null` is 'object', so null has to be rejected explicitly;
  // probing a null payload for `message` would otherwise throw a TypeError.
  if (typeof payload !== 'object' || payload === null) {
    return false;
  }

  // Require a string so the guard's promise of `message: string` holds and
  // callers can safely call string methods on it.
  return typeof (payload as { message?: unknown }).message === 'string';
}
|
||||
|
||||
/**
 * Type guard for values that look like a {@link StructuredError}: a non-null
 * object exposing a string `message`.
 *
 * The optional `status` field is not inspected.
 *
 * @param error - Any caught or received value.
 * @returns `true` when `error` is an object with a string `message`.
 */
export function isStructuredError(error: unknown): error is StructuredError {
  if (typeof error !== 'object' || error === null) {
    return false;
  }
  if (!('message' in error)) {
    return false;
  }
  return typeof (error as StructuredError).message === 'string';
}
|
||||
|
||||
/**
 * Reports whether `error` carries a Gemini "Pro" quota-exceeded message.
 *
 * The message text is taken from the value itself when it is a string, from
 * `error.message` when it satisfies `isStructuredError`, or from
 * `error.error.message` when it satisfies `isApiError` (tried in that order).
 * Any other value yields `false`.
 *
 * A message matches when it contains both
 * `Quota exceeded for quota metric 'Gemini` and `Pro Requests'`, for example:
 * - "Quota exceeded for quota metric 'Gemini 2.5 Pro Requests'"
 * - "Quota exceeded for quota metric 'Gemini 1.5-preview Pro Requests'"
 * - "Quota exceeded for quota metric 'Gemini experimental-v2 Pro Requests'"
 *
 * Plain substring tests are used instead of a regular expression to avoid
 * ReDoS vulnerabilities.
 *
 * @param error - Any caught or received value.
 * @returns `true` when the extracted message contains both markers.
 */
export function isProQuotaExceededError(error: unknown): boolean {
  const metricPrefix = "Quota exceeded for quota metric 'Gemini";
  const metricSuffix = "Pro Requests'";

  let text: string;
  if (typeof error === 'string') {
    text = error;
  } else if (isStructuredError(error)) {
    text = error.message;
  } else if (isApiError(error)) {
    text = error.error.message;
  } else {
    return false;
  }

  return text.includes(metricPrefix) && text.includes(metricSuffix);
}
|
||||
|
||||
/**
 * Reports whether `error` carries any "Quota exceeded for quota metric"
 * message, regardless of which metric is named.
 *
 * The message text is taken from the value itself when it is a string, from
 * `error.message` when it satisfies `isStructuredError`, or from
 * `error.error.message` when it satisfies `isApiError` (tried in that order).
 * Any other value yields `false`.
 *
 * @param error - Any caught or received value.
 * @returns `true` when the extracted message contains the quota marker.
 */
export function isGenericQuotaExceededError(error: unknown): boolean {
  const quotaMarker = 'Quota exceeded for quota metric';
  const mentionsQuota = (text: string): boolean => text.includes(quotaMarker);

  if (typeof error === 'string') {
    return mentionsQuota(error);
  }
  if (isStructuredError(error)) {
    return mentionsQuota(error.message);
  }
  return isApiError(error) ? mentionsQuota(error.error.message) : false;
}
|
||||
@@ -357,7 +357,10 @@ describe('retryWithBackoff', () => {
|
||||
// Should fail with original error when fallback is rejected
|
||||
expect(result).toBeInstanceOf(Error);
|
||||
expect(result.message).toBe('Rate limit exceeded');
|
||||
expect(fallbackCallback).toHaveBeenCalledWith('oauth-personal');
|
||||
expect(fallbackCallback).toHaveBeenCalledWith(
|
||||
'oauth-personal',
|
||||
expect.any(Error),
|
||||
);
|
||||
});
|
||||
|
||||
it('should handle mixed error types (only count consecutive 429s)', async () => {
|
||||
|
||||
@@ -5,13 +5,20 @@
|
||||
*/
|
||||
|
||||
import { AuthType } from '../core/contentGenerator.js';
|
||||
import {
|
||||
isProQuotaExceededError,
|
||||
isGenericQuotaExceededError,
|
||||
} from './quotaErrorDetection.js';
|
||||
|
||||
/**
 * Options accepted by the retry-with-backoff helper.
 *
 * `onPersistent429` is declared once, with the `(authType, error)` signature
 * that the call sites use; the previous duplicate single-argument declaration
 * conflicted with it and has been removed.
 */
export interface RetryOptions {
  /** Upper bound on attempts (presumably counts the first call — confirm). */
  maxAttempts: number;
  /** Delay used for the first retry, in milliseconds. */
  initialDelayMs: number;
  /** Ceiling for the retry delay, in milliseconds. */
  maxDelayMs: number;
  /** Decides whether a failed attempt with the given error is retried. */
  shouldRetry: (error: Error) => boolean;
  /**
   * Optional hook consulted on HTTP 429 failures to pick a fallback model.
   *
   * @param authType - The auth type the retry loop was configured with.
   * @param error - The error that triggered the fallback.
   * @returns The fallback model name to continue with, or `null` to keep
   *   handling the original error.
   */
  onPersistent429?: (
    authType?: string,
    error?: unknown,
  ) => Promise<string | null>;
  /** Auth type of the current session, forwarded to `onPersistent429`. */
  authType?: string;
}
|
||||
|
||||
@@ -86,6 +93,53 @@ export async function retryWithBackoff<T>(
|
||||
} catch (error) {
|
||||
const errorStatus = getErrorStatus(error);
|
||||
|
||||
// Check for Pro quota exceeded error first - immediate fallback for OAuth users
|
||||
if (
|
||||
errorStatus === 429 &&
|
||||
authType === AuthType.LOGIN_WITH_GOOGLE &&
|
||||
isProQuotaExceededError(error) &&
|
||||
onPersistent429
|
||||
) {
|
||||
try {
|
||||
const fallbackModel = await onPersistent429(authType, error);
|
||||
if (fallbackModel) {
|
||||
// Reset attempt counter and try with new model
|
||||
attempt = 0;
|
||||
consecutive429Count = 0;
|
||||
currentDelay = initialDelayMs;
|
||||
// With the model updated, we continue to the next attempt
|
||||
continue;
|
||||
}
|
||||
} catch (fallbackError) {
|
||||
// If fallback fails, continue with original error
|
||||
console.warn('Fallback to Flash model failed:', fallbackError);
|
||||
}
|
||||
}
|
||||
|
||||
// Check for generic quota exceeded error (but not Pro, which was handled above) - immediate fallback for OAuth users
|
||||
if (
|
||||
errorStatus === 429 &&
|
||||
authType === AuthType.LOGIN_WITH_GOOGLE &&
|
||||
!isProQuotaExceededError(error) &&
|
||||
isGenericQuotaExceededError(error) &&
|
||||
onPersistent429
|
||||
) {
|
||||
try {
|
||||
const fallbackModel = await onPersistent429(authType, error);
|
||||
if (fallbackModel) {
|
||||
// Reset attempt counter and try with new model
|
||||
attempt = 0;
|
||||
consecutive429Count = 0;
|
||||
currentDelay = initialDelayMs;
|
||||
// With the model updated, we continue to the next attempt
|
||||
continue;
|
||||
}
|
||||
} catch (fallbackError) {
|
||||
// If fallback fails, continue with original error
|
||||
console.warn('Fallback to Flash model failed:', fallbackError);
|
||||
}
|
||||
}
|
||||
|
||||
// Track consecutive 429 errors
|
||||
if (errorStatus === 429) {
|
||||
consecutive429Count++;
|
||||
@@ -100,7 +154,7 @@ export async function retryWithBackoff<T>(
|
||||
authType === AuthType.LOGIN_WITH_GOOGLE
|
||||
) {
|
||||
try {
|
||||
const fallbackModel = await onPersistent429(authType);
|
||||
const fallbackModel = await onPersistent429(authType, error);
|
||||
if (fallbackModel) {
|
||||
// Reset attempt counter and try with new model
|
||||
attempt = 0;
|
||||
|
||||
Reference in New Issue
Block a user