mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-20 16:57:46 +00:00
Remove auto-execution on Flash in the event of a 429/Quota failover (#3662)
Co-authored-by: Jenna Inouye <jinouye@google.com>
This commit is contained in:
@@ -39,7 +39,7 @@ describe('parseAndFormatApiError', () => {
 );
 expect(result).toContain('[API Error: Rate limit exceeded');
 expect(result).toContain(
-'Slow response times detected. Switching to the gemini-2.5-flash model',
+'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
 );
 });
 
@@ -55,7 +55,7 @@ describe('parseAndFormatApiError', () => {
 );
 expect(result).toContain('[API Error: Rate limit exceeded');
 expect(result).toContain(
-'Slow response times detected. Switching to the gemini-2.5-flash model',
+'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
 );
 });
 
@@ -169,7 +169,7 @@ describe('parseAndFormatApiError', () => {
 );
 expect(result).toContain('[API Error: Rate limit exceeded');
 expect(result).toContain(
-'Slow response times detected. Switching to the gemini-2.5-flash model',
+'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
 );
 expect(result).not.toContain(
 'You have reached your daily gemini-2.5-pro quota limit',
@@ -262,21 +262,17 @@ describe('parseAndFormatApiError', () => {
 );
 });
 
-it('should handle different Gemini version strings in Pro quota exceeded errors', () => {
-const errorMessage15 =
-'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 1.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
+it('should handle different Gemini 2.5 version strings in Pro quota exceeded errors', () => {
+const errorMessage25 =
+'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
 const errorMessagePreview =
 'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-const errorMessageBeta =
-'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini beta-3.0 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
-const errorMessageExperimental =
-'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini experimental-v2 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
 
-const result15 = parseAndFormatApiError(
-errorMessage15,
+const result25 = parseAndFormatApiError(
+errorMessage25,
 AuthType.LOGIN_WITH_GOOGLE,
 undefined,
-'gemini-1.5-pro',
+'gemini-2.5-pro',
 DEFAULT_GEMINI_FLASH_MODEL,
 );
 const resultPreview = parseAndFormatApiError(
@@ -286,45 +282,19 @@ describe('parseAndFormatApiError', () => {
 'gemini-2.5-preview-pro',
 DEFAULT_GEMINI_FLASH_MODEL,
 );
-const resultBeta = parseAndFormatApiError(
-errorMessageBeta,
-AuthType.LOGIN_WITH_GOOGLE,
-undefined,
-'gemini-beta-3.0-pro',
-DEFAULT_GEMINI_FLASH_MODEL,
-);
-const resultExperimental = parseAndFormatApiError(
-errorMessageExperimental,
-AuthType.LOGIN_WITH_GOOGLE,
-undefined,
-'gemini-experimental-v2-pro',
-DEFAULT_GEMINI_FLASH_MODEL,
-);
 
-expect(result15).toContain(
-'You have reached your daily gemini-1.5-pro quota limit',
+expect(result25).toContain(
+'You have reached your daily gemini-2.5-pro quota limit',
 );
 expect(resultPreview).toContain(
 'You have reached your daily gemini-2.5-preview-pro quota limit',
 );
-expect(resultBeta).toContain(
-'You have reached your daily gemini-beta-3.0-pro quota limit',
-);
-expect(resultExperimental).toContain(
-'You have reached your daily gemini-experimental-v2-pro quota limit',
-);
-expect(result15).toContain(
+expect(result25).toContain(
 'upgrade to a Gemini Code Assist Standard or Enterprise plan',
 );
 expect(resultPreview).toContain(
 'upgrade to a Gemini Code Assist Standard or Enterprise plan',
 );
-expect(resultBeta).toContain(
-'upgrade to a Gemini Code Assist Standard or Enterprise plan',
-);
-expect(resultExperimental).toContain(
-'upgrade to a Gemini Code Assist Standard or Enterprise plan',
-);
 });
 
 it('should not match non-Pro models with similar version strings', () => {
@@ -339,16 +309,6 @@ describe('parseAndFormatApiError', () => {
 "Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit",
 ),
 ).toBe(false);
-expect(
-isProQuotaExceededError(
-"Quota exceeded for quota metric 'Gemini beta-3.0 Flash Requests' and limit",
-),
-).toBe(false);
-expect(
-isProQuotaExceededError(
-"Quota exceeded for quota metric 'Gemini experimental-v2 Flash Requests' and limit",
-),
-).toBe(false);
 
 // Test other model types
 expect(
|
||||
@@ -19,7 +19,7 @@ import {
 const getRateLimitErrorMessageGoogleFree = (
 fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
 ) =>
-`\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
+`\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
 
 const getRateLimitErrorMessageGoogleProQuotaFree = (
 currentModel: string = DEFAULT_GEMINI_MODEL,
@@ -34,7 +34,7 @@ const getRateLimitErrorMessageGoogleGenericQuotaFree = () =>
 const getRateLimitErrorMessageGooglePaid = (
 fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
 ) =>
-`\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
+`\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
 
 const getRateLimitErrorMessageGoogleProQuotaPaid = (
 currentModel: string = DEFAULT_GEMINI_MODEL,
@@ -53,7 +53,7 @@ const RATE_LIMIT_ERROR_MESSAGE_VERTEX =
 const getRateLimitErrorMessageDefault = (
 fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
 ) =>
-`\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
+`\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
 
 function getRateLimitMessage(
 authType?: AuthType,
|
||||
Reference in New Issue
Block a user