mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-20 08:47:44 +00:00
Remove auto-execution on Flash in the event of a 429/Quota failover (#3662)
Co-authored-by: Jenna Inouye <jinouye@google.com>
This commit is contained in:
@@ -70,6 +70,7 @@ import { UpdateNotification } from './components/UpdateNotification.js';
|
||||
import {
|
||||
isProQuotaExceededError,
|
||||
isGenericQuotaExceededError,
|
||||
UserTierId,
|
||||
} from '@google/gemini-cli-core';
|
||||
import { checkForUpdates } from './utils/updateCheck.js';
|
||||
import ansiEscapes from 'ansi-escapes';
|
||||
@@ -136,6 +137,8 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
|
||||
const ctrlDTimerRef = useRef<NodeJS.Timeout | null>(null);
|
||||
const [constrainHeight, setConstrainHeight] = useState<boolean>(true);
|
||||
const [showPrivacyNotice, setShowPrivacyNotice] = useState<boolean>(false);
|
||||
const [modelSwitchedFromQuotaError, setModelSwitchedFromQuotaError] =
|
||||
useState<boolean>(false);
|
||||
|
||||
const openPrivacyNotice = useCallback(() => {
|
||||
setShowPrivacyNotice(true);
|
||||
@@ -251,23 +254,51 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
|
||||
): Promise<boolean> => {
|
||||
let message: string;
|
||||
|
||||
// For quota errors, assume FREE tier (safe default) - only show upgrade messaging to free tier users
|
||||
// TODO: Get actual user tier from config when available
|
||||
const userTier = undefined; // Defaults to FREE tier behavior
|
||||
const isPaidTier =
|
||||
userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD;
|
||||
|
||||
// Check if this is a Pro quota exceeded error
|
||||
if (error && isProQuotaExceededError(error)) {
|
||||
message = `⚡ You have reached your daily ${currentModel} quota limit.
|
||||
if (isPaidTier) {
|
||||
message = `⚡ You have reached your daily ${currentModel} quota limit.
|
||||
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
|
||||
⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
|
||||
} else {
|
||||
message = `⚡ You have reached your daily ${currentModel} quota limit.
|
||||
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
|
||||
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
|
||||
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
|
||||
⚡ You can switch authentication methods by typing /auth`;
|
||||
}
|
||||
} else if (error && isGenericQuotaExceededError(error)) {
|
||||
message = `⚡ You have reached your daily quota limit.
|
||||
if (isPaidTier) {
|
||||
message = `⚡ You have reached your daily quota limit.
|
||||
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
|
||||
⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
|
||||
} else {
|
||||
message = `⚡ You have reached your daily quota limit.
|
||||
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
|
||||
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
|
||||
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
|
||||
⚡ You can switch authentication methods by typing /auth`;
|
||||
}
|
||||
} else {
|
||||
// Default fallback message for other cases (like consecutive 429s)
|
||||
message = `⚡ Slow response times detected.
|
||||
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.`;
|
||||
if (isPaidTier) {
|
||||
// Default fallback message for other cases (like consecutive 429s)
|
||||
message = `⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.
|
||||
⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${currentModel} quota limit
|
||||
⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
|
||||
} else {
|
||||
// Default fallback message for other cases (like consecutive 429s)
|
||||
message = `⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.
|
||||
⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${currentModel} quota limit
|
||||
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
|
||||
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
|
||||
⚡ You can switch authentication methods by typing /auth`;
|
||||
}
|
||||
}
|
||||
|
||||
// Add message to UI history
|
||||
@@ -278,7 +309,14 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
|
||||
},
|
||||
Date.now(),
|
||||
);
|
||||
return true; // Always accept the fallback
|
||||
|
||||
// Set the flag to prevent tool continuation
|
||||
setModelSwitchedFromQuotaError(true);
|
||||
// Set global quota error flag to prevent Flash model calls
|
||||
config.setQuotaErrorOccurred(true);
|
||||
// Switch model for future use but return false to stop current retry
|
||||
config.setModel(fallbackModel);
|
||||
return false; // Don't continue with current prompt
|
||||
};
|
||||
|
||||
config.setFlashFallbackHandler(flashFallbackHandler);
|
||||
@@ -445,6 +483,8 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
|
||||
getPreferredEditor,
|
||||
onAuthError,
|
||||
performMemoryRefresh,
|
||||
modelSwitchedFromQuotaError,
|
||||
setModelSwitchedFromQuotaError,
|
||||
);
|
||||
pendingHistoryItems.push(...pendingGeminiHistoryItems);
|
||||
const { elapsedTime, currentLoadingPhrase } =
|
||||
|
||||
@@ -301,6 +301,8 @@ describe('useGeminiStream', () => {
|
||||
getUsageStatisticsEnabled: () => true,
|
||||
getDebugMode: () => false,
|
||||
addHistory: vi.fn(),
|
||||
setQuotaErrorOccurred: vi.fn(),
|
||||
getQuotaErrorOccurred: vi.fn(() => false),
|
||||
} as unknown as Config;
|
||||
mockOnDebugMessage = vi.fn();
|
||||
mockHandleSlashCommand = vi.fn().mockResolvedValue(false);
|
||||
@@ -386,6 +388,8 @@ describe('useGeminiStream', () => {
|
||||
() => 'vscode' as EditorType,
|
||||
() => {},
|
||||
() => Promise.resolve(),
|
||||
false,
|
||||
() => {},
|
||||
);
|
||||
},
|
||||
{
|
||||
@@ -518,6 +522,8 @@ describe('useGeminiStream', () => {
|
||||
() => 'vscode' as EditorType,
|
||||
() => {},
|
||||
() => Promise.resolve(),
|
||||
false,
|
||||
() => {},
|
||||
),
|
||||
);
|
||||
|
||||
@@ -582,6 +588,8 @@ describe('useGeminiStream', () => {
|
||||
() => 'vscode' as EditorType,
|
||||
() => {},
|
||||
() => Promise.resolve(),
|
||||
false,
|
||||
() => {},
|
||||
),
|
||||
);
|
||||
|
||||
@@ -675,6 +683,8 @@ describe('useGeminiStream', () => {
|
||||
() => 'vscode' as EditorType,
|
||||
() => {},
|
||||
() => Promise.resolve(),
|
||||
false,
|
||||
() => {},
|
||||
),
|
||||
);
|
||||
|
||||
@@ -775,6 +785,8 @@ describe('useGeminiStream', () => {
|
||||
() => 'vscode' as EditorType,
|
||||
() => {},
|
||||
() => Promise.resolve(),
|
||||
false,
|
||||
() => {},
|
||||
),
|
||||
);
|
||||
|
||||
@@ -1063,6 +1075,8 @@ describe('useGeminiStream', () => {
|
||||
() => 'vscode' as EditorType,
|
||||
() => {},
|
||||
mockPerformMemoryRefresh,
|
||||
false,
|
||||
() => {},
|
||||
),
|
||||
);
|
||||
|
||||
@@ -1113,6 +1127,8 @@ describe('useGeminiStream', () => {
|
||||
() => 'vscode' as EditorType,
|
||||
() => {},
|
||||
() => Promise.resolve(),
|
||||
false,
|
||||
() => {},
|
||||
),
|
||||
);
|
||||
|
||||
|
||||
@@ -90,6 +90,8 @@ export const useGeminiStream = (
|
||||
getPreferredEditor: () => EditorType | undefined,
|
||||
onAuthError: () => void,
|
||||
performMemoryRefresh: () => Promise<void>,
|
||||
modelSwitchedFromQuotaError: boolean,
|
||||
setModelSwitchedFromQuotaError: React.Dispatch<React.SetStateAction<boolean>>,
|
||||
) => {
|
||||
const [initError, setInitError] = useState<string | null>(null);
|
||||
const abortControllerRef = useRef<AbortController | null>(null);
|
||||
@@ -494,6 +496,12 @@ export const useGeminiStream = (
|
||||
const userMessageTimestamp = Date.now();
|
||||
setShowHelp(false);
|
||||
|
||||
// Reset quota error flag when starting a new query (not a continuation)
|
||||
if (!options?.isContinuation) {
|
||||
setModelSwitchedFromQuotaError(false);
|
||||
config.setQuotaErrorOccurred(false);
|
||||
}
|
||||
|
||||
abortControllerRef.current = new AbortController();
|
||||
const abortSignal = abortControllerRef.current.signal;
|
||||
turnCancelledRef.current = false;
|
||||
@@ -552,6 +560,7 @@ export const useGeminiStream = (
|
||||
[
|
||||
streamingState,
|
||||
setShowHelp,
|
||||
setModelSwitchedFromQuotaError,
|
||||
prepareQueryForGemini,
|
||||
processGeminiStreamEvents,
|
||||
pendingHistoryItemRef,
|
||||
@@ -668,6 +677,12 @@ export const useGeminiStream = (
|
||||
);
|
||||
|
||||
markToolsAsSubmitted(callIdsToMarkAsSubmitted);
|
||||
|
||||
// Don't continue if model was switched due to quota error
|
||||
if (modelSwitchedFromQuotaError) {
|
||||
return;
|
||||
}
|
||||
|
||||
submitQuery(mergePartListUnions(responsesToSend), {
|
||||
isContinuation: true,
|
||||
});
|
||||
@@ -678,6 +693,7 @@ export const useGeminiStream = (
|
||||
markToolsAsSubmitted,
|
||||
geminiClient,
|
||||
performMemoryRefresh,
|
||||
modelSwitchedFromQuotaError,
|
||||
],
|
||||
);
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@ describe('parseAndFormatApiError', () => {
|
||||
);
|
||||
expect(result).toContain('[API Error: Rate limit exceeded');
|
||||
expect(result).toContain(
|
||||
'Slow response times detected. Switching to the gemini-2.5-flash model',
|
||||
'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
|
||||
);
|
||||
});
|
||||
|
||||
@@ -55,7 +55,7 @@ describe('parseAndFormatApiError', () => {
|
||||
);
|
||||
expect(result).toContain('[API Error: Rate limit exceeded');
|
||||
expect(result).toContain(
|
||||
'Slow response times detected. Switching to the gemini-2.5-flash model',
|
||||
'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
|
||||
);
|
||||
});
|
||||
|
||||
@@ -169,7 +169,7 @@ describe('parseAndFormatApiError', () => {
|
||||
);
|
||||
expect(result).toContain('[API Error: Rate limit exceeded');
|
||||
expect(result).toContain(
|
||||
'Slow response times detected. Switching to the gemini-2.5-flash model',
|
||||
'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
|
||||
);
|
||||
expect(result).not.toContain(
|
||||
'You have reached your daily gemini-2.5-pro quota limit',
|
||||
@@ -262,21 +262,17 @@ describe('parseAndFormatApiError', () => {
|
||||
);
|
||||
});
|
||||
|
||||
it('should handle different Gemini version strings in Pro quota exceeded errors', () => {
|
||||
const errorMessage15 =
|
||||
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 1.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
|
||||
it('should handle different Gemini 2.5 version strings in Pro quota exceeded errors', () => {
|
||||
const errorMessage25 =
|
||||
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
|
||||
const errorMessagePreview =
|
||||
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
|
||||
const errorMessageBeta =
|
||||
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini beta-3.0 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
|
||||
const errorMessageExperimental =
|
||||
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini experimental-v2 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
|
||||
|
||||
const result15 = parseAndFormatApiError(
|
||||
errorMessage15,
|
||||
const result25 = parseAndFormatApiError(
|
||||
errorMessage25,
|
||||
AuthType.LOGIN_WITH_GOOGLE,
|
||||
undefined,
|
||||
'gemini-1.5-pro',
|
||||
'gemini-2.5-pro',
|
||||
DEFAULT_GEMINI_FLASH_MODEL,
|
||||
);
|
||||
const resultPreview = parseAndFormatApiError(
|
||||
@@ -286,45 +282,19 @@ describe('parseAndFormatApiError', () => {
|
||||
'gemini-2.5-preview-pro',
|
||||
DEFAULT_GEMINI_FLASH_MODEL,
|
||||
);
|
||||
const resultBeta = parseAndFormatApiError(
|
||||
errorMessageBeta,
|
||||
AuthType.LOGIN_WITH_GOOGLE,
|
||||
undefined,
|
||||
'gemini-beta-3.0-pro',
|
||||
DEFAULT_GEMINI_FLASH_MODEL,
|
||||
);
|
||||
const resultExperimental = parseAndFormatApiError(
|
||||
errorMessageExperimental,
|
||||
AuthType.LOGIN_WITH_GOOGLE,
|
||||
undefined,
|
||||
'gemini-experimental-v2-pro',
|
||||
DEFAULT_GEMINI_FLASH_MODEL,
|
||||
);
|
||||
|
||||
expect(result15).toContain(
|
||||
'You have reached your daily gemini-1.5-pro quota limit',
|
||||
expect(result25).toContain(
|
||||
'You have reached your daily gemini-2.5-pro quota limit',
|
||||
);
|
||||
expect(resultPreview).toContain(
|
||||
'You have reached your daily gemini-2.5-preview-pro quota limit',
|
||||
);
|
||||
expect(resultBeta).toContain(
|
||||
'You have reached your daily gemini-beta-3.0-pro quota limit',
|
||||
);
|
||||
expect(resultExperimental).toContain(
|
||||
'You have reached your daily gemini-experimental-v2-pro quota limit',
|
||||
);
|
||||
expect(result15).toContain(
|
||||
expect(result25).toContain(
|
||||
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
|
||||
);
|
||||
expect(resultPreview).toContain(
|
||||
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
|
||||
);
|
||||
expect(resultBeta).toContain(
|
||||
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
|
||||
);
|
||||
expect(resultExperimental).toContain(
|
||||
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
|
||||
);
|
||||
});
|
||||
|
||||
it('should not match non-Pro models with similar version strings', () => {
|
||||
@@ -339,16 +309,6 @@ describe('parseAndFormatApiError', () => {
|
||||
"Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit",
|
||||
),
|
||||
).toBe(false);
|
||||
expect(
|
||||
isProQuotaExceededError(
|
||||
"Quota exceeded for quota metric 'Gemini beta-3.0 Flash Requests' and limit",
|
||||
),
|
||||
).toBe(false);
|
||||
expect(
|
||||
isProQuotaExceededError(
|
||||
"Quota exceeded for quota metric 'Gemini experimental-v2 Flash Requests' and limit",
|
||||
),
|
||||
).toBe(false);
|
||||
|
||||
// Test other model types
|
||||
expect(
|
||||
|
||||
@@ -19,7 +19,7 @@ import {
|
||||
const getRateLimitErrorMessageGoogleFree = (
|
||||
fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
|
||||
) =>
|
||||
`\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
|
||||
`\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
|
||||
|
||||
const getRateLimitErrorMessageGoogleProQuotaFree = (
|
||||
currentModel: string = DEFAULT_GEMINI_MODEL,
|
||||
@@ -34,7 +34,7 @@ const getRateLimitErrorMessageGoogleGenericQuotaFree = () =>
|
||||
const getRateLimitErrorMessageGooglePaid = (
|
||||
fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
|
||||
) =>
|
||||
`\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
|
||||
`\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
|
||||
|
||||
const getRateLimitErrorMessageGoogleProQuotaPaid = (
|
||||
currentModel: string = DEFAULT_GEMINI_MODEL,
|
||||
@@ -53,7 +53,7 @@ const RATE_LIMIT_ERROR_MESSAGE_VERTEX =
|
||||
const getRateLimitErrorMessageDefault = (
|
||||
fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
|
||||
) =>
|
||||
`\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
|
||||
`\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
|
||||
|
||||
function getRateLimitMessage(
|
||||
authType?: AuthType,
|
||||
|
||||
Reference in New Issue
Block a user