Remove auto-execution on Flash in the event of a 429/Quota failover (#3662)

Co-authored-by: Jenna Inouye <jinouye@google.com>
This commit is contained in:
Bryan Morgan
2025-07-09 13:55:56 -04:00
committed by GitHub
parent 01e756481f
commit 8a6509ffeb
14 changed files with 292 additions and 86 deletions

View File

@@ -70,6 +70,7 @@ import { UpdateNotification } from './components/UpdateNotification.js';
import {
isProQuotaExceededError,
isGenericQuotaExceededError,
UserTierId,
} from '@google/gemini-cli-core';
import { checkForUpdates } from './utils/updateCheck.js';
import ansiEscapes from 'ansi-escapes';
@@ -136,6 +137,8 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
const ctrlDTimerRef = useRef<NodeJS.Timeout | null>(null);
const [constrainHeight, setConstrainHeight] = useState<boolean>(true);
const [showPrivacyNotice, setShowPrivacyNotice] = useState<boolean>(false);
const [modelSwitchedFromQuotaError, setModelSwitchedFromQuotaError] =
useState<boolean>(false);
const openPrivacyNotice = useCallback(() => {
setShowPrivacyNotice(true);
@@ -251,23 +254,51 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
): Promise<boolean> => {
let message: string;
// For quota errors, assume FREE tier (safe default) - only show upgrade messaging to free tier users
// TODO: Get actual user tier from config when available
const userTier = undefined; // Defaults to FREE tier behavior
const isPaidTier =
userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD;
// Check if this is a Pro quota exceeded error
if (error && isProQuotaExceededError(error)) {
message = `⚡ You have reached your daily ${currentModel} quota limit.
if (isPaidTier) {
message = `⚡ You have reached your daily ${currentModel} quota limit.
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
} else {
message = `⚡ You have reached your daily ${currentModel} quota limit.
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
⚡ You can switch authentication methods by typing /auth`;
}
} else if (error && isGenericQuotaExceededError(error)) {
message = `⚡ You have reached your daily quota limit.
if (isPaidTier) {
message = `⚡ You have reached your daily quota limit.
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
} else {
message = `⚡ You have reached your daily quota limit.
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
⚡ You can switch authentication methods by typing /auth`;
}
} else {
// Default fallback message for other cases (like consecutive 429s)
message = `⚡ Slow response times detected.
⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.`;
if (isPaidTier) {
// Default fallback message for other cases (like consecutive 429s)
message = `⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.
⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${currentModel} quota limit
⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
} else {
// Default fallback message for other cases (like consecutive 429s)
message = `⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.
⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${currentModel} quota limit
⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
⚡ You can switch authentication methods by typing /auth`;
}
}
// Add message to UI history
@@ -278,7 +309,14 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
},
Date.now(),
);
return true; // Always accept the fallback
// Set the flag to prevent tool continuation
setModelSwitchedFromQuotaError(true);
// Set global quota error flag to prevent Flash model calls
config.setQuotaErrorOccurred(true);
// Switch model for future use but return false to stop current retry
config.setModel(fallbackModel);
return false; // Don't continue with current prompt
};
config.setFlashFallbackHandler(flashFallbackHandler);
@@ -445,6 +483,8 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
getPreferredEditor,
onAuthError,
performMemoryRefresh,
modelSwitchedFromQuotaError,
setModelSwitchedFromQuotaError,
);
pendingHistoryItems.push(...pendingGeminiHistoryItems);
const { elapsedTime, currentLoadingPhrase } =

View File

@@ -301,6 +301,8 @@ describe('useGeminiStream', () => {
getUsageStatisticsEnabled: () => true,
getDebugMode: () => false,
addHistory: vi.fn(),
setQuotaErrorOccurred: vi.fn(),
getQuotaErrorOccurred: vi.fn(() => false),
} as unknown as Config;
mockOnDebugMessage = vi.fn();
mockHandleSlashCommand = vi.fn().mockResolvedValue(false);
@@ -386,6 +388,8 @@ describe('useGeminiStream', () => {
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
false,
() => {},
);
},
{
@@ -518,6 +522,8 @@ describe('useGeminiStream', () => {
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
false,
() => {},
),
);
@@ -582,6 +588,8 @@ describe('useGeminiStream', () => {
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
false,
() => {},
),
);
@@ -675,6 +683,8 @@ describe('useGeminiStream', () => {
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
false,
() => {},
),
);
@@ -775,6 +785,8 @@ describe('useGeminiStream', () => {
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
false,
() => {},
),
);
@@ -1063,6 +1075,8 @@ describe('useGeminiStream', () => {
() => 'vscode' as EditorType,
() => {},
mockPerformMemoryRefresh,
false,
() => {},
),
);
@@ -1113,6 +1127,8 @@ describe('useGeminiStream', () => {
() => 'vscode' as EditorType,
() => {},
() => Promise.resolve(),
false,
() => {},
),
);

View File

@@ -90,6 +90,8 @@ export const useGeminiStream = (
getPreferredEditor: () => EditorType | undefined,
onAuthError: () => void,
performMemoryRefresh: () => Promise<void>,
modelSwitchedFromQuotaError: boolean,
setModelSwitchedFromQuotaError: React.Dispatch<React.SetStateAction<boolean>>,
) => {
const [initError, setInitError] = useState<string | null>(null);
const abortControllerRef = useRef<AbortController | null>(null);
@@ -494,6 +496,12 @@ export const useGeminiStream = (
const userMessageTimestamp = Date.now();
setShowHelp(false);
// Reset quota error flag when starting a new query (not a continuation)
if (!options?.isContinuation) {
setModelSwitchedFromQuotaError(false);
config.setQuotaErrorOccurred(false);
}
abortControllerRef.current = new AbortController();
const abortSignal = abortControllerRef.current.signal;
turnCancelledRef.current = false;
@@ -552,6 +560,7 @@ export const useGeminiStream = (
[
streamingState,
setShowHelp,
setModelSwitchedFromQuotaError,
prepareQueryForGemini,
processGeminiStreamEvents,
pendingHistoryItemRef,
@@ -668,6 +677,12 @@ export const useGeminiStream = (
);
markToolsAsSubmitted(callIdsToMarkAsSubmitted);
// Don't continue if model was switched due to quota error
if (modelSwitchedFromQuotaError) {
return;
}
submitQuery(mergePartListUnions(responsesToSend), {
isContinuation: true,
});
@@ -678,6 +693,7 @@ export const useGeminiStream = (
markToolsAsSubmitted,
geminiClient,
performMemoryRefresh,
modelSwitchedFromQuotaError,
],
);

View File

@@ -39,7 +39,7 @@ describe('parseAndFormatApiError', () => {
);
expect(result).toContain('[API Error: Rate limit exceeded');
expect(result).toContain(
'Slow response times detected. Switching to the gemini-2.5-flash model',
'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
);
});
@@ -55,7 +55,7 @@ describe('parseAndFormatApiError', () => {
);
expect(result).toContain('[API Error: Rate limit exceeded');
expect(result).toContain(
'Slow response times detected. Switching to the gemini-2.5-flash model',
'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
);
});
@@ -169,7 +169,7 @@ describe('parseAndFormatApiError', () => {
);
expect(result).toContain('[API Error: Rate limit exceeded');
expect(result).toContain(
'Slow response times detected. Switching to the gemini-2.5-flash model',
'Possible quota limitations in place or slow response times detected. Switching to the gemini-2.5-flash model',
);
expect(result).not.toContain(
'You have reached your daily gemini-2.5-pro quota limit',
@@ -262,21 +262,17 @@ describe('parseAndFormatApiError', () => {
);
});
it('should handle different Gemini version strings in Pro quota exceeded errors', () => {
const errorMessage15 =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 1.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
it('should handle different Gemini 2.5 version strings in Pro quota exceeded errors', () => {
const errorMessage25 =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
const errorMessagePreview =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini 2.5-preview Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
const errorMessageBeta =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini beta-3.0 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
const errorMessageExperimental =
'got status: 429 Too Many Requests. {"error":{"code":429,"message":"Quota exceeded for quota metric \'Gemini experimental-v2 Pro Requests\' and limit \'RequestsPerDay\' of service \'generativelanguage.googleapis.com\' for consumer \'project_number:123456789\'.","status":"RESOURCE_EXHAUSTED"}}';
const result15 = parseAndFormatApiError(
errorMessage15,
const result25 = parseAndFormatApiError(
errorMessage25,
AuthType.LOGIN_WITH_GOOGLE,
undefined,
'gemini-1.5-pro',
'gemini-2.5-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
const resultPreview = parseAndFormatApiError(
@@ -286,45 +282,19 @@ describe('parseAndFormatApiError', () => {
'gemini-2.5-preview-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
const resultBeta = parseAndFormatApiError(
errorMessageBeta,
AuthType.LOGIN_WITH_GOOGLE,
undefined,
'gemini-beta-3.0-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
const resultExperimental = parseAndFormatApiError(
errorMessageExperimental,
AuthType.LOGIN_WITH_GOOGLE,
undefined,
'gemini-experimental-v2-pro',
DEFAULT_GEMINI_FLASH_MODEL,
);
expect(result15).toContain(
'You have reached your daily gemini-1.5-pro quota limit',
expect(result25).toContain(
'You have reached your daily gemini-2.5-pro quota limit',
);
expect(resultPreview).toContain(
'You have reached your daily gemini-2.5-preview-pro quota limit',
);
expect(resultBeta).toContain(
'You have reached your daily gemini-beta-3.0-pro quota limit',
);
expect(resultExperimental).toContain(
'You have reached your daily gemini-experimental-v2-pro quota limit',
);
expect(result15).toContain(
expect(result25).toContain(
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
);
expect(resultPreview).toContain(
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
);
expect(resultBeta).toContain(
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
);
expect(resultExperimental).toContain(
'upgrade to a Gemini Code Assist Standard or Enterprise plan',
);
});
it('should not match non-Pro models with similar version strings', () => {
@@ -339,16 +309,6 @@ describe('parseAndFormatApiError', () => {
"Quota exceeded for quota metric 'Gemini 2.5-preview Flash Requests' and limit",
),
).toBe(false);
expect(
isProQuotaExceededError(
"Quota exceeded for quota metric 'Gemini beta-3.0 Flash Requests' and limit",
),
).toBe(false);
expect(
isProQuotaExceededError(
"Quota exceeded for quota metric 'Gemini experimental-v2 Flash Requests' and limit",
),
).toBe(false);
// Test other model types
expect(

View File

@@ -19,7 +19,7 @@ import {
const getRateLimitErrorMessageGoogleFree = (
fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
) =>
`\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
`\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
const getRateLimitErrorMessageGoogleProQuotaFree = (
currentModel: string = DEFAULT_GEMINI_MODEL,
@@ -34,7 +34,7 @@ const getRateLimitErrorMessageGoogleGenericQuotaFree = () =>
const getRateLimitErrorMessageGooglePaid = (
fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
) =>
`\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
`\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session. We appreciate you for choosing Gemini Code Assist and the Gemini CLI.`;
const getRateLimitErrorMessageGoogleProQuotaPaid = (
currentModel: string = DEFAULT_GEMINI_MODEL,
@@ -53,7 +53,7 @@ const RATE_LIMIT_ERROR_MESSAGE_VERTEX =
const getRateLimitErrorMessageDefault = (
fallbackModel: string = DEFAULT_GEMINI_FLASH_MODEL,
) =>
`\nSlow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
`\nPossible quota limitations in place or slow response times detected. Switching to the ${fallbackModel} model for the rest of this session.`;
function getRateLimitMessage(
authType?: AuthType,