Remove auto-execution on Flash in the event of a 429/Quota failover (#3662)

Co-authored-by: Jenna Inouye <jinouye@google.com>
2025-12-19 09:33:53 +00:00 · 2025-07-09 13:55:56 -04:00
parent 01e756481f
commit 8a6509ffeb
14 changed files with 292 additions and 86 deletions
--- a/packages/cli/src/ui/App.tsx
+++ b/packages/cli/src/ui/App.tsx
@@ -70,6 +70,7 @@ import { UpdateNotification } from './components/UpdateNotification.js';
 import {
  isProQuotaExceededError,
  isGenericQuotaExceededError,
+  UserTierId,
 } from '@google/gemini-cli-core';
 import { checkForUpdates } from './utils/updateCheck.js';
 import ansiEscapes from 'ansi-escapes';
@@ -136,6 +137,8 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
  const ctrlDTimerRef = useRef<NodeJS.Timeout | null>(null);
  const [constrainHeight, setConstrainHeight] = useState<boolean>(true);
  const [showPrivacyNotice, setShowPrivacyNotice] = useState<boolean>(false);
+  const [modelSwitchedFromQuotaError, setModelSwitchedFromQuotaError] =
+    useState<boolean>(false);

  const openPrivacyNotice = useCallback(() => {
    setShowPrivacyNotice(true);
@@ -251,23 +254,51 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
    ): Promise<boolean> => {
      let message: string;

+      // For quota errors, assume FREE tier (safe default) - only show upgrade messaging to free tier users
+      // TODO: Get actual user tier from config when available
+      const userTier = undefined; // Defaults to FREE tier behavior
+      const isPaidTier =
+        userTier === UserTierId.LEGACY || userTier === UserTierId.STANDARD;
+
      // Check if this is a Pro quota exceeded error
      if (error && isProQuotaExceededError(error)) {
-        message = `⚡ You have reached your daily ${currentModel} quota limit.
+        if (isPaidTier) {
+          message = `⚡ You have reached your daily ${currentModel} quota limit.
+⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
+⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+        } else {
+          message = `⚡ You have reached your daily ${currentModel} quota limit.
 ⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
 ⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
 ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
 ⚡ You can switch authentication methods by typing /auth`;
+        }
      } else if (error && isGenericQuotaExceededError(error)) {
-        message = `⚡ You have reached your daily quota limit.
+        if (isPaidTier) {
+          message = `⚡ You have reached your daily quota limit.
+⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
+⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+        } else {
+          message = `⚡ You have reached your daily quota limit.
 ⚡ Automatically switching from ${currentModel} to ${fallbackModel} for the remainder of this session.
 ⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
 ⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
 ⚡ You can switch authentication methods by typing /auth`;
+        }
      } else {
-        // Default fallback message for other cases (like consecutive 429s)
-        message = `⚡ Slow response times detected.
-⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.`;
+        if (isPaidTier) {
+          // Default fallback message for other cases (like consecutive 429s)
+          message = `⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.
+⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${currentModel} quota limit
+⚡ To continue accessing the ${currentModel} model today, consider using /auth to switch to using a paid API key from AI Studio at https://aistudio.google.com/apikey`;
+        } else {
+          // Default fallback message for other cases (like consecutive 429s)
+          message = `⚡ Automatically switching from ${currentModel} to ${fallbackModel} for faster responses for the remainder of this session.  
+⚡ Possible reasons for this are that you have received multiple consecutive capacity errors or you have reached your daily ${currentModel} quota limit
+⚡ To increase your limits, upgrade to a Gemini Code Assist Standard or Enterprise plan with higher limits at https://goo.gle/set-up-gemini-code-assist
+⚡ Or you can utilize a Gemini API Key. See: https://goo.gle/gemini-cli-docs-auth#gemini-api-key
+⚡ You can switch authentication methods by typing /auth`;
+        }
      }

      // Add message to UI history
@@ -278,7 +309,14 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
        },
        Date.now(),
      );
-      return true; // Always accept the fallback
+
+      // Set the flag to prevent tool continuation
+      setModelSwitchedFromQuotaError(true);
+      // Set global quota error flag to prevent Flash model calls
+      config.setQuotaErrorOccurred(true);
+      // Switch model for future use but return false to stop current retry
+      config.setModel(fallbackModel);
+      return false; // Don't continue with current prompt
    };

    config.setFlashFallbackHandler(flashFallbackHandler);
@@ -445,6 +483,8 @@ const App = ({ config, settings, startupWarnings = [] }: AppProps) => {
    getPreferredEditor,
    onAuthError,
    performMemoryRefresh,
+    modelSwitchedFromQuotaError,
+    setModelSwitchedFromQuotaError,
  );
  pendingHistoryItems.push(...pendingGeminiHistoryItems);
  const { elapsedTime, currentLoadingPhrase } =