Remove auto-execution on Flash in the event of a 429/Quota failover (#3662)

Co-authored-by: Jenna Inouye <jinouye@google.com>
2025-12-20 16:57:46 +00:00 · 2025-07-09 13:55:56 -04:00
parent 01e756481f
commit 8a6509ffeb
14 changed files with 292 additions and 86 deletions
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -262,6 +262,7 @@ export class GeminiClient {
    request: PartListUnion,
    signal: AbortSignal,
    turns: number = this.MAX_TURNS,
+    originalModel?: string,
  ): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
    // Ensure turns never exceeds MAX_TURNS to prevent infinite loops
    const boundedTurns = Math.min(turns, this.MAX_TURNS);
@@ -269,6 +270,9 @@ export class GeminiClient {
      return new Turn(this.getChat());
    }

+    // Track the original model from the first call to detect model switching
+    const initialModel = originalModel || this.config.getModel();
+
    const compressed = await this.tryCompressChat();
    if (compressed) {
      yield { type: GeminiEventType.ChatCompressed, value: compressed };
@@ -279,6 +283,14 @@ export class GeminiClient {
      yield event;
    }
    if (!turn.pendingToolCalls.length && signal && !signal.aborted) {
+      // Check if model was switched during the call (likely due to quota error)
+      const currentModel = this.config.getModel();
+      if (currentModel !== initialModel) {
+        // Model was switched (likely due to quota error fallback)
+        // Don't continue with recursive call to prevent unwanted Flash execution
+        return turn;
+      }
+
      const nextSpeakerCheck = await checkNextSpeaker(
        this.getChat(),
        this,
@@ -288,7 +300,12 @@ export class GeminiClient {
        const nextRequest = [{ text: 'Please continue.' }];
        // This recursive call's events will be yielded out, but the final
        // turn object will be from the top-level call.
-        yield* this.sendMessageStream(nextRequest, signal, boundedTurns - 1);
+        yield* this.sendMessageStream(
+          nextRequest,
+          signal,
+          boundedTurns - 1,
+          initialModel,
+        );
      }
    }
    return turn;
@@ -298,9 +315,12 @@ export class GeminiClient {
    contents: Content[],
    schema: SchemaUnion,
    abortSignal: AbortSignal,
-    model: string = DEFAULT_GEMINI_FLASH_MODEL,
+    model?: string,
    config: GenerateContentConfig = {},
  ): Promise<Record<string, unknown>> {
+    // Prefer the explicit model argument, then the configured model; Flash is only the last-resort default
+    const modelToUse =
+      model || this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
    try {
      const userMemory = this.config.getUserMemory();
      const systemInstruction = getCoreSystemPrompt(userMemory);
@@ -312,7 +332,7 @@ export class GeminiClient {

      const apiCall = () =>
        this.getContentGenerator().generateContent({
-          model,
+          model: modelToUse,
          config: {
            ...requestConfig,
            systemInstruction,
@@ -585,10 +605,14 @@ export class GeminiClient {
          fallbackModel,
          error,
        );
-        if (accepted) {
+        if (accepted !== false && accepted !== null) {
          this.config.setModel(fallbackModel);
          return fallbackModel;
        }
+        // Handler returned false/null; check whether it nevertheless switched the config to the fallback model
+        if (this.config.getModel() === fallbackModel) {
+          return null; // Model was switched but don't continue with current prompt
+        }
      } catch (error) {
        console.warn('Flash fallback handler failed:', error);
      }