Remove auto-execution on Flash in the event of a 429/Quota failover (#3662)

Co-authored-by: Jenna Inouye <jinouye@google.com>
This commit is contained in:
Bryan Morgan
2025-07-09 13:55:56 -04:00
committed by GitHub
parent 01e756481f
commit 8a6509ffeb
14 changed files with 292 additions and 86 deletions

View File

@@ -262,6 +262,7 @@ export class GeminiClient {
request: PartListUnion,
signal: AbortSignal,
turns: number = this.MAX_TURNS,
originalModel?: string,
): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
// Ensure turns never exceeds MAX_TURNS to prevent infinite loops
const boundedTurns = Math.min(turns, this.MAX_TURNS);
@@ -269,6 +270,9 @@ export class GeminiClient {
return new Turn(this.getChat());
}
// Track the original model from the first call to detect model switching
const initialModel = originalModel || this.config.getModel();
const compressed = await this.tryCompressChat();
if (compressed) {
yield { type: GeminiEventType.ChatCompressed, value: compressed };
@@ -279,6 +283,14 @@ export class GeminiClient {
yield event;
}
if (!turn.pendingToolCalls.length && signal && !signal.aborted) {
// Check if model was switched during the call (likely due to quota error)
const currentModel = this.config.getModel();
if (currentModel !== initialModel) {
// Model was switched (likely due to quota error fallback)
// Don't continue with recursive call to prevent unwanted Flash execution
return turn;
}
const nextSpeakerCheck = await checkNextSpeaker(
this.getChat(),
this,
@@ -288,7 +300,12 @@ export class GeminiClient {
const nextRequest = [{ text: 'Please continue.' }];
// This recursive call's events will be yielded out, but the final
// turn object will be from the top-level call.
yield* this.sendMessageStream(nextRequest, signal, boundedTurns - 1);
yield* this.sendMessageStream(
nextRequest,
signal,
boundedTurns - 1,
initialModel,
);
}
}
return turn;
@@ -298,9 +315,12 @@ export class GeminiClient {
contents: Content[],
schema: SchemaUnion,
abortSignal: AbortSignal,
model: string = DEFAULT_GEMINI_FLASH_MODEL,
model?: string,
config: GenerateContentConfig = {},
): Promise<Record<string, unknown>> {
// Prefer the caller-supplied model, then the currently configured model;
// fall back to the default Flash model only when neither is set.
const modelToUse =
model || this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
try {
const userMemory = this.config.getUserMemory();
const systemInstruction = getCoreSystemPrompt(userMemory);
@@ -312,7 +332,7 @@ export class GeminiClient {
const apiCall = () =>
this.getContentGenerator().generateContent({
model,
model: modelToUse,
config: {
...requestConfig,
systemInstruction,
@@ -585,10 +605,14 @@ export class GeminiClient {
fallbackModel,
error,
);
if (accepted) {
if (accepted !== false && accepted !== null) {
this.config.setModel(fallbackModel);
return fallbackModel;
}
// Check if the model was switched manually in the handler
if (this.config.getModel() === fallbackModel) {
return null; // Model was switched but don't continue with current prompt
}
} catch (error) {
console.warn('Flash fallback handler failed:', error);
}