mirror of
https://github.com/QwenLM/qwen-code.git
synced 2025-12-20 16:57:46 +00:00
Remove auto-execution on Flash in the event of a 429/Quota failover (#3662)
Co-authored-by: Jenna Inouye <jinouye@google.com>
This commit is contained in:
@@ -262,6 +262,7 @@ export class GeminiClient {
|
||||
request: PartListUnion,
|
||||
signal: AbortSignal,
|
||||
turns: number = this.MAX_TURNS,
|
||||
originalModel?: string,
|
||||
): AsyncGenerator<ServerGeminiStreamEvent, Turn> {
|
||||
// Ensure turns never exceeds MAX_TURNS to prevent infinite loops
|
||||
const boundedTurns = Math.min(turns, this.MAX_TURNS);
|
||||
@@ -269,6 +270,9 @@ export class GeminiClient {
|
||||
return new Turn(this.getChat());
|
||||
}
|
||||
|
||||
// Track the original model from the first call to detect model switching
|
||||
const initialModel = originalModel || this.config.getModel();
|
||||
|
||||
const compressed = await this.tryCompressChat();
|
||||
if (compressed) {
|
||||
yield { type: GeminiEventType.ChatCompressed, value: compressed };
|
||||
@@ -279,6 +283,14 @@ export class GeminiClient {
|
||||
yield event;
|
||||
}
|
||||
if (!turn.pendingToolCalls.length && signal && !signal.aborted) {
|
||||
// Check if model was switched during the call (likely due to quota error)
|
||||
const currentModel = this.config.getModel();
|
||||
if (currentModel !== initialModel) {
|
||||
// Model was switched (likely due to quota error fallback)
|
||||
// Don't continue with recursive call to prevent unwanted Flash execution
|
||||
return turn;
|
||||
}
|
||||
|
||||
const nextSpeakerCheck = await checkNextSpeaker(
|
||||
this.getChat(),
|
||||
this,
|
||||
@@ -288,7 +300,12 @@ export class GeminiClient {
|
||||
const nextRequest = [{ text: 'Please continue.' }];
|
||||
// This recursive call's events will be yielded out, but the final
|
||||
// turn object will be from the top-level call.
|
||||
yield* this.sendMessageStream(nextRequest, signal, boundedTurns - 1);
|
||||
yield* this.sendMessageStream(
|
||||
nextRequest,
|
||||
signal,
|
||||
boundedTurns - 1,
|
||||
initialModel,
|
||||
);
|
||||
}
|
||||
}
|
||||
return turn;
|
||||
@@ -298,9 +315,12 @@ export class GeminiClient {
|
||||
contents: Content[],
|
||||
schema: SchemaUnion,
|
||||
abortSignal: AbortSignal,
|
||||
model: string = DEFAULT_GEMINI_FLASH_MODEL,
|
||||
model?: string,
|
||||
config: GenerateContentConfig = {},
|
||||
): Promise<Record<string, unknown>> {
|
||||
// Use current model from config instead of hardcoded Flash model
|
||||
const modelToUse =
|
||||
model || this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
|
||||
try {
|
||||
const userMemory = this.config.getUserMemory();
|
||||
const systemInstruction = getCoreSystemPrompt(userMemory);
|
||||
@@ -312,7 +332,7 @@ export class GeminiClient {
|
||||
|
||||
const apiCall = () =>
|
||||
this.getContentGenerator().generateContent({
|
||||
model,
|
||||
model: modelToUse,
|
||||
config: {
|
||||
...requestConfig,
|
||||
systemInstruction,
|
||||
@@ -585,10 +605,14 @@ export class GeminiClient {
|
||||
fallbackModel,
|
||||
error,
|
||||
);
|
||||
if (accepted) {
|
||||
if (accepted !== false && accepted !== null) {
|
||||
this.config.setModel(fallbackModel);
|
||||
return fallbackModel;
|
||||
}
|
||||
// Check if the model was switched manually in the handler
|
||||
if (this.config.getModel() === fallbackModel) {
|
||||
return null; // Model was switched but don't continue with current prompt
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn('Flash fallback handler failed:', error);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user