fix(client): get model from config in flashFallbackHandler (#2118)

Co-authored-by: Jacob Richman <jacob314@gmail.com>
Authored by: SunskyXH
Date: 2025-07-04 04:43:48 +09:00
Committed by: GitHub
parent 64767c52fe
commit ab63a5f183
2 changed files with 122 additions and 10 deletions

View File

@@ -48,7 +48,6 @@ function isThinkingSupported(model: string) {
export class GeminiClient {
private chat?: GeminiChat;
private contentGenerator?: ContentGenerator;
private model: string;
private embeddingModel: string;
private generateContentConfig: GenerateContentConfig = {
temperature: 0,
@@ -62,7 +61,6 @@ export class GeminiClient {
setGlobalDispatcher(new ProxyAgent(config.getProxy() as string));
}
this.model = config.getModel();
this.embeddingModel = config.getEmbeddingModel();
}
@@ -187,7 +185,9 @@ export class GeminiClient {
try {
const userMemory = this.config.getUserMemory();
const systemInstruction = getCoreSystemPrompt(userMemory);
const generateContentConfigWithThinking = isThinkingSupported(this.model)
const generateContentConfigWithThinking = isThinkingSupported(
this.config.getModel(),
)
? {
...this.generateContentConfig,
thinkingConfig: {
@@ -345,7 +345,7 @@ export class GeminiClient {
generationConfig: GenerateContentConfig,
abortSignal: AbortSignal,
): Promise<GenerateContentResponse> {
const modelToUse = this.model;
const modelToUse = this.config.getModel();
const configToUse: GenerateContentConfig = {
...this.generateContentConfig,
...generationConfig,
@@ -439,13 +439,15 @@ export class GeminiClient {
return null;
}
const model = this.config.getModel();
let { totalTokens: originalTokenCount } =
await this.getContentGenerator().countTokens({
model: this.model,
model,
contents: curatedHistory,
});
if (originalTokenCount === undefined) {
console.warn(`Could not determine token count for model ${this.model}.`);
console.warn(`Could not determine token count for model ${model}.`);
originalTokenCount = 0;
}
@@ -453,7 +455,7 @@ export class GeminiClient {
if (
!force &&
originalTokenCount <
this.TOKEN_THRESHOLD_FOR_SUMMARIZATION * tokenLimit(this.model)
this.TOKEN_THRESHOLD_FOR_SUMMARIZATION * tokenLimit(model)
) {
return null;
}
@@ -479,7 +481,8 @@ export class GeminiClient {
const { totalTokens: newTokenCount } =
await this.getContentGenerator().countTokens({
model: this.model,
// model might change after calling `sendMessage`, so we get the newest value from config
model: this.config.getModel(),
contents: this.getChat().getHistory(),
});
if (newTokenCount === undefined) {
@@ -503,7 +506,7 @@ export class GeminiClient {
return null;
}
const currentModel = this.model;
const currentModel = this.config.getModel();
const fallbackModel = DEFAULT_GEMINI_FLASH_MODEL;
// Don't fallback if already using Flash model
@@ -518,7 +521,6 @@ export class GeminiClient {
const accepted = await fallbackHandler(currentModel, fallbackModel);
if (accepted) {
this.config.setModel(fallbackModel);
this.model = fallbackModel;
return fallbackModel;
}
} catch (error) {