diff --git a/src/renderer/src/config/models.ts b/src/renderer/src/config/models.ts
index 0afc3d58..a503a60b 100644
--- a/src/renderer/src/config/models.ts
+++ b/src/renderer/src/config/models.ts
@@ -2224,7 +2224,8 @@ export function isSupportedReasoningEffortModel(model?: Model): boolean {
     model.id.includes('claude-3-7-sonnet') ||
     model.id.includes('claude-3.7-sonnet') ||
     isOpenAIoSeries(model) ||
-    isGrokReasoningModel(model)
+    isGrokReasoningModel(model) ||
+    isGemini25ReasoningModel(model)
   ) {
     return true
   }
@@ -2251,6 +2252,18 @@ export function isGrokReasoningModel(model?: Model): boolean {
   return false
 }
 
+export function isGemini25ReasoningModel(model?: Model): boolean {
+  if (!model) {
+    return false
+  }
+
+  if (model.id.includes('gemini-2.5')) {
+    return true
+  }
+
+  return false
+}
+
 export function isReasoningModel(model?: Model): boolean {
   if (!model) {
     return false
@@ -2264,7 +2277,7 @@ export function isReasoningModel(model?: Model): boolean {
     return true
   }
 
-  if (model.id.includes('gemini-2.5')) {
+  if (isGemini25ReasoningModel(model)) {
     return true
   }
 
diff --git a/src/renderer/src/providers/AiProvider/GeminiProvider.ts b/src/renderer/src/providers/AiProvider/GeminiProvider.ts
index e84f806f..eb117f50 100644
--- a/src/renderer/src/providers/AiProvider/GeminiProvider.ts
+++ b/src/renderer/src/providers/AiProvider/GeminiProvider.ts
@@ -10,9 +10,16 @@ import {
   Part,
   PartUnion,
   SafetySetting,
+  ThinkingConfig,
   ToolListUnion
 } from '@google/genai'
-import { isGemmaModel, isGenerateImageModel, isVisionModel, isWebSearchModel } from '@renderer/config/models'
+import {
+  isGemini25ReasoningModel,
+  isGemmaModel,
+  isGenerateImageModel,
+  isVisionModel,
+  isWebSearchModel
+} from '@renderer/config/models'
 import { getStoreSetting } from '@renderer/hooks/useSettings'
 import i18n from '@renderer/i18n'
 import { getAssistantSettings, getDefaultModel, getTopNamingModel } from '@renderer/services/AssistantService'
@@ -35,6 +42,8 @@ import OpenAI from 'openai'
 
 import { CompletionsParams } from '.'
 import BaseProvider from './BaseProvider'
 
+type ReasoningEffort = 'low' | 'medium' | 'high'
+
 export default class GeminiProvider extends BaseProvider {
   private sdk: GoogleGenAI
 
@@ -182,6 +191,41 @@ export default class GeminiProvider extends BaseProvider {
     ]
   }
 
+  /**
+   * Get the reasoning effort for the assistant
+   * @param assistant - The assistant
+   * @param model - The model
+   * @returns The reasoning effort
+   */
+  private getReasoningEffort(assistant: Assistant, model: Model) {
+    if (isGemini25ReasoningModel(model)) {
+      const effortRatios: Record<ReasoningEffort, number> = {
+        high: 1,
+        medium: 0.5,
+        low: 0.2
+      }
+      const effort = assistant?.settings?.reasoning_effort as ReasoningEffort
+      const effortRatio = effortRatios[effort]
+      const maxBudgetToken = 24576 // https://ai.google.dev/gemini-api/docs/thinking
+      const budgetTokens = Math.max(1024, Math.trunc(maxBudgetToken * effortRatio))
+      if (!effortRatio) {
+        return {
+          thinkingConfig: {
+            thinkingBudget: 0
+          } as ThinkingConfig
+        }
+      }
+
+      return {
+        thinkingConfig: {
+          thinkingBudget: budgetTokens,
+          includeThoughts: true
+        } as ThinkingConfig
+      }
+    }
+    return {}
+  }
+
   /**
    * Generate completions
    * @param messages - The messages
@@ -241,6 +285,7 @@ export default class GeminiProvider extends BaseProvider {
       topP: assistant?.settings?.topP,
       maxOutputTokens: maxTokens,
       tools: tools,
+      ...this.getReasoningEffort(assistant, model),
       ...this.getCustomParameters(assistant)
     }
 
@@ -308,6 +353,7 @@ export default class GeminiProvider extends BaseProvider {
       text: response.text,
       usage: {
         prompt_tokens: response.usageMetadata?.promptTokenCount || 0,
+        thoughts_tokens: response.usageMetadata?.thoughtsTokenCount || 0,
         completion_tokens: response.usageMetadata?.candidatesTokenCount || 0,
         total_tokens: response.usageMetadata?.totalTokenCount || 0
       },
@@ -384,6 +430,7 @@ export default class GeminiProvider extends BaseProvider {
       usage: {
         prompt_tokens: chunk.usageMetadata?.promptTokenCount || 0,
         completion_tokens: chunk.usageMetadata?.candidatesTokenCount || 0,
+        thoughts_tokens: chunk.usageMetadata?.thoughtsTokenCount || 0,
         total_tokens: chunk.usageMetadata?.totalTokenCount || 0
       },
       metrics: {
diff --git a/src/renderer/src/providers/AiProvider/OpenAIProvider.ts b/src/renderer/src/providers/AiProvider/OpenAIProvider.ts
index ea67862c..b4a15933 100644
--- a/src/renderer/src/providers/AiProvider/OpenAIProvider.ts
+++ b/src/renderer/src/providers/AiProvider/OpenAIProvider.ts
@@ -46,7 +46,7 @@ import {
 import { CompletionsParams } from '.'
 import BaseProvider from './BaseProvider'
 
-type ReasoningEffort = 'high' | 'medium' | 'low'
+type ReasoningEffort = 'low' | 'medium' | 'high'
 
 export default class OpenAIProvider extends BaseProvider {
   private sdk: OpenAI
diff --git a/src/renderer/src/providers/AiProvider/index.ts b/src/renderer/src/providers/AiProvider/index.ts
index c31ddae7..e518c218 100644
--- a/src/renderer/src/providers/AiProvider/index.ts
+++ b/src/renderer/src/providers/AiProvider/index.ts
@@ -11,14 +11,15 @@ import type {
   Metrics,
   Model,
   Provider,
-  Suggestion
+  Suggestion,
+  Usage
 } from '@renderer/types'
 import OpenAI from 'openai'
 
 export interface ChunkCallbackData {
   text?: string
   reasoning_content?: string
-  usage?: OpenAI.Completions.CompletionUsage
+  usage?: Usage
   metrics?: Metrics
   // Zhipu web search
   webSearch?: any[]
diff --git a/src/renderer/src/services/TokenService.ts b/src/renderer/src/services/TokenService.ts
index 4f4ac294..33346692 100644
--- a/src/renderer/src/services/TokenService.ts
+++ b/src/renderer/src/services/TokenService.ts
@@ -1,6 +1,5 @@
-import { Assistant, FileType, FileTypes, Message } from '@renderer/types'
+import { Assistant, FileType, FileTypes, Message, Usage } from '@renderer/types'
 import { flatten, takeRight } from 'lodash'
-import { CompletionUsage } from 'openai/resources'
 import { approximateTokenSize } from 'tokenx'
 
 import { getAssistantSettings } from './AssistantService'
@@ -52,7 +51,7 @@ export function estimateImageTokens(file: FileType) {
   return Math.floor(file.size / 100)
 }
 
-export async function estimateMessageUsage(message: Message): Promise<CompletionUsage> {
+export async function estimateMessageUsage(message: Message): Promise<Usage> {
   let imageTokens = 0
 
   if (message.files) {
@@ -80,17 +79,17 @@ export async function estimateMessagesUsage({
 }: {
   assistant: Assistant
   messages: Message[]
-}): Promise<CompletionUsage> {
+}): Promise<Usage> {
   const outputMessage = messages.pop()!
 
   const prompt_tokens = await estimateHistoryTokens(assistant, messages)
   const { completion_tokens } = await estimateMessageUsage(outputMessage)
 
   return {
-    prompt_tokens: await estimateHistoryTokens(assistant, messages),
+    prompt_tokens,
     completion_tokens,
     total_tokens: prompt_tokens + completion_tokens
-  } as CompletionUsage
+  } as Usage
 }
 
 export async function estimateHistoryTokens(assistant: Assistant, msgs: Message[]) {
diff --git a/src/renderer/src/types/index.ts b/src/renderer/src/types/index.ts
index ec1f5f96..83f34280 100644
--- a/src/renderer/src/types/index.ts
+++ b/src/renderer/src/types/index.ts
@@ -63,7 +63,7 @@ export type Message = {
   model?: Model
   files?: FileType[]
   images?: string[]
-  usage?: OpenAI.Completions.CompletionUsage
+  usage?: Usage
   metrics?: Metrics
   knowledgeBaseIds?: string[]
   type: 'text' | '@' | 'clear'
@@ -97,6 +97,10 @@ export type Message = {
   foldSelected?: boolean
 }
 
+export type Usage = OpenAI.Completions.CompletionUsage & {
+  thoughts_tokens?: number
+}
+
 export type Metrics = {
   completion_tokens?: number
   time_completion_millsec?: number
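
Note on the budget mapping: the effort-to-budget logic introduced in getReasoningEffort works out to a fixed set of thinking budgets. The standalone sketch below reproduces it for quick verification; thinkingBudgetFor is a hypothetical helper name, and only the 24576 ceiling, the 1024 floor, and the high/medium/low ratios come from the diff itself.

type ReasoningEffort = 'low' | 'medium' | 'high'

const MAX_BUDGET_TOKENS = 24576 // per https://ai.google.dev/gemini-api/docs/thinking

// Mirrors GeminiProvider.getReasoningEffort: an unset or unknown effort disables
// thinking entirely; otherwise the budget is a ratio of the ceiling, floored at 1024.
function thinkingBudgetFor(effort?: string): number {
  const effortRatios: Record<ReasoningEffort, number> = { high: 1, medium: 0.5, low: 0.2 }
  const ratio = effortRatios[effort as ReasoningEffort]
  if (!ratio) {
    return 0 // corresponds to thinkingConfig.thinkingBudget = 0 (thinking off)
  }
  return Math.max(1024, Math.trunc(MAX_BUDGET_TOKENS * ratio))
}

thinkingBudgetFor('high')    // 24576
thinkingBudgetFor('medium')  // 12288
thinkingBudgetFor('low')     // 4915
thinkingBudgetFor(undefined) // 0

One quirk worth noting in the diff as written: budgetTokens is computed before the !effortRatio guard, so Math.trunc(maxBudgetToken * undefined) briefly evaluates to NaN on that path; the value is unused there, so behavior is unaffected.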
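
On the usage side, Usage is now the OpenAI CompletionUsage shape plus an optional thoughts_tokens field, which both Gemini response paths populate from usageMetadata.thoughtsTokenCount. A minimal sketch of consuming it follows; formatUsage is a hypothetical display helper, not part of the diff.

import OpenAI from 'openai'

// Mirrors the new Usage type from src/renderer/src/types/index.ts.
type Usage = OpenAI.Completions.CompletionUsage & {
  thoughts_tokens?: number
}

// Hypothetical helper: mention thought tokens only when the provider
// actually reported them (non-Gemini providers leave the field unset).
function formatUsage(usage: Usage): string {
  const base = `prompt ${usage.prompt_tokens} / completion ${usage.completion_tokens} / total ${usage.total_tokens}`
  return usage.thoughts_tokens ? `${base} (incl. ${usage.thoughts_tokens} thought tokens)` : base
}

The TokenService hunk is a small cleanup along the way: prompt_tokens was already awaited into a local, but the return object awaited estimateHistoryTokens a second time; the diff reuses the local instead.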