refactor: Improve reasoning effort configuration for Anthropic models
- Refactored the `getReasoningEffort` method to handle reasoning configuration more robustly
- Added type definitions for reasoning effort and reasoning configuration
- Simplified the logic for calculating budget tokens
- Improved the type safety and readability of the method
This commit is contained in:
parent
b974f8537f
commit
97dc80a07f
@ -14,6 +14,14 @@ import OpenAI from 'openai'
|
|||||||
|
|
||||||
import { CompletionsParams } from '.'
|
import { CompletionsParams } from '.'
|
||||||
import BaseProvider from './BaseProvider'
|
import BaseProvider from './BaseProvider'
|
||||||
|
|
||||||
|
type ReasoningEffort = 'high' | 'medium' | 'low'
|
||||||
|
|
||||||
|
interface ReasoningConfig {
|
||||||
|
type: 'enabled' | 'disabled'
|
||||||
|
budget_tokens?: number
|
||||||
|
}
|
||||||
|
|
||||||
export default class AnthropicProvider extends BaseProvider {
|
export default class AnthropicProvider extends BaseProvider {
|
||||||
private sdk: Anthropic
|
private sdk: Anthropic
|
||||||
|
|
||||||
@ -77,34 +85,38 @@ export default class AnthropicProvider extends BaseProvider {
|
|||||||
return assistant?.settings?.topP
|
return assistant?.settings?.topP
|
||||||
}
|
}
|
||||||
|
|
||||||
private getReasoningEffort(assistant: Assistant, model: Model) {
|
private getReasoningEffort(assistant: Assistant, model: Model): ReasoningConfig | undefined {
|
||||||
if (isReasoningModel(model)) {
|
if (!isReasoningModel(model)) {
|
||||||
const effort_ratio =
|
return undefined
|
||||||
assistant?.settings?.reasoning_effort === 'high'
|
|
||||||
? 0.8
|
|
||||||
: assistant?.settings?.reasoning_effort === 'medium'
|
|
||||||
? 0.5
|
|
||||||
: assistant?.settings?.reasoning_effort === 'low'
|
|
||||||
? 0.2
|
|
||||||
: undefined
|
|
||||||
if (!effort_ratio)
|
|
||||||
return {
|
|
||||||
type: 'disabled'
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (model.id.includes('claude-3.7-sonnet') || model.id.includes('claude-3-7-sonnet')) {
|
const effortRatios: Record<ReasoningEffort, number> = {
|
||||||
|
high: 0.8,
|
||||||
|
medium: 0.5,
|
||||||
|
low: 0.2
|
||||||
|
}
|
||||||
|
|
||||||
|
const effort = assistant?.settings?.reasoning_effort as ReasoningEffort
|
||||||
|
const effortRatio = effortRatios[effort]
|
||||||
|
|
||||||
|
if (!effortRatio) {
|
||||||
|
return undefined
|
||||||
|
}
|
||||||
|
|
||||||
|
const isClaude37Sonnet = model.id.includes('claude-3-7-sonnet') || model.id.includes('claude-3.7-sonnet')
|
||||||
|
|
||||||
|
if (!isClaude37Sonnet) {
|
||||||
|
return undefined
|
||||||
|
}
|
||||||
|
|
||||||
|
const maxTokens = assistant?.settings?.maxTokens || DEFAULT_MAX_TOKENS
|
||||||
|
const budgetTokens = Math.trunc(Math.max(Math.min(maxTokens * effortRatio, 32000), 1024))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
type: 'enabled',
|
type: 'enabled',
|
||||||
budget_tokens: Math.max(
|
budget_tokens: budgetTokens
|
||||||
Math.min((assistant?.settings?.maxTokens || DEFAULT_MAX_TOKENS) * effort_ratio, 32000),
|
|
||||||
1024
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return undefined
|
|
||||||
}
|
|
||||||
|
|
||||||
public async completions({ messages, assistant, onChunk, onFilterMessages }: CompletionsParams) {
|
public async completions({ messages, assistant, onChunk, onFilterMessages }: CompletionsParams) {
|
||||||
const defaultModel = getDefaultModel()
|
const defaultModel = getDefaultModel()
|
||||||
@ -137,8 +149,8 @@ export default class AnthropicProvider extends BaseProvider {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (isReasoningModel(model)) {
|
if (isReasoningModel(model)) {
|
||||||
;(body as any).thinking = this.getReasoningEffort(assistant, model)
|
// @ts-ignore thinking
|
||||||
;(body as any).betas = ['output-128k-2025-02-19']
|
body.thinking = this.getReasoningEffort(assistant, model)
|
||||||
}
|
}
|
||||||
|
|
||||||
let time_first_token_millsec = 0
|
let time_first_token_millsec = 0
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user