feat: add max token limit #18

This commit is contained in:
kangfenmao 2024-08-07 20:49:21 +08:00
parent 6185068353
commit 07613e65f5
7 changed files with 170 additions and 23 deletions

View File

@@ -1,5 +1,6 @@
export const DEFAULT_TEMPERATURE = 0.7 export const DEFAULT_TEMPERATURE = 0.7
export const DEFAULT_CONEXTCOUNT = 5 export const DEFAULT_CONEXTCOUNT = 5
export const DEFAULT_MAX_TOKENS = 4096
export const FONT_FAMILY = export const FONT_FAMILY =
"Ubuntu, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif" "Ubuntu, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif"
export const platform = window.electron?.process?.platform === 'darwin' ? 'macos' : 'windows' export const platform = window.electron?.process?.platform === 'darwin' ? 'macos' : 'windows'

View File

@@ -78,6 +78,9 @@ const resources = {
'Lower values make the model more creative and unpredictable, while higher values make it more deterministic and precise.', 'Lower values make the model more creative and unpredictable, while higher values make it more deterministic and precise.',
'settings.conext_count': 'Context', 'settings.conext_count': 'Context',
'settings.conext_count.tip': 'The number of previous messages to keep in the context.', 'settings.conext_count.tip': 'The number of previous messages to keep in the context.',
'settings.max_tokens': 'Enable Max Tokens Limit',
'settings.max_tokens.tip':
'The maximum number of tokens the model can generate. Normal chat suggests 500-800. Short text generation suggests 800-2000. Code generation suggests 2000-3600. Long text generation suggests above 4000.',
'settings.reset': 'Reset', 'settings.reset': 'Reset',
'settings.set_as_default': 'Apply to default assistant', 'settings.set_as_default': 'Apply to default assistant',
'settings.max': 'Max', 'settings.max': 'Max',
@@ -285,7 +288,10 @@ const resources = {
'模型生成文本的随机程度。值越大,回复内容越赋有多样性、创造性、随机性;设为 0 根据事实回答。日常聊天建议设置为 0.7', '模型生成文本的随机程度。值越大,回复内容越赋有多样性、创造性、随机性;设为 0 根据事实回答。日常聊天建议设置为 0.7',
'settings.conext_count': '上下文数', 'settings.conext_count': '上下文数',
'settings.conext_count.tip': 'settings.conext_count.tip':
'要保留在上下文中的消息数量,数值越大,上下文越长,消耗的 token 越多。普通聊天建议 5-10,代码生成建议 5-10', '要保留在上下文中的消息数量,数值越大,上下文越长,消耗的 token 越多。普通聊天建议 5-10',
'settings.max_tokens': '开启消息长度限制',
'settings.max_tokens.tip':
'单次交互所用的最大 Token 数, 会影响返回结果的长度。普通聊天建议 500-800短文生成建议 800-2000代码生成建议 2000-3600长文生成建议切换模型到 4000 左右',
'settings.reset': '重置', 'settings.reset': '重置',
'settings.set_as_default': '应用到默认助手', 'settings.set_as_default': '应用到默认助手',
'settings.max': '不限', 'settings.max': '不限',

View File

@@ -1,11 +1,12 @@
import { QuestionCircleOutlined, ReloadOutlined } from '@ant-design/icons' import { QuestionCircleOutlined, ReloadOutlined } from '@ant-design/icons'
import { DEFAULT_CONEXTCOUNT, DEFAULT_TEMPERATURE } from '@renderer/config/constant' import { HStack } from '@renderer/components/Layout'
import { DEFAULT_CONEXTCOUNT, DEFAULT_MAX_TOKENS, DEFAULT_TEMPERATURE } from '@renderer/config/constant'
import { useAssistant } from '@renderer/hooks/useAssistant' import { useAssistant } from '@renderer/hooks/useAssistant'
import { useSettings } from '@renderer/hooks/useSettings' import { useSettings } from '@renderer/hooks/useSettings'
import { SettingDivider, SettingRow, SettingRowTitle, SettingSubtitle } from '@renderer/pages/settings/components' import { SettingDivider, SettingRow, SettingRowTitle, SettingSubtitle } from '@renderer/pages/settings/components'
import { useAppDispatch } from '@renderer/store' import { useAppDispatch } from '@renderer/store'
import { setMessageFont, setShowInputEstimatedTokens, setShowMessageDivider } from '@renderer/store/settings' import { setMessageFont, setShowInputEstimatedTokens, setShowMessageDivider } from '@renderer/store/settings'
import { Assistant } from '@renderer/types' import { Assistant, AssistantSettings } from '@renderer/types'
import { Col, InputNumber, Row, Slider, Switch, Tooltip } from 'antd' import { Col, InputNumber, Row, Slider, Switch, Tooltip } from 'antd'
import { debounce } from 'lodash' import { debounce } from 'lodash'
import { FC, useCallback, useEffect, useState } from 'react' import { FC, useCallback, useEffect, useState } from 'react'
@@ -20,6 +21,8 @@ const SettingsTab: FC<Props> = (props) => {
const { assistant, updateAssistantSettings, updateAssistant } = useAssistant(props.assistant.id) const { assistant, updateAssistantSettings, updateAssistant } = useAssistant(props.assistant.id)
const [temperature, setTemperature] = useState(assistant?.settings?.temperature ?? DEFAULT_TEMPERATURE) const [temperature, setTemperature] = useState(assistant?.settings?.temperature ?? DEFAULT_TEMPERATURE)
const [contextCount, setConextCount] = useState(assistant?.settings?.contextCount ?? DEFAULT_CONEXTCOUNT) const [contextCount, setConextCount] = useState(assistant?.settings?.contextCount ?? DEFAULT_CONEXTCOUNT)
const [enableMaxTokens, setEnableMaxTokens] = useState(assistant?.settings?.enableMaxTokens ?? false)
const [maxTokens, setMaxTokens] = useState(assistant?.settings?.maxTokens ?? 0)
const { t } = useTranslation() const { t } = useTranslation()
const dispatch = useAppDispatch() const dispatch = useAppDispatch()
@@ -28,11 +31,13 @@ const SettingsTab: FC<Props> = (props) => {
const onUpdateAssistantSettings = useCallback( const onUpdateAssistantSettings = useCallback(
debounce( debounce(
({ _temperature, _contextCount }: { _temperature?: number; _contextCount?: number }) => { (settings: Partial<AssistantSettings>) => {
updateAssistantSettings({ updateAssistantSettings({
...assistant.settings, ...assistant.settings,
temperature: _temperature ?? temperature, temperature: settings.temperature ?? temperature,
contextCount: _contextCount ?? contextCount contextCount: settings.contextCount ?? contextCount,
enableMaxTokens: settings.enableMaxTokens ?? enableMaxTokens,
maxTokens: settings.maxTokens ?? maxTokens
}) })
}, },
1000, 1000,
@@ -47,14 +52,21 @@ const SettingsTab: FC<Props> = (props) => {
const onTemperatureChange = (value) => { const onTemperatureChange = (value) => {
if (!isNaN(value as number)) { if (!isNaN(value as number)) {
setTemperature(value) setTemperature(value)
onUpdateAssistantSettings({ _temperature: value }) onUpdateAssistantSettings({ temperature: value })
} }
} }
const onConextCountChange = (value) => { const onConextCountChange = (value) => {
if (!isNaN(value as number)) { if (!isNaN(value as number)) {
setConextCount(value) setConextCount(value)
onUpdateAssistantSettings({ _contextCount: value }) onUpdateAssistantSettings({ contextCount: value })
}
}
const onMaxTokensChange = (value) => {
if (!isNaN(value as number)) {
setMaxTokens(value)
onUpdateAssistantSettings({ maxTokens: value })
} }
} }
@@ -66,7 +78,9 @@ const SettingsTab: FC<Props> = (props) => {
settings: { settings: {
...assistant.settings, ...assistant.settings,
temperature: DEFAULT_TEMPERATURE, temperature: DEFAULT_TEMPERATURE,
contextCount: DEFAULT_CONEXTCOUNT contextCount: DEFAULT_CONEXTCOUNT,
enableMaxTokens: false,
maxTokens: DEFAULT_MAX_TOKENS
} }
}) })
} }
@@ -74,6 +88,8 @@ const SettingsTab: FC<Props> = (props) => {
useEffect(() => { useEffect(() => {
setTemperature(assistant?.settings?.temperature ?? DEFAULT_TEMPERATURE) setTemperature(assistant?.settings?.temperature ?? DEFAULT_TEMPERATURE)
setConextCount(assistant?.settings?.contextCount ?? DEFAULT_CONEXTCOUNT) setConextCount(assistant?.settings?.contextCount ?? DEFAULT_CONEXTCOUNT)
setEnableMaxTokens(assistant?.settings?.enableMaxTokens ?? false)
setMaxTokens(assistant?.settings?.maxTokens ?? DEFAULT_MAX_TOKENS)
}, [assistant]) }, [assistant])
return ( return (
@@ -110,6 +126,7 @@ const SettingsTab: FC<Props> = (props) => {
value={temperature} value={temperature}
onChange={onTemperatureChange} onChange={onTemperatureChange}
controls={false} controls={false}
size="small"
/> />
</Col> </Col>
</Row> </Row>
@@ -138,9 +155,51 @@ const SettingsTab: FC<Props> = (props) => {
value={contextCount} value={contextCount}
onChange={onConextCountChange} onChange={onConextCountChange}
controls={false} controls={false}
size="small"
/> />
</Col> </Col>
</Row> </Row>
<Row align="middle" justify="space-between" style={{ marginBottom: 8 }}>
<HStack alignItems="center">
<Label>{t('chat.settings.max_tokens')}</Label>
<Tooltip title={t('chat.settings.max_tokens.tip')}>
<QuestionIcon />
</Tooltip>
</HStack>
<Switch
size="small"
checked={enableMaxTokens}
onChange={(enabled) => {
setEnableMaxTokens(enabled)
onUpdateAssistantSettings({ enableMaxTokens: enabled })
}}
/>
</Row>
{enableMaxTokens && (
<Row align="middle" gutter={10}>
<Col span={16}>
<Slider
min={0}
max={32000}
onChange={onMaxTokensChange}
value={typeof maxTokens === 'number' ? maxTokens : 0}
step={100}
/>
</Col>
<Col span={8}>
<InputNumber
min={0}
max={32000}
step={100}
value={maxTokens}
onChange={onMaxTokensChange}
controls={true}
style={{ width: '100%' }}
size="small"
/>
</Col>
</Row>
)}
<SettingSubtitle>{t('settings.messages.title')}</SettingSubtitle> <SettingSubtitle>{t('settings.messages.title')}</SettingSubtitle>
<SettingDivider /> <SettingDivider />
<SettingRow> <SettingRow>

View File

@@ -1,7 +1,9 @@
import { QuestionCircleOutlined } from '@ant-design/icons' import { QuestionCircleOutlined } from '@ant-design/icons'
import { DEFAULT_CONEXTCOUNT, DEFAULT_TEMPERATURE } from '@renderer/config/constant' import { HStack } from '@renderer/components/Layout'
import { DEFAULT_CONEXTCOUNT, DEFAULT_MAX_TOKENS, DEFAULT_TEMPERATURE } from '@renderer/config/constant'
import { useDefaultAssistant } from '@renderer/hooks/useAssistant' import { useDefaultAssistant } from '@renderer/hooks/useAssistant'
import { Button, Col, Input, InputNumber, Row, Slider, Tooltip } from 'antd' import { AssistantSettings as AssistantSettingsType } from '@renderer/types'
import { Button, Col, Input, InputNumber, Row, Slider, Switch, Tooltip } from 'antd'
import TextArea from 'antd/es/input/TextArea' import TextArea from 'antd/es/input/TextArea'
import { debounce } from 'lodash' import { debounce } from 'lodash'
import { FC, useCallback, useState } from 'react' import { FC, useCallback, useState } from 'react'
@@ -14,18 +16,22 @@ const AssistantSettings: FC = () => {
const { defaultAssistant, updateDefaultAssistant } = useDefaultAssistant() const { defaultAssistant, updateDefaultAssistant } = useDefaultAssistant()
const [temperature, setTemperature] = useState(defaultAssistant.settings?.temperature ?? DEFAULT_TEMPERATURE) const [temperature, setTemperature] = useState(defaultAssistant.settings?.temperature ?? DEFAULT_TEMPERATURE)
const [contextCount, setConextCount] = useState(defaultAssistant.settings?.contextCount ?? DEFAULT_CONEXTCOUNT) const [contextCount, setConextCount] = useState(defaultAssistant.settings?.contextCount ?? DEFAULT_CONEXTCOUNT)
const [enableMaxTokens, setEnableMaxTokens] = useState(defaultAssistant?.settings?.enableMaxTokens ?? false)
const [maxTokens, setMaxTokens] = useState(defaultAssistant?.settings?.maxTokens ?? 0)
const { t } = useTranslation() const { t } = useTranslation()
const onUpdateAssistantSettings = useCallback( const onUpdateAssistantSettings = useCallback(
debounce( debounce(
({ _temperature, _contextCount }: { _temperature?: number; _contextCount?: number }) => { (settings: Partial<AssistantSettingsType>) => {
updateDefaultAssistant({ updateDefaultAssistant({
...defaultAssistant, ...defaultAssistant,
settings: { settings: {
...defaultAssistant.settings, ...defaultAssistant.settings,
temperature: _temperature ?? temperature, temperature: settings.temperature ?? temperature,
contextCount: _contextCount ?? contextCount contextCount: settings.contextCount ?? contextCount,
enableMaxTokens: settings.enableMaxTokens ?? enableMaxTokens,
maxTokens: settings.maxTokens ?? maxTokens
} }
}) })
}, },
@@ -38,14 +44,21 @@ const AssistantSettings: FC = () => {
const onTemperatureChange = (value) => { const onTemperatureChange = (value) => {
if (!isNaN(value as number)) { if (!isNaN(value as number)) {
setTemperature(value) setTemperature(value)
onUpdateAssistantSettings({ _temperature: value }) onUpdateAssistantSettings({ temperature: value })
} }
} }
const onConextCountChange = (value) => { const onConextCountChange = (value) => {
if (!isNaN(value as number)) { if (!isNaN(value as number)) {
setConextCount(value) setConextCount(value)
onUpdateAssistantSettings({ _contextCount: value }) onUpdateAssistantSettings({ contextCount: value })
}
}
const onMaxTokensChange = (value) => {
if (!isNaN(value as number)) {
setMaxTokens(value)
onUpdateAssistantSettings({ maxTokens: value })
} }
} }
@@ -57,7 +70,9 @@ const AssistantSettings: FC = () => {
settings: { settings: {
...defaultAssistant.settings, ...defaultAssistant.settings,
temperature: DEFAULT_TEMPERATURE, temperature: DEFAULT_TEMPERATURE,
contextCount: DEFAULT_CONEXTCOUNT contextCount: DEFAULT_CONEXTCOUNT,
enableMaxTokens: false,
maxTokens: DEFAULT_MAX_TOKENS
} }
}) })
} }
@@ -80,7 +95,19 @@ const AssistantSettings: FC = () => {
onChange={(e) => updateDefaultAssistant({ ...defaultAssistant, prompt: e.target.value })} onChange={(e) => updateDefaultAssistant({ ...defaultAssistant, prompt: e.target.value })}
/> />
<SettingDivider /> <SettingDivider />
<SettingSubtitle style={{ marginTop: 0 }}>{t('settings.assistant.model_params')}</SettingSubtitle> <SettingSubtitle
style={{
marginTop: 0,
marginBottom: 20,
display: 'flex',
flexDirection: 'row',
justifyContent: 'space-between'
}}>
<span>{t('settings.assistant.model_params')}</span>
<Button onClick={onReset} style={{ width: 90 }}>
{t('chat.settings.reset')}
</Button>
</SettingSubtitle>
<Row align="middle"> <Row align="middle">
<Label>{t('chat.settings.temperature')}</Label> <Label>{t('chat.settings.temperature')}</Label>
<Tooltip title={t('chat.settings.temperature.tip')}> <Tooltip title={t('chat.settings.temperature.tip')}>
@@ -137,9 +164,46 @@ const AssistantSettings: FC = () => {
/> />
</Col> </Col>
</Row> </Row>
<Button onClick={onReset} style={{ width: 100 }}> <Row align="middle">
{t('chat.settings.reset')} <HStack alignItems="center">
</Button> <Label>{t('chat.settings.max_tokens')}</Label>
<Tooltip title={t('chat.settings.max_tokens.tip')}>
<QuestionIcon />
</Tooltip>
</HStack>
<Switch
style={{ marginLeft: 10 }}
checked={enableMaxTokens}
onChange={(enabled) => {
setEnableMaxTokens(enabled)
onUpdateAssistantSettings({ enableMaxTokens: enabled })
}}
/>
</Row>
{enableMaxTokens && (
<Row align="middle" gutter={20}>
<Col span={22}>
<Slider
min={0}
max={32000}
onChange={onMaxTokensChange}
value={typeof maxTokens === 'number' ? maxTokens : 0}
step={100}
/>
</Col>
<Col span={2}>
<InputNumber
min={0}
max={32000}
step={100}
value={maxTokens}
onChange={onMaxTokensChange}
controls={true}
style={{ width: '100%' }}
/>
</Col>
</Row>
)}
</SettingContainer> </SettingContainer>
) )
} }

View File

@@ -1,5 +1,6 @@
import Anthropic from '@anthropic-ai/sdk' import Anthropic from '@anthropic-ai/sdk'
import { MessageCreateParamsNonStreaming, MessageParam } from '@anthropic-ai/sdk/resources' import { MessageCreateParamsNonStreaming, MessageParam } from '@anthropic-ai/sdk/resources'
import { DEFAULT_MAX_TOKENS } from '@renderer/config/constant'
import { getOllamaKeepAliveTime } from '@renderer/hooks/useOllama' import { getOllamaKeepAliveTime } from '@renderer/hooks/useOllama'
import { Assistant, Message, Provider, Suggestion } from '@renderer/types' import { Assistant, Message, Provider, Suggestion } from '@renderer/types'
import { getAssistantSettings, removeQuotes } from '@renderer/utils' import { getAssistantSettings, removeQuotes } from '@renderer/utils'
@@ -7,7 +8,7 @@ import { sum, takeRight } from 'lodash'
import OpenAI from 'openai' import OpenAI from 'openai'
import { ChatCompletionCreateParamsNonStreaming, ChatCompletionMessageParam } from 'openai/resources' import { ChatCompletionCreateParamsNonStreaming, ChatCompletionMessageParam } from 'openai/resources'
import { getDefaultModel, getTopNamingModel } from './assistant' import { getAssistantMaxTokens, getDefaultModel, getTopNamingModel } from './assistant'
import { EVENT_NAMES } from './event' import { EVENT_NAMES } from './event'
export default class ProviderSDK { export default class ProviderSDK {
@@ -52,7 +53,7 @@ export default class ProviderSDK {
.stream({ .stream({
model: model.id, model: model.id,
messages: [systemMessage, ...userMessages].filter(Boolean) as MessageParam[], messages: [systemMessage, ...userMessages].filter(Boolean) as MessageParam[],
max_tokens: 4096, max_tokens: getAssistantMaxTokens(assistant) || DEFAULT_MAX_TOKENS,
temperature: assistant?.settings?.temperature temperature: assistant?.settings?.temperature
}) })
.on('text', (text) => onChunk({ text: text || '' })) .on('text', (text) => onChunk({ text: text || '' }))
@@ -72,6 +73,7 @@ export default class ProviderSDK {
messages: [systemMessage, ...userMessages].filter(Boolean) as ChatCompletionMessageParam[], messages: [systemMessage, ...userMessages].filter(Boolean) as ChatCompletionMessageParam[],
stream: true, stream: true,
temperature: assistant?.settings?.temperature, temperature: assistant?.settings?.temperature,
max_tokens: getAssistantMaxTokens(assistant),
keep_alive: this.keepAliveTime keep_alive: this.keepAliveTime
}) })
for await (const chunk of stream) { for await (const chunk of stream) {

View File

@@ -1,3 +1,4 @@
import { DEFAULT_MAX_TOKENS } from '@renderer/config/constant'
import i18n from '@renderer/i18n' import i18n from '@renderer/i18n'
import store from '@renderer/store' import store from '@renderer/store'
import { updateAgent } from '@renderer/store/agents' import { updateAgent } from '@renderer/store/agents'
@@ -56,6 +57,18 @@ export function getProviderByModelId(modelId?: string) {
return providers.find((p) => p.models.find((m) => m.id === _modelId)) as Provider return providers.find((p) => p.models.find((m) => m.id === _modelId)) as Provider
} }
// Resolve the effective max-token cap for an assistant.
// Returns undefined when the "enable max tokens" toggle is off, so callers
// can fall back to their own default (or omit the cap entirely).
// When enabled, any non-numeric or too-small value (100 or less) is treated
// as misconfiguration and replaced with DEFAULT_MAX_TOKENS.
export function getAssistantMaxTokens(assistant: Assistant) {
  if (!assistant.settings?.enableMaxTokens) {
    return undefined
  }
  const configured = assistant.settings.maxTokens
  const isUsable = typeof configured === 'number' && configured > 100
  return isUsable ? configured : DEFAULT_MAX_TOKENS
}
export function covertAgentToAssistant(agent: Agent): Assistant { export function covertAgentToAssistant(agent: Agent): Assistant {
return { return {
...getDefaultAssistant(), ...getDefaultAssistant(),

View File

@@ -14,6 +14,8 @@ export type Assistant = {
export type AssistantSettings = { export type AssistantSettings = {
contextCount: number contextCount: number
temperature: number temperature: number
maxTokens: number
enableMaxTokens: boolean
} }
export type Message = { export type Message = {