feat: add max token limit #18

This commit is contained in:
kangfenmao 2024-08-07 20:49:21 +08:00
parent 6185068353
commit 07613e65f5
7 changed files with 170 additions and 23 deletions

View File

@ -1,5 +1,6 @@
// Default sampling temperature for new assistants.
export const DEFAULT_TEMPERATURE = 0.7
// Default number of previous messages kept in the model context.
// NOTE(review): "CONEXT" spelling is used project-wide; renaming would break callers.
export const DEFAULT_CONEXTCOUNT = 5
// Fallback max_tokens sent to providers when no valid per-assistant limit is set.
export const DEFAULT_MAX_TOKENS = 4096
// Shared UI font stack for the renderer.
export const FONT_FAMILY =
"Ubuntu, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif"
// Coarse platform flag: anything that is not darwin is treated as windows — TODO confirm linux handling.
export const platform = window.electron?.process?.platform === 'darwin' ? 'macos' : 'windows'

View File

@ -78,6 +78,9 @@ const resources = {
'Lower values make the model more creative and unpredictable, while higher values make it more deterministic and precise.',
'settings.conext_count': 'Context',
'settings.conext_count.tip': 'The number of previous messages to keep in the context.',
'settings.max_tokens': 'Enable Max Tokens Limit',
'settings.max_tokens.tip':
'The maximum number of tokens the model can generate. Normal chat suggests 500-800. Short text generation suggests 800-2000. Code generation suggests 2000-3600. Long text generation suggests above 4000.',
'settings.reset': 'Reset',
'settings.set_as_default': 'Apply to default assistant',
'settings.max': 'Max',
@ -285,7 +288,10 @@ const resources = {
'模型生成文本的随机程度。值越大,回复内容越赋有多样性、创造性、随机性;设为 0 根据事实回答。日常聊天建议设置为 0.7',
'settings.conext_count': '上下文数',
'settings.conext_count.tip':
'要保留在上下文中的消息数量,数值越大,上下文越长,消耗的 token 越多。普通聊天建议 5-10代码生成建议 5-10',
'要保留在上下文中的消息数量,数值越大,上下文越长,消耗的 token 越多。普通聊天建议 5-10',
'settings.max_tokens': '开启消息长度限制',
'settings.max_tokens.tip':
'单次交互所用的最大 Token 数, 会影响返回结果的长度。普通聊天建议 500-800短文生成建议 800-2000代码生成建议 2000-3600长文生成建议切换模型到 4000 左右',
'settings.reset': '重置',
'settings.set_as_default': '应用到默认助手',
'settings.max': '不限',

View File

@ -1,11 +1,12 @@
import { QuestionCircleOutlined, ReloadOutlined } from '@ant-design/icons'
import { DEFAULT_CONEXTCOUNT, DEFAULT_TEMPERATURE } from '@renderer/config/constant'
import { HStack } from '@renderer/components/Layout'
import { DEFAULT_CONEXTCOUNT, DEFAULT_MAX_TOKENS, DEFAULT_TEMPERATURE } from '@renderer/config/constant'
import { useAssistant } from '@renderer/hooks/useAssistant'
import { useSettings } from '@renderer/hooks/useSettings'
import { SettingDivider, SettingRow, SettingRowTitle, SettingSubtitle } from '@renderer/pages/settings/components'
import { useAppDispatch } from '@renderer/store'
import { setMessageFont, setShowInputEstimatedTokens, setShowMessageDivider } from '@renderer/store/settings'
import { Assistant } from '@renderer/types'
import { Assistant, AssistantSettings } from '@renderer/types'
import { Col, InputNumber, Row, Slider, Switch, Tooltip } from 'antd'
import { debounce } from 'lodash'
import { FC, useCallback, useEffect, useState } from 'react'
@ -20,6 +21,8 @@ const SettingsTab: FC<Props> = (props) => {
const { assistant, updateAssistantSettings, updateAssistant } = useAssistant(props.assistant.id)
const [temperature, setTemperature] = useState(assistant?.settings?.temperature ?? DEFAULT_TEMPERATURE)
const [contextCount, setConextCount] = useState(assistant?.settings?.contextCount ?? DEFAULT_CONEXTCOUNT)
const [enableMaxTokens, setEnableMaxTokens] = useState(assistant?.settings?.enableMaxTokens ?? false)
const [maxTokens, setMaxTokens] = useState(assistant?.settings?.maxTokens ?? 0)
const { t } = useTranslation()
const dispatch = useAppDispatch()
@ -28,11 +31,13 @@ const SettingsTab: FC<Props> = (props) => {
const onUpdateAssistantSettings = useCallback(
debounce(
({ _temperature, _contextCount }: { _temperature?: number; _contextCount?: number }) => {
(settings: Partial<AssistantSettings>) => {
updateAssistantSettings({
...assistant.settings,
temperature: _temperature ?? temperature,
contextCount: _contextCount ?? contextCount
temperature: settings.temperature ?? temperature,
contextCount: settings.contextCount ?? contextCount,
enableMaxTokens: settings.enableMaxTokens ?? enableMaxTokens,
maxTokens: settings.maxTokens ?? maxTokens
})
},
1000,
@ -47,14 +52,21 @@ const SettingsTab: FC<Props> = (props) => {
const onTemperatureChange = (value) => {
if (!isNaN(value as number)) {
setTemperature(value)
onUpdateAssistantSettings({ _temperature: value })
onUpdateAssistantSettings({ temperature: value })
}
}
const onConextCountChange = (value) => {
if (!isNaN(value as number)) {
setConextCount(value)
onUpdateAssistantSettings({ _contextCount: value })
onUpdateAssistantSettings({ contextCount: value })
}
}
// Persist a new max-tokens value: update local state, then debounce-save to the assistant.
const onMaxTokensChange = (value) => {
  if (isNaN(value as number)) {
    return
  }
  setMaxTokens(value)
  onUpdateAssistantSettings({ maxTokens: value })
}
@ -66,7 +78,9 @@ const SettingsTab: FC<Props> = (props) => {
settings: {
...assistant.settings,
temperature: DEFAULT_TEMPERATURE,
contextCount: DEFAULT_CONEXTCOUNT
contextCount: DEFAULT_CONEXTCOUNT,
enableMaxTokens: false,
maxTokens: DEFAULT_MAX_TOKENS
}
})
}
@ -74,6 +88,8 @@ const SettingsTab: FC<Props> = (props) => {
useEffect(() => {
setTemperature(assistant?.settings?.temperature ?? DEFAULT_TEMPERATURE)
setConextCount(assistant?.settings?.contextCount ?? DEFAULT_CONEXTCOUNT)
setEnableMaxTokens(assistant?.settings?.enableMaxTokens ?? false)
setMaxTokens(assistant?.settings?.maxTokens ?? DEFAULT_MAX_TOKENS)
}, [assistant])
return (
@ -110,6 +126,7 @@ const SettingsTab: FC<Props> = (props) => {
value={temperature}
onChange={onTemperatureChange}
controls={false}
size="small"
/>
</Col>
</Row>
@ -138,9 +155,51 @@ const SettingsTab: FC<Props> = (props) => {
value={contextCount}
onChange={onConextCountChange}
controls={false}
size="small"
/>
</Col>
</Row>
<Row align="middle" justify="space-between" style={{ marginBottom: 8 }}>
<HStack alignItems="center">
<Label>{t('chat.settings.max_tokens')}</Label>
<Tooltip title={t('chat.settings.max_tokens.tip')}>
<QuestionIcon />
</Tooltip>
</HStack>
<Switch
size="small"
checked={enableMaxTokens}
onChange={(enabled) => {
setEnableMaxTokens(enabled)
onUpdateAssistantSettings({ enableMaxTokens: enabled })
}}
/>
</Row>
{enableMaxTokens && (
<Row align="middle" gutter={10}>
<Col span={16}>
<Slider
min={0}
max={32000}
onChange={onMaxTokensChange}
value={typeof maxTokens === 'number' ? maxTokens : 0}
step={100}
/>
</Col>
<Col span={8}>
<InputNumber
min={0}
max={32000}
step={100}
value={maxTokens}
onChange={onMaxTokensChange}
controls={true}
style={{ width: '100%' }}
size="small"
/>
</Col>
</Row>
)}
<SettingSubtitle>{t('settings.messages.title')}</SettingSubtitle>
<SettingDivider />
<SettingRow>

View File

@ -1,7 +1,9 @@
import { QuestionCircleOutlined } from '@ant-design/icons'
import { DEFAULT_CONEXTCOUNT, DEFAULT_TEMPERATURE } from '@renderer/config/constant'
import { HStack } from '@renderer/components/Layout'
import { DEFAULT_CONEXTCOUNT, DEFAULT_MAX_TOKENS, DEFAULT_TEMPERATURE } from '@renderer/config/constant'
import { useDefaultAssistant } from '@renderer/hooks/useAssistant'
import { Button, Col, Input, InputNumber, Row, Slider, Tooltip } from 'antd'
import { AssistantSettings as AssistantSettingsType } from '@renderer/types'
import { Button, Col, Input, InputNumber, Row, Slider, Switch, Tooltip } from 'antd'
import TextArea from 'antd/es/input/TextArea'
import { debounce } from 'lodash'
import { FC, useCallback, useState } from 'react'
@ -14,18 +16,22 @@ const AssistantSettings: FC = () => {
const { defaultAssistant, updateDefaultAssistant } = useDefaultAssistant()
const [temperature, setTemperature] = useState(defaultAssistant.settings?.temperature ?? DEFAULT_TEMPERATURE)
const [contextCount, setConextCount] = useState(defaultAssistant.settings?.contextCount ?? DEFAULT_CONEXTCOUNT)
const [enableMaxTokens, setEnableMaxTokens] = useState(defaultAssistant?.settings?.enableMaxTokens ?? false)
const [maxTokens, setMaxTokens] = useState(defaultAssistant?.settings?.maxTokens ?? 0)
const { t } = useTranslation()
const onUpdateAssistantSettings = useCallback(
debounce(
({ _temperature, _contextCount }: { _temperature?: number; _contextCount?: number }) => {
(settings: Partial<AssistantSettingsType>) => {
updateDefaultAssistant({
...defaultAssistant,
settings: {
...defaultAssistant.settings,
temperature: _temperature ?? temperature,
contextCount: _contextCount ?? contextCount
temperature: settings.temperature ?? temperature,
contextCount: settings.contextCount ?? contextCount,
enableMaxTokens: settings.enableMaxTokens ?? enableMaxTokens,
maxTokens: settings.maxTokens ?? maxTokens
}
})
},
@ -38,14 +44,21 @@ const AssistantSettings: FC = () => {
const onTemperatureChange = (value) => {
if (!isNaN(value as number)) {
setTemperature(value)
onUpdateAssistantSettings({ _temperature: value })
onUpdateAssistantSettings({ temperature: value })
}
}
const onConextCountChange = (value) => {
if (!isNaN(value as number)) {
setConextCount(value)
onUpdateAssistantSettings({ _contextCount: value })
onUpdateAssistantSettings({ contextCount: value })
}
}
// Persist a new max-tokens value: update local state, then debounce-save to the default assistant.
const onMaxTokensChange = (value) => {
  if (isNaN(value as number)) {
    return
  }
  setMaxTokens(value)
  onUpdateAssistantSettings({ maxTokens: value })
}
@ -57,7 +70,9 @@ const AssistantSettings: FC = () => {
settings: {
...defaultAssistant.settings,
temperature: DEFAULT_TEMPERATURE,
contextCount: DEFAULT_CONEXTCOUNT
contextCount: DEFAULT_CONEXTCOUNT,
enableMaxTokens: false,
maxTokens: DEFAULT_MAX_TOKENS
}
})
}
@ -80,7 +95,19 @@ const AssistantSettings: FC = () => {
onChange={(e) => updateDefaultAssistant({ ...defaultAssistant, prompt: e.target.value })}
/>
<SettingDivider />
<SettingSubtitle style={{ marginTop: 0 }}>{t('settings.assistant.model_params')}</SettingSubtitle>
<SettingSubtitle
style={{
marginTop: 0,
marginBottom: 20,
display: 'flex',
flexDirection: 'row',
justifyContent: 'space-between'
}}>
<span>{t('settings.assistant.model_params')}</span>
<Button onClick={onReset} style={{ width: 90 }}>
{t('chat.settings.reset')}
</Button>
</SettingSubtitle>
<Row align="middle">
<Label>{t('chat.settings.temperature')}</Label>
<Tooltip title={t('chat.settings.temperature.tip')}>
@ -137,9 +164,46 @@ const AssistantSettings: FC = () => {
/>
</Col>
</Row>
<Button onClick={onReset} style={{ width: 100 }}>
{t('chat.settings.reset')}
</Button>
<Row align="middle">
<HStack alignItems="center">
<Label>{t('chat.settings.max_tokens')}</Label>
<Tooltip title={t('chat.settings.max_tokens.tip')}>
<QuestionIcon />
</Tooltip>
</HStack>
<Switch
style={{ marginLeft: 10 }}
checked={enableMaxTokens}
onChange={(enabled) => {
setEnableMaxTokens(enabled)
onUpdateAssistantSettings({ enableMaxTokens: enabled })
}}
/>
</Row>
{enableMaxTokens && (
<Row align="middle" gutter={20}>
<Col span={22}>
<Slider
min={0}
max={32000}
onChange={onMaxTokensChange}
value={typeof maxTokens === 'number' ? maxTokens : 0}
step={100}
/>
</Col>
<Col span={2}>
<InputNumber
min={0}
max={32000}
step={100}
value={maxTokens}
onChange={onMaxTokensChange}
controls={true}
style={{ width: '100%' }}
/>
</Col>
</Row>
)}
</SettingContainer>
)
}

View File

@ -1,5 +1,6 @@
import Anthropic from '@anthropic-ai/sdk'
import { MessageCreateParamsNonStreaming, MessageParam } from '@anthropic-ai/sdk/resources'
import { DEFAULT_MAX_TOKENS } from '@renderer/config/constant'
import { getOllamaKeepAliveTime } from '@renderer/hooks/useOllama'
import { Assistant, Message, Provider, Suggestion } from '@renderer/types'
import { getAssistantSettings, removeQuotes } from '@renderer/utils'
@ -7,7 +8,7 @@ import { sum, takeRight } from 'lodash'
import OpenAI from 'openai'
import { ChatCompletionCreateParamsNonStreaming, ChatCompletionMessageParam } from 'openai/resources'
import { getDefaultModel, getTopNamingModel } from './assistant'
import { getAssistantMaxTokens, getDefaultModel, getTopNamingModel } from './assistant'
import { EVENT_NAMES } from './event'
export default class ProviderSDK {
@ -52,7 +53,7 @@ export default class ProviderSDK {
.stream({
model: model.id,
messages: [systemMessage, ...userMessages].filter(Boolean) as MessageParam[],
max_tokens: 4096,
max_tokens: getAssistantMaxTokens(assistant) || DEFAULT_MAX_TOKENS,
temperature: assistant?.settings?.temperature
})
.on('text', (text) => onChunk({ text: text || '' }))
@ -72,6 +73,7 @@ export default class ProviderSDK {
messages: [systemMessage, ...userMessages].filter(Boolean) as ChatCompletionMessageParam[],
stream: true,
temperature: assistant?.settings?.temperature,
max_tokens: getAssistantMaxTokens(assistant),
keep_alive: this.keepAliveTime
})
for await (const chunk of stream) {

View File

@ -1,3 +1,4 @@
import { DEFAULT_MAX_TOKENS } from '@renderer/config/constant'
import i18n from '@renderer/i18n'
import store from '@renderer/store'
import { updateAgent } from '@renderer/store/agents'
@ -56,6 +57,18 @@ export function getProviderByModelId(modelId?: string) {
return providers.find((p) => p.models.find((m) => m.id === _modelId)) as Provider
}
/**
 * Resolve the effective max_tokens limit for an assistant.
 *
 * Returns undefined when the limit feature is disabled (callers then omit
 * the parameter). When enabled, non-numeric or too-small (<= 100) values
 * fall back to DEFAULT_MAX_TOKENS.
 */
export function getAssistantMaxTokens(assistant: Assistant) {
  if (!assistant.settings?.enableMaxTokens) {
    return undefined
  }
  const configured = assistant.settings.maxTokens
  if (typeof configured !== 'number') {
    return DEFAULT_MAX_TOKENS
  }
  // Values of 100 or below are treated as misconfiguration and replaced by the default.
  return configured > 100 ? configured : DEFAULT_MAX_TOKENS
}
export function covertAgentToAssistant(agent: Agent): Assistant {
return {
...getDefaultAssistant(),

View File

@ -14,6 +14,8 @@ export type Assistant = {
// Per-assistant generation settings.
export type AssistantSettings = {
// Number of previous messages kept in the model context.
contextCount: number
// Sampling temperature passed to the provider API.
temperature: number
// Upper bound on generated tokens; only applied when enableMaxTokens is true.
maxTokens: number
// Toggles whether maxTokens is sent to the provider at all.
enableMaxTokens: boolean
}
export type Message = {