diff --git a/src/renderer/src/config/constant.ts b/src/renderer/src/config/constant.ts index 63a83792..f768188a 100644 --- a/src/renderer/src/config/constant.ts +++ b/src/renderer/src/config/constant.ts @@ -1,5 +1,6 @@ export const DEFAULT_TEMPERATURE = 0.7 export const DEFAULT_CONEXTCOUNT = 5 +export const DEFAULT_MAX_TOKENS = 4096 export const FONT_FAMILY = "Ubuntu, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif" export const platform = window.electron?.process?.platform === 'darwin' ? 'macos' : 'windows' diff --git a/src/renderer/src/i18n/index.ts b/src/renderer/src/i18n/index.ts index 75d4d003..8f7fec01 100644 --- a/src/renderer/src/i18n/index.ts +++ b/src/renderer/src/i18n/index.ts @@ -78,6 +78,9 @@ const resources = { 'Lower values make the model more creative and unpredictable, while higher values make it more deterministic and precise.', 'settings.conext_count': 'Context', 'settings.conext_count.tip': 'The number of previous messages to keep in the context.', + 'settings.max_tokens': 'Enable Max Tokens Limit', + 'settings.max_tokens.tip': + 'The maximum number of tokens the model can generate. Normal chat suggests 500-800. Short text generation suggests 800-2000. Code generation suggests 2000-3600. Long text generation suggests above 4000.', 'settings.reset': 'Reset', 'settings.set_as_default': 'Apply to default assistant', 'settings.max': 'Max', @@ -285,7 +288,10 @@ const resources = { '模型生成文本的随机程度。值越大,回复内容越赋有多样性、创造性、随机性;设为 0 根据事实回答。日常聊天建议设置为 0.7', 'settings.conext_count': '上下文数', 'settings.conext_count.tip': - '要保留在上下文中的消息数量,数值越大,上下文越长,消耗的 token 越多。普通聊天建议 5-10,代码生成建议 5-10', + '要保留在上下文中的消息数量,数值越大,上下文越长,消耗的 token 越多。普通聊天建议 5-10', + 'settings.max_tokens': '开启消息长度限制', + 'settings.max_tokens.tip': + '单次交互所用的最大 Token 数, 会影响返回结果的长度。普通聊天建议 500-800;短文生成建议 800-2000;代码生成建议 2000-3600;长文生成建议切换模型到 4000 左右', 'settings.reset': '重置', 'settings.set_as_default': '应用到默认助手', 'settings.max': '不限', diff --git a/src/renderer/src/pages/home/components/sidebar/SettingsTab.tsx b/src/renderer/src/pages/home/components/sidebar/SettingsTab.tsx index 59d37dda..9172878e 100644 --- a/src/renderer/src/pages/home/components/sidebar/SettingsTab.tsx +++ b/src/renderer/src/pages/home/components/sidebar/SettingsTab.tsx @@ -1,11 +1,12 @@ import { QuestionCircleOutlined, ReloadOutlined } from '@ant-design/icons' -import { DEFAULT_CONEXTCOUNT, DEFAULT_TEMPERATURE } from '@renderer/config/constant' +import { HStack } from '@renderer/components/Layout' +import { DEFAULT_CONEXTCOUNT, DEFAULT_MAX_TOKENS, DEFAULT_TEMPERATURE } from '@renderer/config/constant' import { useAssistant } from '@renderer/hooks/useAssistant' import { useSettings } from '@renderer/hooks/useSettings' import { SettingDivider, SettingRow, SettingRowTitle, SettingSubtitle } from '@renderer/pages/settings/components' import { useAppDispatch } from '@renderer/store' import { setMessageFont, setShowInputEstimatedTokens, setShowMessageDivider } from '@renderer/store/settings' -import { Assistant } from '@renderer/types' +import { Assistant, AssistantSettings } from '@renderer/types' import { Col, InputNumber, Row, Slider, Switch, Tooltip } from 'antd' import { debounce } from 'lodash' import { FC, useCallback, useEffect, useState } from 'react' @@ -20,6 +21,8 @@ const SettingsTab: FC = (props) => { const { assistant, updateAssistantSettings, updateAssistant } = useAssistant(props.assistant.id) const [temperature, setTemperature] = useState(assistant?.settings?.temperature ?? DEFAULT_TEMPERATURE) const [contextCount, setConextCount] = useState(assistant?.settings?.contextCount ?? DEFAULT_CONEXTCOUNT) + const [enableMaxTokens, setEnableMaxTokens] = useState(assistant?.settings?.enableMaxTokens ?? false) + const [maxTokens, setMaxTokens] = useState(assistant?.settings?.maxTokens ?? 0) const { t } = useTranslation() const dispatch = useAppDispatch() @@ -28,11 +31,13 @@ const SettingsTab: FC = (props) => { const onUpdateAssistantSettings = useCallback( debounce( - ({ _temperature, _contextCount }: { _temperature?: number; _contextCount?: number }) => { + (settings: Partial) => { updateAssistantSettings({ ...assistant.settings, - temperature: _temperature ?? temperature, - contextCount: _contextCount ?? contextCount + temperature: settings.temperature ?? temperature, + contextCount: settings.contextCount ?? contextCount, + enableMaxTokens: settings.enableMaxTokens ?? enableMaxTokens, + maxTokens: settings.maxTokens ?? maxTokens }) }, 1000, @@ -47,14 +52,21 @@ const SettingsTab: FC = (props) => { const onTemperatureChange = (value) => { if (!isNaN(value as number)) { setTemperature(value) - onUpdateAssistantSettings({ _temperature: value }) + onUpdateAssistantSettings({ temperature: value }) } } const onConextCountChange = (value) => { if (!isNaN(value as number)) { setConextCount(value) - onUpdateAssistantSettings({ _contextCount: value }) + onUpdateAssistantSettings({ contextCount: value }) + } + } + + const onMaxTokensChange = (value) => { + if (!isNaN(value as number)) { + setMaxTokens(value) + onUpdateAssistantSettings({ maxTokens: value }) } } @@ -66,7 +78,9 @@ const SettingsTab: FC = (props) => { settings: { ...assistant.settings, temperature: DEFAULT_TEMPERATURE, - contextCount: DEFAULT_CONEXTCOUNT + contextCount: DEFAULT_CONEXTCOUNT, + enableMaxTokens: false, + maxTokens: DEFAULT_MAX_TOKENS } }) } @@ -74,6 +88,8 @@ const SettingsTab: FC = (props) => { useEffect(() => { setTemperature(assistant?.settings?.temperature ?? DEFAULT_TEMPERATURE) setConextCount(assistant?.settings?.contextCount ?? DEFAULT_CONEXTCOUNT) + setEnableMaxTokens(assistant?.settings?.enableMaxTokens ?? false) + setMaxTokens(assistant?.settings?.maxTokens ?? DEFAULT_MAX_TOKENS) }, [assistant]) return ( @@ -110,6 +126,7 @@ const SettingsTab: FC = (props) => { value={temperature} onChange={onTemperatureChange} controls={false} + size="small" /> @@ -138,9 +155,51 @@ const SettingsTab: FC = (props) => { value={contextCount} onChange={onConextCountChange} controls={false} + size="small" /> + + + + + + + + { + setEnableMaxTokens(enabled) + onUpdateAssistantSettings({ enableMaxTokens: enabled }) + }} + /> + + {enableMaxTokens && ( + + + + + + + + + )} {t('settings.messages.title')} diff --git a/src/renderer/src/pages/settings/AssistantSettings.tsx b/src/renderer/src/pages/settings/AssistantSettings.tsx index fabe0237..00660436 100644 --- a/src/renderer/src/pages/settings/AssistantSettings.tsx +++ b/src/renderer/src/pages/settings/AssistantSettings.tsx @@ -1,7 +1,9 @@ import { QuestionCircleOutlined } from '@ant-design/icons' -import { DEFAULT_CONEXTCOUNT, DEFAULT_TEMPERATURE } from '@renderer/config/constant' +import { HStack } from '@renderer/components/Layout' +import { DEFAULT_CONEXTCOUNT, DEFAULT_MAX_TOKENS, DEFAULT_TEMPERATURE } from '@renderer/config/constant' import { useDefaultAssistant } from '@renderer/hooks/useAssistant' -import { Button, Col, Input, InputNumber, Row, Slider, Tooltip } from 'antd' +import { AssistantSettings as AssistantSettingsType } from '@renderer/types' +import { Button, Col, Input, InputNumber, Row, Slider, Switch, Tooltip } from 'antd' import TextArea from 'antd/es/input/TextArea' import { debounce } from 'lodash' import { FC, useCallback, useState } from 'react' @@ -14,18 +16,22 @@ const AssistantSettings: FC = () => { const { defaultAssistant, updateDefaultAssistant } = useDefaultAssistant() const [temperature, setTemperature] = useState(defaultAssistant.settings?.temperature ?? DEFAULT_TEMPERATURE) const [contextCount, setConextCount] = useState(defaultAssistant.settings?.contextCount ?? DEFAULT_CONEXTCOUNT) + const [enableMaxTokens, setEnableMaxTokens] = useState(defaultAssistant?.settings?.enableMaxTokens ?? false) + const [maxTokens, setMaxTokens] = useState(defaultAssistant?.settings?.maxTokens ?? 0) const { t } = useTranslation() const onUpdateAssistantSettings = useCallback( debounce( - ({ _temperature, _contextCount }: { _temperature?: number; _contextCount?: number }) => { + (settings: Partial) => { updateDefaultAssistant({ ...defaultAssistant, settings: { ...defaultAssistant.settings, - temperature: _temperature ?? temperature, - contextCount: _contextCount ?? contextCount + temperature: settings.temperature ?? temperature, + contextCount: settings.contextCount ?? contextCount, + enableMaxTokens: settings.enableMaxTokens ?? enableMaxTokens, + maxTokens: settings.maxTokens ?? maxTokens } }) }, @@ -38,14 +44,21 @@ const AssistantSettings: FC = () => { const onTemperatureChange = (value) => { if (!isNaN(value as number)) { setTemperature(value) - onUpdateAssistantSettings({ _temperature: value }) + onUpdateAssistantSettings({ temperature: value }) } } const onConextCountChange = (value) => { if (!isNaN(value as number)) { setConextCount(value) - onUpdateAssistantSettings({ _contextCount: value }) + onUpdateAssistantSettings({ contextCount: value }) + } + } + + const onMaxTokensChange = (value) => { + if (!isNaN(value as number)) { + setMaxTokens(value) + onUpdateAssistantSettings({ maxTokens: value }) } } @@ -57,7 +70,9 @@ const AssistantSettings: FC = () => { settings: { ...defaultAssistant.settings, temperature: DEFAULT_TEMPERATURE, - contextCount: DEFAULT_CONEXTCOUNT + contextCount: DEFAULT_CONEXTCOUNT, + enableMaxTokens: false, + maxTokens: DEFAULT_MAX_TOKENS } }) } @@ -80,7 +95,19 @@ const AssistantSettings: FC = () => { onChange={(e) => updateDefaultAssistant({ ...defaultAssistant, prompt: e.target.value })} /> - {t('settings.assistant.model_params')} + + {t('settings.assistant.model_params')} + + @@ -137,9 +164,46 @@ const AssistantSettings: FC = () => { /> - + + + + + + + + { + setEnableMaxTokens(enabled) + onUpdateAssistantSettings({ enableMaxTokens: enabled }) + }} + /> + + {enableMaxTokens && ( + + + + + + + + + )} ) } diff --git a/src/renderer/src/services/ProviderSDK.ts b/src/renderer/src/services/ProviderSDK.ts index 4d317f99..22a0aaf2 100644 --- a/src/renderer/src/services/ProviderSDK.ts +++ b/src/renderer/src/services/ProviderSDK.ts @@ -1,5 +1,6 @@ import Anthropic from '@anthropic-ai/sdk' import { MessageCreateParamsNonStreaming, MessageParam } from '@anthropic-ai/sdk/resources' +import { DEFAULT_MAX_TOKENS } from '@renderer/config/constant' import { getOllamaKeepAliveTime } from '@renderer/hooks/useOllama' import { Assistant, Message, Provider, Suggestion } from '@renderer/types' import { getAssistantSettings, removeQuotes } from '@renderer/utils' @@ -7,7 +8,7 @@ import { sum, takeRight } from 'lodash' import OpenAI from 'openai' import { ChatCompletionCreateParamsNonStreaming, ChatCompletionMessageParam } from 'openai/resources' -import { getDefaultModel, getTopNamingModel } from './assistant' +import { getAssistantMaxTokens, getDefaultModel, getTopNamingModel } from './assistant' import { EVENT_NAMES } from './event' export default class ProviderSDK { @@ -52,7 +53,7 @@ export default class ProviderSDK { .stream({ model: model.id, messages: [systemMessage, ...userMessages].filter(Boolean) as MessageParam[], - max_tokens: 4096, + max_tokens: getAssistantMaxTokens(assistant) || DEFAULT_MAX_TOKENS, temperature: assistant?.settings?.temperature }) .on('text', (text) => onChunk({ text: text || '' })) @@ -72,6 +73,7 @@ export default class ProviderSDK { messages: [systemMessage, ...userMessages].filter(Boolean) as ChatCompletionMessageParam[], stream: true, temperature: assistant?.settings?.temperature, + max_tokens: getAssistantMaxTokens(assistant), keep_alive: this.keepAliveTime }) for await (const chunk of stream) { diff --git a/src/renderer/src/services/assistant.ts b/src/renderer/src/services/assistant.ts index 2ed7d464..2b8614fd 100644 --- a/src/renderer/src/services/assistant.ts +++ b/src/renderer/src/services/assistant.ts @@ -1,3 +1,4 @@ +import { DEFAULT_MAX_TOKENS } from '@renderer/config/constant' import i18n from '@renderer/i18n' import store from '@renderer/store' import { updateAgent } from '@renderer/store/agents' @@ -56,6 +57,18 @@ export function getProviderByModelId(modelId?: string) { return providers.find((p) => p.models.find((m) => m.id === _modelId)) as Provider } +export function getAssistantMaxTokens(assistant: Assistant) { + if (assistant.settings?.enableMaxTokens) { + const maxTokens = assistant.settings.maxTokens + if (typeof maxTokens === 'number') { + return maxTokens > 100 ? maxTokens : DEFAULT_MAX_TOKENS + } + return DEFAULT_MAX_TOKENS + } + + return undefined +} + export function covertAgentToAssistant(agent: Agent): Assistant { return { ...getDefaultAssistant(), diff --git a/src/renderer/src/types/index.ts b/src/renderer/src/types/index.ts index 3298591d..b2468a49 100644 --- a/src/renderer/src/types/index.ts +++ b/src/renderer/src/types/index.ts @@ -14,6 +14,8 @@ export type Assistant = { export type AssistantSettings = { contextCount: number temperature: number + maxTokens: number + enableMaxTokens: boolean } export type Message = {