From b2554333a942f23815ba5d7a6a6a3408a8c22178 Mon Sep 17 00:00:00 2001 From: adfnekc Date: Mon, 16 Dec 2024 17:10:36 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20message=20=E5=A2=9E=E5=8A=A0=20metrics?= =?UTF-8?q?=20=E5=AD=97=E6=AE=B5=20=E7=94=A8=E4=BB=A5=E7=BB=9F=E8=AE=A1tok?= =?UTF-8?q?en=E7=94=9F=E6=88=90=E9=80=9F=E5=BA=A6=E5=92=8C=E9=A6=96?= =?UTF-8?q?=E5=AD=97=E6=97=B6=E5=BB=B6=20(#479)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/renderer/src/i18n/locales/en-us.json | 1 + src/renderer/src/i18n/locales/zh-cn.json | 1 + .../src/pages/home/Messages/MessageTokens.tsx | 15 ++++++++++-- src/renderer/src/providers/OpenAIProvider.ts | 24 ++++++++++++++++--- src/renderer/src/providers/index.d.ts | 3 +++ src/renderer/src/services/ApiService.ts | 3 ++- src/renderer/src/types/index.ts | 7 ++++++ 7 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/renderer/src/i18n/locales/en-us.json b/src/renderer/src/i18n/locales/en-us.json index df5d43fc..230a6d33 100644 --- a/src/renderer/src/i18n/locales/en-us.json +++ b/src/renderer/src/i18n/locales/en-us.json @@ -389,6 +389,7 @@ "messages.input.paste_long_text_as_file": "Paste long text as file", "messages.input.send_shortcuts": "Send shortcuts", "messages.input.show_estimated_tokens": "Show estimated tokens", + "messages.metrics": "{{time_first_token_millsec}}ms to first token • {{token_speed}} tok/sec • ", "messages.input.title": "Input Settings", "messages.markdown_rendering_input_message": "Markdown render input msg", "messages.math_engine": "Math render engine", diff --git a/src/renderer/src/i18n/locales/zh-cn.json b/src/renderer/src/i18n/locales/zh-cn.json index 27b2f7c4..b1c0541d 100644 --- a/src/renderer/src/i18n/locales/zh-cn.json +++ b/src/renderer/src/i18n/locales/zh-cn.json @@ -389,6 +389,7 @@ "messages.input.paste_long_text_as_file": "长文本粘贴为文件", "messages.input.send_shortcuts": "发送快捷键", "messages.input.show_estimated_tokens": "显示预估 Token 数", + 
"messages.metrics": "首字时延 {{time_first_token_millsec}}ms • 每秒 {{token_speed}} token • ", "messages.input.title": "输入设置", "messages.markdown_rendering_input_message": "Markdown 渲染输入消息", "messages.math_engine": "数学公式引擎", diff --git a/src/renderer/src/pages/home/Messages/MessageTokens.tsx b/src/renderer/src/pages/home/Messages/MessageTokens.tsx index 4fc9d379..7a046b83 100644 --- a/src/renderer/src/pages/home/Messages/MessageTokens.tsx +++ b/src/renderer/src/pages/home/Messages/MessageTokens.tsx @@ -1,6 +1,7 @@ import { useRuntime } from '@renderer/hooks/useRuntime' import { EVENT_NAMES, EventEmitter } from '@renderer/services/EventService' import { Message } from '@renderer/types' +import { t } from 'i18next' import styled from 'styled-components' const MessgeTokens: React.FC<{ message: Message; isLastMessage: boolean }> = ({ message, isLastMessage }) => { @@ -27,9 +28,19 @@ const MessgeTokens: React.FC<{ message: Message; isLastMessage: boolean }> = ({ } if (message.role === 'assistant') { + let metrixs = '' + if (message?.metrics?.completion_tokens && message?.metrics?.time_completion_millsec) { + metrixs = t('settings.messages.metrics', { + time_first_token_millsec: message?.metrics?.time_first_token_millsec, + token_speed: (message?.metrics?.completion_tokens / (message?.metrics?.time_completion_millsec / 1000)).toFixed( + 2 + ) + }) + } return ( - Tokens: {message?.usage?.total_tokens} | ↑{message?.usage?.prompt_tokens} | ↓{message?.usage?.completion_tokens} + {metrixs !== '' ? 
metrixs : ''} + Tokens: {message?.usage?.total_tokens} ↑ {message?.usage?.prompt_tokens} ↓ {message?.usage?.completion_tokens} ) } @@ -38,7 +49,7 @@ const MessgeTokens: React.FC<{ message: Message; isLastMessage: boolean }> = ({ } const MessageMetadata = styled.div` - font-size: 12px; + font-size: 11px; color: var(--color-text-2); user-select: text; margin: 2px 0; diff --git a/src/renderer/src/providers/OpenAIProvider.ts b/src/renderer/src/providers/OpenAIProvider.ts index e0799dce..a86297f1 100644 --- a/src/renderer/src/providers/OpenAIProvider.ts +++ b/src/renderer/src/providers/OpenAIProvider.ts @@ -132,6 +132,9 @@ export default class OpenAIProvider extends BaseProvider { const isOpenAIo1 = model.id.includes('o1-') const isSupportStreamOutput = streamOutput + let time_first_token_millsec = 0 + const start_time_millsec = new Date().getTime() + // @ts-ignore key is not typed const stream = await this.sdk.chat.completions.create({ model: model.id, @@ -146,20 +149,35 @@ }) if (!isSupportStreamOutput) { + let time_completion_millsec = new Date().getTime() - start_time_millsec return onChunk({ text: stream.choices[0].message?.content || '', - usage: stream.usage + usage: stream.usage, + metrics: { + completion_tokens: stream.usage?.completion_tokens, + time_completion_millsec: time_completion_millsec, + time_first_token_millsec: 0, + } }) } + for await (const chunk of stream) { if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) { break } - + if (time_first_token_millsec == 0) { + time_first_token_millsec = new Date().getTime() - start_time_millsec + } + let time_completion_millsec = new Date().getTime() - start_time_millsec onChunk({ text: chunk.choices[0]?.delta?.content || '', - usage: chunk.usage + usage: chunk.usage, + metrics: { + completion_tokens: chunk.usage?.completion_tokens, + time_completion_millsec: time_completion_millsec, + time_first_token_millsec: time_first_token_millsec, + } }) } } diff
--git a/src/renderer/src/providers/index.d.ts b/src/renderer/src/providers/index.d.ts index e894982a..b4c62e8b 100644 --- a/src/renderer/src/providers/index.d.ts +++ b/src/renderer/src/providers/index.d.ts @@ -1,6 +1,9 @@ +import { Metrics } from "@renderer/types" + interface ChunkCallbackData { text?: string usage?: OpenAI.Completions.CompletionUsage + metrics?: Metrics } interface CompletionsParams { diff --git a/src/renderer/src/services/ApiService.ts b/src/renderer/src/services/ApiService.ts index ecf8fd12..3c24c003 100644 --- a/src/renderer/src/services/ApiService.ts +++ b/src/renderer/src/services/ApiService.ts @@ -57,9 +57,10 @@ export async function fetchChatCompletion({ messages, assistant, onFilterMessages: (messages) => (_messages = messages), - onChunk: ({ text, usage }) => { + onChunk: ({ text, usage, metrics }) => { message.content = message.content + text || '' message.usage = usage + message.metrics = metrics onResponse({ ...message, status: 'pending' }) } }) diff --git a/src/renderer/src/types/index.ts b/src/renderer/src/types/index.ts index d086540e..4d7f8cc8 100644 --- a/src/renderer/src/types/index.ts +++ b/src/renderer/src/types/index.ts @@ -47,10 +47,17 @@ export type Message = { files?: FileType[] images?: string[] usage?: OpenAI.Completions.CompletionUsage + metrics?: Metrics type: 'text' | '@' | 'clear' isPreset?: boolean } +export type Metrics = { + completion_tokens?: number + time_completion_millsec?: number + time_first_token_millsec?: number +} + export type Topic = { id: string assistantId: string