feat: add a metrics field to message to track token generation speed and first-token latency (#479)
commit b2554333a9
parent 6ced973b35
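Summary: the provider now reports per-response metrics (completion_tokens, time_completion_millsec, time_first_token_millsec) through onChunk, fetchChatCompletion stores them on the message, and MessgeTokens renders first-token latency and generation speed for assistant messages.

The speed figure is completion tokens divided by elapsed seconds. A minimal sketch of that arithmetic, assuming the Metrics type added below (the helper name is illustrative, not part of the commit):

    import { Metrics } from '@renderer/types'

    // tokens/sec = completion tokens / (elapsed milliseconds / 1000), to 2 decimals;
    // returns '' when either input is missing, mirroring the guard in MessgeTokens.
    function tokenSpeed(m: Metrics): string {
      if (!m.completion_tokens || !m.time_completion_millsec) return ''
      return (m.completion_tokens / (m.time_completion_millsec / 1000)).toFixed(2)
    }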
@@ -389,6 +389,7 @@
     "messages.input.paste_long_text_as_file": "Paste long text as file",
     "messages.input.send_shortcuts": "Send shortcuts",
     "messages.input.show_estimated_tokens": "Show estimated tokens",
+    "messages.metrics": "{{time_first_token_millsec}}ms to first token • {{token_speed}} tok/sec • ",
     "messages.input.title": "Input Settings",
     "messages.markdown_rendering_input_message": "Markdown render input msg",
     "messages.math_engine": "Math render engine",
@@ -389,6 +389,7 @@
     "messages.input.paste_long_text_as_file": "长文本粘贴为文件",
     "messages.input.send_shortcuts": "发送快捷键",
     "messages.input.show_estimated_tokens": "显示预估 Token 数",
+    "messages.metrics": "首字时延 {{time_first_token_millsec}}ms • 每秒 {{token_speed}} token • ",
     "messages.input.title": "输入设置",
     "messages.markdown_rendering_input_message": "Markdown 渲染输入消息",
     "messages.math_engine": "数学公式引擎",
@@ -1,6 +1,7 @@
 import { useRuntime } from '@renderer/hooks/useRuntime'
 import { EVENT_NAMES, EventEmitter } from '@renderer/services/EventService'
 import { Message } from '@renderer/types'
+import { t } from 'i18next'
 import styled from 'styled-components'

 const MessgeTokens: React.FC<{ message: Message; isLastMessage: boolean }> = ({ message, isLastMessage }) => {
@@ -27,9 +28,19 @@ const MessgeTokens: React.FC<{ message: Message; isLastMessage: boolean }> = ({
   }

   if (message.role === 'assistant') {
+    let metrixs = ''
+    if (message?.metrics?.completion_tokens && message?.metrics?.time_completion_millsec) {
+      metrixs = t('settings.messages.metrics', {
+        time_first_token_millsec: message?.metrics?.time_first_token_millsec,
+        token_speed: (message?.metrics?.completion_tokens / (message?.metrics?.time_completion_millsec / 1000)).toFixed(
+          2
+        )
+      })
+    }
     return (
       <MessageMetadata className="message-tokens" onClick={locateMessage}>
-        Tokens: {message?.usage?.total_tokens} | ↑{message?.usage?.prompt_tokens} | ↓{message?.usage?.completion_tokens}
+        {metrixs !== '' ? metrixs : ''}
+        Tokens: {message?.usage?.total_tokens} ↑ {message?.usage?.prompt_tokens} ↓ {message?.usage?.completion_tokens}
       </MessageMetadata>
     )
   }
@@ -38,7 +49,7 @@ const MessgeTokens: React.FC<{ message: Message; isLastMessage: boolean }> = ({
 }

 const MessageMetadata = styled.div`
-  font-size: 12px;
+  font-size: 11px;
   color: var(--color-text-2);
   user-select: text;
   margin: 2px 0;
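With the locale strings above, the metadata row of a finished assistant message reads along the lines of (numbers illustrative): 450ms to first token • 42.50 tok/sec • Tokens: 1200 ↑ 800 ↓ 400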
@@ -132,6 +132,9 @@ export default class OpenAIProvider extends BaseProvider {
     const isOpenAIo1 = model.id.includes('o1-')
     const isSupportStreamOutput = streamOutput

+    let time_first_token_millsec = 0
+    const start_time_millsec = new Date().getTime()
+
     // @ts-ignore key is not typed
     const stream = await this.sdk.chat.completions.create({
       model: model.id,
@@ -146,20 +149,35 @@ export default class OpenAIProvider extends BaseProvider {
     })

     if (!isSupportStreamOutput) {
+      let time_completion_millsec = new Date().getTime() - start_time_millsec
       return onChunk({
         text: stream.choices[0].message?.content || '',
-        usage: stream.usage
+        usage: stream.usage,
+        metrics: {
+          completion_tokens: stream.usage?.completion_tokens,
+          time_completion_millsec: time_completion_millsec,
+          time_first_token_millsec: 0,
+        }
       })
     }

     for await (const chunk of stream) {
       if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
         break
       }

+      if (time_first_token_millsec == 0) {
+        time_first_token_millsec = new Date().getTime() - start_time_millsec
+      }
+      let time_completion_millsec = new Date().getTime() - start_time_millsec
       onChunk({
         text: chunk.choices[0]?.delta?.content || '',
-        usage: chunk.usage
+        usage: chunk.usage,
+        metrics: {
+          completion_tokens: chunk.usage?.completion_tokens,
+          time_completion_millsec: time_completion_millsec,
+          time_first_token_millsec: time_first_token_millsec,
+        }
       })
     }
   }
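Stopwatch semantics: start_time_millsec is taken before the SDK call, time_first_token_millsec is latched once on the first streamed chunk, and time_completion_millsec is recomputed on every chunk, so the final chunk carries the total generation time. In the non-stream branch there is no first token to observe, hence the latency is reported as 0.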
src/renderer/src/providers/index.d.ts
@@ -1,6 +1,9 @@
+import { Metrics } from "@renderer/types"
+
 interface ChunkCallbackData {
   text?: string
   usage?: OpenAI.Completions.CompletionUsage
+  metrics?: Metrics
 }

 interface CompletionsParams {
@@ -57,9 +57,10 @@ export async function fetchChatCompletion({
     messages,
     assistant,
     onFilterMessages: (messages) => (_messages = messages),
-    onChunk: ({ text, usage }) => {
+    onChunk: ({ text, usage, metrics }) => {
       message.content = message.content + text || ''
       message.usage = usage
+      message.metrics = metrics
       onResponse({ ...message, status: 'pending' })
     }
   })
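As with usage, metrics is overwritten on each chunk, so the values that end up persisted on the message are those of the last chunk received, i.e. the running totals computed in the provider.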
@@ -47,10 +47,17 @@ export type Message = {
   files?: FileType[]
   images?: string[]
   usage?: OpenAI.Completions.CompletionUsage
+  metrics?: Metrics
   type: 'text' | '@' | 'clear'
   isPreset?: boolean
 }

+export type Metrics = {
+  completion_tokens?: number
+  time_completion_millsec?: number
+  time_first_token_millsec?: number
+}
+
 export type Topic = {
   id: string
   assistantId: string