mirror of
https://github.com/alibaba/higress.git
synced 2026-05-26 13:47:27 +08:00
feat: Support transforming reasoning_content returned by Qwen to OpenAI contract (#1791)
This commit is contained in:
@@ -1,6 +1,9 @@
|
||||
package provider
|
||||
|
||||
import "strings"
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
streamEventIdItemKey = "id:"
|
||||
@@ -110,9 +113,16 @@ type chatCompletionChoice struct {
|
||||
}
|
||||
|
||||
type usage struct {
|
||||
PromptTokens int `json:"prompt_tokens,omitempty"`
|
||||
CompletionTokens int `json:"completion_tokens,omitempty"`
|
||||
TotalTokens int `json:"total_tokens,omitempty"`
|
||||
PromptTokens int `json:"prompt_tokens,omitempty"`
|
||||
CompletionTokens int `json:"completion_tokens,omitempty"`
|
||||
TotalTokens int `json:"total_tokens,omitempty"`
|
||||
CompletionTokensDetails *completionTokensDetails `json:"completion_tokens_details,omitempty"`
|
||||
}
|
||||
|
||||
// completionTokensDetails is the breakdown object serialized under the
// completion_tokens_details key of usage.
//
// Each counter is tagged omitempty, so a zero value is omitted from the JSON
// output rather than being written as 0.
type completionTokensDetails struct {
	// ReasoningTokens maps to the reasoning_tokens JSON key.
	ReasoningTokens int `json:"reasoning_tokens,omitempty"`
	// AcceptedPredictionTokens maps to the accepted_prediction_tokens JSON key.
	AcceptedPredictionTokens int `json:"accepted_prediction_tokens,omitempty"`
	// RejectedPredictionTokens maps to the rejected_prediction_tokens JSON key.
	RejectedPredictionTokens int `json:"rejected_prediction_tokens,omitempty"`
}
|
||||
|
||||
type chatMessage struct {
|
||||
@@ -126,6 +136,24 @@ type chatMessage struct {
|
||||
Refusal string `json:"refusal,omitempty"`
|
||||
}
|
||||
|
||||
func (m *chatMessage) handleReasoningContent(reasoningContentMode string) {
|
||||
if m.ReasoningContent == "" {
|
||||
return
|
||||
}
|
||||
switch reasoningContentMode {
|
||||
case reasoningBehaviorIgnore:
|
||||
m.ReasoningContent = ""
|
||||
break
|
||||
case reasoningBehaviorConcat:
|
||||
m.Content = fmt.Sprintf("%v\n%v", m.ReasoningContent, m.Content)
|
||||
m.ReasoningContent = ""
|
||||
break
|
||||
case reasoningBehaviorPassThrough:
|
||||
default:
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
type messageContent struct {
|
||||
Type string `json:"type,omitempty"`
|
||||
Text string `json:"text"`
|
||||
@@ -138,6 +166,9 @@ type imageUrl struct {
|
||||
}
|
||||
|
||||
func (m *chatMessage) IsEmpty() bool {
|
||||
if m.ReasoningContent != "" {
|
||||
return false
|
||||
}
|
||||
if m.IsStringContent() && m.Content != "" {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -85,6 +85,10 @@ const (
|
||||
objectChatCompletion = "chat.completion"
|
||||
objectChatCompletionChunk = "chat.completion.chunk"
|
||||
|
||||
reasoningBehaviorPassThrough = "passthrough"
|
||||
reasoningBehaviorIgnore = "ignore"
|
||||
reasoningBehaviorConcat = "concat"
|
||||
|
||||
wildcard = "*"
|
||||
|
||||
defaultTimeout = 2 * 60 * 1000 // ms
|
||||
@@ -190,6 +194,9 @@ type ProviderConfig struct {
|
||||
// @Title zh-CN 失败请求重试
|
||||
// @Description zh-CN 对失败的请求立即进行重试
|
||||
retryOnFailure *retryOnFailure `required:"false" yaml:"retryOnFailure" json:"retryOnFailure"`
|
||||
// @Title zh-CN 推理内容处理方式
|
||||
// @Description zh-CN 如何处理大模型服务返回的推理内容。目前支持以下取值:passthrough(正常输出推理内容)、ignore(不输出推理内容)、concat(将推理内容拼接在常规输出内容之前)。默认为 passthrough。仅支持通义千问服务。
|
||||
reasoningContentMode string `required:"false" yaml:"reasoningContentMode" json:"reasoningContentMode"`
|
||||
// @Title zh-CN 基于OpenAI协议的自定义后端URL
|
||||
// @Description zh-CN 仅适用于支持 openai 协议的服务。
|
||||
openaiCustomUrl string `required:"false" yaml:"openaiCustomUrl" json:"openaiCustomUrl"`
|
||||
@@ -359,6 +366,20 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
|
||||
}
|
||||
}
|
||||
|
||||
c.reasoningContentMode = json.Get("reasoningContentMode").String()
|
||||
if c.reasoningContentMode == "" {
|
||||
c.reasoningContentMode = reasoningBehaviorPassThrough
|
||||
} else {
|
||||
c.reasoningContentMode = strings.ToLower(c.reasoningContentMode)
|
||||
switch c.reasoningContentMode {
|
||||
case reasoningBehaviorPassThrough, reasoningBehaviorIgnore, reasoningBehaviorConcat:
|
||||
break
|
||||
default:
|
||||
c.reasoningContentMode = reasoningBehaviorPassThrough
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
failoverJson := json.Get("failover")
|
||||
c.failover = &failover{
|
||||
enabled: false,
|
||||
|
||||
@@ -357,7 +357,7 @@ func (m *qwenProvider) buildQwenTextGenerationRequest(ctx wrapper.HttpContext, o
|
||||
func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwenResponse *qwenTextGenResponse) *chatCompletionResponse {
|
||||
choices := make([]chatCompletionChoice, 0, len(qwenResponse.Output.Choices))
|
||||
for _, qwenChoice := range qwenResponse.Output.Choices {
|
||||
message := qwenMessageToChatMessage(qwenChoice.Message)
|
||||
message := qwenMessageToChatMessage(qwenChoice.Message, m.config.reasoningContentMode)
|
||||
choices = append(choices, chatCompletionChoice{
|
||||
Message: &message,
|
||||
FinishReason: qwenChoice.FinishReason,
|
||||
@@ -395,7 +395,8 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
|
||||
finished := qwenChoice.FinishReason != "" && qwenChoice.FinishReason != "null"
|
||||
message := qwenChoice.Message
|
||||
|
||||
deltaContentMessage := &chatMessage{Role: message.Role, Content: message.Content}
|
||||
deltaContentMessage := &chatMessage{Role: message.Role, Content: message.Content, ReasoningContent: message.ReasoningContent}
|
||||
deltaContentMessage.handleReasoningContent(m.config.reasoningContentMode)
|
||||
deltaToolCallsMessage := &chatMessage{Role: message.Role, ToolCalls: append([]toolCall{}, message.ToolCalls...)}
|
||||
if !incrementalStreaming {
|
||||
for _, tc := range message.ToolCalls {
|
||||
@@ -430,6 +431,11 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
|
||||
}
|
||||
}
|
||||
}
|
||||
if message.ReasoningContent == "" {
|
||||
message.ReasoningContent = pushedMessage.ReasoningContent
|
||||
} else {
|
||||
deltaContentMessage.ReasoningContent = util.StripPrefix(deltaContentMessage.ReasoningContent, pushedMessage.ReasoningContent)
|
||||
}
|
||||
if len(deltaToolCallsMessage.ToolCalls) > 0 && pushedMessage.ToolCalls != nil {
|
||||
for i, tc := range deltaToolCallsMessage.ToolCalls {
|
||||
if i >= len(pushedMessage.ToolCalls) {
|
||||
@@ -690,13 +696,16 @@ type qwenTextEmbeddings struct {
|
||||
Embedding []float64 `json:"embedding"`
|
||||
}
|
||||
|
||||
func qwenMessageToChatMessage(qwenMessage qwenMessage) chatMessage {
|
||||
return chatMessage{
|
||||
Name: qwenMessage.Name,
|
||||
Role: qwenMessage.Role,
|
||||
Content: qwenMessage.Content,
|
||||
ToolCalls: qwenMessage.ToolCalls,
|
||||
func qwenMessageToChatMessage(qwenMessage qwenMessage, reasoningContentMode string) chatMessage {
|
||||
msg := chatMessage{
|
||||
Name: qwenMessage.Name,
|
||||
Role: qwenMessage.Role,
|
||||
Content: qwenMessage.Content,
|
||||
ReasoningContent: qwenMessage.ReasoningContent,
|
||||
ToolCalls: qwenMessage.ToolCalls,
|
||||
}
|
||||
msg.handleReasoningContent(reasoningContentMode)
|
||||
return msg
|
||||
}
|
||||
|
||||
func (m *qwenMessage) IsStringContent() bool {
|
||||
|
||||
Reference in New Issue
Block a user