mirror of
https://github.com/alibaba/higress.git
synced 2026-05-23 04:07:26 +08:00
feat(ai-proxy): add promoteThinkingOnEmpty and hiclawMode config options (#3625)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -255,6 +255,70 @@ func (m *chatMessage) handleStreamingReasoningContent(ctx wrapper.HttpContext, r
|
||||
}
|
||||
}
|
||||
|
||||
// promoteThinkingOnEmpty promotes reasoning_content to content when content is empty.
|
||||
// This handles models that put user-facing replies into thinking blocks instead of text blocks.
|
||||
func (r *chatCompletionResponse) promoteThinkingOnEmpty() {
|
||||
for i := range r.Choices {
|
||||
msg := r.Choices[i].Message
|
||||
if msg == nil {
|
||||
continue
|
||||
}
|
||||
if !isContentEmpty(msg.Content) {
|
||||
continue
|
||||
}
|
||||
if msg.ReasoningContent != "" {
|
||||
msg.Content = msg.ReasoningContent
|
||||
msg.ReasoningContent = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// promoteStreamingThinkingOnEmpty accumulates reasoning content during streaming.
|
||||
// It strips reasoning from chunks and buffers it. When content is seen, it marks
|
||||
// the stream as having content so no promotion will happen.
|
||||
// Call PromoteStreamingThinkingFlush at the end of the stream to emit buffered
|
||||
// reasoning as content if no content was ever seen.
|
||||
// Returns true if the chunk was modified (reasoning stripped).
|
||||
func promoteStreamingThinkingOnEmpty(ctx wrapper.HttpContext, msg *chatMessage) bool {
|
||||
if msg == nil {
|
||||
return false
|
||||
}
|
||||
hasContentDelta, _ := ctx.GetContext(ctxKeyHasContentDelta).(bool)
|
||||
if hasContentDelta {
|
||||
return false
|
||||
}
|
||||
|
||||
if !isContentEmpty(msg.Content) {
|
||||
ctx.SetContext(ctxKeyHasContentDelta, true)
|
||||
return false
|
||||
}
|
||||
|
||||
// Buffer reasoning content and strip it from the chunk
|
||||
reasoning := msg.ReasoningContent
|
||||
if reasoning == "" {
|
||||
reasoning = msg.Reasoning
|
||||
}
|
||||
if reasoning != "" {
|
||||
buffered, _ := ctx.GetContext(ctxKeyBufferedReasoning).(string)
|
||||
ctx.SetContext(ctxKeyBufferedReasoning, buffered+reasoning)
|
||||
msg.ReasoningContent = ""
|
||||
msg.Reasoning = ""
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// isContentEmpty reports whether a chat message content value carries no
// user-visible text.
//
// The following values count as empty:
//   - nil
//   - a string that is blank after trimming surrounding whitespace
//   - a []any with no elements (e.g. a JSON "[]" decoded by encoding/json
//     into an any value)
//
// Every other value, including a non-empty []any, is reported as non-empty
// without inspecting its elements.
func isContentEmpty(content any) bool {
	switch v := content.(type) {
	case nil:
		return true
	case string:
		return strings.TrimSpace(v) == ""
	case []any:
		return len(v) == 0
	default:
		return false
	}
}
|
||||
|
||||
type chatMessageContent struct {
|
||||
CacheControl map[string]interface{} `json:"cache_control,omitempty"`
|
||||
Type string `json:"type,omitempty"`
|
||||
@@ -648,3 +712,87 @@ func (r embeddingsRequest) ParseInput() []string {
|
||||
}
|
||||
return input
|
||||
}
|
||||
|
||||
// PromoteThinkingOnEmptyResponse promotes reasoning_content to content in a non-streaming
|
||||
// response body when content is empty. Returns the original body if no promotion is needed.
|
||||
func PromoteThinkingOnEmptyResponse(body []byte) ([]byte, error) {
|
||||
var resp chatCompletionResponse
|
||||
if err := json.Unmarshal(body, &resp); err != nil {
|
||||
return body, fmt.Errorf("unable to unmarshal response for thinking promotion: %v", err)
|
||||
}
|
||||
promoted := false
|
||||
for i := range resp.Choices {
|
||||
msg := resp.Choices[i].Message
|
||||
if msg == nil {
|
||||
continue
|
||||
}
|
||||
if !isContentEmpty(msg.Content) {
|
||||
continue
|
||||
}
|
||||
if msg.ReasoningContent != "" {
|
||||
msg.Content = msg.ReasoningContent
|
||||
msg.ReasoningContent = ""
|
||||
promoted = true
|
||||
}
|
||||
}
|
||||
if !promoted {
|
||||
return body, nil
|
||||
}
|
||||
return json.Marshal(resp)
|
||||
}
|
||||
|
||||
// PromoteStreamingThinkingOnEmptyChunk buffers reasoning deltas and strips them from
|
||||
// the chunk during streaming. Call PromoteStreamingThinkingFlush on the last chunk
|
||||
// to emit buffered reasoning as content if no real content was ever seen.
|
||||
func PromoteStreamingThinkingOnEmptyChunk(ctx wrapper.HttpContext, data []byte) ([]byte, error) {
|
||||
var resp chatCompletionResponse
|
||||
if err := json.Unmarshal(data, &resp); err != nil {
|
||||
return data, nil // not a valid chat completion chunk, skip
|
||||
}
|
||||
modified := false
|
||||
for i := range resp.Choices {
|
||||
msg := resp.Choices[i].Delta
|
||||
if msg == nil {
|
||||
continue
|
||||
}
|
||||
if promoteStreamingThinkingOnEmpty(ctx, msg) {
|
||||
modified = true
|
||||
}
|
||||
}
|
||||
if !modified {
|
||||
return data, nil
|
||||
}
|
||||
return json.Marshal(resp)
|
||||
}
|
||||
|
||||
// PromoteStreamingThinkingFlush checks if the stream had no content and returns
|
||||
// an SSE chunk that emits the buffered reasoning as content. Returns nil if
|
||||
// content was already seen or no reasoning was buffered.
|
||||
func PromoteStreamingThinkingFlush(ctx wrapper.HttpContext) []byte {
|
||||
hasContentDelta, _ := ctx.GetContext(ctxKeyHasContentDelta).(bool)
|
||||
if hasContentDelta {
|
||||
return nil
|
||||
}
|
||||
buffered, _ := ctx.GetContext(ctxKeyBufferedReasoning).(string)
|
||||
if buffered == "" {
|
||||
return nil
|
||||
}
|
||||
// Build a minimal chat.completion.chunk with the buffered reasoning as content
|
||||
resp := chatCompletionResponse{
|
||||
Object: objectChatCompletionChunk,
|
||||
Choices: []chatCompletionChoice{
|
||||
{
|
||||
Index: 0,
|
||||
Delta: &chatMessage{
|
||||
Content: buffered,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
data, err := json.Marshal(resp)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
// Format as SSE
|
||||
return []byte("data: " + string(data) + "\n\n")
|
||||
}
|
||||
|
||||
@@ -178,6 +178,8 @@ const (
|
||||
ctxKeyPushedMessage = "pushedMessage"
|
||||
ctxKeyContentPushed = "contentPushed"
|
||||
ctxKeyReasoningContentPushed = "reasoningContentPushed"
|
||||
ctxKeyHasContentDelta = "hasContentDelta"
|
||||
ctxKeyBufferedReasoning = "bufferedReasoning"
|
||||
|
||||
objectChatCompletion = "chat.completion"
|
||||
objectChatCompletionChunk = "chat.completion.chunk"
|
||||
@@ -474,6 +476,12 @@ type ProviderConfig struct {
|
||||
// @Title zh-CN 合并连续同角色消息
|
||||
// @Description zh-CN 开启后,若请求的 messages 中存在连续的同角色消息(如连续两条 user 消息),将其内容合并为一条,以满足要求严格轮流交替(user→assistant→user→...)的模型服务商的要求。
|
||||
mergeConsecutiveMessages bool `required:"false" yaml:"mergeConsecutiveMessages" json:"mergeConsecutiveMessages"`
|
||||
// @Title zh-CN 空内容时提升思考为正文
|
||||
// @Description zh-CN 开启后,若模型响应只包含 reasoning_content/thinking 而没有正文内容,将 reasoning 内容提升为正文内容返回,避免客户端收到空回复。
|
||||
promoteThinkingOnEmpty bool `required:"false" yaml:"promoteThinkingOnEmpty" json:"promoteThinkingOnEmpty"`
|
||||
// @Title zh-CN HiClaw 模式
|
||||
// @Description zh-CN 开启后同时启用 mergeConsecutiveMessages 和 promoteThinkingOnEmpty,适用于 HiClaw 多 Agent 协作场景。
|
||||
hiclawMode bool `required:"false" yaml:"hiclawMode" json:"hiclawMode"`
|
||||
}
|
||||
|
||||
func (c *ProviderConfig) GetId() string {
|
||||
@@ -699,6 +707,12 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
|
||||
}
|
||||
}
|
||||
c.mergeConsecutiveMessages = json.Get("mergeConsecutiveMessages").Bool()
|
||||
c.promoteThinkingOnEmpty = json.Get("promoteThinkingOnEmpty").Bool()
|
||||
c.hiclawMode = json.Get("hiclawMode").Bool()
|
||||
if c.hiclawMode {
|
||||
c.mergeConsecutiveMessages = true
|
||||
c.promoteThinkingOnEmpty = true
|
||||
}
|
||||
}
|
||||
|
||||
func (c *ProviderConfig) Validate() error {
|
||||
@@ -833,6 +847,10 @@ func (c *ProviderConfig) IsOriginal() bool {
|
||||
return c.protocol == protocolOriginal
|
||||
}
|
||||
|
||||
func (c *ProviderConfig) GetPromoteThinkingOnEmpty() bool {
|
||||
return c.promoteThinkingOnEmpty
|
||||
}
|
||||
|
||||
func (c *ProviderConfig) ReplaceByCustomSettings(body []byte) ([]byte, error) {
|
||||
return ReplaceByCustomSettings(body, c.customSettings)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user