mirror of
https://github.com/alibaba/higress.git
synced 2026-05-30 15:47:28 +08:00
feat(ai-proxy): add promoteThinkingOnEmpty and hiclawMode config options
When some models (e.g. kimi-k2.5) put user-facing replies into reasoning_content/thinking blocks without generating text content, downstream clients receive empty responses. This adds a new promoteThinkingOnEmpty option that promotes reasoning content to text content when the response has no text block. Also adds hiclawMode as a convenience flag that enables both mergeConsecutiveMessages and promoteThinkingOnEmpty for multi-agent collaboration scenarios. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -255,6 +255,65 @@ func (m *chatMessage) handleStreamingReasoningContent(ctx wrapper.HttpContext, r
|
||||
}
|
||||
}
|
||||
|
||||
// promoteThinkingOnEmpty promotes reasoning_content to content when content is empty.
|
||||
// This handles models that put user-facing replies into thinking blocks instead of text blocks.
|
||||
func (r *chatCompletionResponse) promoteThinkingOnEmpty() {
|
||||
for i := range r.Choices {
|
||||
msg := r.Choices[i].Message
|
||||
if msg == nil {
|
||||
continue
|
||||
}
|
||||
if !isContentEmpty(msg.Content) {
|
||||
continue
|
||||
}
|
||||
if msg.ReasoningContent != "" {
|
||||
msg.Content = msg.ReasoningContent
|
||||
msg.ReasoningContent = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// promoteStreamingThinkingOnEmpty promotes reasoning delta to content delta when no content
|
||||
// has been seen in the stream so far. Uses context to track state across chunks.
|
||||
// Returns true if a promotion was performed.
|
||||
func promoteStreamingThinkingOnEmpty(ctx wrapper.HttpContext, msg *chatMessage) bool {
|
||||
if msg == nil {
|
||||
return false
|
||||
}
|
||||
hasContentDelta, _ := ctx.GetContext(ctxKeyHasContentDelta).(bool)
|
||||
if hasContentDelta {
|
||||
return false
|
||||
}
|
||||
|
||||
if !isContentEmpty(msg.Content) {
|
||||
ctx.SetContext(ctxKeyHasContentDelta, true)
|
||||
return false
|
||||
}
|
||||
|
||||
reasoning := msg.ReasoningContent
|
||||
if reasoning == "" {
|
||||
reasoning = msg.Reasoning
|
||||
}
|
||||
if reasoning != "" {
|
||||
msg.Content = reasoning
|
||||
msg.ReasoningContent = ""
|
||||
msg.Reasoning = ""
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// isContentEmpty reports whether content carries no text: it is true for a nil
// interface value and for a string that is empty or consists only of whitespace.
// Any other dynamic type (including non-string values and typed nil pointers) is
// treated as non-empty.
func isContentEmpty(content any) bool {
	if content == nil {
		return true
	}
	text, isString := content.(string)
	if !isString {
		return false
	}
	return len(strings.TrimSpace(text)) == 0
}
|
||||
|
||||
type chatMessageContent struct {
|
||||
CacheControl map[string]interface{} `json:"cache_control,omitempty"`
|
||||
Type string `json:"type,omitempty"`
|
||||
@@ -648,3 +707,54 @@ func (r embeddingsRequest) ParseInput() []string {
|
||||
}
|
||||
return input
|
||||
}
|
||||
|
||||
// PromoteThinkingOnEmptyResponse promotes reasoning_content to content in a non-streaming
|
||||
// response body when content is empty. Returns the original body if no promotion is needed.
|
||||
func PromoteThinkingOnEmptyResponse(body []byte) ([]byte, error) {
|
||||
var resp chatCompletionResponse
|
||||
if err := json.Unmarshal(body, &resp); err != nil {
|
||||
return body, fmt.Errorf("unable to unmarshal response for thinking promotion: %v", err)
|
||||
}
|
||||
promoted := false
|
||||
for i := range resp.Choices {
|
||||
msg := resp.Choices[i].Message
|
||||
if msg == nil {
|
||||
continue
|
||||
}
|
||||
if !isContentEmpty(msg.Content) {
|
||||
continue
|
||||
}
|
||||
if msg.ReasoningContent != "" {
|
||||
msg.Content = msg.ReasoningContent
|
||||
msg.ReasoningContent = ""
|
||||
promoted = true
|
||||
}
|
||||
}
|
||||
if !promoted {
|
||||
return body, nil
|
||||
}
|
||||
return json.Marshal(resp)
|
||||
}
|
||||
|
||||
// PromoteStreamingThinkingOnEmptyChunk promotes reasoning delta to content delta in a
|
||||
// streaming SSE data payload when no content has been seen in the stream so far.
|
||||
func PromoteStreamingThinkingOnEmptyChunk(ctx wrapper.HttpContext, data []byte) ([]byte, error) {
|
||||
var resp chatCompletionResponse
|
||||
if err := json.Unmarshal(data, &resp); err != nil {
|
||||
return data, nil // not a valid chat completion chunk, skip
|
||||
}
|
||||
promoted := false
|
||||
for i := range resp.Choices {
|
||||
msg := resp.Choices[i].Delta
|
||||
if msg == nil {
|
||||
continue
|
||||
}
|
||||
if promoteStreamingThinkingOnEmpty(ctx, msg) {
|
||||
promoted = true
|
||||
}
|
||||
}
|
||||
if !promoted {
|
||||
return data, nil
|
||||
}
|
||||
return json.Marshal(resp)
|
||||
}
|
||||
|
||||
@@ -178,6 +178,7 @@ const (
|
||||
ctxKeyPushedMessage = "pushedMessage"
|
||||
ctxKeyContentPushed = "contentPushed"
|
||||
ctxKeyReasoningContentPushed = "reasoningContentPushed"
|
||||
ctxKeyHasContentDelta = "hasContentDelta"
|
||||
|
||||
objectChatCompletion = "chat.completion"
|
||||
objectChatCompletionChunk = "chat.completion.chunk"
|
||||
@@ -474,6 +475,12 @@ type ProviderConfig struct {
|
||||
// @Title zh-CN 合并连续同角色消息
|
||||
// @Description zh-CN 开启后,若请求的 messages 中存在连续的同角色消息(如连续两条 user 消息),将其内容合并为一条,以满足要求严格轮流交替(user→assistant→user→...)的模型服务商的要求。
|
||||
mergeConsecutiveMessages bool `required:"false" yaml:"mergeConsecutiveMessages" json:"mergeConsecutiveMessages"`
|
||||
// @Title zh-CN 空内容时提升思考为正文
|
||||
// @Description zh-CN 开启后,若模型响应只包含 reasoning_content/thinking 而没有正文内容,将 reasoning 内容提升为正文内容返回,避免客户端收到空回复。
|
||||
promoteThinkingOnEmpty bool `required:"false" yaml:"promoteThinkingOnEmpty" json:"promoteThinkingOnEmpty"`
|
||||
// @Title zh-CN HiClaw 模式
|
||||
// @Description zh-CN 开启后同时启用 mergeConsecutiveMessages 和 promoteThinkingOnEmpty,适用于 HiClaw 多 Agent 协作场景。
|
||||
hiclawMode bool `required:"false" yaml:"hiclawMode" json:"hiclawMode"`
|
||||
}
|
||||
|
||||
func (c *ProviderConfig) GetId() string {
|
||||
@@ -699,6 +706,12 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
|
||||
}
|
||||
}
|
||||
c.mergeConsecutiveMessages = json.Get("mergeConsecutiveMessages").Bool()
|
||||
c.promoteThinkingOnEmpty = json.Get("promoteThinkingOnEmpty").Bool()
|
||||
c.hiclawMode = json.Get("hiclawMode").Bool()
|
||||
if c.hiclawMode {
|
||||
c.mergeConsecutiveMessages = true
|
||||
c.promoteThinkingOnEmpty = true
|
||||
}
|
||||
}
|
||||
|
||||
func (c *ProviderConfig) Validate() error {
|
||||
@@ -833,6 +846,10 @@ func (c *ProviderConfig) IsOriginal() bool {
|
||||
return c.protocol == protocolOriginal
|
||||
}
|
||||
|
||||
func (c *ProviderConfig) GetPromoteThinkingOnEmpty() bool {
|
||||
return c.promoteThinkingOnEmpty
|
||||
}
|
||||
|
||||
func (c *ProviderConfig) ReplaceByCustomSettings(body []byte) ([]byte, error) {
|
||||
return ReplaceByCustomSettings(body, c.customSettings)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user