feat(ai-proxy): add promoteThinkingOnEmpty and hiclawMode config options

When some models (e.g. kimi-k2.5) put user-facing replies into
reasoning_content/thinking blocks without generating text content,
downstream clients receive empty responses. This adds a new
promoteThinkingOnEmpty option that promotes reasoning content to
text content when the response has no text block.

Also adds hiclawMode as a convenience flag that enables both
mergeConsecutiveMessages and promoteThinkingOnEmpty for multi-agent
collaboration scenarios.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
johnlanni
2026-03-19 23:28:17 +08:00
parent 045238944d
commit bdfe9950ce
4 changed files with 410 additions and 1 deletions

View File

@@ -255,6 +255,65 @@ func (m *chatMessage) handleStreamingReasoningContent(ctx wrapper.HttpContext, r
}
}
// promoteThinkingOnEmpty promotes reasoning text to content for every choice whose
// message content is empty (nil or a whitespace-only string, as decided by
// isContentEmpty). This handles models that put user-facing replies into thinking
// blocks instead of text blocks.
//
// The reasoning text is taken from ReasoningContent and falls back to Reasoning, the
// same order promoteStreamingThinkingOnEmpty uses for streamed deltas. After a
// promotion both reasoning fields are cleared so the text is not returned twice.
// Choices without a message, with non-empty content, or without any reasoning text
// are left untouched.
func (r *chatCompletionResponse) promoteThinkingOnEmpty() {
	for i := range r.Choices {
		msg := r.Choices[i].Message
		if msg == nil || !isContentEmpty(msg.Content) {
			continue
		}
		reasoning := msg.ReasoningContent
		if reasoning == "" {
			// Fall back to the alternate reasoning field, as the streaming path does.
			reasoning = msg.Reasoning
		}
		if reasoning == "" {
			continue
		}
		msg.Content = reasoning
		msg.ReasoningContent = ""
		msg.Reasoning = ""
	}
}
// promoteStreamingThinkingOnEmpty rewrites a streamed delta so that its reasoning text
// is delivered as content, as long as no content delta has been observed earlier in
// the same stream. The "content seen" flag is kept in ctx under ctxKeyHasContentDelta
// and is set the first time a delta with non-empty content passes through here; from
// then on every delta is left untouched.
//
// The reasoning text comes from ReasoningContent, falling back to Reasoning; both
// fields are cleared on promotion. It returns true only when msg was modified.
//
// NOTE(review): the decision is made per chunk from what has been seen so far, so
// reasoning deltas that arrive before the first content delta are promoted even when
// real content follows later in the stream — confirm this is acceptable for models
// that emit both.
func promoteStreamingThinkingOnEmpty(ctx wrapper.HttpContext, msg *chatMessage) bool {
	if msg == nil {
		return false
	}
	// A missing or non-bool context value counts as "no content seen yet".
	if seen, ok := ctx.GetContext(ctxKeyHasContentDelta).(bool); ok && seen {
		return false
	}
	if !isContentEmpty(msg.Content) {
		// First real content in this stream: remember it and stop promoting.
		ctx.SetContext(ctxKeyHasContentDelta, true)
		return false
	}

	text := msg.ReasoningContent
	if text == "" {
		text = msg.Reasoning
	}
	if text == "" {
		return false
	}

	msg.Content, msg.ReasoningContent, msg.Reasoning = text, "", ""
	return true
}
// isContentEmpty reports whether content carries no text. A nil value and a string
// that is empty or whitespace-only count as empty; a value of any other dynamic type
// (for example a slice of content parts) is always treated as non-empty.
func isContentEmpty(content any) bool {
	if content == nil {
		return true
	}
	text, isString := content.(string)
	if !isString {
		return false
	}
	return len(strings.TrimSpace(text)) == 0
}
type chatMessageContent struct {
CacheControl map[string]interface{} `json:"cache_control,omitempty"`
Type string `json:"type,omitempty"`
@@ -648,3 +707,54 @@ func (r embeddingsRequest) ParseInput() []string {
}
return input
}
// PromoteThinkingOnEmptyResponse promotes reasoning text to content in a non-streaming
// chat completion response body, for every choice whose message content is empty
// (nil or a whitespace-only string, as decided by isContentEmpty).
//
// The reasoning text is taken from ReasoningContent and falls back to Reasoning, the
// same order used for streamed deltas; both fields are cleared on promotion.
//
// Returns:
//   - the original body and a nil error when no choice needed promotion;
//   - the re-encoded body and a nil error when at least one choice was promoted;
//   - the original body and a wrapped error when the body cannot be decoded or the
//     modified response cannot be re-encoded, so callers can still forward the
//     upstream payload.
//
// NOTE(review): a promoted response is re-encoded from chatCompletionResponse, so any
// upstream fields that struct does not model would not survive — confirm acceptable.
func PromoteThinkingOnEmptyResponse(body []byte) ([]byte, error) {
	var resp chatCompletionResponse
	if err := json.Unmarshal(body, &resp); err != nil {
		return body, fmt.Errorf("unable to unmarshal response for thinking promotion: %w", err)
	}

	promoted := false
	for i := range resp.Choices {
		msg := resp.Choices[i].Message
		if msg == nil || !isContentEmpty(msg.Content) {
			continue
		}
		reasoning := msg.ReasoningContent
		if reasoning == "" {
			// Fall back to the alternate reasoning field, as the streaming path does.
			reasoning = msg.Reasoning
		}
		if reasoning == "" {
			continue
		}
		msg.Content = reasoning
		msg.ReasoningContent = ""
		msg.Reasoning = ""
		promoted = true
	}
	if !promoted {
		// Avoid a decode/encode round trip when nothing changed.
		return body, nil
	}

	out, err := json.Marshal(resp)
	if err != nil {
		return body, fmt.Errorf("unable to marshal response after thinking promotion: %w", err)
	}
	return out, nil
}
// PromoteStreamingThinkingOnEmptyChunk applies promoteStreamingThinkingOnEmpty to the
// delta of every choice in one streaming SSE data payload, so reasoning deltas are
// delivered as content deltas while no content has been seen in the stream so far.
//
// A payload that does not decode as a chat completion chunk is passed through
// unchanged with a nil error. When no delta was modified the original bytes are
// returned as well; otherwise the chunk is re-encoded, and the only error this
// function can return is the one from that re-encoding.
func PromoteStreamingThinkingOnEmptyChunk(ctx wrapper.HttpContext, data []byte) ([]byte, error) {
	var chunk chatCompletionResponse
	if json.Unmarshal(data, &chunk) != nil {
		// Deliberately not an error: not a valid chat completion chunk, skip.
		return data, nil
	}

	changed := false
	for idx := range chunk.Choices {
		delta := chunk.Choices[idx].Delta
		// Every non-nil delta must be visited: the helper also records in ctx
		// whether content has been seen.
		if delta != nil && promoteStreamingThinkingOnEmpty(ctx, delta) {
			changed = true
		}
	}

	if changed {
		return json.Marshal(chunk)
	}
	return data, nil
}

View File

@@ -178,6 +178,7 @@ const (
ctxKeyPushedMessage = "pushedMessage"
ctxKeyContentPushed = "contentPushed"
ctxKeyReasoningContentPushed = "reasoningContentPushed"
ctxKeyHasContentDelta = "hasContentDelta"
objectChatCompletion = "chat.completion"
objectChatCompletionChunk = "chat.completion.chunk"
@@ -474,6 +475,12 @@ type ProviderConfig struct {
// @Title zh-CN 合并连续同角色消息
// @Description zh-CN 开启后,若请求的 messages 中存在连续的同角色消息(如连续两条 user 消息),将其内容合并为一条,以满足要求严格轮流交替(user→assistant→user→...)的模型服务商的要求。
mergeConsecutiveMessages bool `required:"false" yaml:"mergeConsecutiveMessages" json:"mergeConsecutiveMessages"`
// @Title zh-CN 空内容时提升思考为正文
// @Description zh-CN 开启后,若模型响应只包含 reasoning_content/thinking 而没有正文内容,将 reasoning 内容提升为正文内容返回,避免客户端收到空回复。
promoteThinkingOnEmpty bool `required:"false" yaml:"promoteThinkingOnEmpty" json:"promoteThinkingOnEmpty"`
// @Title zh-CN HiClaw 模式
// @Description zh-CN 开启后,同时启用 mergeConsecutiveMessages 和 promoteThinkingOnEmpty,适用于 HiClaw 多 Agent 协作场景。
hiclawMode bool `required:"false" yaml:"hiclawMode" json:"hiclawMode"`
}
func (c *ProviderConfig) GetId() string {
@@ -699,6 +706,12 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
}
}
c.mergeConsecutiveMessages = json.Get("mergeConsecutiveMessages").Bool()
c.promoteThinkingOnEmpty = json.Get("promoteThinkingOnEmpty").Bool()
c.hiclawMode = json.Get("hiclawMode").Bool()
if c.hiclawMode {
c.mergeConsecutiveMessages = true
c.promoteThinkingOnEmpty = true
}
}
func (c *ProviderConfig) Validate() error {
@@ -833,6 +846,10 @@ func (c *ProviderConfig) IsOriginal() bool {
return c.protocol == protocolOriginal
}
// GetPromoteThinkingOnEmpty reports whether the promoteThinkingOnEmpty option is
// enabled for this provider. FromJson sets the flag from the "promoteThinkingOnEmpty"
// key and forces it on when "hiclawMode" is true.
func (c *ProviderConfig) GetPromoteThinkingOnEmpty() bool {
	enabled := c.promoteThinkingOnEmpty
	return enabled
}
// ReplaceByCustomSettings runs the package-level ReplaceByCustomSettings on body using
// this provider's customSettings and returns its result and error unchanged.
func (c *ProviderConfig) ReplaceByCustomSettings(body []byte) ([]byte, error) {
	replaced, err := ReplaceByCustomSettings(body, c.customSettings)
	return replaced, err
}