feat(ai-proxy): add promoteThinkingOnEmpty and hiclawMode config options (#3625)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
澄潭
2026-03-20 00:39:47 +08:00
committed by GitHub
parent 045238944d
commit ca7ee6ef5f
6 changed files with 442 additions and 1 deletions

View File

@@ -255,6 +255,70 @@ func (m *chatMessage) handleStreamingReasoningContent(ctx wrapper.HttpContext, r
}
}
// promoteThinkingOnEmpty promotes reasoning text to content for every choice whose
// message content is empty (nil or whitespace-only, as decided by isContentEmpty).
// This handles models that put user-facing replies into thinking blocks instead of
// text blocks.
//
// The reasoning text is read from ReasoningContent, falling back to Reasoning when
// ReasoningContent is empty. This is the same precedence that
// promoteStreamingThinkingOnEmpty applies to streamed deltas, so streaming and
// non-streaming responses are promoted consistently. After a promotion both
// reasoning fields are cleared so the text is not reported twice.
//
// Choices with no message, with non-empty content, or with no reasoning text are
// left untouched.
func (r *chatCompletionResponse) promoteThinkingOnEmpty() {
	for i := range r.Choices {
		msg := r.Choices[i].Message
		if msg == nil || !isContentEmpty(msg.Content) {
			continue
		}
		reasoning := msg.ReasoningContent
		if reasoning == "" {
			// Some upstreams use the "reasoning" field instead of "reasoning_content".
			reasoning = msg.Reasoning
		}
		if reasoning == "" {
			continue
		}
		msg.Content = reasoning
		msg.ReasoningContent = ""
		msg.Reasoning = ""
	}
}
// promoteStreamingThinkingOnEmpty handles a single streamed delta message for the
// "promote thinking on empty" feature.
//
// While no content has been observed on the stream, any reasoning text found in the
// delta (ReasoningContent first, Reasoning as fallback) is appended to the buffer
// stored under ctxKeyBufferedReasoning and removed from the delta. The first delta
// carrying non-empty content sets ctxKeyHasContentDelta; from then on deltas are
// left untouched and PromoteStreamingThinkingFlush will emit nothing.
//
// Call PromoteStreamingThinkingFlush at the end of the stream to emit the buffered
// reasoning as content if no content was ever seen.
//
// It returns true only when reasoning was stripped from msg, i.e. the enclosing
// chunk has to be re-serialized.
func promoteStreamingThinkingOnEmpty(ctx wrapper.HttpContext, msg *chatMessage) bool {
	if msg == nil {
		return false
	}
	if seen, _ := ctx.GetContext(ctxKeyHasContentDelta).(bool); seen {
		return false
	}
	if !isContentEmpty(msg.Content) {
		// Real content showed up: remember it so nothing is promoted later.
		ctx.SetContext(ctxKeyHasContentDelta, true)
		return false
	}

	thinking := msg.ReasoningContent
	if thinking == "" {
		thinking = msg.Reasoning
	}
	if thinking == "" {
		return false
	}

	// Stash the reasoning for a possible flush and hide it from the client for now.
	pending, _ := ctx.GetContext(ctxKeyBufferedReasoning).(string)
	ctx.SetContext(ctxKeyBufferedReasoning, pending+thinking)
	msg.ReasoningContent, msg.Reasoning = "", ""
	return true
}
// isContentEmpty reports whether a message content value carries no visible text.
// A nil value and a string made up solely of whitespace count as empty; a value of
// any other dynamic type is treated as non-empty.
func isContentEmpty(content any) bool {
	if content == nil {
		return true
	}
	text, isString := content.(string)
	if !isString {
		return false
	}
	return len(strings.TrimSpace(text)) == 0
}
type chatMessageContent struct {
CacheControl map[string]interface{} `json:"cache_control,omitempty"`
Type string `json:"type,omitempty"`
@@ -648,3 +712,87 @@ func (r embeddingsRequest) ParseInput() []string {
}
return input
}
// PromoteThinkingOnEmptyResponse promotes reasoning text to content in a
// non-streaming response body for every choice whose message content is empty
// (nil or whitespace-only, as decided by isContentEmpty).
//
// The reasoning text is read from ReasoningContent, falling back to Reasoning when
// ReasoningContent is empty. This is the same precedence the streaming path
// (promoteStreamingThinkingOnEmpty) uses. Both reasoning fields are cleared on a
// promoted message.
//
// Returns:
//   - the original body and a wrapped error when body cannot be unmarshalled;
//   - the original body and a nil error when no choice needed promotion;
//   - otherwise the result of re-marshalling the modified response.
func PromoteThinkingOnEmptyResponse(body []byte) ([]byte, error) {
	var resp chatCompletionResponse
	if err := json.Unmarshal(body, &resp); err != nil {
		// %w keeps the underlying error inspectable via errors.Is / errors.As.
		return body, fmt.Errorf("unable to unmarshal response for thinking promotion: %w", err)
	}
	promoted := false
	for i := range resp.Choices {
		msg := resp.Choices[i].Message
		if msg == nil || !isContentEmpty(msg.Content) {
			continue
		}
		reasoning := msg.ReasoningContent
		if reasoning == "" {
			// Some upstreams use the "reasoning" field instead of "reasoning_content".
			reasoning = msg.Reasoning
		}
		if reasoning == "" {
			continue
		}
		msg.Content = reasoning
		msg.ReasoningContent = ""
		msg.Reasoning = ""
		promoted = true
	}
	if !promoted {
		// Hand back the untouched bytes to avoid a needless re-serialization.
		return body, nil
	}
	return json.Marshal(resp)
}
// PromoteStreamingThinkingOnEmptyChunk runs promoteStreamingThinkingOnEmpty on the
// delta of every choice in one streamed chunk, which buffers reasoning deltas and
// strips them from the chunk.
//
// If data does not unmarshal as a chat completion response it is returned as-is
// with a nil error. If no delta was modified the original bytes are returned;
// otherwise the chunk is re-marshalled. Call PromoteStreamingThinkingFlush on the
// last chunk to emit the buffered reasoning as content if no real content was ever
// seen.
func PromoteStreamingThinkingOnEmptyChunk(ctx wrapper.HttpContext, data []byte) ([]byte, error) {
	var chunk chatCompletionResponse
	if json.Unmarshal(data, &chunk) != nil {
		// Not a valid chat completion chunk: pass it through untouched.
		return data, nil
	}

	stripped := false
	for idx := range chunk.Choices {
		delta := chunk.Choices[idx].Delta
		if delta != nil && promoteStreamingThinkingOnEmpty(ctx, delta) {
			stripped = true
		}
	}

	if stripped {
		return json.Marshal(chunk)
	}
	return data, nil
}
// PromoteStreamingThinkingFlush builds the final SSE event that delivers the
// buffered reasoning as content when the stream never produced any content.
//
// It returns nil when content was already seen (ctxKeyHasContentDelta is true),
// when nothing was buffered under ctxKeyBufferedReasoning, or when the chunk cannot
// be marshalled. Otherwise it returns "data: <json>\n\n", where <json> is a minimal
// chat.completion.chunk with a single choice (index 0) whose delta content is the
// buffered reasoning.
func PromoteStreamingThinkingFlush(ctx wrapper.HttpContext) []byte {
	if seen, _ := ctx.GetContext(ctxKeyHasContentDelta).(bool); seen {
		return nil
	}
	pending, _ := ctx.GetContext(ctxKeyBufferedReasoning).(string)
	if pending == "" {
		return nil
	}

	// Minimal chat.completion.chunk carrying the buffered reasoning as content.
	delta := &chatMessage{Content: pending}
	chunk := chatCompletionResponse{
		Object:  objectChatCompletionChunk,
		Choices: []chatCompletionChoice{{Index: 0, Delta: delta}},
	}
	payload, err := json.Marshal(chunk)
	if err != nil {
		return nil
	}

	// Wrap the JSON payload as a single SSE event.
	frame := make([]byte, 0, len("data: ")+len(payload)+len("\n\n"))
	frame = append(frame, "data: "...)
	frame = append(frame, payload...)
	frame = append(frame, "\n\n"...)
	return frame
}

View File

@@ -178,6 +178,8 @@ const (
ctxKeyPushedMessage = "pushedMessage"
ctxKeyContentPushed = "contentPushed"
ctxKeyReasoningContentPushed = "reasoningContentPushed"
ctxKeyHasContentDelta = "hasContentDelta"
ctxKeyBufferedReasoning = "bufferedReasoning"
objectChatCompletion = "chat.completion"
objectChatCompletionChunk = "chat.completion.chunk"
@@ -474,6 +476,12 @@ type ProviderConfig struct {
// @Title zh-CN 合并连续同角色消息
// @Description zh-CN 开启后,若请求的 messages 中存在连续的同角色消息(如连续两条 user 消息),将其内容合并为一条,以满足要求严格轮流交替(user→assistant→user→...)的模型服务商的要求。
mergeConsecutiveMessages bool `required:"false" yaml:"mergeConsecutiveMessages" json:"mergeConsecutiveMessages"`
// @Title zh-CN 空内容时提升思考为正文
// @Description zh-CN 开启后,若模型响应只包含 reasoning_content/thinking 而没有正文内容,将 reasoning 内容提升为正文内容返回,避免客户端收到空回复。
promoteThinkingOnEmpty bool `required:"false" yaml:"promoteThinkingOnEmpty" json:"promoteThinkingOnEmpty"`
// @Title zh-CN HiClaw 模式
// @Description zh-CN 开启后同时启用 mergeConsecutiveMessages 和 promoteThinkingOnEmpty适用于 HiClaw 多 Agent 协作场景。
hiclawMode bool `required:"false" yaml:"hiclawMode" json:"hiclawMode"`
}
func (c *ProviderConfig) GetId() string {
@@ -699,6 +707,12 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
}
}
c.mergeConsecutiveMessages = json.Get("mergeConsecutiveMessages").Bool()
c.promoteThinkingOnEmpty = json.Get("promoteThinkingOnEmpty").Bool()
c.hiclawMode = json.Get("hiclawMode").Bool()
if c.hiclawMode {
c.mergeConsecutiveMessages = true
c.promoteThinkingOnEmpty = true
}
}
func (c *ProviderConfig) Validate() error {
@@ -833,6 +847,10 @@ func (c *ProviderConfig) IsOriginal() bool {
return c.protocol == protocolOriginal
}
// GetPromoteThinkingOnEmpty returns the value of the promoteThinkingOnEmpty
// setting held by this provider config.
func (c *ProviderConfig) GetPromoteThinkingOnEmpty() bool {
return c.promoteThinkingOnEmpty
}
// ReplaceByCustomSettings delegates to the package-level ReplaceByCustomSettings
// function, passing body together with this config's customSettings, and returns
// that function's result unchanged.
func (c *ProviderConfig) ReplaceByCustomSettings(body []byte) ([]byte, error) {
return ReplaceByCustomSettings(body, c.customSettings)
}