From 8e7292c42ece84c06a42dc64fea8de50d934f842 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=BE=84=E6=BD=AD?= <zty98751@alibaba-inc.com>
Date: Sun, 15 Feb 2026 13:52:26 +0800
Subject: [PATCH] fix(ai-proxy): fix Claude protocol conversion issues (#3510)

---
 .../extensions/ai-proxy/provider/claude.go    | 41 ++++++++-----
 .../ai-proxy/provider/claude_to_openai.go     | 61 ++++++++++++++-----
 .../provider/claude_to_openai_test.go         | 16 +++--
 .../extensions/ai-proxy/provider/model.go     |  8 ++-
 4 files changed, 89 insertions(+), 37 deletions(-)

diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/claude.go b/plugins/wasm-go/extensions/ai-proxy/provider/claude.go
index dabbecca1..f2e57679b 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/claude.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/claude.go
@@ -237,13 +237,13 @@ type claudeTextGenResponse struct {
 }
 
 type claudeTextGenContent struct {
-	Type      string                 `json:"type,omitempty"`
-	Text      string                 `json:"text,omitempty"`
-	Id        string                 `json:"id,omitempty"`        // For tool_use
-	Name      string                 `json:"name,omitempty"`      // For tool_use
-	Input     map[string]interface{} `json:"input,omitempty"`     // For tool_use
-	Signature string                 `json:"signature,omitempty"` // For thinking
-	Thinking  string                 `json:"thinking,omitempty"`  // For thinking
+	Type      string                  `json:"type,omitempty"`      // Block kind; this patch uses "text", "thinking" and "tool_use"
+	Text      *string                 `json:"text,omitempty"`      // For text - pointer so "" is emitted as "text":"" while nil omits the field
+	Id        string                  `json:"id,omitempty"`        // For tool_use
+	Name      string                  `json:"name,omitempty"`      // For tool_use
+	Input     *map[string]interface{} `json:"input,omitempty"`     // For tool_use - pointer so an empty map is emitted as "input":{} while nil omits the field
+	Signature *string                 `json:"signature,omitempty"` // For thinking - pointer so "" is still emitted; nil omits the field
+	Thinking  *string                 `json:"thinking,omitempty"`  // For thinking - pointer so "" is still emitted; nil omits the field
 }
 
 type claudeTextGenUsage struct {
@@ -269,12 +269,12 @@ type claudeTextGenStreamResponse struct {
 }
 
 type claudeTextGenDelta struct {
-	Type         string  `json:"type"`
-	Text         string  `json:"text,omitempty"`
-	Thinking     string  `json:"thinking,omitempty"`
-	PartialJson  string  `json:"partial_json,omitempty"`
-	StopReason   *string `json:"stop_reason,omitempty"`
-	StopSequence *string `json:"stop_sequence,omitempty"`
+	Type         string          `json:"type,omitempty"`          // Delta kind such as "thinking_delta"; left empty (and thus omitted) in message_delta events
+	Text         string          `json:"text,omitempty"`          // Text payload; omitted when empty
+	Thinking     string          `json:"thinking,omitempty"`      // Reasoning payload carried by "thinking_delta"
+	PartialJson  string          `json:"partial_json,omitempty"`  // Serialized as partial_json; omitted when empty
+	StopReason   *string         `json:"stop_reason,omitempty"`   // Nil omits the field
+	StopSequence json.RawMessage `json:"stop_sequence,omitempty"` // Use RawMessage to output explicit null; a nil/empty value omits the field
 }
 
 func (c *claudeProviderInitializer) ValidateConfig(config *ProviderConfig) error {
@@ -598,11 +598,20 @@ func (c *claudeProvider) responseClaude2OpenAI(ctx wrapper.HttpContext, origResp
 	for _, content := range origResponse.Content {
 		switch content.Type {
 		case contentTypeText:
-			textContent = content.Text
+			if content.Text != nil {
+				textContent = *content.Text
+			}
 		case "thinking":
-			reasoningContent = content.Thinking
+			if content.Thinking != nil {
+				reasoningContent = *content.Thinking
+			}
 		case "tool_use":
-			args, _ := json.Marshal(content.Input)
+			var args []byte
+			if content.Input != nil {
+				args, _ = json.Marshal(*content.Input)
+			} else {
+				args = []byte("{}")
+			}
 			toolCalls = append(toolCalls, toolCall{
 				Id:   content.Id,
 				Type: "function",
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai.go b/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai.go
index 1df7821ba..4739f830e 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai.go
@@ -192,6 +192,7 @@ func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAI(body []byte) ([]b
 
 		if claudeRequest.Thinking.Type == "enabled" {
 			openaiRequest.ReasoningMaxTokens = claudeRequest.Thinking.BudgetTokens
+			openaiRequest.Thinking = &thinkingParam{Type: "enabled", BudgetToken: claudeRequest.Thinking.BudgetTokens}
 
 			// Set ReasoningEffort based on budget_tokens
 			// low: <4096, medium: >=4096 and <16384, high: >=16384
@@ -207,7 +208,10 @@ func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAI(body []byte) ([]b
 				claudeRequest.Thinking.BudgetTokens, openaiRequest.ReasoningEffort, openaiRequest.ReasoningMaxTokens)
 		}
 	} else {
-		log.Debugf("[Claude->OpenAI] No thinking config found")
+		// Explicitly disable thinking when not configured in Claude request
+		// This prevents providers like ZhipuAI from enabling thinking by default
+		openaiRequest.Thinking = &thinkingParam{Type: "disabled"}
+		log.Debugf("[Claude->OpenAI] No thinking config found, explicitly disabled")
 	}
 
 	result, err := json.Marshal(openaiRequest)
@@ -262,19 +266,21 @@ func (c *ClaudeToOpenAIConverter) ConvertOpenAIResponseToClaude(ctx wrapper.Http
 			}
 
 			if reasoningText != "" {
+				emptySignature := ""
 				contents = append(contents, claudeTextGenContent{
 					Type:      "thinking",
-					Signature: "", // OpenAI doesn't provide signature, use empty string
-					Thinking:  reasoningText,
+					Signature: &emptySignature, // Use pointer for empty string
+					Thinking:  &reasoningText,
 				})
 				log.Debugf("[OpenAI->Claude] Added thinking content: %s", reasoningText)
 			}
 
 			// Add text content if present
 			if choice.Message.StringContent() != "" {
+				textContent := choice.Message.StringContent()
 				contents = append(contents, claudeTextGenContent{
 					Type: "text",
-					Text: choice.Message.StringContent(),
+					Text: &textContent,
 				})
 			}
 
@@ -297,7 +303,7 @@ func (c *ClaudeToOpenAIConverter) ConvertOpenAIResponseToClaude(ctx wrapper.Http
 							Type:  "tool_use",
 							Id:    toolCall.Id,
 							Name:  toolCall.Function.Name,
-							Input: input,
+							Input: &input,
 						})
 					}
 				}
@@ -379,8 +385,8 @@ func (c *ClaudeToOpenAIConverter) ConvertOpenAIStreamResponseToClaude(ctx wrappe
 					messageDelta := &claudeTextGenStreamResponse{
 						Type: "message_delta",
 						Delta: &claudeTextGenDelta{
-							Type:       "message_delta",
-							StopReason: c.pendingStopReason,
+							StopReason:   c.pendingStopReason,
+							StopSequence: json.RawMessage("null"),
 						},
 					}
 					stopData, _ := json.Marshal(messageDelta)
@@ -524,13 +530,14 @@ func (c *ClaudeToOpenAIConverter) buildClaudeStreamResponse(ctx wrapper.HttpCont
 			c.nextContentIndex++
 			c.thinkingBlockStarted = true
 			log.Debugf("[OpenAI->Claude] Generated content_block_start event for thinking at index %d", c.thinkingBlockIndex)
+			emptyStr := ""
 			responses = append(responses, &claudeTextGenStreamResponse{
 				Type:  "content_block_start",
 				Index: &c.thinkingBlockIndex,
 				ContentBlock: &claudeTextGenContent{
 					Type:      "thinking",
-					Signature: "", // OpenAI doesn't provide signature
-					Thinking:  "",
+					Signature: &emptyStr, // Use pointer for empty string output
+					Thinking:  &emptyStr, // Use pointer for empty string output
 				},
 			})
 		}
@@ -541,8 +548,8 @@ func (c *ClaudeToOpenAIConverter) buildClaudeStreamResponse(ctx wrapper.HttpCont
 			Type:  "content_block_delta",
 			Index: &c.thinkingBlockIndex,
 			Delta: &claudeTextGenDelta{
-				Type: "thinking_delta", // Use thinking_delta for reasoning content
-				Text: reasoningText,
+				Type:     "thinking_delta",
+				Thinking: reasoningText, // Use Thinking field, not Text
 			},
 		})
 	}
@@ -573,12 +580,13 @@ func (c *ClaudeToOpenAIConverter) buildClaudeStreamResponse(ctx wrapper.HttpCont
 			c.nextContentIndex++
 			c.textBlockStarted = true
 			log.Debugf("[OpenAI->Claude] Generated content_block_start event for text at index %d", c.textBlockIndex)
+			emptyText := ""
 			responses = append(responses, &claudeTextGenStreamResponse{
 				Type:  "content_block_start",
 				Index: &c.textBlockIndex,
 				ContentBlock: &claudeTextGenContent{
 					Type: "text",
-					Text: "",
+					Text: &emptyText,
 				},
 			})
 		}
@@ -597,6 +605,30 @@ func (c *ClaudeToOpenAIConverter) buildClaudeStreamResponse(ctx wrapper.HttpCont
 
 	// Handle tool calls in streaming response
 	if choice.Delta != nil && len(choice.Delta.ToolCalls) > 0 {
+		// Ensure message_start is sent before any content blocks
+		if !c.messageStartSent {
+			c.messageId = openaiResponse.Id
+			c.messageStartSent = true
+			message := &claudeTextGenResponse{
+				Id:      openaiResponse.Id,
+				Type:    "message",
+				Role:    "assistant",
+				Model:   openaiResponse.Model,
+				Content: []claudeTextGenContent{},
+			}
+			if openaiResponse.Usage != nil {
+				message.Usage = claudeTextGenUsage{
+					InputTokens:  openaiResponse.Usage.PromptTokens,
+					OutputTokens: 0,
+				}
+			}
+			responses = append(responses, &claudeTextGenStreamResponse{
+				Type:    "message_start",
+				Message: message,
+			})
+			log.Debugf("[OpenAI->Claude] Generated message_start event before tool calls for id: %s", openaiResponse.Id)
+		}
+
 		// Initialize toolCallStates if needed
 		if c.toolCallStates == nil {
 			c.toolCallStates = make(map[int]*toolCallInfo)
@@ -741,7 +773,7 @@ func (c *ClaudeToOpenAIConverter) buildClaudeStreamResponse(ctx wrapper.HttpCont
 		messageDelta := &claudeTextGenStreamResponse{
 			Type: "message_delta",
 			Delta: &claudeTextGenDelta{
-				Type: "message_delta",
+				StopSequence: json.RawMessage("null"), // Explicit null per Claude spec
 			},
 			Usage: &claudeTextGenUsage{
 				InputTokens:  openaiResponse.Usage.PromptTokens,
@@ -895,6 +927,7 @@ func (c *ClaudeToOpenAIConverter) startToolCall(toolState *toolCallInfo) []*clau
 		toolState.claudeContentIndex, toolState.id, toolState.name)
 
 	// Send content_block_start
+	emptyInput := map[string]interface{}{}
 	responses = append(responses, &claudeTextGenStreamResponse{
 		Type:  "content_block_start",
 		Index: &toolState.claudeContentIndex,
@@ -902,7 +935,7 @@ func (c *ClaudeToOpenAIConverter) startToolCall(toolState *toolCallInfo) []*clau
 			Type:  "tool_use",
 			Id:    toolState.id,
 			Name:  toolState.name,
-			Input: map[string]interface{}{}, // Empty input as per Claude spec
+			Input: &emptyInput, // Empty input as per Claude spec
 		},
 	})
 
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai_test.go b/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai_test.go
index bf294b8a4..fdba0710b 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai_test.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai_test.go
@@ -624,7 +624,7 @@ func TestClaudeToOpenAIConverter_ConvertOpenAIResponseToClaude(t *testing.T) {
 		// First content should be text
 		textContent := claudeResponse.Content[0]
 		assert.Equal(t, "text", textContent.Type)
-		assert.Equal(t, "I'll analyze the README file to understand this project's purpose.", textContent.Text)
+		assert.Equal(t, "I'll analyze the README file to understand this project's purpose.", *textContent.Text)
 
 		// Second content should be tool_use
 		toolContent := claudeResponse.Content[1]
@@ -634,7 +634,7 @@ func TestClaudeToOpenAIConverter_ConvertOpenAIResponseToClaude(t *testing.T) {
 
 		// Verify tool arguments
 		require.NotNil(t, toolContent.Input)
-		assert.Equal(t, "/Users/zhangty/git/higress/README.md", toolContent.Input["file_path"])
+		assert.Equal(t, "/Users/zhangty/git/higress/README.md", (*toolContent.Input)["file_path"])
 	})
 }
 
@@ -837,20 +837,24 @@ func TestClaudeToOpenAIConverter_ConvertReasoningResponseToClaude(t *testing.T)
 				// First should be thinking
 				thinkingContent := claudeResponse.Content[0]
 				assert.Equal(t, "thinking", thinkingContent.Type)
-				assert.Equal(t, "", thinkingContent.Signature) // OpenAI doesn't provide signature
-				assert.Contains(t, thinkingContent.Thinking, "Let me think about this step by step")
+				require.NotNil(t, thinkingContent.Signature)
+				assert.Equal(t, "", *thinkingContent.Signature) // OpenAI doesn't provide signature
+				require.NotNil(t, thinkingContent.Thinking)
+				assert.Contains(t, *thinkingContent.Thinking, "Let me think about this step by step")
 
 				// Second should be text
 				textContent := claudeResponse.Content[1]
 				assert.Equal(t, "text", textContent.Type)
-				assert.Equal(t, tt.expectedText, textContent.Text)
+				require.NotNil(t, textContent.Text)
+				assert.Equal(t, tt.expectedText, *textContent.Text)
 			} else {
 				// Should only have text content
 				assert.Len(t, claudeResponse.Content, 1)
 
 				textContent := claudeResponse.Content[0]
 				assert.Equal(t, "text", textContent.Type)
-				assert.Equal(t, tt.expectedText, textContent.Text)
+				require.NotNil(t, textContent.Text)
+				assert.Equal(t, tt.expectedText, *textContent.Text)
 			}
 		})
 	}
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/model.go b/plugins/wasm-go/extensions/ai-proxy/provider/model.go
index 3a11a279b..465bccc3b 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/model.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/model.go
@@ -30,7 +30,13 @@ const (
 )
 
 type NonOpenAIStyleOptions struct {
-	ReasoningMaxTokens int `json:"reasoning_max_tokens,omitempty"`
+	ReasoningMaxTokens int            `json:"reasoning_max_tokens,omitempty"` // Omitted when zero
+	Thinking           *thinkingParam `json:"thinking,omitempty"`             // Nil omits the field; the Claude->OpenAI request conversion sets it explicitly
+}
+
+type thinkingParam struct {
+	Type        string `json:"type,omitempty"`         // "enabled" or "disabled" as set by the Claude->OpenAI request conversion
+	BudgetToken int    `json:"budget_token,omitempty"` // Omitted when zero. NOTE(review): key is singular while the Claude request uses budget_tokens - confirm the target provider expects this name
 }
 
 type chatCompletionRequest struct {