fix(ai-proxy): resolve Claude streaming response conversion and SSE event chunking issues (#2882)
@@ -406,7 +406,8 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
 		return chunk
 	}
 	if len(outputEvents) == 0 {
-		responseBuilder.WriteString(event.ToHttpString())
+		// no need to convert, keep original events
+		responseBuilder.WriteString(event.RawEvent)
 	} else {
 		for _, outputEvent := range outputEvents {
 			responseBuilder.WriteString(outputEvent.ToHttpString())

@@ -138,7 +138,7 @@ type claudeSystemPrompt struct {
 	// Will be set to the string value if system is a simple string
 	StringValue string
 	// Will be set to the array value if system is an array of text blocks
-	ArrayValue []claudeTextGenContent
+	ArrayValue []claudeChatMessageContent
 	// Indicates which type this represents
 	IsArray bool
 }

@@ -154,7 +154,7 @@ func (csp *claudeSystemPrompt) UnmarshalJSON(data []byte) error {
 	}
 
 	// Try to unmarshal as array of text blocks
-	var arrayValue []claudeTextGenContent
+	var arrayValue []claudeChatMessageContent
 	if err := json.Unmarshal(data, &arrayValue); err == nil {
 		csp.ArrayValue = arrayValue
 		csp.IsArray = true
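
For context on the type swap above: Claude's `system` field accepts either a plain string or an array of text blocks, which is why `claudeSystemPrompt` carries a custom unmarshaller. A standalone sketch of the try-string-then-array pattern (type names mirror the diff, but the field sets are trimmed for illustration):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed stand-in for the plugin's content block type.
type claudeChatMessageContent struct {
	Type string `json:"type"`
	Text string `json:"text"`
}

type claudeSystemPrompt struct {
	StringValue string
	ArrayValue  []claudeChatMessageContent
	IsArray     bool
}

func (csp *claudeSystemPrompt) UnmarshalJSON(data []byte) error {
	// Try the simple string form first.
	var s string
	if err := json.Unmarshal(data, &s); err == nil {
		csp.StringValue = s
		return nil
	}
	// Fall back to the array-of-text-blocks form.
	var arr []claudeChatMessageContent
	if err := json.Unmarshal(data, &arr); err == nil {
		csp.ArrayValue = arr
		csp.IsArray = true
		return nil
	}
	return fmt.Errorf("system must be a string or an array of text blocks")
}

func main() {
	var a, b claudeSystemPrompt
	_ = json.Unmarshal([]byte(`"You are a helpful assistant."`), &a)
	_ = json.Unmarshal([]byte(`[{"type":"text","text":"xxx"},{"type":"text","text":"yyy"}]`), &b)
	fmt.Println(a.StringValue, b.IsArray, len(b.ArrayValue)) // You are a helpful assistant. true 2
}
```
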
@@ -196,7 +196,7 @@ type claudeThinkingConfig struct {
 type claudeTextGenRequest struct {
 	Model         string              `json:"model"`
 	Messages      []claudeChatMessage `json:"messages"`
-	System        claudeSystemPrompt  `json:"system,omitempty"`
+	System        *claudeSystemPrompt `json:"system,omitempty"`
 	MaxTokens     int                 `json:"max_tokens,omitempty"`
 	StopSequences []string            `json:"stop_sequences,omitempty"`
 	Stream        bool                `json:"stream,omitempty"`
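
The switch to `*claudeSystemPrompt` matters because encoding/json's `omitempty` never omits a non-pointer struct, so a value-typed `System` would always emit a `"system"` key even when no system prompt was set. A minimal demonstration:

```go
package main

import (
	"encoding/json"
	"fmt"
)

type prompt struct{ StringValue string }

type reqValue struct {
	System prompt `json:"system,omitempty"` // struct value: omitempty has no effect
}

type reqPtr struct {
	System *prompt `json:"system,omitempty"` // nil pointer: field is dropped
}

func main() {
	a, _ := json.Marshal(reqValue{})
	b, _ := json.Marshal(reqPtr{})
	fmt.Println(string(a)) // {"system":{"StringValue":""}}
	fmt.Println(string(b)) // {}
}
```
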
@@ -232,9 +232,11 @@ type claudeTextGenContent struct {
 }
 
 type claudeTextGenUsage struct {
-	InputTokens  int    `json:"input_tokens,omitempty"`
-	OutputTokens int    `json:"output_tokens,omitempty"`
-	ServiceTier  string `json:"service_tier,omitempty"`
+	InputTokens              int    `json:"input_tokens,omitempty"`
+	OutputTokens             int    `json:"output_tokens,omitempty"`
+	CacheReadInputTokens     int    `json:"cache_read_input_tokens,omitempty"`
+	CacheCreationInputTokens int    `json:"cache_creation_input_tokens,omitempty"`
+	ServiceTier              string `json:"service_tier,omitempty"`
 }
 
 type claudeTextGenError struct {

@@ -254,6 +256,7 @@ type claudeTextGenStreamResponse struct {
 type claudeTextGenDelta struct {
 	Type         string  `json:"type"`
 	Text         string  `json:"text,omitempty"`
+	PartialJson  string  `json:"partial_json,omitempty"`
 	StopReason   *string `json:"stop_reason,omitempty"`
 	StopSequence *string `json:"stop_sequence,omitempty"`
 }

@@ -401,7 +404,7 @@ func (c *claudeProvider) buildClaudeTextGenRequest(origRequest *chatCompletionRe
 
 	for _, message := range origRequest.Messages {
 		if message.Role == roleSystem {
-			claudeRequest.System = claudeSystemPrompt{
+			claudeRequest.System = &claudeSystemPrompt{
 				StringValue: message.StringContent(),
 				IsArray:     false,
 			}

@@ -622,12 +625,12 @@ func (c *claudeProvider) insertHttpContextMessage(body []byte, content string, o
 
 	systemStr := request.System.String()
 	if systemStr == "" {
-		request.System = claudeSystemPrompt{
+		request.System = &claudeSystemPrompt{
 			StringValue: content,
 			IsArray:     false,
 		}
 	} else {
-		request.System = claudeSystemPrompt{
+		request.System = &claudeSystemPrompt{
 			StringValue: content + "\n" + systemStr,
 			IsArray:     false,
 		}

@@ -29,7 +29,18 @@ type ClaudeToOpenAIConverter struct {
 	toolBlockStarted bool
 	toolBlockStopped bool
 	// Tool call state tracking
-	toolCallStates map[string]*toolCallState
+	toolCallStates  map[int]*toolCallInfo // Map of OpenAI index to tool call state
+	activeToolIndex *int                  // Currently active tool call index (for Claude serialization)
 }
 
+// toolCallInfo tracks tool call state
+type toolCallInfo struct {
+	id                  string // Tool call ID
+	name                string // Tool call name
+	claudeContentIndex  int    // Claude content block index
+	contentBlockStarted bool   // Whether content_block_start has been sent
+	contentBlockStopped bool   // Whether content_block_stop has been sent
+	cachedArguments     string // Cache arguments for this tool call
+}
+
 // contentConversionResult represents the result of converting Claude content to OpenAI format
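
Background for the `map[string]*toolCallState` to `map[int]*toolCallInfo` change: in OpenAI streaming, continuation fragments of a tool call repeat only the `index` and usually omit the `id` and function name, so the index is the reliable join key. Hypothetical deltas showing the shape:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed shape of an OpenAI streaming tool-call delta. Later fragments of
// the same call carry the index but typically no id or name, which is why
// the converter now keys its state map by index rather than id.
type toolCallDelta struct {
	Index    int    `json:"index"`
	Id       string `json:"id,omitempty"`
	Function struct {
		Name      string `json:"name,omitempty"`
		Arguments string `json:"arguments,omitempty"`
	} `json:"function"`
}

func main() {
	chunks := []string{
		`{"index":0,"id":"call_abc","function":{"name":"get_weather","arguments":""}}`,
		`{"index":0,"function":{"arguments":"{\"city\":\"Paris\"}"}}`, // no id here
	}
	for _, c := range chunks {
		var d toolCallDelta
		_ = json.Unmarshal([]byte(c), &d)
		fmt.Printf("index=%d id=%q args=%q\n", d.Index, d.Id, d.Function.Arguments)
	}
}
```
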
@@ -41,14 +52,6 @@ type contentConversionResult struct {
 	hasNonTextContent bool
 }
 
-// toolCallState tracks the state of a tool call during streaming
-type toolCallState struct {
-	id              string
-	name            string
-	argumentsBuffer string
-	isComplete      bool
-}
-
 // ConvertClaudeRequestToOpenAI converts a Claude chat completion request to OpenAI format
 func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAI(body []byte) ([]byte, error) {
 	log.Debugf("[Claude->OpenAI] Original Claude request body: %s", string(body))

@@ -114,18 +117,9 @@ func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAI(body []byte) ([]b
 
 		// Handle regular content if no tool calls or tool results
 		if len(conversionResult.toolCalls) == 0 && len(conversionResult.toolResults) == 0 {
-			var content interface{}
-			if !conversionResult.hasNonTextContent && len(conversionResult.textParts) > 0 {
-				// Simple text content
-				content = strings.Join(conversionResult.textParts, "\n\n")
-			} else {
-				// Multi-modal content or empty content
-				content = conversionResult.openaiContents
-			}
-
 			openaiMsg := chatMessage{
 				Role:    claudeMsg.Role,
-				Content: content,
+				Content: conversionResult.openaiContents,
 			}
 			openaiRequest.Messages = append(openaiRequest.Messages, openaiMsg)
 		}

@@ -133,11 +127,13 @@ func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAI(body []byte) ([]b
 	}
 
 	// Handle system message - Claude has separate system field
-	systemStr := claudeRequest.System.String()
-	if systemStr != "" {
-		systemMsg := chatMessage{
-			Role:    roleSystem,
-			Content: systemStr,
+	if claudeRequest.System != nil {
+		systemMsg := chatMessage{Role: roleSystem}
+		if !claudeRequest.System.IsArray {
+			systemMsg.Content = claudeRequest.System.StringValue
+		} else {
+			conversionResult := c.convertContentArray(claudeRequest.System.ArrayValue)
+			systemMsg.Content = conversionResult.openaiContents
 		}
 		// Insert system message at the beginning
 		openaiRequest.Messages = append([]chatMessage{systemMsg}, openaiRequest.Messages...)

@@ -231,6 +227,9 @@ func (c *ClaudeToOpenAIConverter) ConvertOpenAIResponseToClaude(ctx wrapper.Http
 			InputTokens:  openaiResponse.Usage.PromptTokens,
 			OutputTokens: openaiResponse.Usage.CompletionTokens,
 		}
+		if openaiResponse.Usage.PromptTokensDetails != nil {
+			claudeResponse.Usage.CacheReadInputTokens = openaiResponse.Usage.PromptTokensDetails.CachedTokens
+		}
 	}
 
 	// Convert the first choice content
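
The cache-token mapping here is direct: OpenAI reports cached prompt tokens under `usage.prompt_tokens_details.cached_tokens`, and Claude reports them as `usage.cache_read_input_tokens`. A trimmed sketch of the conversion (hypothetical payload):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed copies of the structs in this diff, for illustration only.
type promptTokensDetails struct {
	CachedTokens int `json:"cached_tokens,omitempty"`
}

type openaiUsage struct {
	PromptTokens        int                  `json:"prompt_tokens,omitempty"`
	CompletionTokens    int                  `json:"completion_tokens,omitempty"`
	PromptTokensDetails *promptTokensDetails `json:"prompt_tokens_details,omitempty"`
}

type claudeUsage struct {
	InputTokens          int `json:"input_tokens,omitempty"`
	OutputTokens         int `json:"output_tokens,omitempty"`
	CacheReadInputTokens int `json:"cache_read_input_tokens,omitempty"`
}

func main() {
	// Hypothetical OpenAI usage block with a prompt-cache hit.
	raw := `{"prompt_tokens":120,"completion_tokens":30,"prompt_tokens_details":{"cached_tokens":100}}`
	var in openaiUsage
	_ = json.Unmarshal([]byte(raw), &in)

	out := claudeUsage{InputTokens: in.PromptTokens, OutputTokens: in.CompletionTokens}
	if in.PromptTokensDetails != nil {
		out.CacheReadInputTokens = in.PromptTokensDetails.CachedTokens
	}
	b, _ := json.Marshal(out)
	fmt.Println(string(b)) // {"input_tokens":120,"output_tokens":30,"cache_read_input_tokens":100}
}
```
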
@@ -312,11 +311,6 @@ func (c *ClaudeToOpenAIConverter) ConvertOpenAIResponseToClaude(ctx wrapper.Http
 func (c *ClaudeToOpenAIConverter) ConvertOpenAIStreamResponseToClaude(ctx wrapper.HttpContext, chunk []byte) ([]byte, error) {
 	log.Debugf("[OpenAI->Claude] Original OpenAI streaming chunk: %s", string(chunk))
 
-	// Initialize tool call states if needed
-	if c.toolCallStates == nil {
-		c.toolCallStates = make(map[string]*toolCallState)
-	}
-
 	// For streaming responses, we need to handle the Server-Sent Events format
 	lines := strings.Split(string(chunk), "\n")
 	var result strings.Builder

@@ -350,15 +344,18 @@ func (c *ClaudeToOpenAIConverter) ConvertOpenAIStreamResponseToClaude(ctx wrappe
 				stopData, _ := json.Marshal(stopEvent)
 				result.WriteString(fmt.Sprintf("data: %s\n\n", stopData))
 			}
-			if c.toolBlockStarted && !c.toolBlockStopped {
-				c.toolBlockStopped = true
-				log.Debugf("[OpenAI->Claude] Sending final tool content_block_stop event at index %d", c.toolBlockIndex)
-				stopEvent := &claudeTextGenStreamResponse{
-					Type:  "content_block_stop",
-					Index: &c.toolBlockIndex,
-				}
-				stopData, _ := json.Marshal(stopEvent)
-				result.WriteString(fmt.Sprintf("data: %s\n\n", stopData))
+			// Send final content_block_stop events for any remaining unclosed tool calls
+			for index, toolCall := range c.toolCallStates {
+				if toolCall.contentBlockStarted && !toolCall.contentBlockStopped {
+					log.Debugf("[OpenAI->Claude] Sending final tool content_block_stop event for index %d at Claude index %d",
+						index, toolCall.claudeContentIndex)
+					stopEvent := &claudeTextGenStreamResponse{
+						Type:  "content_block_stop",
+						Index: &toolCall.claudeContentIndex,
+					}
+					stopData, _ := json.Marshal(stopEvent)
+					result.WriteString(fmt.Sprintf("data: %s\n\n", stopData))
+				}
 			}
 
 			// If we have a pending stop_reason but no usage, send message_delta with just stop_reason

@@ -401,7 +398,8 @@ func (c *ClaudeToOpenAIConverter) ConvertOpenAIStreamResponseToClaude(ctx wrappe
 			c.toolBlockIndex = -1
 			c.toolBlockStarted = false
 			c.toolBlockStopped = false
-			c.toolCallStates = make(map[string]*toolCallState)
+			c.toolCallStates = make(map[int]*toolCallInfo)
+			c.activeToolIndex = nil
 			log.Debugf("[OpenAI->Claude] Reset converter state for next request")
 
 			continue

@@ -424,7 +422,7 @@ func (c *ClaudeToOpenAIConverter) ConvertOpenAIStreamResponseToClaude(ctx wrappe
 					continue
 				}
 				log.Debugf("[OpenAI->Claude] Stream event [%d/%d]: %s", i+1, len(claudeStreamResponses), string(responseData))
-				result.WriteString(fmt.Sprintf("data: %s\n\n", responseData))
+				result.WriteString(fmt.Sprintf("event: %s\ndata: %s\n\n", claudeStreamResponse.Type, responseData))
 			}
 		}
 	}
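
This one-line change is the core of the SSE fix: Anthropic-style streams frame each event as an `event:` line naming the type, a `data:` line with the JSON payload, and a blank line, whereas OpenAI streams emit bare `data:` lines. A minimal sketch of the two framings (hypothetical payloads):

```go
package main

import "fmt"

// claudeSSE frames one Claude stream event: an "event:" line naming the
// type, a "data:" line with the JSON payload, then a blank line.
func claudeSSE(eventType, payload string) string {
	return fmt.Sprintf("event: %s\ndata: %s\n\n", eventType, payload)
}

// openaiSSE frames one OpenAI stream chunk: a bare "data:" line.
func openaiSSE(payload string) string {
	return fmt.Sprintf("data: %s\n\n", payload)
}

func main() {
	delta := `{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hi"}}`
	fmt.Print(claudeSSE("content_block_delta", delta))
	// event: content_block_delta
	// data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hi"}}

	fmt.Print(openaiSSE(`{"choices":[{"delta":{"content":"Hi"}}]}`))
	// data: {"choices":[{"delta":{"content":"Hi"}}]}
}
```
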
@@ -579,79 +577,60 @@ func (c *ClaudeToOpenAIConverter) buildClaudeStreamResponse(ctx wrapper.HttpCont
 
 	// Handle tool calls in streaming response
 	if choice.Delta != nil && len(choice.Delta.ToolCalls) > 0 {
+		// Initialize toolCallStates if needed
+		if c.toolCallStates == nil {
+			c.toolCallStates = make(map[int]*toolCallInfo)
+		}
+
 		for _, toolCall := range choice.Delta.ToolCalls {
 			if !toolCall.Function.IsEmpty() {
-				log.Debugf("[OpenAI->Claude] Processing tool call delta")
-
-				// Get or create tool call state
-				state := c.toolCallStates[toolCall.Id]
-				if state == nil {
-					state = &toolCallState{
-						id:              toolCall.Id,
-						name:            toolCall.Function.Name,
-						argumentsBuffer: "",
-						isComplete:      false,
-					}
-					c.toolCallStates[toolCall.Id] = state
-					log.Debugf("[OpenAI->Claude] Created new tool call state for id: %s, name: %s", toolCall.Id, toolCall.Function.Name)
-				}
-
-				// Accumulate arguments
-				if toolCall.Function.Arguments != "" {
-					state.argumentsBuffer += toolCall.Function.Arguments
-					log.Debugf("[OpenAI->Claude] Accumulated tool arguments: %s", state.argumentsBuffer)
-				}
-
-				// Try to parse accumulated arguments as JSON to check if complete
-				var input map[string]interface{}
-				if state.argumentsBuffer != "" {
-					if err := json.Unmarshal([]byte(state.argumentsBuffer), &input); err == nil {
-						// Successfully parsed - arguments are complete
-						if !state.isComplete {
-							state.isComplete = true
-							log.Debugf("[OpenAI->Claude] Tool call arguments complete for %s: %s", state.name, state.argumentsBuffer)
-
-							// Close thinking content block if it's still open
-							if c.thinkingBlockStarted && !c.thinkingBlockStopped {
-								c.thinkingBlockStopped = true
-								log.Debugf("[OpenAI->Claude] Closing thinking content block before tool use")
-								responses = append(responses, &claudeTextGenStreamResponse{
-									Type:  "content_block_stop",
-									Index: &c.thinkingBlockIndex,
-								})
-							}
-
-							// Close text content block if it's still open
-							if c.textBlockStarted && !c.textBlockStopped {
-								c.textBlockStopped = true
-								log.Debugf("[OpenAI->Claude] Closing text content block before tool use")
-								responses = append(responses, &claudeTextGenStreamResponse{
-									Type:  "content_block_stop",
-									Index: &c.textBlockIndex,
-								})
-							}
-
-							// Send content_block_start for tool_use only when we have complete arguments with dynamic index
-							if !c.toolBlockStarted {
-								c.toolBlockIndex = c.nextContentIndex
-								c.nextContentIndex++
-								c.toolBlockStarted = true
-								log.Debugf("[OpenAI->Claude] Generated content_block_start event for tool_use at index %d", c.toolBlockIndex)
-								responses = append(responses, &claudeTextGenStreamResponse{
-									Type:  "content_block_start",
-									Index: &c.toolBlockIndex,
-									ContentBlock: &claudeTextGenContent{
-										Type:  "tool_use",
-										Id:    toolCall.Id,
-										Name:  state.name,
-										Input: input,
-									},
-								})
-							}
-						}
-					} else {
-						// Still accumulating arguments
-						log.Debugf("[OpenAI->Claude] Tool arguments not yet complete, continuing to accumulate: %v", err)
-					}
-				}
+				log.Debugf("[OpenAI->Claude] Processing tool call delta: index=%d, id=%s, name=%s, args=%s",
+					toolCall.Index, toolCall.Id, toolCall.Function.Name, toolCall.Function.Arguments)
+
+				// Handle new tool call (has id and name)
+				if toolCall.Id != "" && toolCall.Function.Name != "" {
+					// Create or update tool call state
+					if _, exists := c.toolCallStates[toolCall.Index]; !exists {
+						c.toolCallStates[toolCall.Index] = &toolCallInfo{
+							id:                  toolCall.Id,
+							name:                toolCall.Function.Name,
+							contentBlockStarted: false,
+							contentBlockStopped: false,
+							cachedArguments:     "",
+						}
+					}
+
+					toolState := c.toolCallStates[toolCall.Index]
+
+					// Check if we can start this tool call (Claude requires serialization)
+					if c.activeToolIndex == nil {
+						// No active tool call, start this one
+						c.activeToolIndex = &toolCall.Index
+						toolCallResponses := c.startToolCall(toolState)
+						responses = append(responses, toolCallResponses...)
+					}
+					// If there's already an active tool call, we'll start this one when the current one finishes
+				}
+
+				// Handle arguments for any tool call - cache all arguments regardless of active state
+				if toolCall.Function.Arguments != "" {
+					if toolState, exists := c.toolCallStates[toolCall.Index]; exists {
+						// Always cache arguments for this tool call
+						toolState.cachedArguments += toolCall.Function.Arguments
+						log.Debugf("[OpenAI->Claude] Cached arguments for tool index %d: %s (total: %s)",
+							toolCall.Index, toolCall.Function.Arguments, toolState.cachedArguments)
+
+						// Send input_json_delta event only if this tool is currently active and content block started
+						if c.activeToolIndex != nil && *c.activeToolIndex == toolCall.Index && toolState.contentBlockStarted {
+							log.Debugf("[OpenAI->Claude] Generated input_json_delta event for active tool index %d: %s",
+								toolCall.Index, toolCall.Function.Arguments)
+							responses = append(responses, &claudeTextGenStreamResponse{
+								Type:  "content_block_delta",
+								Index: &toolState.claudeContentIndex,
+								Delta: &claudeTextGenDelta{
+									Type:        "input_json_delta",
+									PartialJson: toolCall.Function.Arguments,
+								},
+							})
+						}
+					}
+				}
 			}
 		}
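
The rewrite replaces the old parse-until-complete-JSON approach with an index-keyed state machine. Claude allows only one open content block at a time, while OpenAI may interleave argument deltas for several tool calls, so fragments for inactive tools are cached and replayed once their block is started (see `startToolCall` further down). A standalone sketch of that caching idea, with hypothetical tool names and fragments:

```go
package main

import "fmt"

// Trimmed re-creation of the serialization strategy: fragments for a tool
// whose block is not open yet are cached, then replayed when it starts.
type toolInfo struct {
	name    string
	cached  string
	started bool
}

func main() {
	tools := map[int]*toolInfo{}
	var activeIndex *int
	var events []string

	start := func(i int) {
		t := tools[i]
		t.started = true
		events = append(events, fmt.Sprintf("content_block_start(%s)", t.name))
		if t.cached != "" { // replay everything seen while the tool was inactive
			events = append(events, fmt.Sprintf("input_json_delta(%q)", t.cached))
		}
	}

	// Hypothetical interleaved OpenAI deltas: (index, name, argument fragment).
	deltas := []struct {
		index      int
		name, args string
	}{
		{0, "get_weather", `{"city":`}, {1, "get_time", `{"tz":`},
		{0, "", `"Paris"}`}, {1, "", `"UTC"}`},
	}

	for _, d := range deltas {
		if _, ok := tools[d.index]; !ok {
			tools[d.index] = &toolInfo{name: d.name}
			if activeIndex == nil { // serialize: open the first tool immediately
				i := d.index
				activeIndex = &i
				start(i)
			}
		}
		t := tools[d.index]
		t.cached += d.args
		if activeIndex != nil && *activeIndex == d.index && t.started {
			events = append(events, fmt.Sprintf("input_json_delta(%q)", d.args))
		}
	}

	// On finish_reason: start any tool that never became active, then stop all.
	for i := 0; i < len(tools); i++ {
		if !tools[i].started {
			start(i)
		}
		events = append(events, fmt.Sprintf("content_block_stop(#%d)", i))
	}

	for _, e := range events {
		fmt.Println(e)
	}
}
```
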
@@ -680,15 +659,52 @@ func (c *ClaudeToOpenAIConverter) buildClaudeStreamResponse(ctx wrapper.HttpCont
 				Index: &c.textBlockIndex,
 			})
 		}
-		if c.toolBlockStarted && !c.toolBlockStopped {
-			c.toolBlockStopped = true
-			log.Debugf("[OpenAI->Claude] Generated tool content_block_stop event at index %d", c.toolBlockIndex)
-			responses = append(responses, &claudeTextGenStreamResponse{
-				Type:  "content_block_stop",
-				Index: &c.toolBlockIndex,
-			})
-		}
+
+		// First, start any remaining unstarted tool calls (they may have no arguments)
+		// Process in order to maintain Claude's sequential requirement
+		var sortedIndices []int
+		for index := range c.toolCallStates {
+			sortedIndices = append(sortedIndices, index)
+		}
+
+		// Sort indices to process in order
+		for i := 0; i < len(sortedIndices)-1; i++ {
+			for j := i + 1; j < len(sortedIndices); j++ {
+				if sortedIndices[i] > sortedIndices[j] {
+					sortedIndices[i], sortedIndices[j] = sortedIndices[j], sortedIndices[i]
+				}
+			}
+		}
+
+		for _, index := range sortedIndices {
+			toolCall := c.toolCallStates[index]
+			if !toolCall.contentBlockStarted {
+				log.Debugf("[OpenAI->Claude] Starting remaining tool call at finish: index=%d, id=%s, name=%s",
+					index, toolCall.id, toolCall.name)
+				c.activeToolIndex = &index
+				toolCallResponses := c.startToolCall(toolCall)
+				responses = append(responses, toolCallResponses...)
+				c.activeToolIndex = nil // Clear immediately since tool is now fully started
+			}
+		}
+
+		// Then send content_block_stop for all started tool calls in order
+		for _, index := range sortedIndices {
+			toolCall := c.toolCallStates[index]
+			if toolCall.contentBlockStarted && !toolCall.contentBlockStopped {
+				log.Debugf("[OpenAI->Claude] Generated content_block_stop for tool at index %d, Claude index %d",
+					index, toolCall.claudeContentIndex)
+				responses = append(responses, &claudeTextGenStreamResponse{
+					Type:  "content_block_stop",
+					Index: &toolCall.claudeContentIndex,
+				})
+				toolCall.contentBlockStopped = true
+			}
+		}
+
+		// Clear active tool index
+		c.activeToolIndex = nil
 
 		// Cache stop_reason until we get usage info (Claude protocol requires them together)
 		c.pendingStopReason = &claudeFinishReason
 		log.Debugf("[OpenAI->Claude] Cached stop_reason: %s, waiting for usage", claudeFinishReason)
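
Minor style note on the nested-loop sort above: the standard library covers this, and an equivalent one-liner keeps the same behavior:

```go
package main

import (
	"fmt"
	"sort"
)

func main() {
	sortedIndices := []int{2, 0, 1}
	sort.Ints(sortedIndices)   // replaces the hand-rolled bubble sort
	fmt.Println(sortedIndices) // [0 1 2]
}
```
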
@@ -764,8 +780,9 @@ func (c *ClaudeToOpenAIConverter) convertContentArray(claudeContents []claudeCha
 		if claudeContent.Text != "" {
 			result.textParts = append(result.textParts, claudeContent.Text)
 			result.openaiContents = append(result.openaiContents, chatMessageContent{
-				Type: contentTypeText,
-				Text: claudeContent.Text,
+				Type:         contentTypeText,
+				Text:         claudeContent.Text,
+				CacheControl: claudeContent.CacheControl,
 			})
 		}
 	case "image":

@@ -822,3 +839,63 @@ func (c *ClaudeToOpenAIConverter) convertContentArray(claudeContents []claudeCha
 
 	return result
 }
+
+// startToolCall starts a new tool call content block
+func (c *ClaudeToOpenAIConverter) startToolCall(toolState *toolCallInfo) []*claudeTextGenStreamResponse {
+	var responses []*claudeTextGenStreamResponse
+
+	// Close thinking content block if it's still open
+	if c.thinkingBlockStarted && !c.thinkingBlockStopped {
+		c.thinkingBlockStopped = true
+		log.Debugf("[OpenAI->Claude] Closing thinking content block before tool use")
+		responses = append(responses, &claudeTextGenStreamResponse{
+			Type:  "content_block_stop",
+			Index: &c.thinkingBlockIndex,
+		})
+	}
+
+	// Close text content block if it's still open
+	if c.textBlockStarted && !c.textBlockStopped {
+		c.textBlockStopped = true
+		log.Debugf("[OpenAI->Claude] Closing text content block before tool use")
+		responses = append(responses, &claudeTextGenStreamResponse{
+			Type:  "content_block_stop",
+			Index: &c.textBlockIndex,
+		})
+	}
+
+	// Assign Claude content index
+	toolState.claudeContentIndex = c.nextContentIndex
+	c.nextContentIndex++
+	toolState.contentBlockStarted = true
+
+	log.Debugf("[OpenAI->Claude] Started tool call: Claude index=%d, id=%s, name=%s",
+		toolState.claudeContentIndex, toolState.id, toolState.name)
+
+	// Send content_block_start
+	responses = append(responses, &claudeTextGenStreamResponse{
+		Type:  "content_block_start",
+		Index: &toolState.claudeContentIndex,
+		ContentBlock: &claudeTextGenContent{
+			Type:  "tool_use",
+			Id:    toolState.id,
+			Name:  toolState.name,
+			Input: map[string]interface{}{}, // Empty input as per Claude spec
+		},
+	})
+
+	// Send any cached arguments as input_json_delta events
+	if toolState.cachedArguments != "" {
+		log.Debugf("[OpenAI->Claude] Outputting cached arguments for tool: %s", toolState.cachedArguments)
+		responses = append(responses, &claudeTextGenStreamResponse{
+			Type:  "content_block_delta",
+			Index: &toolState.claudeContentIndex,
+			Delta: &claudeTextGenDelta{
+				Type:        "input_json_delta",
+				PartialJson: toolState.cachedArguments,
+			},
+		})
+	}
+
+	return responses
+}

@@ -35,7 +35,8 @@ func TestClaudeToOpenAIConverter_ConvertClaudeRequestToOpenAI(t *testing.T) {
 	converter := &ClaudeToOpenAIConverter{}
 
 	t.Run("convert_multiple_text_content_blocks", func(t *testing.T) {
-		// Test case for the bug fix: multiple text content blocks should be merged into a single string
+		// Test case: multiple text content blocks should remain as separate array elements with cache control support
+		// Both system and user messages should handle array content format
		claudeRequest := `{
			"max_tokens": 32000,
			"messages": [{
@@ -98,15 +99,64 @@ func TestClaudeToOpenAIConverter_ConvertClaudeRequestToOpenAI(t *testing.T) {
 		// First message should be system message (converted from Claude's system field)
 		systemMsg := openaiRequest.Messages[0]
 		assert.Equal(t, roleSystem, systemMsg.Role)
-		assert.Equal(t, "xxx\nyyy", systemMsg.Content) // Claude system uses single \n
 
-		// Second message should be user message with merged text content
+		// System content should now also be an array for multiple text blocks
+		systemContentArray, ok := systemMsg.Content.([]interface{})
+		require.True(t, ok, "System content should be an array for multiple text blocks")
+		require.Len(t, systemContentArray, 2)
+
+		// First system text block
+		firstSystemElement, ok := systemContentArray[0].(map[string]interface{})
+		require.True(t, ok)
+		assert.Equal(t, contentTypeText, firstSystemElement["type"])
+		assert.Equal(t, "xxx", firstSystemElement["text"])
+		assert.NotNil(t, firstSystemElement["cache_control"]) // Has cache control
+		systemCacheControl1, ok := firstSystemElement["cache_control"].(map[string]interface{})
+		require.True(t, ok)
+		assert.Equal(t, "ephemeral", systemCacheControl1["type"])
+
+		// Second system text block
+		secondSystemElement, ok := systemContentArray[1].(map[string]interface{})
+		require.True(t, ok)
+		assert.Equal(t, contentTypeText, secondSystemElement["type"])
+		assert.Equal(t, "yyy", secondSystemElement["text"])
+		assert.NotNil(t, secondSystemElement["cache_control"]) // Has cache control
+		systemCacheControl2, ok := secondSystemElement["cache_control"].(map[string]interface{})
+		require.True(t, ok)
+		assert.Equal(t, "ephemeral", systemCacheControl2["type"])
+
+		// Second message should be user message with text content as array
 		userMsg := openaiRequest.Messages[1]
 		assert.Equal(t, "user", userMsg.Role)
 
-		// The key fix: multiple text blocks should be merged into a single string
-		expectedContent := "<system-reminder>\nThis is a reminder that your todo list is currently empty. DO NOT mention this to the user explicitly because they are already aware. If you are working on tasks that would benefit from a todo list please use the TodoWrite tool to create one. If not, please feel free to ignore. Again do not mention this message to the user.</system-reminder>\n\n<system-reminder>\nyyy</system-reminder>\n\n你是谁"
-		assert.Equal(t, expectedContent, userMsg.Content)
+		// The content should now be an array of separate text blocks, not merged
+		contentArray, ok := userMsg.Content.([]interface{})
+		require.True(t, ok, "Content should be an array for multiple text blocks")
+		require.Len(t, contentArray, 3)
+
+		// First text block
+		firstElement, ok := contentArray[0].(map[string]interface{})
+		require.True(t, ok)
+		assert.Equal(t, contentTypeText, firstElement["type"])
+		assert.Equal(t, "<system-reminder>\nThis is a reminder that your todo list is currently empty. DO NOT mention this to the user explicitly because they are already aware. If you are working on tasks that would benefit from a todo list please use the TodoWrite tool to create one. If not, please feel free to ignore. Again do not mention this message to the user.</system-reminder>", firstElement["text"])
+		assert.Nil(t, firstElement["cache_control"]) // No cache control for first block
+
+		// Second text block
+		secondElement, ok := contentArray[1].(map[string]interface{})
+		require.True(t, ok)
+		assert.Equal(t, contentTypeText, secondElement["type"])
+		assert.Equal(t, "<system-reminder>\nyyy</system-reminder>", secondElement["text"])
+		assert.Nil(t, secondElement["cache_control"]) // No cache control for second block
+
+		// Third text block with cache control
+		thirdElement, ok := contentArray[2].(map[string]interface{})
+		require.True(t, ok)
+		assert.Equal(t, contentTypeText, thirdElement["type"])
+		assert.Equal(t, "你是谁", thirdElement["text"])
+		assert.NotNil(t, thirdElement["cache_control"]) // Has cache control
+		cacheControl, ok := thirdElement["cache_control"].(map[string]interface{})
+		require.True(t, ok)
+		assert.Equal(t, "ephemeral", cacheControl["type"])
 	})
 
 	t.Run("convert_mixed_content_with_image", func(t *testing.T) {

@@ -159,10 +159,17 @@ type usage struct {
 	CompletionTokens        int                      `json:"completion_tokens,omitempty"`
 	TotalTokens             int                      `json:"total_tokens,omitempty"`
 	CompletionTokensDetails *completionTokensDetails `json:"completion_tokens_details,omitempty"`
+	PromptTokensDetails     *promptTokensDetails     `json:"prompt_tokens_details,omitempty"`
 }
 
+type promptTokensDetails struct {
+	AudioTokens  int `json:"audio_tokens,omitempty"`
+	CachedTokens int `json:"cached_tokens,omitempty"`
+}
+
 type completionTokensDetails struct {
 	ReasoningTokens          int `json:"reasoning_tokens,omitempty"`
 	AudioTokens              int `json:"audio_tokens,omitempty"`
 	AcceptedPredictionTokens int `json:"accepted_prediction_tokens,omitempty"`
 	RejectedPredictionTokens int `json:"rejected_prediction_tokens,omitempty"`
 }

@@ -240,11 +247,12 @@ func (m *chatMessage) handleStreamingReasoningContent(ctx wrapper.HttpContext, r
 }
 
 type chatMessageContent struct {
-	Type       string                      `json:"type,omitempty"`
-	Text       string                      `json:"text"`
-	ImageUrl   *chatMessageContentImageUrl `json:"image_url,omitempty"`
-	File       *chatMessageContentFile     `json:"file,omitempty"`
-	InputAudio *chatMessageContentAudio    `json:"input_audio,omitempty"`
+	CacheControl map[string]interface{}      `json:"cache_control,omitempty"`
+	Type         string                      `json:"type,omitempty"`
+	Text         string                      `json:"text"`
+	ImageUrl     *chatMessageContentImageUrl `json:"image_url,omitempty"`
+	File         *chatMessageContentFile     `json:"file,omitempty"`
+	InputAudio   *chatMessageContentAudio    `json:"input_audio,omitempty"`
 }
 
 type chatMessageContentAudio struct {

@@ -402,6 +410,7 @@ func (m *functionCall) IsEmpty() bool {
 }
 
 type StreamEvent struct {
+	RawEvent string `json:"-"`
 	Id       string `json:"id"`
 	Event    string `json:"event"`
 	Data     string `json:"data"`

@@ -813,6 +813,7 @@ func ExtractStreamingEvents(ctx wrapper.HttpContext, chunk []byte) []StreamEvent
 			value := string(body[valueStartIndex:i])
 			currentEvent.SetValue(currentKey, value)
 		} else {
+			currentEvent.RawEvent = string(body[eventStartIndex : i+1])
 			// Extra new line. The current event is complete.
 			events = append(events, *currentEvent)
 			// Reset event parsing state.
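
Together with the new `RawEvent` field on `StreamEvent`, this line captures the exact bytes of each SSE event, so `onStreamingResponseBody` can forward unconverted events verbatim instead of rebuilding them (which is where the `event:` line used to get lost). A much-simplified sketch of the idea, with a hypothetical parser and input:

```go
package main

import (
	"fmt"
	"strings"
)

// Simplified take on keeping raw SSE event bytes alongside the parsed
// fields, so unmodified events can be forwarded byte-for-byte.
type streamEvent struct {
	RawEvent string // original framing, including the "event:" line
	Event    string
	Data     string
}

func extractEvents(chunk string) []streamEvent {
	var events []streamEvent
	for _, block := range strings.Split(chunk, "\n\n") {
		if strings.TrimSpace(block) == "" {
			continue
		}
		ev := streamEvent{RawEvent: block + "\n\n"}
		for _, line := range strings.Split(block, "\n") {
			if v, ok := strings.CutPrefix(line, "event: "); ok {
				ev.Event = v
			} else if v, ok := strings.CutPrefix(line, "data: "); ok {
				ev.Data = v
			}
		}
		events = append(events, ev)
	}
	return events
}

func main() {
	chunk := "event: message_start\ndata: {\"type\":\"message_start\"}\n\n"
	for _, ev := range extractEvents(chunk) {
		// Forwarding RawEvent preserves the "event:" line exactly.
		fmt.Printf("type=%s raw=%q\n", ev.Event, ev.RawEvent)
	}
}
```
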
@@ -6,14 +6,15 @@ import (
 	"strings"
 
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
-	"github.com/higress-group/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+	"github.com/higress-group/wasm-go/pkg/wrapper"
 )
 
 const (
-	zhipuAiDomain             = "open.bigmodel.cn"
-	zhipuAiChatCompletionPath = "/api/paas/v4/chat/completions"
-	zhipuAiEmbeddingsPath     = "/api/paas/v4/embeddings"
+	zhipuAiDomain                = "open.bigmodel.cn"
+	zhipuAiChatCompletionPath    = "/api/paas/v4/chat/completions"
+	zhipuAiEmbeddingsPath        = "/api/paas/v4/embeddings"
+	zhipuAiAnthropicMessagesPath = "/api/anthropic/v1/messages"
 )
 
 type zhipuAiProviderInitializer struct{}

@@ -27,8 +28,9 @@ func (m *zhipuAiProviderInitializer) ValidateConfig(config *ProviderConfig) erro
 
 func (m *zhipuAiProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion): zhipuAiChatCompletionPath,
-		string(ApiNameEmbeddings):     zhipuAiEmbeddingsPath,
+		string(ApiNameChatCompletion):    zhipuAiChatCompletionPath,
+		string(ApiNameEmbeddings):        zhipuAiEmbeddingsPath,
+		string(ApiNameAnthropicMessages): zhipuAiAnthropicMessagesPath,
 	}
 }

@@ -75,5 +77,8 @@ func (m *zhipuAiProvider) GetApiName(path string) ApiName {
 	if strings.Contains(path, zhipuAiEmbeddingsPath) {
 		return ApiNameEmbeddings
 	}
+	if strings.Contains(path, zhipuAiAnthropicMessagesPath) {
+		return ApiNameAnthropicMessages
+	}
 	return ""
 }

@@ -83,6 +83,23 @@ const (
 	RuleFirst   = "first"
 	RuleReplace = "replace"
 	RuleAppend  = "append"
+
+	// Built-in attributes
+	BuiltinQuestionKey = "question"
+	BuiltinAnswerKey   = "answer"
+
+	// Built-in attribute paths
+	// Question paths (from request body)
+	QuestionPathOpenAI = "messages.@reverse.0.content"
+	QuestionPathClaude = "messages.@reverse.0.content" // Claude uses same format
+
+	// Answer paths (from response body - non-streaming)
+	AnswerPathOpenAINonStreaming = "choices.0.message.content"
+	AnswerPathClaudeNonStreaming = "content.0.text"
+
+	// Answer paths (from response streaming body)
+	AnswerPathOpenAIStreaming = "choices.0.delta.content"
+	AnswerPathClaudeStreaming = "delta.text"
 )
 
 // TracingSpan is the tracing span configuration.

@@ -325,12 +342,14 @@ func onHttpRequestBody(ctx wrapper.HttpContext, config AIStatisticsConfig, body
 
 	userPromptCount := 0
 	if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() {
+		// OpenAI and Claude/Anthropic format - both use "messages" array with "role" field
 		for _, msg := range messages.Array() {
 			if msg.Get("role").String() == "user" {
 				userPromptCount += 1
 			}
 		}
-	} else if contents := gjson.GetBytes(body, "contents"); contents.Exists() && contents.IsArray() { // Google Gemini GenerateContent
+	} else if contents := gjson.GetBytes(body, "contents"); contents.Exists() && contents.IsArray() {
+		// Google Gemini GenerateContent
 		for _, content := range contents.Array() {
 			if !content.Get("role").Exists() || content.Get("role").String() == "user" {
 				userPromptCount += 1
@@ -387,7 +406,7 @@ func onHttpStreamingBody(ctx wrapper.HttpContext, config AIStatisticsConfig, dat
 		"id",
 		"response.id",
 		"responseId", // Gemini generateContent
-		"message.id", // anthropic messages
+		"message.id", // anthropic/claude messages
 	}); chatID != nil {
 		ctx.SetUserAttribute(ChatID, chatID.String())
 	}
@@ -456,7 +475,7 @@ func onHttpResponseBody(ctx wrapper.HttpContext, config AIStatisticsConfig, body
 		"id",
 		"response.id",
 		"responseId", // Gemini generateContent
-		"message.id", // anthropic messages
+		"message.id", // anthropic/claude messages
 	}); chatID != nil {
 		ctx.SetUserAttribute(ChatID, chatID.String())
 	}
@@ -507,6 +526,15 @@ func setAttributeBySource(ctx wrapper.HttpContext, config AIStatisticsConfig, so
 		value = gjson.GetBytes(body, attribute.Value).Value()
 	default:
 	}
+
+	// Handle built-in attributes with Claude/OpenAI protocol fallback logic
+	if (value == nil || value == "") && isBuiltinAttribute(key) {
+		value = getBuiltinAttributeFallback(ctx, config, key, source, body, attribute.Rule)
+		if value != nil && value != "" {
+			log.Debugf("[attribute] Used protocol fallback for %s: %+v", key, value)
+		}
+	}
+
 	if (value == nil || value == "") && attribute.DefaultValue != "" {
 		value = attribute.DefaultValue
 	}
@@ -538,6 +566,45 @@ func setAttributeBySource(ctx wrapper.HttpContext, config AIStatisticsConfig, so
 	}
 }
 
+// isBuiltinAttribute checks if the given key is a built-in attribute
+func isBuiltinAttribute(key string) bool {
+	return key == BuiltinQuestionKey || key == BuiltinAnswerKey
+}
+
+// getBuiltinAttributeFallback provides protocol compatibility fallback for question/answer attributes
+func getBuiltinAttributeFallback(ctx wrapper.HttpContext, config AIStatisticsConfig, key, source string, body []byte, rule string) interface{} {
+	switch key {
+	case BuiltinQuestionKey:
+		if source == RequestBody {
+			// Try OpenAI/Claude format (both use same messages structure)
+			if value := gjson.GetBytes(body, QuestionPathOpenAI).Value(); value != nil && value != "" {
+				return value
+			}
+		}
+	case BuiltinAnswerKey:
+		if source == ResponseStreamingBody {
+			// Try OpenAI format first
+			if value := extractStreamingBodyByJsonPath(body, AnswerPathOpenAIStreaming, rule); value != nil && value != "" {
+				return value
+			}
+			// Try Claude format
+			if value := extractStreamingBodyByJsonPath(body, AnswerPathClaudeStreaming, rule); value != nil && value != "" {
+				return value
+			}
+		} else if source == ResponseBody {
+			// Try OpenAI format first
+			if value := gjson.GetBytes(body, AnswerPathOpenAINonStreaming).Value(); value != nil && value != "" {
+				return value
+			}
+			// Try Claude format
+			if value := gjson.GetBytes(body, AnswerPathClaudeNonStreaming).Value(); value != nil && value != "" {
+				return value
+			}
+		}
+	}
+	return nil
+}
+
 func extractStreamingBodyByJsonPath(data []byte, jsonPath string, rule string) interface{} {
 	chunks := bytes.Split(bytes.TrimSpace(wrapper.UnifySSEChunk(data)), []byte("\n\n"))
 	var value interface{}
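
The path constants above are gjson paths: `messages.@reverse.0.content` reads as "content of the last message" and works for both OpenAI and Claude request bodies, while the answer paths differ per protocol. A quick illustration with hypothetical payloads:

```go
package main

import (
	"fmt"

	"github.com/tidwall/gjson"
)

func main() {
	request := `{"messages":[{"role":"system","content":"be brief"},{"role":"user","content":"你是谁"}]}`
	// "@reverse.0" selects the last array element, i.e. the newest message.
	fmt.Println(gjson.Get(request, "messages.@reverse.0.content")) // 你是谁

	claudeResponse := `{"content":[{"type":"text","text":"I am Claude."}]}`
	fmt.Println(gjson.Get(claudeResponse, "content.0.text")) // I am Claude.

	openaiResponse := `{"choices":[{"message":{"content":"I am an assistant."}}]}`
	fmt.Println(gjson.Get(openaiResponse, "choices.0.message.content")) // I am an assistant.
}
```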