diff --git a/plugins/wasm-go/extensions/ai-proxy/main.go b/plugins/wasm-go/extensions/ai-proxy/main.go index c887caa5..fec35f69 100644 --- a/plugins/wasm-go/extensions/ai-proxy/main.go +++ b/plugins/wasm-go/extensions/ai-proxy/main.go @@ -7,6 +7,7 @@ import ( "fmt" "net/url" "regexp" + "strconv" "strings" "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/config" @@ -25,11 +26,14 @@ import ( const ( pluginName = "ai-proxy" - defaultMaxBodyBytes uint32 = 100 * 1024 * 1024 + defaultMaxBodyBytes uint32 = 100 * 1024 * 1024 + errorResponseBodyBufferLimit uint32 = 64 * 1024 + maxLoggedErrorResponseBodyBytes = 16 * 1024 - ctxOriginalPath = "original_path" - ctxOriginalHost = "original_host" - ctxOriginalAuth = "original_auth" + ctxOriginalPath = "original_path" + ctxOriginalHost = "original_host" + ctxOriginalAuth = "original_auth" + ctxUpstreamErrorResponseStatus = "upstream_error_response_status" ) type pair[K, V any] struct { @@ -346,8 +350,17 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo if err != nil { log.Errorf("unable to load :status header from response: %v", err) } + action := providerConfig.OnRequestFailed(activeProvider, ctx, apiTokenInUse, apiTokens, status) + if action == types.ActionContinue && + providerConfig.GetLogUpstreamErrorResponseBody() && + shouldLogUpstreamErrorResponse(status) { + ctx.SetContext(ctxUpstreamErrorResponseStatus, status) + ctx.BufferResponseBody() + ctx.SetResponseBodyBufferLimit(errorResponseBodyBufferLimit) + return action + } ctx.DontReadResponseBody() - return providerConfig.OnRequestFailed(activeProvider, ctx, apiTokenInUse, apiTokens, status) + return action } // Reset ctxApiTokenRequestFailureCount if the request is successful, @@ -501,6 +514,11 @@ func onHttpResponseBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfi log.Debugf("[onHttpResponseBody] provider=%s", activeProvider.GetProviderType()) + if status := ctx.GetStringContext(ctxUpstreamErrorResponseStatus, ""); status != "" { + logUpstreamErrorResponse(ctx, activeProvider, status, body) + return types.ActionContinue + } + var finalBody []byte if handler, ok := activeProvider.(provider.TransformResponseBodyHandler); ok { @@ -538,6 +556,52 @@ func onHttpResponseBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfi return types.ActionContinue } +func shouldLogUpstreamErrorResponse(status string) bool { + code, err := strconv.Atoi(status) + if err != nil { + return false + } + return code >= 400 +} + +func logUpstreamErrorResponse(ctx wrapper.HttpContext, activeProvider provider.Provider, status string, body []byte) { + apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName) + requestID := responseHeaderValue("x-request-id") + if requestID == "" { + requestID = responseHeaderValue("X-Request-Id") + } + bodyText, truncated := errorResponseBodyForLog(body) + log.Warnf("[upstream_error_response] provider=%s apiName=%s status=%s request_id=%s original_model=%s final_model=%s body_truncated=%v body=%s", + activeProvider.GetProviderType(), + apiName, + status, + requestID, + ctx.GetStringContext("originalRequestModel", ""), + ctx.GetStringContext("finalRequestModel", ""), + truncated, + bodyText, + ) +} + +func responseHeaderValue(name string) string { + value, err := proxywasm.GetHttpResponseHeader(name) + if err != nil { + return "" + } + return value +} + +func errorResponseBodyForLog(body []byte) (string, bool) { + truncated := len(body) > maxLoggedErrorResponseBodyBytes + if truncated { + body = body[:maxLoggedErrorResponseBodyBytes] + } + text := strings.ToValidUTF8(string(body), "?") + text = strings.ReplaceAll(text, "\r", "\\r") + text = strings.ReplaceAll(text, "\n", "\\n") + return text, truncated +} + // Helper function to check if Claude response conversion is needed func needsClaudeResponseConversion(ctx wrapper.HttpContext) bool { needClaudeConversion, _ := ctx.GetContext("needClaudeResponseConversion").(bool) diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai.go b/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai.go index d588c480..526f79e0 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai.go @@ -120,26 +120,46 @@ type toolCallInfo struct { // contentConversionResult represents the result of converting Claude content to OpenAI format type contentConversionResult struct { - textParts []string - reasoningContent string - reasoningSignature string - reasoningRedactedContent string - claudeContentBlocks []claudeChatMessageContent - toolCalls []toolCall - toolResults []claudeChatMessageContent - openaiContents []chatMessageContent - hasNonTextContent bool + textParts []string + reasoningParts []string + claudeContentBlocks []claudeChatMessageContent + toolCalls []toolCall + toolResults []claudeChatMessageContent + openaiContents []chatMessageContent + hasReasoningBlocks bool } -func applyReasoningFields(message *chatMessage, conversionResult *contentConversionResult) { - message.ReasoningContent = conversionResult.reasoningContent - message.ReasoningSignature = conversionResult.reasoningSignature - message.ReasoningRedactedContent = conversionResult.reasoningRedactedContent +type ClaudeToOpenAIConvertOptions struct { + // PreserveMessageReasoningContent enables the non-standard message-level + // reasoning_content field for providers that explicitly support it. + PreserveMessageReasoningContent bool +} + +func (r *contentConversionResult) reasoningContent() string { + return strings.Join(r.reasoningParts, "\n\n") +} + +func (r *contentConversionResult) setReasoningContent(message *chatMessage, options ClaudeToOpenAIConvertOptions) { + if options.PreserveMessageReasoningContent && len(r.reasoningParts) > 0 { + message.ReasoningContent = r.reasoningContent() + } +} + +func applyReasoningFields(message *chatMessage, conversionResult *contentConversionResult, options ClaudeToOpenAIConvertOptions) { + conversionResult.setReasoningContent(message, options) message.ClaudeContentBlocks = conversionResult.claudeContentBlocks } -// ConvertClaudeRequestToOpenAI converts a Claude chat completion request to OpenAI format +// ConvertClaudeRequestToOpenAI converts a Claude chat completion request to strict OpenAI format. +// Use ConvertClaudeRequestToOpenAIWithOptions for providers that support non-standard message reasoning fields. func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAI(body []byte) ([]byte, error) { + return c.ConvertClaudeRequestToOpenAIWithOptions(body, ClaudeToOpenAIConvertOptions{ + PreserveMessageReasoningContent: false, + }) +} + +// ConvertClaudeRequestToOpenAIWithOptions converts a Claude chat completion request to OpenAI format. +func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAIWithOptions(body []byte, options ClaudeToOpenAIConvertOptions) ([]byte, error) { log.Debugf("[Claude->OpenAI] Original Claude request body: %s", string(body)) var claudeRequest claudeTextGenRequest @@ -184,7 +204,7 @@ func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAI(body []byte) ([]b Role: claudeMsg.Role, ToolCalls: conversionResult.toolCalls, } - applyReasoningFields(&openaiMsg, conversionResult) + applyReasoningFields(&openaiMsg, conversionResult, options) // Add text content if present, otherwise set to null if len(conversionResult.textParts) > 0 { @@ -207,8 +227,9 @@ func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAI(body []byte) ([]b } openaiRequest.Messages = append(openaiRequest.Messages, toolMsg) } - // Also add text content if present alongside tool results - // This handles cases like: [tool_result, tool_result, text] + // Also add visible text content if present alongside tool results. + // This companion message intentionally does not carry reasoning_content: + // tool_result content is user/tool-side data, while thinking belongs to assistant turns. if len(conversionResult.textParts) > 0 { textMsg := chatMessage{ Role: claudeMsg.Role, @@ -220,11 +241,21 @@ func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAI(body []byte) ([]b // Handle regular content if no tool calls or tool results if len(conversionResult.toolCalls) == 0 && len(conversionResult.toolResults) == 0 { + var content any + if len(conversionResult.openaiContents) > 0 { + content = conversionResult.openaiContents + } openaiMsg := chatMessage{ Role: claudeMsg.Role, - Content: conversionResult.openaiContents, + Content: content, + } + applyReasoningFields(&openaiMsg, conversionResult, options) + if openaiMsg.Content == nil && openaiMsg.ReasoningContent == "" && conversionResult.hasReasoningBlocks { + // Strict OpenAI-style providers reject role-only messages. When Claude turns + // contain only non-portable reasoning blocks, degrade them to an empty visible + // message instead of emitting an invalid assistant/user turn. + openaiMsg.Content = "" } - applyReasoningFields(&openaiMsg, conversionResult) openaiRequest.Messages = append(openaiRequest.Messages, openaiMsg) } } @@ -1012,11 +1043,12 @@ func openAIFinishReasonToClaude(reason string) string { // convertContentArray converts an array of Claude content to OpenAI content format func (c *ClaudeToOpenAIConverter) convertContentArray(claudeContents []claudeChatMessageContent) *contentConversionResult { result := &contentConversionResult{ - textParts: []string{}, - toolCalls: []toolCall{}, - toolResults: []claudeChatMessageContent{}, - openaiContents: []chatMessageContent{}, - hasNonTextContent: false, + textParts: []string{}, + reasoningParts: []string{}, + toolCalls: []toolCall{}, + toolResults: []claudeChatMessageContent{}, + openaiContents: []chatMessageContent{}, + hasReasoningBlocks: false, } claudeContentBlocks := make([]claudeChatMessageContent, 0, len(claudeContents)) preserveClaudeContentBlocks := false @@ -1037,18 +1069,16 @@ func (c *ClaudeToOpenAIConverter) convertContentArray(claudeContents []claudeCha }) } case "thinking": - result.hasNonTextContent = true + result.hasReasoningBlocks = true preserveClaudeContentBlocks = true - result.reasoningContent += claudeContent.Thinking - if claudeContent.Signature != "" { - result.reasoningSignature = claudeContent.Signature + if claudeContent.Thinking != "" { + result.reasoningParts = append(result.reasoningParts, claudeContent.Thinking) } case "redacted_thinking": - result.hasNonTextContent = true + result.hasReasoningBlocks = true preserveClaudeContentBlocks = true - result.reasoningRedactedContent += claudeContent.Data + // data is an opaque Claude blob, not portable reasoning text. case "image": - result.hasNonTextContent = true if claudeContent.Source != nil { if claudeContent.Source.Type == "base64" { // Convert base64 image to OpenAI format @@ -1069,7 +1099,6 @@ func (c *ClaudeToOpenAIConverter) convertContentArray(claudeContents []claudeCha } } case "tool_use": - result.hasNonTextContent = true preserveClaudeContentBlocks = true // Convert Claude tool_use to OpenAI tool_calls format if claudeContent.Id != "" && claudeContent.Name != "" { @@ -1093,7 +1122,6 @@ func (c *ClaudeToOpenAIConverter) convertContentArray(claudeContents []claudeCha log.Debugf("[Claude->OpenAI] Converted tool_use to tool_call: %s", claudeContent.Name) } case "tool_result": - result.hasNonTextContent = true preserveClaudeContentBlocks = true // Store tool results for processing result.toolResults = append(result.toolResults, claudeContent) diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai_test.go b/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai_test.go index 4cbc93d0..c41ccdaf 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai_test.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai_test.go @@ -383,6 +383,330 @@ func TestClaudeToOpenAIConverter_ConvertClaudeRequestToOpenAI(t *testing.T) { assert.Equal(t, float64(5), args["max_results"]) }) + t.Run("convert_thinking_and_tool_use_to_reasoning_content", func(t *testing.T) { + claudeRequest := `{ + "model": "anthropic/claude-sonnet-4", + "messages": [{ + "role": "assistant", + "content": [{ + "type": "thinking", + "thinking": "The user needs current weather, so I should call the search tool.", + "signature": "signature-value" + }, { + "type": "tool_use", + "id": "toolu_weather", + "name": "web_search", + "input": { + "query": "today weather", + "max_results": 3 + } + }] + }], + "thinking": {"type": "enabled", "budget_tokens": 8192}, + "max_tokens": 1000 + }` + + result, err := converter.ConvertClaudeRequestToOpenAIWithOptions([]byte(claudeRequest), ClaudeToOpenAIConvertOptions{ + PreserveMessageReasoningContent: true, + }) + require.NoError(t, err) + + var openaiRequest chatCompletionRequest + err = json.Unmarshal(result, &openaiRequest) + require.NoError(t, err) + + assert.Equal(t, "medium", openaiRequest.ReasoningEffort) + require.Len(t, openaiRequest.Messages, 1) + assistantMsg := openaiRequest.Messages[0] + assert.Equal(t, "assistant", assistantMsg.Role) + assert.Nil(t, assistantMsg.Content) + assert.Equal(t, "The user needs current weather, so I should call the search tool.", assistantMsg.ReasoningContent) + require.Len(t, assistantMsg.ToolCalls, 1) + assert.Equal(t, "toolu_weather", assistantMsg.ToolCalls[0].Id) + assert.Equal(t, "web_search", assistantMsg.ToolCalls[0].Function.Name) + + var rawJSON map[string]interface{} + err = json.Unmarshal(result, &rawJSON) + require.NoError(t, err) + messages := rawJSON["messages"].([]interface{}) + rawAssistant := messages[0].(map[string]interface{}) + assert.Equal(t, "The user needs current weather, so I should call the search tool.", rawAssistant["reasoning_content"]) + assert.NotContains(t, rawAssistant, "thinking") + }) + + t.Run("convert_multiple_thinking_blocks_without_tool_use", func(t *testing.T) { + claudeRequest := `{ + "model": "anthropic/claude-sonnet-4", + "messages": [{ + "role": "assistant", + "content": [{ + "type": "thinking", + "thinking": "First reasoning step.", + "signature": "signature-1" + }, { + "type": "thinking", + "thinking": "Second reasoning step.", + "signature": "signature-2" + }, { + "type": "text", + "text": "Final visible answer." + }] + }], + "thinking": {"type": "enabled", "budget_tokens": 2048}, + "max_tokens": 1000 + }` + + result, err := converter.ConvertClaudeRequestToOpenAIWithOptions([]byte(claudeRequest), ClaudeToOpenAIConvertOptions{ + PreserveMessageReasoningContent: true, + }) + require.NoError(t, err) + + var rawJSON map[string]interface{} + err = json.Unmarshal(result, &rawJSON) + require.NoError(t, err) + messages := rawJSON["messages"].([]interface{}) + rawAssistant := messages[0].(map[string]interface{}) + assert.Equal(t, "assistant", rawAssistant["role"]) + assert.Equal(t, "First reasoning step.\n\nSecond reasoning step.", rawAssistant["reasoning_content"]) + assert.NotContains(t, rawAssistant, "thinking") + assert.NotContains(t, rawAssistant, "signature") + + content := rawAssistant["content"].([]interface{}) + require.Len(t, content, 1) + textContent := content[0].(map[string]interface{}) + assert.Equal(t, "text", textContent["type"]) + assert.Equal(t, "Final visible answer.", textContent["text"]) + }) + + t.Run("omit_empty_content_array_for_thinking_only_message", func(t *testing.T) { + claudeRequest := `{ + "model": "anthropic/claude-sonnet-4", + "messages": [{ + "role": "assistant", + "content": [{ + "type": "thinking", + "thinking": "Only private reasoning is present." + }] + }], + "thinking": {"type": "enabled", "budget_tokens": 2048}, + "max_tokens": 1000 + }` + + result, err := converter.ConvertClaudeRequestToOpenAIWithOptions([]byte(claudeRequest), ClaudeToOpenAIConvertOptions{ + PreserveMessageReasoningContent: true, + }) + require.NoError(t, err) + + var rawJSON map[string]interface{} + err = json.Unmarshal(result, &rawJSON) + require.NoError(t, err) + messages := rawJSON["messages"].([]interface{}) + rawAssistant := messages[0].(map[string]interface{}) + assert.Equal(t, "assistant", rawAssistant["role"]) + assert.Equal(t, "Only private reasoning is present.", rawAssistant["reasoning_content"]) + assert.NotContains(t, rawAssistant, "content") + assert.NotContains(t, rawAssistant, "thinking") + }) + + t.Run("omit_signature_only_thinking_with_tool_use_from_reasoning_content", func(t *testing.T) { + claudeRequest := `{ + "model": "anthropic/claude-sonnet-4", + "messages": [{ + "role": "assistant", + "content": [{ + "type": "thinking", + "thinking": "", + "signature": "signature-only" + }, { + "type": "text", + "text": "Visible answer without reasoning text." + }, { + "type": "tool_use", + "id": "toolu_signature", + "name": "web_search", + "input": { + "query": "today weather" + } + }] + }], + "thinking": {"type": "enabled", "budget_tokens": 2048}, + "max_tokens": 1000 + }` + + result, err := converter.ConvertClaudeRequestToOpenAIWithOptions([]byte(claudeRequest), ClaudeToOpenAIConvertOptions{ + PreserveMessageReasoningContent: true, + }) + require.NoError(t, err) + + var rawJSON map[string]interface{} + err = json.Unmarshal(result, &rawJSON) + require.NoError(t, err) + messages := rawJSON["messages"].([]interface{}) + rawAssistant := messages[0].(map[string]interface{}) + assert.NotContains(t, rawAssistant, "reasoning_content") + assert.NotContains(t, rawAssistant, "thinking") + assert.NotContains(t, rawAssistant, "signature") + + assert.Equal(t, "Visible answer without reasoning text.", rawAssistant["content"]) + toolCalls := rawAssistant["tool_calls"].([]interface{}) + require.Len(t, toolCalls, 1) + toolCall := toolCalls[0].(map[string]interface{}) + assert.Equal(t, "toolu_signature", toolCall["id"]) + }) + + t.Run("omit_redacted_thinking_data_from_reasoning_content", func(t *testing.T) { + claudeRequest := `{ + "model": "anthropic/claude-sonnet-4", + "messages": [{ + "role": "assistant", + "content": [{ + "type": "redacted_thinking", + "data": "opaque-redacted-thinking-data" + }, { + "type": "tool_use", + "id": "toolu_redacted", + "name": "web_search", + "input": { + "query": "latest weather" + } + }] + }], + "thinking": {"type": "enabled", "budget_tokens": 8192}, + "max_tokens": 1000 + }` + + result, err := converter.ConvertClaudeRequestToOpenAIWithOptions([]byte(claudeRequest), ClaudeToOpenAIConvertOptions{ + PreserveMessageReasoningContent: true, + }) + require.NoError(t, err) + + var rawJSON map[string]interface{} + err = json.Unmarshal(result, &rawJSON) + require.NoError(t, err) + messages := rawJSON["messages"].([]interface{}) + rawAssistant := messages[0].(map[string]interface{}) + assert.NotContains(t, rawAssistant, "reasoning_content") + assert.NotContains(t, rawAssistant, "redacted_thinking") + assert.NotContains(t, rawAssistant, "data") + require.Len(t, rawAssistant["tool_calls"].([]interface{}), 1) + }) + + t.Run("default_converter_omits_message_reasoning_content", func(t *testing.T) { + claudeRequest := `{ + "model": "anthropic/claude-sonnet-4", + "messages": [{ + "role": "assistant", + "content": [{ + "type": "thinking", + "thinking": "This should not be sent by the strict default converter." + }, { + "type": "text", + "text": "Visible answer." + }] + }], + "thinking": {"type": "enabled", "budget_tokens": 2048}, + "max_tokens": 1000 + }` + + result, err := converter.ConvertClaudeRequestToOpenAI([]byte(claudeRequest)) + require.NoError(t, err) + + var rawJSON map[string]interface{} + err = json.Unmarshal(result, &rawJSON) + require.NoError(t, err) + messages := rawJSON["messages"].([]interface{}) + rawAssistant := messages[0].(map[string]interface{}) + assert.NotContains(t, rawAssistant, "reasoning_content") + assert.NotContains(t, rawAssistant, "thinking") + }) + + t.Run("default_converter_degrades_reasoning_only_messages_to_empty_content", func(t *testing.T) { + tests := []struct { + name string + content string + }{ + { + name: "thinking only", + content: `{ + "type": "thinking", + "thinking": "Hidden chain of thought." + }`, + }, + { + name: "redacted thinking only", + content: `{ + "type": "redacted_thinking", + "data": "opaque-redacted-thinking-data" + }`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + claudeRequest := `{ + "model": "anthropic/claude-sonnet-4", + "messages": [{ + "role": "assistant", + "content": [` + tt.content + `] + }], + "thinking": {"type": "enabled", "budget_tokens": 2048}, + "max_tokens": 1000 + }` + + result, err := converter.ConvertClaudeRequestToOpenAI([]byte(claudeRequest)) + require.NoError(t, err) + + var rawJSON map[string]interface{} + err = json.Unmarshal(result, &rawJSON) + require.NoError(t, err) + messages := rawJSON["messages"].([]interface{}) + require.Len(t, messages, 1) + rawAssistant := messages[0].(map[string]interface{}) + assert.Equal(t, "assistant", rawAssistant["role"]) + assert.Equal(t, "", rawAssistant["content"]) + assert.NotContains(t, rawAssistant, "reasoning_content") + }) + } + }) + + t.Run("omit_reasoning_content_when_message_reasoning_is_not_supported", func(t *testing.T) { + claudeRequest := `{ + "model": "anthropic/claude-sonnet-4", + "messages": [{ + "role": "assistant", + "content": [{ + "type": "thinking", + "thinking": "Do not send this non-standard field to strict providers.", + "signature": "signature-value" + }, { + "type": "tool_use", + "id": "toolu_strict", + "name": "web_search", + "input": { + "query": "today weather" + } + }] + }], + "thinking": {"type": "enabled", "budget_tokens": 8192}, + "max_tokens": 1000 + }` + + result, err := converter.ConvertClaudeRequestToOpenAIWithOptions([]byte(claudeRequest), ClaudeToOpenAIConvertOptions{ + PreserveMessageReasoningContent: false, + }) + require.NoError(t, err) + + var rawJSON map[string]interface{} + err = json.Unmarshal(result, &rawJSON) + require.NoError(t, err) + messages := rawJSON["messages"].([]interface{}) + rawAssistant := messages[0].(map[string]interface{}) + assert.NotContains(t, rawAssistant, "reasoning_content") + assert.NotContains(t, rawAssistant, "thinking") + assert.NotContains(t, rawAssistant, "signature") + require.Len(t, rawAssistant["tool_calls"].([]interface{}), 1) + }) + t.Run("preserve_empty_tool_use_input", func(t *testing.T) { claudeRequest := `{ "model": "anthropic/claude-sonnet-4", @@ -529,6 +853,48 @@ func TestClaudeToOpenAIConverter_ConvertClaudeRequestToOpenAI(t *testing.T) { assert.Equal(t, "继续", userMsg.Content) }) + t.Run("omit_reasoning_content_on_tool_result_companion_text_message", func(t *testing.T) { + claudeRequest := `{ + "model": "anthropic/claude-sonnet-4", + "messages": [{ + "role": "user", + "content": [{ + "type": "thinking", + "thinking": "Malformed thinking attached to a tool result turn." + }, { + "type": "tool_result", + "tool_use_id": "toolu_result", + "content": "Search result" + }, { + "type": "text", + "text": "continue" + }] + }], + "thinking": {"type": "enabled", "budget_tokens": 2048}, + "max_tokens": 1000 + }` + + result, err := converter.ConvertClaudeRequestToOpenAIWithOptions([]byte(claudeRequest), ClaudeToOpenAIConvertOptions{ + PreserveMessageReasoningContent: true, + }) + require.NoError(t, err) + + var rawJSON map[string]interface{} + err = json.Unmarshal(result, &rawJSON) + require.NoError(t, err) + messages := rawJSON["messages"].([]interface{}) + require.Len(t, messages, 2) + + rawTool := messages[0].(map[string]interface{}) + assert.Equal(t, "tool", rawTool["role"]) + assert.NotContains(t, rawTool, "reasoning_content") + + rawCompanionText := messages[1].(map[string]interface{}) + assert.Equal(t, "user", rawCompanionText["role"]) + assert.Equal(t, "continue", rawCompanionText["content"]) + assert.NotContains(t, rawCompanionText, "reasoning_content") + }) + t.Run("convert_multiple_tool_calls", func(t *testing.T) { // Test multiple tool_use in single message claudeRequest := `{ @@ -735,6 +1101,28 @@ func TestClaudeToOpenAIConverter_ConvertOpenAIResponseToClaude(t *testing.T) { }) } +func TestProviderConfigSupportsMessageReasoningContent(t *testing.T) { + tests := []struct { + name string + typ string + expected bool + }{ + {name: "qwen", typ: providerTypeQwen, expected: true}, + {name: "openrouter", typ: providerTypeOpenRouter, expected: true}, + {name: "zhipuai", typ: providerTypeZhipuAi, expected: true}, + {name: "openai", typ: providerTypeOpenAI, expected: false}, + {name: "azure", typ: providerTypeAzure, expected: false}, + {name: "generic", typ: providerTypeGeneric, expected: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + config := &ProviderConfig{typ: tt.typ} + assert.Equal(t, tt.expected, config.supportsMessageReasoningContent()) + }) + } +} + func TestClaudeToOpenAIConverter_ConvertThinkingConfig(t *testing.T) { converter := &ClaudeToOpenAIConverter{} diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go index 501e8e66..40c0b82d 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go @@ -502,6 +502,9 @@ type ProviderConfig struct { // @Title zh-CN 空内容时提升思考为正文 // @Description zh-CN 开启后,若模型响应只包含 reasoning_content/thinking 而没有正文内容,将 reasoning 内容提升为正文内容返回,避免客户端收到空回复。 promoteThinkingOnEmpty bool `required:"false" yaml:"promoteThinkingOnEmpty" json:"promoteThinkingOnEmpty"` + // @Title zh-CN 记录上游错误响应体 + // @Description zh-CN 开启后,将上游 4xx/5xx 响应体以 warn 日志输出,便于排查 provider 兼容性问题。默认关闭,避免误记录敏感错误内容。 + logUpstreamErrorResponseBody bool `required:"false" yaml:"logUpstreamErrorResponseBody" json:"logUpstreamErrorResponseBody"` // @Title zh-CN HiClaw 模式 // @Description zh-CN 开启后同时启用 mergeConsecutiveMessages 和 promoteThinkingOnEmpty,适用于 HiClaw 多 Agent 协作场景。 hiclawMode bool `required:"false" yaml:"hiclawMode" json:"hiclawMode"` @@ -538,6 +541,18 @@ func (c *ProviderConfig) IsOpenAIProtocol() bool { return c.protocol == protocolOpenAI } +func (c *ProviderConfig) supportsMessageReasoningContent() bool { + switch c.typ { + case providerTypeQwen, providerTypeOpenRouter, providerTypeZhipuAi: + return true + default: + // DeepSeek supports Anthropic Messages natively, so Claude requests usually bypass + // this Claude->OpenAI conversion path. Its OpenAI request-side reasoning history + // semantics should be validated separately before adding it here. + return false + } +} + func (c *ProviderConfig) FromJson(json gjson.Result) { c.id = json.Get("id").String() c.typ = json.Get("type").String() @@ -743,6 +758,7 @@ func (c *ProviderConfig) FromJson(json gjson.Result) { c.mergeConsecutiveMessages = json.Get("mergeConsecutiveMessages").Bool() c.providerDomain = json.Get("providerDomain").String() c.promoteThinkingOnEmpty = json.Get("promoteThinkingOnEmpty").Bool() + c.logUpstreamErrorResponseBody = json.Get("logUpstreamErrorResponseBody").Bool() c.hiclawMode = json.Get("hiclawMode").Bool() if c.hiclawMode { c.mergeConsecutiveMessages = true @@ -891,6 +907,10 @@ func (c *ProviderConfig) GetPromoteThinkingOnEmpty() bool { return c.promoteThinkingOnEmpty } +func (c *ProviderConfig) GetLogUpstreamErrorResponseBody() bool { + return c.logUpstreamErrorResponseBody +} + func (c *ProviderConfig) ReplaceByCustomSettings(body []byte) ([]byte, error) { return ReplaceByCustomSettings(body, c.customSettings) } @@ -1204,7 +1224,9 @@ func (c *ProviderConfig) handleRequestBody( // Convert Claude protocol to OpenAI protocol converter := &ClaudeToOpenAIConverter{} - body, err = converter.ConvertClaudeRequestToOpenAI(body) + body, err = converter.ConvertClaudeRequestToOpenAIWithOptions(body, ClaudeToOpenAIConvertOptions{ + PreserveMessageReasoningContent: c.supportsMessageReasoningContent(), + }) if err != nil { return types.ActionContinue, fmt.Errorf("failed to convert claude request to openai: %v", err) } @@ -1245,7 +1267,7 @@ func (c *ProviderConfig) handleRequestBody( } if needClaudeConversion && provider.GetProviderType() != providerTypeBedrock && provider.GetProviderType() != providerTypeClaude { - body = stripClaudeInternalMessageFields(body) + body = stripClaudeInternalMessageFields(body, c.supportsMessageReasoningContent()) } // use openai protocol (either original openai or converted from claude) @@ -1281,7 +1303,7 @@ func (c *ProviderConfig) handleRequestBody( return types.ActionContinue, replaceRequestBody(body) } -func stripClaudeInternalMessageFields(body []byte) []byte { +func stripClaudeInternalMessageFields(body []byte, preserveMessageReasoningContent ...bool) []byte { result := body for _, field := range []string{"claude_thinking", "claude_output_config", "claude_anthropic_beta"} { if updated, err := sjson.DeleteBytes(result, field); err == nil { @@ -1294,15 +1316,19 @@ func stripClaudeInternalMessageFields(body []byte) []byte { return result } - for _, field := range []string{ + fields := []string{ "reasoning", - "reasoning_content", "reasoning_signature", "reasoning_redacted_content", "claude_content_blocks", "claude_content_block_index", "claude_content_block_stop", - } { + } + if len(preserveMessageReasoningContent) == 0 || !preserveMessageReasoningContent[0] { + fields = append(fields, "reasoning_content") + } + + for _, field := range fields { messages.ForEach(func(key, _ gjson.Result) bool { if updated, err := sjson.DeleteBytes(result, fmt.Sprintf("messages.%d.%s", key.Int(), field)); err == nil { result = updated diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/provider_test.go b/plugins/wasm-go/extensions/ai-proxy/provider/provider_test.go index 5b0f1037..ecf0b979 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/provider_test.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/provider_test.go @@ -468,6 +468,27 @@ func TestProviderConfig_GetPromoteThinkingOnEmpty(t *testing.T) { } } +func TestProviderConfig_GetLogUpstreamErrorResponseBody(t *testing.T) { + tests := []struct { + name string + logUpstreamErrorResponseBody bool + expected bool + }{ + {"enabled", true, true}, + {"default_disabled", false, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + config := &ProviderConfig{ + logUpstreamErrorResponseBody: tt.logUpstreamErrorResponseBody, + } + result := config.GetLogUpstreamErrorResponseBody() + assert.Equal(t, tt.expected, result) + }) + } +} + // ============ Failover Tests ============ func TestFailover_FromJson_Defaults(t *testing.T) { @@ -763,4 +784,9 @@ func TestStripClaudeInternalMessageFields(t *testing.T) { assert.False(t, gjson.GetBytes(result, "messages.0.claude_content_block_index").Exists()) assert.False(t, gjson.GetBytes(result, "messages.0.claude_content_block_stop").Exists()) assert.Equal(t, "answer", gjson.GetBytes(result, "messages.0.content").String()) + + preserved := stripClaudeInternalMessageFields(body, true) + assert.Equal(t, "reasoning", gjson.GetBytes(preserved, "messages.0.reasoning_content").String()) + assert.False(t, gjson.GetBytes(preserved, "messages.0.reasoning_signature").Exists()) + assert.False(t, gjson.GetBytes(preserved, "messages.0.claude_content_blocks").Exists()) } diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go b/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go index b49b3799..3686fd89 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go @@ -124,17 +124,29 @@ func (m *qwenProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName func (m *qwenProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header) ([]byte, error) { if m.config.qwenEnableCompatible { - if gjson.GetBytes(body, "model").Exists() { - rawModel := gjson.GetBytes(body, "model").String() - mappedModel := getMappedModel(rawModel, m.config.modelMapping) - newBody, err := sjson.SetBytes(body, "model", mappedModel) + modifiedBody := body + model := gjson.GetBytes(modifiedBody, "model") + mappedModel := "" + if model.Exists() { + rawModel := model.String() + mappedModel = getMappedModel(rawModel, m.config.modelMapping) + newBody, err := sjson.SetBytes(modifiedBody, "model", mappedModel) if err != nil { log.Errorf("Replace model error: %v", err) return newBody, err } - return newBody, nil + modifiedBody = newBody } - return body, nil + if mappedModel != "" && requestBodyHasMessageReasoningContent(modifiedBody) && qwenSupportsPreserveThinking(mappedModel) { + // Qwen OpenAI-compatible mode requires top-level preserve_thinking=true + // before historical reasoning_content can be reused by supported models. + var err error + modifiedBody, err = sjson.SetBytes(modifiedBody, "preserve_thinking", true) + if err != nil { + return modifiedBody, err + } + } + return modifiedBody, nil } switch apiName { case ApiNameChatCompletion: @@ -286,6 +298,7 @@ func (m *qwenProvider) buildQwenTextGenerationRequest(ctx wrapper.HttpContext, o TopP: math.Max(qwenTopPMin, math.Min(origRequest.TopP, qwenTopPMax)), IncrementalOutput: streaming && (origRequest.Tools == nil || len(origRequest.Tools) == 0), EnableSearch: m.config.qwenEnableSearch, + PreserveThinking: shouldEnableQwenPreserveThinking(origRequest.Model, origRequest.Messages), Tools: origRequest.Tools, }, } @@ -566,6 +579,7 @@ type qwenTextGenParameters struct { TopP float64 `json:"top_p,omitempty"` IncrementalOutput bool `json:"incremental_output,omitempty"` EnableSearch bool `json:"enable_search,omitempty"` + PreserveThinking bool `json:"preserve_thinking,omitempty"` Tools []tool `json:"tools,omitempty"` } @@ -673,13 +687,49 @@ func (m *qwenMessage) StringContent() string { return "" } +func chatMessagesHaveReasoningContent(messages []chatMessage) bool { + for _, message := range messages { + if message.ReasoningContent != "" { + return true + } + } + return false +} + +func shouldEnableQwenPreserveThinking(model string, messages []chatMessage) bool { + return chatMessagesHaveReasoningContent(messages) && qwenSupportsPreserveThinking(model) +} + +func qwenSupportsPreserveThinking(model string) bool { + switch { + case model == "qwen3.6-max-preview": + return true + case strings.HasPrefix(model, "qwen3.6-plus"): + return true + case model == "kimi-k2.6": + return true + default: + return false + } +} + +func requestBodyHasMessageReasoningContent(body []byte) bool { + for _, message := range gjson.GetBytes(body, "messages").Array() { + if message.Get("reasoning_content").String() != "" { + return true + } + } + return false +} + func chatMessage2QwenMessage(chatMessage chatMessage) qwenMessage { if chatMessage.IsStringContent() { return qwenMessage{ - Name: chatMessage.Name, - Role: chatMessage.Role, - Content: chatMessage.StringContent(), - ToolCalls: chatMessage.ToolCalls, + Name: chatMessage.Name, + Role: chatMessage.Role, + Content: chatMessage.StringContent(), + ReasoningContent: chatMessage.ReasoningContent, + ToolCalls: chatMessage.ToolCalls, } } else { var contents []qwenVlMessageContent @@ -694,10 +744,11 @@ func chatMessage2QwenMessage(chatMessage chatMessage) qwenMessage { contents = append(contents, content) } return qwenMessage{ - Name: chatMessage.Name, - Role: chatMessage.Role, - Content: contents, - ToolCalls: chatMessage.ToolCalls, + Name: chatMessage.Name, + Role: chatMessage.Role, + Content: contents, + ReasoningContent: chatMessage.ReasoningContent, + ToolCalls: chatMessage.ToolCalls, } } } diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/qwen_test.go b/plugins/wasm-go/extensions/ai-proxy/provider/qwen_test.go new file mode 100644 index 00000000..399d385d --- /dev/null +++ b/plugins/wasm-go/extensions/ai-proxy/provider/qwen_test.go @@ -0,0 +1,214 @@ +package provider + +import ( + "encoding/json" + "net/http" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/tidwall/gjson" +) + +func TestChatMessage2QwenMessagePreservesReasoningContent(t *testing.T) { + t.Run("string content", func(t *testing.T) { + msg := chatMessage{ + Role: "assistant", + Content: "visible answer", + ReasoningContent: "preserved reasoning", + ToolCalls: []toolCall{ + { + Id: "call_1", + Type: "function", + Function: functionCall{ + Name: "lookup", + Arguments: `{"q":"weather"}`, + }, + }, + }, + } + + qwenMsg := chatMessage2QwenMessage(msg) + + assert.Equal(t, "assistant", qwenMsg.Role) + assert.Equal(t, "visible answer", qwenMsg.Content) + assert.Equal(t, "preserved reasoning", qwenMsg.ReasoningContent) + require.Len(t, qwenMsg.ToolCalls, 1) + assert.Equal(t, "call_1", qwenMsg.ToolCalls[0].Id) + }) + + t.Run("array content", func(t *testing.T) { + msg := chatMessage{ + Role: "assistant", + Content: []any{ + map[string]any{ + "type": "text", + "text": "visible answer", + }, + }, + ReasoningContent: "preserved reasoning", + } + + qwenMsg := chatMessage2QwenMessage(msg) + + assert.Equal(t, "assistant", qwenMsg.Role) + assert.Equal(t, "preserved reasoning", qwenMsg.ReasoningContent) + contents, ok := qwenMsg.Content.([]qwenVlMessageContent) + require.True(t, ok) + require.Len(t, contents, 1) + assert.Equal(t, "visible answer", contents[0].Text) + }) + + t.Run("array image content", func(t *testing.T) { + msg := chatMessage{ + Role: "assistant", + Content: []any{ + map[string]any{ + "type": "image_url", + "image_url": map[string]any{ + "url": "https://example.com/image.png", + }, + }, + }, + ReasoningContent: "preserved reasoning", + } + + qwenMsg := chatMessage2QwenMessage(msg) + + assert.Equal(t, "preserved reasoning", qwenMsg.ReasoningContent) + contents, ok := qwenMsg.Content.([]qwenVlMessageContent) + require.True(t, ok) + require.Len(t, contents, 1) + assert.Equal(t, "https://example.com/image.png", contents[0].Image) + }) +} + +func TestBuildQwenTextGenerationRequestEnablesPreserveThinkingForReasoningHistory(t *testing.T) { + provider := &qwenProvider{} + request := &chatCompletionRequest{ + Model: "qwen3.6-plus", + Messages: []chatMessage{ + {Role: "assistant", Content: "visible answer", ReasoningContent: "historical reasoning"}, + }, + MaxTokens: 256, + } + + body, err := provider.buildQwenTextGenerationRequest(nil, request, false) + require.NoError(t, err) + + var qwenRequest qwenTextGenRequest + require.NoError(t, json.Unmarshal(body, &qwenRequest)) + assert.True(t, qwenRequest.Parameters.PreserveThinking) +} + +func TestBuildQwenTextGenerationRequestOmitsPreserveThinkingForUnsupportedModel(t *testing.T) { + provider := &qwenProvider{} + request := &chatCompletionRequest{ + Model: "qwen-plus", + Messages: []chatMessage{ + {Role: "assistant", Content: "visible answer", ReasoningContent: "historical reasoning"}, + }, + MaxTokens: 256, + } + + body, err := provider.buildQwenTextGenerationRequest(nil, request, false) + require.NoError(t, err) + + assert.False(t, gjson.GetBytes(body, "parameters.preserve_thinking").Exists()) +} + +func TestBuildQwenTextGenerationRequestOmitsPreserveThinkingWithoutReasoningHistory(t *testing.T) { + provider := &qwenProvider{} + request := &chatCompletionRequest{ + Model: "qwen-plus", + Messages: []chatMessage{ + {Role: "assistant", Content: "visible answer"}, + }, + MaxTokens: 256, + } + + body, err := provider.buildQwenTextGenerationRequest(nil, request, false) + require.NoError(t, err) + + assert.False(t, gjson.GetBytes(body, "parameters.preserve_thinking").Exists()) +} + +func TestTransformRequestBodyHeadersCompatibleModeEnablesPreserveThinkingForReasoningHistory(t *testing.T) { + provider := &qwenProvider{ + config: ProviderConfig{ + qwenEnableCompatible: true, + }, + } + + body := []byte(`{ + "model":"qwen3.6-plus", + "messages":[ + {"role":"assistant","content":"visible answer","reasoning_content":"historical reasoning"} + ] + }`) + + modifiedBody, err := provider.TransformRequestBodyHeaders(nil, ApiNameChatCompletion, body, http.Header{}) + require.NoError(t, err) + assert.Equal(t, true, gjson.GetBytes(modifiedBody, "preserve_thinking").Bool()) +} + +func TestTransformRequestBodyHeadersCompatibleModeOmitsPreserveThinkingForUnsupportedModel(t *testing.T) { + provider := &qwenProvider{ + config: ProviderConfig{ + qwenEnableCompatible: true, + }, + } + + body := []byte(`{ + "model":"qwen-plus", + "messages":[ + {"role":"assistant","content":"visible answer","reasoning_content":"historical reasoning"} + ] + }`) + + modifiedBody, err := provider.TransformRequestBodyHeaders(nil, ApiNameChatCompletion, body, http.Header{}) + require.NoError(t, err) + assert.False(t, gjson.GetBytes(modifiedBody, "preserve_thinking").Exists()) +} + +func TestTransformRequestBodyHeadersCompatibleModeEnablesPreserveThinkingAfterModelMapping(t *testing.T) { + provider := &qwenProvider{ + config: ProviderConfig{ + qwenEnableCompatible: true, + modelMapping: map[string]string{ + "alias-model": "qwen3.6-plus-2026-04-02", + }, + }, + } + + body := []byte(`{ + "model":"alias-model", + "messages":[ + {"role":"assistant","content":"visible answer","reasoning_content":"historical reasoning"} + ] + }`) + + modifiedBody, err := provider.TransformRequestBodyHeaders(nil, ApiNameChatCompletion, body, http.Header{}) + require.NoError(t, err) + assert.Equal(t, "qwen3.6-plus-2026-04-02", gjson.GetBytes(modifiedBody, "model").String()) + assert.Equal(t, true, gjson.GetBytes(modifiedBody, "preserve_thinking").Bool()) +} + +func TestTransformRequestBodyHeadersCompatibleModeOmitsPreserveThinkingWithoutReasoningHistory(t *testing.T) { + provider := &qwenProvider{ + config: ProviderConfig{ + qwenEnableCompatible: true, + }, + } + + body := []byte(`{ + "model":"qwen-plus", + "messages":[ + {"role":"assistant","content":"visible answer"} + ] + }`) + + modifiedBody, err := provider.TransformRequestBodyHeaders(nil, ApiNameChatCompletion, body, http.Header{}) + require.NoError(t, err) + assert.False(t, gjson.GetBytes(modifiedBody, "preserve_thinking").Exists()) +} diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/request_helper.go b/plugins/wasm-go/extensions/ai-proxy/provider/request_helper.go index fe17e049..d897a846 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/request_helper.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/request_helper.go @@ -174,7 +174,15 @@ func mergeConsecutiveMessages(body []byte) ([]byte, error) { result[len(result)-1].Role == msg.Role && (msg.Role == roleUser || msg.Role == roleAssistant) { last := &result[len(result)-1] + if msg.Role == roleAssistant && + (len(last.ToolCalls) > 0 || len(msg.ToolCalls) > 0 || last.FunctionCall != nil || msg.FunctionCall != nil) { + // Assistant tool-calling turns are structurally sensitive. Keep them split + // rather than trying to merge content/reasoning and risking dropped calls. + result = append(result, msg) + continue + } last.Content = mergeMessageContent(last.Content, msg.Content) + last.ReasoningContent = mergeReasoningContent(last.ReasoningContent, msg.ReasoningContent) merged = true continue } @@ -202,6 +210,17 @@ func mergeMessageContent(prev, curr any) any { return append(prevParts, currParts...) } +func mergeReasoningContent(prev, curr string) string { + switch { + case prev == "": + return curr + case curr == "": + return prev + default: + return prev + "\n\n" + curr + } +} + func ReplaceResponseBody(body []byte) error { log.Debugf("response body: %s", string(body)) err := proxywasm.ReplaceHttpResponseBody(body) diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/request_helper_test.go b/plugins/wasm-go/extensions/ai-proxy/provider/request_helper_test.go index e1dd2854..192457ce 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/request_helper_test.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/request_helper_test.go @@ -72,6 +72,97 @@ func TestMergeConsecutiveMessages(t *testing.T) { assert.Equal(t, "第一段\n\n第二段", output.Messages[1].Content) }) + t.Run("merges_consecutive_assistant_reasoning_content", func(t *testing.T) { + input := chatCompletionRequest{ + Messages: []chatMessage{ + {Role: "user", Content: "问题"}, + {Role: "assistant", Content: "第一段", ReasoningContent: "第一段推理"}, + {Role: "assistant", Content: "第二段", ReasoningContent: "第二段推理"}, + }, + } + body, err := json.Marshal(input) + require.NoError(t, err) + + result, err := mergeConsecutiveMessages(body) + assert.NoError(t, err) + + var output chatCompletionRequest + require.NoError(t, json.Unmarshal(result, &output)) + + assert.Len(t, output.Messages, 2) + assert.Equal(t, "assistant", output.Messages[1].Role) + assert.Equal(t, "第一段\n\n第二段", output.Messages[1].Content) + assert.Equal(t, "第一段推理\n\n第二段推理", output.Messages[1].ReasoningContent) + }) + + t.Run("does_not_merge_assistant_messages_with_tool_calls", func(t *testing.T) { + input := chatCompletionRequest{ + Messages: []chatMessage{ + {Role: "user", Content: "问题"}, + {Role: "assistant", Content: "先解释"}, + { + Role: "assistant", + Content: "", + ToolCalls: []toolCall{ + { + Id: "call_1", + Type: "function", + Function: functionCall{ + Name: "lookup", + Arguments: `{"q":"weather"}`, + }, + }, + }, + }, + }, + } + body, err := json.Marshal(input) + require.NoError(t, err) + + result, err := mergeConsecutiveMessages(body) + assert.NoError(t, err) + + var output chatCompletionRequest + require.NoError(t, json.Unmarshal(result, &output)) + + assert.Len(t, output.Messages, 3) + assert.Equal(t, "assistant", output.Messages[1].Role) + assert.Equal(t, "先解释", output.Messages[1].Content) + require.Len(t, output.Messages[2].ToolCalls, 1) + assert.Equal(t, "call_1", output.Messages[2].ToolCalls[0].Id) + }) + + t.Run("does_not_merge_assistant_messages_with_legacy_function_call", func(t *testing.T) { + input := chatCompletionRequest{ + Messages: []chatMessage{ + {Role: "user", Content: "问题"}, + {Role: "assistant", Content: "先解释"}, + { + Role: "assistant", + Content: "", + FunctionCall: &functionCall{ + Name: "lookup", + Arguments: `{"q":"weather"}`, + }, + }, + }, + } + body, err := json.Marshal(input) + require.NoError(t, err) + + result, err := mergeConsecutiveMessages(body) + assert.NoError(t, err) + + var output chatCompletionRequest + require.NoError(t, json.Unmarshal(result, &output)) + + assert.Len(t, output.Messages, 3) + assert.Equal(t, "assistant", output.Messages[1].Role) + assert.Equal(t, "先解释", output.Messages[1].Content) + require.NotNil(t, output.Messages[2].FunctionCall) + assert.Equal(t, "lookup", output.Messages[2].FunctionCall.Name) + }) + t.Run("merges_multiple_consecutive_same_role", func(t *testing.T) { input := chatCompletionRequest{ Messages: []chatMessage{ diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/zhipuai.go b/plugins/wasm-go/extensions/ai-proxy/provider/zhipuai.go index b3967b17..c2bf9691 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/zhipuai.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/zhipuai.go @@ -100,6 +100,11 @@ func (m *zhipuAiProvider) TransformRequestBody(ctx wrapper.HttpContext, apiName // Explicitly set thinking=disabled to prevent ZhipuAI from enabling it by default. body, _ = sjson.SetBytes(body, "thinking", map[string]string{"type": "disabled"}) } + if requestBodyHasMessageReasoningContent(body) { + // Z.AI clears historical reasoning_content by default. Disable clearing only + // when the converted request actually carries reusable reasoning history. + body, _ = sjson.SetBytes(body, "thinking.clear_thinking", false) + } return m.config.defaultTransformRequestBody(ctx, apiName, body) } diff --git a/plugins/wasm-go/extensions/ai-proxy/test/openai.go b/plugins/wasm-go/extensions/ai-proxy/test/openai.go index d67c5264..69441f99 100644 --- a/plugins/wasm-go/extensions/ai-proxy/test/openai.go +++ b/plugins/wasm-go/extensions/ai-proxy/test/openai.go @@ -25,6 +25,20 @@ var basicOpenAIConfig = func() json.RawMessage { return data }() +var openAIWithUpstreamErrorResponseBodyLogConfig = func() json.RawMessage { + data, _ := json.Marshal(map[string]interface{}{ + "provider": map[string]interface{}{ + "type": "openai", + "apiTokens": []string{"sk-openai-test123456789"}, + "logUpstreamErrorResponseBody": true, + "modelMapping": map[string]string{ + "*": "gpt-3.5-turbo", + }, + }, + }) + return data +}() + // 测试配置:OpenAI多模型配置 var openAIMultiModelConfig = func() json.RawMessage { data, _ := json.Marshal(map[string]interface{}{ @@ -826,6 +840,83 @@ func RunOpenAIOnHttpResponseBodyTests(t *testing.T) { require.True(t, hasResponseBodyLogs, "Should have response body processing logs") }) + t.Run("openai upstream error response body warn log disabled by default", func(t *testing.T) { + host, status := test.NewTestHost(basicOpenAIConfig) + defer host.Reset() + require.Equal(t, types.OnPluginStartStatusOK, status) + + host.CallOnHttpRequestHeaders([][2]string{ + {":authority", "example.com"}, + {":path", "/v1/chat/completions"}, + {":method", "POST"}, + {"Content-Type", "application/json"}, + }) + + requestBody := `{"model":"gpt-4o","messages":[{"role":"user","content":"test"}]}` + host.CallOnHttpRequestBody([]byte(requestBody)) + + require.NoError(t, host.SetProperty([]string{"response", "code_details"}, []byte("via_upstream"))) + responseHeaders := [][2]string{ + {":status", "400"}, + {"Content-Type", "application/json"}, + {"x-request-id", "upstream-req-123"}, + } + action := host.CallOnHttpResponseHeaders(responseHeaders) + require.Equal(t, types.ActionContinue, action) + + errorBody := `{"error":{"type":"invalid_request_error","message":"thinking is enabled but reasoning_content is missing"}}` + action = host.CallOnHttpResponseBody([]byte(errorBody)) + require.Equal(t, types.ActionContinue, action) + + for _, logEntry := range host.GetWarnLogs() { + require.NotContains(t, logEntry, "[upstream_error_response]") + } + }) + + t.Run("openai upstream error response body logs warn", func(t *testing.T) { + host, status := test.NewTestHost(openAIWithUpstreamErrorResponseBodyLogConfig) + defer host.Reset() + require.Equal(t, types.OnPluginStartStatusOK, status) + + host.CallOnHttpRequestHeaders([][2]string{ + {":authority", "example.com"}, + {":path", "/v1/chat/completions"}, + {":method", "POST"}, + {"Content-Type", "application/json"}, + }) + + requestBody := `{"model":"gpt-4o","messages":[{"role":"user","content":"test"}]}` + host.CallOnHttpRequestBody([]byte(requestBody)) + + require.NoError(t, host.SetProperty([]string{"response", "code_details"}, []byte("via_upstream"))) + responseHeaders := [][2]string{ + {":status", "400"}, + {"Content-Type", "application/json"}, + {"x-request-id", "upstream-req-123"}, + } + action := host.CallOnHttpResponseHeaders(responseHeaders) + require.Equal(t, types.ActionContinue, action) + + errorBody := `{"error":{"type":"invalid_request_error","message":"thinking is enabled but reasoning_content is missing"}}` + action = host.CallOnHttpResponseBody([]byte(errorBody)) + require.Equal(t, types.ActionContinue, action) + + warnLogs := host.GetWarnLogs() + hasUpstreamErrorLog := false + for _, logEntry := range warnLogs { + if strings.Contains(logEntry, "[upstream_error_response]") && + strings.Contains(logEntry, "provider=openai") && + strings.Contains(logEntry, "status=400") && + strings.Contains(logEntry, "request_id=upstream-req-123") && + strings.Contains(logEntry, "final_model=gpt-3.5-turbo") && + strings.Contains(logEntry, "reasoning_content is missing") { + hasUpstreamErrorLog = true + break + } + } + require.True(t, hasUpstreamErrorLog, "Should log upstream 400 response body at warn level, logs: %v", warnLogs) + }) + // 测试OpenAI响应体处理(嵌入接口) t.Run("openai embeddings response body", func(t *testing.T) { host, status := test.NewTestHost(basicOpenAIConfig) diff --git a/plugins/wasm-go/extensions/ai-proxy/test/zhipuai.go b/plugins/wasm-go/extensions/ai-proxy/test/zhipuai.go index 1d30d653..0a7c7c3f 100644 --- a/plugins/wasm-go/extensions/ai-proxy/test/zhipuai.go +++ b/plugins/wasm-go/extensions/ai-proxy/test/zhipuai.go @@ -61,6 +61,58 @@ func RunZhipuAIClaudeAutoConversionTests(t *testing.T) { assert.NotContains(t, bodyMap, "reasoning_effort") }) + t.Run("claude reasoning history disables zhipuai clear thinking", func(t *testing.T) { + host, status := test.NewTestHost(basicZhipuAIConfig) + defer host.Reset() + require.Equal(t, types.OnPluginStartStatusOK, status) + + action := host.CallOnHttpRequestHeaders([][2]string{ + {":authority", "example.com"}, + {":path", "/v1/messages"}, + {":method", "POST"}, + {"Content-Type", "application/json"}, + }) + require.Equal(t, types.HeaderStopIteration, action) + + requestBody := `{ + "model": "glm-4.5", + "max_tokens": 1000, + "messages": [ + {"role": "user", "content": "Need weather"}, + {"role": "assistant", "content": [ + {"type": "thinking", "thinking": "Need to call the weather tool.", "signature": "sig"}, + {"type": "tool_use", "id": "toolu_1", "name": "get_weather", "input": {"city": "Paris"}} + ]}, + {"role": "user", "content": [ + {"type": "tool_result", "tool_use_id": "toolu_1", "content": "sunny"} + ]} + ], + "thinking": {"type": "enabled", "budget_tokens": 8192} + }` + + action = host.CallOnHttpRequestBody([]byte(requestBody)) + require.Equal(t, types.ActionContinue, action) + + transformedBody := host.GetRequestBody() + require.NotNil(t, transformedBody) + + var bodyMap map[string]interface{} + err := json.Unmarshal(transformedBody, &bodyMap) + require.NoError(t, err) + + thinking, ok := bodyMap["thinking"].(map[string]interface{}) + require.True(t, ok, "thinking field should be present") + assert.Equal(t, "enabled", thinking["type"]) + assert.Equal(t, false, thinking["clear_thinking"]) + + messages, ok := bodyMap["messages"].([]interface{}) + require.True(t, ok, "messages should be present") + require.GreaterOrEqual(t, len(messages), 2) + assistantMsg, ok := messages[1].(map[string]interface{}) + require.True(t, ok, "assistant message should be an object") + assert.Equal(t, "Need to call the weather tool.", assistantMsg["reasoning_content"]) + }) + t.Run("claude without thinking sets thinking disabled for zhipuai", func(t *testing.T) { host, status := test.NewTestHost(basicZhipuAIConfig) defer host.Reset()