From 89587c1c9bce510c970f33a9f1926c65e3906ea3 Mon Sep 17 00:00:00 2001 From: woody Date: Wed, 1 Apr 2026 09:40:14 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20remove=20non-standard=20thinking/reasoni?= =?UTF-8?q?ng=5Fmax=5Ftokens=20from=20Claude-to=E2=80=A6=20(#3667)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../wasm-go/extensions/ai-proxy/main_test.go | 8 + .../ai-proxy/provider/claude_to_openai.go | 17 +- .../provider/claude_to_openai_test.go | 53 +++---- .../ai-proxy/provider/openrouter.go | 16 ++ .../extensions/ai-proxy/provider/provider.go | 17 ++ .../extensions/ai-proxy/provider/zhipuai.go | 4 + .../extensions/ai-proxy/test/openrouter.go | 148 ++++++++++++++++++ .../extensions/ai-proxy/test/zhipuai.go | 138 ++++++++++++++++ 8 files changed, 363 insertions(+), 38 deletions(-) create mode 100644 plugins/wasm-go/extensions/ai-proxy/test/openrouter.go create mode 100644 plugins/wasm-go/extensions/ai-proxy/test/zhipuai.go diff --git a/plugins/wasm-go/extensions/ai-proxy/main_test.go b/plugins/wasm-go/extensions/ai-proxy/main_test.go index 0965da70..b0d34c2d 100644 --- a/plugins/wasm-go/extensions/ai-proxy/main_test.go +++ b/plugins/wasm-go/extensions/ai-proxy/main_test.go @@ -228,3 +228,11 @@ func TestConsumerAffinity(t *testing.T) { test.RunConsumerAffinityParseConfigTests(t) test.RunConsumerAffinityOnHttpRequestHeadersTests(t) } + +func TestOpenRouter(t *testing.T) { + test.RunOpenRouterClaudeAutoConversionTests(t) +} + +func TestZhipuAI(t *testing.T) { + test.RunZhipuAIClaudeAutoConversionTests(t) +} diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai.go b/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai.go index 7a772305..1474dab8 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai.go @@ -187,14 +187,16 @@ func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAI(body []byte) ([]b } // Convert thinking configuration if present + // Only set standard OpenAI fields (reasoning_effort). Non-standard fields like + // "thinking" and "reasoning_max_tokens" are NOT set here because they are not + // recognized by OpenAI/Azure and will cause 400 errors. Providers that need + // these non-standard fields (e.g., ZhipuAI) should handle them in their own + // OnRequestBody implementation. if claudeRequest.Thinking != nil { log.Debugf("[Claude->OpenAI] Found thinking config: type=%s, budget_tokens=%d", claudeRequest.Thinking.Type, claudeRequest.Thinking.BudgetTokens) if claudeRequest.Thinking.Type == "enabled" { - openaiRequest.ReasoningMaxTokens = claudeRequest.Thinking.BudgetTokens - openaiRequest.Thinking = &thinkingParam{Type: "enabled", BudgetToken: claudeRequest.Thinking.BudgetTokens} - // Set ReasoningEffort based on budget_tokens // low: <4096, medium: >=4096 and <16384, high: >=16384 if claudeRequest.Thinking.BudgetTokens < 4096 { @@ -205,14 +207,9 @@ func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAI(body []byte) ([]b openaiRequest.ReasoningEffort = "high" } - log.Debugf("[Claude->OpenAI] Converted thinking config: budget_tokens=%d, reasoning_effort=%s, reasoning_max_tokens=%d", - claudeRequest.Thinking.BudgetTokens, openaiRequest.ReasoningEffort, openaiRequest.ReasoningMaxTokens) + log.Debugf("[Claude->OpenAI] Converted thinking config: budget_tokens=%d, reasoning_effort=%s", + claudeRequest.Thinking.BudgetTokens, openaiRequest.ReasoningEffort) } - } else { - // Explicitly disable thinking when not configured in Claude request - // This prevents providers like ZhipuAI from enabling thinking by default - openaiRequest.Thinking = &thinkingParam{Type: "disabled"} - log.Debugf("[Claude->OpenAI] No thinking config found, explicitly disabled") } result, err := json.Marshal(openaiRequest) diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai_test.go b/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai_test.go index d007b597..88bc202d 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai_test.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai_test.go @@ -642,11 +642,9 @@ func TestClaudeToOpenAIConverter_ConvertThinkingConfig(t *testing.T) { converter := &ClaudeToOpenAIConverter{} tests := []struct { - name string - claudeRequest string - expectedMaxTokens int - expectedEffort string - expectThinkingConfig bool + name string + claudeRequest string + expectedEffort string }{ { name: "thinking_enabled_low", @@ -656,9 +654,7 @@ func TestClaudeToOpenAIConverter_ConvertThinkingConfig(t *testing.T) { "messages": [{"role": "user", "content": "Hello"}], "thinking": {"type": "enabled", "budget_tokens": 2048} }`, - expectedMaxTokens: 2048, - expectedEffort: "low", - expectThinkingConfig: true, + expectedEffort: "low", }, { name: "thinking_enabled_medium", @@ -668,9 +664,7 @@ func TestClaudeToOpenAIConverter_ConvertThinkingConfig(t *testing.T) { "messages": [{"role": "user", "content": "Hello"}], "thinking": {"type": "enabled", "budget_tokens": 8192} }`, - expectedMaxTokens: 8192, - expectedEffort: "medium", - expectThinkingConfig: true, + expectedEffort: "medium", }, { name: "thinking_enabled_high", @@ -680,9 +674,7 @@ func TestClaudeToOpenAIConverter_ConvertThinkingConfig(t *testing.T) { "messages": [{"role": "user", "content": "Hello"}], "thinking": {"type": "enabled", "budget_tokens": 20480} }`, - expectedMaxTokens: 20480, - expectedEffort: "high", - expectThinkingConfig: true, + expectedEffort: "high", }, { name: "thinking_disabled", @@ -692,9 +684,7 @@ func TestClaudeToOpenAIConverter_ConvertThinkingConfig(t *testing.T) { "messages": [{"role": "user", "content": "Hello"}], "thinking": {"type": "disabled"} }`, - expectedMaxTokens: 0, - expectedEffort: "", - expectThinkingConfig: false, + expectedEffort: "", }, { name: "no_thinking", @@ -703,9 +693,7 @@ func TestClaudeToOpenAIConverter_ConvertThinkingConfig(t *testing.T) { "max_tokens": 1000, "messages": [{"role": "user", "content": "Hello"}] }`, - expectedMaxTokens: 0, - expectedEffort: "", - expectThinkingConfig: false, + expectedEffort: "", }, } @@ -719,13 +707,23 @@ func TestClaudeToOpenAIConverter_ConvertThinkingConfig(t *testing.T) { err = json.Unmarshal(result, &openaiRequest) assert.NoError(t, err) - if tt.expectThinkingConfig { - assert.Equal(t, tt.expectedMaxTokens, openaiRequest.ReasoningMaxTokens) - assert.Equal(t, tt.expectedEffort, openaiRequest.ReasoningEffort) - } else { - assert.Equal(t, 0, openaiRequest.ReasoningMaxTokens) - assert.Equal(t, "", openaiRequest.ReasoningEffort) - } + assert.Equal(t, tt.expectedEffort, openaiRequest.ReasoningEffort) + + // Verify non-standard fields are NEVER set in the converted request. + // These fields are not recognized by OpenAI/Azure and would cause 400 errors. + assert.Equal(t, 0, openaiRequest.ReasoningMaxTokens, + "reasoning_max_tokens must not be set - it is not a standard OpenAI parameter") + assert.Nil(t, openaiRequest.Thinking, + "thinking must not be set - it is not a standard OpenAI parameter") + + // Also verify at the raw JSON level to catch any serialization issues + var rawJSON map[string]interface{} + err = json.Unmarshal(result, &rawJSON) + require.NoError(t, err) + assert.NotContains(t, rawJSON, "thinking", + "raw JSON must not contain 'thinking' field") + assert.NotContains(t, rawJSON, "reasoning_max_tokens", + "raw JSON must not contain 'reasoning_max_tokens' field") }) } } @@ -930,7 +928,6 @@ func TestClaudeToOpenAIConverter_StripCchFromSystemMessage(t *testing.T) { assert.Equal(t, "You are a helpful assistant.", systemMsg.Content) }) } - func TestStripCchFromBillingHeader(t *testing.T) { tests := []struct { name string diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/openrouter.go b/plugins/wasm-go/extensions/ai-proxy/provider/openrouter.go index b11ee6af..2b603d64 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/openrouter.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/openrouter.go @@ -79,6 +79,22 @@ func (o *openrouterProvider) TransformRequestBody(ctx wrapper.HttpContext, apiNa // Check if ReasoningMaxTokens exists in the request body reasoningMaxTokens := gjson.GetBytes(body, "reasoning_max_tokens") if !reasoningMaxTokens.Exists() || reasoningMaxTokens.Int() == 0 { + // Check if budget_tokens was stored in context (from Claude auto-conversion path) + // Only use it when thinking was explicitly enabled, to avoid dirty input + if thinkingType, _ := ctx.GetContext(ctxKeyClaudeThinkingType).(string); thinkingType == "enabled" { + if budgetTokens, ok := ctx.GetContext(ctxKeyClaudeBudgetTokens).(int); ok && budgetTokens > 0 { + // Use budget_tokens from Claude thinking config + modifiedBody, err := sjson.DeleteBytes(body, "reasoning_effort") + if err != nil { + modifiedBody = body + } + modifiedBody, err = sjson.SetBytes(modifiedBody, "reasoning.max_tokens", budgetTokens) + if err != nil { + return nil, err + } + return o.config.defaultTransformRequestBody(ctx, apiName, modifiedBody) + } + } // No reasoning_max_tokens, use default transformation return o.config.defaultTransformRequestBody(ctx, apiName, body) } diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go index 98f85091..72be85c7 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go @@ -168,6 +168,8 @@ const ( finishReasonLength = "length" finishReasonToolCall = "tool_calls" + ctxKeyClaudeBudgetTokens = "claudeBudgetTokens" + ctxKeyClaudeThinkingType = "claudeThinkingType" ctxKeyIncrementalStreaming = "incrementalStreaming" ctxKeyApiKey = "apiKey" CtxKeyApiName = "apiName" @@ -1155,6 +1157,21 @@ func (c *ProviderConfig) handleRequestBody( // If main.go detected a Claude request that needs conversion, convert the body needClaudeConversion, _ := ctx.GetContext("needClaudeResponseConversion").(bool) if needClaudeConversion { + // Extract thinking config from original Claude body before conversion, + // so downstream providers (OpenRouter, ZhipuAI) can access it. + thinkingType := gjson.GetBytes(body, "thinking.type").String() + if thinkingType == "" { + // Claude request had no thinking field at all - treat as disabled + thinkingType = "disabled" + } + ctx.SetContext(ctxKeyClaudeThinkingType, thinkingType) + // Only extract budget_tokens when thinking is explicitly enabled + if thinkingType == "enabled" { + if budgetTokens := gjson.GetBytes(body, "thinking.budget_tokens").Int(); budgetTokens > 0 { + ctx.SetContext(ctxKeyClaudeBudgetTokens, int(budgetTokens)) + } + } + // Convert Claude protocol to OpenAI protocol converter := &ClaudeToOpenAIConverter{} body, err = converter.ConvertClaudeRequestToOpenAI(body) diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/zhipuai.go b/plugins/wasm-go/extensions/ai-proxy/provider/zhipuai.go index 8268cf8c..b3967b17 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/zhipuai.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/zhipuai.go @@ -95,6 +95,10 @@ func (m *zhipuAiProvider) TransformRequestBody(ctx wrapper.HttpContext, apiName body, _ = sjson.SetBytes(body, "thinking", map[string]string{"type": "enabled"}) // Remove reasoning_effort field as ZhipuAI doesn't recognize it body, _ = sjson.DeleteBytes(body, "reasoning_effort") + } else if thinkingType, ok := ctx.GetContext(ctxKeyClaudeThinkingType).(string); ok && thinkingType != "enabled" { + // Request came from Claude auto-conversion with thinking explicitly disabled or absent. + // Explicitly set thinking=disabled to prevent ZhipuAI from enabling it by default. + body, _ = sjson.SetBytes(body, "thinking", map[string]string{"type": "disabled"}) } return m.config.defaultTransformRequestBody(ctx, apiName, body) diff --git a/plugins/wasm-go/extensions/ai-proxy/test/openrouter.go b/plugins/wasm-go/extensions/ai-proxy/test/openrouter.go new file mode 100644 index 00000000..0cee5cfe --- /dev/null +++ b/plugins/wasm-go/extensions/ai-proxy/test/openrouter.go @@ -0,0 +1,148 @@ +package test + +import ( + "encoding/json" + "testing" + + "github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types" + "github.com/higress-group/wasm-go/pkg/test" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var basicOpenRouterConfig = func() json.RawMessage { + data, _ := json.Marshal(map[string]interface{}{ + "provider": map[string]interface{}{ + "type": "openrouter", + "apiTokens": []string{"sk-openrouter-test"}, + }, + }) + return data +}() + +func RunOpenRouterClaudeAutoConversionTests(t *testing.T) { + test.RunGoTest(t, func(t *testing.T) { + t.Run("claude thinking budget_tokens is converted to reasoning.max_tokens", func(t *testing.T) { + host, status := test.NewTestHost(basicOpenRouterConfig) + defer host.Reset() + require.Equal(t, types.OnPluginStartStatusOK, status) + + // Send request with Claude /v1/messages path to trigger auto-conversion + action := host.CallOnHttpRequestHeaders([][2]string{ + {":authority", "example.com"}, + {":path", "/v1/messages"}, + {":method", "POST"}, + {"Content-Type", "application/json"}, + }) + require.Equal(t, types.HeaderStopIteration, action) + + // Claude request body with thinking enabled + requestBody := `{ + "model": "anthropic/claude-sonnet-4", + "max_tokens": 8000, + "messages": [{"role": "user", "content": "Hello"}], + "thinking": {"type": "enabled", "budget_tokens": 10000} + }` + + action = host.CallOnHttpRequestBody([]byte(requestBody)) + require.Equal(t, types.ActionContinue, action) + + transformedBody := host.GetRequestBody() + require.NotNil(t, transformedBody) + + var bodyMap map[string]interface{} + err := json.Unmarshal(transformedBody, &bodyMap) + require.NoError(t, err) + + // reasoning.max_tokens should be set from budget_tokens + reasoning, ok := bodyMap["reasoning"].(map[string]interface{}) + require.True(t, ok, "reasoning field should be present") + assert.Equal(t, float64(10000), reasoning["max_tokens"], + "reasoning.max_tokens should preserve the original budget_tokens value") + + // reasoning_effort should be removed (OpenRouter uses reasoning.max_tokens instead) + assert.NotContains(t, bodyMap, "reasoning_effort", + "reasoning_effort should be removed") + + // Non-standard fields should not be present + assert.NotContains(t, bodyMap, "thinking", + "thinking should not be in the final request") + assert.NotContains(t, bodyMap, "reasoning_max_tokens", + "reasoning_max_tokens should not be in the final request") + }) + + t.Run("claude without thinking uses default transformation", func(t *testing.T) { + host, status := test.NewTestHost(basicOpenRouterConfig) + defer host.Reset() + require.Equal(t, types.OnPluginStartStatusOK, status) + + action := host.CallOnHttpRequestHeaders([][2]string{ + {":authority", "example.com"}, + {":path", "/v1/messages"}, + {":method", "POST"}, + {"Content-Type", "application/json"}, + }) + require.Equal(t, types.HeaderStopIteration, action) + + requestBody := `{ + "model": "anthropic/claude-sonnet-4", + "max_tokens": 1000, + "messages": [{"role": "user", "content": "Hello"}] + }` + + action = host.CallOnHttpRequestBody([]byte(requestBody)) + require.Equal(t, types.ActionContinue, action) + + transformedBody := host.GetRequestBody() + require.NotNil(t, transformedBody) + + var bodyMap map[string]interface{} + err := json.Unmarshal(transformedBody, &bodyMap) + require.NoError(t, err) + + // No reasoning fields should be present + assert.NotContains(t, bodyMap, "reasoning") + assert.NotContains(t, bodyMap, "reasoning_effort") + assert.NotContains(t, bodyMap, "thinking") + assert.NotContains(t, bodyMap, "reasoning_max_tokens") + }) + + t.Run("claude thinking disabled does not set reasoning", func(t *testing.T) { + host, status := test.NewTestHost(basicOpenRouterConfig) + defer host.Reset() + require.Equal(t, types.OnPluginStartStatusOK, status) + + action := host.CallOnHttpRequestHeaders([][2]string{ + {":authority", "example.com"}, + {":path", "/v1/messages"}, + {":method", "POST"}, + {"Content-Type", "application/json"}, + }) + require.Equal(t, types.HeaderStopIteration, action) + + // thinking disabled with budget_tokens (dirty input) + requestBody := `{ + "model": "anthropic/claude-sonnet-4", + "max_tokens": 1000, + "messages": [{"role": "user", "content": "Hello"}], + "thinking": {"type": "disabled", "budget_tokens": 5000} + }` + + action = host.CallOnHttpRequestBody([]byte(requestBody)) + require.Equal(t, types.ActionContinue, action) + + transformedBody := host.GetRequestBody() + require.NotNil(t, transformedBody) + + var bodyMap map[string]interface{} + err := json.Unmarshal(transformedBody, &bodyMap) + require.NoError(t, err) + + // Should NOT have reasoning.max_tokens since thinking was disabled + assert.NotContains(t, bodyMap, "reasoning", + "reasoning should not be set when thinking is disabled") + assert.NotContains(t, bodyMap, "thinking") + assert.NotContains(t, bodyMap, "reasoning_max_tokens") + }) + }) +} diff --git a/plugins/wasm-go/extensions/ai-proxy/test/zhipuai.go b/plugins/wasm-go/extensions/ai-proxy/test/zhipuai.go new file mode 100644 index 00000000..1d30d653 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-proxy/test/zhipuai.go @@ -0,0 +1,138 @@ +package test + +import ( + "encoding/json" + "testing" + + "github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types" + "github.com/higress-group/wasm-go/pkg/test" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var basicZhipuAIConfig = func() json.RawMessage { + data, _ := json.Marshal(map[string]interface{}{ + "provider": map[string]interface{}{ + "type": "zhipuai", + "apiTokens": []string{"sk-zhipuai-test"}, + }, + }) + return data +}() + +func RunZhipuAIClaudeAutoConversionTests(t *testing.T) { + test.RunGoTest(t, func(t *testing.T) { + t.Run("claude thinking enabled sets thinking enabled for zhipuai", func(t *testing.T) { + host, status := test.NewTestHost(basicZhipuAIConfig) + defer host.Reset() + require.Equal(t, types.OnPluginStartStatusOK, status) + + action := host.CallOnHttpRequestHeaders([][2]string{ + {":authority", "example.com"}, + {":path", "/v1/messages"}, + {":method", "POST"}, + {"Content-Type", "application/json"}, + }) + require.Equal(t, types.HeaderStopIteration, action) + + requestBody := `{ + "model": "glm-4", + "max_tokens": 1000, + "messages": [{"role": "user", "content": "Hello"}], + "thinking": {"type": "enabled", "budget_tokens": 8192} + }` + + action = host.CallOnHttpRequestBody([]byte(requestBody)) + require.Equal(t, types.ActionContinue, action) + + transformedBody := host.GetRequestBody() + require.NotNil(t, transformedBody) + + var bodyMap map[string]interface{} + err := json.Unmarshal(transformedBody, &bodyMap) + require.NoError(t, err) + + // ZhipuAI should have thinking=enabled (converted from reasoning_effort) + thinking, ok := bodyMap["thinking"].(map[string]interface{}) + require.True(t, ok, "thinking field should be present") + assert.Equal(t, "enabled", thinking["type"]) + + // reasoning_effort should be removed (ZhipuAI doesn't recognize it) + assert.NotContains(t, bodyMap, "reasoning_effort") + }) + + t.Run("claude without thinking sets thinking disabled for zhipuai", func(t *testing.T) { + host, status := test.NewTestHost(basicZhipuAIConfig) + defer host.Reset() + require.Equal(t, types.OnPluginStartStatusOK, status) + + action := host.CallOnHttpRequestHeaders([][2]string{ + {":authority", "example.com"}, + {":path", "/v1/messages"}, + {":method", "POST"}, + {"Content-Type", "application/json"}, + }) + require.Equal(t, types.HeaderStopIteration, action) + + requestBody := `{ + "model": "glm-4", + "max_tokens": 1000, + "messages": [{"role": "user", "content": "Hello"}] + }` + + action = host.CallOnHttpRequestBody([]byte(requestBody)) + require.Equal(t, types.ActionContinue, action) + + transformedBody := host.GetRequestBody() + require.NotNil(t, transformedBody) + + var bodyMap map[string]interface{} + err := json.Unmarshal(transformedBody, &bodyMap) + require.NoError(t, err) + + // ZhipuAI should explicitly set thinking=disabled + thinking, ok := bodyMap["thinking"].(map[string]interface{}) + require.True(t, ok, "thinking field should be present for disabled state") + assert.Equal(t, "disabled", thinking["type"]) + }) + + t.Run("claude thinking disabled sets thinking disabled for zhipuai", func(t *testing.T) { + host, status := test.NewTestHost(basicZhipuAIConfig) + defer host.Reset() + require.Equal(t, types.OnPluginStartStatusOK, status) + + action := host.CallOnHttpRequestHeaders([][2]string{ + {":authority", "example.com"}, + {":path", "/v1/messages"}, + {":method", "POST"}, + {"Content-Type", "application/json"}, + }) + require.Equal(t, types.HeaderStopIteration, action) + + requestBody := `{ + "model": "glm-4", + "max_tokens": 1000, + "messages": [{"role": "user", "content": "Hello"}], + "thinking": {"type": "disabled"} + }` + + action = host.CallOnHttpRequestBody([]byte(requestBody)) + require.Equal(t, types.ActionContinue, action) + + transformedBody := host.GetRequestBody() + require.NotNil(t, transformedBody) + + var bodyMap map[string]interface{} + err := json.Unmarshal(transformedBody, &bodyMap) + require.NoError(t, err) + + // ZhipuAI should explicitly set thinking=disabled + thinking, ok := bodyMap["thinking"].(map[string]interface{}) + require.True(t, ok, "thinking field should be present for disabled state") + assert.Equal(t, "disabled", thinking["type"]) + + // No reasoning fields + assert.NotContains(t, bodyMap, "reasoning_effort") + }) + }) +}