Compare commits

..

1 Commits

Author SHA1 Message Date
johnlanni
b446651dd3 Add release notes 2026-02-22 12:31:15 +00:00
26 changed files with 165 additions and 3917 deletions

View File

@@ -199,14 +199,15 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Go 1.25
- name: Set up Go 1.24
uses: actions/setup-go@v4
with:
go-version: 1.25
go-version: 1.24
cache: true
- name: Install required tools
run: |
go install github.com/wadey/gocovmerge@latest
sudo apt-get update && sudo apt-get install -y bc
- name: Download all test results

View File

@@ -1 +1 @@
v2.2.1
v2.2.0

View File

@@ -4,6 +4,6 @@ dependencies:
version: 2.2.0
- name: higress-console
repository: https://higress.io/helm-charts/
version: 2.2.1
digest: sha256:23fe7b0f84965c13ac7ceabe6334212fc3d323b7b781277a6d2b6fd38e935dda
generated: "2026-03-07T12:45:44.267732+08:00"
version: 2.2.0
digest: sha256:2cb148fa6d52856344e1905d3fea018466c2feb52013e08997c2d5c7d50f2e5d
generated: "2026-02-11T17:45:59.187965929+08:00"

View File

@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 2.2.1
appVersion: 2.2.0
description: Helm chart for deploying Higress gateways
icon: https://higress.io/img/higress_logo_small.png
home: http://higress.io/
@@ -15,6 +15,6 @@ dependencies:
version: 2.2.0
- name: higress-console
repository: "https://higress.io/helm-charts/"
version: 2.2.1
version: 2.2.0
type: application
version: 2.2.1
version: 2.2.0

View File

@@ -17,6 +17,7 @@ package translation
import (
"sync"
"istio.io/istio/pilot/pkg/model"
istiomodel "istio.io/istio/pilot/pkg/model"
"istio.io/istio/pkg/config"
"istio.io/istio/pkg/config/schema/collection"
@@ -39,8 +40,8 @@ type IngressTranslation struct {
ingressConfig *ingressconfig.IngressConfig
kingressConfig *ingressconfig.KIngressConfig
mutex sync.RWMutex
higressRouteCache istiomodel.IngressRouteCollection
higressDomainCache istiomodel.IngressDomainCollection
higressRouteCache model.IngressRouteCollection
higressDomainCache model.IngressDomainCollection
}
func NewIngressTranslation(localKubeClient kube.Client, xdsUpdater istiomodel.XDSUpdater, namespace string, options common.Options) *IngressTranslation {
@@ -108,11 +109,11 @@ func (m *IngressTranslation) SetWatchErrorHandler(f func(r *cache.Reflector, err
return nil
}
func (m *IngressTranslation) GetIngressRoutes() istiomodel.IngressRouteCollection {
func (m *IngressTranslation) GetIngressRoutes() model.IngressRouteCollection {
m.mutex.RLock()
defer m.mutex.RUnlock()
ingressRouteCache := m.ingressConfig.GetIngressRoutes()
m.higressRouteCache = istiomodel.IngressRouteCollection{}
m.higressRouteCache = model.IngressRouteCollection{}
m.higressRouteCache.Invalid = append(m.higressRouteCache.Invalid, ingressRouteCache.Invalid...)
m.higressRouteCache.Valid = append(m.higressRouteCache.Valid, ingressRouteCache.Valid...)
if m.kingressConfig != nil {
@@ -124,12 +125,12 @@ func (m *IngressTranslation) GetIngressRoutes() istiomodel.IngressRouteCollectio
return m.higressRouteCache
}
func (m *IngressTranslation) GetIngressDomains() istiomodel.IngressDomainCollection {
func (m *IngressTranslation) GetIngressDomains() model.IngressDomainCollection {
m.mutex.RLock()
defer m.mutex.RUnlock()
ingressDomainCache := m.ingressConfig.GetIngressDomains()
m.higressDomainCache = istiomodel.IngressDomainCollection{}
m.higressDomainCache = model.IngressDomainCollection{}
m.higressDomainCache.Invalid = append(m.higressDomainCache.Invalid, ingressDomainCache.Invalid...)
m.higressDomainCache.Valid = append(m.higressDomainCache.Valid, ingressDomainCache.Valid...)
if m.kingressConfig != nil {

View File

@@ -140,16 +140,10 @@ func (s *SSEServer) HandleSSE(cb api.FilterCallbackHandler, stopChan chan struct
// Send the initial endpoint event
initialEvent := fmt.Sprintf("event: endpoint\ndata: %s\n\n", messageEndpoint)
go func() {
defer func() {
if r := recover(); r != nil {
api.LogErrorf("Failed to send initial event: %v", r)
}
}()
defer cb.EncoderFilterCallbacks().RecoverPanic()
api.LogDebugf("SSE Send message: %s", initialEvent)
cb.EncoderFilterCallbacks().InjectData([]byte(initialEvent))
}()
err = s.redisClient.Publish(channel, initialEvent)
if err != nil {
api.LogErrorf("Failed to send initial event: %v", err)
}
// Start health check handler
go func() {

View File

@@ -52,9 +52,6 @@ var (
{provider.PathOpenAICompletions, provider.ApiNameCompletion},
{provider.PathOpenAIEmbeddings, provider.ApiNameEmbeddings},
{provider.PathOpenAIAudioSpeech, provider.ApiNameAudioSpeech},
{provider.PathOpenAIAudioTranscriptions, provider.ApiNameAudioTranscription},
{provider.PathOpenAIAudioTranslations, provider.ApiNameAudioTranslation},
{provider.PathOpenAIRealtime, provider.ApiNameRealtime},
{provider.PathOpenAIImageGeneration, provider.ApiNameImageGeneration},
{provider.PathOpenAIImageVariation, provider.ApiNameImageVariation},
{provider.PathOpenAIImageEdit, provider.ApiNameImageEdit},
@@ -228,9 +225,9 @@ func onHttpRequestHeader(ctx wrapper.HttpContext, pluginConfig config.PluginConf
}
}
if contentType, _ := proxywasm.GetHttpRequestHeader(util.HeaderContentType); contentType != "" && !isSupportedRequestContentType(apiName, contentType) {
if contentType, _ := proxywasm.GetHttpRequestHeader(util.HeaderContentType); contentType != "" && !strings.Contains(contentType, util.MimeTypeApplicationJson) {
ctx.DontReadRequestBody()
log.Debugf("[onHttpRequestHeader] unsupported content type for api %s: %s, will not process the request body", apiName, contentType)
log.Debugf("[onHttpRequestHeader] unsupported content type: %s, will not process the request body", contentType)
}
if apiName == "" {
@@ -309,7 +306,6 @@ func onHttpRequestBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig
if err == nil {
return action
}
log.Errorf("[onHttpRequestBody] failed to process request body, apiName=%s, err=%v", apiName, err)
_ = util.ErrorHandler("ai-proxy.proc_req_body_failed", fmt.Errorf("failed to process request body: %v", err))
}
return types.ActionContinue
@@ -385,8 +381,6 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
return chunk
}
promoteThinking := pluginConfig.GetProviderConfig().GetPromoteThinkingOnEmpty()
log.Debugf("[onStreamingResponseBody] provider=%s", activeProvider.GetProviderType())
log.Debugf("[onStreamingResponseBody] isLastChunk=%v chunk: %s", isLastChunk, string(chunk))
@@ -394,9 +388,6 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
modifiedChunk, err := handler.OnStreamingResponseBody(ctx, apiName, chunk, isLastChunk)
if err == nil && modifiedChunk != nil {
if promoteThinking {
modifiedChunk = promoteThinkingInStreamingChunk(ctx, modifiedChunk, isLastChunk)
}
// Convert to Claude format if needed
claudeChunk, convertErr := convertStreamingResponseToClaude(ctx, modifiedChunk)
if convertErr != nil {
@@ -440,10 +431,6 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
result := []byte(responseBuilder.String())
if promoteThinking {
result = promoteThinkingInStreamingChunk(ctx, result, isLastChunk)
}
// Convert to Claude format if needed
claudeChunk, convertErr := convertStreamingResponseToClaude(ctx, result)
if convertErr != nil {
@@ -452,12 +439,11 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
return claudeChunk
}
if !needsClaudeResponseConversion(ctx) && !promoteThinking {
if !needsClaudeResponseConversion(ctx) {
return chunk
}
// If provider doesn't implement any streaming handlers but we need Claude conversion
// or thinking promotion
// First extract complete events from the chunk
events := provider.ExtractStreamingEvents(ctx, chunk)
log.Debugf("[onStreamingResponseBody] %d events received (no handler)", len(events))
@@ -474,10 +460,6 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
result := []byte(responseBuilder.String())
if promoteThinking {
result = promoteThinkingInStreamingChunk(ctx, result, isLastChunk)
}
// Convert to Claude format if needed
claudeChunk, convertErr := convertStreamingResponseToClaude(ctx, result)
if convertErr != nil {
@@ -510,16 +492,6 @@ func onHttpResponseBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfi
finalBody = body
}
// Promote thinking/reasoning to content when content is empty
if pluginConfig.GetProviderConfig().GetPromoteThinkingOnEmpty() {
promoted, err := provider.PromoteThinkingOnEmptyResponse(finalBody)
if err != nil {
log.Warnf("[promoteThinkingOnEmpty] failed: %v", err)
} else {
finalBody = promoted
}
}
// Convert to Claude format if needed (applies to both branches)
convertedBody, err := convertResponseBodyToClaude(ctx, finalBody)
if err != nil {
@@ -568,49 +540,6 @@ func convertStreamingResponseToClaude(ctx wrapper.HttpContext, data []byte) ([]b
return claudeChunk, nil
}
// promoteThinkingInStreamingChunk rewrites SSE-formatted streaming data so that
// reasoning deltas are buffered and stripped from each "data:" payload. When the
// final chunk arrives and no regular content was ever produced, the buffered
// reasoning is emitted as content via a flush chunk placed ahead of the result.
func promoteThinkingInStreamingChunk(ctx wrapper.HttpContext, data []byte, isLastChunk bool) []byte {
	// SSE frames arrive as "data: {...}" lines; inspect each one individually.
	sseLines := strings.Split(string(data), "\n")
	changed := false
	for idx, rawLine := range sseLines {
		if !strings.HasPrefix(rawLine, "data: ") {
			continue
		}
		body := strings.TrimPrefix(rawLine, "data: ")
		// The stream terminator and empty payloads pass through untouched.
		if body == "" || body == "[DONE]" {
			continue
		}
		stripped, err := provider.PromoteStreamingThinkingOnEmptyChunk(ctx, []byte(body))
		if err != nil {
			// Leave unparsable payloads as-is rather than corrupting the stream.
			continue
		}
		if rewritten := "data: " + string(stripped); rewritten != rawLine {
			sseLines[idx] = rewritten
			changed = true
		}
	}
	out := data
	if changed {
		out = []byte(strings.Join(sseLines, "\n"))
	}
	// On the last chunk, prepend any buffered reasoning (flushed as content)
	// so that it is delivered before the stream's closing data.
	if isLastChunk {
		if flush := provider.PromoteStreamingThinkingFlush(ctx); flush != nil {
			out = append(flush, out...)
		}
	}
	return out
}
// Helper function to convert OpenAI response body to Claude format
func convertResponseBodyToClaude(ctx wrapper.HttpContext, body []byte) ([]byte, error) {
if !needsClaudeResponseConversion(ctx) {
@@ -665,14 +594,3 @@ func getApiName(path string) provider.ApiName {
return ""
}
// isSupportedRequestContentType reports whether the request body for the given
// API may be processed based on its Content-Type header. JSON payloads are
// accepted for every API; multipart/form-data is accepted only for the image
// edit and image variation endpoints.
func isSupportedRequestContentType(apiName provider.ApiName, contentType string) bool {
	switch {
	case strings.Contains(contentType, util.MimeTypeApplicationJson):
		return true
	case strings.HasPrefix(strings.ToLower(contentType), "multipart/form-data"):
		return apiName == provider.ApiNameImageEdit || apiName == provider.ApiNameImageVariation
	default:
		return false
	}
}

View File

@@ -18,12 +18,6 @@ func Test_getApiName(t *testing.T) {
{"openai completions", "/v1/completions", provider.ApiNameCompletion},
{"openai embeddings", "/v1/embeddings", provider.ApiNameEmbeddings},
{"openai audio speech", "/v1/audio/speech", provider.ApiNameAudioSpeech},
{"openai audio transcriptions", "/v1/audio/transcriptions", provider.ApiNameAudioTranscription},
{"openai audio transcriptions with prefix", "/proxy/v1/audio/transcriptions", provider.ApiNameAudioTranscription},
{"openai audio translations", "/v1/audio/translations", provider.ApiNameAudioTranslation},
{"openai realtime", "/v1/realtime", provider.ApiNameRealtime},
{"openai realtime with prefix", "/proxy/v1/realtime", provider.ApiNameRealtime},
{"openai realtime with trailing slash", "/v1/realtime/", ""},
{"openai image generation", "/v1/images/generations", provider.ApiNameImageGeneration},
{"openai image variation", "/v1/images/variations", provider.ApiNameImageVariation},
{"openai image edit", "/v1/images/edits", provider.ApiNameImageEdit},
@@ -69,54 +63,6 @@ func Test_getApiName(t *testing.T) {
}
}
// Test_isSupportedRequestContentType verifies the content-type gate used when
// deciding whether to read a request body: JSON is accepted for any API, while
// multipart/form-data is accepted only for image edit and image variation.
func Test_isSupportedRequestContentType(t *testing.T) {
	tests := []struct {
		name        string
		apiName     provider.ApiName
		contentType string
		want        bool
	}{
		{
			name:        "json chat completion",
			apiName:     provider.ApiNameChatCompletion,
			contentType: "application/json",
			want:        true,
		},
		{
			name:        "multipart image edit",
			apiName:     provider.ApiNameImageEdit,
			contentType: "multipart/form-data; boundary=----boundary",
			want:        true,
		},
		{
			name:        "multipart image variation",
			apiName:     provider.ApiNameImageVariation,
			contentType: "multipart/form-data; boundary=----boundary",
			want:        true,
		},
		{
			// multipart is rejected for APIs outside the image edit/variation set.
			name:        "multipart chat completion",
			apiName:     provider.ApiNameChatCompletion,
			contentType: "multipart/form-data; boundary=----boundary",
			want:        false,
		},
		{
			// even an image API rejects content types other than JSON/multipart.
			name:        "text plain image edit",
			apiName:     provider.ApiNameImageEdit,
			contentType: "text/plain",
			want:        false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := isSupportedRequestContentType(tt.apiName, tt.contentType)
			if got != tt.want {
				t.Errorf("isSupportedRequestContentType(%v, %q) = %v, want %v", tt.apiName, tt.contentType, got, tt.want)
			}
		})
	}
}
func TestAi360(t *testing.T) {
test.RunAi360ParseConfigTests(t)
test.RunAi360OnHttpRequestHeadersTests(t)
@@ -133,8 +79,6 @@ func TestOpenAI(t *testing.T) {
test.RunOpenAIOnHttpResponseHeadersTests(t)
test.RunOpenAIOnHttpResponseBodyTests(t)
test.RunOpenAIOnStreamingResponseBodyTests(t)
test.RunOpenAIPromoteThinkingOnEmptyTests(t)
test.RunOpenAIPromoteThinkingOnEmptyStreamingTests(t)
}
func TestQwen(t *testing.T) {
@@ -179,10 +123,6 @@ func TestUtil(t *testing.T) {
test.RunMapRequestPathByCapabilityTests(t)
}
func TestApiPathRegression(t *testing.T) {
test.RunApiPathRegressionTests(t)
}
func TestGeneric(t *testing.T) {
test.RunGenericParseConfigTests(t)
test.RunGenericOnHttpRequestHeadersTests(t)
@@ -197,8 +137,6 @@ func TestVertex(t *testing.T) {
test.RunVertexExpressModeOnStreamingResponseBodyTests(t)
test.RunVertexExpressModeImageGenerationRequestBodyTests(t)
test.RunVertexExpressModeImageGenerationResponseBodyTests(t)
test.RunVertexExpressModeImageEditVariationRequestBodyTests(t)
test.RunVertexExpressModeImageEditVariationResponseBodyTests(t)
// Vertex Raw 模式测试
test.RunVertexRawModeOnHttpRequestHeadersTests(t)
test.RunVertexRawModeOnHttpRequestBodyTests(t)
@@ -211,7 +149,6 @@ func TestBedrock(t *testing.T) {
test.RunBedrockOnHttpRequestBodyTests(t)
test.RunBedrockOnHttpResponseHeadersTests(t)
test.RunBedrockOnHttpResponseBodyTests(t)
test.RunBedrockOnStreamingResponseBodyTests(t)
test.RunBedrockToolCallTests(t)
}

View File

@@ -35,23 +35,9 @@ const (
// converseStream路径 /model/{modelId}/converse-stream
bedrockStreamChatCompletionPath = "/model/%s/converse-stream"
// invoke_model 路径 /model/{modelId}/invoke
bedrockInvokeModelPath = "/model/%s/invoke"
bedrockSignedHeaders = "host;x-amz-date"
requestIdHeader = "X-Amzn-Requestid"
bedrockCacheTypeDefault = "default"
bedrockCacheTTL5m = "5m"
bedrockCacheTTL1h = "1h"
bedrockPromptCacheNova = "amazon.nova"
bedrockPromptCacheClaude = "anthropic.claude"
bedrockCachePointPositionSystemPrompt = "systemPrompt"
bedrockCachePointPositionLastUserMessage = "lastUserMessage"
bedrockCachePointPositionLastMessage = "lastMessage"
)
var (
bedrockConversePathPattern = regexp.MustCompile(`/model/[^/]+/converse(-stream)?$`)
bedrockInvokePathPattern = regexp.MustCompile(`/model/[^/]+/invoke(-with-response-stream)?$`)
bedrockInvokeModelPath = "/model/%s/invoke"
bedrockSignedHeaders = "host;x-amz-date"
requestIdHeader = "X-Amzn-Requestid"
)
type bedrockProviderInitializer struct{}
@@ -178,10 +164,9 @@ func (b *bedrockProvider) convertEventFromBedrockToOpenAI(ctx wrapper.HttpContex
if bedrockEvent.Usage != nil {
openAIFormattedChunk.Choices = choices[:0]
openAIFormattedChunk.Usage = &usage{
CompletionTokens: bedrockEvent.Usage.OutputTokens,
PromptTokens: bedrockEvent.Usage.InputTokens,
TotalTokens: bedrockEvent.Usage.TotalTokens,
PromptTokensDetails: buildPromptTokensDetails(bedrockEvent.Usage.CacheReadInputTokens, bedrockEvent.Usage.CacheWriteInputTokens),
CompletionTokens: bedrockEvent.Usage.OutputTokens,
PromptTokens: bedrockEvent.Usage.InputTokens,
TotalTokens: bedrockEvent.Usage.TotalTokens,
}
}
openAIFormattedChunkBytes, _ := json.Marshal(openAIFormattedChunk)
@@ -645,24 +630,13 @@ func (b *bedrockProvider) GetProviderType() string {
return providerTypeBedrock
}
func (b *bedrockProvider) GetApiName(path string) ApiName {
switch {
case bedrockConversePathPattern.MatchString(path):
return ApiNameChatCompletion
case bedrockInvokePathPattern.MatchString(path):
return ApiNameImageGeneration
default:
return ""
}
}
func (b *bedrockProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName) error {
b.config.handleRequestHeaders(b, ctx, apiName)
return nil
}
func (b *bedrockProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header) {
util.OverwriteRequestHostHeader(headers, fmt.Sprintf(bedrockDefaultDomain, strings.TrimSpace(b.config.awsRegion)))
util.OverwriteRequestHostHeader(headers, fmt.Sprintf(bedrockDefaultDomain, b.config.awsRegion))
// If apiTokens is configured, set Bearer token authentication here
// This follows the same pattern as other providers (qwen, zhipuai, etc.)
@@ -673,15 +647,6 @@ func (b *bedrockProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiNa
}
func (b *bedrockProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) (types.Action, error) {
// In original protocol mode (e.g. /model/{modelId}/converse-stream), keep the body/path untouched
// and only apply auth headers.
if b.config.IsOriginal() {
headers := util.GetRequestHeaders()
b.setAuthHeaders(body, headers)
util.ReplaceRequestHeaders(headers)
return types.ActionContinue, replaceRequestBody(body)
}
if !b.config.isSupportedAPI(apiName) {
return types.ActionContinue, errUnsupportedApiName
}
@@ -689,25 +654,14 @@ func (b *bedrockProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName
}
func (b *bedrockProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header) ([]byte, error) {
var transformedBody []byte
var err error
switch apiName {
case ApiNameChatCompletion:
transformedBody, err = b.onChatCompletionRequestBody(ctx, body, headers)
return b.onChatCompletionRequestBody(ctx, body, headers)
case ApiNameImageGeneration:
transformedBody, err = b.onImageGenerationRequestBody(ctx, body, headers)
return b.onImageGenerationRequestBody(ctx, body, headers)
default:
transformedBody, err = b.config.defaultTransformRequestBody(ctx, apiName, body)
return b.config.defaultTransformRequestBody(ctx, apiName, body)
}
if err != nil {
return nil, err
}
// Always apply auth after request body/path are finalized.
// For Bearer token mode this is a no-op; for AK/SK mode this generates SigV4 headers.
b.setAuthHeaders(transformedBody, headers)
return transformedBody, nil
}
func (b *bedrockProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) ([]byte, error) {
@@ -761,7 +715,9 @@ func (b *bedrockProvider) buildBedrockImageGenerationRequest(origRequest *imageG
Quality: origRequest.Quality,
},
}
return json.Marshal(request)
requestBytes, err := json.Marshal(request)
b.setAuthHeaders(requestBytes, headers)
return requestBytes, err
}
func (b *bedrockProvider) buildBedrockImageGenerationResponse(bedrockResponse *bedrockImageGenerationResponse) *imageGenerationResponse {
@@ -841,19 +797,6 @@ func (b *bedrockProvider) buildBedrockTextGenerationRequest(origRequest *chatCom
},
}
effectivePromptCacheRetention := b.resolvePromptCacheRetention(origRequest.PromptCacheRetention)
if origRequest.PromptCacheKey != "" {
log.Warnf("bedrock provider ignores prompt_cache_key because Converse API has no equivalent field")
}
if isPromptCacheSupportedModel(origRequest.Model) {
if cacheTTL, ok := mapPromptCacheRetentionToBedrockTTL(effectivePromptCacheRetention); ok {
addPromptCachePointsToBedrockRequest(request, cacheTTL, b.getPromptCachePointPositions())
}
} else if effectivePromptCacheRetention != "" {
log.Warnf("skip prompt cache injection for unsupported model: %s", origRequest.Model)
}
if origRequest.ReasoningEffort != "" {
thinkingBudget := 1024 // default
switch origRequest.ReasoningEffort {
@@ -904,7 +847,9 @@ func (b *bedrockProvider) buildBedrockTextGenerationRequest(origRequest *chatCom
request.AdditionalModelRequestFields[key] = value
}
return json.Marshal(request)
requestBytes, err := json.Marshal(request)
b.setAuthHeaders(requestBytes, headers)
return requestBytes, err
}
func (b *bedrockProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, bedrockResponse *bedrockConverseResponse) *chatCompletionResponse {
@@ -955,10 +900,9 @@ func (b *bedrockProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, b
Object: objectChatCompletion,
Choices: choices,
Usage: &usage{
PromptTokens: bedrockResponse.Usage.InputTokens,
CompletionTokens: bedrockResponse.Usage.OutputTokens,
TotalTokens: bedrockResponse.Usage.TotalTokens,
PromptTokensDetails: buildPromptTokensDetails(bedrockResponse.Usage.CacheReadInputTokens, bedrockResponse.Usage.CacheWriteInputTokens),
PromptTokens: bedrockResponse.Usage.InputTokens,
CompletionTokens: bedrockResponse.Usage.OutputTokens,
TotalTokens: bedrockResponse.Usage.TotalTokens,
},
}
}
@@ -989,145 +933,6 @@ func stopReasonBedrock2OpenAI(reason string) string {
}
}
// mapPromptCacheRetentionToBedrockTTL translates a prompt_cache_retention value
// into the Bedrock cache TTL string to inject. The boolean result reports
// whether a cache point should be added at all; unknown values are logged and
// treated as "no caching".
func mapPromptCacheRetentionToBedrockTTL(retention string) (string, bool) {
	norm := normalizePromptCacheRetention(retention)
	if norm == "" {
		return "", false
	}
	if norm == "in_memory" {
		// For the default 5-minute cache, omit ttl and let Bedrock apply its
		// default; some models are strict about explicit ttl fields.
		return "", true
	}
	if norm == "24h" {
		return bedrockCacheTTL1h, true
	}
	log.Warnf("unsupported prompt_cache_retention for bedrock mapping: %s", retention)
	return "", false
}
// normalizePromptCacheRetention canonicalizes a retention value: trims spaces,
// lowercases, converts '-' and inner spaces to '_', and maps the "inmemory"
// alias to "in_memory". Any other value is returned in its normalized form.
func normalizePromptCacheRetention(retention string) string {
	separators := strings.NewReplacer("-", "_", " ", "_")
	canonical := separators.Replace(strings.ToLower(strings.TrimSpace(retention)))
	if canonical == "inmemory" {
		return "in_memory"
	}
	return canonical
}
// isPromptCacheSupportedModel reports whether the model id belongs to a family
// known to support Bedrock prompt caching (Amazon Nova or Anthropic Claude).
func isPromptCacheSupportedModel(model string) bool {
	id := strings.ToLower(strings.TrimSpace(model))
	for _, family := range []string{bedrockPromptCacheNova, bedrockPromptCacheClaude} {
		if strings.Contains(id, family) {
			return true
		}
	}
	return false
}
// resolvePromptCacheRetention picks the effective retention setting: a value
// supplied on the request takes precedence over the provider configuration;
// empty means no retention was configured anywhere.
func (b *bedrockProvider) resolvePromptCacheRetention(requestPromptCacheRetention string) string {
	switch {
	case requestPromptCacheRetention != "":
		return requestPromptCacheRetention
	case b.config.promptCacheRetention != "":
		return b.config.promptCacheRetention
	default:
		return ""
	}
}
// getPromptCachePointPositions resolves which cache-point positions are
// enabled. With no explicit configuration only the system prompt position is
// on; otherwise each configured key is normalized and applied, and unknown
// keys are logged and ignored.
func (b *bedrockProvider) getPromptCachePointPositions() map[string]bool {
	configured := b.config.bedrockPromptCachePointPositions
	if configured == nil {
		// Default behavior: cache the system prompt only.
		return map[string]bool{
			bedrockCachePointPositionSystemPrompt: true,
			bedrockCachePointPositionLastMessage:  false,
		}
	}
	enabled := map[string]bool{
		bedrockCachePointPositionSystemPrompt:    false,
		bedrockCachePointPositionLastUserMessage: false,
		bedrockCachePointPositionLastMessage:     false,
	}
	for rawKey, on := range configured {
		normalized := normalizeBedrockCachePointPosition(rawKey)
		// Only the three known positions may be toggled; anything else is a
		// configuration mistake worth surfacing in the logs.
		if _, known := enabled[normalized]; !known {
			log.Warnf("unsupported bedrockPromptCachePointPositions key: %s", rawKey)
			continue
		}
		enabled[normalized] = on
	}
	return enabled
}
// normalizeBedrockCachePointPosition maps a user-supplied position key, in any
// casing and with optional '_'/'-' separators, onto the canonical position
// constant. Unrecognized keys are returned unchanged so callers can report them.
func normalizeBedrockCachePointPosition(raw string) string {
	folded := strings.NewReplacer("_", "", "-", "").Replace(strings.ToLower(raw))
	switch folded {
	case "systemprompt":
		return bedrockCachePointPositionSystemPrompt
	case "lastusermessage":
		return bedrockCachePointPositionLastUserMessage
	case "lastmessage":
		return bedrockCachePointPositionLastMessage
	}
	return raw
}
// addPromptCachePointsToBedrockRequest injects cachePoint markers into the
// Converse request at the enabled positions. A position only receives a marker
// when the corresponding content exists, and the lastMessage position is
// skipped when it would duplicate the lastUserMessage marker.
func addPromptCachePointsToBedrockRequest(request *bedrockTextGenRequest, cacheTTL string, positions map[string]bool) {
	// Cache point after the system prompt blocks, if any exist.
	if positions[bedrockCachePointPositionSystemPrompt] && len(request.System) > 0 {
		request.System = append(request.System, systemContentBlock{
			CachePoint: &bedrockCachePoint{
				Type: bedrockCacheTypeDefault,
				TTL:  cacheTTL,
			},
		})
	}
	// Remember the last user-message index so the lastMessage position below
	// can avoid appending a second marker to the same message.
	lastUserMessageIndex := -1
	if positions[bedrockCachePointPositionLastUserMessage] {
		lastUserMessageIndex = findLastMessageIndexByRole(request.Messages, roleUser)
		if lastUserMessageIndex >= 0 {
			appendCachePointToBedrockMessage(request, lastUserMessageIndex, cacheTTL)
		}
	}
	if positions[bedrockCachePointPositionLastMessage] && len(request.Messages) > 0 {
		lastMessageIndex := len(request.Messages) - 1
		// Skip when the final message is also the last user message marked above.
		if lastMessageIndex != lastUserMessageIndex {
			appendCachePointToBedrockMessage(request, lastMessageIndex, cacheTTL)
		}
	}
}
// findLastMessageIndexByRole returns the index of the last message whose role
// matches role, or -1 when no such message exists.
func findLastMessageIndexByRole(messages []bedrockMessage, role string) int {
	last := -1
	for idx := range messages {
		if messages[idx].Role == role {
			last = idx
		}
	}
	return last
}
// appendCachePointToBedrockMessage appends a cachePoint content block to the
// message at messageIndex; out-of-range indexes are silently ignored.
func appendCachePointToBedrockMessage(request *bedrockTextGenRequest, messageIndex int, cacheTTL string) {
	if messageIndex >= 0 && messageIndex < len(request.Messages) {
		marker := bedrockMessageContent{
			CachePoint: &bedrockCachePoint{
				Type: bedrockCacheTypeDefault,
				TTL:  cacheTTL,
			},
		}
		msg := &request.Messages[messageIndex]
		msg.Content = append(msg.Content, marker)
	}
}
// buildPromptTokensDetails folds cache-read and cache-write token counts into
// an OpenAI-style promptTokensDetails block, or nil when nothing was cached.
func buildPromptTokensDetails(cacheReadInputTokens int, cacheWriteInputTokens int) *promptTokensDetails {
	if cached := cacheReadInputTokens + cacheWriteInputTokens; cached > 0 {
		return &promptTokensDetails{
			CachedTokens: cached,
		}
	}
	return nil
}
type bedrockTextGenRequest struct {
Messages []bedrockMessage `json:"messages"`
System []systemContentBlock `json:"system,omitempty"`
@@ -1172,21 +977,14 @@ type bedrockMessage struct {
}
type bedrockMessageContent struct {
Text string `json:"text,omitempty"`
Image *imageBlock `json:"image,omitempty"`
ToolResult *toolResultBlock `json:"toolResult,omitempty"`
ToolUse *toolUseBlock `json:"toolUse,omitempty"`
CachePoint *bedrockCachePoint `json:"cachePoint,omitempty"`
Text string `json:"text,omitempty"`
Image *imageBlock `json:"image,omitempty"`
ToolResult *toolResultBlock `json:"toolResult,omitempty"`
ToolUse *toolUseBlock `json:"toolUse,omitempty"`
}
type systemContentBlock struct {
Text string `json:"text,omitempty"`
CachePoint *bedrockCachePoint `json:"cachePoint,omitempty"`
}
type bedrockCachePoint struct {
Type string `json:"type"`
TTL string `json:"ttl,omitempty"`
Text string `json:"text,omitempty"`
}
type imageBlock struct {
@@ -1268,10 +1066,6 @@ type tokenUsage struct {
OutputTokens int `json:"outputTokens,omitempty"`
TotalTokens int `json:"totalTokens"`
CacheReadInputTokens int `json:"cacheReadInputTokens,omitempty"`
CacheWriteInputTokens int `json:"cacheWriteInputTokens,omitempty"`
}
func chatToolMessage2BedrockToolResultContent(chatMessage chatMessage) bedrockMessageContent {
@@ -1369,45 +1163,35 @@ func (b *bedrockProvider) setAuthHeaders(body []byte, headers http.Header) {
}
// Use AWS Signature V4 authentication
accessKey := strings.TrimSpace(b.config.awsAccessKey)
region := strings.TrimSpace(b.config.awsRegion)
t := time.Now().UTC()
amzDate := t.Format("20060102T150405Z")
dateStamp := t.Format("20060102")
path := headers.Get(":path")
signature := b.generateSignature(path, amzDate, dateStamp, body)
headers.Set("X-Amz-Date", amzDate)
util.OverwriteRequestAuthorizationHeader(headers, fmt.Sprintf("AWS4-HMAC-SHA256 Credential=%s/%s/%s/%s/aws4_request, SignedHeaders=%s, Signature=%s", accessKey, dateStamp, region, awsService, bedrockSignedHeaders, signature))
util.OverwriteRequestAuthorizationHeader(headers, fmt.Sprintf("AWS4-HMAC-SHA256 Credential=%s/%s/%s/%s/aws4_request, SignedHeaders=%s, Signature=%s", b.config.awsAccessKey, dateStamp, b.config.awsRegion, awsService, bedrockSignedHeaders, signature))
}
func (b *bedrockProvider) generateSignature(path, amzDate, dateStamp string, body []byte) string {
canonicalURI := encodeSigV4Path(path)
path = encodeSigV4Path(path)
hashedPayload := sha256Hex(body)
region := strings.TrimSpace(b.config.awsRegion)
secretKey := strings.TrimSpace(b.config.awsSecretKey)
endpoint := fmt.Sprintf(bedrockDefaultDomain, region)
endpoint := fmt.Sprintf(bedrockDefaultDomain, b.config.awsRegion)
canonicalHeaders := fmt.Sprintf("host:%s\nx-amz-date:%s\n", endpoint, amzDate)
canonicalRequest := fmt.Sprintf("%s\n%s\n\n%s\n%s\n%s",
httpPostMethod, canonicalURI, canonicalHeaders, bedrockSignedHeaders, hashedPayload)
httpPostMethod, path, canonicalHeaders, bedrockSignedHeaders, hashedPayload)
credentialScope := fmt.Sprintf("%s/%s/%s/aws4_request", dateStamp, region, awsService)
credentialScope := fmt.Sprintf("%s/%s/%s/aws4_request", dateStamp, b.config.awsRegion, awsService)
hashedCanonReq := sha256Hex([]byte(canonicalRequest))
stringToSign := fmt.Sprintf("AWS4-HMAC-SHA256\n%s\n%s\n%s",
amzDate, credentialScope, hashedCanonReq)
signingKey := getSignatureKey(secretKey, dateStamp, region, awsService)
signingKey := getSignatureKey(b.config.awsSecretKey, dateStamp, b.config.awsRegion, awsService)
signature := hmacHex(signingKey, stringToSign)
return signature
}
func encodeSigV4Path(path string) string {
// Keep only the URI path for canonical URI. Query string is handled separately in SigV4,
// and this implementation uses an empty canonical query string.
if queryIndex := strings.Index(path, "?"); queryIndex >= 0 {
path = path[:queryIndex]
}
segments := strings.Split(path, "/")
for i, seg := range segments {
if seg == "" {

View File

@@ -1,191 +0,0 @@
package provider
import (
"net/http"
"net/url"
"testing"
"github.com/stretchr/testify/assert"
)
// TestEncodeSigV4Path pins the canonical-URI encoding rules used for AWS SigV4
// signing: raw colons/slashes in model ids are preserved, pre-encoded percent
// sequences are escaped again (double-encoded), query strings are stripped,
// and malformed percent sequences fall back to an escaped '%'.
func TestEncodeSigV4Path(t *testing.T) {
	tests := []struct {
		name string
		path string
		want string
	}{
		{
			name: "raw model id keeps colon",
			path: "/model/global.amazon.nova-2-lite-v1:0/converse-stream",
			want: "/model/global.amazon.nova-2-lite-v1:0/converse-stream",
		},
		{
			name: "pre-encoded model id escapes percent to avoid mismatch",
			path: "/model/global.amazon.nova-2-lite-v1%3A0/converse-stream",
			want: "/model/global.amazon.nova-2-lite-v1%253A0/converse-stream",
		},
		{
			name: "raw inference profile arn keeps colon and slash delimiters",
			path: "/model/arn:aws:bedrock:us-east-1:123456789012:inference-profile/global.anthropic.claude-sonnet-4-20250514-v1:0/converse",
			want: "/model/arn:aws:bedrock:us-east-1:123456789012:inference-profile/global.anthropic.claude-sonnet-4-20250514-v1:0/converse",
		},
		{
			name: "encoded inference profile arn preserves escaped slash as double-escaped percent",
			path: "/model/arn%3Aaws%3Abedrock%3Aus-east-1%3A123456789012%3Ainference-profile%2Fglobal.anthropic.claude-sonnet-4-20250514-v1%3A0/converse",
			want: "/model/arn%253Aaws%253Abedrock%253Aus-east-1%253A123456789012%253Ainference-profile%252Fglobal.anthropic.claude-sonnet-4-20250514-v1%253A0/converse",
		},
		{
			name: "query string is stripped before canonical encoding",
			path: "/model/global.amazon.nova-2-lite-v1%3A0/converse-stream?trace=1&foo=bar",
			want: "/model/global.amazon.nova-2-lite-v1%253A0/converse-stream",
		},
		{
			name: "invalid percent sequence falls back to escaped percent",
			path: "/model/abc%ZZxyz/converse",
			want: "/model/abc%25ZZxyz/converse",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.want, encodeSigV4Path(tt.path))
		})
	}
}
// TestOverwriteRequestPathHeaderPreservesSingleEncodedRequestPath checks that
// the :path header ends up singly URL-encoded whether the model id was given
// raw or already percent-encoded (i.e. no double encoding occurs).
func TestOverwriteRequestPathHeaderPreservesSingleEncodedRequestPath(t *testing.T) {
	p := &bedrockProvider{}
	plainModel := "arn:aws:bedrock:us-east-1:123456789012:inference-profile/global.amazon.nova-2-lite-v1:0"
	preEncodedModel := url.QueryEscape(plainModel)
	t.Run("plain model is encoded once", func(t *testing.T) {
		headers := http.Header{}
		p.overwriteRequestPathHeader(headers, bedrockChatCompletionPath, plainModel)
		assert.Equal(t, "/model/arn%3Aaws%3Abedrock%3Aus-east-1%3A123456789012%3Ainference-profile%2Fglobal.amazon.nova-2-lite-v1%3A0/converse", headers.Get(":path"))
	})
	t.Run("pre-encoded model is not double encoded", func(t *testing.T) {
		headers := http.Header{}
		p.overwriteRequestPathHeader(headers, bedrockChatCompletionPath, preEncodedModel)
		// Same expected path as the raw-model case above.
		assert.Equal(t, "/model/arn%3Aaws%3Abedrock%3Aus-east-1%3A123456789012%3Ainference-profile%2Fglobal.amazon.nova-2-lite-v1%3A0/converse", headers.Get(":path"))
	})
}
// TestGenerateSignatureIgnoresQueryStringInCanonicalURI verifies that the SigV4
// signature is computed over the path only, so adding a query string must not
// change the resulting signature.
func TestGenerateSignatureIgnoresQueryStringInCanonicalURI(t *testing.T) {
	provider := &bedrockProvider{
		config: ProviderConfig{
			awsRegion:    "ap-northeast-3",
			awsSecretKey: "test-secret",
		},
	}
	payload := []byte(`{"messages":[{"role":"user","content":[{"text":"hello"}]}]}`)
	basePath := "/model/global.amazon.nova-2-lite-v1%3A0/converse-stream"
	want := provider.generateSignature(basePath, "20260312T142942Z", "20260312", payload)
	got := provider.generateSignature(basePath+"?trace=1&foo=bar", "20260312T142942Z", "20260312", payload)
	assert.Equal(t, want, got)
}
// TestGenerateSignatureDiffersForRawAndPreEncodedModelPath verifies that the
// raw and percent-encoded forms of the same model path produce different
// signatures, i.e. the path is signed exactly as given rather than normalized.
func TestGenerateSignatureDiffersForRawAndPreEncodedModelPath(t *testing.T) {
	provider := &bedrockProvider{
		config: ProviderConfig{
			awsRegion:    "ap-northeast-3",
			awsSecretKey: "test-secret",
		},
	}
	payload := []byte(`{"messages":[{"role":"user","content":[{"text":"hello"}]}]}`)
	sigRaw := provider.generateSignature("/model/global.amazon.nova-2-lite-v1:0/converse-stream", "20260312T142942Z", "20260312", payload)
	sigEncoded := provider.generateSignature("/model/global.amazon.nova-2-lite-v1%3A0/converse-stream", "20260312T142942Z", "20260312", payload)
	assert.NotEqual(t, sigRaw, sigEncoded)
}
// TestNormalizePromptCacheRetention checks that the accepted spellings of the
// in-memory retention policy (alias, dash, space, canonical) all normalize to
// "in_memory".
func TestNormalizePromptCacheRetention(t *testing.T) {
	cases := []struct {
		name      string
		retention string
		want      string
	}{
		{name: "inmemory alias maps to in_memory", retention: "inmemory", want: "in_memory"},
		{name: "dash style maps to in_memory", retention: "in-memory", want: "in_memory"},
		{name: "space style with trim maps to in_memory", retention: " in memory ", want: "in_memory"},
		{name: "already normalized remains unchanged", retention: "in_memory", want: "in_memory"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.want, normalizePromptCacheRetention(tc.retention))
		})
	}
}
// TestAppendCachePointToBedrockMessageInvalidIndexNoop checks that
// out-of-range message indexes are ignored and that a valid index appends
// exactly one cachePoint content entry.
func TestAppendCachePointToBedrockMessageInvalidIndexNoop(t *testing.T) {
	req := &bedrockTextGenRequest{
		Messages: []bedrockMessage{
			{Role: roleUser, Content: []bedrockMessageContent{{Text: "hello"}}},
		},
	}
	// Negative and past-the-end indexes must leave the message untouched.
	appendCachePointToBedrockMessage(req, -1, bedrockCacheTTL5m)
	appendCachePointToBedrockMessage(req, len(req.Messages), bedrockCacheTTL5m)
	assert.Len(t, req.Messages[0].Content, 1)
	// A valid index appends a cache point after the existing content.
	appendCachePointToBedrockMessage(req, 0, bedrockCacheTTL5m)
	assert.Len(t, req.Messages[0].Content, 2)
	assert.NotNil(t, req.Messages[0].Content[1].CachePoint)
}
// TestIsPromptCacheSupportedModel checks prompt-cache support detection for
// plain model IDs, inference-profile ARNs, and unsupported model families.
func TestIsPromptCacheSupportedModel(t *testing.T) {
	cases := []struct {
		name  string
		model string
		want  bool
	}{
		{name: "anthropic claude model is supported", model: "anthropic.claude-3-5-haiku-20241022-v1:0", want: true},
		{name: "amazon nova inference profile is supported", model: "arn:aws:bedrock:us-east-1:123456789012:inference-profile/global.amazon.nova-2-lite-v1:0", want: true},
		{name: "other model is not supported", model: "meta.llama3-70b-instruct-v1:0", want: false},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.want, isPromptCacheSupportedModel(tc.model))
		})
	}
}

View File

@@ -1,7 +1,6 @@
package provider
import (
"encoding/json"
"fmt"
"strings"
@@ -42,36 +41,34 @@ type thinkingParam struct {
type chatCompletionRequest struct {
NonOpenAIStyleOptions
Messages []chatMessage `json:"messages"`
Model string `json:"model"`
Store bool `json:"store,omitempty"`
ReasoningEffort string `json:"reasoning_effort,omitempty"`
Metadata map[string]string `json:"metadata,omitempty"`
FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
LogitBias map[string]int `json:"logit_bias,omitempty"`
Logprobs bool `json:"logprobs,omitempty"`
TopLogprobs int `json:"top_logprobs,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
N int `json:"n,omitempty"`
Modalities []string `json:"modalities,omitempty"`
Prediction map[string]interface{} `json:"prediction,omitempty"`
Audio map[string]interface{} `json:"audio,omitempty"`
PresencePenalty float64 `json:"presence_penalty,omitempty"`
ResponseFormat map[string]interface{} `json:"response_format,omitempty"`
Seed int `json:"seed,omitempty"`
ServiceTier string `json:"service_tier,omitempty"`
Stop []string `json:"stop,omitempty"`
Stream bool `json:"stream,omitempty"`
StreamOptions *streamOptions `json:"stream_options,omitempty"`
PromptCacheRetention string `json:"prompt_cache_retention,omitempty"`
PromptCacheKey string `json:"prompt_cache_key,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
TopP float64 `json:"top_p,omitempty"`
Tools []tool `json:"tools,omitempty"`
ToolChoice interface{} `json:"tool_choice,omitempty"`
ParallelToolCalls bool `json:"parallel_tool_calls,omitempty"`
User string `json:"user,omitempty"`
Messages []chatMessage `json:"messages"`
Model string `json:"model"`
Store bool `json:"store,omitempty"`
ReasoningEffort string `json:"reasoning_effort,omitempty"`
Metadata map[string]string `json:"metadata,omitempty"`
FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
LogitBias map[string]int `json:"logit_bias,omitempty"`
Logprobs bool `json:"logprobs,omitempty"`
TopLogprobs int `json:"top_logprobs,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
N int `json:"n,omitempty"`
Modalities []string `json:"modalities,omitempty"`
Prediction map[string]interface{} `json:"prediction,omitempty"`
Audio map[string]interface{} `json:"audio,omitempty"`
PresencePenalty float64 `json:"presence_penalty,omitempty"`
ResponseFormat map[string]interface{} `json:"response_format,omitempty"`
Seed int `json:"seed,omitempty"`
ServiceTier string `json:"service_tier,omitempty"`
Stop []string `json:"stop,omitempty"`
Stream bool `json:"stream,omitempty"`
StreamOptions *streamOptions `json:"stream_options,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
TopP float64 `json:"top_p,omitempty"`
Tools []tool `json:"tools,omitempty"`
ToolChoice interface{} `json:"tool_choice,omitempty"`
ParallelToolCalls bool `json:"parallel_tool_calls,omitempty"`
User string `json:"user,omitempty"`
}
func (c *chatCompletionRequest) getMaxTokens() int {
@@ -255,70 +252,6 @@ func (m *chatMessage) handleStreamingReasoningContent(ctx wrapper.HttpContext, r
}
}
// promoteThinkingOnEmpty promotes reasoning_content to content when content is empty.
// This handles models that put user-facing replies into thinking blocks instead of text blocks.
func (r *chatCompletionResponse) promoteThinkingOnEmpty() {
for i := range r.Choices {
msg := r.Choices[i].Message
if msg == nil {
continue
}
if !isContentEmpty(msg.Content) {
continue
}
if msg.ReasoningContent != "" {
msg.Content = msg.ReasoningContent
msg.ReasoningContent = ""
}
}
}
// promoteStreamingThinkingOnEmpty accumulates reasoning content during streaming.
// It strips reasoning from chunks and buffers it. When content is seen, it marks
// the stream as having content so no promotion will happen.
// Call PromoteStreamingThinkingFlush at the end of the stream to emit buffered
// reasoning as content if no content was ever seen.
// Returns true if the chunk was modified (reasoning stripped).
func promoteStreamingThinkingOnEmpty(ctx wrapper.HttpContext, msg *chatMessage) bool {
if msg == nil {
return false
}
hasContentDelta, _ := ctx.GetContext(ctxKeyHasContentDelta).(bool)
if hasContentDelta {
return false
}
if !isContentEmpty(msg.Content) {
ctx.SetContext(ctxKeyHasContentDelta, true)
return false
}
// Buffer reasoning content and strip it from the chunk
reasoning := msg.ReasoningContent
if reasoning == "" {
reasoning = msg.Reasoning
}
if reasoning != "" {
buffered, _ := ctx.GetContext(ctxKeyBufferedReasoning).(string)
ctx.SetContext(ctxKeyBufferedReasoning, buffered+reasoning)
msg.ReasoningContent = ""
msg.Reasoning = ""
return true
}
return false
}
// isContentEmpty reports whether a message content value carries no text.
// nil content and whitespace-only strings count as empty; any other type
// (e.g. structured content-block arrays) is treated as non-empty.
func isContentEmpty(content any) bool {
	if content == nil {
		return true
	}
	if s, ok := content.(string); ok {
		return strings.TrimSpace(s) == ""
	}
	return false
}
type chatMessageContent struct {
CacheControl map[string]interface{} `json:"cache_control,omitempty"`
Type string `json:"type,omitempty"`
@@ -528,122 +461,6 @@ type imageGenerationRequest struct {
Size string `json:"size,omitempty"`
}
type imageInputURL struct {
URL string `json:"url,omitempty"`
ImageURL *chatMessageContentImageUrl `json:"image_url,omitempty"`
}
func (i *imageInputURL) UnmarshalJSON(data []byte) error {
// Support a plain string payload, e.g. "data:image/png;base64,..."
var rawURL string
if err := json.Unmarshal(data, &rawURL); err == nil {
i.URL = rawURL
i.ImageURL = nil
return nil
}
type alias imageInputURL
var value alias
if err := json.Unmarshal(data, &value); err != nil {
return err
}
*i = imageInputURL(value)
return nil
}
func (i *imageInputURL) GetURL() string {
if i == nil {
return ""
}
if i.ImageURL != nil && i.ImageURL.Url != "" {
return i.ImageURL.Url
}
return i.URL
}
type imageEditRequest struct {
Model string `json:"model"`
Prompt string `json:"prompt"`
Image *imageInputURL `json:"image,omitempty"`
Images []imageInputURL `json:"images,omitempty"`
ImageURL *imageInputURL `json:"image_url,omitempty"`
Mask *imageInputURL `json:"mask,omitempty"`
MaskURL *imageInputURL `json:"mask_url,omitempty"`
Background string `json:"background,omitempty"`
Moderation string `json:"moderation,omitempty"`
OutputCompression int `json:"output_compression,omitempty"`
OutputFormat string `json:"output_format,omitempty"`
Quality string `json:"quality,omitempty"`
ResponseFormat string `json:"response_format,omitempty"`
Style string `json:"style,omitempty"`
N int `json:"n,omitempty"`
Size string `json:"size,omitempty"`
}
func (r *imageEditRequest) GetImageURLs() []string {
urls := make([]string, 0, len(r.Images)+2)
for _, image := range r.Images {
if url := image.GetURL(); url != "" {
urls = append(urls, url)
}
}
if r.Image != nil {
if url := r.Image.GetURL(); url != "" {
urls = append(urls, url)
}
}
if r.ImageURL != nil {
if url := r.ImageURL.GetURL(); url != "" {
urls = append(urls, url)
}
}
return urls
}
func (r *imageEditRequest) HasMask() bool {
if r.Mask != nil && r.Mask.GetURL() != "" {
return true
}
return r.MaskURL != nil && r.MaskURL.GetURL() != ""
}
type imageVariationRequest struct {
Model string `json:"model"`
Prompt string `json:"prompt,omitempty"`
Image *imageInputURL `json:"image,omitempty"`
Images []imageInputURL `json:"images,omitempty"`
ImageURL *imageInputURL `json:"image_url,omitempty"`
Background string `json:"background,omitempty"`
Moderation string `json:"moderation,omitempty"`
OutputCompression int `json:"output_compression,omitempty"`
OutputFormat string `json:"output_format,omitempty"`
Quality string `json:"quality,omitempty"`
ResponseFormat string `json:"response_format,omitempty"`
Style string `json:"style,omitempty"`
N int `json:"n,omitempty"`
Size string `json:"size,omitempty"`
}
func (r *imageVariationRequest) GetImageURLs() []string {
urls := make([]string, 0, len(r.Images)+2)
for _, image := range r.Images {
if url := image.GetURL(); url != "" {
urls = append(urls, url)
}
}
if r.Image != nil {
if url := r.Image.GetURL(); url != "" {
urls = append(urls, url)
}
}
if r.ImageURL != nil {
if url := r.ImageURL.GetURL(); url != "" {
urls = append(urls, url)
}
}
return urls
}
type imageGenerationData struct {
URL string `json:"url,omitempty"`
B64 string `json:"b64_json,omitempty"`
@@ -712,87 +529,3 @@ func (r embeddingsRequest) ParseInput() []string {
}
return input
}
// PromoteThinkingOnEmptyResponse promotes reasoning_content to content in a non-streaming
// response body when content is empty. Returns the original body if no promotion is needed.
func PromoteThinkingOnEmptyResponse(body []byte) ([]byte, error) {
var resp chatCompletionResponse
if err := json.Unmarshal(body, &resp); err != nil {
return body, fmt.Errorf("unable to unmarshal response for thinking promotion: %v", err)
}
promoted := false
for i := range resp.Choices {
msg := resp.Choices[i].Message
if msg == nil {
continue
}
if !isContentEmpty(msg.Content) {
continue
}
if msg.ReasoningContent != "" {
msg.Content = msg.ReasoningContent
msg.ReasoningContent = ""
promoted = true
}
}
if !promoted {
return body, nil
}
return json.Marshal(resp)
}
// PromoteStreamingThinkingOnEmptyChunk buffers reasoning deltas and strips them from
// the chunk during streaming. Call PromoteStreamingThinkingFlush on the last chunk
// to emit buffered reasoning as content if no real content was ever seen.
func PromoteStreamingThinkingOnEmptyChunk(ctx wrapper.HttpContext, data []byte) ([]byte, error) {
var resp chatCompletionResponse
if err := json.Unmarshal(data, &resp); err != nil {
return data, nil // not a valid chat completion chunk, skip
}
modified := false
for i := range resp.Choices {
msg := resp.Choices[i].Delta
if msg == nil {
continue
}
if promoteStreamingThinkingOnEmpty(ctx, msg) {
modified = true
}
}
if !modified {
return data, nil
}
return json.Marshal(resp)
}
// PromoteStreamingThinkingFlush checks if the stream had no content and returns
// an SSE chunk that emits the buffered reasoning as content. Returns nil if
// content was already seen or no reasoning was buffered.
func PromoteStreamingThinkingFlush(ctx wrapper.HttpContext) []byte {
hasContentDelta, _ := ctx.GetContext(ctxKeyHasContentDelta).(bool)
if hasContentDelta {
return nil
}
buffered, _ := ctx.GetContext(ctxKeyBufferedReasoning).(string)
if buffered == "" {
return nil
}
// Build a minimal chat.completion.chunk with the buffered reasoning as content
resp := chatCompletionResponse{
Object: objectChatCompletionChunk,
Choices: []chatCompletionChoice{
{
Index: 0,
Delta: &chatMessage{
Content: buffered,
},
},
},
}
data, err := json.Marshal(resp)
if err != nil {
return nil
}
// Format as SSE
return []byte("data: " + string(data) + "\n\n")
}

View File

@@ -1,156 +0,0 @@
package provider
import (
"bytes"
"encoding/base64"
"fmt"
"io"
"mime"
"mime/multipart"
"net/http"
"strconv"
"strings"
)
// multipartImageRequest holds the subset of a multipart/form-data image
// request that this package consumes: the textual form fields plus image
// payloads normalized to URL form.
type multipartImageRequest struct {
	Model        string   // "model" form field
	Prompt       string   // "prompt" form field
	Size         string   // "size" form field
	OutputFormat string   // "output_format" form field
	N            int      // "n" form field; zero when absent or non-numeric
	ImageURLs    []string // image parts, kept as URLs or converted to data: URLs
	HasMask      bool     // true when any non-empty mask part was present
}
// isMultipartFormData reports whether the Content-Type header denotes a
// multipart/form-data payload, ignoring parameters such as the boundary.
func isMultipartFormData(contentType string) bool {
	mediaType, _, err := mime.ParseMediaType(contentType)
	return err == nil && strings.EqualFold(mediaType, "multipart/form-data")
}
// parseMultipartImageRequest decodes a multipart/form-data body into a
// multipartImageRequest. Known text fields (model, prompt, size,
// output_format, n) are captured directly. Image parts become entries in
// ImageURLs: values already shaped like URLs (data:/http:/https:) are kept
// verbatim, while binary uploads are converted to base64 data: URLs.
// Mask parts only set the HasMask flag. Unrecognized fields are ignored.
func parseMultipartImageRequest(body []byte, contentType string) (*multipartImageRequest, error) {
	_, params, err := mime.ParseMediaType(contentType)
	if err != nil {
		return nil, fmt.Errorf("unable to parse content-type: %v", err)
	}
	boundary := params["boundary"]
	if boundary == "" {
		return nil, fmt.Errorf("missing multipart boundary")
	}
	req := &multipartImageRequest{
		ImageURLs: make([]string, 0),
	}
	reader := multipart.NewReader(bytes.NewReader(body), boundary)
	for {
		part, err := reader.NextPart()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("unable to read multipart part: %v", err)
		}
		fieldName := part.FormName()
		if fieldName == "" {
			// A part without a form field name cannot be mapped; skip it.
			_ = part.Close()
			continue
		}
		partContentType := strings.TrimSpace(part.Header.Get("Content-Type"))
		partData, err := io.ReadAll(part)
		_ = part.Close()
		if err != nil {
			return nil, fmt.Errorf("unable to read multipart field %s: %v", fieldName, err)
		}
		value := strings.TrimSpace(string(partData))
		switch fieldName {
		case "model":
			req.Model = value
			continue
		case "prompt":
			req.Prompt = value
			continue
		case "size":
			req.Size = value
			continue
		case "output_format":
			req.OutputFormat = value
			continue
		case "n":
			// Non-numeric "n" values are silently ignored (N stays zero).
			if value != "" {
				if parsed, err := strconv.Atoi(value); err == nil {
					req.N = parsed
				}
			}
			continue
		}
		if isMultipartImageField(fieldName) {
			if isMultipartImageURLValue(value) {
				// Already a URL (data:/http:/https:); keep the text as-is.
				req.ImageURLs = append(req.ImageURLs, value)
				continue
			}
			if len(partData) == 0 {
				continue
			}
			// Binary upload: embed the bytes as a base64 data: URL.
			imageURL := buildMultipartDataURL(partContentType, partData)
			req.ImageURLs = append(req.ImageURLs, imageURL)
			continue
		}
		if isMultipartMaskField(fieldName) {
			// Only the presence of a mask matters here, not its content.
			if len(partData) > 0 || value != "" {
				req.HasMask = true
			}
			continue
		}
	}
	return req, nil
}
// isMultipartImageField reports whether a multipart form field name denotes
// an image part ("image", "image[]", or any "image[...]"-indexed variant).
func isMultipartImageField(fieldName string) bool {
	switch fieldName {
	case "image", "image[]":
		return true
	}
	return strings.HasPrefix(fieldName, "image[")
}
// isMultipartMaskField reports whether a multipart form field name denotes
// a mask part ("mask", "mask[]", or any "mask[...]"-indexed variant).
func isMultipartMaskField(fieldName string) bool {
	switch fieldName {
	case "mask", "mask[]":
		return true
	}
	return strings.HasPrefix(fieldName, "mask[")
}
// isMultipartImageURLValue reports whether a form field value is already in
// URL form (data:, http://, or https://), compared case-insensitively.
func isMultipartImageURLValue(value string) bool {
	if value == "" {
		return false
	}
	lowered := strings.ToLower(value)
	for _, prefix := range []string{"data:", "http://", "https://"} {
		if strings.HasPrefix(lowered, prefix) {
			return true
		}
	}
	return false
}
// buildMultipartDataURL encodes raw part bytes as a data: URL. A missing or
// generic (application/octet-stream) content type is replaced by sniffing the
// bytes; the type is then normalized, falling back to application/octet-stream
// when nothing usable remains.
func buildMultipartDataURL(contentType string, data []byte) string {
	mimeType := strings.TrimSpace(contentType)
	if mimeType == "" || strings.EqualFold(mimeType, "application/octet-stream") {
		mimeType = http.DetectContentType(data)
	}
	if mimeType = normalizeMultipartMimeType(mimeType); mimeType == "" {
		mimeType = "application/octet-stream"
	}
	return "data:" + mimeType + ";base64," + base64.StdEncoding.EncodeToString(data)
}
// normalizeMultipartMimeType reduces a Content-Type value to its bare media
// type. It prefers mime.ParseMediaType; on parse failure it falls back to
// stripping everything after the first ";". Empty input yields "".
func normalizeMultipartMimeType(contentType string) string {
	trimmed := strings.TrimSpace(contentType)
	if trimmed == "" {
		return ""
	}
	if mediaType, _, err := mime.ParseMediaType(trimmed); err == nil && mediaType != "" {
		return strings.TrimSpace(mediaType)
	}
	if semi := strings.Index(trimmed, ";"); semi > 0 {
		return strings.TrimSpace(trimmed[:semi])
	}
	return trimmed
}

View File

@@ -34,9 +34,6 @@ func (m *openaiProviderInitializer) DefaultCapabilities() map[string]string {
string(ApiNameImageEdit): PathOpenAIImageEdit,
string(ApiNameImageVariation): PathOpenAIImageVariation,
string(ApiNameAudioSpeech): PathOpenAIAudioSpeech,
string(ApiNameAudioTranscription): PathOpenAIAudioTranscriptions,
string(ApiNameAudioTranslation): PathOpenAIAudioTranslations,
string(ApiNameRealtime): PathOpenAIRealtime,
string(ApiNameModels): PathOpenAIModels,
string(ApiNameFiles): PathOpenAIFiles,
string(ApiNameRetrieveFile): PathOpenAIRetrieveFile,
@@ -66,8 +63,6 @@ func isDirectPath(path string) bool {
return strings.HasSuffix(path, "/completions") ||
strings.HasSuffix(path, "/embeddings") ||
strings.HasSuffix(path, "/audio/speech") ||
strings.HasSuffix(path, "/audio/transcriptions") ||
strings.HasSuffix(path, "/audio/translations") ||
strings.HasSuffix(path, "/images/generations") ||
strings.HasSuffix(path, "/images/variations") ||
strings.HasSuffix(path, "/images/edits") ||
@@ -75,7 +70,6 @@ func isDirectPath(path string) bool {
strings.HasSuffix(path, "/responses") ||
strings.HasSuffix(path, "/fine_tuning/jobs") ||
strings.HasSuffix(path, "/fine_tuning/checkpoints") ||
strings.HasSuffix(path, "/realtime") ||
strings.HasSuffix(path, "/videos")
}

View File

@@ -41,9 +41,6 @@ const (
ApiNameImageEdit ApiName = "openai/v1/imageedit"
ApiNameImageVariation ApiName = "openai/v1/imagevariation"
ApiNameAudioSpeech ApiName = "openai/v1/audiospeech"
ApiNameAudioTranscription ApiName = "openai/v1/audiotranscription"
ApiNameAudioTranslation ApiName = "openai/v1/audiotranslation"
ApiNameRealtime ApiName = "openai/v1/realtime"
ApiNameFiles ApiName = "openai/v1/files"
ApiNameRetrieveFile ApiName = "openai/v1/retrievefile"
ApiNameRetrieveFileContent ApiName = "openai/v1/retrievefilecontent"
@@ -93,9 +90,6 @@ const (
PathOpenAIImageEdit = "/v1/images/edits"
PathOpenAIImageVariation = "/v1/images/variations"
PathOpenAIAudioSpeech = "/v1/audio/speech"
PathOpenAIAudioTranscriptions = "/v1/audio/transcriptions"
PathOpenAIAudioTranslations = "/v1/audio/translations"
PathOpenAIRealtime = "/v1/realtime"
PathOpenAIResponses = "/v1/responses"
PathOpenAIFineTuningJobs = "/v1/fine_tuning/jobs"
PathOpenAIRetrieveFineTuningJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}"
@@ -178,8 +172,6 @@ const (
ctxKeyPushedMessage = "pushedMessage"
ctxKeyContentPushed = "contentPushed"
ctxKeyReasoningContentPushed = "reasoningContentPushed"
ctxKeyHasContentDelta = "hasContentDelta"
ctxKeyBufferedReasoning = "bufferedReasoning"
objectChatCompletion = "chat.completion"
objectChatCompletionChunk = "chat.completion.chunk"
@@ -206,7 +198,8 @@ var (
// Providers that support the "developer" role. Other providers will have "developer" roles converted to "system".
developerRoleSupportedProviders = map[string]bool{
providerTypeAzure: true,
providerTypeOpenAI: true,
providerTypeAzure: true,
}
providerInitializers = map[string]providerInitializer{
@@ -362,12 +355,6 @@ type ProviderConfig struct {
// @Title zh-CN Amazon Bedrock 额外模型请求参数
// @Description zh-CN 仅适用于Amazon Bedrock服务用于设置模型特定的推理参数
bedrockAdditionalFields map[string]interface{} `required:"false" yaml:"bedrockAdditionalFields" json:"bedrockAdditionalFields"`
// @Title zh-CN Amazon Bedrock Prompt CachePoint 插入位置
// @Description zh-CN 仅适用于Amazon Bedrock服务。用于配置 cachePoint 插入位置支持多选systemPrompt、lastUserMessage、lastMessage。值为 true 表示启用该位置。
bedrockPromptCachePointPositions map[string]bool `required:"false" yaml:"bedrockPromptCachePointPositions" json:"bedrockPromptCachePointPositions"`
// @Title zh-CN Amazon Bedrock Prompt Cache 保留策略(默认值)
// @Description zh-CN 仅适用于Amazon Bedrock服务。作为请求中 prompt_cache_retention 缺省时的默认值,支持 in_memory 和 24h。
promptCacheRetention string `required:"false" yaml:"promptCacheRetention" json:"promptCacheRetention"`
// @Title zh-CN minimax API type
// @Description zh-CN 仅适用于 minimax 服务。minimax API 类型v2 和 pro 中选填一项,默认值为 v2
minimaxApiType string `required:"false" yaml:"minimaxApiType" json:"minimaxApiType"`
@@ -473,15 +460,6 @@ type ProviderConfig struct {
// @Title zh-CN 智谱AI Code Plan 模式
// @Description zh-CN 仅适用于智谱AI服务。启用后将使用 /api/coding/paas/v4/chat/completions 接口
zhipuCodePlanMode bool `required:"false" yaml:"zhipuCodePlanMode" json:"zhipuCodePlanMode"`
// @Title zh-CN 合并连续同角色消息
// @Description zh-CN 开启后,若请求的 messages 中存在连续的同角色消息(如连续两条 user 消息将其内容合并为一条以满足要求严格轮流交替user→assistant→user→...)的模型服务商的要求。
mergeConsecutiveMessages bool `required:"false" yaml:"mergeConsecutiveMessages" json:"mergeConsecutiveMessages"`
// @Title zh-CN 空内容时提升思考为正文
// @Description zh-CN 开启后,若模型响应只包含 reasoning_content/thinking 而没有正文内容,将 reasoning 内容提升为正文内容返回,避免客户端收到空回复。
promoteThinkingOnEmpty bool `required:"false" yaml:"promoteThinkingOnEmpty" json:"promoteThinkingOnEmpty"`
// @Title zh-CN HiClaw 模式
// @Description zh-CN 开启后同时启用 mergeConsecutiveMessages 和 promoteThinkingOnEmpty适用于 HiClaw 多 Agent 协作场景。
hiclawMode bool `required:"false" yaml:"hiclawMode" json:"hiclawMode"`
}
func (c *ProviderConfig) GetId() string {
@@ -575,13 +553,6 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
for k, v := range json.Get("bedrockAdditionalFields").Map() {
c.bedrockAdditionalFields[k] = v.Value()
}
c.promptCacheRetention = json.Get("promptCacheRetention").String()
if rawPositions := json.Get("bedrockPromptCachePointPositions"); rawPositions.Exists() {
c.bedrockPromptCachePointPositions = make(map[string]bool)
for k, v := range rawPositions.Map() {
c.bedrockPromptCachePointPositions[k] = v.Bool()
}
}
}
c.minimaxApiType = json.Get("minimaxApiType").String()
c.minimaxGroupId = json.Get("minimaxGroupId").String()
@@ -676,10 +647,6 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
string(ApiNameImageVariation),
string(ApiNameImageEdit),
string(ApiNameAudioSpeech),
string(ApiNameAudioTranscription),
string(ApiNameAudioTranslation),
string(ApiNameRealtime),
string(ApiNameResponses),
string(ApiNameCohereV1Rerank),
string(ApiNameVideos),
string(ApiNameRetrieveVideo),
@@ -706,13 +673,6 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
c.contextCleanupCommands = append(c.contextCleanupCommands, cmd.String())
}
}
c.mergeConsecutiveMessages = json.Get("mergeConsecutiveMessages").Bool()
c.promoteThinkingOnEmpty = json.Get("promoteThinkingOnEmpty").Bool()
c.hiclawMode = json.Get("hiclawMode").Bool()
if c.hiclawMode {
c.mergeConsecutiveMessages = true
c.promoteThinkingOnEmpty = true
}
}
func (c *ProviderConfig) Validate() error {
@@ -803,19 +763,19 @@ func (c *ProviderConfig) GetRandomToken() string {
func isStatefulAPI(apiName string) bool {
// These APIs maintain session state and should be routed to the same provider consistently
statefulAPIs := map[string]bool{
string(ApiNameResponses): true, // Response API - uses previous_response_id
string(ApiNameFiles): true, // Files API - maintains file state
string(ApiNameRetrieveFile): true, // File retrieval - depends on file upload
string(ApiNameRetrieveFileContent): true, // File content - depends on file upload
string(ApiNameBatches): true, // Batch API - maintains batch state
string(ApiNameRetrieveBatch): true, // Batch status - depends on batch creation
string(ApiNameCancelBatch): true, // Batch operations - depends on batch state
string(ApiNameFineTuningJobs): true, // Fine-tuning - maintains job state
string(ApiNameRetrieveFineTuningJob): true, // Fine-tuning job status
string(ApiNameFineTuningJobEvents): true, // Fine-tuning events
string(ApiNameFineTuningJobCheckpoints): true, // Fine-tuning checkpoints
string(ApiNameCancelFineTuningJob): true, // Cancel fine-tuning job
string(ApiNameResumeFineTuningJob): true, // Resume fine-tuning job
string(ApiNameResponses): true, // Response API - uses previous_response_id
string(ApiNameFiles): true, // Files API - maintains file state
string(ApiNameRetrieveFile): true, // File retrieval - depends on file upload
string(ApiNameRetrieveFileContent): true, // File content - depends on file upload
string(ApiNameBatches): true, // Batch API - maintains batch state
string(ApiNameRetrieveBatch): true, // Batch status - depends on batch creation
string(ApiNameCancelBatch): true, // Batch operations - depends on batch state
string(ApiNameFineTuningJobs): true, // Fine-tuning - maintains job state
string(ApiNameRetrieveFineTuningJob): true, // Fine-tuning job status
string(ApiNameFineTuningJobEvents): true, // Fine-tuning events
string(ApiNameFineTuningJobCheckpoints): true, // Fine-tuning checkpoints
string(ApiNameCancelFineTuningJob): true, // Cancel fine-tuning job
string(ApiNameResumeFineTuningJob): true, // Resume fine-tuning job
}
return statefulAPIs[apiName]
}
@@ -847,10 +807,6 @@ func (c *ProviderConfig) IsOriginal() bool {
return c.protocol == protocolOriginal
}
func (c *ProviderConfig) GetPromoteThinkingOnEmpty() bool {
return c.promoteThinkingOnEmpty
}
func (c *ProviderConfig) ReplaceByCustomSettings(body []byte) ([]byte, error) {
return ReplaceByCustomSettings(body, c.customSettings)
}
@@ -889,16 +845,6 @@ func (c *ProviderConfig) parseRequestAndMapModel(ctx wrapper.HttpContext, reques
return err
}
return c.setRequestModel(ctx, req)
case *imageEditRequest:
if err := decodeImageEditRequest(body, req); err != nil {
return err
}
return c.setRequestModel(ctx, req)
case *imageVariationRequest:
if err := decodeImageVariationRequest(body, req); err != nil {
return err
}
return c.setRequestModel(ctx, req)
default:
return errors.New("unsupported request type")
}
@@ -914,10 +860,6 @@ func (c *ProviderConfig) setRequestModel(ctx wrapper.HttpContext, request interf
model = &req.Model
case *imageGenerationRequest:
model = &req.Model
case *imageEditRequest:
model = &req.Model
case *imageVariationRequest:
model = &req.Model
default:
return errors.New("unsupported request type")
}
@@ -1078,7 +1020,7 @@ func ExtractStreamingEvents(ctx wrapper.HttpContext, chunk []byte) []StreamEvent
if lineStartIndex != -1 {
value := string(body[valueStartIndex:i])
currentEvent.SetValue(currentKey, value)
} else if eventStartIndex != -1 {
} else {
currentEvent.RawEvent = string(body[eventStartIndex : i+1])
// Extra new line. The current event is complete.
events = append(events, *currentEvent)
@@ -1156,17 +1098,6 @@ func (c *ProviderConfig) handleRequestBody(
}
}
// merge consecutive same-role messages for providers that require strict role alternation
if apiName == ApiNameChatCompletion && c.mergeConsecutiveMessages {
body, err = mergeConsecutiveMessages(body)
if err != nil {
log.Warnf("[mergeConsecutiveMessages] failed to merge messages: %v", err)
err = nil
} else {
log.Debugf("[mergeConsecutiveMessages] merged consecutive messages for provider: %s", c.typ)
}
}
// convert developer role to system role for providers that don't support it
if apiName == ApiNameChatCompletion && !isDeveloperRoleSupported(c.typ) {
body, err = convertDeveloperRoleToSystem(body)

View File

@@ -30,7 +30,6 @@ const (
qwenCompatibleChatCompletionPath = "/compatible-mode/v1/chat/completions"
qwenCompatibleCompletionsPath = "/compatible-mode/v1/completions"
qwenCompatibleTextEmbeddingPath = "/compatible-mode/v1/embeddings"
qwenCompatibleResponsesPath = "/api/v2/apps/protocols/compatible-mode/v1/responses"
qwenCompatibleFilesPath = "/compatible-mode/v1/files"
qwenCompatibleRetrieveFilePath = "/compatible-mode/v1/files/{file_id}"
qwenCompatibleRetrieveFileContentPath = "/compatible-mode/v1/files/{file_id}/content"
@@ -38,7 +37,7 @@ const (
qwenCompatibleRetrieveBatchPath = "/compatible-mode/v1/batches/{batch_id}"
qwenBailianPath = "/api/v1/apps"
qwenMultimodalGenerationPath = "/api/v1/services/aigc/multimodal-generation/generation"
qwenAnthropicMessagesPath = "/apps/anthropic/v1/messages"
qwenAnthropicMessagesPath = "/api/v2/apps/claude-code-proxy/v1/messages"
qwenAsyncAIGCPath = "/api/v1/services/aigc/"
qwenAsyncTaskPath = "/api/v1/tasks/"
@@ -70,7 +69,6 @@ func (m *qwenProviderInitializer) DefaultCapabilities(qwenEnableCompatible bool)
string(ApiNameChatCompletion): qwenCompatibleChatCompletionPath,
string(ApiNameEmbeddings): qwenCompatibleTextEmbeddingPath,
string(ApiNameCompletion): qwenCompatibleCompletionsPath,
string(ApiNameResponses): qwenCompatibleResponsesPath,
string(ApiNameFiles): qwenCompatibleFilesPath,
string(ApiNameRetrieveFile): qwenCompatibleRetrieveFilePath,
string(ApiNameRetrieveFileContent): qwenCompatibleRetrieveFileContentPath,
@@ -709,8 +707,6 @@ func (m *qwenProvider) GetApiName(path string) ApiName {
case strings.Contains(path, qwenTextEmbeddingPath),
strings.Contains(path, qwenCompatibleTextEmbeddingPath):
return ApiNameEmbeddings
case strings.Contains(path, qwenCompatibleResponsesPath):
return ApiNameResponses
case strings.Contains(path, qwenAsyncAIGCPath):
return ApiNameQwenAsyncAIGC
case strings.Contains(path, qwenAsyncTaskPath):

View File

@@ -4,8 +4,8 @@ import (
"encoding/json"
"fmt"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/wasm-go/pkg/log"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
)
func decodeChatCompletionRequest(body []byte, request *chatCompletionRequest) error {
@@ -32,20 +32,6 @@ func decodeImageGenerationRequest(body []byte, request *imageGenerationRequest)
return nil
}
// decodeImageEditRequest unmarshals a JSON body into req, wrapping any
// decode failure with context.
func decodeImageEditRequest(data []byte, req *imageEditRequest) error {
	err := json.Unmarshal(data, req)
	if err != nil {
		return fmt.Errorf("unable to unmarshal request: %v", err)
	}
	return nil
}
// decodeImageVariationRequest unmarshals a JSON body into req, wrapping any
// decode failure with context.
func decodeImageVariationRequest(data []byte, req *imageVariationRequest) error {
	err := json.Unmarshal(data, req)
	if err != nil {
		return fmt.Errorf("unable to unmarshal request: %v", err)
	}
	return nil
}
func replaceJsonRequestBody(request interface{}) error {
body, err := json.Marshal(request)
if err != nil {
@@ -154,54 +140,6 @@ func cleanupContextMessages(body []byte, cleanupCommands []string) ([]byte, erro
return json.Marshal(request)
}
// mergeConsecutiveMessages merges consecutive messages of the same role (user or assistant).
// Many LLM providers require strict user↔assistant alternation and reject requests where
// two messages of the same role appear consecutively. When enabled, consecutive same-role
// messages have their content concatenated into a single message.
// On unmarshal failure the original body is returned alongside the error; when
// no merge is needed the original body is returned unchanged.
func mergeConsecutiveMessages(body []byte) ([]byte, error) {
	request := &chatCompletionRequest{}
	if err := json.Unmarshal(body, request); err != nil {
		return body, fmt.Errorf("unable to unmarshal request for message merging: %v", err)
	}
	// Zero or one message: nothing can be consecutive.
	if len(request.Messages) <= 1 {
		return body, nil
	}
	merged := false
	result := make([]chatMessage, 0, len(request.Messages))
	for _, msg := range request.Messages {
		// Only user/assistant runs are collapsed; other roles pass through untouched.
		if len(result) > 0 &&
			result[len(result)-1].Role == msg.Role &&
			(msg.Role == roleUser || msg.Role == roleAssistant) {
			last := &result[len(result)-1]
			last.Content = mergeMessageContent(last.Content, msg.Content)
			merged = true
			continue
		}
		result = append(result, msg)
	}
	// Avoid re-marshaling (and perturbing the byte layout) when nothing changed.
	if !merged {
		return body, nil
	}
	request.Messages = result
	return json.Marshal(request)
}
// mergeMessageContent concatenates two message content values.
// If both are plain strings they are joined with a blank line.
// Otherwise both are converted to content-block arrays and concatenated.
func mergeMessageContent(prev, curr any) any {
prevStr, prevIsStr := prev.(string)
currStr, currIsStr := curr.(string)
if prevIsStr && currIsStr {
return prevStr + "\n\n" + currStr
}
prevParts := (&chatMessage{Content: prev}).ParseContent()
currParts := (&chatMessage{Content: curr}).ParseContent()
return append(prevParts, currParts...)
}
func ReplaceResponseBody(body []byte) error {
log.Debugf("response body: %s", string(body))
err := proxywasm.ReplaceHttpResponseBody(body)

View File

@@ -8,131 +8,6 @@ import (
"github.com/stretchr/testify/require"
)
func TestMergeConsecutiveMessages(t *testing.T) {
t.Run("no_consecutive_messages", func(t *testing.T) {
input := chatCompletionRequest{
Messages: []chatMessage{
{Role: "user", Content: "你好"},
{Role: "assistant", Content: "你好!"},
{Role: "user", Content: "再见"},
},
}
body, err := json.Marshal(input)
require.NoError(t, err)
result, err := mergeConsecutiveMessages(body)
assert.NoError(t, err)
// No merging needed, returned body should be identical
assert.Equal(t, body, result)
})
t.Run("merges_consecutive_user_messages", func(t *testing.T) {
input := chatCompletionRequest{
Messages: []chatMessage{
{Role: "user", Content: "第一条"},
{Role: "user", Content: "第二条"},
{Role: "assistant", Content: "回复"},
},
}
body, err := json.Marshal(input)
require.NoError(t, err)
result, err := mergeConsecutiveMessages(body)
assert.NoError(t, err)
var output chatCompletionRequest
require.NoError(t, json.Unmarshal(result, &output))
assert.Len(t, output.Messages, 2)
assert.Equal(t, "user", output.Messages[0].Role)
assert.Equal(t, "第一条\n\n第二条", output.Messages[0].Content)
assert.Equal(t, "assistant", output.Messages[1].Role)
})
t.Run("merges_consecutive_assistant_messages", func(t *testing.T) {
input := chatCompletionRequest{
Messages: []chatMessage{
{Role: "user", Content: "问题"},
{Role: "assistant", Content: "第一段"},
{Role: "assistant", Content: "第二段"},
},
}
body, err := json.Marshal(input)
require.NoError(t, err)
result, err := mergeConsecutiveMessages(body)
assert.NoError(t, err)
var output chatCompletionRequest
require.NoError(t, json.Unmarshal(result, &output))
assert.Len(t, output.Messages, 2)
assert.Equal(t, "user", output.Messages[0].Role)
assert.Equal(t, "assistant", output.Messages[1].Role)
assert.Equal(t, "第一段\n\n第二段", output.Messages[1].Content)
})
t.Run("merges_multiple_consecutive_same_role", func(t *testing.T) {
input := chatCompletionRequest{
Messages: []chatMessage{
{Role: "user", Content: "A"},
{Role: "user", Content: "B"},
{Role: "user", Content: "C"},
{Role: "assistant", Content: "回复"},
},
}
body, err := json.Marshal(input)
require.NoError(t, err)
result, err := mergeConsecutiveMessages(body)
assert.NoError(t, err)
var output chatCompletionRequest
require.NoError(t, json.Unmarshal(result, &output))
assert.Len(t, output.Messages, 2)
assert.Equal(t, "A\n\nB\n\nC", output.Messages[0].Content)
})
t.Run("system_messages_not_merged", func(t *testing.T) {
input := chatCompletionRequest{
Messages: []chatMessage{
{Role: "system", Content: "系统提示1"},
{Role: "system", Content: "系统提示2"},
{Role: "user", Content: "问题"},
},
}
body, err := json.Marshal(input)
require.NoError(t, err)
result, err := mergeConsecutiveMessages(body)
assert.NoError(t, err)
// system messages are not merged, body unchanged
assert.Equal(t, body, result)
})
t.Run("single_message_unchanged", func(t *testing.T) {
input := chatCompletionRequest{
Messages: []chatMessage{
{Role: "user", Content: "只有一条"},
},
}
body, err := json.Marshal(input)
require.NoError(t, err)
result, err := mergeConsecutiveMessages(body)
assert.NoError(t, err)
assert.Equal(t, body, result)
})
t.Run("invalid_json_body", func(t *testing.T) {
body := []byte(`invalid json`)
result, err := mergeConsecutiveMessages(body)
assert.Error(t, err)
assert.Equal(t, body, result)
})
}
func TestCleanupContextMessages(t *testing.T) {
t.Run("empty_cleanup_commands", func(t *testing.T) {
body := []byte(`{"messages":[{"role":"user","content":"hello"}]}`)

View File

@@ -45,9 +45,7 @@ const (
contextClaudeMarker = "isClaudeRequest"
contextOpenAICompatibleMarker = "isOpenAICompatibleRequest"
contextVertexRawMarker = "isVertexRawRequest"
contextVertexStreamDoneMarker = "vertexStreamDoneSent"
vertexAnthropicVersion = "vertex-2023-10-16"
vertexImageVariationDefaultPrompt = "Create variations of the provided image."
)
// vertexRawPathRegex 匹配原生 Vertex AI REST API 路径
@@ -100,8 +98,6 @@ func (v *vertexProviderInitializer) DefaultCapabilities() map[string]string {
string(ApiNameChatCompletion): vertexPathTemplate,
string(ApiNameEmbeddings): vertexPathTemplate,
string(ApiNameImageGeneration): vertexPathTemplate,
string(ApiNameImageEdit): vertexPathTemplate,
string(ApiNameImageVariation): vertexPathTemplate,
string(ApiNameVertexRaw): "", // 空字符串表示保持原路径,不做路径转换
}
}
@@ -311,10 +307,6 @@ func (v *vertexProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, ap
return v.onEmbeddingsRequestBody(ctx, body, headers)
case ApiNameImageGeneration:
return v.onImageGenerationRequestBody(ctx, body, headers)
case ApiNameImageEdit:
return v.onImageEditRequestBody(ctx, body, headers)
case ApiNameImageVariation:
return v.onImageVariationRequestBody(ctx, body, headers)
default:
return body, nil
}
@@ -395,108 +387,11 @@ func (v *vertexProvider) onImageGenerationRequestBody(ctx wrapper.HttpContext, b
path := v.getRequestPath(ApiNameImageGeneration, request.Model, false)
util.OverwriteRequestPathHeader(headers, path)
vertexRequest, err := v.buildVertexImageGenerationRequest(request)
if err != nil {
return nil, err
}
vertexRequest := v.buildVertexImageGenerationRequest(request)
return json.Marshal(vertexRequest)
}
func (v *vertexProvider) onImageEditRequestBody(ctx wrapper.HttpContext, body []byte, headers http.Header) ([]byte, error) {
request := &imageEditRequest{}
imageURLs := make([]string, 0)
contentType := headers.Get("Content-Type")
if isMultipartFormData(contentType) {
parsedRequest, err := parseMultipartImageRequest(body, contentType)
if err != nil {
return nil, err
}
request.Model = parsedRequest.Model
request.Prompt = parsedRequest.Prompt
request.Size = parsedRequest.Size
request.OutputFormat = parsedRequest.OutputFormat
request.N = parsedRequest.N
imageURLs = parsedRequest.ImageURLs
if err := v.config.mapModel(ctx, &request.Model); err != nil {
return nil, err
}
if parsedRequest.HasMask {
return nil, fmt.Errorf("mask is not supported for vertex image edits yet")
}
} else {
if err := v.config.parseRequestAndMapModel(ctx, request, body); err != nil {
return nil, err
}
if request.HasMask() {
return nil, fmt.Errorf("mask is not supported for vertex image edits yet")
}
imageURLs = request.GetImageURLs()
}
if len(imageURLs) == 0 {
return nil, fmt.Errorf("missing image_url in request")
}
if request.Prompt == "" {
return nil, fmt.Errorf("missing prompt in request")
}
path := v.getRequestPath(ApiNameImageEdit, request.Model, false)
util.OverwriteRequestPathHeader(headers, path)
headers.Set("Content-Type", util.MimeTypeApplicationJson)
vertexRequest, err := v.buildVertexImageRequest(request.Prompt, request.Size, request.OutputFormat, imageURLs)
if err != nil {
return nil, err
}
return json.Marshal(vertexRequest)
}
func (v *vertexProvider) onImageVariationRequestBody(ctx wrapper.HttpContext, body []byte, headers http.Header) ([]byte, error) {
request := &imageVariationRequest{}
imageURLs := make([]string, 0)
contentType := headers.Get("Content-Type")
if isMultipartFormData(contentType) {
parsedRequest, err := parseMultipartImageRequest(body, contentType)
if err != nil {
return nil, err
}
request.Model = parsedRequest.Model
request.Prompt = parsedRequest.Prompt
request.Size = parsedRequest.Size
request.OutputFormat = parsedRequest.OutputFormat
request.N = parsedRequest.N
imageURLs = parsedRequest.ImageURLs
if err := v.config.mapModel(ctx, &request.Model); err != nil {
return nil, err
}
} else {
if err := v.config.parseRequestAndMapModel(ctx, request, body); err != nil {
return nil, err
}
imageURLs = request.GetImageURLs()
}
if len(imageURLs) == 0 {
return nil, fmt.Errorf("missing image_url in request")
}
prompt := request.Prompt
if prompt == "" {
prompt = vertexImageVariationDefaultPrompt
}
path := v.getRequestPath(ApiNameImageVariation, request.Model, false)
util.OverwriteRequestPathHeader(headers, path)
headers.Set("Content-Type", util.MimeTypeApplicationJson)
vertexRequest, err := v.buildVertexImageRequest(prompt, request.Size, request.OutputFormat, imageURLs)
if err != nil {
return nil, err
}
return json.Marshal(vertexRequest)
}
func (v *vertexProvider) buildVertexImageGenerationRequest(request *imageGenerationRequest) (*vertexChatRequest, error) {
return v.buildVertexImageRequest(request.Prompt, request.Size, request.OutputFormat, nil)
}
func (v *vertexProvider) buildVertexImageRequest(prompt string, size string, outputFormat string, imageURLs []string) (*vertexChatRequest, error) {
func (v *vertexProvider) buildVertexImageGenerationRequest(request *imageGenerationRequest) *vertexChatRequest {
// 构建安全设置
safetySettings := make([]vertexChatSafetySetting, 0)
for category, threshold := range v.config.geminiSafetySetting {
@@ -507,12 +402,12 @@ func (v *vertexProvider) buildVertexImageRequest(prompt string, size string, out
}
// 解析尺寸参数
aspectRatio, imageSize := v.parseImageSize(size)
aspectRatio, imageSize := v.parseImageSize(request.Size)
// 确定输出 MIME 类型
mimeType := "image/png"
if outputFormat != "" {
switch outputFormat {
if request.OutputFormat != "" {
switch request.OutputFormat {
case "jpeg", "jpg":
mimeType = "image/jpeg"
case "webp":
@@ -522,27 +417,12 @@ func (v *vertexProvider) buildVertexImageRequest(prompt string, size string, out
}
}
parts := make([]vertexPart, 0, len(imageURLs)+1)
for _, imageURL := range imageURLs {
part, err := convertMediaContent(imageURL)
if err != nil {
return nil, err
}
parts = append(parts, part)
}
if prompt != "" {
parts = append(parts, vertexPart{
Text: prompt,
})
}
if len(parts) == 0 {
return nil, fmt.Errorf("missing prompt and image_url in request")
}
vertexRequest := &vertexChatRequest{
Contents: []vertexChatContent{{
Role: roleUser,
Parts: parts,
Role: roleUser,
Parts: []vertexPart{{
Text: request.Prompt,
}},
}},
SafetySettings: safetySettings,
GenerationConfig: vertexChatGenerationConfig{
@@ -560,7 +440,7 @@ func (v *vertexProvider) buildVertexImageRequest(prompt string, size string, out
},
}
return vertexRequest, nil
return vertexRequest
}
// parseImageSize 解析 OpenAI 格式的尺寸字符串(如 "1024x1024")为 Vertex AI 的 aspectRatio 和 imageSize
@@ -622,46 +502,23 @@ func (v *vertexProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name A
return v.claude.OnStreamingResponseBody(ctx, name, chunk, isLastChunk)
}
log.Infof("[vertexProvider] receive chunk body: %s", string(chunk))
if len(chunk) == 0 && !isLastChunk {
if isLastChunk {
return []byte(ssePrefix + "[DONE]\n\n"), nil
}
if len(chunk) == 0 {
return nil, nil
}
if name != ApiNameChatCompletion {
if isLastChunk {
return []byte(ssePrefix + "[DONE]\n\n"), nil
}
return chunk, nil
}
responseBuilder := &strings.Builder{}
// Flush a trailing event when upstream closes stream without a final blank line.
chunkForParsing := chunk
if isLastChunk {
trailingNewLineCount := 0
for i := len(chunkForParsing) - 1; i >= 0 && chunkForParsing[i] == '\n'; i-- {
trailingNewLineCount++
}
if trailingNewLineCount < 2 {
chunkForParsing = append([]byte(nil), chunk...)
for i := 0; i < 2-trailingNewLineCount; i++ {
chunkForParsing = append(chunkForParsing, '\n')
}
}
}
streamEvents := ExtractStreamingEvents(ctx, chunkForParsing)
doneSent, _ := ctx.GetContext(contextVertexStreamDoneMarker).(bool)
appendDone := isLastChunk && !doneSent
for _, event := range streamEvents {
data := event.Data
if data == "" {
continue
}
if data == streamEndDataValue {
if !doneSent {
appendDone = true
doneSent = true
}
lines := strings.Split(string(chunk), "\n")
for _, data := range lines {
if len(data) < 6 {
// ignore blank line or wrong format
continue
}
data = data[6:]
var vertexResp vertexChatResponse
if err := json.Unmarshal([]byte(data), &vertexResp); err != nil {
log.Errorf("unable to unmarshal vertex response: %v", err)
@@ -675,17 +532,7 @@ func (v *vertexProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name A
}
v.appendResponse(responseBuilder, string(responseBody))
}
if appendDone {
responseBuilder.WriteString(ssePrefix + "[DONE]\n\n")
doneSent = true
}
ctx.SetContext(contextVertexStreamDoneMarker, doneSent)
modifiedResponseChunk := responseBuilder.String()
if modifiedResponseChunk == "" {
// Returning an empty payload prevents main.go from falling back to
// forwarding the original raw chunk, which may contain partial JSON.
return []byte(""), nil
}
log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
return []byte(modifiedResponseChunk), nil
}
@@ -706,7 +553,7 @@ func (v *vertexProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName
return v.onChatCompletionResponseBody(ctx, body)
case ApiNameEmbeddings:
return v.onEmbeddingsResponseBody(ctx, body)
case ApiNameImageGeneration, ApiNameImageEdit, ApiNameImageVariation:
case ApiNameImageGeneration:
return v.onImageGenerationResponseBody(ctx, body)
default:
return body, nil
@@ -937,7 +784,7 @@ func (v *vertexProvider) getRequestPath(apiName ApiName, modelId string, stream
switch apiName {
case ApiNameEmbeddings:
action = vertexEmbeddingAction
case ApiNameImageGeneration, ApiNameImageEdit, ApiNameImageVariation:
case ApiNameImageGeneration:
// 图片生成使用非流式端点,需要完整响应
action = vertexChatCompletionAction
default:

View File

@@ -1,116 +0,0 @@
package test
import (
"encoding/json"
"testing"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
wasmtest "github.com/higress-group/wasm-go/pkg/test"
"github.com/stretchr/testify/require"
)
func openAICustomEndpointConfig(customURL string) json.RawMessage {
data, _ := json.Marshal(map[string]interface{}{
"provider": map[string]interface{}{
"type": "openai",
"apiTokens": []string{"sk-openai-test-custom-endpoint"},
"modelMapping": map[string]string{
"*": "gpt-4o-mini",
},
"openaiCustomUrl": customURL,
},
})
return data
}
var openAICustomAudioTranscriptionsEndpointConfig = openAICustomEndpointConfig("https://custom.openai.com/v1/audio/transcriptions")
var openAICustomAudioTranslationsEndpointConfig = openAICustomEndpointConfig("https://custom.openai.com/v1/audio/translations")
var openAICustomRealtimeEndpointConfig = openAICustomEndpointConfig("https://custom.openai.com/v1/realtime")
var openAICustomRealtimeSessionsEndpointConfig = openAICustomEndpointConfig("https://custom.openai.com/v1/realtime/sessions")
func RunApiPathRegressionTests(t *testing.T) {
wasmtest.RunTest(t, func(t *testing.T) {
t.Run("openai direct custom endpoint audio transcriptions", func(t *testing.T) {
host, status := wasmtest.NewTestHost(openAICustomAudioTranscriptionsEndpointConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/audio/transcriptions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.HeaderStopIteration, action)
requestHeaders := host.GetRequestHeaders()
pathValue, hasPath := wasmtest.GetHeaderValue(requestHeaders, ":path")
require.True(t, hasPath)
require.Equal(t, "/v1/audio/transcriptions", pathValue)
})
t.Run("openai direct custom endpoint audio translations", func(t *testing.T) {
host, status := wasmtest.NewTestHost(openAICustomAudioTranslationsEndpointConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/audio/translations"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.HeaderStopIteration, action)
requestHeaders := host.GetRequestHeaders()
pathValue, hasPath := wasmtest.GetHeaderValue(requestHeaders, ":path")
require.True(t, hasPath)
require.Equal(t, "/v1/audio/translations", pathValue)
})
t.Run("openai direct custom endpoint realtime", func(t *testing.T) {
host, status := wasmtest.NewTestHost(openAICustomRealtimeEndpointConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/realtime"},
{":method", "GET"},
{"Connection", "Upgrade"},
{"Upgrade", "websocket"},
{"Sec-WebSocket-Version", "13"},
{"Sec-WebSocket-Key", "dGhlIHNhbXBsZSBub25jZQ=="},
})
require.True(t, action == types.ActionContinue || action == types.HeaderStopIteration)
requestHeaders := host.GetRequestHeaders()
pathValue, hasPath := wasmtest.GetHeaderValue(requestHeaders, ":path")
require.True(t, hasPath)
require.Equal(t, "/v1/realtime", pathValue)
})
t.Run("openai non-direct endpoint appends mapped realtime suffix", func(t *testing.T) {
host, status := wasmtest.NewTestHost(openAICustomRealtimeSessionsEndpointConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/realtime"},
{":method", "GET"},
{"Connection", "Upgrade"},
{"Upgrade", "websocket"},
{"Sec-WebSocket-Version", "13"},
{"Sec-WebSocket-Key", "dGhlIHNhbXBsZSBub25jZQ=="},
})
require.True(t, action == types.ActionContinue || action == types.HeaderStopIteration)
requestHeaders := host.GetRequestHeaders()
pathValue, hasPath := wasmtest.GetHeaderValue(requestHeaders, ":path")
require.True(t, hasPath)
require.Equal(t, "/v1/realtime/sessions/realtime", pathValue)
})
})
}

View File

File diff suppressed because it is too large Load Diff

View File

@@ -1,50 +0,0 @@
package test
import "github.com/higress-group/wasm-go/pkg/iface"
// MockHttpContext is a minimal mock for wrapper.HttpContext used in unit tests
// that call provider functions directly (e.g. streaming thinking promotion).
type MockHttpContext struct {
contextMap map[string]interface{}
}
func NewMockHttpContext() *MockHttpContext {
return &MockHttpContext{contextMap: make(map[string]interface{})}
}
func (m *MockHttpContext) SetContext(key string, value interface{}) { m.contextMap[key] = value }
func (m *MockHttpContext) GetContext(key string) interface{} { return m.contextMap[key] }
func (m *MockHttpContext) GetBoolContext(key string, def bool) bool { return def }
func (m *MockHttpContext) GetStringContext(key, def string) string { return def }
func (m *MockHttpContext) GetByteSliceContext(key string, def []byte) []byte { return def }
func (m *MockHttpContext) Scheme() string { return "" }
func (m *MockHttpContext) Host() string { return "" }
func (m *MockHttpContext) Path() string { return "" }
func (m *MockHttpContext) Method() string { return "" }
func (m *MockHttpContext) GetUserAttribute(key string) interface{} { return nil }
func (m *MockHttpContext) SetUserAttribute(key string, value interface{}) {}
func (m *MockHttpContext) SetUserAttributeMap(kvmap map[string]interface{}) {}
func (m *MockHttpContext) GetUserAttributeMap() map[string]interface{} { return nil }
func (m *MockHttpContext) WriteUserAttributeToLog() error { return nil }
func (m *MockHttpContext) WriteUserAttributeToLogWithKey(key string) error { return nil }
func (m *MockHttpContext) WriteUserAttributeToTrace() error { return nil }
func (m *MockHttpContext) DontReadRequestBody() {}
func (m *MockHttpContext) DontReadResponseBody() {}
func (m *MockHttpContext) BufferRequestBody() {}
func (m *MockHttpContext) BufferResponseBody() {}
func (m *MockHttpContext) NeedPauseStreamingResponse() {}
func (m *MockHttpContext) PushBuffer(buffer []byte) {}
func (m *MockHttpContext) PopBuffer() []byte { return nil }
func (m *MockHttpContext) BufferQueueSize() int { return 0 }
func (m *MockHttpContext) DisableReroute() {}
func (m *MockHttpContext) SetRequestBodyBufferLimit(byteSize uint32) {}
func (m *MockHttpContext) SetResponseBodyBufferLimit(byteSize uint32) {}
func (m *MockHttpContext) RouteCall(method, url string, headers [][2]string, body []byte, callback iface.RouteResponseCallback) error {
return nil
}
func (m *MockHttpContext) GetExecutionPhase() iface.HTTPExecutionPhase { return 0 }
func (m *MockHttpContext) HasRequestBody() bool { return false }
func (m *MockHttpContext) HasResponseBody() bool { return false }
func (m *MockHttpContext) IsWebsocket() bool { return false }
func (m *MockHttpContext) IsBinaryRequestBody() bool { return false }
func (m *MockHttpContext) IsBinaryResponseBody() bool { return false }

View File

@@ -5,7 +5,6 @@ import (
"strings"
"testing"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/provider"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/higress-group/wasm-go/pkg/test"
"github.com/stretchr/testify/require"
@@ -244,84 +243,6 @@ func RunOpenAIOnHttpRequestHeadersTests(t *testing.T) {
require.Contains(t, authValue, "sk-openai-test123456789", "Authorization should contain OpenAI API token")
})
// 测试OpenAI请求头处理语音转写接口
t.Run("openai audio transcriptions request headers", func(t *testing.T) {
host, status := test.NewTestHost(basicOpenAIConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/audio/transcriptions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.HeaderStopIteration, action)
requestHeaders := host.GetRequestHeaders()
require.NotNil(t, requestHeaders)
hostValue, hasHost := test.GetHeaderValue(requestHeaders, ":authority")
require.True(t, hasHost)
require.Equal(t, "api.openai.com", hostValue)
pathValue, hasPath := test.GetHeaderValue(requestHeaders, ":path")
require.True(t, hasPath)
require.Contains(t, pathValue, "/v1/audio/transcriptions", "Path should contain audio transcriptions endpoint")
})
// 测试OpenAI请求头处理语音翻译接口
t.Run("openai audio translations request headers", func(t *testing.T) {
host, status := test.NewTestHost(basicOpenAIConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/audio/translations"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.HeaderStopIteration, action)
requestHeaders := host.GetRequestHeaders()
require.NotNil(t, requestHeaders)
pathValue, hasPath := test.GetHeaderValue(requestHeaders, ":path")
require.True(t, hasPath)
require.Contains(t, pathValue, "/v1/audio/translations", "Path should contain audio translations endpoint")
})
// 测试OpenAI请求头处理实时接口WebSocket握手
t.Run("openai realtime websocket handshake request headers", func(t *testing.T) {
host, status := test.NewTestHost(basicOpenAIConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/realtime?model=gpt-4o-realtime-preview"},
{":method", "GET"},
{"Connection", "Upgrade"},
{"Upgrade", "websocket"},
{"Sec-WebSocket-Version", "13"},
{"Sec-WebSocket-Key", "dGhlIHNhbXBsZSBub25jZQ=="},
})
// WebSocket 握手本身不应依赖请求体。受测试框架限制,某些场景可能仍返回 HeaderStopIteration。
require.True(t, action == types.ActionContinue || action == types.HeaderStopIteration)
requestHeaders := host.GetRequestHeaders()
require.NotNil(t, requestHeaders)
pathValue, hasPath := test.GetHeaderValue(requestHeaders, ":path")
require.True(t, hasPath)
require.Contains(t, pathValue, "/v1/realtime", "Path should contain realtime endpoint")
require.Contains(t, pathValue, "model=gpt-4o-realtime-preview", "Query parameters should be preserved")
})
// 测试OpenAI请求头处理图像生成接口
t.Run("openai image generation request headers", func(t *testing.T) {
host, status := test.NewTestHost(basicOpenAIConfig)
@@ -384,61 +305,6 @@ func RunOpenAIOnHttpRequestHeadersTests(t *testing.T) {
// 对于直接路径,应该保持原有路径
require.Contains(t, pathValue, "/v1/chat/completions", "Path should be preserved for direct custom path")
})
// 测试OpenAI自定义域名请求头处理间接路径语音转写
t.Run("openai custom domain indirect path audio transcriptions request headers", func(t *testing.T) {
host, status := test.NewTestHost(openAICustomDomainIndirectPathConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/audio/transcriptions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.HeaderStopIteration, action)
requestHeaders := host.GetRequestHeaders()
require.NotNil(t, requestHeaders)
hostValue, hasHost := test.GetHeaderValue(requestHeaders, ":authority")
require.True(t, hasHost)
require.Equal(t, "custom.openai.com", hostValue, "Host should be changed to custom domain")
pathValue, hasPath := test.GetHeaderValue(requestHeaders, ":path")
require.True(t, hasPath)
require.Contains(t, pathValue, "/api/audio/transcriptions", "Path should be rewritten with indirect custom prefix")
})
// 测试OpenAI自定义域名请求头处理间接路径 realtimeWebSocket握手
t.Run("openai custom domain indirect path realtime websocket handshake request headers", func(t *testing.T) {
host, status := test.NewTestHost(openAICustomDomainIndirectPathConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/realtime?model=gpt-4o-realtime-preview"},
{":method", "GET"},
{"Connection", "Upgrade"},
{"Upgrade", "websocket"},
{"Sec-WebSocket-Version", "13"},
{"Sec-WebSocket-Key", "dGhlIHNhbXBsZSBub25jZQ=="},
})
// WebSocket 握手本身不应依赖请求体。受测试框架限制,某些场景可能仍返回 HeaderStopIteration。
require.True(t, action == types.ActionContinue || action == types.HeaderStopIteration)
requestHeaders := host.GetRequestHeaders()
require.NotNil(t, requestHeaders)
pathValue, hasPath := test.GetHeaderValue(requestHeaders, ":path")
require.True(t, hasPath)
require.Contains(t, pathValue, "/api/realtime", "Path should be rewritten with indirect custom prefix")
require.Contains(t, pathValue, "model=gpt-4o-realtime-preview", "Query parameters should be preserved")
})
})
}
@@ -998,158 +864,3 @@ func RunOpenAIOnStreamingResponseBodyTests(t *testing.T) {
})
})
}
// 测试配置OpenAI配置 + promoteThinkingOnEmpty
var openAIPromoteThinkingConfig = func() json.RawMessage {
data, _ := json.Marshal(map[string]interface{}{
"provider": map[string]interface{}{
"type": "openai",
"apiTokens": []string{"sk-openai-test123456789"},
"promoteThinkingOnEmpty": true,
},
})
return data
}()
// 测试配置OpenAI配置 + hiclawMode
var openAIHiclawModeConfig = func() json.RawMessage {
data, _ := json.Marshal(map[string]interface{}{
"provider": map[string]interface{}{
"type": "openai",
"apiTokens": []string{"sk-openai-test123456789"},
"hiclawMode": true,
},
})
return data
}()
func RunOpenAIPromoteThinkingOnEmptyTests(t *testing.T) {
// Config parsing tests via host framework
test.RunGoTest(t, func(t *testing.T) {
t.Run("promoteThinkingOnEmpty config parses", func(t *testing.T) {
host, status := test.NewTestHost(openAIPromoteThinkingConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
config, err := host.GetMatchConfig()
require.NoError(t, err)
require.NotNil(t, config)
})
t.Run("hiclawMode config parses", func(t *testing.T) {
host, status := test.NewTestHost(openAIHiclawModeConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
config, err := host.GetMatchConfig()
require.NoError(t, err)
require.NotNil(t, config)
})
})
// Non-streaming promote logic tests via provider functions directly
t.Run("promotes reasoning_content when content is empty string", func(t *testing.T) {
body := []byte(`{"choices":[{"index":0,"message":{"role":"assistant","content":"","reasoning_content":"这是思考内容"},"finish_reason":"stop"}]}`)
result, err := provider.PromoteThinkingOnEmptyResponse(body)
require.NoError(t, err)
require.Contains(t, string(result), `"content":"这是思考内容"`)
require.NotContains(t, string(result), `"reasoning_content":"这是思考内容"`)
})
t.Run("promotes reasoning_content when content is nil", func(t *testing.T) {
body := []byte(`{"choices":[{"index":0,"message":{"role":"assistant","reasoning_content":"思考结果"},"finish_reason":"stop"}]}`)
result, err := provider.PromoteThinkingOnEmptyResponse(body)
require.NoError(t, err)
require.Contains(t, string(result), `"content":"思考结果"`)
})
t.Run("no promotion when content is present", func(t *testing.T) {
body := []byte(`{"choices":[{"index":0,"message":{"role":"assistant","content":"正常回复","reasoning_content":"思考过程"},"finish_reason":"stop"}]}`)
result, err := provider.PromoteThinkingOnEmptyResponse(body)
require.NoError(t, err)
require.Equal(t, string(body), string(result))
})
t.Run("no promotion when no reasoning", func(t *testing.T) {
body := []byte(`{"choices":[{"index":0,"message":{"role":"assistant","content":"正常回复"},"finish_reason":"stop"}]}`)
result, err := provider.PromoteThinkingOnEmptyResponse(body)
require.NoError(t, err)
require.Equal(t, string(body), string(result))
})
t.Run("no promotion when both empty", func(t *testing.T) {
body := []byte(`{"choices":[{"index":0,"message":{"role":"assistant","content":""},"finish_reason":"stop"}]}`)
result, err := provider.PromoteThinkingOnEmptyResponse(body)
require.NoError(t, err)
require.Equal(t, string(body), string(result))
})
t.Run("invalid json returns error", func(t *testing.T) {
body := []byte(`not json`)
result, err := provider.PromoteThinkingOnEmptyResponse(body)
require.Error(t, err)
require.Equal(t, string(body), string(result))
})
}
// RunOpenAIPromoteThinkingOnEmptyStreamingTests verifies the streaming variant of
// thinking-content promotion. The provider helpers are invoked directly because
// the test framework does not expose GetStreamingResponseBody.
func RunOpenAIPromoteThinkingOnEmptyStreamingTests(t *testing.T) {
	t.Run("streaming: buffers reasoning and flushes on end when no content", func(t *testing.T) {
		httpCtx := NewMockHttpContext()
		// A chunk carrying only reasoning_content must not surface it inline.
		chunk := []byte(`{"choices":[{"index":0,"delta":{"reasoning_content":"流式思考"}}]}`)
		out, err := provider.PromoteStreamingThinkingOnEmptyChunk(httpCtx, chunk)
		require.NoError(t, err)
		require.NotContains(t, string(out), `"content":"流式思考"`)
		// At stream end, the buffered reasoning is emitted as regular content.
		flushed := provider.PromoteStreamingThinkingFlush(httpCtx)
		require.NotNil(t, flushed)
		require.Contains(t, string(flushed), `"content":"流式思考"`)
	})
	t.Run("streaming: no flush when content was seen", func(t *testing.T) {
		httpCtx := NewMockHttpContext()
		// Deliver a real content delta first...
		first := []byte(`{"choices":[{"index":0,"delta":{"content":"正文"}}]}`)
		_, _ = provider.PromoteStreamingThinkingOnEmptyChunk(httpCtx, first)
		// ...then a reasoning-only chunk, which must pass through untouched.
		second := []byte(`{"choices":[{"index":0,"delta":{"reasoning_content":"后续思考"}}]}`)
		out, err := provider.PromoteStreamingThinkingOnEmptyChunk(httpCtx, second)
		require.NoError(t, err)
		require.Equal(t, string(second), string(out))
		// No flush is expected once content has been observed.
		require.Nil(t, provider.PromoteStreamingThinkingFlush(httpCtx))
	})
	t.Run("streaming: accumulates multiple reasoning chunks", func(t *testing.T) {
		httpCtx := NewMockHttpContext()
		for _, payload := range [][]byte{
			[]byte(`{"choices":[{"index":0,"delta":{"reasoning_content":"第一段"}}]}`),
			[]byte(`{"choices":[{"index":0,"delta":{"reasoning_content":"第二段"}}]}`),
		} {
			_, _ = provider.PromoteStreamingThinkingOnEmptyChunk(httpCtx, payload)
		}
		flushed := provider.PromoteStreamingThinkingFlush(httpCtx)
		require.NotNil(t, flushed)
		// Buffered segments are concatenated in arrival order.
		require.Contains(t, string(flushed), `"content":"第一段第二段"`)
	})
	t.Run("streaming: no flush when no reasoning buffered", func(t *testing.T) {
		httpCtx := NewMockHttpContext()
		require.Nil(t, provider.PromoteStreamingThinkingFlush(httpCtx))
	})
	t.Run("streaming: invalid json returns original", func(t *testing.T) {
		httpCtx := NewMockHttpContext()
		malformed := []byte(`not json`)
		out, err := provider.PromoteStreamingThinkingOnEmptyChunk(httpCtx, malformed)
		require.NoError(t, err)
		require.Equal(t, string(malformed), string(out))
	})
}

View File

@@ -100,22 +100,6 @@ var qwenEnableCompatibleConfig = func() json.RawMessage {
return data
}()
// Test config: qwen with the "original" protocol plus compatible mode enabled
// (used to cover the provider.GetApiName branch).
var qwenOriginalCompatibleConfig = func() json.RawMessage {
	raw, _ := json.Marshal(map[string]interface{}{
		"provider": map[string]interface{}{
			"protocol":             "original",
			"qwenEnableCompatible": true,
			"type":                 "qwen",
			"apiTokens":            []string{"sk-qwen-original-compatible"},
			"modelMapping": map[string]string{
				"*": "qwen-turbo",
			},
		},
	})
	return raw
}()
// 测试配置qwen文件ID配置
var qwenFileIdsConfig = func() json.RawMessage {
data, _ := json.Marshal(map[string]interface{}{
@@ -175,15 +159,6 @@ var qwenConflictConfig = func() json.RawMessage {
return data
}()
// hasUnsupportedAPINameError reports whether any captured error-log line
// mentions an unsupported API name.
func hasUnsupportedAPINameError(errorLogs []string) bool {
	const marker = "unsupported API name"
	for i := range errorLogs {
		if strings.Contains(errorLogs[i], marker) {
			return true
		}
	}
	return false
}
func RunQwenParseConfigTests(t *testing.T) {
test.RunGoTest(t, func(t *testing.T) {
// 测试基本qwen配置解析
@@ -428,29 +403,6 @@ func RunQwenOnHttpRequestHeadersTests(t *testing.T) {
require.True(t, hasPath)
require.Contains(t, pathValue, "/compatible-mode/v1/chat/completions", "Path should use compatible mode path")
})
// 测试qwen兼容模式请求头处理responses接口
t.Run("qwen compatible mode responses request headers", func(t *testing.T) {
host, status := test.NewTestHost(qwenEnableCompatibleConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/responses"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.HeaderStopIteration, action)
requestHeaders := host.GetRequestHeaders()
require.NotNil(t, requestHeaders)
pathValue, hasPath := test.GetHeaderValue(requestHeaders, ":path")
require.True(t, hasPath)
require.Contains(t, pathValue, "/api/v2/apps/protocols/compatible-mode/v1/responses", "Path should use compatible mode responses path")
})
})
}
@@ -699,112 +651,6 @@ func RunQwenOnHttpRequestBodyTests(t *testing.T) {
}
require.True(t, hasCompatibleLogs, "Should have compatible mode processing logs")
})
// 测试qwen请求体处理兼容模式 responses接口
t.Run("qwen compatible mode responses request body", func(t *testing.T) {
host, status := test.NewTestHost(qwenEnableCompatibleConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/responses"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
requestBody := `{"model":"qwen-turbo","input":"test"}`
action := host.CallOnHttpRequestBody([]byte(requestBody))
require.Equal(t, types.ActionContinue, action)
processedBody := host.GetRequestBody()
require.NotNil(t, processedBody)
require.Contains(t, string(processedBody), "qwen-turbo", "Model name should be preserved in responses request")
})
// 测试qwen请求体处理非兼容模式 responses接口应报不支持
t.Run("qwen non-compatible mode responses request body unsupported", func(t *testing.T) {
host, status := test.NewTestHost(basicQwenConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/responses"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.HeaderStopIteration, action)
requestHeaders := host.GetRequestHeaders()
require.NotNil(t, requestHeaders)
pathValue, hasPath := test.GetHeaderValue(requestHeaders, ":path")
require.True(t, hasPath)
require.Contains(t, pathValue, "/v1/responses", "Path should remain unchanged when responses is unsupported")
requestBody := `{"model":"qwen-turbo","input":"test"}`
bodyAction := host.CallOnHttpRequestBody([]byte(requestBody))
require.Equal(t, types.ActionContinue, bodyAction)
hasUnsupportedErr := hasUnsupportedAPINameError(host.GetErrorLogs())
require.True(t, hasUnsupportedErr, "Should log unsupported API name for non-compatible responses")
})
// 覆盖 qwen.GetApiName 中以下分支:
// - qwenCompatibleTextEmbeddingPath => ApiNameEmbeddings
// - qwenCompatibleResponsesPath => ApiNameResponses
// - qwenAsyncAIGCPath => ApiNameQwenAsyncAIGC
// - qwenAsyncTaskPath => ApiNameQwenAsyncTask
t.Run("qwen original protocol get api name coverage for compatible embeddings responses and async paths", func(t *testing.T) {
cases := []struct {
name string
path string
}{
{
name: "compatible embeddings path",
path: "/compatible-mode/v1/embeddings",
},
{
name: "compatible responses path",
path: "/api/v2/apps/protocols/compatible-mode/v1/responses",
},
{
name: "async aigc path",
path: "/api/v1/services/aigc/custom-async-endpoint",
},
{
name: "async task path",
path: "/api/v1/tasks/task-123",
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
host, status := test.NewTestHost(qwenOriginalCompatibleConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", tc.path},
{":method", "POST"},
{"Content-Type", "application/json"},
})
// 测试框架中 action 可能表现为 Continue 或 HeaderStopIteration
// 这里关注的是后续 body 阶段不出现 unsupported API name。
require.True(t, action == types.ActionContinue || action == types.HeaderStopIteration)
requestBody := `{"model":"qwen-turbo","input":"test"}`
bodyAction := host.CallOnHttpRequestBody([]byte(requestBody))
require.Equal(t, types.ActionContinue, bodyAction)
hasUnsupportedErr := hasUnsupportedAPINameError(host.GetErrorLogs())
require.False(t, hasUnsupportedErr, "Path should be recognized by qwen.GetApiName in original protocol")
})
}
})
})
}
@@ -1140,51 +986,6 @@ func RunQwenOnHttpResponseBodyTests(t *testing.T) {
require.Contains(t, responseStr, "chat.completion", "Response should contain chat completion object")
require.Contains(t, responseStr, "qwen-turbo", "Response should contain model name")
})
// 测试qwen响应体处理兼容模式 responses 接口透传)
t.Run("qwen compatible mode responses response body", func(t *testing.T) {
host, status := test.NewTestHost(qwenEnableCompatibleConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/responses"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
requestBody := `{"model":"qwen-turbo","input":"test"}`
host.CallOnHttpRequestBody([]byte(requestBody))
responseHeaders := [][2]string{
{":status", "200"},
{"Content-Type", "application/json"},
}
host.CallOnHttpResponseHeaders(responseHeaders)
responseBody := `{
"id": "resp-123",
"object": "response",
"status": "completed",
"output": [{
"type": "message",
"role": "assistant",
"content": [{
"type": "output_text",
"text": "hello"
}]
}]
}`
action := host.CallOnHttpResponseBody([]byte(responseBody))
require.Equal(t, types.ActionContinue, action)
processedResponseBody := host.GetResponseBody()
require.NotNil(t, processedResponseBody)
responseStr := string(processedResponseBody)
require.Contains(t, responseStr, "\"object\": \"response\"", "Responses API payload should be passthrough in compatible mode")
require.Contains(t, responseStr, "\"text\": \"hello\"", "Assistant content should be preserved")
})
})
}

View File

@@ -1,9 +1,7 @@
package test
import (
"bytes"
"encoding/json"
"mime/multipart"
"strings"
"testing"
@@ -691,7 +689,7 @@ func RunVertexOpenAICompatibleModeOnHttpRequestBodyTests(t *testing.T) {
func RunVertexExpressModeOnStreamingResponseBodyTests(t *testing.T) {
test.RunTest(t, func(t *testing.T) {
// 测试 Vertex Express Mode 流式响应处理:最后一个 chunk 不应丢失
// 测试 Vertex Express Mode 流式响应处理
t.Run("vertex express mode streaming response body", func(t *testing.T) {
host, status := test.NewTestHost(vertexExpressModeConfig)
defer host.Reset()
@@ -709,9 +707,6 @@ func RunVertexExpressModeOnStreamingResponseBodyTests(t *testing.T) {
requestBody := `{"model":"gemini-2.5-flash","messages":[{"role":"user","content":"test"}],"stream":true}`
host.CallOnHttpRequestBody([]byte(requestBody))
// 设置响应属性确保IsResponseFromUpstream()返回true
host.SetProperty([]string{"response", "code_details"}, []byte("via_upstream"))
// 设置流式响应头
responseHeaders := [][2]string{
{":status", "200"},
@@ -720,8 +715,8 @@ func RunVertexExpressModeOnStreamingResponseBodyTests(t *testing.T) {
host.CallOnHttpResponseHeaders(responseHeaders)
// 模拟流式响应体
chunk1 := "data: {\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"Hello\"}],\"role\":\"model\"},\"finishReason\":\"\",\"index\":0}],\"usageMetadata\":{\"promptTokenCount\":9,\"candidatesTokenCount\":5,\"totalTokenCount\":14}}\n\n"
chunk2 := "data: {\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"Hello! How can I help you today?\"}],\"role\":\"model\"},\"finishReason\":\"STOP\",\"index\":0}],\"usageMetadata\":{\"promptTokenCount\":9,\"candidatesTokenCount\":12,\"totalTokenCount\":21}}\n\n"
chunk1 := `data: {"candidates":[{"content":{"parts":[{"text":"Hello"}],"role":"model"},"finishReason":"","index":0}],"usageMetadata":{"promptTokenCount":9,"candidatesTokenCount":5,"totalTokenCount":14}}`
chunk2 := `data: {"candidates":[{"content":{"parts":[{"text":"Hello! How can I help you today?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":9,"candidatesTokenCount":12,"totalTokenCount":21}}`
// 处理流式响应体
action1 := host.CallOnHttpStreamingResponseBody([]byte(chunk1), false)
@@ -730,194 +725,16 @@ func RunVertexExpressModeOnStreamingResponseBodyTests(t *testing.T) {
action2 := host.CallOnHttpStreamingResponseBody([]byte(chunk2), true)
require.Equal(t, types.ActionContinue, action2)
// 验证最后一个 chunk 的内容不会被 [DONE] 覆盖
transformedResponseBody := host.GetResponseBody()
require.NotNil(t, transformedResponseBody)
responseStr := string(transformedResponseBody)
require.Contains(t, responseStr, "Hello! How can I help you today?", "last chunk content should be preserved")
require.Contains(t, responseStr, "data: [DONE]", "stream should end with [DONE]")
})
// 测试 Vertex Express Mode 流式响应处理:单个 SSE 事件被拆包时可正确重组
t.Run("vertex express mode streaming response body with split sse event", func(t *testing.T) {
host, status := test.NewTestHost(vertexExpressModeConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
requestBody := `{"model":"gemini-2.5-flash","messages":[{"role":"user","content":"test"}],"stream":true}`
host.CallOnHttpRequestBody([]byte(requestBody))
// 设置响应属性确保IsResponseFromUpstream()返回true
host.SetProperty([]string{"response", "code_details"}, []byte("via_upstream"))
responseHeaders := [][2]string{
{":status", "200"},
{"Content-Type", "text/event-stream"},
}
host.CallOnHttpResponseHeaders(responseHeaders)
fullEvent := "data: {\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"split chunk\"}],\"role\":\"model\"},\"finishReason\":\"STOP\",\"index\":0}],\"usageMetadata\":{\"promptTokenCount\":1,\"candidatesTokenCount\":2,\"totalTokenCount\":3}}\n\n"
splitIdx := strings.Index(fullEvent, "chunk")
require.Greater(t, splitIdx, 0, "split marker should exist in test payload")
chunkPart1 := fullEvent[:splitIdx]
chunkPart2 := fullEvent[splitIdx:]
action1 := host.CallOnHttpStreamingResponseBody([]byte(chunkPart1), false)
require.Equal(t, types.ActionContinue, action1)
action2 := host.CallOnHttpStreamingResponseBody([]byte(chunkPart2), true)
require.Equal(t, types.ActionContinue, action2)
transformedResponseBody := host.GetResponseBody()
require.NotNil(t, transformedResponseBody)
responseStr := string(transformedResponseBody)
require.Contains(t, responseStr, "split chunk", "split SSE event should be reassembled and parsed")
require.Contains(t, responseStr, "data: [DONE]", "stream should end with [DONE]")
})
// 测试thoughtSignature 很大时,单个 SSE 事件被拆成多段也能重组并成功解析
t.Run("vertex express mode streaming response body with huge thought signature split across chunks", func(t *testing.T) {
host, status := test.NewTestHost(vertexExpressModeConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
requestBody := `{"model":"gemini-2.5-flash","messages":[{"role":"user","content":"test"}],"stream":true}`
host.CallOnHttpRequestBody([]byte(requestBody))
host.SetProperty([]string{"response", "code_details"}, []byte("via_upstream"))
host.CallOnHttpResponseHeaders([][2]string{
{":status", "200"},
{"Content-Type", "text/event-stream"},
})
hugeThoughtSignature := strings.Repeat("CmMBjz1rX4j+TQjtDy2rZxSdYOE1jUqDbRhWetraLlQNrkyaRNQZ/", 180)
fullEvent := "data: {\"candidates\":[{\"content\":{\"role\":\"model\",\"parts\":[{\"text\":\"thought-signature-merge-ok\",\"thoughtSignature\":\"" +
hugeThoughtSignature +
"\"}]},\"finishReason\":\"STOP\",\"index\":0}],\"usageMetadata\":{\"promptTokenCount\":28,\"candidatesTokenCount\":3589,\"totalTokenCount\":5240,\"thoughtsTokenCount\":1623}}\n\n"
signatureStart := strings.Index(fullEvent, "\"thoughtSignature\":\"")
require.Greater(t, signatureStart, 0, "thoughtSignature field should exist in test payload")
splitAt1 := signatureStart + len("\"thoughtSignature\":\"") + 700
splitAt2 := splitAt1 + 1600
require.Less(t, splitAt2, len(fullEvent)-1, "split indexes should keep payload in three chunks")
chunkPart1 := fullEvent[:splitAt1]
chunkPart2 := fullEvent[splitAt1:splitAt2]
chunkPart3 := fullEvent[splitAt2:]
action1 := host.CallOnHttpStreamingResponseBody([]byte(chunkPart1), false)
require.Equal(t, types.ActionContinue, action1)
firstBody := host.GetResponseBody()
require.Equal(t, 0, len(firstBody), "partial chunk should not be forwarded to client")
action2 := host.CallOnHttpStreamingResponseBody([]byte(chunkPart2), false)
require.Equal(t, types.ActionContinue, action2)
secondBody := host.GetResponseBody()
require.Equal(t, 0, len(secondBody), "partial chunk should not be forwarded to client")
action3 := host.CallOnHttpStreamingResponseBody([]byte(chunkPart3), true)
require.Equal(t, types.ActionContinue, action3)
transformedResponseBody := host.GetResponseBody()
require.NotNil(t, transformedResponseBody)
responseStr := string(transformedResponseBody)
require.Contains(t, responseStr, "thought-signature-merge-ok", "split huge thoughtSignature event should be reassembled and parsed")
require.Contains(t, responseStr, "data: [DONE]", "stream should end with [DONE]")
errorLogs := host.GetErrorLogs()
hasUnmarshalError := false
for _, log := range errorLogs {
if strings.Contains(log, "unable to unmarshal vertex response") {
hasUnmarshalError = true
// 验证流式响应处理
debugLogs := host.GetDebugLogs()
hasStreamingLogs := false
for _, log := range debugLogs {
if strings.Contains(log, "streaming") || strings.Contains(log, "chunk") || strings.Contains(log, "vertex") {
hasStreamingLogs = true
break
}
}
require.False(t, hasUnmarshalError, "should not have vertex unmarshal errors for split huge thoughtSignature event")
})
// 测试:上游已发送 [DONE],框架再触发空的最后回调时不应重复输出 [DONE]
t.Run("vertex express mode streaming response body with upstream done and empty final callback", func(t *testing.T) {
host, status := test.NewTestHost(vertexExpressModeConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
requestBody := `{"model":"gemini-2.5-flash","messages":[{"role":"user","content":"test"}],"stream":true}`
host.CallOnHttpRequestBody([]byte(requestBody))
host.SetProperty([]string{"response", "code_details"}, []byte("via_upstream"))
host.CallOnHttpResponseHeaders([][2]string{
{":status", "200"},
{"Content-Type", "text/event-stream"},
})
doneChunk := "data: [DONE]\n\n"
action1 := host.CallOnHttpStreamingResponseBody([]byte(doneChunk), false)
require.Equal(t, types.ActionContinue, action1)
firstBody := host.GetResponseBody()
require.NotNil(t, firstBody)
require.Contains(t, string(firstBody), "data: [DONE]", "first callback should output [DONE]")
action2 := host.CallOnHttpStreamingResponseBody([]byte{}, true)
require.Equal(t, types.ActionContinue, action2)
debugLogs := host.GetDebugLogs()
doneChunkLogCount := 0
for _, log := range debugLogs {
if strings.Contains(log, "=== modified response chunk: data: [DONE]") {
doneChunkLogCount++
}
}
require.Equal(t, 1, doneChunkLogCount, "[DONE] should only be emitted once when upstream already sent it")
})
// 测试:最后一个 chunk 缺少 SSE 结束空行时isLastChunk=true 也应正确解析并输出
t.Run("vertex express mode streaming response body last chunk without terminator", func(t *testing.T) {
host, status := test.NewTestHost(vertexExpressModeConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
requestBody := `{"model":"gemini-2.5-flash","messages":[{"role":"user","content":"test"}],"stream":true}`
host.CallOnHttpRequestBody([]byte(requestBody))
host.SetProperty([]string{"response", "code_details"}, []byte("via_upstream"))
host.CallOnHttpResponseHeaders([][2]string{
{":status", "200"},
{"Content-Type", "text/event-stream"},
})
lastChunkWithoutTerminator := "data: {\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"no terminator\"}],\"role\":\"model\"},\"finishReason\":\"STOP\",\"index\":0}],\"usageMetadata\":{\"promptTokenCount\":2,\"candidatesTokenCount\":3,\"totalTokenCount\":5}}"
action := host.CallOnHttpStreamingResponseBody([]byte(lastChunkWithoutTerminator), true)
require.Equal(t, types.ActionContinue, action)
transformedResponseBody := host.GetResponseBody()
require.NotNil(t, transformedResponseBody)
responseStr := string(transformedResponseBody)
require.Contains(t, responseStr, "no terminator", "last chunk without terminator should still be parsed")
require.Contains(t, responseStr, "data: [DONE]", "stream should end with [DONE]")
require.True(t, hasStreamingLogs, "Should have streaming response processing logs")
})
})
}
@@ -1456,324 +1273,6 @@ func RunVertexExpressModeImageGenerationResponseBodyTests(t *testing.T) {
})
}
// buildMultipartRequestBody assembles a multipart/form-data payload from plain
// form fields plus file attachments (each file part is named "upload-image.png")
// and returns the encoded body together with its Content-Type header value.
func buildMultipartRequestBody(t *testing.T, fields map[string]string, files map[string][]byte) ([]byte, string) {
	var body bytes.Buffer
	mw := multipart.NewWriter(&body)
	for name, val := range fields {
		require.NoError(t, mw.WriteField(name, val))
	}
	for name, content := range files {
		filePart, err := mw.CreateFormFile(name, "upload-image.png")
		require.NoError(t, err)
		_, err = filePart.Write(content)
		require.NoError(t, err)
	}
	require.NoError(t, mw.Close())
	return body.Bytes(), mw.FormDataContentType()
}
// RunVertexExpressModeImageEditVariationRequestBodyTests covers request-body
// translation for the OpenAI-compatible image edit (/v1/images/edits) and image
// variation (/v1/images/variations) endpoints in Vertex express mode. It checks
// that JSON and multipart inputs are rewritten into Vertex-style requests
// (inlineData parts, generateContent path with the API key) — the exact upstream
// request schema is asserted only via substring checks here.
func RunVertexExpressModeImageEditVariationRequestBodyTests(t *testing.T) {
	test.RunTest(t, func(t *testing.T) {
		// Tiny PNG encoded as a data URL, reused as the input image in every subtest.
		const testDataURL = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
		// Edit request with the image supplied as an OpenAI image_url object:
		// expect conversion to inlineData and a generateContent path carrying the key.
		t.Run("vertex express mode image edit request body with image_url", func(t *testing.T) {
			host, status := test.NewTestHost(vertexExpressModeConfig)
			defer host.Reset()
			require.Equal(t, types.OnPluginStartStatusOK, status)
			host.CallOnHttpRequestHeaders([][2]string{
				{":authority", "example.com"},
				{":path", "/v1/images/edits"},
				{":method", "POST"},
				{"Content-Type", "application/json"},
			})
			requestBody := `{"model":"gemini-2.0-flash-exp","prompt":"Add sunglasses to the cat","image":{"image_url":{"url":"` + testDataURL + `"}},"size":"1024x1024"}`
			action := host.CallOnHttpRequestBody([]byte(requestBody))
			require.Equal(t, types.ActionContinue, action)
			processedBody := host.GetRequestBody()
			require.NotNil(t, processedBody)
			bodyStr := string(processedBody)
			require.Contains(t, bodyStr, "inlineData", "Request should contain inlineData converted from image_url")
			require.Contains(t, bodyStr, "Add sunglasses to the cat", "Prompt text should be preserved")
			require.NotContains(t, bodyStr, "image_url", "OpenAI image_url field should be converted to Vertex format")
			// Extract the rewritten :path pseudo-header to verify the upstream route.
			requestHeaders := host.GetRequestHeaders()
			pathHeader := ""
			for _, header := range requestHeaders {
				if header[0] == ":path" {
					pathHeader = header[1]
					break
				}
			}
			require.Contains(t, pathHeader, "generateContent", "Image edit should use generateContent action")
			require.Contains(t, pathHeader, "key=test-api-key-123456789", "Path should contain API key")
		})
		// Edit request with the image supplied as a bare data-URL string instead of
		// an image_url object; expects the same inlineData conversion.
		t.Run("vertex express mode image edit request body with image string", func(t *testing.T) {
			host, status := test.NewTestHost(vertexExpressModeConfig)
			defer host.Reset()
			require.Equal(t, types.OnPluginStartStatusOK, status)
			host.CallOnHttpRequestHeaders([][2]string{
				{":authority", "example.com"},
				{":path", "/v1/images/edits"},
				{":method", "POST"},
				{"Content-Type", "application/json"},
			})
			requestBody := `{"model":"gemini-2.0-flash-exp","prompt":"Add sunglasses to the cat","image":"` + testDataURL + `","size":"1024x1024"}`
			action := host.CallOnHttpRequestBody([]byte(requestBody))
			require.Equal(t, types.ActionContinue, action)
			processedBody := host.GetRequestBody()
			require.NotNil(t, processedBody)
			bodyStr := string(processedBody)
			require.Contains(t, bodyStr, "inlineData", "Request should contain inlineData converted from image string")
			require.Contains(t, bodyStr, "Add sunglasses to the cat", "Prompt text should be preserved")
		})
		// Multipart edit request: the form-data body must be converted to JSON and
		// the Content-Type header rewritten accordingly.
		t.Run("vertex express mode image edit multipart request body", func(t *testing.T) {
			host, status := test.NewTestHost(vertexExpressModeConfig)
			defer host.Reset()
			require.Equal(t, types.OnPluginStartStatusOK, status)
			body, contentType := buildMultipartRequestBody(t, map[string]string{
				"model":  "gemini-2.0-flash-exp",
				"prompt": "Add sunglasses to the cat",
				"size":   "1024x1024",
			}, map[string][]byte{
				"image": []byte("fake-image-content"),
			})
			host.CallOnHttpRequestHeaders([][2]string{
				{":authority", "example.com"},
				{":path", "/v1/images/edits"},
				{":method", "POST"},
				{"Content-Type", contentType},
			})
			action := host.CallOnHttpRequestBody(body)
			require.Equal(t, types.ActionContinue, action)
			processedBody := host.GetRequestBody()
			require.NotNil(t, processedBody)
			bodyStr := string(processedBody)
			require.Contains(t, bodyStr, "inlineData", "Multipart image should be converted to inlineData")
			require.Contains(t, bodyStr, "Add sunglasses to the cat", "Prompt text should be preserved")
			requestHeaders := host.GetRequestHeaders()
			require.True(t, test.HasHeaderWithValue(requestHeaders, "Content-Type", "application/json"), "Content-Type should be rewritten to application/json")
		})
		// Multipart variation request: no prompt field is supplied, so the plugin
		// is expected to inject a default variation prompt.
		t.Run("vertex express mode image variation multipart request body", func(t *testing.T) {
			host, status := test.NewTestHost(vertexExpressModeConfig)
			defer host.Reset()
			require.Equal(t, types.OnPluginStartStatusOK, status)
			body, contentType := buildMultipartRequestBody(t, map[string]string{
				"model": "gemini-2.0-flash-exp",
				"size":  "1024x1024",
			}, map[string][]byte{
				"image": []byte("fake-image-content"),
			})
			host.CallOnHttpRequestHeaders([][2]string{
				{":authority", "example.com"},
				{":path", "/v1/images/variations"},
				{":method", "POST"},
				{"Content-Type", contentType},
			})
			action := host.CallOnHttpRequestBody(body)
			require.Equal(t, types.ActionContinue, action)
			processedBody := host.GetRequestBody()
			require.NotNil(t, processedBody)
			bodyStr := string(processedBody)
			require.Contains(t, bodyStr, "inlineData", "Multipart image should be converted to inlineData")
			require.Contains(t, bodyStr, "Create variations of the provided image.", "Variation request should inject a default prompt")
			requestHeaders := host.GetRequestHeaders()
			require.True(t, test.HasHeaderWithValue(requestHeaders, "Content-Type", "application/json"), "Content-Type should be rewritten to application/json")
		})
		// Model mapping: with the mapping config, a request for "gpt-4" must be
		// routed to the mapped model name in the upstream path.
		t.Run("vertex express mode image edit with model mapping", func(t *testing.T) {
			host, status := test.NewTestHost(vertexExpressModeWithModelMappingConfig)
			defer host.Reset()
			require.Equal(t, types.OnPluginStartStatusOK, status)
			host.CallOnHttpRequestHeaders([][2]string{
				{":authority", "example.com"},
				{":path", "/v1/images/edits"},
				{":method", "POST"},
				{"Content-Type", "application/json"},
			})
			requestBody := `{"model":"gpt-4","prompt":"Turn it into watercolor","image_url":{"url":"` + testDataURL + `"}}`
			action := host.CallOnHttpRequestBody([]byte(requestBody))
			require.Equal(t, types.ActionContinue, action)
			// Extract the rewritten :path pseudo-header to check the mapped model.
			requestHeaders := host.GetRequestHeaders()
			pathHeader := ""
			for _, header := range requestHeaders {
				if header[0] == ":path" {
					pathHeader = header[1]
					break
				}
			}
			require.Contains(t, pathHeader, "gemini-2.5-flash", "Path should contain mapped model name")
		})
		// JSON variation request with an image_url object: expects inlineData
		// conversion, the injected default prompt, and the generateContent path.
		t.Run("vertex express mode image variation request body with image_url", func(t *testing.T) {
			host, status := test.NewTestHost(vertexExpressModeConfig)
			defer host.Reset()
			require.Equal(t, types.OnPluginStartStatusOK, status)
			host.CallOnHttpRequestHeaders([][2]string{
				{":authority", "example.com"},
				{":path", "/v1/images/variations"},
				{":method", "POST"},
				{"Content-Type", "application/json"},
			})
			requestBody := `{"model":"gemini-2.0-flash-exp","image_url":{"url":"` + testDataURL + `"}}`
			action := host.CallOnHttpRequestBody([]byte(requestBody))
			require.Equal(t, types.ActionContinue, action)
			processedBody := host.GetRequestBody()
			require.NotNil(t, processedBody)
			bodyStr := string(processedBody)
			require.Contains(t, bodyStr, "inlineData", "Request should contain inlineData converted from image_url")
			require.Contains(t, bodyStr, "Create variations of the provided image.", "Variation request should inject a default prompt")
			requestHeaders := host.GetRequestHeaders()
			pathHeader := ""
			for _, header := range requestHeaders {
				if header[0] == ":path" {
					pathHeader = header[1]
					break
				}
			}
			require.Contains(t, pathHeader, "generateContent", "Image variation should use generateContent action")
			require.Contains(t, pathHeader, "key=test-api-key-123456789", "Path should contain API key")
		})
	})
}
// RunVertexExpressModeImageEditVariationResponseBodyTests covers response-body
// translation for the image edit and image variation endpoints in Vertex express
// mode: a Vertex generateContent response carrying an inlineData image part is
// expected to be rewritten into an OpenAI-style images response containing
// b64_json and usage fields (asserted via substring checks).
func RunVertexExpressModeImageEditVariationResponseBodyTests(t *testing.T) {
	test.RunTest(t, func(t *testing.T) {
		// Tiny PNG encoded as a data URL, used as the request image in both subtests.
		const testDataURL = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
		t.Run("vertex express mode image edit response body", func(t *testing.T) {
			host, status := test.NewTestHost(vertexExpressModeConfig)
			defer host.Reset()
			require.Equal(t, types.OnPluginStartStatusOK, status)
			// Drive a full edit request first so the response phase has context.
			host.CallOnHttpRequestHeaders([][2]string{
				{":authority", "example.com"},
				{":path", "/v1/images/edits"},
				{":method", "POST"},
				{"Content-Type", "application/json"},
			})
			requestBody := `{"model":"gemini-2.0-flash-exp","prompt":"Add glasses","image_url":{"url":"` + testDataURL + `"}}`
			host.CallOnHttpRequestBody([]byte(requestBody))
			// Mark the response as coming from upstream so response processing runs.
			host.SetProperty([]string{"response", "code_details"}, []byte("via_upstream"))
			host.CallOnHttpResponseHeaders([][2]string{
				{":status", "200"},
				{"Content-Type", "application/json"},
			})
			// Vertex-style response: one candidate with an inlineData image part.
			responseBody := `{
				"candidates": [{
					"content": {
						"role": "model",
						"parts": [{
							"inlineData": {
								"mimeType": "image/png",
								"data": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
							}
						}]
					}
				}],
				"usageMetadata": {
					"promptTokenCount": 12,
					"candidatesTokenCount": 1024,
					"totalTokenCount": 1036
				}
			}`
			action := host.CallOnHttpResponseBody([]byte(responseBody))
			require.Equal(t, types.ActionContinue, action)
			processedResponseBody := host.GetResponseBody()
			require.NotNil(t, processedResponseBody)
			responseStr := string(processedResponseBody)
			require.Contains(t, responseStr, "b64_json", "Response should contain b64_json field")
			require.Contains(t, responseStr, "usage", "Response should contain usage field")
		})
		t.Run("vertex express mode image variation response body", func(t *testing.T) {
			host, status := test.NewTestHost(vertexExpressModeConfig)
			defer host.Reset()
			require.Equal(t, types.OnPluginStartStatusOK, status)
			// Variation request has no prompt field; only the source image is sent.
			host.CallOnHttpRequestHeaders([][2]string{
				{":authority", "example.com"},
				{":path", "/v1/images/variations"},
				{":method", "POST"},
				{"Content-Type", "application/json"},
			})
			requestBody := `{"model":"gemini-2.0-flash-exp","image_url":{"url":"` + testDataURL + `"}}`
			host.CallOnHttpRequestBody([]byte(requestBody))
			// Mark the response as coming from upstream so response processing runs.
			host.SetProperty([]string{"response", "code_details"}, []byte("via_upstream"))
			host.CallOnHttpResponseHeaders([][2]string{
				{":status", "200"},
				{"Content-Type", "application/json"},
			})
			responseBody := `{
				"candidates": [{
					"content": {
						"role": "model",
						"parts": [{
							"inlineData": {
								"mimeType": "image/png",
								"data": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
							}
						}]
					}
				}],
				"usageMetadata": {
					"promptTokenCount": 8,
					"candidatesTokenCount": 768,
					"totalTokenCount": 776
				}
			}`
			action := host.CallOnHttpResponseBody([]byte(responseBody))
			require.Equal(t, types.ActionContinue, action)
			processedResponseBody := host.GetResponseBody()
			require.NotNil(t, processedResponseBody)
			responseStr := string(processedResponseBody)
			require.Contains(t, responseStr, "b64_json", "Response should contain b64_json field")
			require.Contains(t, responseStr, "usage", "Response should contain usage field")
		})
	})
}
// ==================== Vertex Raw 模式测试 ====================
func RunVertexRawModeOnHttpRequestHeadersTests(t *testing.T) {

View File

@@ -7,55 +7,55 @@ This release includes **6** updates, covering feature enhancements, bug fixes, a
### Distribution of Updates
- **New Features**: 4
- **Bug Fixes**: 2
- **New Features**: 4
- **Bug Fixes**: 2
---
## 📝 Full Changelog
## 📝 Full Change Log
### 🚀 New Features (Features)
### 🚀 New Features
- **Related PR**: [#666](https://github.com/higress-group/higress-console/pull/666) \
**Contributor**: @johnlanni \
**Change Log**: This PR adds support for `pluginImageRegistry` and `pluginImageNamespace` configuration to the built-in plugins and allows these configurations to be specified via environment variables, enabling users to customize plugin image locations without modifying the `plugins.properties` file. \
**Feature Value**: The new feature allows users to manage their application's plugin image sources more flexibly, enhancing system configurability and convenience, especially useful for users requiring specific image repositories or namespaces.
**Change Log**: Added configuration options for the plugin image registry and namespace. Supports dynamically specifying built-in WASM plugin image addresses via environment variables `HIGRESS_ADMIN_WASM_PLUGIN_IMAGE_REGISTRY`/`NAMESPACE`, eliminating the need to modify `plugins.properties`. Corresponding Helm Chart `values` parameters and deployment template rendering logic have also been integrated. \
**Feature Value**: Enables users to flexibly configure WASM plugin image sources across diverse network environments (e.g., private cloud, air-gapped environments), improving deployment flexibility and security; reduces operational overhead and mitigates maintenance difficulties and upgrade risks associated with hard-coded configurations.
- **Related PR**: [#665](https://github.com/higress-group/higress-console/pull/665) \
**Contributor**: @johnlanni \
**Change Log**: This PR introduces advanced configuration options for Zhipu AI and Claude, including custom domains, code plan mode switching, and API version settings. \
**Feature Value**: It enhances the flexibility and functionality of AI services, allowing users to control AI service behavior more precisely, particularly beneficial for scenarios needing optimized code generation capabilities.
**Change Log**: Added support for Zhipu AI's Code Plan mode and Claude's API version configuration. Achieved by extending `ZhipuAILlmProviderHandler` and `ClaudeLlmProviderHandler` to support custom domains, code-generation optimization toggles, and API version parameters—enhancing LLM invocation flexibility and scenario adaptability. \
**Feature Value**: Allows users to enable model-specific code generation modes (e.g., Zhipu Code Plan) based on AI vendor characteristics and precisely control Claude API versions, significantly improving code generation quality and compatibility, lowering integration barriers, and strengthening the practicality of the AI Gateway in multi-model collaborative development scenarios.
- **Related PR**: [#661](https://github.com/higress-group/higress-console/pull/661) \
**Contributor**: @johnlanni \
**Change Log**: This PR enables a lightweight mode for ai-statistics plugin configuration, adds the USE_DEFAULT_RESPONSE_ATTRIBUTES constant, and applies this setting in AiRouteServiceImpl. \
**Feature Value**: By enabling the lightweight mode, this feature optimizes AI routing performance, especially suitable for production environments, reducing response attribute buffering and improving system efficiency.
**Change Log**: Introduced a lightweight mode configuration for the AI statistics plugin. Added the `USE_DEFAULT_ATTRIBUTES` constant and enabled `use_default_response_attributes: true` in `AiRouteServiceImpl`, reducing response attribute collection overhead and preventing memory buffer issues. \
**Feature Value**: Improves production environment stability and performance while lowering resource consumption of AI route statistics; eliminates the need for manual configuration of complex attributes—the system automatically adopts a default, streamlined attribute set—simplifying operations and enhancing reliability under high-concurrency workloads.
- **Related PR**: [#657](https://github.com/higress-group/higress-console/pull/657) \
**Contributor**: @liangziccc \
**Change Log**: This PR removes the existing input box search function and adds multiple selection dropdown filters for route names, domain names, and other attributes, while also implementing language adaptation for Chinese and English. \
**Feature Value**: By adding multi-condition filtering, users can more accurately locate and manage specific route information, enhancing system usability and flexibility, which helps improve work efficiency.
**Change Log**: Removed the original text-input search from the Route Management page and introduced multi-select dropdown filters for five fields: Route Name, Domain, Route Conditions, Destination Service, and Request Authorization. Completed Chinese-English internationalization support and implemented multi-dimensional composite filtering (OR within each field, AND across fields), significantly improving data filtering precision. \
**Feature Value**: Enables users to quickly locate specific routes via intuitive dropdown selection, avoiding input errors; bilingual support accommodates international usage scenarios; multi-condition combined filtering substantially boosts query efficiency and operational experience for SREs managing large-scale route configurations.
### 🐛 Bug Fixes (Bug Fixes)
### 🐛 Bug Fixes
- **Related PR**: [#662](https://github.com/higress-group/higress-console/pull/662) \
**Contributor**: @johnlanni \
**Change Log**: This PR corrects the mcp-server OCI image path from `mcp-server/all-in-one` to `plugins/mcp-server`, aligning with the new plugin structure. \
**Feature Value**: Updating the image path ensures consistency with the new plugin directory, ensuring proper service operation and avoiding deployment or runtime issues due to incorrect paths.
**Change Log**: Fixed an issue where the OCI image path for the `mcp-server` plugin was not migrated synchronously—updated the original path `mcp-server/all-in-one` to `plugins/mcp-server` to align with the new plugin directory structure, ensuring correct plugin loading and deployment. \
**Feature Value**: Prevents plugin pull or startup failures caused by incorrect image paths, guaranteeing stable operation and seamless upgrades of the `mcp-server` plugin within the Higress Gateway, thereby enhancing deployment reliability in plugin-driven use cases.
- **Related PR**: [#654](https://github.com/higress-group/higress-console/pull/654) \
**Contributor**: @fgksking \
**Change Log**: This PR resolves the issue of empty request bodies displayed in Swagger UI by upgrading the springdoc's swagger-ui dependency, ensuring the accuracy of API documentation. \
**Feature Value**: Fixing the empty request body value issue in Swagger UI improves user experience and developer trust in API documentation, ensuring consistency between interface testing and actual usage.
**Change Log**: Upgraded the `swagger-ui` version dependency of `springdoc` by introducing a newer version of the `webjars-locator` dependency in `pom.xml` and updating related version properties, resolving an issue where request body schemas appeared empty in Swagger UI. \
**Feature Value**: Ensures users can correctly view and interact with request body structures when using the API documentation functionality in the Higress Console, improving API debugging experience and development efficiency—and preventing interface misinterpretations caused by documentation display anomalies.
---
## 📊 Release Statistics
- 🚀 New Features: 4
- 🐛 Bug Fixes: 2
- 🚀 New Features: 4
- 🐛 Bug Fixes: 2
**Total**: 6 changes
**Total**: 6 changes
Thank you to all contributors for your hard work! 🎉

View File

@@ -18,35 +18,35 @@
- **Related PR**: [#666](https://github.com/higress-group/higress-console/pull/666) \
**Contributor**: @johnlanni \
**Change Log**: 此PR向内置插件添加了`pluginImageRegistry`和`pluginImageNamespace`配置支持,并通过环境变量来指定这些配置,使用户可以在不修改`plugins.properties`文件的情况下自定义插件镜像的位置。 \
**Feature Value**: 新增的功能允许用户更灵活地管理其应用的插件镜像源,提高了系统的可配置性和便利性,特别对于需要使用特定镜像仓库或命名空间的用户来说十分有用
**Change Log**: 新增插件镜像仓库和命名空间配置项支持通过环境变量HIGRESS_ADMIN_WASM_PLUGIN_IMAGE_REGISTRY/NAMESPACE动态指定内置插件镜像地址无需修改plugins.properties同时在Helm Chart中集成对应values参数与部署模板渲染逻辑。 \
**Feature Value**: 用户可在不同网络环境(如私有云、离线环境)灵活配置WASM插件镜像源,提升部署灵活性与安全性,降低运维成本,避免硬编码配置带来的维护困难和升级风险
- **Related PR**: [#665](https://github.com/higress-group/higress-console/pull/665) \
**Contributor**: @johnlanni \
**Change Log**: 本PR为Zhipu AI及Claude引入了高级配置选项支持包括自定义域、代码计划模式切换以及API版本设置。 \
**Feature Value**: 增强了AI服务的灵活性和功能性用户现在可以更精细地控制AI服务的行为特别是对于需要优化代码生成能力的场景特别有用
**Change Log**: 新增Zhipu AI的Code Plan模式支持和Claude的API版本配置能力通过扩展ZhipuAILlmProviderHandler和ClaudeLlmProviderHandler实现自定义域、代码生成优化开关及API版本参数提升大模型调用灵活性与场景适配性。 \
**Feature Value**: 用户可基于不同AI厂商特性启用代码专项生成模式(如Zhipu Code Plan),并精确控制Claude API版本,显著提升代码生成质量与兼容性,降低集成门槛,增强AI网关在多模型协同开发场景中的实用性
- **Related PR**: [#661](https://github.com/higress-group/higress-console/pull/661) \
**Contributor**: @johnlanni \
**Change Log**: 此PR为ai-statistics插件配置启用了轻量模式添加了USE_DEFAULT_RESPONSE_ATTRIBUTES常量并在AiRouteServiceImpl中应用了这一设置。 \
**Feature Value**: 通过启用轻量模式此功能优化了AI路由的性能尤其适合生产环境减少了响应属性缓冲提升了系统效率
**Change Log**: 为AI统计插件引入轻量模式配置新增USE_DEFAULT_ATTRIBUTES常量并在AiRouteServiceImpl中启用use_default_response_attributes: true减少响应属性采集开销避免内存缓冲问题。 \
**Feature Value**: 提升生产环境稳定性与性能降低AI路由统计的资源消耗用户无需手动配置复杂属性系统自动采用默认精简属性集简化运维并增强高并发场景下的可靠性
- **Related PR**: [#657](https://github.com/higress-group/higress-console/pull/657) \
**Contributor**: @liangziccc \
**Change Log**: PR移除了原有输入框搜索功能,新增了针对路由名称、域名等多个属性的多选下拉筛选,并实现了中英文语言适配。 \
**Feature Value**: 通过增加多条件筛选功能,用户能够更精确地定位和管理特定路由信息,提升了系统的易用性和灵活性,有助于提高工作效率
**Change Log**: PR在路由管理页移除了原有输入框搜索,新增路由名称、域名、路由条件、目标服务、请求授权五个字段的多选下拉筛选,并完成中英文国际化适配支持多维度组合筛选各字段内OR、字段间AND提升数据过滤精准度。 \
**Feature Value**: 用户可通过直观下拉选择快速定位特定路由,避免手动输入错误;中英文切换支持国际化使用场景;多条件联合筛选显著提升运维人员在海量路由配置中的查询效率和操作体验
### 🐛 Bug修复 (Bug Fixes)
- **Related PR**: [#662](https://github.com/higress-group/higress-console/pull/662) \
**Contributor**: @johnlanni \
**Change Log**: 该PR修正了mcp-server OCI镜像路径`mcp-server/all-in-one``plugins/mcp-server`的变更,以匹配新插件结构。 \
**Feature Value**: 通过更新镜像路径确保与新插件目录一致,从而保证服务正常运行,避免因路径错误导致的部署或运行问题
**Change Log**: 修复了mcp-server插件OCI镜像路径未同步迁移的问题,将原路径mcp-server/all-in-one更新为plugins/mcp-server,适配新插件目录结构,确保插件加载和部署正常。 \
**Feature Value**: 避免因镜像路径错误导致插件无法拉取或启动失败保障Higress网关中mcp-server插件的稳定运行与无缝升级提升用户在插件化场景下的部署可靠性
- **Related PR**: [#654](https://github.com/higress-group/higress-console/pull/654) \
**Contributor**: @fgksking \
**Change Log**: 此PR通过升级springdoc内置的swagger-ui依赖解决了请求体显示为空的问题保证了API文档的准确性。 \
**Feature Value**: 修复了Swagger UI中空请求体值的问题,提升了用户体验和开发者对API文档的信任度,确保接口测试与实际使用的一致性
**Change Log**: 升级springdoc依赖的swagger-ui版本,通过在pom.xml中引入更高版本的webjars-locator依赖和更新相关版本属性,修复了Swagger UI中请求体schema显示为空的问题。 \
**Feature Value**: 用户在使用Higress控制台的API文档功能时能正确查看和交互请求体结构,提升API调试体验与开发效率,避免因文档展示异常导致的接口调用误解
---