release 2.0.6-rc.3 (#1680 )

Fix istio lds cache (#1679 )
Update build-and-test-plugin.yaml
2026-03-09 11:10:49 +08:00 · 2025-01-15 20:47:20 +08:00 · 2025-01-15 20:44:13 +08:00 · 2025-01-15 20:19:58 +08:00 · 2025-01-15 19:15:11 +08:00 · 2025-01-15 15:29:44 +08:00
138 changed files with 4375 additions and 1716 deletions
--- a/.github/workflows/build-and-test-plugin.yaml
+++ b/.github/workflows/build-and-test-plugin.yaml
@@ -6,11 +6,15 @@ on:
    paths:
      - 'plugins/**'
      - 'test/**'
+      - 'helm/**'
+      - 'Makefile.core.mk'
  pull_request:
    branches: [ "*" ]
    paths:
      - 'plugins/**'
      - 'test/**'
+      - 'helm/**'
+      - 'Makefile.core.mk'
  workflow_dispatch: ~      

 jobs:
--- a/.github/workflows/release-hgctl.yaml
+++ b/.github/workflows/release-hgctl.yaml
@@ -58,7 +58,7 @@ jobs:
          hgctl_${{ env.HGCTL_VERSION }}_darwin_arm64.tar.gz

  release-hgctl-macos-amd64:
-    runs-on: macos-12
+    runs-on: macos-14
    env:
      HGCTL_VERSION: ${{github.ref_name}}
    steps:
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -12,6 +12,7 @@ header:
    - 'LICENSE'
    - 'api/**'
    - 'samples/**'
+    - 'docs/**'
    - '.github/**'
    - '.licenserc.yaml'
    - 'helm/**'
--- a/3
+++ b/3
@@ -2,7 +2,8 @@
 /envoy @gengleilei @johnlanni
 /istio @SpecialYang @johnlanni
 /pkg @SpecialYang @johnlanni @CH3CHO
-/plugins @johnlanni @WeixinX @CH3CHO
+/plugins @johnlanni @CH3CHO @rinfx
+/plugins/wasm-go/extensions/ai-proxy @cr7258 @CH3CHO @rinfx
 /plugins/wasm-rust @007gzs @jizhuozhi
 /registry @NameHaibinZhang @2456868764 @johnlanni
 /test @Xunzhuo @2456868764 @CH3CHO
--- a/Makefile.core.mk
+++ b/Makefile.core.mk
@@ -144,7 +144,7 @@ docker-buildx-push: clean-env docker.higress-buildx
 export PARENT_GIT_TAG:=$(shell cat VERSION)
 export PARENT_GIT_REVISION:=$(TAG)

-export ENVOY_PACKAGE_URL_PATTERN?=https://github.com/higress-group/proxy/releases/download/v2.0.0/envoy-symbol-ARCH.tar.gz
+export ENVOY_PACKAGE_URL_PATTERN?=https://github.com/higress-group/proxy/releases/download/v2.1.0/envoy-symbol-ARCH.tar.gz

 build-envoy: prebuild
 	./tools/hack/build-envoy.sh
@@ -187,8 +187,8 @@ install: pre-install
 	cd helm/higress; helm dependency build
 	helm install higress helm/higress -n higress-system --create-namespace --set 'global.local=true'

-ENVOY_LATEST_IMAGE_TAG ?= 2.0.1
-ISTIO_LATEST_IMAGE_TAG ?= 2.0.1
+ENVOY_LATEST_IMAGE_TAG ?= 958467a353d411ae3f06e03b096bfd342cddb2c6
+ISTIO_LATEST_IMAGE_TAG ?= f5cd4d940185204f375a0dd863246037c183cb76

 install-dev: pre-install
 	helm install higress helm/core -n higress-system --create-namespace --set 'controller.tag=$(TAG)' --set 'gateway.replicas=1' --set 'pilot.tag=$(ISTIO_LATEST_IMAGE_TAG)' --set 'gateway.tag=$(ENVOY_LATEST_IMAGE_TAG)' --set 'global.local=true'
@@ -299,7 +299,7 @@ kube-load-image: $(tools/kind) ## Install the Higress image to a kind cluster us
 	tools/hack/docker-pull-image.sh higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echo-server 1.3.0
 	tools/hack/docker-pull-image.sh higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echo-server v1.0
 	tools/hack/docker-pull-image.sh higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echo-body 1.0.0
-	tools/hack/docker-pull-image.sh openpolicyagent/opa latest
+	tools/hack/docker-pull-image.sh openpolicyagent/opa 0.61.0
 	tools/hack/docker-pull-image.sh curlimages/curl latest
 	tools/hack/docker-pull-image.sh registry.cn-hangzhou.aliyuncs.com/2456868764/httpbin 1.0.2
 	tools/hack/docker-pull-image.sh registry.cn-hangzhou.aliyuncs.com/hinsteny/nacos-standlone-rc3 1.0.0-RC3
@@ -312,7 +312,7 @@ kube-load-image: $(tools/kind) ## Install the Higress image to a kind cluster us
 	tools/hack/kind-load-image.sh higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echo-server 1.3.0
 	tools/hack/kind-load-image.sh higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echo-server v1.0
 	tools/hack/kind-load-image.sh higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echo-body 1.0.0
-	tools/hack/kind-load-image.sh openpolicyagent/opa latest
+	tools/hack/kind-load-image.sh openpolicyagent/opa 0.61.0
 	tools/hack/kind-load-image.sh curlimages/curl latest
 	tools/hack/kind-load-image.sh registry.cn-hangzhou.aliyuncs.com/2456868764/httpbin 1.0.2
 	tools/hack/kind-load-image.sh registry.cn-hangzhou.aliyuncs.com/hinsteny/nacos-standlone-rc3 1.0.0-RC3
--- a/README.md
+++ b/README.md
@@ -6,9 +6,14 @@
 </h1>
 <h4 align="center"> AI Native API Gateway </h4>

+<div align="center">
+    
 [![Build Status](https://github.com/alibaba/higress/actions/workflows/build-and-test.yaml/badge.svg?branch=main)](https://github.com/alibaba/higress/actions)
 [![license](https://img.shields.io/github/license/alibaba/higress.svg)](https://www.apache.org/licenses/LICENSE-2.0.html)

+<a href="https://trendshift.io/repositories/10918" target="_blank"><img src="https://trendshift.io/api/badge/repositories/10918" alt="alibaba%2Fhigress | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+</div>
+
 [**官网**](https://higress.cn/) &nbsp; |
 &nbsp; [**文档**](https://higress.cn/docs/latest/overview/what-is-higress/) &nbsp; |
 &nbsp; [**博客**](https://higress.cn/blog/) &nbsp; |
@@ -17,6 +22,7 @@
 &nbsp; [**AI插件**](https://higress.cn/plugin/) &nbsp;


+
 <p>
   <a href="README_EN.md"> English <a/>| 中文 | <a href="README_JP.md"> 日本語 <a/> 
 </p>
@@ -180,7 +186,7 @@ K8s 下使用 Helm 部署等其他安装方式可以参考官网 [Quick Start

 ### 交流群

-![image](https://img.alicdn.com/imgextra/i2/O1CN01BkopaB22ZsvamFftE_!!6000000007135-0-tps-720-405.jpg)
+![image](https://img.alicdn.com/imgextra/i2/O1CN01fZefEP1aPWkzG3A19_!!6000000003322-0-tps-720-405.jpg)

 ### 技术分享

--- a/2
+++ b/2
@@ -1 +1 @@
-v2.0.3
+v2.0.6-rc.3
--- a/api/extensions/v1alpha1/wasmplugin.pb.go
+++ b/api/extensions/v1alpha1/wasmplugin.pb.go
@@ -341,7 +341,7 @@ type WasmPlugin struct {
 	// Extended by Higress, matching rules take effect
 	MatchRules []*MatchRule `protobuf:"bytes,102,rep,name=match_rules,json=matchRules,proto3" json:"match_rules,omitempty"`
 	// disable the default config
-	DefaultConfigDisable bool `protobuf:"varint,103,opt,name=default_config_disable,json=defaultConfigDisable,proto3" json:"default_config_disable,omitempty"`
+	DefaultConfigDisable *wrappers.BoolValue `protobuf:"bytes,103,opt,name=default_config_disable,json=defaultConfigDisable,proto3" json:"default_config_disable,omitempty"`
 }

 func (x *WasmPlugin) Reset() {
@@ -467,11 +467,11 @@ func (x *WasmPlugin) GetMatchRules() []*MatchRule {
 	return nil
 }

-func (x *WasmPlugin) GetDefaultConfigDisable() bool {
+func (x *WasmPlugin) GetDefaultConfigDisable() *wrappers.BoolValue {
 	if x != nil {
 		return x.DefaultConfigDisable
 	}
-	return false
+	return nil
 }

 // Extended by Higress
@@ -480,11 +480,11 @@ type MatchRule struct {
 	sizeCache     protoimpl.SizeCache
 	unknownFields protoimpl.UnknownFields

-	Ingress       []string        `protobuf:"bytes,1,rep,name=ingress,proto3" json:"ingress,omitempty"`
-	Domain        []string        `protobuf:"bytes,2,rep,name=domain,proto3" json:"domain,omitempty"`
-	Config        *_struct.Struct `protobuf:"bytes,3,opt,name=config,proto3" json:"config,omitempty"`
-	ConfigDisable bool            `protobuf:"varint,4,opt,name=config_disable,json=configDisable,proto3" json:"config_disable,omitempty"`
-	Service       []string        `protobuf:"bytes,5,rep,name=service,proto3" json:"service,omitempty"`
+	Ingress       []string            `protobuf:"bytes,1,rep,name=ingress,proto3" json:"ingress,omitempty"`
+	Domain        []string            `protobuf:"bytes,2,rep,name=domain,proto3" json:"domain,omitempty"`
+	Config        *_struct.Struct     `protobuf:"bytes,3,opt,name=config,proto3" json:"config,omitempty"`
+	ConfigDisable *wrappers.BoolValue `protobuf:"bytes,4,opt,name=config_disable,json=configDisable,proto3" json:"config_disable,omitempty"`
+	Service       []string            `protobuf:"bytes,5,rep,name=service,proto3" json:"service,omitempty"`
 }

 func (x *MatchRule) Reset() {
@@ -540,11 +540,11 @@ func (x *MatchRule) GetConfig() *_struct.Struct {
 	return nil
 }

-func (x *MatchRule) GetConfigDisable() bool {
+func (x *MatchRule) GetConfigDisable() *wrappers.BoolValue {
 	if x != nil {
 		return x.ConfigDisable
 	}
-	return false
+	return nil
 }

 func (x *MatchRule) GetService() []string {
@@ -686,7 +686,7 @@ var file_extensions_v1alpha1_wasmplugin_proto_rawDesc = []byte{
 	0x6f, 0x62, 0x75, 0x66, 0x2f, 0x77, 0x72, 0x61, 0x70, 0x70, 0x65, 0x72, 0x73, 0x2e, 0x70, 0x72,
 	0x6f, 0x74, 0x6f, 0x1a, 0x1c, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74,
 	0x6f, 0x62, 0x75, 0x66, 0x2f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74,
-	0x6f, 0x22, 0x8d, 0x06, 0x0a, 0x0a, 0x57, 0x61, 0x73, 0x6d, 0x50, 0x6c, 0x75, 0x67, 0x69, 0x6e,
+	0x6f, 0x22, 0xa9, 0x06, 0x0a, 0x0a, 0x57, 0x61, 0x73, 0x6d, 0x50, 0x6c, 0x75, 0x67, 0x69, 0x6e,
 	0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75,
 	0x72, 0x6c, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x68, 0x61, 0x32, 0x35, 0x36, 0x18, 0x03, 0x20, 0x01,
 	0x28, 0x09, 0x52, 0x06, 0x73, 0x68, 0x61, 0x32, 0x35, 0x36, 0x12, 0x53, 0x0a, 0x11, 0x69, 0x6d,
@@ -731,52 +731,55 @@ var file_extensions_v1alpha1_wasmplugin_proto_rawDesc = []byte{
 	0x73, 0x18, 0x66, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x68, 0x69, 0x67, 0x72, 0x65, 0x73,
 	0x73, 0x2e, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x76, 0x31, 0x61,
 	0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, 0x75, 0x6c, 0x65, 0x52,
-	0x0a, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x52, 0x75, 0x6c, 0x65, 0x73, 0x12, 0x34, 0x0a, 0x16, 0x64,
+	0x0a, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x52, 0x75, 0x6c, 0x65, 0x73, 0x12, 0x50, 0x0a, 0x16, 0x64,
 	0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x5f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x5f, 0x64, 0x69,
-	0x73, 0x61, 0x62, 0x6c, 0x65, 0x18, 0x67, 0x20, 0x01, 0x28, 0x08, 0x52, 0x14, 0x64, 0x65, 0x66,
-	0x61, 0x75, 0x6c, 0x74, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x44, 0x69, 0x73, 0x61, 0x62, 0x6c,
-	0x65, 0x22, 0xaf, 0x01, 0x0a, 0x09, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, 0x75, 0x6c, 0x65, 0x12,
-	0x18, 0x0a, 0x07, 0x69, 0x6e, 0x67, 0x72, 0x65, 0x73, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x09,
-	0x52, 0x07, 0x69, 0x6e, 0x67, 0x72, 0x65, 0x73, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d,
-	0x61, 0x69, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69,
-	0x6e, 0x12, 0x2f, 0x0a, 0x06, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28,
-	0x0b, 0x32, 0x17, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f,
-	0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x52, 0x06, 0x63, 0x6f, 0x6e, 0x66,
-	0x69, 0x67, 0x12, 0x25, 0x0a, 0x0e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x5f, 0x64, 0x69, 0x73,
-	0x61, 0x62, 0x6c, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x63, 0x6f, 0x6e, 0x66,
-	0x69, 0x67, 0x44, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x65, 0x72,
-	0x76, 0x69, 0x63, 0x65, 0x18, 0x05, 0x20, 0x03, 0x28, 0x09, 0x52, 0x07, 0x73, 0x65, 0x72, 0x76,
-	0x69, 0x63, 0x65, 0x22, 0x41, 0x0a, 0x08, 0x56, 0x6d, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12,
-	0x35, 0x0a, 0x03, 0x65, 0x6e, 0x76, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x68,
-	0x69, 0x67, 0x72, 0x65, 0x73, 0x73, 0x2e, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e,
-	0x73, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x6e, 0x76, 0x56, 0x61,
-	0x72, 0x52, 0x03, 0x65, 0x6e, 0x76, 0x22, 0x7e, 0x0a, 0x06, 0x45, 0x6e, 0x76, 0x56, 0x61, 0x72,
-	0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04,
-	0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4a, 0x0a, 0x0a, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x5f, 0x66, 0x72,
-	0x6f, 0x6d, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2b, 0x2e, 0x68, 0x69, 0x67, 0x72, 0x65,
-	0x73, 0x73, 0x2e, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x76, 0x31,
-	0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45, 0x6e, 0x76, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x53,
-	0x6f, 0x75, 0x72, 0x63, 0x65, 0x52, 0x09, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x46, 0x72, 0x6f, 0x6d,
-	0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52,
-	0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2a, 0x45, 0x0a, 0x0b, 0x50, 0x6c, 0x75, 0x67, 0x69, 0x6e,
-	0x50, 0x68, 0x61, 0x73, 0x65, 0x12, 0x15, 0x0a, 0x11, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49,
-	0x46, 0x49, 0x45, 0x44, 0x5f, 0x50, 0x48, 0x41, 0x53, 0x45, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05,
-	0x41, 0x55, 0x54, 0x48, 0x4e, 0x10, 0x01, 0x12, 0x09, 0x0a, 0x05, 0x41, 0x55, 0x54, 0x48, 0x5a,
-	0x10, 0x02, 0x12, 0x09, 0x0a, 0x05, 0x53, 0x54, 0x41, 0x54, 0x53, 0x10, 0x03, 0x2a, 0x42, 0x0a,
-	0x0a, 0x50, 0x75, 0x6c, 0x6c, 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x12, 0x16, 0x0a, 0x12, 0x55,
-	0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x5f, 0x50, 0x4f, 0x4c, 0x49, 0x43,
-	0x59, 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x49, 0x66, 0x4e, 0x6f, 0x74, 0x50, 0x72, 0x65, 0x73,
-	0x65, 0x6e, 0x74, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x41, 0x6c, 0x77, 0x61, 0x79, 0x73, 0x10,
-	0x02, 0x2a, 0x26, 0x0a, 0x0e, 0x45, 0x6e, 0x76, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x53, 0x6f, 0x75,
-	0x72, 0x63, 0x65, 0x12, 0x0a, 0x0a, 0x06, 0x49, 0x4e, 0x4c, 0x49, 0x4e, 0x45, 0x10, 0x00, 0x12,
-	0x08, 0x0a, 0x04, 0x48, 0x4f, 0x53, 0x54, 0x10, 0x01, 0x2a, 0x2d, 0x0a, 0x0c, 0x46, 0x61, 0x69,
-	0x6c, 0x53, 0x74, 0x72, 0x61, 0x74, 0x65, 0x67, 0x79, 0x12, 0x0e, 0x0a, 0x0a, 0x46, 0x41, 0x49,
-	0x4c, 0x5f, 0x43, 0x4c, 0x4f, 0x53, 0x45, 0x10, 0x00, 0x12, 0x0d, 0x0a, 0x09, 0x46, 0x41, 0x49,
-	0x4c, 0x5f, 0x4f, 0x50, 0x45, 0x4e, 0x10, 0x01, 0x42, 0x34, 0x5a, 0x32, 0x67, 0x69, 0x74, 0x68,
-	0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x61, 0x6c, 0x69, 0x62, 0x61, 0x62, 0x61, 0x2f, 0x68,
-	0x69, 0x67, 0x72, 0x65, 0x73, 0x73, 0x2f, 0x61, 0x70, 0x69, 0x2f, 0x65, 0x78, 0x74, 0x65, 0x6e,
-	0x73, 0x69, 0x6f, 0x6e, 0x73, 0x2f, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x62, 0x06,
-	0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
+	0x73, 0x61, 0x62, 0x6c, 0x65, 0x18, 0x67, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f,
+	0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x42, 0x6f,
+	0x6f, 0x6c, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x14, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74,
+	0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x44, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x22, 0xcb, 0x01,
+	0x0a, 0x09, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, 0x75, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x69,
+	0x6e, 0x67, 0x72, 0x65, 0x73, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x09, 0x52, 0x07, 0x69, 0x6e,
+	0x67, 0x72, 0x65, 0x73, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x18,
+	0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, 0x64, 0x6f, 0x6d, 0x61, 0x69, 0x6e, 0x12, 0x2f, 0x0a,
+	0x06, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e,
+	0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e,
+	0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x52, 0x06, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x41,
+	0x0a, 0x0e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x5f, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65,
+	0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e,
+	0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x42, 0x6f, 0x6f, 0x6c, 0x56, 0x61, 0x6c,
+	0x75, 0x65, 0x52, 0x0d, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x44, 0x69, 0x73, 0x61, 0x62, 0x6c,
+	0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x18, 0x05, 0x20, 0x03,
+	0x28, 0x09, 0x52, 0x07, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x22, 0x41, 0x0a, 0x08, 0x56,
+	0x6d, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x35, 0x0a, 0x03, 0x65, 0x6e, 0x76, 0x18, 0x01,
+	0x20, 0x03, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x68, 0x69, 0x67, 0x72, 0x65, 0x73, 0x73, 0x2e, 0x65,
+	0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68,
+	0x61, 0x31, 0x2e, 0x45, 0x6e, 0x76, 0x56, 0x61, 0x72, 0x52, 0x03, 0x65, 0x6e, 0x76, 0x22, 0x7e,
+	0x0a, 0x06, 0x45, 0x6e, 0x76, 0x56, 0x61, 0x72, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65,
+	0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4a, 0x0a, 0x0a,
+	0x76, 0x61, 0x6c, 0x75, 0x65, 0x5f, 0x66, 0x72, 0x6f, 0x6d, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e,
+	0x32, 0x2b, 0x2e, 0x68, 0x69, 0x67, 0x72, 0x65, 0x73, 0x73, 0x2e, 0x65, 0x78, 0x74, 0x65, 0x6e,
+	0x73, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x76, 0x31, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2e, 0x45,
+	0x6e, 0x76, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x52, 0x09, 0x76,
+	0x61, 0x6c, 0x75, 0x65, 0x46, 0x72, 0x6f, 0x6d, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75,
+	0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x2a, 0x45,
+	0x0a, 0x0b, 0x50, 0x6c, 0x75, 0x67, 0x69, 0x6e, 0x50, 0x68, 0x61, 0x73, 0x65, 0x12, 0x15, 0x0a,
+	0x11, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x5f, 0x50, 0x48, 0x41,
+	0x53, 0x45, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x41, 0x55, 0x54, 0x48, 0x4e, 0x10, 0x01, 0x12,
+	0x09, 0x0a, 0x05, 0x41, 0x55, 0x54, 0x48, 0x5a, 0x10, 0x02, 0x12, 0x09, 0x0a, 0x05, 0x53, 0x54,
+	0x41, 0x54, 0x53, 0x10, 0x03, 0x2a, 0x42, 0x0a, 0x0a, 0x50, 0x75, 0x6c, 0x6c, 0x50, 0x6f, 0x6c,
+	0x69, 0x63, 0x79, 0x12, 0x16, 0x0a, 0x12, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49,
+	0x45, 0x44, 0x5f, 0x50, 0x4f, 0x4c, 0x49, 0x43, 0x59, 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x49,
+	0x66, 0x4e, 0x6f, 0x74, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x74, 0x10, 0x01, 0x12, 0x0a, 0x0a,
+	0x06, 0x41, 0x6c, 0x77, 0x61, 0x79, 0x73, 0x10, 0x02, 0x2a, 0x26, 0x0a, 0x0e, 0x45, 0x6e, 0x76,
+	0x56, 0x61, 0x6c, 0x75, 0x65, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x12, 0x0a, 0x0a, 0x06, 0x49,
+	0x4e, 0x4c, 0x49, 0x4e, 0x45, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x48, 0x4f, 0x53, 0x54, 0x10,
+	0x01, 0x2a, 0x2d, 0x0a, 0x0c, 0x46, 0x61, 0x69, 0x6c, 0x53, 0x74, 0x72, 0x61, 0x74, 0x65, 0x67,
+	0x79, 0x12, 0x0e, 0x0a, 0x0a, 0x46, 0x41, 0x49, 0x4c, 0x5f, 0x43, 0x4c, 0x4f, 0x53, 0x45, 0x10,
+	0x00, 0x12, 0x0d, 0x0a, 0x09, 0x46, 0x41, 0x49, 0x4c, 0x5f, 0x4f, 0x50, 0x45, 0x4e, 0x10, 0x01,
+	0x42, 0x34, 0x5a, 0x32, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x61,
+	0x6c, 0x69, 0x62, 0x61, 0x62, 0x61, 0x2f, 0x68, 0x69, 0x67, 0x72, 0x65, 0x73, 0x73, 0x2f, 0x61,
+	0x70, 0x69, 0x2f, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x73, 0x2f, 0x76, 0x31,
+	0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
 }

 var (
@@ -804,6 +807,7 @@ var file_extensions_v1alpha1_wasmplugin_proto_goTypes = []interface{}{
 	(*EnvVar)(nil),              // 7: higress.extensions.v1alpha1.EnvVar
 	(*_struct.Struct)(nil),      // 8: google.protobuf.Struct
 	(*wrappers.Int32Value)(nil), // 9: google.protobuf.Int32Value
+	(*wrappers.BoolValue)(nil),  // 10: google.protobuf.BoolValue
 }
 var file_extensions_v1alpha1_wasmplugin_proto_depIdxs = []int32{
 	1,  // 0: higress.extensions.v1alpha1.WasmPlugin.image_pull_policy:type_name -> higress.extensions.v1alpha1.PullPolicy
@@ -814,14 +818,16 @@ var file_extensions_v1alpha1_wasmplugin_proto_depIdxs = []int32{
 	6,  // 5: higress.extensions.v1alpha1.WasmPlugin.vm_config:type_name -> higress.extensions.v1alpha1.VmConfig
 	8,  // 6: higress.extensions.v1alpha1.WasmPlugin.default_config:type_name -> google.protobuf.Struct
 	5,  // 7: higress.extensions.v1alpha1.WasmPlugin.match_rules:type_name -> higress.extensions.v1alpha1.MatchRule
-	8,  // 8: higress.extensions.v1alpha1.MatchRule.config:type_name -> google.protobuf.Struct
-	7,  // 9: higress.extensions.v1alpha1.VmConfig.env:type_name -> higress.extensions.v1alpha1.EnvVar
-	2,  // 10: higress.extensions.v1alpha1.EnvVar.value_from:type_name -> higress.extensions.v1alpha1.EnvValueSource
-	11, // [11:11] is the sub-list for method output_type
-	11, // [11:11] is the sub-list for method input_type
-	11, // [11:11] is the sub-list for extension type_name
-	11, // [11:11] is the sub-list for extension extendee
-	0,  // [0:11] is the sub-list for field type_name
+	10, // 8: higress.extensions.v1alpha1.WasmPlugin.default_config_disable:type_name -> google.protobuf.BoolValue
+	8,  // 9: higress.extensions.v1alpha1.MatchRule.config:type_name -> google.protobuf.Struct
+	10, // 10: higress.extensions.v1alpha1.MatchRule.config_disable:type_name -> google.protobuf.BoolValue
+	7,  // 11: higress.extensions.v1alpha1.VmConfig.env:type_name -> higress.extensions.v1alpha1.EnvVar
+	2,  // 12: higress.extensions.v1alpha1.EnvVar.value_from:type_name -> higress.extensions.v1alpha1.EnvValueSource
+	13, // [13:13] is the sub-list for method output_type
+	13, // [13:13] is the sub-list for method input_type
+	13, // [13:13] is the sub-list for extension type_name
+	13, // [13:13] is the sub-list for extension extendee
+	0,  // [0:13] is the sub-list for field type_name
 }

 func init() { file_extensions_v1alpha1_wasmplugin_proto_init() }
--- a/api/extensions/v1alpha1/wasmplugin.proto
+++ b/api/extensions/v1alpha1/wasmplugin.proto
@@ -112,7 +112,7 @@ message WasmPlugin {
  // Extended by Higress, matching rules take effect
  repeated MatchRule match_rules = 102;
  // disable the default config
-  bool default_config_disable = 103;
+  google.protobuf.BoolValue default_config_disable = 103;
 }

 // Extended by Higress
@@ -120,7 +120,7 @@ message MatchRule {
  repeated string ingress = 1;
  repeated string domain = 2;
  google.protobuf.Struct config = 3;
-  bool config_disable = 4;
+  google.protobuf.BoolValue config_disable = 4;
  repeated string service = 5;
 }

--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -0,0 +1,143 @@
+# Higress 核心组件和原理
+
+Higress 是基于 Envoy 和 Istio 进行二次定制化开发构建和功能增强，同时利用 Envoy 和 Istio 一些插件机制，实现了一个轻量级的网关服务。其包括 3 个核心组件：Higress Controller（控制器）、Higress Gateway（网关）和 Higress Console（控制台）。
+下图概括了其核心工作流程：
+
+![img](./images/img_02_01.png)
+
+本章将重点介绍 Higress 的两个核心组件：Higress Controller 和 Higress Gateway。
+
+## 1 Higress Console
+
+Higress Console 是 Higress 网关的管理控制台，主要功能是管理 Higress 网关的路由配置、插件配置等。
+
+### 1.1 Higress Admin SDK
+
+Higress Admin SDK 脱胎于 Higress Console。起初，它作为 Higress Console 的一部分，为前端界面提供实际的功能支持。后来考虑到对接外部系统等需求，将配置管理的部分剥离出来，形成一个独立的逻辑组件，便于和各个系统进行对接。目前支持服务来源管理、服务管理、路由管理、域名管理、证书管理、插件管理等功能。
+Higress Admin SDK 现在只提供 Java 版本，且要求 JDK 版本不低于 17。具体如何集成请参考 Higress 官方 BLOG [如何使用 Higress Admin SDK 进行配置管理](https://higress.io/zh-cn/blog/admin-sdk-intro)。
+
+## 2 Higress Controller
+
+Higress Controller（控制器）是 Higress 的核心组件，其功能主要是实现 Higress 网关的服务发现、动态配置管理，以及动态下发配置给数据面。Higress Controller 内部包含两个子组件：Discovery 和 Higress Core。
+
+### 2.1 Discovery 组件
+
+Discovery 组件（Istio Pilot-Discovery）是 Istio 的核心组件，负责服务发现、配置管理、证书签发、控制面和数据面之间的通讯和配置下发等。Discovery 内部结构比较复杂，本文只介绍 Discovery 配置管理和服务发现的基本原理，其核心功能的详细介绍可以参考赵化冰老师的 BLOG [Istio Pilot 组件介绍](https://www.zhaohuabing.com/post/2019-10-21-pilot-discovery-code-analysis/)。
+Discovery 将 Kubernetes Service、Gateway API 配置等转换成 Istio 配置，然后将所有 Istio 配置合并转成符合 xDS 接口规范的数据结构，通过 GRPC 下发到数据面的 Envoy。其工作原理如下图：
+
+![img](./images/img_02_02.png)
+
+#### 2.1.1 Config Controller
+
+Discovery 为了更好管理 Istio 配置来源，提供 `Config Controller` 用于管理各种配置来源，目前支持 4 种类型的 `Config Controller`：
+
+- Kubernetes：使用 Kubernetes 作为配置信息来源，该方式直接依赖 Kubernetes 强大的 CRD 机制来存储配置信息，简单方便，是 Istio 最开始使用的配置信息存储方案，其中包括 `Kubernetes Controller` 和 `Gateway API Controller` 两个实现。
+- MCP（Mesh Configuration Protocol）：使用 Kubernetes 存储配置数据导致了 Istio 和 Kubernetes 的耦合，限制了 Istio 在非 Kubernetes 环境下的运用。为了解决该耦合，Istio 社区提出了 MCP。
+- Memory：一个基于内存的 Config Controller 实现，主要用于测试。
+- File：一个基于文件的 Config Controller 实现，主要用于测试。
+
+1. Istio 配置
+
+Istio 配置包括：`Gateway`、`VirtualService`、`DestinationRule`、`ServiceEntry`、`EnvoyFilter`、`WasmPlugin`、`WorkloadEntry`、`WorkloadGroup` 等，可以参考 Istio 官方文档[流量管理](https://istio.io/latest/zh/docs/reference/config/networking/)了解更多配置信息。
+
+2. Gateway API 配置
+
+Gateway API 配置包括：`GatewayClass`、`Gateway`、`HTTPRoute`、`TCPRoute`、`GRPCRoute` 等，可以参考 Gateway API 官方文档 [Gateway API](https://gateway-api.sigs.k8s.io/api-types/gateway/) 了解更多配置信息。
+
+3. MCP over xDS
+
+Discovery 作为 MCP Client，任何实现了 MCP 协议的 Server 都可以通过 MCP 协议向 Discovery 下发配置信息，从而消除了 Istio 和 Kubernetes 之间的耦合, 同时使 Istio 的配置信息处理更加灵活和可扩展。
+同时 MCP 是一种基于 xDS 协议的配置管理协议，Higress Core 通过实现 MCP 协议，使 Higress Core 成为 Discovery 的 Istio 配置来源。
+
+4. Config Controller 来源配置
+
+在 `higress-system` 命名空间下名为 `higress-config` 的 ConfigMap 中，`mesh` 配置项包含一个 `configSources` 属性，用于指定配置来源。该 ConfigMap 的部分配置项如下：
+
+```yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: higress-config
+  namespace: higress-system
+data:
+  mesh: |-
+    accessLogEncoding: TEXT
+    ...
+    configSources:
+    - address: xds://127.0.0.1:15051
+    - address: k8s://
+    ...
+  meshNetworks: "networks: {}"
+```
+
+#### 2.1.2 Service Controller
+
+`Service Controller` 用于管理各种 `Service Registry`，提供服务发现数据，目前 Istio 支持的 `Service Registry` 主要包括：
+
+- Kubernetes：对接 Kubernetes Registry，可以将 Kubernetes 中定义的 Service 和 Endpoint 采集到 Istio 中。
+- Memory：一个基于内存的 Service Controller 实现，主要用于测试。
+
+### 2.2 Higress Core 组件
+
+Higress Core 核心逻辑如下图：
+
+![img](./images/img_02_03.png)
+
+
+Higress Core 内部包含两个核心子组件: Ingress Config 和 Cert Server。
+
+#### 2.2.1 Ingress Config
+
+Ingress Config 包含 6 个控制器，各自负责不同的功能：
+
+- Ingress Controller：监听 Ingress 资源，将 Ingress 转换为 Istio 的 Gateway、VirtualService、DestinationRule 等资源。
+- Gateway Controller：监听 Gateway、VirtualService、DestinationRule 等资源。
+- McpBridge Controller：根据 McpBridge 的配置，将来自 Nacos、Eureka、Consul、Zookeeper 等外部注册中心或 DNS 的服务信息转换成 Istio ServiceEntry 资源。
+- Http2Rpc Controller：监听 Http2Rpc 资源，实现 HTTP 协议到 RPC 协议的转换。用户可以通过配置协议转换，将 RPC 服务以 HTTP 接口的形式暴露，从而使用 HTTP 请求调用 RPC 接口。
+- WasmPlugin Controller：监听 WasmPlugin 资源，将 Higress WasmPlugin 转化为 Istio WasmPlugin。Higress WasmPlugin 在 Istio WasmPlugin 的基础上进行了扩展，支持全局、路由、域名、服务级别的配置。
+- ConfigmapMgr：监听 Higress 的全局配置 `higress-config` ConfigMap，可以根据 tracing、gzip 等配置构造 EnvoyFilter。
+
+#### 2.2.2 Cert Server
+
+Cert Server 管理 Secret 资源和证书自动签发。
+
+## 3 Higress Gateway
+
+Higress Gateway 内部包含两个子组件：Pilot Agent 和 Envoy。Pilot Agent 主要负责 Envoy 的启动和配置，同时代理 Envoy xDS 请求到 Discovery。Envoy 作为数据面，负责接收控制面的配置下发，并代理请求到业务服务。Pilot Agent 和 Envoy 之间使用 xDS 协议，通过 Unix Domain Socket（UDS）进行通信。
+Envoy 核心架构如下图：
+
+![img](./images/img_02_04.png)
+
+### 3.1 Envoy 核心组件
+
+- 下游（Downstream）:
+  下游是 Envoy 的客户端，它们负责发起请求并接收 Envoy 的响应。下游通常是最终用户的设备或服务，它们通过 Envoy 代理与后端服务进行通信。
+
+- 上游（Upstream）:
+  上游是 Envoy 的后端服务器，它们接收 Envoy 代理的连接和请求。上游提供服务或数据，对来自下游客户端的请求进行处理并返回响应。
+
+- 监听器（Listener）:
+  监听器是可以接受来自下游客户端连接的网络地址（如 IP 地址和端口，Unix Domain Socket 等）。Envoy 支持在单个进程中配置任意数量的监听器。监听器可以通过 `Listener Discovery Service（LDS）`来动态发现和更新。
+
+- 路由（Router）:
+  路由器是 Envoy 中连接下游和上游的桥梁。它负责决定如何将监听器接收到的请求路由到适当的集群。路由器根据配置的路由规则，如路径、HTTP 标头 等，来确定请求的目标集群，从而实现精确的流量控制和路由。路由器可以通过 `Route Discovery Service（RDS）`来动态发现和更新。
+
+- 集群（Cluster）:
+  集群是一组逻辑上相似的服务提供者的集合。集群成员的选择由负载均衡策略决定，确保请求能够均匀或按需分配到不同的服务实例。集群可以通过 `Cluster Discovery Service（CDS）`来动态发现和更新。
+
+- 端点（Endpoint）:
+  端点是上游集群中的具体服务实例，可以是 IP 地址和端口号的组合。端点可以通过 `Endpoint Discovery Service（EDS）`来动态发现和更新。
+
+- SSL/TLS:
+  Envoy 可以通过 `Secret Discovery Service (SDS)` 动态获取监听器和集群所需的 TLS 证书、私钥以及信任的根证书和撤销机制等配置信息。
+
+通过这些组件的协同工作，Envoy 能够高效地处理网络请求，提供流量管理、负载均衡、服务发现和动态路由等关键功能。
+要详细了解 Envoy 的工作原理，可以参考 [Envoy 官方文档](https://www.envoyproxy.io/docs/envoy/latest/intro/intro)；最佳的方式是跟随一个请求在 [Envoy 代理中的生命周期](https://www.envoyproxy.io/docs/envoy/latest/intro/life_of_a_request)，通过其经历的各个事件来理解 Envoy 的工作原理。
+
+
+## 参考
+
+- [1] [Istio Pilot 组件介绍](https://www.zhaohuabing.com/post/2019-10-21-pilot-discovery-code-analysis/)
+- [2] [Istio 服务注册插件机制代码解析](https://www.zhaohuabing.com/post/2019-02-18-pilot-service-registry-code-analysis/)
+- [3] [Istio Pilot代码深度解析](https://www.zhaohuabing.com/post/2019-10-21-pilot-discovery-code-analysis/)
+- [4] [Envoy 官方文档](https://www.envoyproxy.io/docs/envoy/latest/intro/intro)
--- a/docs/images/img_02_01.png
+++ b/docs/images/img_02_01.png
--- a/docs/images/img_02_02.png
+++ b/docs/images/img_02_02.png
--- a/docs/images/img_02_03.png
+++ b/docs/images/img_02_03.png
--- a/docs/images/img_02_04.png
+++ b/docs/images/img_02_04.png
--- a/envoy/envoy
+++ b/envoy/envoy
--- a/helm/core/Chart.yaml
+++ b/helm/core/Chart.yaml
@@ -1,5 +1,5 @@
 apiVersion: v2
-appVersion: 2.0.3
+appVersion: 2.0.6-rc.3
 description: Helm chart for deploying higress gateways
 icon: https://higress.io/img/higress_logo_small.png
 home: http://higress.io/
@@ -10,4 +10,4 @@ name: higress-core
 sources:
 - http://github.com/alibaba/higress
 type: application
-version: 2.0.3
+version: 2.0.6-rc.3
--- a/helm/core/templates/_pod.tpl
+++ b/helm/core/templates/_pod.tpl
@@ -7,9 +7,6 @@ Rendering the pod template of gateway component.
 template:
  metadata:
    annotations:
-    {{- if .Values.global.enableHigressIstio }}
-      "enableHigressIstio": "true"
-    {{- end }}
    {{- if .Values.gateway.podAnnotations }}
      {{- toYaml .Values.gateway.podAnnotations | nindent 6 }}
    {{- end }}
@@ -268,11 +265,7 @@ template:
    {{- end }}
    - name: higress-ca-root-cert
      configMap:
-    {{- if .Values.global.enableHigressIstio }}
-        name: istio-ca-root-cert
-    {{- else }}
        name: higress-ca-root-cert
-    {{- end }}
    - name: config
      configMap:
        name: higress-config
--- a/helm/core/templates/configmap.yaml
+++ b/helm/core/templates/configmap.yaml
@@ -9,7 +9,7 @@
    accessLogFile: "/dev/stdout"
    {{- end }}
    ingressControllerMode: "OFF"
-    accessLogFormat: '{"authority":"%REQ(X-ENVOY-ORIGINAL-HOST?:AUTHORITY)%","bytes_received":"%BYTES_RECEIVED%","bytes_sent":"%BYTES_SENT%","downstream_local_address":"%DOWNSTREAM_LOCAL_ADDRESS%","downstream_remote_address":"%DOWNSTREAM_REMOTE_ADDRESS%","duration":"%DURATION%","istio_policy_status":"%DYNAMIC_METADATA(istio.mixer:status)%","method":"%REQ(:METHOD)%","path":"%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%","protocol":"%PROTOCOL%","request_id":"%REQ(X-REQUEST-ID)%","requested_server_name":"%REQUESTED_SERVER_NAME%","response_code":"%RESPONSE_CODE%","response_flags":"%RESPONSE_FLAGS%","route_name":"%ROUTE_NAME%","start_time":"%START_TIME%","trace_id":"%REQ(X-B3-TRACEID)%","upstream_cluster":"%UPSTREAM_CLUSTER%","upstream_host":"%UPSTREAM_HOST%","upstream_local_address":"%UPSTREAM_LOCAL_ADDRESS%","upstream_service_time":"%RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)%","upstream_transport_failure_reason":"%UPSTREAM_TRANSPORT_FAILURE_REASON%","user_agent":"%REQ(USER-AGENT)%","x_forwarded_for":"%REQ(X-FORWARDED-FOR)%","response_code_details":"%RESPONSE_CODE_DETAILS%"}
+    accessLogFormat: '{"ai_log":"%FILTER_STATE(wasm.ai_log:PLAIN)%","authority":"%REQ(X-ENVOY-ORIGINAL-HOST?:AUTHORITY)%","bytes_received":"%BYTES_RECEIVED%","bytes_sent":"%BYTES_SENT%","downstream_local_address":"%DOWNSTREAM_LOCAL_ADDRESS%","downstream_remote_address":"%DOWNSTREAM_REMOTE_ADDRESS%","duration":"%DURATION%","istio_policy_status":"%DYNAMIC_METADATA(istio.mixer:status)%","method":"%REQ(:METHOD)%","path":"%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%","protocol":"%PROTOCOL%","request_id":"%REQ(X-REQUEST-ID)%","requested_server_name":"%REQUESTED_SERVER_NAME%","response_code":"%RESPONSE_CODE%","response_flags":"%RESPONSE_FLAGS%","route_name":"%ROUTE_NAME%","start_time":"%START_TIME%","trace_id":"%REQ(X-B3-TRACEID)%","upstream_cluster":"%UPSTREAM_CLUSTER%","upstream_host":"%UPSTREAM_HOST%","upstream_local_address":"%UPSTREAM_LOCAL_ADDRESS%","upstream_service_time":"%RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)%","upstream_transport_failure_reason":"%UPSTREAM_TRANSPORT_FAILURE_REASON%","user_agent":"%REQ(USER-AGENT)%","x_forwarded_for":"%REQ(X-FORWARDED-FOR)%","response_code_details":"%RESPONSE_CODE_DETAILS%"}

    '
    dnsRefreshRate: 200s
@@ -20,11 +20,7 @@
    # When processing a leaf namespace Istio will search for declarations in that namespace first
    # and if none are found it will search in the root namespace. Any matching declaration found in the root namespace
    # is processed as if it were declared in the leaf namespace.
-    {{- if .Values.global.enableHigressIstio }}
-    rootNamespace: {{ .Values.meshConfig.rootNamespace | default .Values.global.istioNamespace }}
-    {{- else }}
    rootNamespace: {{ .Release.Namespace }}
-    {{- end }}

    configSources:
      - address: "xds://127.0.0.1:15051"
@@ -85,12 +81,8 @@
      discoveryAddress: {{ printf "istiod.%s.svc" .Release.Namespace }}:15012
      {{- end }}
      {{- else }}
-      {{- if .Values.global.enableHigressIstio }}
-      discoveryAddress: {{ printf "istiod.%s.svc" .Values.global.istioNamespace }}:15012
-      {{- else }}
      discoveryAddress: {{ include "controller.name" . }}.{{.Release.Namespace}}.svc:15012
      {{- end }}
-      {{- end }}
      proxyStatsMatcher:
        inclusionRegexps:
        - ".*"
--- a/helm/core/templates/controller-deployment.yaml
+++ b/helm/core/templates/controller-deployment.yaml
@@ -96,7 +96,6 @@ spec:
          volumeMounts:
          - name: log
            mountPath: /var/log
-{{- if not .Values.global.enableHigressIstio }}
        - name: discovery
          image: "{{ .Values.pilot.hub | default .Values.global.hub }}/{{ .Values.pilot.image | default "pilot" }}:{{ .Values.pilot.tag | default .Chart.AppVersion }}"
 {{- if .Values.global.imagePullPolicy }}
@@ -137,6 +136,10 @@ spec:
            periodSeconds: 3
            timeoutSeconds: 5
          env:
+          - name: ENABLE_PUSH_ALL_MCP_CLUSTERS
+            value: "{{ .Values.global.enablePushAllMCPClusters }}"
+          - name: PILOT_ENABLE_LDS_CACHE
+            value: "{{ .Values.global.enableLDSCache }}"
          - name: PILOT_ENABLE_QUIC_LISTENERS
            value: "true"
          - name: VALIDATION_WEBHOOK_CONFIG_NAME
@@ -229,10 +232,8 @@ spec:
            value: "false"
          - name: PILOT_ENABLE_GATEWAY_API_DEPLOYMENT_CONTROLLER
            value: "false"
-          {{- if not .Values.global.enableHigressIstio }}
          - name: CUSTOM_CA_CERT_NAME
            value: "higress-ca-root-cert"
-          {{- end }}
          {{- if not (or .Values.global.local .Values.global.kind) }}
          resources:
 {{- if .Values.pilot.resources }}
@@ -269,7 +270,6 @@ spec:
          - name: extracacerts
            mountPath: /cacerts
          {{- end }}
-{{- end }}
      {{- with .Values.controller.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
@@ -285,7 +285,6 @@ spec:
      volumes:
      - name: log
        emptyDir: {}
-      {{- if not .Values.global.enableHigressIstio }}
      - name: config
        configMap:
          name: higress-config
@@ -317,4 +316,3 @@ spec:
        configMap:
          name: pilot-jwks-extra-cacerts{{- if not (eq .Values.revision "") }}-{{ .Values.revision }}{{- end }}
  {{- end }}
-      {{- end }}
--- a/helm/core/templates/controller-service.yaml
+++ b/helm/core/templates/controller-service.yaml
@@ -9,7 +9,6 @@ spec:
  type: {{ .Values.controller.service.type }}
  ports:
    {{- toYaml .Values.controller.ports | nindent 4 }}
-    {{- if not .Values.global.enableHigressIstio }}
    - port: 15010
      name: grpc-xds # plaintext
      protocol: TCP
@@ -23,6 +22,5 @@ spec:
    - port: 15014
      name: http-monitoring # prometheus stats
      protocol: TCP
-    {{- end }}
  selector:
    {{- include "controller.selectorLabels" . | nindent 4 }}
--- a/helm/core/templates/daemonset.yaml
+++ b/helm/core/templates/daemonset.yaml
@@ -1,7 +1,8 @@
 {{- if eq .Values.gateway.kind "DaemonSet" -}}
 {{- $o11y := .Values.global.o11y  }}
-{{- $unprivilegedPortSupported := true }}
-{{- range $index, $node := (lookup "v1" "Node" "default" "").items }}
+{{- if eq .Values.gateway.unprivilegedPortSupported nil -}}
+  {{- $unprivilegedPortSupported := true }}
+  {{- range $index, $node := (lookup "v1" "Node" "default" "").items }}
    {{- $kernelVersion := $node.status.nodeInfo.kernelVersion }}
    {{- if $kernelVersion }}
      {{- $kernelVersion = regexFind "^(\\d+\\.\\d+\\.\\d+)" $kernelVersion }}
@@ -9,8 +10,9 @@
      {{- $unprivilegedPortSupported = false }}
      {{- end }}
    {{- end }}
+  {{- end -}}
+  {{- $_ := set .Values.gateway "unprivilegedPortSupported" $unprivilegedPortSupported -}}
 {{- end -}}
-{{- $_ := set .Values.gateway "unprivilegedPortSupported" $unprivilegedPortSupported -}}

 apiVersion: apps/v1
 kind: DaemonSet
--- a/helm/core/templates/deployment.yaml
+++ b/helm/core/templates/deployment.yaml
@@ -1,6 +1,7 @@
 {{- if eq .Values.gateway.kind "Deployment" -}}
-{{- $unprivilegedPortSupported := true }}
-{{- range $index, $node := (lookup "v1" "Node" "default" "").items }}
+{{- if eq .Values.gateway.unprivilegedPortSupported nil -}}
+  {{- $unprivilegedPortSupported := true }}
+  {{- range $index, $node := (lookup "v1" "Node" "default" "").items }}
    {{- $kernelVersion := $node.status.nodeInfo.kernelVersion }}
    {{- if $kernelVersion }}
      {{- $kernelVersion = regexFind "^(\\d+\\.\\d+\\.\\d+)" $kernelVersion }}
@@ -8,8 +9,9 @@
      {{- $unprivilegedPortSupported = false }}
      {{- end }}
    {{- end }}
+  {{- end -}}
+  {{- $_ := set .Values.gateway "unprivilegedPortSupported" $unprivilegedPortSupported -}}
 {{- end -}}
-{{- $_ := set .Values.gateway "unprivilegedPortSupported" $unprivilegedPortSupported -}}

 apiVersion: apps/v1
 kind: Deployment
--- a/helm/core/values.yaml
+++ b/helm/core/values.yaml
@@ -3,7 +3,9 @@ global:
  enableH3: false
  enableIPv6: false
  enableProxyProtocol: false
-  liteMetrics: true
+  enableLDSCache: true
+  enablePushAllMCPClusters: true
+  liteMetrics: false
  xdsMaxRecvMsgSize: "104857600"
  defaultUpstreamConcurrencyThreshold: 10000
  enableSRDS: true
@@ -40,8 +42,6 @@ global:
  enableIstioAPI: true
  # -- If true, Higress Controller will monitor Gateway API resources as well
  enableGatewayAPI: false
-  # Deprecated
-  enableHigressIstio: false
  # -- Used to locate istiod.
  istioNamespace: istio-system
  # -- enable pod disruption budget for the control plane, which is used to
@@ -467,6 +467,7 @@ gateway:
  # On Kubernetes 1.22+, this only requires the `net.ipv4.ip_unprivileged_port_start` sysctl.
  securityContext: ~
  containerSecurityContext: ~
+  unprivilegedPortSupported: ~

  service:
    # -- Type of service. Set to "None" to disable the service entirely
--- a/helm/higress/Chart.lock
+++ b/helm/higress/Chart.lock
@@ -1,9 +1,9 @@
 dependencies:
 - name: higress-core
  repository: file://../core
-  version: 2.0.3
+  version: 2.0.6-rc.3
 - name: higress-console
  repository: https://higress.io/helm-charts/
-  version: 1.4.5
-digest: sha256:74b772113264168483961f5d0424459fd7359adc509a4b50400229581d7cddbf
-generated: "2024-11-08T14:06:51.871719+08:00"
+  version: 2.0.1
+digest: sha256:6821ee9079a795f3e1de2c5126c36d3285f44863938a88f021ee4fbce82c0f15
+generated: "2025-01-15T20:46:00.498051+08:00"
--- a/helm/higress/Chart.yaml
+++ b/helm/higress/Chart.yaml
@@ -1,5 +1,5 @@
 apiVersion: v2
-appVersion: 2.0.3
+appVersion: 2.0.6-rc.3
 description: Helm chart for deploying Higress gateways
 icon: https://higress.io/img/higress_logo_small.png
 home: http://higress.io/
@@ -12,9 +12,9 @@ sources:
 dependencies:
 - name: higress-core
  repository: "file://../core"
-  version: 2.0.3
+  version: 2.0.6-rc.3
 - name: higress-console
  repository: "https://higress.io/helm-charts/"
-  version: 1.4.5
+  version: 2.0.1
 type: application
-version: 2.0.3
+version: 2.0.6-rc.3
--- a/helm/higress/README.md
+++ b/helm/higress/README.md
@@ -149,6 +149,7 @@ The command removes all the Kubernetes components associated with the chart and
 | gateway.serviceAccount.name | string | `""` | The name of the service account to use. If not set, the release name is used |
 | gateway.tag | string | `""` |  |
 | gateway.tolerations | list | `[]` |  |
+| gateway.unprivilegedPortSupported | string | `nil` |  |
 | global.autoscalingv2API | bool | `true` | whether to use autoscaling/v2 template for HPA settings for internal usage only, not to be configured by users. |
 | global.caAddress | string | `""` | The customized CA address to retrieve certificates for the pods in the cluster. CSR clients such as the Istio Agent and ingress gateways can use this to specify the CA endpoint. If not set explicitly, default to the Istio discovery address. |
 | global.caName | string | `""` | The name of the CA for workload certificates. For example, when caName=GkeWorkloadCertificate, GKE workload certificates will be used as the certificates for workloads. The default value is "" and when caName="", the CA will be configured by other mechanisms (e.g., environmental variable CA_PROVIDER). |
@@ -159,10 +160,11 @@ The command removes all the Kubernetes components associated with the chart and
 | global.disableAlpnH2 | bool | `false` | Whether to disable HTTP/2 in ALPN |
 | global.enableGatewayAPI | bool | `false` | If true, Higress Controller will monitor Gateway API resources as well |
 | global.enableH3 | bool | `false` |  |
-| global.enableHigressIstio | bool | `false` |  |
 | global.enableIPv6 | bool | `false` |  |
 | global.enableIstioAPI | bool | `true` | If true, Higress Controller will monitor istio resources as well |
+| global.enableLDSCache | bool | `true` |  |
 | global.enableProxyProtocol | bool | `false` |  |
+| global.enablePushAllMCPClusters | bool | `true` |  |
 | global.enableSRDS | bool | `true` |  |
 | global.enableStatus | bool | `true` | If true, Higress Controller will update the status field of Ingress resources. When migrating from Nginx Ingress, in order to avoid status field of Ingress objects being overwritten, this parameter needs to be set to false, so Higress won't write the entry IP to the status field of the corresponding Ingress object. |
 | global.externalIstiod | bool | `false` | Configure a remote cluster data plane controlled by an external istiod. When set to true, istiod is not deployed locally and only a subset of the other discovery charts are enabled. |
@@ -175,7 +177,7 @@ The command removes all the Kubernetes components associated with the chart and
 | global.istiod | object | `{"enableAnalysis":false}` | Enabled by default in master for maximising testing. |
 | global.jwtPolicy | string | `"third-party-jwt"` | Configure the policy for validating JWT. Currently, two options are supported: "third-party-jwt" and "first-party-jwt". |
 | global.kind | bool | `false` |  |
-| global.liteMetrics | bool | `true` |  |
+| global.liteMetrics | bool | `false` |  |
 | global.local | bool | `false` | When deploying to a local cluster (e.g.: kind cluster), set this to true. |
 | global.logAsJson | bool | `false` |  |
 | global.logging | object | `{"level":"default:info"}` | Comma-separated minimum per-scope logging level of messages to output, in the form of <scope>:<level>,<scope>:<level> The control plane has different scopes depending on component, but can configure default log level across all components If empty, default scope and level will be used as configured in code |
--- a/istio/istio
+++ b/istio/istio
--- a/istio/proxy
+++ b/istio/proxy
--- a/pkg/bootstrap/server.go
+++ b/pkg/bootstrap/server.go
@@ -41,11 +41,11 @@ import (
 	"istio.io/istio/pkg/config/schema/kind"
 	"istio.io/istio/pkg/keepalive"
 	istiokube "istio.io/istio/pkg/kube"
+	"istio.io/istio/pkg/log"
 	"istio.io/istio/pkg/security"
 	"istio.io/istio/security/pkg/server/ca/authenticate"
 	"istio.io/istio/security/pkg/server/ca/authenticate/kubeauth"
 	"istio.io/pkg/ledger"
-	"istio.io/pkg/log"
 	"k8s.io/client-go/rest"
 	"k8s.io/client-go/tools/cache"

--- a/pkg/cert/certmgr.go
+++ b/pkg/cert/certmgr.go
@@ -173,7 +173,7 @@ func (s *CertMgr) Reconcile(ctx context.Context, oldConfig *Config, newConfig *C
 		s.cache.Start()
 		// sync domains
 		s.configMgr.SetConfig(newConfig)
-		CertLog.Infof("certMgr start to manageSync domains:+v%", newDomains)
+		CertLog.Infof("certMgr start to manageSync domains: %+v", newDomains)
 		s.manageSync(context.Background(), newDomains)
 		CertLog.Infof("certMgr manageSync domains done")
 	} else {
--- a/pkg/cert/log.go
+++ b/pkg/cert/log.go
@@ -14,6 +14,6 @@

 package cert

-import "istio.io/pkg/log"
+import "istio.io/istio/pkg/log"

-var CertLog = log.RegisterScope("cert", "Higress Cert process.", 0)
+var CertLog = log.RegisterScope("cert", "Higress Cert process.")
--- a/pkg/cmd/server.go
+++ b/pkg/cmd/server.go
@@ -25,7 +25,7 @@ import (
 	"istio.io/istio/pkg/config/constants"
 	"istio.io/istio/pkg/env"
 	"istio.io/istio/pkg/keepalive"
-	"istio.io/pkg/log"
+	"istio.io/istio/pkg/log"
 )

 var (
--- a/pkg/ingress/config/ingress_config.go
+++ b/pkg/ingress/config/ingress_config.go
@@ -303,21 +303,21 @@ func (m *IngressConfig) listFromIngressControllers(typ config.GroupVersionKind,
 	common.SortIngressByCreationTime(configs)
 	wrapperConfigs := m.createWrapperConfigs(configs)

-	IngressLog.Infof("resource type %s, configs number %d", typ, len(wrapperConfigs))
+	var result []config.Config
 	switch typ {
 	case gvk.Gateway:
-		return m.convertGateways(wrapperConfigs)
+		result = m.convertGateways(wrapperConfigs)
 	case gvk.VirtualService:
-		return m.convertVirtualService(wrapperConfigs)
+		result = m.convertVirtualService(wrapperConfigs)
 	case gvk.DestinationRule:
-		return m.convertDestinationRule(wrapperConfigs)
+		result = m.convertDestinationRule(wrapperConfigs)
 	case gvk.ServiceEntry:
-		return m.convertServiceEntry(wrapperConfigs)
+		result = m.convertServiceEntry(wrapperConfigs)
 	case gvk.WasmPlugin:
-		return m.convertWasmPlugin(wrapperConfigs)
+		result = m.convertWasmPlugin(wrapperConfigs)
 	}
-
-	return nil
+	IngressLog.Infof("resource type %s, ingress number %d, convert configs number %d", typ, len(configs), len(result))
+	return result
 }

 func (m *IngressConfig) listFromGatewayControllers(typ config.GroupVersionKind, namespace string) []config.Config {
@@ -712,7 +712,6 @@ func (m *IngressConfig) convertDestinationRule(configs []common.WrapperConfig) [

 	if m.RegistryReconciler != nil {
 		drws := m.RegistryReconciler.GetAllDestinationRuleWrapper()
-		IngressLog.Infof("Found mcp destinationRules: %v", drws)
 		for _, destinationRuleWrapper := range drws {
 			serviceName := destinationRuleWrapper.ServiceKey.ServiceFQDN
 			dr, exist := destinationRules[serviceName]
@@ -882,7 +881,7 @@ func (m *IngressConfig) convertIstioWasmPlugin(obj *higressext.WasmPlugin) (*ext
 	if result.PluginConfig != nil {
 		return result, nil
 	}
-	if !obj.DefaultConfigDisable {
+	if !isBoolValueTrue(obj.DefaultConfigDisable) {
 		result.PluginConfig = obj.DefaultConfig
 	}
 	hasValidRule := false
@@ -894,7 +893,7 @@ func (m *IngressConfig) convertIstioWasmPlugin(obj *higressext.WasmPlugin) (*ext
 		}
 		var ruleValues []*_struct.Value
 		for _, rule := range obj.MatchRules {
-			if rule.ConfigDisable {
+			if isBoolValueTrue(rule.ConfigDisable) {
 				continue
 			}
 			if rule.Config == nil {
@@ -906,6 +905,7 @@ func (m *IngressConfig) convertIstioWasmPlugin(obj *higressext.WasmPlugin) (*ext
 				StructValue: rule.Config,
 			}

+			validRule := false
 			var matchItems []*_struct.Value
 			// match ingress
 			for _, ing := range rule.Ingress {
@@ -916,6 +916,7 @@ func (m *IngressConfig) convertIstioWasmPlugin(obj *higressext.WasmPlugin) (*ext
 				})
 			}
 			if len(matchItems) > 0 {
+				validRule = true
 				v.StructValue.Fields["_match_route_"] = &_struct.Value{
 					Kind: &_struct.Value_ListValue{
 						ListValue: &_struct.ListValue{
@@ -923,12 +924,9 @@ func (m *IngressConfig) convertIstioWasmPlugin(obj *higressext.WasmPlugin) (*ext
 						},
 					},
 				}
-				ruleValues = append(ruleValues, &_struct.Value{
-					Kind: v,
-				})
-				continue
 			}
 			// match service
+			matchItems = nil
 			for _, service := range rule.Service {
 				matchItems = append(matchItems, &_struct.Value{
 					Kind: &_struct.Value_StringValue{
@@ -937,6 +935,7 @@ func (m *IngressConfig) convertIstioWasmPlugin(obj *higressext.WasmPlugin) (*ext
 				})
 			}
 			if len(matchItems) > 0 {
+				validRule = true
 				v.StructValue.Fields["_match_service_"] = &_struct.Value{
 					Kind: &_struct.Value_ListValue{
 						ListValue: &_struct.ListValue{
@@ -944,12 +943,9 @@ func (m *IngressConfig) convertIstioWasmPlugin(obj *higressext.WasmPlugin) (*ext
 						},
 					},
 				}
-				ruleValues = append(ruleValues, &_struct.Value{
-					Kind: v,
-				})
-				continue
 			}
 			// match domain
+			matchItems = nil
 			for _, domain := range rule.Domain {
 				matchItems = append(matchItems, &_struct.Value{
 					Kind: &_struct.Value_StringValue{
@@ -957,19 +953,23 @@ func (m *IngressConfig) convertIstioWasmPlugin(obj *higressext.WasmPlugin) (*ext
 					},
 				})
 			}
-			if len(matchItems) == 0 {
+			if len(matchItems) > 0 {
+				validRule = true
+				v.StructValue.Fields["_match_domain_"] = &_struct.Value{
+					Kind: &_struct.Value_ListValue{
+						ListValue: &_struct.ListValue{
+							Values: matchItems,
+						},
+					},
+				}
+			}
+			if validRule {
+				ruleValues = append(ruleValues, &_struct.Value{
+					Kind: v,
+				})
+			} else {
 				return nil, fmt.Errorf("invalid match rule has no match condition, rule:%v", rule)
 			}
-			v.StructValue.Fields["_match_domain_"] = &_struct.Value{
-				Kind: &_struct.Value_ListValue{
-					ListValue: &_struct.ListValue{
-						Values: matchItems,
-					},
-				},
-			}
-			ruleValues = append(ruleValues, &_struct.Value{
-				Kind: v,
-			})
 		}
 		if len(ruleValues) > 0 {
 			hasValidRule = true
@@ -982,13 +982,17 @@ func (m *IngressConfig) convertIstioWasmPlugin(obj *higressext.WasmPlugin) (*ext
 			}
 		}
 	}
-	if !hasValidRule && obj.DefaultConfigDisable {
+	if !hasValidRule && isBoolValueTrue(obj.DefaultConfigDisable) {
 		return nil, nil
 	}
 	return result, nil

 }

+// isBoolValueTrue reports whether b is non-nil and wraps the value true.
+// A nil wrapper is treated the same as an explicit false.
+func isBoolValueTrue(b *wrappers.BoolValue) bool {
+	if b == nil {
+		return false
+	}
+	return b.Value
+}
+
 func (m *IngressConfig) AddOrUpdateWasmPlugin(clusterNamespacedName util.ClusterNamespacedName) {
 	if clusterNamespacedName.Namespace != m.namespace {
 		return
--- a/pkg/ingress/config/kingress_config.go
+++ b/pkg/ingress/config/kingress_config.go
@@ -493,7 +493,7 @@ func (m *KIngressConfig) HasSynced() bool {
 	defer m.mutex.RUnlock()

 	for _, remoteIngressController := range m.remoteIngressControllers {
-		IngressLog.Info("In Kingress Synced.", remoteIngressController)
+		IngressLog.Info("In Kingress Synced.")
 		if !remoteIngressController.HasSynced() {
 			return false
 		}
--- a/pkg/ingress/kube/annotations/downstreamtls.go
+++ b/pkg/ingress/kube/annotations/downstreamtls.go
@@ -15,6 +15,7 @@
 package annotations

 import (
+	"fmt"
 	"strings"

 	networking "istio.io/api/networking/v1alpha3"
@@ -27,9 +28,11 @@ import (
 )

 const (
-	authTLSSecret      = "auth-tls-secret"
-	sslCipher          = "ssl-cipher"
-	gatewaySdsCaSuffix = "-cacert"
+	authTLSSecret           = "auth-tls-secret"
+	sslCipher               = "ssl-cipher"
+	gatewaySdsCaSuffix      = "-cacert"
+	annotationMinTLSVersion = "tls-min-protocol-version"
+	annotationMaxTLSVersion = "tls-max-protocol-version"
 )

 var (
@@ -41,6 +44,8 @@ type DownstreamTLSConfig struct {
 	CipherSuites []string
 	Mode         networking.ServerTLSSettings_TLSmode
 	CASecretName types.NamespacedName
+	MinVersion   string
+	MaxVersion   string
 }

 type downstreamTLS struct{}
@@ -82,6 +87,14 @@ func (d downstreamTLS) Parse(annotations Annotations, config *Ingress, _ *Global
 		downstreamTLSConfig.CipherSuites = validCipherSuite
 	}

+	if minVersion, err := annotations.ParseStringASAP(annotationMinTLSVersion); err == nil {
+		downstreamTLSConfig.MinVersion = minVersion
+	}
+
+	if maxVersion, err := annotations.ParseStringASAP(annotationMaxTLSVersion); err == nil {
+		downstreamTLSConfig.MaxVersion = maxVersion
+	}
+
 	return nil
 }

@@ -107,11 +120,44 @@ func (d downstreamTLS) ApplyGateway(gateway *networking.Gateway, config *Ingress
 			if len(downstreamTLSConfig.CipherSuites) != 0 {
 				server.Tls.CipherSuites = downstreamTLSConfig.CipherSuites
 			}
+
+			if downstreamTLSConfig.MinVersion != "" {
+				if version, err := convertTLSVersion(downstreamTLSConfig.MinVersion); err != nil {
+					IngressLog.Errorf("Invalid minimum TLS version: %v", err)
+				} else {
+					server.Tls.MinProtocolVersion = version
+				}
+			}
+
+			if downstreamTLSConfig.MaxVersion != "" {
+				if version, err := convertTLSVersion(downstreamTLSConfig.MaxVersion); err != nil {
+					IngressLog.Errorf("Invalid maximum TLS version: %v", err)
+				} else {
+					server.Tls.MaxProtocolVersion = version
+				}
+			}
+
 		}
 	}
 }

 func needDownstreamTLS(annotations Annotations) bool {
 	return annotations.HasASAP(sslCipher) ||
-		annotations.HasASAP(authTLSSecret)
+		annotations.HasASAP(authTLSSecret) ||
+		annotations.HasASAP(annotationMinTLSVersion) ||
+		annotations.HasASAP(annotationMaxTLSVersion)
+}
+
+// convertTLSVersion translates one of the strings "TLSv1.0", "TLSv1.1",
+// "TLSv1.2" or "TLSv1.3" into the corresponding ServerTLSSettings protocol
+// constant. The comparison is exact; any other input returns TLS_AUTO
+// together with an error that lists the accepted values.
+func convertTLSVersion(version string) (networking.ServerTLSSettings_TLSProtocol, error) {
+	known := map[string]networking.ServerTLSSettings_TLSProtocol{
+		"TLSv1.0": networking.ServerTLSSettings_TLSV1_0,
+		"TLSv1.1": networking.ServerTLSSettings_TLSV1_1,
+		"TLSv1.2": networking.ServerTLSSettings_TLSV1_2,
+		"TLSv1.3": networking.ServerTLSSettings_TLSV1_3,
+	}
+	if protocol, ok := known[version]; ok {
+		return protocol, nil
+	}
+	return networking.ServerTLSSettings_TLS_AUTO, fmt.Errorf("invalid TLS version: %s. Valid values are: TLSv1.0, TLSv1.1, TLSv1.2, TLSv1.3", version)
 }
--- a/pkg/ingress/kube/annotations/downstreamtls_test.go
+++ b/pkg/ingress/kube/annotations/downstreamtls_test.go
@@ -26,11 +26,15 @@ var parser = downstreamTLS{}

 func TestParse(t *testing.T) {
 	testCases := []struct {
+		name   string
 		input  map[string]string
 		expect *DownstreamTLSConfig
 	}{
-		{},
 		{
+			name: "empty config",
+		},
+		{
+			name: "ssl cipher only",
 			input: map[string]string{
 				buildNginxAnnotationKey(sslCipher): "ECDHE-RSA-AES256-GCM-SHA384:AES128-SHA",
 			},
@@ -40,9 +44,24 @@ func TestParse(t *testing.T) {
 			},
 		},
 		{
+			name: "with TLS version config",
 			input: map[string]string{
-				buildNginxAnnotationKey(authTLSSecret): "test",
-				buildNginxAnnotationKey(sslCipher):     "ECDHE-RSA-AES256-GCM-SHA384:AES128-SHA",
+				buildNginxAnnotationKey(annotationMinTLSVersion): "TLSv1.2",
+				buildNginxAnnotationKey(annotationMaxTLSVersion): "TLSv1.3",
+			},
+			expect: &DownstreamTLSConfig{
+				Mode:       networking.ServerTLSSettings_SIMPLE,
+				MinVersion: "TLSv1.2",
+				MaxVersion: "TLSv1.3",
+			},
+		},
+		{
+			name: "complete config",
+			input: map[string]string{
+				buildNginxAnnotationKey(authTLSSecret):           "test",
+				buildNginxAnnotationKey(sslCipher):               "ECDHE-RSA-AES256-GCM-SHA384:AES128-SHA",
+				buildNginxAnnotationKey(annotationMinTLSVersion): "TLSv1.2",
+				buildNginxAnnotationKey(annotationMaxTLSVersion): "TLSv1.3",
 			},
 			expect: &DownstreamTLSConfig{
 				CASecretName: types.NamespacedName{
@@ -51,34 +70,79 @@ func TestParse(t *testing.T) {
 				},
 				Mode:         networking.ServerTLSSettings_MUTUAL,
 				CipherSuites: []string{"ECDHE-RSA-AES256-GCM-SHA384", "AES128-SHA"},
-			},
-		},
-		{
-			input: map[string]string{
-				buildHigressAnnotationKey(authTLSSecret):   "test/foo",
-				DefaultAnnotationsPrefix + "/" + sslCipher: "ECDHE-RSA-AES256-GCM-SHA384:AES128-SHA",
-			},
-			expect: &DownstreamTLSConfig{
-				CASecretName: types.NamespacedName{
-					Namespace: "test",
-					Name:      "foo",
-				},
-				Mode:         networking.ServerTLSSettings_MUTUAL,
-				CipherSuites: []string{"ECDHE-RSA-AES256-GCM-SHA384", "AES128-SHA"},
+				MinVersion:   "TLSv1.2",
+				MaxVersion:   "TLSv1.3",
 			},
 		},
 	}

-	for _, testCase := range testCases {
-		t.Run("", func(t *testing.T) {
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
 			config := &Ingress{
 				Meta: Meta{
 					Namespace: "foo",
 				},
 			}
-			_ = parser.Parse(testCase.input, config, nil)
-			if !reflect.DeepEqual(testCase.expect, config.DownstreamTLS) {
-				t.Fatalf("Should be equal")
+			err := parser.Parse(tc.input, config, nil)
+			if err != nil {
+				t.Fatalf("Parse failed: %v", err)
+			}
+			if !reflect.DeepEqual(tc.expect, config.DownstreamTLS) {
+				t.Fatalf("Parse result mismatch:\nExpect: %+v\nGot: %+v", tc.expect, config.DownstreamTLS)
+			}
+		})
+	}
+}
+
+func TestConvertTLSVersion(t *testing.T) {
+	testCases := []struct {
+		name    string
+		version string
+		expect  networking.ServerTLSSettings_TLSProtocol
+		wantErr bool
+	}{
+		{
+			name:    "TLS 1.0",
+			version: "TLSv1.0",
+			expect:  networking.ServerTLSSettings_TLSV1_0,
+		},
+		{
+			name:    "TLS 1.1",
+			version: "TLSv1.1",
+			expect:  networking.ServerTLSSettings_TLSV1_1,
+		},
+		{
+			name:    "TLS 1.2",
+			version: "TLSv1.2",
+			expect:  networking.ServerTLSSettings_TLSV1_2,
+		},
+		{
+			name:    "TLS 1.3",
+			version: "TLSv1.3",
+			expect:  networking.ServerTLSSettings_TLSV1_3,
+		},
+		{
+			name:    "invalid version",
+			version: "invalid",
+			expect:  networking.ServerTLSSettings_TLS_AUTO,
+			wantErr: true,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			result, err := convertTLSVersion(tc.version)
+			if tc.wantErr {
+				if err == nil {
+					t.Error("Expected error but got none")
+				}
+			} else {
+				if err != nil {
+					t.Errorf("Unexpected error: %v", err)
+				}
+				if result != tc.expect {
+					t.Errorf("Expected %v but got %v", tc.expect, result)
+				}
 			}
 		})
 	}
@@ -86,11 +150,13 @@ func TestParse(t *testing.T) {

 func TestApplyGateway(t *testing.T) {
 	testCases := []struct {
+		name   string
 		input  *networking.Gateway
 		config *Ingress
 		expect *networking.Gateway
 	}{
 		{
+			name: "apply TLS version",
 			input: &networking.Gateway{
 				Servers: []*networking.Server{
 					{
@@ -105,7 +171,8 @@ func TestApplyGateway(t *testing.T) {
 			},
 			config: &Ingress{
 				DownstreamTLS: &DownstreamTLSConfig{
-					CipherSuites: []string{"ECDHE-RSA-AES256-GCM-SHA384"},
+					MinVersion: "TLSv1.2",
+					MaxVersion: "TLSv1.3",
 				},
 			},
 			expect: &networking.Gateway{
@@ -115,14 +182,16 @@ func TestApplyGateway(t *testing.T) {
 							Protocol: "HTTPS",
 						},
 						Tls: &networking.ServerTLSSettings{
-							Mode:         networking.ServerTLSSettings_SIMPLE,
-							CipherSuites: []string{"ECDHE-RSA-AES256-GCM-SHA384"},
+							Mode:               networking.ServerTLSSettings_SIMPLE,
+							MinProtocolVersion: networking.ServerTLSSettings_TLSV1_2,
+							MaxProtocolVersion: networking.ServerTLSSettings_TLSV1_3,
 						},
 					},
 				},
 			},
 		},
 		{
+			name: "complete config",
 			input: &networking.Gateway{
 				Servers: []*networking.Server{
 					{
@@ -144,24 +213,28 @@ func TestApplyGateway(t *testing.T) {
 					},
 					Mode:         networking.ServerTLSSettings_MUTUAL,
 					CipherSuites: []string{"ECDHE-RSA-AES256-GCM-SHA384"},
+					MinVersion:   "TLSv1.2",
+					MaxVersion:   "TLSv1.3",
 				},
 			},
 			expect: &networking.Gateway{
 				Servers: []*networking.Server{
-					{
-						Port: &networking.Port{
-							Protocol: "HTTPS",
-						},
+					{Port: &networking.Port{
+						Protocol: "HTTPS",
+					},
 						Tls: &networking.ServerTLSSettings{
-							CredentialName: "kubernetes-ingress://cluster/foo/bar",
-							Mode:           networking.ServerTLSSettings_MUTUAL,
-							CipherSuites:   []string{"ECDHE-RSA-AES256-GCM-SHA384"},
+							CredentialName:     "kubernetes-ingress://cluster/foo/bar",
+							Mode:               networking.ServerTLSSettings_MUTUAL,
+							CipherSuites:       []string{"ECDHE-RSA-AES256-GCM-SHA384"},
+							MinProtocolVersion: networking.ServerTLSSettings_TLSV1_2,
+							MaxProtocolVersion: networking.ServerTLSSettings_TLSV1_3,
 						},
 					},
 				},
 			},
 		},
 		{
+			name: "invalid TLS version",
 			input: &networking.Gateway{
 				Servers: []*networking.Server{
 					{
@@ -169,20 +242,15 @@ func TestApplyGateway(t *testing.T) {
 							Protocol: "HTTPS",
 						},
 						Tls: &networking.ServerTLSSettings{
-							Mode:           networking.ServerTLSSettings_SIMPLE,
-							CredentialName: "kubernetes-ingress://cluster/foo/bar",
+							Mode: networking.ServerTLSSettings_SIMPLE,
 						},
 					},
 				},
 			},
 			config: &Ingress{
 				DownstreamTLS: &DownstreamTLSConfig{
-					CASecretName: types.NamespacedName{
-						Namespace: "foo",
-						Name:      "bar-cacert",
-					},
-					Mode:         networking.ServerTLSSettings_MUTUAL,
-					CipherSuites: []string{"ECDHE-RSA-AES256-GCM-SHA384"},
+					MinVersion: "invalid",
+					MaxVersion: "invalid",
 				},
 			},
 			expect: &networking.Gateway{
@@ -192,48 +260,10 @@ func TestApplyGateway(t *testing.T) {
 							Protocol: "HTTPS",
 						},
 						Tls: &networking.ServerTLSSettings{
-							CredentialName: "kubernetes-ingress://cluster/foo/bar",
-							Mode:           networking.ServerTLSSettings_MUTUAL,
-							CipherSuites:   []string{"ECDHE-RSA-AES256-GCM-SHA384"},
-						},
-					},
-				},
-			},
-		},
-		{
-			input: &networking.Gateway{
-				Servers: []*networking.Server{
-					{
-						Port: &networking.Port{
-							Protocol: "HTTPS",
-						},
-						Tls: &networking.ServerTLSSettings{
-							Mode:           networking.ServerTLSSettings_SIMPLE,
-							CredentialName: "kubernetes-ingress://cluster/foo/bar",
-						},
-					},
-				},
-			},
-			config: &Ingress{
-				DownstreamTLS: &DownstreamTLSConfig{
-					CASecretName: types.NamespacedName{
-						Namespace: "bar",
-						Name:      "foo",
-					},
-					Mode:         networking.ServerTLSSettings_MUTUAL,
-					CipherSuites: []string{"ECDHE-RSA-AES256-GCM-SHA384"},
-				},
-			},
-			expect: &networking.Gateway{
-				Servers: []*networking.Server{
-					{
-						Port: &networking.Port{
-							Protocol: "HTTPS",
-						},
-						Tls: &networking.ServerTLSSettings{
-							CredentialName: "kubernetes-ingress://cluster/foo/bar",
-							Mode:           networking.ServerTLSSettings_SIMPLE,
-							CipherSuites:   []string{"ECDHE-RSA-AES256-GCM-SHA384"},
+							Mode: networking.ServerTLSSettings_SIMPLE,
+							// Invalid versions should default to TLS_AUTO
+							MinProtocolVersion: networking.ServerTLSSettings_TLS_AUTO,
+							MaxProtocolVersion: networking.ServerTLSSettings_TLS_AUTO,
 						},
 					},
 				},
@@ -241,11 +271,59 @@ func TestApplyGateway(t *testing.T) {
 		},
 	}

-	for _, testCase := range testCases {
-		t.Run("", func(t *testing.T) {
-			parser.ApplyGateway(testCase.input, testCase.config)
-			if !reflect.DeepEqual(testCase.input, testCase.expect) {
-				t.Fatalf("Should be equal")
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			parser.ApplyGateway(tc.input, tc.config)
+			if !reflect.DeepEqual(tc.input, tc.expect) {
+				t.Fatalf("ApplyGateway result mismatch for %s:\nExpect: %+v\nGot: %+v",
+					tc.name, tc.expect, tc.input)
+			}
+		})
+	}
+}
+
+func TestNeedDownstreamTLS(t *testing.T) {
+	testCases := []struct {
+		name        string
+		annotations map[string]string
+		expect      bool
+	}{
+		{
+			name:        "empty annotations",
+			annotations: map[string]string{},
+			expect:      false,
+		},
+		{
+			name: "with ssl cipher",
+			annotations: map[string]string{
+				buildNginxAnnotationKey(sslCipher): "ECDHE-RSA-AES256-GCM-SHA384",
+			},
+			expect: true,
+		},
+		{
+			name: "with TLS version",
+			annotations: map[string]string{
+				buildNginxAnnotationKey(annotationMinTLSVersion): "TLSv1.2",
+			},
+			expect: true,
+		},
+		{
+			name: "with multiple TLS configs",
+			annotations: map[string]string{
+				buildNginxAnnotationKey(sslCipher):               "ECDHE-RSA-AES256-GCM-SHA384",
+				buildNginxAnnotationKey(annotationMinTLSVersion): "TLSv1.2",
+				buildNginxAnnotationKey(annotationMaxTLSVersion): "TLSv1.3",
+			},
+			expect: true,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			result := needDownstreamTLS(tc.annotations)
+			if result != tc.expect {
+				t.Errorf("needDownstreamTLS() for %s = %v, want %v",
+					tc.name, result, tc.expect)
 			}
 		})
 	}
--- a/pkg/ingress/kube/ingress/status.go
+++ b/pkg/ingress/kube/ingress/status.go
@@ -81,8 +81,6 @@ func (s *statusSyncer) runUpdateStatus() error {
 		return err
 	}

-	IngressLog.Debugf("found number %d of svc", len(svcList))
-
 	lbStatusList := common.GetLbStatusListV1Beta1(svcList)
 	if len(lbStatusList) == 0 {
 		return nil
--- a/pkg/ingress/kube/ingressv1/controller.go
+++ b/pkg/ingress/kube/ingressv1/controller.go
@@ -162,6 +162,7 @@ func (c *controller) onEvent(namespacedName types.NamespacedName) error {
 			delete(c.ingresses, namespacedName.String())
 			c.mutex.Unlock()
 		} else {
+			IngressLog.Warnf("ingressLister Get failed, ingress: %s, err: %v", namespacedName, err)
 			return err
 		}
 	}
@@ -171,7 +172,7 @@ func (c *controller) onEvent(namespacedName types.NamespacedName) error {
 		return nil
 	}

-	IngressLog.Debugf("ingress: %s, event: %s", namespacedName, event)
+	IngressLog.Infof("ingress: %s, event: %s", namespacedName, event)

 	// we should check need process only when event is not delete,
 	// if it is delete event, and previously processed, we need to process too.
@@ -181,7 +182,7 @@ func (c *controller) onEvent(namespacedName types.NamespacedName) error {
 			return err
 		}
 		if !shouldProcess {
-			IngressLog.Infof("no need process, ingress %s", namespacedName)
+			IngressLog.Infof("no need process, ingress: %s", namespacedName)
 			return nil
 		}
 	}
@@ -279,10 +280,17 @@ func (c *controller) List() []config.Config {
 	for _, raw := range c.ingressInformer.Informer.GetStore().List() {
 		ing, ok := raw.(*ingress.Ingress)
 		if !ok {
+			IngressLog.Warnf("get ingress from informer failed: %v", raw)
 			continue
 		}

-		if should, err := c.shouldProcessIngress(ing); !should || err != nil {
+		should, err := c.shouldProcessIngress(ing)
+		if err != nil {
+			IngressLog.Warnf("check should process ingress failed: %v", err)
+			continue
+		}
+		if !should {
+			IngressLog.Debugf("no need process ingress: %s/%s", ing.Namespace, ing.Name)
 			continue
 		}

--- a/pkg/ingress/kube/ingressv1/status.go
+++ b/pkg/ingress/kube/ingressv1/status.go
@@ -81,8 +81,6 @@ func (s *statusSyncer) runUpdateStatus() error {
 		return err
 	}

-	IngressLog.Debugf("found number %d of svc", len(svcList))
-
 	lbStatusList := common.GetLbStatusListV1(svcList)
 	if len(lbStatusList) == 0 {
 		return nil
--- a/pkg/ingress/kube/kingress/status.go
+++ b/pkg/ingress/kube/kingress/status.go
@@ -77,7 +77,6 @@ func (s *statusSyncer) runUpdateStatus() error {
 		return err
 	}

-	IngressLog.Debugf("found number %d of svc", len(svcList))
 	lbStatusList := common2.GetLbStatusList(svcList)
 	return s.updateStatus(lbStatusList)
 }
--- a/pkg/ingress/log/log.go
+++ b/pkg/ingress/log/log.go
@@ -14,6 +14,6 @@

 package log

-import "istio.io/pkg/log"
+import "istio.io/istio/pkg/log"

-var IngressLog = log.RegisterScope("ingress", "Higress Ingress process.", 0)
+var IngressLog = log.RegisterScope("ingress", "Higress Ingress process.")
--- a/plugins/wasm-cpp/extensions/model_mapper/README.md
+++ b/plugins/wasm-cpp/extensions/model_mapper/README.md
@@ -1,17 +1,15 @@
-## 功能说明
+# 功能说明
 `model-mapper`插件实现了基于LLM协议中的model参数路由的功能

-## 配置字段
+# 配置字段

 | 名称                 | 数据类型        | 填写要求                | 默认值                   | 描述                                                                                                                                                                                                                                                         |
 | -----------          | --------------- | ----------------------- | ------                   | -------------------------------------------                                                                                                                                                                                                                  |
 | `modelKey`           | string          | 选填                    | model                    | 请求body中model参数的位置                                                                                                                                                                                                                                    |
 | `modelMapping`       | map of string   | 选填                    | -                        | AI 模型映射表，用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>1. 支持前缀匹配。例如用 "gpt-3-*" 匹配所有名称以“gpt-3-”开头的模型；<br/>2. 支持使用 "*" 为键来配置通用兜底映射关系；<br/>3. 如果映射的目标名称为空字符串 ""，则表示保留原模型名称。 |
-| `enableOnPathSuffix` | array of string | 选填                    | ["/v1/chat/completions"] | 只对这些特定路径后缀的请求生效                                                                                                                              ## 运行属性
+| `enableOnPathSuffix` | array of string | 选填                    | ["/v1/chat/completions"] | 只对这些特定路径后缀的请求生效                                                                                                                                                                                                                               |
+

-插件执行阶段：认证阶段
-插件执行优先级：800
-                                                                                                 |
 ## 效果说明

 如下配置
--- a/plugins/wasm-cpp/extensions/model_router/README.md
+++ b/plugins/wasm-cpp/extensions/model_router/README.md
@@ -1,7 +1,7 @@
-## 功能说明
+# 功能说明
 `model-router`插件实现了基于LLM协议中的model参数路由的功能

-## 配置字段
+# 配置字段

 | 名称                 | 数据类型        | 填写要求                | 默认值                   | 描述                                                  |
 | -----------          | --------------- | ----------------------- | ------                   | -------------------------------------------           |
--- a/plugins/wasm-cpp/extensions/request_block/BUILD
+++ b/plugins/wasm-cpp/extensions/request_block/BUILD
@@ -26,6 +26,7 @@ proxy_wasm_cc_binary(
        "@com_google_absl//absl/time",
        "//common:json_util",
        "//common:http_util",
+        "//common:regex_util",
        "//common:rule_util",
    ],
 )
@@ -44,6 +45,7 @@ cc_library(
        "//common:json_util",
        "@proxy_wasm_cpp_host//:lib",
        "//common:http_util_nullvm",
+        "//common:regex_util",
        "//common:rule_util_nullvm",
    ],
 )
--- a/plugins/wasm-cpp/extensions/request_block/README.md
+++ b/plugins/wasm-cpp/extensions/request_block/README.md
@@ -1,31 +1,22 @@
---
-title: 请求屏蔽
-keywords: [higress,request block]
-description: 请求屏蔽插件配置参考
---
-
-## 功能说明
+# 功能说明
 `request-block`插件实现了基于 URL、请求头等特征屏蔽 HTTP 请求，可以用于防护部分站点资源不对外部暴露

-## 运行属性
+# 配置字段

-插件执行阶段：`鉴权阶段`
-插件执行优先级：`320`
+| 名称              | 数据类型        | 填写要求                                                                                                | 默认值   | 描述                                     |
+| --------          | --------        | --------                                                                                                | -------- | --------                                 |
+| block_urls        | array of string | 选填，`block_urls`,`block_exact_urls`,`block_regexp_urls`,`block_headers`,`block_bodies` 中至少必填一项 | -        | 配置用于匹配需要屏蔽 URL 的字符串        |
+| block_exact_urls  | array of string | 选填，`block_urls`,`block_exact_urls`,`block_regexp_urls`,`block_headers`,`block_bodies` 中至少必填一项 | -        | 配置用于匹配需要精确屏蔽 URL 的字符串    |
+| block_regexp_urls | array of string | 选填，`block_urls`,`block_exact_urls`,`block_regexp_urls`,`block_headers`,`block_bodies` 中至少必填一项 | -        | 配置用于匹配需要屏蔽 URL 的正则表达式    |
+| block_headers     | array of string | 选填，`block_urls`,`block_exact_urls`,`block_regexp_urls`,`block_headers`,`block_bodies` 中至少必填一项 | -        | 配置用于匹配需要屏蔽请求 Header 的字符串 |
+| block_bodies      | array of string | 选填，`block_urls`,`block_exact_urls`,`block_regexp_urls`,`block_headers`,`block_bodies` 中至少必填一项 | -        | 配置用于匹配需要屏蔽请求 Body 的字符串   |
+| blocked_code      | number          | 选填                                                                                                    | 403      | 配置请求被屏蔽时返回的 HTTP 状态码       |
+| blocked_message   | string          | 选填                                                                                                    | -        | 配置请求被屏蔽时返回的 HTTP 应答 Body    |
+| case_sensitive    | bool            | 选填                                                                                                    | true     | 配置匹配时是否区分大小写，默认区分       |

-## 配置字段
+# 配置示例

-| 名称 | 数据类型 | 填写要求 |  默认值 | 描述 |
-| -------- | -------- | -------- | -------- | -------- |
-|  block_urls     |  array of string     | 选填，`block_urls`,`block_headers`,`block_bodies` 中至少必填一项     |   -  |  配置用于匹配需要屏蔽 URL 的字符串   |
-|  block_headers     |  array of string     | 选填，`block_urls`,`block_headers`,`block_bodies` 中至少必填一项     |   -  |  配置用于匹配需要屏蔽请求 Header 的字符串   |
-|  block_bodies     |  array of string     | 选填，`block_urls`,`block_headers`,`block_bodies` 中至少必填一项     |   -  |  配置用于匹配需要屏蔽请求 Body 的字符串   |
-|  blocked_code     |  number     | 选填     |   403  |  配置请求被屏蔽时返回的 HTTP 状态码   |
-|  blocked_message     |  string     | 选填     |   -  |  配置请求被屏蔽时返回的 HTTP 应答 Body   |
-|  case_sensitive     |  bool     | 选填     |   true  |  配置匹配时是否区分大小写，默认区分   |
-
-## 配置示例
-
-### 屏蔽请求 url 路径
+## 屏蔽请求 url 路径
 ```yaml
 block_urls:
 - swagger.html
@@ -40,7 +31,36 @@ curl http://example.com?foo=Bar
 curl http://exmaple.com/Swagger.html
 ```

-### 屏蔽请求 header
+## 屏蔽精确匹配的请求 url 路径
+
+```yaml
+block_exact_urls:
+- /swagger.html?foo=bar
+case_sensitive: false
+```
+
+根据该配置，下列请求将被禁止访问：
+
+```bash
+curl http://example.com/Swagger.html?foo=Bar
+```
+
+## 屏蔽正则匹配的请求 url 路径
+
+```yaml
+block_regexp_urls:
+- .*swagger.*
+case_sensitive: false
+```
+
+根据该配置，下列请求将被禁止访问：
+
+```bash
+curl http://example.com/Swagger.html?foo=Bar
+```
+
+
+## 屏蔽请求 header
 ```yaml
 block_headers:
 - example-key
@@ -54,9 +74,9 @@ curl http://example.com -H 'example-key: 123'
 curl http://exmaple.com -H 'my-header: example-value'
 ```

-### 屏蔽请求 body
+## 屏蔽请求 body
 ```yaml
-block_bodies:
+block_bodys:
 - "hello world"
 case_sensitive: false
 ```
@@ -68,8 +88,30 @@ curl http://example.com -d 'Hello World'
 curl http://exmaple.com -d 'hello world'
 ```

+## 对特定路由或域名开启
+```yaml
+# 使用 _rules_ 字段进行细粒度规则配置
+_rules_:
+# 规则一：按路由名称匹配生效
+- _match_route_:
+  - route-a
+  - route-b
+  block_bodys: 
+  - "hello world"
+# 规则二：按域名匹配生效
+- _match_domain_:
+  - "*.example.com"
+  - test.com
+  block_urls: 
+  - "swagger.html"
+  block_bodys:
+  - "hello world"
+```
+此例 `_match_route_` 中指定的 `route-a` 和 `route-b` 即在创建网关路由时填写的路由名称，当匹配到这两个路由时，将使用此段配置；
+此例 `_match_domain_` 中指定的 `*.example.com` 和 `test.com` 用于匹配请求的域名，当发现域名匹配时，将使用此段配置；
+配置的匹配生效顺序，将按照 `_rules_` 下规则的排列顺序，匹配第一个规则后生效对应配置，后续规则将被忽略。

-## 请求 Body 大小限制
+# 请求 Body 大小限制

-当配置了 `block_bodies` 时，仅支持小于 32 MB 的请求 Body 进行匹配。若请求 Body 大于此限制，并且不存在匹配到的 `block_urls` 和 `block_headers` 项时，不会对该请求执行屏蔽操作
-当配置了 `block_bodies` 时，若请求 Body 超过全局配置 DownstreamConnectionBufferLimits，将返回 `413 Payload Too Large`
+当配置了 `block_bodys` 时，仅支持小于 32 MB 的请求 Body 进行匹配。若请求 Body 大于此限制，并且不存在匹配到的 `block_urls` 和 `block_headers` 项时，不会对该请求执行屏蔽操作
+当配置了 `block_bodys` 时，若请求 Body 超过全局配置 DownstreamConnectionBufferLimits，将返回 `413 Payload Too Large`, 请在参数配置页调高此项。注意调高此参数配置后，网关内存使用将有显著增加。
--- a/plugins/wasm-cpp/extensions/request_block/plugin.cc
+++ b/plugins/wasm-cpp/extensions/request_block/plugin.cc
@@ -15,6 +15,7 @@
 #include "extensions/request_block/plugin.h"

 #include <array>
+#include <memory>

 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
@@ -89,6 +90,48 @@ bool PluginRootContext::parsePluginConfig(const json& configuration,
    LOG_WARN("failed to parse configuration for block_urls.");
    return false;
  }
+  if (!JsonArrayIterate(
+          configuration, "block_exact_urls", [&](const json& item) -> bool {
+            auto url = JsonValueAs<std::string>(item);
+            if (url.second != Wasm::Common::JsonParserResultDetail::OK) {
+              LOG_WARN("cannot parse block_exact_urls");
+              return false;
+            }
+            if (rule.case_sensitive) {
+              rule.block_exact_urls.push_back(std::move(url.first.value()));
+            } else {
+              rule.block_exact_urls.push_back(
+                  absl::AsciiStrToLower(url.first.value()));
+            }
+            return true;
+          })) {
+    LOG_WARN("failed to parse configuration for block_exact_urls.");
+    return false;
+  }
+  if (!JsonArrayIterate(
+          configuration, "block_regexp_urls", [&](const json& item) -> bool {
+            auto url = JsonValueAs<std::string>(item);
+            if (url.second != Wasm::Common::JsonParserResultDetail::OK) {
+              LOG_WARN("cannot parse block_regexp_urls");
+              return false;
+            }
+            std::string regex;
+            if (rule.case_sensitive) {
+              regex = url.first.value();
+            } else {
+              regex = absl::AsciiStrToLower(url.first.value());
+            }
+            auto re = std::make_unique<ReMatcher>(regex);
+            if (!re->error().empty()) {
+              LOG_WARN(re->error());
+              return false;
+            }
+            rule.block_regexp_urls.push_back(std::move(re));
+            return true;
+          })) {
+    LOG_WARN("failed to parse configuration for block_regexp_urls.");
+    return false;
+  }
  if (!JsonArrayIterate(
          configuration, "block_headers", [&](const json& item) -> bool {
            auto header = JsonValueAs<std::string>(item);
@@ -125,8 +168,28 @@ bool PluginRootContext::parsePluginConfig(const json& configuration,
    LOG_WARN("failed to parse configuration for block_bodys.");
    return false;
  }
+  // compatible: also accept the "block_bodies" spelling of block_bodys
+  if (!JsonArrayIterate(
+          configuration, "block_bodies", [&](const json& item) -> bool {
+            auto body = JsonValueAs<std::string>(item);
+            if (body.second != Wasm::Common::JsonParserResultDetail::OK) {
+              LOG_WARN("cannot parse block_bodies");
+              return false;
+            }
+            if (rule.case_sensitive) {
+              rule.block_bodys.push_back(std::move(body.first.value()));
+            } else {
+              rule.block_bodys.push_back(
+                  absl::AsciiStrToLower(body.first.value()));
+            }
+            return true;
+          })) {
+    LOG_WARN("failed to parse configuration for block_bodies.");
+    return false;
+  }
  if (rule.block_bodys.empty() && rule.block_headers.empty() &&
-      rule.block_urls.empty()) {
+      rule.block_urls.empty() && rule.block_exact_urls.empty() &&
+      rule.block_regexp_urls.empty()) {
    LOG_WARN("there is no block rules");
    return false;
  }
@@ -172,6 +235,18 @@ bool PluginRootContext::checkHeader(const RequestBlockConfigRule& rule,
      urlstr = absl::AsciiStrToLower(request_url);
      url = urlstr;
    }
+    for (const auto& block_url : rule.block_exact_urls) {
+      if (url == block_url) {
+        sendLocalResponse(rule.blocked_code, "", rule.blocked_message, {});
+        return false;
+      }
+    }
+    for (const auto& block_url : rule.block_regexp_urls) {
+      if (block_url->match(url)) {
+        sendLocalResponse(rule.blocked_code, "", rule.blocked_message, {});
+        return false;
+      }
+    }
    for (const auto& block_url : rule.block_urls) {
      if (absl::StrContains(url, block_url)) {
        sendLocalResponse(rule.blocked_code, "", rule.blocked_message, {});
--- a/plugins/wasm-cpp/extensions/request_block/plugin.h
+++ b/plugins/wasm-cpp/extensions/request_block/plugin.h
@@ -22,6 +22,7 @@
 #include <unordered_map>

 #include "common/http_util.h"
+#include "common/regex.h"
 #include "common/route_rule_matcher.h"
 #define ASSERT(_X) assert(_X)

@@ -39,11 +40,16 @@ namespace request_block {

 #endif

+using ReMatcher = Wasm::Common::Regex::CompiledGoogleReMatcher;
+using ReMatcherPtr = std::unique_ptr<ReMatcher>;
+
 struct RequestBlockConfigRule {
  int blocked_code = 403;
  std::string blocked_message;
  bool case_sensitive = true;
  std::vector<std::string> block_urls;
+  std::vector<std::string> block_exact_urls;
+  std::vector<ReMatcherPtr> block_regexp_urls;
  std::vector<std::string> block_headers;
  std::vector<std::string> block_bodys;
 };
--- a/plugins/wasm-cpp/extensions/request_block/plugin_test.cc
+++ b/plugins/wasm-cpp/extensions/request_block/plugin_test.cc
@@ -127,6 +127,8 @@ TEST_F(RequestBlockTest, CaseSensitive) {
  std::string configuration = R"(
 {
   "block_urls": ["?foo=bar", "swagger.html"],
+   "block_exact_urls": ["/hello.html?abc=123"],
+   "block_regexp_urls": [".*monkey.*"],
   "block_headers": ["headerKey", "headerValue"],
   "block_bodys": ["Hello World"]
 })";
@@ -150,6 +152,22 @@ TEST_F(RequestBlockTest, CaseSensitive) {
  EXPECT_EQ(context_->onRequestHeaders(0, false),
            FilterHeadersStatus::StopIteration);

+  path_ = "/hello.html?abc=123";
+  EXPECT_CALL(*mock_context_, sendLocalResponse(403, testing::_, testing::_,
+                                                testing::_, testing::_));
+  EXPECT_EQ(context_->onRequestHeaders(0, false),
+            FilterHeadersStatus::StopIteration);
+
+  path_ = "/black/Monkey";
+  EXPECT_EQ(context_->onRequestHeaders(0, false),
+            FilterHeadersStatus::Continue);
+
+  path_ = "/black/monkey";
+  EXPECT_CALL(*mock_context_, sendLocalResponse(403, testing::_, testing::_,
+                                                testing::_, testing::_));
+  EXPECT_EQ(context_->onRequestHeaders(0, false),
+            FilterHeadersStatus::StopIteration);
+
  path_ = "";
  headers_ = {{"headerKey", "123"}};
  EXPECT_CALL(*mock_context_, sendLocalResponse(403, testing::_, testing::_,
@@ -188,6 +206,8 @@ TEST_F(RequestBlockTest, CaseInsensitive) {
   "blocked_code": 404,
   "block_urls": ["?foo=bar", "swagger.html"],
   "block_headers": ["headerKey", "headerValue"],
+   "block_exact_urls": ["/hello.html?abc=123"],
+   "block_regexp_urls": [".*monkey.*"],
   "block_bodys": ["Hello World"]
 })";

@@ -206,6 +226,24 @@ TEST_F(RequestBlockTest, CaseInsensitive) {
  EXPECT_EQ(context_->onRequestHeaders(0, false),
            FilterHeadersStatus::StopIteration);

+  path_ = "/Hello.html?abc=123";
+  EXPECT_CALL(*mock_context_, sendLocalResponse(404, testing::_, testing::_,
+                                                testing::_, testing::_));
+  EXPECT_EQ(context_->onRequestHeaders(0, false),
+            FilterHeadersStatus::StopIteration);
+
+  path_ = "/black/Monkey";
+  EXPECT_CALL(*mock_context_, sendLocalResponse(404, testing::_, testing::_,
+                                                testing::_, testing::_));
+  EXPECT_EQ(context_->onRequestHeaders(0, false),
+            FilterHeadersStatus::StopIteration);
+
+  path_ = "/black/monkey";
+  EXPECT_CALL(*mock_context_, sendLocalResponse(404, testing::_, testing::_,
+                                                testing::_, testing::_));
+  EXPECT_EQ(context_->onRequestHeaders(0, false),
+            FilterHeadersStatus::StopIteration);
+
  path_ = "";
  headers_ = {{"headerkey", "123"}};
  EXPECT_CALL(*mock_context_, sendLocalResponse(404, testing::_, testing::_,
@@ -232,6 +270,26 @@ TEST_F(RequestBlockTest, CaseInsensitive) {
            FilterDataStatus::StopIterationNoBuffer);
 }

+TEST_F(RequestBlockTest, Bodies) {
+  std::string configuration = R"(
+{
+   "case_sensitive": false,
+   "blocked_code": 404,
+   "block_bodies": ["Hello World"]
+})";
+
+  config_.set({configuration.data(), configuration.size()});
+  EXPECT_TRUE(root_context_->configure(configuration.size()));
+
+  body_.set("hello world");
+  EXPECT_EQ(context_->onRequestHeaders(0, false),
+            FilterHeadersStatus::Continue);
+  EXPECT_CALL(*mock_context_, sendLocalResponse(404, testing::_, testing::_,
+                                                testing::_, testing::_));
+  EXPECT_EQ(context_->onRequestBody(11, true),
+            FilterDataStatus::StopIterationNoBuffer);
+}
+
 }  // namespace request_block
 }  // namespace null_plugin
 }  // namespace proxy_wasm
--- a/plugins/wasm-go/examples/custom-log/config.yaml
+++ b/plugins/wasm-go/examples/custom-log/config.yaml
@@ -0,0 +1,68 @@
+static_resources:
+  listeners:
+  - name: listener_0
+    address:
+      socket_address:
+        protocol: TCP
+        address: 0.0.0.0
+        port_value: 8080
+    filter_chains:
+    - filters:
+      - name: envoy.filters.network.http_connection_manager
+        typed_config:
+          "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
+          stat_prefix: ingress_http
+          access_log:
+          - name: envoy.access_loggers.file
+            typed_config:
+              "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
+              log_format:
+                text_format_source:
+                  inline_string: "{\"custom_log\":\"%FILTER_STATE(wasm.custom_log:PLAIN)%\",\"ai_log\":\"%FILTER_STATE(wasm.ai_log:PLAIN)%\"}
+                  
+                  "
+              path: /dev/stdout
+          route_config:
+            name: local_route
+            virtual_hosts:
+            - name: local_service
+              domains: ["*"]
+              routes:
+              - name: get
+                match:
+                  prefix: "/get"
+                route:
+                  cluster: httpbin
+          http_filters:
+          - name: test
+            typed_config:
+              "@type": type.googleapis.com/udpa.type.v1.TypedStruct
+              type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
+              value:
+                config:
+                  name: test
+                  vm_config:
+                    runtime: envoy.wasm.runtime.v8
+                    code:
+                      local:
+                        filename: main.wasm
+                  configuration:
+                    "@type": "type.googleapis.com/google.protobuf.StringValue"
+                    value: {}
+          - name: envoy.filters.http.router
+            typed_config:
+              "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
+  clusters:
+  - name: httpbin
+    connect_timeout: 600s
+    type: STRICT_DNS
+    lb_policy: ROUND_ROBIN
+    load_assignment:
+      cluster_name: httpbin
+      endpoints:
+        - lb_endpoints:
+          - endpoint:
+              address:
+                socket_address:
+                  address: httpbin.org
+                  port_value: 80
--- a/plugins/wasm-go/examples/custom-log/go.mod
+++ b/plugins/wasm-go/examples/custom-log/go.mod
@@ -0,0 +1,20 @@
+module github.com/alibaba/higress/plugins/wasm-go/extensions/custom-logs
+
+go 1.18
+
+replace github.com/alibaba/higress/plugins/wasm-go => ../..
+
+require (
+	github.com/alibaba/higress/plugins/wasm-go v0.0.0
+	github.com/higress-group/proxy-wasm-go-sdk v1.0.0
+)
+
+require (
+	github.com/google/uuid v1.3.0 // indirect
+	github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 // indirect
+	github.com/magefile/mage v1.14.0 // indirect
+	github.com/tidwall/gjson v1.17.3 // indirect
+	github.com/tidwall/match v1.1.1 // indirect
+	github.com/tidwall/pretty v1.2.0 // indirect
+	github.com/tidwall/resp v0.1.1 // indirect
+)
--- a/plugins/wasm-go/examples/custom-log/go.sum
+++ b/plugins/wasm-go/examples/custom-log/go.sum
@@ -0,0 +1,20 @@
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
+github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
+github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKEak5MWjBDhWjuHijU=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0=
+github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
+github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/tidwall/gjson v1.17.3 h1:bwWLZU7icoKRG+C+0PNwIKC6FCJO/Q3p2pZvuP0jN94=
+github.com/tidwall/gjson v1.17.3/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
+github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
+github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
+github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
+github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE=
+github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
--- a/plugins/wasm-go/examples/custom-log/main.go
+++ b/plugins/wasm-go/examples/custom-log/main.go
@@ -0,0 +1,67 @@
+// Copyright (c) 2022 Alibaba Group Holding Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"math/rand"
+
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+)
+
+func main() {
+	wrapper.SetCtx(
+		"custom-log",
+		wrapper.ProcessRequestHeadersBy(onHttpRequestHeaders),
+	)
+}
+
+type CustomLogConfig struct {
+}
+
+// Method 1: write custom log
+func writeLog(ctx wrapper.HttpContext) {
+	ctx.SetUserAttribute("question", "当然可以。在Python中，你可以创建一个函数来计算一系列数字的和。下面是一个简单的例子，该函数接受一个数字列表作为输入，并返回它们的总和。\n\n```python\ndef sum_of_numbers(numbers):\n    \"\"\"\n    计算列表中所有数字的和。\n    \n    参数:\n    numbers (list of int or float): 一个包含数字的列表。\n    \n    返回:\n    int or float: 列表中所有数字的总和。\n    \"\"\"\n    total_sum = sum(numbers)  # 使用Python内置的sum函数计算总和\n    return total_sum\n\n# 示例使用\nnumbers_list = [1, 2, 3, 4, 5]\nprint(\"The sum is:\", sum_of_numbers(numbers_list))  # 输出：The sum is: 15\n```\n\n在这段代码中，我们定义了一个名为 `sum_of_numbers` 的函数，它接收一个参数 `numbers`，这是一个包含整数或浮点数的列表。函数内部使用了Python的内置函数 `sum()` 来计算这些数字的总和，并将结果返回。\n\n你也可以手动实现求和逻辑，而不是使用内置的 `sum()` 函数，如下所示：\n\n```python\ndef sum_of_numbers_manual(numbers):\n    \"\"\"\n    手动计算列表中所有数字的和。\n    \n    参数:\n    numbers (list of int or float): 一个包含数字的列表。\n    \n    返回:\n    int or float: 列表中所有数字的总和。\n    \"\"\"\n    total_sum = 0\n    for number in numbers:\n        total_sum += number\n    return total_sum\n\n# 示例使用\nnumbers_list = [1, 2, 3, 4, 5]\nprint(\"The sum is:\", sum_of_numbers_manual(numbers_list))  # 输出：The sum is: 15\n```\n\n在这个版本中，我们初始化 `total_sum` 为0，然后遍历列表中的每个元素，并将其加到 `total_sum` 上。最后返回这个累加的结果。这两种方法都可以达到相同的目的，但是使用内置函数通常更简洁且效率更高。")
+	ctx.SetUserAttribute("k2", 2213.22)
+	ctx.WriteUserAttributeToLog()
+}
+
+// Method 2: write custom log with specific key
+func writeLogWithKey(ctx wrapper.HttpContext, key string) {
+	ctx.SetUserAttribute("k2", 2213.22)
+	_ = ctx.WriteUserAttributeToLogWithKey(key)
+	ctx.SetUserAttribute("k2", 212939.22)
+	ctx.SetUserAttribute("k3", 123)
+	_ = ctx.WriteUserAttributeToLogWithKey(key)
+}
+
+// Method 3: write custom attributes to trace
+func writeTraceAttribute(ctx wrapper.HttpContext) {
+	ctx.SetUserAttribute("question", "当然可以。在Python中，你可以创建一个函数来计算一系列数字的和。下面是一个简单的例子，该函数接受一个数字列表作为输入，并返回它们的总和。\n\n```python\ndef sum_of_numbers(numbers):\n    \"\"\"\n    计算列表中所有数字的和。\n    \n    参数:\n    numbers (list of int or float): 一个包含数字的列表。\n    \n    返回:\n    int or float: 列表中所有数字的总和。\n    \"\"\"\n    total_sum = sum(numbers)  # 使用Python内置的sum函数计算总和\n    return total_sum\n\n# 示例使用\nnumbers_list = [1, 2, 3, 4, 5]\nprint(\"The sum is:\", sum_of_numbers(numbers_list))  # 输出：The sum is: 15\n```\n\n在这段代码中，我们定义了一个名为 `sum_of_numbers` 的函数，它接收一个参数 `numbers`，这是一个包含整数或浮点数的列表。函数内部使用了Python的内置函数 `sum()` 来计算这些数字的总和，并将结果返回。\n\n你也可以手动实现求和逻辑，而不是使用内置的 `sum()` 函数，如下所示：\n\n```python\ndef sum_of_numbers_manual(numbers):\n    \"\"\"\n    手动计算列表中所有数字的和。\n    \n    参数:\n    numbers (list of int or float): 一个包含数字的列表。\n    \n    返回:\n    int or float: 列表中所有数字的总和。\n    \"\"\"\n    total_sum = 0\n    for number in numbers:\n        total_sum += number\n    return total_sum\n\n# 示例使用\nnumbers_list = [1, 2, 3, 4, 5]\nprint(\"The sum is:\", sum_of_numbers_manual(numbers_list))  # 输出：The sum is: 15\n```\n\n在这个版本中，我们初始化 `total_sum` 为0，然后遍历列表中的每个元素，并将其加到 `total_sum` 上。最后返回这个累加的结果。这两种方法都可以达到相同的目的，但是使用内置函数通常更简洁且效率更高。")
+	ctx.SetUserAttribute("k2", 2213.22)
+	ctx.WriteUserAttributeToTrace()
+}
+
+func onHttpRequestHeaders(ctx wrapper.HttpContext, config CustomLogConfig, log wrapper.Log) types.Action {
+	if rand.Intn(10)%3 == 1 {
+		writeLog(ctx)
+	} else if rand.Intn(10)%3 == 2 {
+		writeLogWithKey(ctx, "ai_log")
+	} else {
+		writeTraceAttribute(ctx)
+	}
+	return types.ActionContinue
+}
--- a/plugins/wasm-go/extensions/ai-cache/cache/provider.go
+++ b/plugins/wasm-go/extensions/ai-cache/cache/provider.go
@@ -2,6 +2,7 @@ package cache

 import (
 	"errors"
+	"strings"

 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/tidwall/gjson"
@@ -62,7 +63,12 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 	c.serviceName = json.Get("serviceName").String()
 	c.servicePort = int(json.Get("servicePort").Int())
 	if !json.Get("servicePort").Exists() {
-		c.servicePort = 6379
+		if strings.HasSuffix(c.serviceName, ".static") {
+			// use default logic port which is 80 for static service
+			c.servicePort = 80
+		} else {
+			c.servicePort = 6379
+		}
 	}
 	c.serviceHost = json.Get("serviceHost").String()
 	c.username = json.Get("username").String()
--- a/plugins/wasm-go/extensions/ai-cache/config/config.go
+++ b/plugins/wasm-go/extensions/ai-cache/config/config.go
@@ -79,11 +79,11 @@ func (c *PluginConfig) FromJson(json gjson.Result, log wrapper.Log) {

 	c.StreamResponseTemplate = json.Get("streamResponseTemplate").String()
 	if c.StreamResponseTemplate == "" {
-		c.StreamResponseTemplate = `data:{"id":"from-cache","choices":[{"index":0,"delta":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}` + "\n\ndata:[DONE]\n\n"
+		c.StreamResponseTemplate = `data:{"id":"from-cache","choices":[{"index":0,"delta":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"from-cache","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}` + "\n\ndata:[DONE]\n\n"
 	}
 	c.ResponseTemplate = json.Get("responseTemplate").String()
 	if c.ResponseTemplate == "" {
-		c.ResponseTemplate = `{"id":"from-cache","choices":[{"index":0,"message":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}`
+		c.ResponseTemplate = `{"id":"from-cache","choices":[{"index":0,"message":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"from-cache","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}`
 	}

 	if json.Get("enableSemanticCache").Exists() {
--- a/plugins/wasm-go/extensions/ai-cache/core.go
+++ b/plugins/wasm-go/extensions/ai-cache/core.go
@@ -74,6 +74,9 @@ func processCacheHit(key string, response string, stream bool, ctx wrapper.HttpC

 	ctx.SetContext(CACHE_KEY_CONTEXT_KEY, nil)

+	ctx.SetUserAttribute("cache_status", "hit")
+	ctx.WriteUserAttributeToLogWithKey(wrapper.AILogKey)
+
 	if stream {
 		proxywasm.SendHttpResponseWithDetail(200, "ai-cache.hit", [][2]string{{"content-type", "text/event-stream; charset=utf-8"}}, []byte(fmt.Sprintf(c.StreamResponseTemplate, escapedResponse)), -1)
 	} else {
--- a/plugins/wasm-go/extensions/ai-cache/embedding/cohere.go
+++ b/plugins/wasm-go/extensions/ai-cache/embedding/cohere.go
@@ -0,0 +1,158 @@
+package embedding
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/http"
+	"strconv"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
+)
+
+const (
+	COHERE_DOMAIN             = "api.cohere.com"
+	COHERE_PORT               = 443
+	COHERE_DEFAULT_MODEL_NAME = "embed-english-v2.0"
+	COHERE_ENDPOINT           = "/v2/embed"
+)
+
+type cohereProviderInitializer struct {
+}
+
+var cohereConfig cohereProviderConfig
+
+type cohereProviderConfig struct {
+	// @Title zh-CN 文本特征提取服务 API Key
+	// @Description zh-CN 文本特征提取服务 API Key
+	apiKey string
+}
+
+func (c *cohereProviderInitializer) InitConfig(json gjson.Result) {
+	cohereConfig.apiKey = json.Get("apiKey").String() // stored in the package-level cohereConfig, not on the initializer
+}
+func (c *cohereProviderInitializer) ValidateConfig() error {
+	if cohereConfig.apiKey == "" { // checks the value InitConfig stored in the package-level cohereConfig
+		return errors.New("[Cohere] apiKey is required")
+	}
+	return nil
+}
+
+// CreateProvider returns a CohereProvider whose HTTP client targets the configured
+// service; an unset host or port falls back to COHERE_DOMAIN / COHERE_PORT.
+func (t *cohereProviderInitializer) CreateProvider(c ProviderConfig) (Provider, error) {
+	if c.serviceHost == "" {
+		c.serviceHost = COHERE_DOMAIN
+	}
+	if c.servicePort == 0 {
+		c.servicePort = COHERE_PORT
+	}
+	cluster := wrapper.FQDNCluster{
+		FQDN: c.serviceName,
+		Host: c.serviceHost,
+		Port: int64(c.servicePort),
+	}
+	return &CohereProvider{config: c, client: wrapper.NewClusterClient(cluster)}, nil
+}
+
+type cohereResponse struct {
+	Embeddings cohereEmbeddings `json:"embeddings"`
+}
+
+type cohereEmbeddings struct {
+	FloatTypeEebedding [][]float64 `json:"float"`
+}
+
+type cohereEmbeddingRequest struct {
+	Texts          []string `json:"texts"`
+	Model          string   `json:"model"`
+	InputType      string   `json:"input_type"`
+	EmbeddingTypes []string `json:"embedding_types"`
+}
+
+type CohereProvider struct {
+	config ProviderConfig
+	client wrapper.HttpClient
+}
+
+func (t *CohereProvider) GetProviderType() string {
+	return PROVIDER_TYPE_COHERE // same constant that keys cohereProviderInitializer in providerInitializers
+}
+// constructParameters builds the request for the Cohere embed endpoint.
+// It returns the endpoint path, the request headers and the JSON body for texts;
+// the model falls back to COHERE_DEFAULT_MODEL_NAME when none is configured.
+func (t *CohereProvider) constructParameters(texts []string, log wrapper.Log) (string, [][2]string, []byte, error) {
+	modelName := COHERE_DEFAULT_MODEL_NAME
+	if t.config.model != "" {
+		modelName = t.config.model
+	}
+	payload, marshalErr := json.Marshal(cohereEmbeddingRequest{
+		Texts:          texts,
+		Model:          modelName,
+		InputType:      "search_document",
+		EmbeddingTypes: []string{"float"},
+	})
+	if marshalErr != nil {
+		log.Errorf("failed to marshal request data: %v", marshalErr)
+		return "", nil, nil, marshalErr
+	}
+
+	// The API key comes from the package-level cohereConfig filled in by InitConfig.
+	authorization := fmt.Sprintf("BEARER %s", cohereConfig.apiKey)
+	return COHERE_ENDPOINT, [][2]string{
+		{"Authorization", authorization},
+		{"Content-Type", "application/json"},
+	}, payload, nil
+}
+
+func (t *CohereProvider) parseTextEmbedding(responseBody []byte) (*cohereResponse, error) {
+	var resp cohereResponse
+	err := json.Unmarshal(responseBody, &resp)
+	if err != nil {
+		return nil, err
+	}
+	return &resp, nil
+}
+
+// GetEmbedding posts queryString to the embed endpoint and hands the first returned float
+// vector to callback; the return value only reports request-construction or Post errors.
+func (t *CohereProvider) GetEmbedding(
+	queryString string,
+	ctx wrapper.HttpContext,
+	log wrapper.Log,
+	callback func(emb []float64, err error)) error {
+	embUrl, embHeaders, embRequestBody, err := t.constructParameters([]string{queryString}, log)
+	if err != nil {
+		log.Errorf("failed to construct parameters: %v", err)
+		return err
+	}
+
+	var resp *cohereResponse
+	err = t.client.Post(embUrl, embHeaders, embRequestBody,
+		func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+			if statusCode != http.StatusOK {
+				err = errors.New("failed to get embedding due to status code: " + strconv.Itoa(statusCode))
+				callback(nil, err)
+				return
+			}
+
+			log.Debugf("get embedding response: %d, %s", statusCode, responseBody)
+
+			resp, err = t.parseTextEmbedding(responseBody)
+			if err != nil {
+				err = fmt.Errorf("failed to parse response: %v", err)
+				callback(nil, err)
+				return
+			}
+
+			if len(resp.Embeddings.FloatTypeEebedding) == 0 {
+				err = errors.New("no embedding found in response")
+				callback(nil, err)
+				return
+			}
+
+			callback(resp.Embeddings.FloatTypeEebedding[0], nil)
+		}, t.config.timeout)
+	return err
+}
--- a/plugins/wasm-go/extensions/ai-cache/embedding/dashscope.go
+++ b/plugins/wasm-go/extensions/ai-cache/embedding/dashscope.go
@@ -8,6 +8,7 @@ import (
 	"strconv"

 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
 )

 const (
@@ -17,11 +18,22 @@ const (
 	DASHSCOPE_ENDPOINT           = "/api/v1/services/embeddings/text-embedding/text-embedding"
 )

+var dashScopeConfig dashScopeProviderConfig
+
 type dashScopeProviderInitializer struct {
 }
+type dashScopeProviderConfig struct {
+	// @Title zh-CN 文本特征提取服务 API Key
+	// @Description zh-CN 文本特征提取服务 API Key
+	apiKey string
+}

-func (d *dashScopeProviderInitializer) ValidateConfig(config ProviderConfig) error {
-	if config.apiKey == "" {
+func (c *dashScopeProviderInitializer) InitConfig(json gjson.Result) {
+	dashScopeConfig.apiKey = json.Get("apiKey").String()
+}
+
+func (c *dashScopeProviderInitializer) ValidateConfig() error {
+	if dashScopeConfig.apiKey == "" {
 		return errors.New("[DashScope] apiKey is required")
 	}
 	return nil
@@ -114,14 +126,14 @@ func (d *DSProvider) constructParameters(texts []string, log wrapper.Log) (strin
 		return "", nil, nil, err
 	}

-	if d.config.apiKey == "" {
+	if dashScopeConfig.apiKey == "" {
 		err := errors.New("dashScopeKey is empty")
 		log.Errorf("failed to construct headers: %v", err)
 		return "", nil, nil, err
 	}

 	headers := [][2]string{
-		{"Authorization", "Bearer " + d.config.apiKey},
+		{"Authorization", "Bearer " + dashScopeConfig.apiKey},
 		{"Content-Type", "application/json"},
 	}

--- a/plugins/wasm-go/extensions/ai-cache/embedding/openai.go
+++ b/plugins/wasm-go/extensions/ai-cache/embedding/openai.go
@@ -0,0 +1,170 @@
+package embedding
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/http"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
+)
+
+const (
+	OPENAI_DOMAIN             = "api.openai.com"
+	OPENAI_PORT               = 443
+	OPENAI_DEFAULT_MODEL_NAME = "text-embedding-3-small"
+	OPENAI_ENDPOINT           = "/v1/embeddings"
+)
+
+type openAIProviderInitializer struct {
+}
+
+var openAIConfig openAIProviderConfig
+
+type openAIProviderConfig struct {
+	// @Title zh-CN 文本特征提取服务 API Key
+	// @Description zh-CN 文本特征提取服务 API Key
+	apiKey string
+}
+
+func (c *openAIProviderInitializer) InitConfig(json gjson.Result) {
+	openAIConfig.apiKey = json.Get("apiKey").String() // stored in the package-level openAIConfig, not on the initializer
+}
+
+func (c *openAIProviderInitializer) ValidateConfig() error {
+	if openAIConfig.apiKey == "" { // checks the value InitConfig stored in the package-level openAIConfig
+		return errors.New("[openAI] apiKey is required")
+	}
+	return nil
+}
+
+// CreateProvider returns an OpenAIProvider for the configured service, substituting
+// OPENAI_DOMAIN, OPENAI_PORT and OPENAI_DEFAULT_MODEL_NAME for any unset field.
+func (t *openAIProviderInitializer) CreateProvider(c ProviderConfig) (Provider, error) {
+	if c.model == "" {
+		c.model = OPENAI_DEFAULT_MODEL_NAME
+	}
+	if c.serviceHost == "" {
+		c.serviceHost = OPENAI_DOMAIN
+	}
+	if c.servicePort == 0 {
+		c.servicePort = OPENAI_PORT
+	}
+	cluster := wrapper.FQDNCluster{
+		FQDN: c.serviceName,
+		Host: c.serviceHost,
+		Port: c.servicePort,
+	}
+	return &OpenAIProvider{config: c, client: wrapper.NewClusterClient(cluster)}, nil
+}
+
+func (t *OpenAIProvider) GetProviderType() string {
+	return PROVIDER_TYPE_OPENAI // same constant that keys openAIProviderInitializer in providerInitializers
+}
+
+type OpenAIResponse struct {
+	Object string         `json:"object"`
+	Data   []OpenAIResult `json:"data"`
+	Model  string         `json:"model"`
+	Error  *OpenAIError   `json:"error"`
+}
+
+type OpenAIResult struct {
+	Object    string    `json:"object"`
+	Embedding []float64 `json:"embedding"`
+	Index     int       `json:"index"`
+}
+
+type OpenAIError struct { // decoded from the "error" field of OpenAIResponse
+	Message string `json:"message"` // error text; must map to "message" or it is never populated
+	Type    string `json:"type"`
+	Code    string `json:"code"`
+	Param   string `json:"param"`
+}
+
+type OpenAIEmbeddingRequest struct {
+	Input string `json:"input"`
+	Model string `json:"model"`
+}
+
+type OpenAIProvider struct {
+	config ProviderConfig
+	client wrapper.HttpClient
+}
+
+// constructParameters returns the endpoint path, headers and JSON body of an
+// embeddings request for text; an empty text is rejected before anything is built.
+func (t *OpenAIProvider) constructParameters(text string, log wrapper.Log) (string, [][2]string, []byte, error) {
+	if text == "" {
+		return "", nil, nil, errors.New("queryString text cannot be empty")
+	}
+
+	payload, marshalErr := json.Marshal(OpenAIEmbeddingRequest{
+		Input: text,
+		Model: t.config.model,
+	})
+	if marshalErr != nil {
+		log.Errorf("failed to marshal request data: %v", marshalErr)
+		return "", nil, nil, marshalErr
+	}
+
+	// The bearer token is the API key held in the package-level openAIConfig.
+	authorization := fmt.Sprintf("Bearer %s", openAIConfig.apiKey)
+	requestHeaders := [][2]string{
+		{"Authorization", authorization},
+		{"Content-Type", "application/json"},
+	}
+	return OPENAI_ENDPOINT, requestHeaders, payload, nil
+}
+
+func (t *OpenAIProvider) parseTextEmbedding(responseBody []byte) (*OpenAIResponse, error) {
+	var resp OpenAIResponse
+	err := json.Unmarshal(responseBody, &resp)
+	if err != nil {
+		return nil, err
+	}
+	return &resp, nil
+}
+
+// GetEmbedding posts queryString to the embeddings endpoint and hands the first returned
+// vector to callback; the return value only reports request-construction or Post errors.
+func (t *OpenAIProvider) GetEmbedding(
+	queryString string,
+	ctx wrapper.HttpContext,
+	log wrapper.Log,
+	callback func(emb []float64, err error)) error {
+	embUrl, embHeaders, embRequestBody, err := t.constructParameters(queryString, log)
+	if err != nil {
+		log.Errorf("failed to construct parameters: %v", err)
+		return err
+	}
+
+	var resp *OpenAIResponse
+	err = t.client.Post(embUrl, embHeaders, embRequestBody,
+		func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+			if statusCode != http.StatusOK {
+				err = fmt.Errorf("failed to get embedding due to status code: %d, resp: %s", statusCode, responseBody)
+				callback(nil, err)
+				return
+			}
+
+			resp, err = t.parseTextEmbedding(responseBody)
+			if err != nil {
+				err = fmt.Errorf("failed to parse response: %v", err)
+				callback(nil, err)
+				return
+			}
+
+			log.Debugf("get embedding response: %d, %s", statusCode, responseBody)
+
+			if len(resp.Data) == 0 {
+				err = errors.New("no embedding found in response")
+				callback(nil, err)
+				return
+			}
+
+			callback(resp.Data[0].Embedding, nil)
+		}, t.config.timeout)
+	return err
+}
--- a/plugins/wasm-go/extensions/ai-cache/embedding/provider.go
+++ b/plugins/wasm-go/extensions/ai-cache/embedding/provider.go
@@ -10,10 +10,13 @@ import (
 const (
 	PROVIDER_TYPE_DASHSCOPE = "dashscope"
 	PROVIDER_TYPE_TEXTIN    = "textin"
+	PROVIDER_TYPE_COHERE    = "cohere"
+	PROVIDER_TYPE_OPENAI    = "openai"
 )

 type providerInitializer interface {
-	ValidateConfig(ProviderConfig) error
+	InitConfig(json gjson.Result)
+	ValidateConfig() error
 	CreateProvider(ProviderConfig) (Provider, error)
 }

@@ -21,6 +24,8 @@ var (
 	providerInitializers = map[string]providerInitializer{
 		PROVIDER_TYPE_DASHSCOPE: &dashScopeProviderInitializer{},
 		PROVIDER_TYPE_TEXTIN:    &textInProviderInitializer{},
+		PROVIDER_TYPE_COHERE:    &cohereProviderInitializer{},
+		PROVIDER_TYPE_OPENAI:    &openAIProviderInitializer{},
 	}
 )

@@ -37,35 +42,26 @@ type ProviderConfig struct {
 	// @Title zh-CN 文本特征提取服务端口
 	// @Description zh-CN 文本特征提取服务端口
 	servicePort int64
-	// @Title zh-CN 文本特征提取服务 API Key
-	// @Description zh-CN 文本特征提取服务 API Key
-	apiKey string
-	//@Title zh-CN TextIn x-ti-app-id
-	// @Description zh-CN 仅适用于 TextIn 服务。参考 https://www.textin.com/document/acge_text_embedding
-	textinAppId string
-	//@Title zh-CN TextIn x-ti-secret-code
-	// @Description zh-CN 仅适用于 TextIn 服务。参考 https://www.textin.com/document/acge_text_embedding
-	textinSecretCode string
-	//@Title zh-CN TextIn request matryoshka_dim
-	// @Description zh-CN 仅适用于 TextIn 服务, 指定返回的向量维度。参考 https://www.textin.com/document/acge_text_embedding
-	textinMatryoshkaDim int
 	// @Title zh-CN 文本特征提取服务超时时间
 	// @Description zh-CN 文本特征提取服务超时时间
 	timeout uint32
 	// @Title zh-CN 文本特征提取服务使用的模型
 	// @Description zh-CN 用于文本特征提取的模型名称, 在 DashScope 中默认为 "text-embedding-v1"
 	model string
+
+	initializer providerInitializer
 }

 func (c *ProviderConfig) FromJson(json gjson.Result) {
 	c.typ = json.Get("type").String()
+	i, has := providerInitializers[c.typ]
+	if has {
+		i.InitConfig(json)
+		c.initializer = i
+	}
 	c.serviceName = json.Get("serviceName").String()
 	c.serviceHost = json.Get("serviceHost").String()
 	c.servicePort = json.Get("servicePort").Int()
-	c.apiKey = json.Get("apiKey").String()
-	c.textinAppId = json.Get("textinAppId").String()
-	c.textinSecretCode = json.Get("textinSecretCode").String()
-	c.textinMatryoshkaDim = int(json.Get("textinMatryoshkaDim").Int())
 	c.timeout = uint32(json.Get("timeout").Int())
 	c.model = json.Get("model").String()
 	if c.timeout == 0 {
@@ -80,11 +76,10 @@ func (c *ProviderConfig) Validate() error {
 	if c.typ == "" {
 		return errors.New("embedding service type is required")
 	}
-	initializer, has := providerInitializers[c.typ]
-	if !has {
+	if c.initializer == nil {
 		return errors.New("unknown embedding service provider type: " + c.typ)
 	}
-	if err := initializer.ValidateConfig(*c); err != nil {
+	if err := c.initializer.ValidateConfig(); err != nil {
 		return err
 	}
 	return nil
--- a/plugins/wasm-go/extensions/ai-cache/embedding/textin.go
+++ b/plugins/wasm-go/extensions/ai-cache/embedding/textin.go
@@ -8,6 +8,7 @@ import (
 	"strconv"

 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
 )

 const (
@@ -20,14 +21,34 @@ const (
 type textInProviderInitializer struct {
 }

-func (t *textInProviderInitializer) ValidateConfig(config ProviderConfig) error {
-	if config.textinAppId == "" {
-		return errors.New("embedding service TextIn App ID is required")
+var textInConfig textInProviderConfig
+
+type textInProviderConfig struct {
+	//@Title zh-CN TextIn x-ti-app-id
+	// @Description zh-CN 仅适用于 TextIn 服务。参考 https://www.textin.com/document/acge_text_embedding
+	textinAppId string
+	//@Title zh-CN TextIn x-ti-secret-code
+	// @Description zh-CN 仅适用于 TextIn 服务。参考 https://www.textin.com/document/acge_text_embedding
+	textinSecretCode string
+	//@Title zh-CN TextIn request matryoshka_dim
+	// @Description zh-CN 仅适用于 TextIn 服务, 指定返回的向量维度。参考 https://www.textin.com/document/acge_text_embedding
+	textinMatryoshkaDim int
+}
+
+func (c *textInProviderInitializer) InitConfig(json gjson.Result) {
+	textInConfig.textinAppId = json.Get("textinAppId").String()
+	textInConfig.textinSecretCode = json.Get("textinSecretCode").String()
+	textInConfig.textinMatryoshkaDim = int(json.Get("textinMatryoshkaDim").Int())
+}
+
+func (c *textInProviderInitializer) ValidateConfig() error {
+	if textInConfig.textinAppId == "" {
+		return errors.New("textinAppId is required")
 	}
-	if config.textinSecretCode == "" {
-		return errors.New("embedding service TextIn Secret Code is required")
+	if textInConfig.textinSecretCode == "" {
+		return errors.New("textinSecretCode is required")
 	}
-	if config.textinMatryoshkaDim == 0 {
+	if textInConfig.textinMatryoshkaDim == 0 {
 		return errors.New("embedding service TextIn Matryoshka Dim is required")
 	}
 	return nil
@@ -62,7 +83,7 @@ type TextInResponse struct {
 }

 type TextInResult struct {
-	Embeddings    [][]float64 `json:"embedding"` 
+	Embeddings    [][]float64 `json:"embedding"`
 	MatryoshkaDim int         `json:"matryoshka_dim"`
 }

@@ -80,7 +101,7 @@ func (t *TIProvider) constructParameters(texts []string, log wrapper.Log) (strin

 	data := TextInEmbeddingRequest{
 		Input:         texts,
-		MatryoshkaDim: t.config.textinMatryoshkaDim,
+		MatryoshkaDim: textInConfig.textinMatryoshkaDim,
 	}

 	requestBody, err := json.Marshal(data)
@@ -89,20 +110,20 @@ func (t *TIProvider) constructParameters(texts []string, log wrapper.Log) (strin
 		return "", nil, nil, err
 	}

-	if t.config.textinAppId == "" {
+	if textInConfig.textinAppId == "" {
 		err := errors.New("textinAppId is empty")
 		log.Errorf("failed to construct headers: %v", err)
 		return "", nil, nil, err
 	}
-	if t.config.textinSecretCode == "" {
+	if textInConfig.textinSecretCode == "" {
 		err := errors.New("textinSecretCode is empty")
 		log.Errorf("failed to construct headers: %v", err)
 		return "", nil, nil, err
 	}

 	headers := [][2]string{
-		{"x-ti-app-id", t.config.textinAppId},
-		{"x-ti-secret-code", t.config.textinSecretCode},
+		{"x-ti-app-id", textInConfig.textinAppId},
+		{"x-ti-secret-code", textInConfig.textinSecretCode},
 		{"Content-Type", "application/json"},
 	}

--- a/plugins/wasm-go/extensions/ai-cache/go.mod
+++ b/plugins/wasm-go/extensions/ai-cache/go.mod
@@ -8,14 +8,14 @@ replace github.com/alibaba/higress/plugins/wasm-go => ../..

 require (
 	github.com/alibaba/higress/plugins/wasm-go v1.4.2
-	github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f
+	github.com/google/uuid v1.6.0
+	github.com/higress-group/proxy-wasm-go-sdk v1.0.0
 	github.com/tidwall/gjson v1.17.3
 	github.com/tidwall/resp v0.1.1
 // github.com/weaviate/weaviate-go-client/v4 v4.15.1
 )

 require (
-	github.com/google/uuid v1.6.0 // indirect
 	github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 // indirect
 	github.com/magefile/mage v1.14.0 // indirect
 	github.com/stretchr/testify v1.9.0 // indirect
--- a/plugins/wasm-go/extensions/ai-cache/go.sum
+++ b/plugins/wasm-go/extensions/ai-cache/go.sum
@@ -3,8 +3,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
 github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
-github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f h1:ZIiIBRvIw62gA5MJhuwp1+2wWbqL9IGElQ499rUsYYg=
-github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f/go.mod h1:hNFjhrLUIq+kJ9bOcs8QtiplSQ61GZXtd2xHKx4BYRo=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKEak5MWjBDhWjuHijU=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0=
 github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
 github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
--- a/plugins/wasm-go/extensions/ai-cache/main.go
+++ b/plugins/wasm-go/extensions/ai-cache/main.go
@@ -22,6 +22,8 @@ const (
 	STREAM_CONTEXT_KEY          = "stream"
 	SKIP_CACHE_HEADER           = "x-higress-skip-ai-cache"
 	ERROR_PARTIAL_MESSAGE_KEY   = "errorPartialMessage"
+
+	DEFAULT_MAX_BODY_BYTES uint32 = 10 * 1024 * 1024
 )

 func main() {
@@ -69,6 +71,7 @@ func onHttpRequestHeaders(ctx wrapper.HttpContext, c config.PluginConfig, log wr
 		ctx.DontReadRequestBody()
 		return types.ActionContinue
 	}
+	ctx.SetRequestBodyBufferLimit(DEFAULT_MAX_BODY_BYTES)
 	_ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
 	// The request has a body and requires delaying the header transmission until a cache miss occurs,
 	// at which point the header should be sent.
@@ -128,12 +131,20 @@ func onHttpRequestBody(ctx wrapper.HttpContext, c config.PluginConfig, body []by
 func onHttpResponseHeaders(ctx wrapper.HttpContext, c config.PluginConfig, log wrapper.Log) types.Action {
 	skipCache := ctx.GetContext(SKIP_CACHE_HEADER)
 	if skipCache != nil {
+		ctx.SetUserAttribute("cache_status", "skip")
+		ctx.WriteUserAttributeToLogWithKey(wrapper.AILogKey)
 		ctx.DontReadResponseBody()
 		return types.ActionContinue
 	}
+	if ctx.GetContext(CACHE_KEY_CONTEXT_KEY) != nil {
+		ctx.SetUserAttribute("cache_status", "miss")
+		ctx.WriteUserAttributeToLogWithKey(wrapper.AILogKey)
+	}
 	contentType, _ := proxywasm.GetHttpResponseHeader("content-type")
 	if strings.Contains(contentType, "text/event-stream") {
 		ctx.SetContext(STREAM_CONTEXT_KEY, struct{}{})
+	} else {
+		ctx.SetResponseBodyBufferLimit(DEFAULT_MAX_BODY_BYTES)
 	}

 	if ctx.GetContext(ERROR_PARTIAL_MESSAGE_KEY) != nil {
@@ -158,22 +169,26 @@ func onHttpResponseBody(ctx wrapper.HttpContext, c config.PluginConfig, chunk []
 		return chunk
 	}

+	stream := ctx.GetContext(STREAM_CONTEXT_KEY)
+	var err error
 	if !isLastChunk {
-		if err := handleNonLastChunk(ctx, c, chunk, log); err != nil {
+		if stream == nil {
+			err = handleNonStreamChunk(ctx, c, chunk, log)
+		} else {
+			err = handleStreamChunk(ctx, c, unifySSEChunk(chunk), log)
+		}
+		if err != nil {
 			log.Errorf("[onHttpResponseBody] handle non last chunk failed, error: %v", err)
 			// Set an empty struct in the context to indicate an error in processing the partial message
 			ctx.SetContext(ERROR_PARTIAL_MESSAGE_KEY, struct{}{})
 		}
 		return chunk
 	}
-
-	stream := ctx.GetContext(STREAM_CONTEXT_KEY)
 	var value string
-	var err error
 	if stream == nil {
 		value, err = processNonStreamLastChunk(ctx, c, chunk, log)
 	} else {
-		value, err = processStreamLastChunk(ctx, c, chunk, log)
+		value, err = processStreamLastChunk(ctx, c, unifySSEChunk(chunk), log)
 	}

 	if err != nil {
--- a/plugins/wasm-go/extensions/ai-cache/util.go
+++ b/plugins/wasm-go/extensions/ai-cache/util.go
@@ -1,6 +1,7 @@
 package main

 import (
+	"bytes"
 	"fmt"
 	"strings"

@@ -9,17 +10,6 @@ import (
 	"github.com/tidwall/gjson"
 )

-func handleNonLastChunk(ctx wrapper.HttpContext, c config.PluginConfig, chunk []byte, log wrapper.Log) error {
-	stream := ctx.GetContext(STREAM_CONTEXT_KEY)
-	err := error(nil)
-	if stream == nil {
-		err = handleNonStreamChunk(ctx, c, chunk, log)
-	} else {
-		err = handleStreamChunk(ctx, c, chunk, log)
-	}
-	return err
-}
-
 func handleNonStreamChunk(ctx wrapper.HttpContext, c config.PluginConfig, chunk []byte, log wrapper.Log) error {
 	tempContentI := ctx.GetContext(CACHE_CONTENT_CONTEXT_KEY)
 	if tempContentI == nil {
@@ -32,6 +22,12 @@ func handleNonStreamChunk(ctx wrapper.HttpContext, c config.PluginConfig, chunk
 	return nil
 }

+// unifySSEChunk rewrites every CRLF, then every remaining bare CR, in data as LF.
+func unifySSEChunk(data []byte) []byte {
+	crlfNormalized := bytes.ReplaceAll(data, []byte("\r\n"), []byte("\n"))
+	return bytes.ReplaceAll(crlfNormalized, []byte("\r"), []byte("\n"))
+}
+
 func handleStreamChunk(ctx wrapper.HttpContext, c config.PluginConfig, chunk []byte, log wrapper.Log) error {
 	var partialMessage []byte
 	partialMessageI := ctx.GetContext(PARTIAL_MESSAGE_CONTEXT_KEY)
@@ -101,55 +97,54 @@ func processStreamLastChunk(ctx wrapper.HttpContext, c config.PluginConfig, chun
 }

+// processSSEMessage extracts c.CacheStreamValueFrom from every "data:" message in sseMessage,
+// appends the combined text to CACHE_CONTENT_CONTEXT_KEY and returns the text found in this call.
 func processSSEMessage(ctx wrapper.HttpContext, c config.PluginConfig, sseMessage string, log wrapper.Log) (string, error) {
-	subMessages := strings.Split(sseMessage, "\n")
-	var message string
-	for _, msg := range subMessages {
-		if strings.HasPrefix(msg, "data:") {
-			message = msg
-			break
+	content := "" // text extracted from the messages of this call only
+	for _, chunk := range strings.Split(sseMessage, "\n\n") { // NOTE(review): a trailing "\n\n" yields an empty last chunk, which takes the "invalid message" return below — confirm callers strip it
+		log.Debugf("single sse message: %s", chunk)
+		subMessages := strings.Split(chunk, "\n")
+		var message string
+		for _, msg := range subMessages {
+			if strings.HasPrefix(msg, "data:") {
+				message = msg
+				break
+			}
 		}
-	}
-	if len(message) < 6 {
-		return "", fmt.Errorf("[processSSEMessage] invalid message: %s", message)
-	}
-
-	// skip the prefix "data:"
-	bodyJson := message[5:]
-
-	if strings.TrimSpace(bodyJson) == "[DONE]" {
-		return "", nil
-	}
-
-	// Extract values from JSON fields
-	responseBody := gjson.Get(bodyJson, c.CacheStreamValueFrom)
-	toolCalls := gjson.Get(bodyJson, c.CacheToolCallsFrom)
-
-	if toolCalls.Exists() {
-		// TODO: Temporarily store the tool_calls value in the context for processing
-		ctx.SetContext(TOOL_CALLS_CONTEXT_KEY, toolCalls.String())
-	}
-
-	// Check if the ResponseBody field exists
-	if !responseBody.Exists() {
-		if ctx.GetContext(CACHE_CONTENT_CONTEXT_KEY) != nil {
-			log.Debugf("[processSSEMessage] unable to extract content from message; cache content is not nil: %s", message)
-			return "", nil
+		if len(message) < 6 {
+			return content, fmt.Errorf("[processSSEMessage] invalid message: %s", message)
 		}
-		return "", fmt.Errorf("[processSSEMessage] unable to extract content from message; cache content is nil: %s", message)
-	} else {
-		tempContentI := ctx.GetContext(CACHE_CONTENT_CONTEXT_KEY)

-		// If there is no content in the cache, initialize and set the content
-		if tempContentI == nil {
-			content := responseBody.String()
-			ctx.SetContext(CACHE_CONTENT_CONTEXT_KEY, content)
+		// skip the prefix "data:"
+		bodyJson := message[5:]
+		if strings.TrimSpace(bodyJson) == "[DONE]" { // NOTE(review): returns before the context update at the end of the function, so text gathered in this call is not stored — confirm intended
 			return content, nil
 		}

-		// Update the content in the cache
-		appendMsg := responseBody.String()
-		content := tempContentI.(string) + appendMsg
-		ctx.SetContext(CACHE_CONTENT_CONTEXT_KEY, content)
-		return content, nil
+		// Extract values from JSON fields
+		responseBody := gjson.Get(bodyJson, c.CacheStreamValueFrom)
+		toolCalls := gjson.Get(bodyJson, c.CacheToolCallsFrom)
+		if toolCalls.Exists() {
+			// TODO: Temporarily store the tool_calls value in the context for processing
+			ctx.SetContext(TOOL_CALLS_CONTEXT_KEY, toolCalls.String())
+		}
+
+		// Check if the ResponseBody field exists
+		if !responseBody.Exists() {
+			if ctx.GetContext(CACHE_CONTENT_CONTEXT_KEY) != nil {
+				log.Debugf("[processSSEMessage] unable to extract content from message; cache content is not nil: %s", message)
+				return content, nil
+			}
+			return content, fmt.Errorf("[processSSEMessage] unable to extract content from message; cache content is nil: %s", message)
+		} else {
+			content += responseBody.String()
+		}
 	}
+	tempContentI := ctx.GetContext(CACHE_CONTENT_CONTEXT_KEY)
+	// If there is no content in the cache, initialize and set the content
+	if tempContentI == nil {
+		ctx.SetContext(CACHE_CONTENT_CONTEXT_KEY, content)
+	} else {
+		ctx.SetContext(CACHE_CONTENT_CONTEXT_KEY, tempContentI.(string)+content)
+	}
+	return content, nil
 }
--- a/plugins/wasm-go/extensions/ai-history/go.sum
+++ b/plugins/wasm-go/extensions/ai-history/go.sum
@@ -3,15 +3,13 @@ github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
 github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
 github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
-github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f h1:ZIiIBRvIw62gA5MJhuwp1+2wWbqL9IGElQ499rUsYYg=
-github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f/go.mod h1:hNFjhrLUIq+kJ9bOcs8QtiplSQ61GZXtd2xHKx4BYRo=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKEak5MWjBDhWjuHijU=
 github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0=
 github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
 github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
-github.com/tidwall/gjson v1.14.3 h1:9jvXn7olKEHU1S9vwoMGliaT8jq1vJ7IH/n9zD9Dnlw=
-github.com/tidwall/gjson v1.14.3/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/gjson v1.17.3 h1:bwWLZU7icoKRG+C+0PNwIKC6FCJO/Q3p2pZvuP0jN94=
 github.com/tidwall/gjson v1.17.3/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
 github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
 github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
--- a/plugins/wasm-go/extensions/ai-history/main.go
+++ b/plugins/wasm-go/extensions/ai-history/main.go
@@ -194,6 +194,12 @@ func onHttpRequestBody(ctx wrapper.HttpContext, config PluginConfig, body []byte
 		ctx.SetContext(StreamContextKey, struct{}{})
 	}
 	identityKey := ctx.GetStringContext(IdentityKey, "")
+	question := TrimQuote(bodyJson.Get(config.QuestionFrom.RequestBody).String())
+	if question == "" {
+		log.Debug("parse question from request body failed")
+		return types.ActionContinue
+	}
+	ctx.SetContext(QuestionContextKey, question)
 	err := config.redisClient.Get(config.CacheKeyPrefix+identityKey, func(response resp.Value) {
 		if err := response.Error(); err != nil {
 			log.Errorf("redis get  failed, err:%v", err)
@@ -230,13 +236,6 @@ func onHttpRequestBody(ctx wrapper.HttpContext, config PluginConfig, body []byte
 			_ = proxywasm.SendHttpResponseWithDetail(200, "OK", [][2]string{{"content-type", "application/json; charset=utf-8"}}, res, -1)
 			return
 		}
-		question := TrimQuote(bodyJson.Get(config.QuestionFrom.RequestBody).String())
-		if question == "" {
-			log.Debug("parse question from request body failed")
-			_ = proxywasm.ResumeHttpRequest()
-			return
-		}
-		ctx.SetContext(QuestionContextKey, question)
 		fillHistoryCnt := getIntQueryParameter("fill_history_cnt", path, config.FillHistoryCnt) * 2
 		currJson := bodyJson.Get("messages").String()
 		var currMessage []ChatHistory
@@ -317,38 +316,39 @@ func getIntQueryParameter(name string, path string, defaultValue int) int {
 }

 func processSSEMessage(ctx wrapper.HttpContext, config PluginConfig, sseMessage string, log wrapper.Log) string {
-	subMessages := strings.Split(sseMessage, "\n")
-	var message string
-	for _, msg := range subMessages {
-		if strings.HasPrefix(msg, "data:") {
-			message = msg
-			break
+	content := ""
+	for _, chunk := range strings.Split(sseMessage, "\n\n") {
+		subMessages := strings.Split(chunk, "\n")
+		var message string
+		for _, msg := range subMessages {
+			if strings.HasPrefix(msg, "data:") {
+				message = msg
+				break
+			}
 		}
-	}
-	if len(message) < 6 {
-		log.Errorf("invalid message:%s", message)
-		return ""
-	}
-	// skip the prefix "data:"
-	bodyJson := message[5:]
-	if gjson.Get(bodyJson, config.AnswerStreamValueFrom.ResponseBody).Exists() {
-		tempContentI := ctx.GetContext(AnswerContentContextKey)
-		if tempContentI == nil {
-			content := TrimQuote(gjson.Get(bodyJson, config.AnswerStreamValueFrom.ResponseBody).Raw)
-			ctx.SetContext(AnswerContentContextKey, content)
+		if len(message) < 6 {
+			log.Errorf("invalid message:%s", message)
 			return content
 		}
-		append := TrimQuote(gjson.Get(bodyJson, config.AnswerStreamValueFrom.ResponseBody).Raw)
-		content := tempContentI.(string) + append
-		ctx.SetContext(AnswerContentContextKey, content)
-		return content
-	} else if gjson.Get(bodyJson, "choices.0.delta.content.tool_calls").Exists() {
-		// TODO: compatible with other providers
-		ctx.SetContext(ToolCallsContextKey, struct{}{})
-		return ""
+		// skip the prefix "data:"
+		bodyJson := message[5:]
+		if gjson.Get(bodyJson, config.AnswerStreamValueFrom.ResponseBody).Exists() {
+			tempContentI := ctx.GetContext(AnswerContentContextKey)
+			if tempContentI == nil {
+				content = TrimQuote(gjson.Get(bodyJson, config.AnswerStreamValueFrom.ResponseBody).Raw)
+				ctx.SetContext(AnswerContentContextKey, content)
+			} else {
+				append := TrimQuote(gjson.Get(bodyJson, config.AnswerStreamValueFrom.ResponseBody).Raw)
+				content = tempContentI.(string) + append
+				ctx.SetContext(AnswerContentContextKey, content)
+			}
+		} else if gjson.Get(bodyJson, "choices.0.delta.content.tool_calls").Exists() {
+			// TODO: compatible with other providers
+			ctx.SetContext(ToolCallsContextKey, struct{}{})
+		}
+		log.Debugf("unknown message:%s", bodyJson)
 	}
-	log.Debugf("unknown message:%s", bodyJson)
-	return ""
+	return content
 }

 func onHttpResponseHeaders(ctx wrapper.HttpContext, config PluginConfig, log wrapper.Log) types.Action {
--- a/plugins/wasm-go/extensions/ai-prompt-template/main.go
+++ b/plugins/wasm-go/extensions/ai-prompt-template/main.go
@@ -34,7 +34,7 @@ func parseConfig(json gjson.Result, config *AIPromptTemplateConfig, log wrapper.

 func onHttpRequestHeaders(ctx wrapper.HttpContext, config AIPromptTemplateConfig, log wrapper.Log) types.Action {
 	templateEnable, _ := proxywasm.GetHttpRequestHeader("template-enable")
-	if templateEnable != "true" {
+	if templateEnable == "false" {
 		ctx.DontReadRequestBody()
 		return types.ActionContinue
 	}
--- a/plugins/wasm-go/extensions/ai-proxy/README.md
+++ b/plugins/wasm-go/extensions/ai-proxy/README.md
@@ -41,6 +41,7 @@ description: AI 代理插件配置参考
 | `context`        | object          | 非必填   | -      | 配置 AI 对话上下文信息                                                                                                                                             |
 | `customSettings` | array of customSetting | 非必填   | -      | 为AI请求指定覆盖或者填充参数                                                                                                                                           |
 | `failover`       | object | 非必填   | -      | 配置 apiToken 的 failover 策略，当 apiToken 不可用时，将其移出 apiToken 列表，待健康检测通过后重新添加回 apiToken 列表                                                                      |
+| `retryOnFailure` | object | 非必填   | -      | 当请求失败时立即进行重试                                                                                                                                              |  

 `context`的配置字段说明如下：

@@ -78,14 +79,22 @@ custom-setting会遵循如下表格，根据`name`和协议来替换对应的字

 `failover` 的配置字段说明如下：

-| 名称               | 数据类型   | 填写要求 | 默认值   | 描述                          |
-|------------------|--------|------|-------|-----------------------------|
-| enabled | bool   | 非必填   | false | 是否启用 apiToken 的 failover 机制 |
-| failureThreshold | int    | 非必填   | 3     | 触发 failover 连续请求失败的阈值（次数）   |
-| successThreshold | int    | 非必填   | 1     | 健康检测的成功阈值（次数）               |
-| healthCheckInterval | int    | 非必填   | 5000  | 健康检测的间隔时间，单位毫秒              |
-| healthCheckTimeout | int    | 非必填   | 5000  | 健康检测的超时时间，单位毫秒              |
-| healthCheckModel | string | 必填   |      | 健康检测使用的模型                   |
+| 名称               | 数据类型   | 填写要求            | 默认值   | 描述                          |
+|------------------|--------|-----------------|-------|-----------------------------|
+| enabled | bool   | 非必填             | false | 是否启用 apiToken 的 failover 机制 |
+| failureThreshold | int    | 非必填             | 3     | 触发 failover 连续请求失败的阈值（次数）   |
+| successThreshold | int    | 非必填             | 1     | 健康检测的成功阈值（次数）               |
+| healthCheckInterval | int    | 非必填             | 5000  | 健康检测的间隔时间，单位毫秒              |
+| healthCheckTimeout | int    | 非必填             | 5000  | 健康检测的超时时间，单位毫秒              |
+| healthCheckModel | string | 启用 failover 时必填 |      | 健康检测使用的模型                   |
+
+`retryOnFailure` 的配置字段说明如下：
+
+| 名称               | 数据类型   | 填写要求            | 默认值   | 描述          |
+|------------------|--------|-----------------|-------|-------------|
+| enabled | bool   | 非必填             | false | 是否启用失败请求重试  |
+| maxRetries | int    | 非必填             | 1     | 最大重试次数      |
+| retryTimeout | int    | 非必填             | 30000 | 重试超时时间，单位毫秒 |

 ### 提供商特有配置

@@ -148,7 +157,15 @@ Groq 所对应的 `type` 为 `groq`。它并无特有的配置字段。

 #### 文心一言（Baidu）

-文心一言所对应的 `type` 为 `baidu`。它并无特有的配置字段。
+文心一言所对应的 `type` 为 `baidu`。它特有的配置字段如下：
+
+| 名称                 | 数据类型            | 填写要求 | 默认值 | 描述                                                        |
+|--------------------|-----------------|------|-----|-----------------------------------------------------------|
+| `baiduAccessKeyAndSecret`      | array of string | 必填   | -   | Baidu 的 Access Key 和 Secret Key，中间用 `:` 分隔，用于申请 apiToken。 |
+| `baiduApiTokenServiceName`      | string          | 必填   | -   | 请求刷新百度 apiToken 服务名称。                                     |
+| `baiduApiTokenServiceHost`      | string          | 非必填  | iam.bj.baidubce.com | 请求刷新百度 apiToken 服务域名，默认是 iam.bj.baidubce.com。             |
+| `baiduApiTokenServicePort`      | int64           | 非必填  | 443 | 请求刷新百度 apiToken 服务端口，默认是 443。                             |
+

 #### 360智脑

@@ -166,9 +183,10 @@ Mistral 所对应的 `type` 为 `mistral`。它并无特有的配置字段。

 MiniMax所对应的 `type` 为 `minimax`。它特有的配置字段如下：

-| 名称             | 数据类型 | 填写要求                                                     | 默认值 | 描述                                                         |
-| ---------------- | -------- | ------------------------------------------------------------ | ------ | ------------------------------------------------------------ |
-| `minimaxGroupId` | string   | 当使用`abab6.5-chat`, `abab6.5s-chat`, `abab5.5s-chat`, `abab5.5-chat`四种模型时必填 | -      | 当使用`abab6.5-chat`, `abab6.5s-chat`, `abab5.5s-chat`, `abab5.5-chat`四种模型时会使用ChatCompletion Pro，需要设置groupID |
+| 名称             | 数据类型 | 填写要求                       | 默认值 | 描述                                                             |
+| ---------------- | -------- | ------------------------------ | ------ |----------------------------------------------------------------|
+| `minimaxApiType` | string   | v2 和 pro 中选填一项           | v2     | v2 代表 ChatCompletion v2 API，pro 代表 ChatCompletion Pro API      |
+| `minimaxGroupId` | string   | `minimaxApiType` 为 pro 时必填 | -      | `minimaxApiType` 为 pro 时使用 ChatCompletion Pro API，需要设置 groupID |

 #### Anthropic Claude

@@ -234,6 +252,9 @@ DeepL 所对应的 `type` 为 `deepl`。它特有的配置字段如下：

 Cohere 所对应的 `type` 为 `cohere`。它并无特有的配置字段。

+#### Together-AI
+Together-AI 所对应的 `type` 为 `together-ai`。它并无特有的配置字段。
+
 ## 用法示例

 ### 使用 OpenAI 协议代理 Azure OpenAI 服务
@@ -992,17 +1013,16 @@ provider:
  apiTokens:
    - "YOUR_MINIMAX_API_TOKEN"
  modelMapping:
-    "gpt-3": "abab6.5g-chat"
-    "gpt-4": "abab6.5-chat"
-    "*": "abab6.5g-chat"
-  minimaxGroupId: "YOUR_MINIMAX_GROUP_ID"
+    "gpt-3": "abab6.5s-chat"
+    "gpt-4": "abab6.5g-chat"
+    "*": "abab6.5t-chat"
 ```

 **请求示例**

 ```json
 {
-    "model": "gpt-4-turbo",
+    "model": "gpt-3",
    "messages": [
        {
            "role": "user",
@@ -1017,27 +1037,33 @@ provider:

 ```json
 {
-    "id": "02b2251f8c6c09d68c1743f07c72afd7",
+    "id": "03ac4fcfe1c6cc9c6a60f9d12046e2b4",
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "message": {
-                "content": "你好！我是MM智能助理，一款由MiniMax自研的大型语言模型。我可以帮助你解答问题，提供信息，进行对话等。有什么可以帮助你的吗？",
-                "role": "assistant"
+                "content": "你好，我是一个由MiniMax公司研发的大型语言模型，名为MM智能助理。我可以帮助回答问题、提供信息、进行对话和执行多种语言处理任务。如果你有任何问题或需要帮助，请随时告诉我！",
+                "role": "assistant",
+                "name": "MM智能助理",
+                "audio_content": ""
            }
        }
    ],
-    "created": 1717760544,
+    "created": 1734155471,
    "model": "abab6.5s-chat",
    "object": "chat.completion",
    "usage": {
-        "total_tokens": 106
+        "total_tokens": 116,
+        "total_characters": 0,
+        "prompt_tokens": 70,
+        "completion_tokens": 46
    },
    "input_sensitive": false,
    "output_sensitive": false,
    "input_sensitive_type": 0,
    "output_sensitive_type": 0,
+    "output_sensitive_int": 0,
    "base_resp": {
        "status_code": 0,
        "status_msg": ""
@@ -1482,6 +1508,61 @@ provider:
 }
 ```

+### 使用 OpenAI 协议代理 Together-AI 服务
+
+**配置信息**
+```yaml
+provider:
+  type: together-ai
+  apiTokens:
+    - "YOUR_TOGETHER_AI_API_TOKEN"
+  modelMapping:
+    "*": "Qwen/Qwen2.5-72B-Instruct-Turbo"
+```
+
+**请求示例**
+```json
+{
+    "model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
+    "messages": [
+        {
+            "role": "user",
+            "content": "Who are you?"
+        }
+    ]
+}
+```
+
+**响应示例**
+```json
+{
+  "id": "8f5809d54b73efac",
+  "object": "chat.completion",
+  "created": 1734785851,
+  "model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
+  "prompt": [],
+  "choices": [
+    {
+      "finish_reason": "eos",
+      "seed": 12830868308626506000,
+      "logprobs": null,
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "I am Qwen, a large language model created by Alibaba Cloud. I am designed to assist users in generating various types of text, such as articles, stories, poems, and more, as well as answering questions and providing information on a wide range of topics. How can I assist you today?",
+        "tool_calls": []
+      }
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 33,
+    "completion_tokens": 61,
+    "total_tokens": 94
+  }
+}
+```
+
+
 ## 完整配置示例

 ### Kubernetes 示例
--- a/plugins/wasm-go/extensions/ai-proxy/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-proxy/README_EN.md
@@ -1356,6 +1356,60 @@ Here, `model` denotes the service tier of DeepL and can only be either `Free` or
 }
 ```

+### Utilizing OpenAI Protocol Proxy for Together-AI Services
+
+**Configuration Information**
+```yaml
+provider:
+  type: together-ai
+  apiTokens:
+    - "YOUR_TOGETHER_AI_API_TOKEN"
+  modelMapping:
+    "*": "Qwen/Qwen2.5-72B-Instruct-Turbo"
+```
+
+**Request Example**
+```json
+{
+    "model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
+    "messages": [
+        {
+            "role": "user",
+            "content": "Who are you?"
+        }
+    ]
+}
+```
+
+**Response Example**
+```json
+{
+  "id": "8f5809d54b73efac",
+  "object": "chat.completion",
+  "created": 1734785851,
+  "model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
+  "prompt": [],
+  "choices": [
+    {
+      "finish_reason": "eos",
+      "seed": 12830868308626506000,
+      "logprobs": null,
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "I am Qwen, a large language model created by Alibaba Cloud. I am designed to assist users in generating various types of text, such as articles, stories, poems, and more, as well as answering questions and providing information on a wide range of topics. How can I assist you today?",
+        "tool_calls": []
+      }
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 33,
+    "completion_tokens": 61,
+    "total_tokens": 94
+  }
+}
+```
+
 ## Full Configuration Example

 ### Kubernetes Example
--- a/plugins/wasm-go/extensions/ai-proxy/config/config.go
+++ b/plugins/wasm-go/extensions/ai-proxy/config/config.go
@@ -86,6 +86,11 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
 	providerConfig := c.GetProviderConfig()
 	err = providerConfig.SetApiTokensFailover(log, c.activeProvider)

+	if handler, ok := c.activeProvider.(provider.TickFuncHandler); ok {
+		tickPeriod, tickFunc := handler.GetTickFunc(log)
+		wrapper.RegisteTickFunc(tickPeriod, tickFunc)
+	}
+
 	return err
 }

--- a/plugins/wasm-go/extensions/ai-proxy/go.mod
+++ b/plugins/wasm-go/extensions/ai-proxy/go.mod
@@ -8,7 +8,7 @@ replace github.com/alibaba/higress/plugins/wasm-go => ../..

 require (
 	github.com/alibaba/higress/plugins/wasm-go v0.0.0
-	github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f
+	github.com/higress-group/proxy-wasm-go-sdk v1.0.0
 	github.com/stretchr/testify v1.8.4
 	github.com/tidwall/gjson v1.17.3
 )
--- a/plugins/wasm-go/extensions/ai-proxy/go.sum
+++ b/plugins/wasm-go/extensions/ai-proxy/go.sum
@@ -4,8 +4,8 @@ github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
 github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
 github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
-github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f h1:ZIiIBRvIw62gA5MJhuwp1+2wWbqL9IGElQ499rUsYYg=
-github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f/go.mod h1:hNFjhrLUIq+kJ9bOcs8QtiplSQ61GZXtd2xHKx4BYRo=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKEak5MWjBDhWjuHijU=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0=
 github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
 github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
--- a/plugins/wasm-go/extensions/ai-proxy/main.go
+++ b/plugins/wasm-go/extensions/ai-proxy/main.go
@@ -20,8 +20,6 @@ import (
 const (
 	pluginName = "ai-proxy"

-	ctxKeyApiName = "apiName"
-
 	defaultMaxBodyBytes uint32 = 10 * 1024 * 1024
 )

@@ -89,32 +87,37 @@ func onHttpRequestHeader(ctx wrapper.HttpContext, pluginConfig config.PluginConf
 	}

 	if apiName == "" {
-		log.Debugf("[onHttpRequestHeader] unsupported path: %s", path.Path)
-		// _ = util.SendResponse(404, "ai-proxy.unknown_api", util.MimeTypeTextPlain, "API not found: "+path.Path)
-		log.Debugf("[onHttpRequestHeader] no send response")
+		log.Warnf("[onHttpRequestHeader] unsupported path: %s", path.Path)
 		return types.ActionContinue
 	}
-	ctx.SetContext(ctxKeyApiName, apiName)
+
+	ctx.SetContext(provider.CtxKeyApiName, apiName)
+	// Disable the route re-calculation since the plugin may modify some headers related to the chosen route.
+	ctx.DisableReroute()
+
+	_, needHandleStreamingBody := activeProvider.(provider.StreamingResponseBodyHandler)
+	if needHandleStreamingBody {
+		proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
+	}

 	if handler, ok := activeProvider.(provider.RequestHeadersHandler); ok {
-		// Disable the route re-calculation since the plugin may modify some headers related to the chosen route.
-		ctx.DisableReroute()
 		// Set the apiToken for the current request.
 		providerConfig.SetApiTokenInUse(ctx, log)

 		hasRequestBody := wrapper.HasRequestBody()
-		action, err := handler.OnRequestHeaders(ctx, apiName, log)
+		err := handler.OnRequestHeaders(ctx, apiName, log)
 		if err == nil {
 			if hasRequestBody {
+				proxywasm.RemoveHttpRequestHeader("Content-Length")
 				ctx.SetRequestBodyBufferLimit(defaultMaxBodyBytes)
-				// Always return types.HeaderStopIteration to support fallback routing,
-				// as long as onHttpRequestBody can be called.
+				// Delay the header processing to allow changing in OnRequestBody
 				return types.HeaderStopIteration
 			}
-			return action
+			ctx.DontReadRequestBody()
+			return types.ActionContinue
 		}

-		_ = util.SendResponse(500, "ai-proxy.proc_req_headers_failed", util.MimeTypeTextPlain, fmt.Sprintf("failed to process request headers: %v", err))
+		util.ErrorHandler("ai-proxy.proc_req_headers_failed", fmt.Errorf("failed to process request headers: %v", err))
 		return types.ActionContinue
 	}

@@ -132,11 +135,14 @@ func onHttpRequestBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig
 	log.Debugf("[onHttpRequestBody] provider=%s", activeProvider.GetProviderType())

 	if handler, ok := activeProvider.(provider.RequestBodyHandler); ok {
-		apiName, _ := ctx.GetContext(ctxKeyApiName).(provider.ApiName)
+		apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)

 		newBody, settingErr := pluginConfig.GetProviderConfig().ReplaceByCustomSettings(body)
 		if settingErr != nil {
-			_ = util.SendResponse(500, "ai-proxy.proc_req_body_failed", util.MimeTypeTextPlain, fmt.Sprintf("failed to rewrite request body by custom settings: %v", settingErr))
+			util.ErrorHandler(
+				"ai-proxy.proc_req_body_failed",
+				fmt.Errorf("failed to replace request body by custom settings: %v", settingErr),
+			)
 			return types.ActionContinue
 		}

@@ -146,7 +152,7 @@ func onHttpRequestBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig
 		if err == nil {
 			return action
 		}
-		_ = util.SendResponse(500, "ai-proxy.proc_req_body_failed", util.MimeTypeTextPlain, fmt.Sprintf("failed to process request body: %v", err))
+		util.ErrorHandler("ai-proxy.proc_req_body_failed", fmt.Errorf("failed to process request body: %v", err))
 	}
 	return types.ActionContinue
 }
@@ -177,32 +183,25 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo
 			log.Errorf("unable to load :status header from response: %v", err)
 		}
 		ctx.DontReadResponseBody()
-		providerConfig.OnRequestFailed(ctx, apiTokenInUse, log)
-
-		return types.ActionContinue
+		return providerConfig.OnRequestFailed(activeProvider, ctx, apiTokenInUse, log)
 	}

 	// Reset ctxApiTokenRequestFailureCount if the request is successful,
 	// the apiToken is removed only when the number of consecutive request failures exceeds the threshold.
 	providerConfig.ResetApiTokenRequestFailureCount(apiTokenInUse, log)

-	if handler, ok := activeProvider.(provider.ResponseHeadersHandler); ok {
-		apiName, _ := ctx.GetContext(ctxKeyApiName).(provider.ApiName)
-		action, err := handler.OnResponseHeaders(ctx, apiName, log)
-		if err == nil {
-			checkStream(&ctx, &log)
-			return action
-		}
-		_ = util.SendResponse(500, "ai-proxy.proc_resp_headers_failed", util.MimeTypeTextPlain, fmt.Sprintf("failed to process response headers: %v", err))
-		return types.ActionContinue
+	headers := util.GetOriginalResponseHeaders()
+	if handler, ok := activeProvider.(provider.TransformResponseHeadersHandler); ok {
+		apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
+		handler.TransformResponseHeaders(ctx, apiName, headers, log)
+	} else {
+		providerConfig.DefaultTransformResponseHeaders(ctx, headers)
 	}
+	util.ReplaceResponseHeaders(headers)

-	checkStream(&ctx, &log)
-	_, needHandleBody := activeProvider.(provider.ResponseBodyHandler)
+	checkStream(ctx, log)
 	_, needHandleStreamingBody := activeProvider.(provider.StreamingResponseBodyHandler)
-	if !needHandleBody && !needHandleStreamingBody {
-		ctx.DontReadResponseBody()
-	} else if !needHandleStreamingBody {
+	if !needHandleStreamingBody {
 		ctx.BufferResponseBody()
 	}

@@ -221,7 +220,7 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
 	log.Debugf("isLastChunk=%v chunk: %s", isLastChunk, string(chunk))

 	if handler, ok := activeProvider.(provider.StreamingResponseBodyHandler); ok {
-		apiName, _ := ctx.GetContext(ctxKeyApiName).(provider.ApiName)
+		apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
 		modifiedChunk, err := handler.OnStreamingResponseBody(ctx, apiName, chunk, isLastChunk, log)
 		if err == nil && modifiedChunk != nil {
 			return modifiedChunk
@@ -240,27 +239,29 @@ func onHttpResponseBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfi
 	}

 	log.Debugf("[onHttpResponseBody] provider=%s", activeProvider.GetProviderType())
-	//log.Debugf("response body: %s", string(body))

-	if handler, ok := activeProvider.(provider.ResponseBodyHandler); ok {
-		apiName, _ := ctx.GetContext(ctxKeyApiName).(provider.ApiName)
-		action, err := handler.OnResponseBody(ctx, apiName, body, log)
-		if err == nil {
-			return action
+	if handler, ok := activeProvider.(provider.TransformResponseBodyHandler); ok {
+		apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
+		body, err := handler.TransformResponseBody(ctx, apiName, body, log)
+		if err != nil {
+			util.ErrorHandler("ai-proxy.proc_resp_body_failed", fmt.Errorf("failed to process response body: %v", err))
+			return types.ActionContinue
+		}
+		if err = provider.ReplaceResponseBody(body, log); err != nil {
+			util.ErrorHandler("ai-proxy.replace_resp_body_failed", fmt.Errorf("failed to replace response body: %v", err))
 		}
-		_ = util.SendResponse(500, "ai-proxy.proc_resp_body_failed", util.MimeTypeTextPlain, fmt.Sprintf("failed to process response body: %v", err))
-		return types.ActionContinue
 	}
 	return types.ActionContinue
 }

-func checkStream(ctx *wrapper.HttpContext, log *wrapper.Log) {
+func checkStream(ctx wrapper.HttpContext, log wrapper.Log) {
 	contentType, err := proxywasm.GetHttpResponseHeader("Content-Type")
 	if err != nil || !strings.HasPrefix(contentType, "text/event-stream") {
 		if err != nil {
 			log.Errorf("unable to load content-type header from response: %v", err)
 		}
-		(*ctx).BufferResponseBody()
+		ctx.BufferResponseBody()
+		ctx.SetResponseBodyBufferLimit(defaultMaxBodyBytes)
 	}
 }

--- a/plugins/wasm-go/extensions/ai-proxy/provider/ai360.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/ai360.go
@@ -40,13 +40,13 @@ func (m *ai360Provider) GetProviderType() string {
 	return providerTypeAi360
 }

-func (m *ai360Provider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (m *ai360Provider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion && apiName != ApiNameEmbeddings {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	// Delay the header processing to allow changing streaming mode in OnRequestBody
-	return types.HeaderStopIteration, nil
+	return nil
 }

 func (m *ai360Provider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
@@ -58,7 +58,5 @@ func (m *ai360Provider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName,

 func (m *ai360Provider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
 	util.OverwriteRequestHostHeader(headers, ai360Domain)
-	util.OverwriteRequestAuthorizationHeader(headers, "Authorization "+m.config.GetApiTokenInUse(ctx))
-	headers.Del("Accept-Encoding")
-	headers.Del("Content-Length")
+	util.OverwriteRequestAuthorizationHeader(headers, m.config.GetApiTokenInUse(ctx))
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/azure.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/azure.go
@@ -53,12 +53,12 @@ func (m *azureProvider) GetProviderType() string {
 	return providerTypeAzure
 }

-func (m *azureProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (m *azureProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
-	return types.ActionContinue, nil
+	return nil
 }

 func (m *azureProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
@@ -69,8 +69,23 @@ func (m *azureProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName,
 }

 func (m *azureProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestPathHeader(headers, m.serviceUrl.RequestURI())
+	u, e := url.Parse(ctx.Path())
+	if e == nil {
+		customApiVersion := u.Query().Get("api-version")
+		if customApiVersion == "" {
+			util.OverwriteRequestPathHeader(headers, m.serviceUrl.RequestURI())
+		} else {
+			q := m.serviceUrl.Query()
+			q.Set("api-version", customApiVersion)
+			newUrl := *m.serviceUrl
+			newUrl.RawQuery = q.Encode()
+			util.OverwriteRequestPathHeader(headers, newUrl.RequestURI())
+		}
+	} else {
+		log.Errorf("failed to parse request path: %v", e)
+		util.OverwriteRequestPathHeader(headers, m.serviceUrl.RequestURI())
+	}
 	util.OverwriteRequestHostHeader(headers, m.serviceUrl.Host)
-	util.OverwriteRequestAuthorizationHeader(headers, "api-key "+m.config.GetApiTokenInUse(ctx))
+	headers.Set("api-key", m.config.GetApiTokenInUse(ctx))
 	headers.Del("Content-Length")
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/baichuan.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/baichuan.go
@@ -42,12 +42,12 @@ func (m *baichuanProvider) GetProviderType() string {
 	return providerTypeBaichuan
 }

-func (m *baichuanProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (m *baichuanProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
-	return types.ActionContinue, nil
+	return nil
 }

 func (m *baichuanProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/baidu.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/baidu.go
@@ -1,48 +1,53 @@
 package provider

 import (
+	"crypto/hmac"
+	"crypto/sha256"
+	"encoding/hex"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"net/http"
+	"sort"
 	"strings"
 	"time"

 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
-	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
 )

-// baiduProvider is the provider for baidu ernie bot service.
-
+// baiduProvider is the provider for baidu service.
 const (
-	baiduDomain             = "aip.baidubce.com"
-	baiduChatCompletionPath = "/chat"
+	baiduDomain             = "qianfan.baidubce.com"
+	baiduChatCompletionPath = "/v2/chat/completions"
+	baiduApiTokenDomain     = "iam.bj.baidubce.com"
+	baiduApiTokenPort       = 443
+	baiduApiTokenPath       = "/v1/BCE-BEARER/token"
+	// refresh apiToken every 1 hour
+	baiduApiTokenRefreshInterval = 3600
+	// authorizationString expires in 30 minutes, authorizationString is used to generate apiToken
+	// the default expiration time of apiToken is 24 hours
+	baiduAuthorizationStringExpirationSeconds = 1800
+	bce_prefix                                = "x-bce-"
 )

-var baiduModelToPathSuffixMap = map[string]string{
-	"ERNIE-4.0-8K":     "completions_pro",
-	"ERNIE-3.5-8K":     "completions",
-	"ERNIE-3.5-128K":   "ernie-3.5-128k",
-	"ERNIE-Speed-8K":   "ernie_speed",
-	"ERNIE-Speed-128K": "ernie-speed-128k",
-	"ERNIE-Tiny-8K":    "ernie-tiny-8k",
-	"ERNIE-Bot-8K":     "ernie_bot_8k",
-	"BLOOMZ-7B":        "bloomz_7b1",
-}
+type baiduProviderInitializer struct{}

-type baiduProviderInitializer struct {
-}
-
-func (b *baiduProviderInitializer) ValidateConfig(config ProviderConfig) error {
-	if config.apiTokens == nil || len(config.apiTokens) == 0 {
-		return errors.New("no apiToken found in provider config")
+func (g *baiduProviderInitializer) ValidateConfig(config ProviderConfig) error {
+	if config.baiduAccessKeyAndSecret == nil || len(config.baiduAccessKeyAndSecret) == 0 {
+		return errors.New("no baiduAccessKeyAndSecret found in provider config")
+	}
+	if config.baiduApiTokenServiceName == "" {
+		return errors.New("no baiduApiTokenServiceName found in provider config")
+	}
+	if !config.failover.enabled {
+		config.useGlobalApiToken = true
 	}
 	return nil
 }

-func (b *baiduProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+func (g *baiduProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
 	return &baiduProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -54,234 +59,235 @@ type baiduProvider struct {
 	contextCache *contextCache
 }

-func (b *baiduProvider) GetProviderType() string {
+// GetProviderType returns the provider type identifier providerTypeBaidu.
+func (g *baiduProvider) GetProviderType() string {
 	return providerTypeBaidu
 }

-func (b *baiduProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+// OnRequestHeaders returns errUnsupportedApiName for anything other than
+// ApiNameChatCompletion; otherwise it delegates header processing to
+// config.handleRequestHeaders and returns nil.
+func (g *baiduProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
+	if apiName != ApiNameChatCompletion {
+		return errUnsupportedApiName
+	}
+	g.config.handleRequestHeaders(g, ctx, apiName, log)
+	return nil
+}
+
+// OnRequestBody returns errUnsupportedApiName (with ActionContinue) for anything
+// other than ApiNameChatCompletion; otherwise it delegates body processing to
+// config.handleRequestBody, passing this provider's contextCache.
+func (g *baiduProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
 	if apiName != ApiNameChatCompletion {
 		return types.ActionContinue, errUnsupportedApiName
 	}
-	b.config.handleRequestHeaders(b, ctx, apiName, log)
-	// Delay the header processing to allow changing streaming mode in OnRequestBody
-	return types.HeaderStopIteration, nil
+	return g.config.handleRequestBody(g, g.contextCache, ctx, apiName, body, log)
 }

-func (b *baiduProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
+// TransformRequestHeaders rewrites the request headers for the Baidu endpoint:
+// the path becomes baiduChatCompletionPath, the host becomes baiduDomain, the
+// Authorization header carries the apiToken currently in use as a Bearer token,
+// and Content-Length is removed.
+func (g *baiduProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
+	util.OverwriteRequestPathHeader(headers, baiduChatCompletionPath)
 	util.OverwriteRequestHostHeader(headers, baiduDomain)
-	headers.Del("Accept-Encoding")
+	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+g.config.GetApiTokenInUse(ctx))
 	headers.Del("Content-Length")
 }

-func (b *baiduProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
-		return types.ActionContinue, errUnsupportedApiName
-	}
-	return b.config.handleRequestBody(b, b.contextCache, ctx, apiName, body, log)
-}
-
-func (b *baiduProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header, log wrapper.Log) ([]byte, error) {
-	request := &chatCompletionRequest{}
-	err := b.config.parseRequestAndMapModel(ctx, request, body, log)
-	if err != nil {
-		return nil, err
-	}
-	path := b.getRequestPath(ctx, request.Model)
-	util.OverwriteRequestPathHeader(headers, path)
-
-	baiduRequest := b.baiduTextGenRequest(request)
-	return json.Marshal(baiduRequest)
-}
-
-func (b *baiduProvider) OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
-	// 使用文心一言接口协议,跳过OnStreamingResponseBody()和OnResponseBody()
-	if b.config.protocol == protocolOriginal {
-		ctx.DontReadResponseBody()
-		return types.ActionContinue, nil
-	}
-
-	_ = proxywasm.RemoveHttpResponseHeader("Content-Length")
-	return types.ActionContinue, nil
-}
-
-func (b *baiduProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
-	if isLastChunk || len(chunk) == 0 {
-		return nil, nil
-	}
-	// sample event response:
-	// data: {"id":"as-vb0m37ti8y","object":"chat.completion","created":1709089502,"sentence_id":0,"is_end":false,"is_truncated":false,"result":"当然可以，","need_clear_history":false,"finish_reason":"normal","usage":{"prompt_tokens":5,"completion_tokens":2,"total_tokens":7}}
-
-	// sample end event response:
-	// data: {"id":"as-vb0m37ti8y","object":"chat.completion","created":1709089531,"sentence_id":20,"is_end":true,"is_truncated":false,"result":"","need_clear_history":false,"finish_reason":"normal","usage":{"prompt_tokens":5,"completion_tokens":420,"total_tokens":425}}
-	responseBuilder := &strings.Builder{}
-	lines := strings.Split(string(chunk), "\n")
-	for _, data := range lines {
-		if len(data) < 6 {
-			// ignore blank line or wrong format
-			continue
-		}
-		data = data[6:]
-		var baiduResponse baiduTextGenStreamResponse
-		if err := json.Unmarshal([]byte(data), &baiduResponse); err != nil {
-			log.Errorf("unable to unmarshal baidu response: %v", err)
-			continue
-		}
-		response := b.streamResponseBaidu2OpenAI(ctx, &baiduResponse)
-		responseBody, err := json.Marshal(response)
-		if err != nil {
-			log.Errorf("unable to marshal response: %v", err)
-			return nil, err
-		}
-		b.appendResponse(responseBuilder, string(responseBody))
-	}
-	modifiedResponseChunk := responseBuilder.String()
-	log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
-	return []byte(modifiedResponseChunk), nil
-}
-
-func (b *baiduProvider) OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	baiduResponse := &baiduTextGenResponse{}
-	if err := json.Unmarshal(body, baiduResponse); err != nil {
-		return types.ActionContinue, fmt.Errorf("unable to unmarshal baidu response: %v", err)
-	}
-	if baiduResponse.ErrorMsg != "" {
-		return types.ActionContinue, fmt.Errorf("baidu response error, error_code: %d, error_message: %s", baiduResponse.ErrorCode, baiduResponse.ErrorMsg)
-	}
-	response := b.responseBaidu2OpenAI(ctx, baiduResponse)
-	return types.ActionContinue, replaceJsonResponseBody(response, log)
-}
-
-type baiduTextGenRequest struct {
-	Model           string        `json:"model"`
-	Messages        []chatMessage `json:"messages"`
-	Temperature     float64       `json:"temperature,omitempty"`
-	TopP            float64       `json:"top_p,omitempty"`
-	PenaltyScore    float64       `json:"penalty_score,omitempty"`
-	Stream          bool          `json:"stream,omitempty"`
-	System          string        `json:"system,omitempty"`
-	DisableSearch   bool          `json:"disable_search,omitempty"`
-	EnableCitation  bool          `json:"enable_citation,omitempty"`
-	MaxOutputTokens int           `json:"max_output_tokens,omitempty"`
-	UserId          string        `json:"user_id,omitempty"`
-}
-
-func (b *baiduProvider) getRequestPath(ctx wrapper.HttpContext, baiduModel string) string {
-	// https://cloud.baidu.com/doc/WENXINWORKSHOP/s/clntwmv7t
-	suffix, ok := baiduModelToPathSuffixMap[baiduModel]
-	if !ok {
-		suffix = baiduModel
-	}
-	return fmt.Sprintf("/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/%s?access_token=%s", suffix, b.config.GetApiTokenInUse(ctx))
-}
-
-func (b *baiduProvider) setSystemContent(request *baiduTextGenRequest, content string) {
-	request.System = content
-}
-
-func (b *baiduProvider) baiduTextGenRequest(request *chatCompletionRequest) *baiduTextGenRequest {
-	baiduRequest := baiduTextGenRequest{
-		Messages:        make([]chatMessage, 0, len(request.Messages)),
-		Temperature:     request.Temperature,
-		TopP:            request.TopP,
-		PenaltyScore:    request.FrequencyPenalty,
-		Stream:          request.Stream,
-		DisableSearch:   false,
-		EnableCitation:  false,
-		MaxOutputTokens: request.MaxTokens,
-		UserId:          request.User,
-	}
-	for _, message := range request.Messages {
-		if message.Role == roleSystem {
-			baiduRequest.System = message.StringContent()
-		} else {
-			baiduRequest.Messages = append(baiduRequest.Messages, chatMessage{
-				Role:    message.Role,
-				Content: message.Content,
-			})
-		}
-	}
-	return &baiduRequest
-}
-
-type baiduTextGenResponse struct {
-	Id               string                    `json:"id"`
-	Object           string                    `json:"object"`
-	Created          int64                     `json:"created"`
-	Result           string                    `json:"result"`
-	IsTruncated      bool                      `json:"is_truncated"`
-	NeedClearHistory bool                      `json:"need_clear_history"`
-	Usage            baiduTextGenResponseUsage `json:"usage"`
-	baiduTextGenResponseError
-}
-
-type baiduTextGenResponseError struct {
-	ErrorCode int    `json:"error_code"`
-	ErrorMsg  string `json:"error_msg"`
-}
-
-type baiduTextGenStreamResponse struct {
-	baiduTextGenResponse
-	SentenceId int  `json:"sentence_id"`
-	IsEnd      bool `json:"is_end"`
-}
-
-type baiduTextGenResponseUsage struct {
-	PromptTokens     int `json:"prompt_tokens"`
-	CompletionTokens int `json:"completion_tokens"`
-	TotalTokens      int `json:"total_tokens"`
-}
-
-func (b *baiduProvider) responseBaidu2OpenAI(ctx wrapper.HttpContext, response *baiduTextGenResponse) *chatCompletionResponse {
-	choice := chatCompletionChoice{
-		Index:        0,
-		Message:      &chatMessage{Role: roleAssistant, Content: response.Result},
-		FinishReason: finishReasonStop,
-	}
-	return &chatCompletionResponse{
-		Id:                response.Id,
-		Created:           time.Now().UnixMilli() / 1000,
-		Model:             ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
-		SystemFingerprint: "",
-		Object:            objectChatCompletion,
-		Choices:           []chatCompletionChoice{choice},
-		Usage: usage{
-			PromptTokens:     response.Usage.PromptTokens,
-			CompletionTokens: response.Usage.CompletionTokens,
-			TotalTokens:      response.Usage.TotalTokens,
-		},
-	}
-}
-
-func (b *baiduProvider) streamResponseBaidu2OpenAI(ctx wrapper.HttpContext, response *baiduTextGenStreamResponse) *chatCompletionResponse {
-	choice := chatCompletionChoice{
-		Index:   0,
-		Message: &chatMessage{Role: roleAssistant, Content: response.Result},
-	}
-	if response.IsEnd {
-		choice.FinishReason = finishReasonStop
-	}
-	return &chatCompletionResponse{
-		Id:                response.Id,
-		Created:           time.Now().UnixMilli() / 1000,
-		Model:             ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
-		SystemFingerprint: "",
-		Object:            objectChatCompletionChunk,
-		Choices:           []chatCompletionChoice{choice},
-		Usage: usage{
-			PromptTokens:     response.Usage.PromptTokens,
-			CompletionTokens: response.Usage.CompletionTokens,
-			TotalTokens:      response.Usage.TotalTokens,
-		},
-	}
-}
-
-func (b *baiduProvider) appendResponse(responseBuilder *strings.Builder, responseBody string) {
-	responseBuilder.WriteString(fmt.Sprintf("%s %s\n\n", streamDataItemKey, responseBody))
-}
-
-func (b *baiduProvider) GetApiName(path string) ApiName {
+// GetApiName maps a request path to an ApiName: a path containing
+// baiduChatCompletionPath is ApiNameChatCompletion, anything else yields "".
+func (g *baiduProvider) GetApiName(path string) ApiName {
 	if strings.Contains(path, baiduChatCompletionPath) {
 		return ApiNameChatCompletion
 	}
 	return ""
 }
+
+// generateAuthorizationString builds the authorization string that is sent to
+// baiduApiTokenDomain when requesting an apiToken (GET baiduApiTokenPath).
+//
+// accessKeyAndSecret must have the form "<accessKeyId>:<secretAccessKey>".
+// expirationInSeconds is embedded in the signature as its validity period.
+// When the ':' separator is missing, an empty string is returned instead of
+// panicking on an out-of-range index.
+func generateAuthorizationString(accessKeyAndSecret string, expirationInSeconds int) string {
+	c := strings.Split(accessKeyAndSecret, ":")
+	if len(c) < 2 {
+		// Malformed credential: there is no secret part to sign with.
+		return ""
+	}
+	credentials := BceCredentials{
+		AccessKeyId:     c[0],
+		SecretAccessKey: c[1],
+	}
+	httpMethod := "GET"
+	path := baiduApiTokenPath
+	headers := map[string]string{"host": baiduApiTokenDomain}
+	timestamp := time.Now().Unix()
+
+	// Every header sent for signing is also listed as a signed header.
+	headersToSign := make([]string, 0, len(headers))
+	for k := range headers {
+		headersToSign = append(headersToSign, k)
+	}
+
+	return sign(credentials, httpMethod, path, headers, timestamp, expirationInSeconds, headersToSign)
+}
+
+// BceCredentials holds the access key and secret key used by sign.
+type BceCredentials struct {
+	// AccessKeyId is embedded in clear text in the authorization string.
+	AccessKeyId     string
+	// SecretAccessKey is the HMAC-SHA256 key used to derive the signing key.
+	SecretAccessKey string
+}
+
+// normalizeString percent-encodes inStr byte by byte. ASCII letters, digits and
+// '.', '-', '_', '~' are copied unchanged; '/' is copied unchanged only when
+// encodingSlash is false. Every other byte is written as %XX with uppercase hex
+// digits. An empty input yields an empty string.
+func normalizeString(inStr string, encodingSlash bool) string {
+	var encoded strings.Builder
+	for i := 0; i < len(inStr); i++ {
+		b := inStr[i]
+		switch {
+		case 'A' <= b && b <= 'Z', 'a' <= b && b <= 'z', '0' <= b && b <= '9':
+			encoded.WriteByte(b)
+		case b == '.', b == '-', b == '_', b == '~':
+			encoded.WriteByte(b)
+		case b == '/' && !encodingSlash:
+			encoded.WriteByte(b)
+		default:
+			fmt.Fprintf(&encoded, "%%%02X", b)
+		}
+	}
+	return encoded.String()
+}
+
+// getCanonicalTime formats a Unix timestamp (seconds) as a UTC time string of
+// the form 2006-01-02T15:04:05Z. A zero timestamp means "now".
+func getCanonicalTime(timestamp int64) string {
+	seconds := timestamp
+	if seconds == 0 {
+		seconds = time.Now().Unix()
+	}
+	return time.Unix(seconds, 0).UTC().Format("2006-01-02T15:04:05Z")
+}
+
+// getCanonicalUri generates a canonical URI by passing path through
+// normalizeString with encodingSlash=false, so '/' separators are kept as-is.
+func getCanonicalUri(path string) string {
+	return normalizeString(path, false)
+}
+
+// getCanonicalHeaders renders the headers that take part in the signature as
+// sorted "name:value" lines joined by '\n'.
+//
+// A header is included when its lower-cased, trimmed name starts with bce_prefix
+// or appears in headersToSign; an empty headersToSign falls back to host,
+// content-md5, content-length and content-type. Names and trimmed values are
+// both passed through normalizeString with slashes encoded. An empty headers
+// map yields "".
+func getCanonicalHeaders(headers map[string]string, headersToSign []string) string {
+	if len(headers) == 0 {
+		return ""
+	}
+
+	names := headersToSign
+	if len(names) == 0 {
+		names = []string{"host", "content-md5", "content-length", "content-type"}
+	}
+
+	// Set of normalized header names that were explicitly requested.
+	wanted := make(map[string]struct{}, len(names))
+	for _, name := range names {
+		wanted[strings.ToLower(strings.TrimSpace(name))] = struct{}{}
+	}
+
+	var lines []string
+	for name, value := range headers {
+		key := strings.ToLower(strings.TrimSpace(name))
+		_, selected := wanted[key]
+		if !selected && !strings.HasPrefix(key, bce_prefix) {
+			continue
+		}
+		lines = append(lines,
+			fmt.Sprintf("%s:%s", normalizeString(key, true), normalizeString(strings.TrimSpace(value), true)))
+	}
+
+	// Map iteration order is random, so sort for a stable result.
+	sort.Strings(lines)
+
+	return strings.Join(lines, "\n")
+}
+
+// sign generates the "bce-auth-v1" authorization string.
+//
+// The result has the form
+//   bce-auth-v1/<accessKeyId>/<canonical time>/<expiration>/<signed headers>/<signature>
+// where <signed headers> is headersToSign joined by ';' (empty when
+// headersToSign is empty). The signature is computed in two HMAC-SHA256 steps:
+// the secret key signs the auth prefix to derive a hex signing key, and that
+// key then signs the canonical request.
+func sign(credentials BceCredentials, httpMethod, path string, headers map[string]string,
+	timestamp int64, expirationInSeconds int,
+	headersToSign []string) string {
+
+	// hmacHex returns the hex-encoded HMAC-SHA256 of msg keyed with key.
+	hmacHex := func(key, msg string) string {
+		mac := hmac.New(sha256.New, []byte(key))
+		mac.Write([]byte(msg))
+		return hex.EncodeToString(mac.Sum(nil))
+	}
+
+	// Auth prefix; it doubles as the message for deriving the signing key.
+	authPrefix := fmt.Sprintf("bce-auth-v1/%s/%s/%d",
+		credentials.AccessKeyId,
+		getCanonicalTime(timestamp),
+		expirationInSeconds)
+	signingKey := hmacHex(credentials.SecretAccessKey, authPrefix)
+
+	// Canonical request: method, URI, an empty third line, then the headers.
+	canonicalRequest := strings.Join([]string{
+		httpMethod,
+		getCanonicalUri(path),
+		"",
+		getCanonicalHeaders(headers, headersToSign),
+	}, "\n")
+	signature := hmacHex(signingKey, canonicalRequest)
+
+	if len(headersToSign) == 0 {
+		return fmt.Sprintf("%s//%s", authPrefix, signature)
+	}
+	return fmt.Sprintf("%s/%s/%s", authPrefix, strings.Join(headersToSign, ";"), signature)
+}
+
+// GetTickFunc returns the period and callback used to refresh the Baidu apiToken
+// periodically. The period is baiduApiTokenRefreshInterval * 1000 (presumably
+// milliseconds — confirm against the tick registration code).
+//
+// On each tick, the VM that holds the lease reads the currently stored apiTokens,
+// requests one new apiToken per configured baiduAccessKeyAndSecret entry, and then
+// removes the previously stored tokens.
+func (g *baiduProvider) GetTickFunc(log wrapper.Log) (tickPeriod int64, tickFunc func()) {
+	vmID := generateVMID()
+
+	return baiduApiTokenRefreshInterval * 1000, func() {
+		// Only the Wasm VM that successfully acquires the lease refreshes the apiToken
+		if g.config.tryAcquireOrRenewLease(vmID, log) {
+			log.Debugf("Successfully acquired or renewed lease for baidu apiToken refresh task, vmID: %v", vmID)
+			// Snapshot the apiTokens that are about to be replaced; they are removed below
+			oldApiTokens, _, err := getApiTokens(g.config.failover.ctxApiTokens)
+			if err != nil {
+				log.Errorf("Get old apiToken failed: %v", err)
+				return
+			}
+			log.Debugf("Old apiTokens: %v", oldApiTokens)
+
+			for _, accessKeyAndSecret := range g.config.baiduAccessKeyAndSecret {
+				authorizationString := generateAuthorizationString(accessKeyAndSecret, baiduAuthorizationStringExpirationSeconds)
+				log.Debugf("Generate authorizationString: %v", authorizationString)
+				g.generateNewApiToken(authorizationString, log)
+			}
+
+			// Remove the old apiTokens.
+			// NOTE(review): generateNewApiToken returns nothing, so the old tokens are
+			// removed here whether or not a replacement was obtained — confirm this
+			// cannot leave the token list empty when the token service is unavailable.
+			for _, token := range oldApiTokens {
+				log.Debugf("Remove old apiToken: %v", token)
+				removeApiToken(g.config.failover.ctxApiTokens, token, log)
+			}
+		}
+	}
+}
+
+// generateNewApiToken requests an apiToken from the configured Baidu token
+// service (GET baiduApiTokenPath) using authorizationString as the Authorization
+// header, and stores the returned token via addApiToken.
+//
+// Failures are logged and never panic: a non-201 status, an unparsable body, or
+// a body whose "token" field is missing, empty or not a string.
+func (g *baiduProvider) generateNewApiToken(authorizationString string, log wrapper.Log) {
+	client := wrapper.NewClusterClient(wrapper.FQDNCluster{
+		FQDN: g.config.baiduApiTokenServiceName,
+		Host: g.config.baiduApiTokenServiceHost,
+		Port: g.config.baiduApiTokenServicePort,
+	})
+
+	headers := [][2]string{
+		{"content-type", "application/json"},
+		{"Authorization", authorizationString},
+	}
+
+	err := client.Get(baiduApiTokenPath, headers, func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+		if statusCode != 201 {
+			log.Errorf("Get apiToken failed, status code: %d, response body: %s", statusCode, string(responseBody))
+			return
+		}
+		var response map[string]interface{}
+		if err := json.Unmarshal(responseBody, &response); err != nil {
+			log.Errorf("Unmarshal response failed: %v", err)
+			return
+		}
+		// Checked assertion: an unchecked one panics when "token" is absent or not a string.
+		apiToken, ok := response["token"].(string)
+		if !ok || apiToken == "" {
+			log.Errorf("Get apiToken failed, no valid token in response body: %s", string(responseBody))
+			return
+		}
+		addApiToken(g.config.failover.ctxApiTokens, apiToken, log)
+	}, 30000)
+
+	if err != nil {
+		log.Errorf("Get apiToken failed: %v", err)
+	}
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/claude.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/claude.go
@@ -10,7 +10,6 @@ import (

 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
-	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
 )

@@ -102,27 +101,25 @@ func (c *claudeProvider) GetProviderType() string {
 	return providerTypeClaude
 }

-func (c *claudeProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (c *claudeProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	c.config.handleRequestHeaders(c, ctx, apiName, log)
-	return types.ActionContinue, nil
+	return nil
 }

 func (c *claudeProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
 	util.OverwriteRequestPathHeader(headers, claudeChatCompletionPath)
 	util.OverwriteRequestHostHeader(headers, claudeDomain)

-	headers.Add("x-api-key", c.config.GetApiTokenInUse(ctx))
+	headers.Set("x-api-key", c.config.GetApiTokenInUse(ctx))

 	if c.config.claudeVersion == "" {
 		c.config.claudeVersion = defaultVersion
 	}

-	headers.Add("anthropic-version", c.config.claudeVersion)
-	headers.Del("Accept-Encoding")
-	headers.Del("Content-Length")
+	headers.Set("anthropic-version", c.config.claudeVersion)
 }

 func (c *claudeProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
@@ -141,27 +138,16 @@ func (c *claudeProvider) TransformRequestBody(ctx wrapper.HttpContext, apiName A
 	return json.Marshal(claudeRequest)
 }

-func (c *claudeProvider) OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
+func (c *claudeProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
 	claudeResponse := &claudeTextGenResponse{}
 	if err := json.Unmarshal(body, claudeResponse); err != nil {
-		return types.ActionContinue, fmt.Errorf("unable to unmarshal claude response: %v", err)
+		return nil, fmt.Errorf("unable to unmarshal claude response: %v", err)
 	}
 	if claudeResponse.Error != nil {
-		return types.ActionContinue, fmt.Errorf("claude response error, error_type: %s, error_message: %s", claudeResponse.Error.Type, claudeResponse.Error.Message)
+		return nil, fmt.Errorf("claude response error, error_type: %s, error_message: %s", claudeResponse.Error.Type, claudeResponse.Error.Message)
 	}
 	response := c.responseClaude2OpenAI(ctx, claudeResponse)
-	return types.ActionContinue, replaceJsonResponseBody(response, log)
-}
-
-func (c *claudeProvider) OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
-	// use original protocol, skip OnStreamingResponseBody() and OnResponseBody()
-	if c.config.protocol == protocolOriginal {
-		ctx.DontReadResponseBody()
-		return types.ActionContinue, nil
-	}
-
-	_ = proxywasm.RemoveHttpResponseHeader("Content-Length")
-	return types.ActionContinue, nil
+	return json.Marshal(response)
 }

 func (c *claudeProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/cloudflare.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/cloudflare.go
@@ -42,12 +42,12 @@ func (c *cloudflareProvider) GetProviderType() string {
 	return providerTypeCloudflare
 }

-func (c *cloudflareProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (c *cloudflareProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	c.config.handleRequestHeaders(c, ctx, apiName, log)
-	return types.ActionContinue, nil
+	return nil
 }

 func (c *cloudflareProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
@@ -61,6 +61,4 @@ func (c *cloudflareProvider) TransformRequestHeaders(ctx wrapper.HttpContext, ap
 	util.OverwriteRequestPathHeader(headers, strings.Replace(cloudflareChatCompletionPath, "{account_id}", c.config.cloudflareAccountId, 1))
 	util.OverwriteRequestHostHeader(headers, cloudflareDomain)
 	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+c.config.GetApiTokenInUse(ctx))
-	headers.Del("Accept-Encoding")
-	headers.Del("Content-Length")
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/cohere.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/cohere.go
@@ -3,11 +3,12 @@ package provider
 import (
 	"encoding/json"
 	"errors"
+	"net/http"
+	"strings"
+
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
-	"net/http"
-	"strings"
 )

 const (
@@ -54,12 +55,12 @@ func (m *cohereProvider) GetProviderType() string {
 	return providerTypeCohere
 }

-func (m *cohereProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (m *cohereProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
-	return types.ActionContinue, nil
+	return nil
 }

 func (m *cohereProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/context.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/context.go
@@ -139,7 +139,7 @@ func insertContext(provider Provider, content string, err error, body []byte, lo
 	typ := provider.GetProviderType()
 	if err != nil {
 		log.Errorf("failed to load context file: %v", err)
-		_ = util.SendResponse(500, fmt.Sprintf("ai-proxy.%s.load_ctx_failed", typ), util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
+		util.ErrorHandler(fmt.Sprintf("ai-proxy.%s.load_ctx_failed", typ), fmt.Errorf("failed to load context file: %v", err))
 	}

 	if inserter, ok := provider.(ContextInserter); ok {
@@ -149,10 +149,10 @@ func insertContext(provider Provider, content string, err error, body []byte, lo
 	}

 	if err != nil {
-		_ = util.SendResponse(500, fmt.Sprintf("ai-proxy.%s.insert_ctx_failed", typ), util.MimeTypeTextPlain, fmt.Sprintf("failed to insert context message: %v", err))
+		util.ErrorHandler(fmt.Sprintf("ai-proxy.%s.insert_ctx_failed", typ), fmt.Errorf("failed to insert context message: %v", err))
 	}
-	if err := replaceHttpJsonRequestBody(body, log); err != nil {
-		_ = util.SendResponse(500, fmt.Sprintf("ai-proxy.%s.replace_request_body_failed", typ), util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
+	if err := replaceRequestBody(body, log); err != nil {
+		util.ErrorHandler(fmt.Sprintf("ai-proxy.%s.replace_request_body_failed", typ), fmt.Errorf("failed to replace request body: %v", err))
 	}
 }

--- a/plugins/wasm-go/extensions/ai-proxy/provider/coze.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/coze.go
@@ -6,7 +6,6 @@ import (

 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
-	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
 )

 const (
@@ -38,9 +37,9 @@ func (m *cozeProvider) GetProviderType() string {
 	return providerTypeCoze
 }

-func (m *cozeProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (m *cozeProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
-	return types.ActionContinue, nil
+	return nil
 }

 func (m *cozeProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/deepl.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/deepl.go
@@ -10,7 +10,6 @@ import (

 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
-	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
 )

@@ -76,19 +75,17 @@ func (d *deeplProvider) GetProviderType() string {
 	return providerTypeDeepl
 }

-func (d *deeplProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (d *deeplProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	d.config.handleRequestHeaders(d, ctx, apiName, log)
-	return types.HeaderStopIteration, nil
+	return nil
 }

 func (d *deeplProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
 	util.OverwriteRequestPathHeader(headers, deeplChatCompletionPath)
 	util.OverwriteRequestAuthorizationHeader(headers, "DeepL-Auth-Key "+d.config.GetApiTokenInUse(ctx))
-	headers.Del("Content-Length")
-	headers.Del("Accept-Encoding")
 }

 func (d *deeplProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
@@ -114,18 +111,13 @@ func (d *deeplProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, api
 	return json.Marshal(baiduRequest)
 }

-func (d *deeplProvider) OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
-	_ = proxywasm.RemoveHttpResponseHeader("Content-Length")
-	return types.ActionContinue, nil
-}
-
-func (d *deeplProvider) OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
+func (d *deeplProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
 	deeplResponse := &deeplResponse{}
 	if err := json.Unmarshal(body, deeplResponse); err != nil {
-		return types.ActionContinue, fmt.Errorf("unable to unmarshal deepl response: %v", err)
+		return nil, fmt.Errorf("unable to unmarshal deepl response: %v", err)
 	}
 	response := d.responseDeepl2OpenAI(ctx, deeplResponse)
-	return types.ActionContinue, replaceJsonResponseBody(response, log)
+	return json.Marshal(response)
 }

 func (d *deeplProvider) responseDeepl2OpenAI(ctx wrapper.HttpContext, deeplResponse *deeplResponse) *chatCompletionResponse {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/deepseek.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/deepseek.go
@@ -2,10 +2,11 @@ package provider

 import (
 	"errors"
+	"net/http"
+
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
-	"net/http"
 )

 // deepseekProvider is the provider for deepseek Ai service.
@@ -41,12 +42,12 @@ func (m *deepseekProvider) GetProviderType() string {
 	return providerTypeDeepSeek
 }

-func (m *deepseekProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (m *deepseekProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
-	return types.ActionContinue, nil
+	return nil
 }

 func (m *deepseekProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/doubao.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/doubao.go
@@ -2,11 +2,12 @@ package provider

 import (
 	"errors"
+	"net/http"
+	"strings"
+
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
-	"net/http"
-	"strings"
 )

 const (
@@ -39,12 +40,12 @@ func (m *doubaoProvider) GetProviderType() string {
 	return providerTypeDoubao
 }

-func (m *doubaoProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (m *doubaoProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
-	return types.ActionContinue, nil
+	return nil
 }

 func (m *doubaoProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/failover.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/failover.go
@@ -4,14 +4,14 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
-	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
-	"github.com/google/uuid"
 	"math/rand"
 	"net/http"
 	"strings"
 	"time"
-
+	
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/google/uuid"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
 	"github.com/tidwall/gjson"
@@ -19,7 +19,7 @@ import (

 type failover struct {
 	// @Title zh-CN 是否启用 apiToken 的 failover 机制
-	enabled bool `required:"true" yaml:"enabled" json:"enabled"`
+	enabled bool `required:"false" yaml:"enabled" json:"enabled"`
 	// @Title zh-CN 触发 failover 连续请求失败的阈值
 	failureThreshold int64 `required:"false" yaml:"failureThreshold" json:"failureThreshold"`
 	// @Title zh-CN 健康检测的成功阈值
@@ -29,7 +29,7 @@ type failover struct {
 	// @Title zh-CN 健康检测的超时时间，单位毫秒
 	healthCheckTimeout int64 `required:"false" yaml:"healthCheckTimeout" json:"healthCheckTimeout"`
 	// @Title zh-CN 健康检测使用的模型
-	healthCheckModel string `required:"true" yaml:"healthCheckModel" json:"healthCheckModel"`
+	healthCheckModel string `required:"false" yaml:"healthCheckModel" json:"healthCheckModel"`
 	// @Title zh-CN 本次请求使用的 apiToken
 	ctxApiTokenInUse string
 	// @Title zh-CN 记录 apiToken 请求失败的次数，key 为 apiToken，value 为失败次数
@@ -184,9 +184,9 @@ func (c *ProviderConfig) transformRequestHeadersAndBody(ctx wrapper.HttpContext,
 	if handler, ok := activeProvider.(TransformRequestBodyHandler); ok {
 		body, err = handler.TransformRequestBody(ctx, ApiNameChatCompletion, body, log)
 	} else if handler, ok := activeProvider.(TransformRequestBodyHeadersHandler); ok {
-		headers := util.GetOriginalHttpHeaders()
+		headers := util.GetOriginalRequestHeaders()
 		body, err = handler.TransformRequestBodyHeaders(ctx, ApiNameChatCompletion, body, originalHeaders, log)
-		util.ReplaceOriginalHttpHeaders(headers)
+		util.ReplaceRequestHeaders(headers)
 	} else {
 		body, err = c.defaultTransformRequestBody(ctx, ApiNameChatCompletion, body, log)
 	}
@@ -467,7 +467,7 @@ func (c *ProviderConfig) ResetApiTokenRequestFailureCount(apiTokenInUse string,
 			log.Errorf("failed to get failureApiTokenRequestCount: %v", err)
 		}
 		if _, ok := failureApiTokenRequestCount[apiTokenInUse]; ok {
-			log.Infof("reset apiToken %s request failure count", apiTokenInUse)
+			log.Infof("Reset apiToken %s request failure count", apiTokenInUse)
 			resetApiTokenRequestCount(c.failover.ctxApiTokenRequestFailureCount, apiTokenInUse, log)
 		}
 	}
@@ -489,7 +489,7 @@ func modifyApiTokenRequestCount(key, apiToken string, op string, log wrapper.Log

 		apiTokenRequestCountByte, err := json.Marshal(apiTokenRequestCount)
 		if err != nil {
-			log.Errorf("failed to marshal apiTokenRequestCount: %v", err)
+			log.Errorf("Failed to marshal apiTokenRequestCount: %v", err)
 		}

 		if err := proxywasm.SetSharedData(key, apiTokenRequestCountByte, cas); err == nil {
@@ -539,25 +539,31 @@ func (c *ProviderConfig) resetSharedData() {
 	_ = proxywasm.SetSharedData(c.failover.ctxApiTokenRequestFailureCount, nil, 0)
 }

-func (c *ProviderConfig) OnRequestFailed(ctx wrapper.HttpContext, apiTokenInUse string, log wrapper.Log) {
+func (c *ProviderConfig) OnRequestFailed(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, log wrapper.Log) types.Action {
 	if c.isFailoverEnabled() {
 		c.handleUnavailableApiToken(ctx, apiTokenInUse, log)
 	}
+	if c.isRetryOnFailureEnabled() && ctx.GetContext(ctxKeyIsStreaming) != nil && !ctx.GetContext(ctxKeyIsStreaming).(bool) {
+		c.retryFailedRequest(activeProvider, ctx, log)
+		return types.HeaderStopAllIterationAndWatermark
+	}
+	return types.ActionContinue
 }

 func (c *ProviderConfig) GetApiTokenInUse(ctx wrapper.HttpContext) string {
-	return ctx.GetContext(c.failover.ctxApiTokenInUse).(string)
+	token, _ := ctx.GetContext(c.failover.ctxApiTokenInUse).(string)
+	return token
 }

 func (c *ProviderConfig) SetApiTokenInUse(ctx wrapper.HttpContext, log wrapper.Log) {
 	var apiToken string
-	if c.isFailoverEnabled() {
+	if c.isFailoverEnabled() || c.useGlobalApiToken {
 		// if enable apiToken failover, only use available apiToken
 		apiToken = c.GetGlobalRandomToken(log)
 	} else {
 		apiToken = c.GetRandomToken()
 	}
-	log.Debugf("[onHttpRequestHeader] use apiToken %s to send request", apiToken)
+	log.Debugf("Use apiToken %s to send request", apiToken)
 	ctx.SetContext(c.failover.ctxApiTokenInUse, apiToken)
 }

--- a/plugins/wasm-go/extensions/ai-proxy/provider/gemini.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/gemini.go
@@ -51,20 +51,18 @@ func (g *geminiProvider) GetProviderType() string {
 	return providerTypeGemini
 }

-func (g *geminiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (g *geminiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion && apiName != ApiNameEmbeddings {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	g.config.handleRequestHeaders(g, ctx, apiName, log)
 	// Delay the header processing to allow changing streaming mode in OnRequestBody
-	return types.HeaderStopIteration, nil
+	return nil
 }

 func (g *geminiProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
 	util.OverwriteRequestHostHeader(headers, geminiDomain)
-	headers.Add(geminiApiKeyHeader, g.config.GetApiTokenInUse(ctx))
-	headers.Del("Accept-Encoding")
-	headers.Del("Content-Length")
+	headers.Set(geminiApiKeyHeader, g.config.GetApiTokenInUse(ctx))
 }

 func (g *geminiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
@@ -107,16 +105,6 @@ func (g *geminiProvider) onEmbeddingsRequestBody(ctx wrapper.HttpContext, body [
 	return json.Marshal(geminiRequest)
 }

-func (g *geminiProvider) OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
-	if g.config.protocol == protocolOriginal {
-		ctx.DontReadResponseBody()
-		return types.ActionContinue, nil
-	}
-
-	_ = proxywasm.RemoveHttpResponseHeader("Content-Length")
-	return types.ActionContinue, nil
-}
-
 func (g *geminiProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
 	log.Infof("chunk body:%s", string(chunk))
 	if isLastChunk || len(chunk) == 0 {
@@ -150,39 +138,38 @@ func (g *geminiProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name A
 	return []byte(modifiedResponseChunk), nil
 }

-func (g *geminiProvider) OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
+func (g *geminiProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
 	if apiName == ApiNameChatCompletion {
 		return g.onChatCompletionResponseBody(ctx, body, log)
-	} else if apiName == ApiNameEmbeddings {
+	} else {
 		return g.onEmbeddingsResponseBody(ctx, body, log)
 	}
-	return types.ActionContinue, errUnsupportedApiName
 }

-func (g *geminiProvider) onChatCompletionResponseBody(ctx wrapper.HttpContext, body []byte, log wrapper.Log) (types.Action, error) {
+func (g *geminiProvider) onChatCompletionResponseBody(ctx wrapper.HttpContext, body []byte, log wrapper.Log) ([]byte, error) {
 	geminiResponse := &geminiChatResponse{}
 	if err := json.Unmarshal(body, geminiResponse); err != nil {
-		return types.ActionContinue, fmt.Errorf("unable to unmarshal gemini chat response: %v", err)
+		return nil, fmt.Errorf("unable to unmarshal gemini chat response: %v", err)
 	}
 	if geminiResponse.Error != nil {
-		return types.ActionContinue, fmt.Errorf("gemini chat completion response error, error_code: %d, error_status:%s, error_message: %s",
+		return nil, fmt.Errorf("gemini chat completion response error, error_code: %d, error_status:%s, error_message: %s",
 			geminiResponse.Error.Code, geminiResponse.Error.Status, geminiResponse.Error.Message)
 	}
 	response := g.buildChatCompletionResponse(ctx, geminiResponse)
-	return types.ActionContinue, replaceJsonResponseBody(response, log)
+	return json.Marshal(response)
 }

-func (g *geminiProvider) onEmbeddingsResponseBody(ctx wrapper.HttpContext, body []byte, log wrapper.Log) (types.Action, error) {
+func (g *geminiProvider) onEmbeddingsResponseBody(ctx wrapper.HttpContext, body []byte, log wrapper.Log) ([]byte, error) {
 	geminiResponse := &geminiEmbeddingResponse{}
 	if err := json.Unmarshal(body, geminiResponse); err != nil {
-		return types.ActionContinue, fmt.Errorf("unable to unmarshal gemini embeddings response: %v", err)
+		return nil, fmt.Errorf("unable to unmarshal gemini embeddings response: %v", err)
 	}
 	if geminiResponse.Error != nil {
-		return types.ActionContinue, fmt.Errorf("gemini embeddings response error, error_code: %d, error_status:%s, error_message: %s",
+		return nil, fmt.Errorf("gemini embeddings response error, error_code: %d, error_status:%s, error_message: %s",
 			geminiResponse.Error.Code, geminiResponse.Error.Status, geminiResponse.Error.Message)
 	}
 	response := g.buildEmbeddingsResponse(ctx, geminiResponse)
-	return types.ActionContinue, replaceJsonResponseBody(response, log)
+	return json.Marshal(response)
 }

 func (g *geminiProvider) getRequestPath(apiName ApiName, geminiModel string, stream bool) string {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/github.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/github.go
@@ -2,11 +2,12 @@ package provider

 import (
 	"errors"
+	"net/http"
+	"strings"
+
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
-	"net/http"
-	"strings"
 )

 // githubProvider is the provider for GitHub OpenAI service.
@@ -42,13 +43,13 @@ func (m *githubProvider) GetProviderType() string {
 	return providerTypeGithub
 }

-func (m *githubProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (m *githubProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion && apiName != ApiNameEmbeddings {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	// Delay the header processing to allow changing streaming mode in OnRequestBody
-	return types.HeaderStopIteration, nil
+	return nil
 }

 func (m *githubProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
@@ -67,8 +68,6 @@ func (m *githubProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiNam
 		util.OverwriteRequestPathHeader(headers, githubEmbeddingPath)
 	}
 	util.OverwriteRequestAuthorizationHeader(headers, m.config.GetApiTokenInUse(ctx))
-	headers.Del("Accept-Encoding")
-	headers.Del("Content-Length")
 }

 func (m *githubProvider) GetApiName(path string) ApiName {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/groq.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/groq.go
@@ -41,12 +41,12 @@ func (g *groqProvider) GetProviderType() string {
 	return providerTypeGroq
 }

-func (g *groqProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (g *groqProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	g.config.handleRequestHeaders(g, ctx, apiName, log)
-	return types.ActionContinue, nil
+	return nil
 }

 func (g *groqProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/hunyuan.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/hunyuan.go
@@ -114,13 +114,13 @@ func (m *hunyuanProvider) GetProviderType() string {
 	return providerTypeHunyuan
 }

-func (m *hunyuanProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (m *hunyuanProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	// Delay the header processing to allow changing streaming mode in OnRequestBody
-	return types.HeaderStopIteration, nil
+	return nil
 }

 func (m *hunyuanProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
@@ -128,11 +128,8 @@ func (m *hunyuanProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiNa
 	util.OverwriteRequestPathHeader(headers, hunyuanRequestPath)

 	// 添加 hunyuan 需要的自定义字段
-	headers.Add(actionKey, hunyuanChatCompletionTCAction)
-	headers.Add(versionKey, versionValue)
-
-	headers.Del("Accept-Encoding")
-	headers.Del("Content-Length")
+	headers.Set(actionKey, hunyuanChatCompletionTCAction)
+	headers.Set(versionKey, versionValue)
 }

 // hunyuan 的 OnRequestBody 逻辑中包含了对 headers 签名的逻辑，并且插入 context 以后还要重新计算签名，因此无法复用 handleRequestBody 方法
@@ -172,7 +169,7 @@ func (m *hunyuanProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName

 			if err != nil {
 				log.Errorf("failed to load context file: %v", err)
-				_ = util.SendResponse(500, "ai-proxy.hunyuan.load_ctx_failed", util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
+				util.ErrorHandler("ai-proxy.hunyuan.load_ctx_failed", fmt.Errorf("failed to load context file: %v", err))
 			}
 			m.insertContextMessageIntoHunyuanRequest(request, content)

@@ -182,7 +179,7 @@ func (m *hunyuanProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName
 			_ = util.OverwriteRequestAuthorization(authorizedValueNew)

 			if err := replaceJsonRequestBody(request, log); err != nil {
-				_ = util.SendResponse(500, "ai-proxy.hunyuan.insert_ctx_failed", util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
+				util.ErrorHandler("ai-proxy.hunyuan.insert_ctx_failed", fmt.Errorf("failed to replace request body: %v", err))
 			}
 		}, log)
 		if err == nil {
@@ -244,7 +241,7 @@ func (m *hunyuanProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName
 		}()
 		if err != nil {
 			log.Errorf("failed to load context file: %v", err)
-			_ = util.SendResponse(500, "ai-proxy.hunyuan.load_ctx_failed", util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
+			util.ErrorHandler("ai-proxy.hunyuan.load_ctx_failed", fmt.Errorf("failed to load context file: %v", err))
 			return
 		}
 		insertContextMessage(request, content)
@@ -256,7 +253,7 @@ func (m *hunyuanProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName
 		_ = util.OverwriteRequestAuthorization(authorizedValueNew)

 		if err := replaceJsonRequestBody(hunyuanRequest, log); err != nil {
-			_ = util.SendResponse(500, "ai-proxy.hunyuan.insert_ctx_failed", util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
+			util.ErrorHandler("ai-proxy.hunyuan.insert_ctx_failed", fmt.Errorf("failed to replace request body: %v", err))
 		}
 	}, log)
 	if err == nil {
@@ -291,11 +288,6 @@ func (m *hunyuanProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, a
 	return json.Marshal(hunyuanRequest)
 }

-func (m *hunyuanProvider) OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
-	_ = proxywasm.RemoveHttpResponseHeader("Content-Length")
-	return types.ActionContinue, nil
-}
-
 func (m *hunyuanProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
 	if m.config.protocol == protocolOriginal {
 		return chunk, nil
@@ -412,21 +404,14 @@ func (m *hunyuanProvider) convertChunkFromHunyuanToOpenAI(ctx wrapper.HttpContex
 	return []byte(openAIChunk.String()), nil
 }

-func (m *hunyuanProvider) OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-
+func (m *hunyuanProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
 	log.Debugf("#debug nash5# onRespBody's resp is: %s", string(body))
 	hunyuanResponse := &hunyuanTextGenResponseNonStreaming{}
 	if err := json.Unmarshal(body, hunyuanResponse); err != nil {
-		return types.ActionContinue, fmt.Errorf("unable to unmarshal hunyuan response: %v", err)
+		return nil, fmt.Errorf("unable to unmarshal hunyuan response: %v", err)
 	}
-
-	if m.config.protocol == protocolOriginal {
-		return types.ActionContinue, replaceJsonResponseBody(hunyuanResponse, log)
-	}
-
 	response := m.buildChatCompletionResponse(ctx, hunyuanResponse)
-
-	return types.ActionContinue, replaceJsonResponseBody(response, log)
+	return json.Marshal(response)
 }

 func (m *hunyuanProvider) insertContextMessageIntoHunyuanRequest(request *hunyuanTextGenRequest, content string) {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/minimax.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/minimax.go
@@ -11,47 +11,37 @@ import (
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
 )

 // minimaxProvider is the provider for minimax service.

 const (
-	minimaxDomain = "api.minimax.chat"
-	// minimaxChatCompletionV2Path 接口请求响应格式与OpenAI相同
-	// 接口文档: https://platform.minimaxi.com/document/guides/chat-model/V2?id=65e0736ab2845de20908e2dd
+	minimaxApiTypeV2  = "v2"  // minimaxApiTypeV2 represents chat completion V2 API.
+	minimaxApiTypePro = "pro" // minimaxApiTypePro represents chat completion Pro API.
+	minimaxDomain     = "api.minimax.chat"
+	// minimaxChatCompletionV2Path represents the API path for chat completion V2 API which has a response format similar to OpenAI's.
 	minimaxChatCompletionV2Path = "/v1/text/chatcompletion_v2"
-	// minimaxChatCompletionProPath 接口请求响应格式与OpenAI不同
-	// 接口文档: https://platform.minimaxi.com/document/guides/chat-model/pro/api?id=6569c85948bc7b684b30377e
+	// minimaxChatCompletionProPath represents the API path for chat completion Pro API which has a different response format from OpenAI's.
 	minimaxChatCompletionProPath = "/v1/text/chatcompletion_pro"

-	senderTypeUser string = "USER" // 用户发送的内容
-	senderTypeBot  string = "BOT"  // 模型生成的内容
+	senderTypeUser string = "USER" // Content sent by the user.
+	senderTypeBot  string = "BOT"  // Content generated by the model.

-	// 默认机器人设置
+	// Default bot settings.
 	defaultBotName           string = "MM智能助理"
 	defaultBotSettingContent string = "MM智能助理是一款由MiniMax自研的，没有调用其他产品的接口的大型语言模型。MiniMax是一家中国科技公司，一直致力于进行大模型相关的研究。"
 	defaultSenderName        string = "小明"
 )

-// chatCompletionProModels 这些模型对应接口为ChatCompletion Pro
-var chatCompletionProModels = map[string]struct{}{
-	"abab6.5-chat":  {},
-	"abab6.5s-chat": {},
-	"abab5.5s-chat": {},
-	"abab5.5-chat":  {},
-}
-
 type minimaxProviderInitializer struct {
 }

 func (m *minimaxProviderInitializer) ValidateConfig(config ProviderConfig) error {
-	// 如果存在模型对应接口为ChatCompletion Pro必须配置minimaxGroupId
-	if len(config.modelMapping) > 0 && config.minimaxGroupId == "" {
-		for _, minimaxModel := range config.modelMapping {
-			if _, exists := chatCompletionProModels[minimaxModel]; exists {
-				return errors.New(fmt.Sprintf("missing minimaxGroupId in provider config when %s model is provided", minimaxModel))
-			}
-		}
+	// If using the chat completion Pro API, a group ID must be set.
+	if minimaxApiTypePro == config.minimaxApiType && config.minimaxGroupId == "" {
+		return errors.New(fmt.Sprintf("missing minimaxGroupId in provider config when minimaxApiType is %s", minimaxApiTypePro))
 	}
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
@@ -75,13 +65,13 @@ func (m *minimaxProvider) GetProviderType() string {
 	return providerTypeMinimax
 }

-func (m *minimaxProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (m *minimaxProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	// Delay the header processing to allow changing streaming mode in OnRequestBody
-	return types.HeaderStopIteration, nil
+	return nil
 }

 func (m *minimaxProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
@@ -94,44 +84,28 @@ func (m *minimaxProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName
 	if apiName != ApiNameChatCompletion {
 		return types.ActionContinue, errUnsupportedApiName
 	}
-	// 解析并映射模型,设置上下文
-	model, err := m.parseModel(body)
-	if err != nil {
-		return types.ActionContinue, err
-	}
-	ctx.SetContext(ctxKeyOriginalRequestModel, model)
-	mappedModel := getMappedModel(model, m.config.modelMapping, log)
-	if mappedModel == "" {
-		return types.ActionContinue, errors.New("model becomes empty after applying the configured mapping")
-	}
-	ctx.SetContext(ctxKeyFinalRequestModel, mappedModel)
-	_, ok := chatCompletionProModels[mappedModel]
-	if ok {
-		// 使用ChatCompletion Pro接口
+	if minimaxApiTypePro == m.config.minimaxApiType {
+		// Use chat completion Pro API.
 		return m.handleRequestBodyByChatCompletionPro(body, log)
 	} else {
-		// 使用ChatCompletion v2接口
+		// Use chat completion V2 API.
 		return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
 	}
 }

-func (m *minimaxProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header, log wrapper.Log) ([]byte, error) {
-	return m.handleRequestBodyByChatCompletionV2(body, headers, log)
-}
-
-// handleRequestBodyByChatCompletionPro 使用ChatCompletion Pro接口处理请求体
+// handleRequestBodyByChatCompletionPro processes the request body using the chat completion Pro API.
 func (m *minimaxProvider) handleRequestBodyByChatCompletionPro(body []byte, log wrapper.Log) (types.Action, error) {
 	request := &chatCompletionRequest{}
 	if err := decodeChatCompletionRequest(body, request); err != nil {
 		return types.ActionContinue, err
 	}

-	// 映射模型重写requestPath
+	// Map the model and rewrite the request path.
 	request.Model = getMappedModel(request.Model, m.config.modelMapping, log)
 	_ = util.OverwriteRequestPath(fmt.Sprintf("%s?GroupId=%s", minimaxChatCompletionProPath, m.config.minimaxGroupId))

 	if m.config.context == nil {
-		minimaxRequest := m.buildMinimaxChatCompletionV2Request(request, "")
+		minimaxRequest := m.buildMinimaxChatCompletionProRequest(request, "")
 		return types.ActionContinue, replaceJsonRequestBody(minimaxRequest, log)
 	}

@@ -141,14 +115,14 @@ func (m *minimaxProvider) handleRequestBodyByChatCompletionPro(body []byte, log
 		}()
 		if err != nil {
 			log.Errorf("failed to load context file: %v", err)
-			_ = util.SendResponse(500, "ai-proxy.minimax.load_ctx_failed", util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
+			util.ErrorHandler("ai-proxy.minimax.load_ctx_failed", fmt.Errorf("failed to load context file: %v", err))
 		}
-		// 由于 minimaxChatCompletionV2（格式和 OpenAI 一致）和 minimaxChatCompletionPro（格式和 OpenAI 不一致）中 insertHttpContextMessage 的逻辑不同，无法做到同一个 provider 统一
-		// 因此对于 minimaxChatCompletionPro 需要手动处理 context 消息
-		// minimaxChatCompletionV2 交给默认的 defaultInsertHttpContextMessage 方法插入 context 消息
-		minimaxRequest := m.buildMinimaxChatCompletionV2Request(request, content)
+		// Since minimaxChatCompletionV2 (format consistent with OpenAI) and minimaxChatCompletionPro (different format from OpenAI) have different logic for insertHttpContextMessage, we cannot unify them within one provider.
+		// For minimaxChatCompletionPro, we need to manually handle context messages.
+		// minimaxChatCompletionV2 uses the default defaultInsertHttpContextMessage method to insert context messages.
+		minimaxRequest := m.buildMinimaxChatCompletionProRequest(request, content)
 		if err := replaceJsonRequestBody(minimaxRequest, log); err != nil {
-			_ = util.SendResponse(500, "ai-proxy.minimax.insert_ctx_failed", util.MimeTypeTextPlain, fmt.Sprintf("failed to replace Request body: %v", err))
+			util.ErrorHandler("ai-proxy.minimax.insert_ctx_failed", fmt.Errorf("failed to replace Request body: %v", err))
 		}
 	}, log)
 	if err == nil {
@@ -157,63 +131,53 @@ func (m *minimaxProvider) handleRequestBodyByChatCompletionPro(body []byte, log
 	return types.ActionContinue, err
 }

-// handleRequestBodyByChatCompletionV2 使用ChatCompletion v2接口处理请求体
-func (m *minimaxProvider) handleRequestBodyByChatCompletionV2(body []byte, headers http.Header, log wrapper.Log) ([]byte, error) {
-	request := &chatCompletionRequest{}
-	if err := decodeChatCompletionRequest(body, request); err != nil {
-		return nil, err
-	}
+func (m *minimaxProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header, log wrapper.Log) ([]byte, error) {
+	return m.handleRequestBodyByChatCompletionV2(body, headers, log)
+}

-	// 映射模型重写requestPath
-	request.Model = getMappedModel(request.Model, m.config.modelMapping, log)
+// handleRequestBodyByChatCompletionV2 processes the request body using the chat completion V2 API.
+func (m *minimaxProvider) handleRequestBodyByChatCompletionV2(body []byte, headers http.Header, log wrapper.Log) ([]byte, error) {
 	util.OverwriteRequestPathHeader(headers, minimaxChatCompletionV2Path)

-	return body, nil
+	rawModel := gjson.GetBytes(body, "model").String()
+	mappedModel := getMappedModel(rawModel, m.config.modelMapping, log)
+	return sjson.SetBytes(body, "model", mappedModel)
 }

-func (m *minimaxProvider) OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
-	// 使用minimax接口协议,跳过OnStreamingResponseBody()和OnResponseBody()
-	if m.config.protocol == protocolOriginal {
+func (m *minimaxProvider) TransformResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
+	// Skip response body processing (OnStreamingResponseBody() and TransformResponseBody()) when using
+	// the original protocol or when the configured API type is not the chat completion Pro API.
+	if m.config.protocol == protocolOriginal || minimaxApiTypePro != m.config.minimaxApiType {
 		ctx.DontReadResponseBody()
-		return types.ActionContinue, nil
+	} else {
+		headers.Del("Content-Length")
 	}
-	// 模型对应接口为ChatCompletion v2,跳过OnStreamingResponseBody()和OnResponseBody()
-	model := ctx.GetStringContext(ctxKeyFinalRequestModel, "")
-	if model != "" {
-		_, ok := chatCompletionProModels[model]
-		if !ok {
-			ctx.DontReadResponseBody()
-			return types.ActionContinue, nil
-		}
-	}
-	_ = proxywasm.RemoveHttpResponseHeader("Content-Length")
-	return types.ActionContinue, nil
 }

-// OnStreamingResponseBody 只处理使用OpenAI协议 且 模型对应接口为ChatCompletion Pro的流式响应
+// OnStreamingResponseBody converts streaming response chunks of the Minimax chat completion Pro API into the OpenAI format; it applies only to requests that use the OpenAI protocol with the Pro API.
 func (m *minimaxProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
 	if isLastChunk || len(chunk) == 0 {
 		return nil, nil
 	}
-	// sample event response:
+	// Sample event response:
 	// data: {"created":1689747645,"model":"abab6.5s-chat","reply":"","choices":[{"messages":[{"sender_type":"BOT","sender_name":"MM智能助理","text":"am from China."}]}],"output_sensitive":false}

-	// sample end event response:
+	// Sample end event response:
 	// data: {"created":1689747645,"model":"abab6.5s-chat","reply":"I am from China.","choices":[{"finish_reason":"stop","messages":[{"sender_type":"BOT","sender_name":"MM智能助理","text":"I am from China."}]}],"usage":{"total_tokens":187},"input_sensitive":false,"output_sensitive":false,"id":"0106b3bc9fd844a9f3de1aa06004e2ab","base_resp":{"status_code":0,"status_msg":""}}
 	responseBuilder := &strings.Builder{}
 	lines := strings.Split(string(chunk), "\n")
 	for _, data := range lines {
 		if len(data) < 6 {
-			// ignore blank line or wrong format
+			// Ignore blank or improperly formatted lines.
 			continue
 		}
 		data = data[6:]
-		var minimaxResp minimaxChatCompletionV2Resp
+		var minimaxResp minimaxChatCompletionProResp
 		if err := json.Unmarshal([]byte(data), &minimaxResp); err != nil {
 			log.Errorf("unable to unmarshal minimax response: %v", err)
 			continue
 		}
-		response := m.responseV2ToOpenAI(&minimaxResp)
+		response := m.responseProToOpenAI(&minimaxResp)
 		responseBody, err := json.Marshal(response)
 		if err != nil {
 			log.Errorf("unable to marshal response: %v", err)
@@ -226,82 +190,82 @@ func (m *minimaxProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name
 	return []byte(modifiedResponseChunk), nil
 }

-// OnResponseBody 只处理使用OpenAI协议 且 模型对应接口为ChatCompletion Pro的流式响应
-func (m *minimaxProvider) OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	minimaxResp := &minimaxChatCompletionV2Resp{}
+// TransformResponseBody converts the non-streaming response body of the Minimax chat completion Pro API into the OpenAI format; it applies only to requests that use the OpenAI protocol with the Pro API.
+func (m *minimaxProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
+	minimaxResp := &minimaxChatCompletionProResp{}
 	if err := json.Unmarshal(body, minimaxResp); err != nil {
-		return types.ActionContinue, fmt.Errorf("unable to unmarshal minimax response: %v", err)
+		return nil, fmt.Errorf("unable to unmarshal minimax response: %v", err)
 	}
 	if minimaxResp.BaseResp.StatusCode != 0 {
-		return types.ActionContinue, fmt.Errorf("minimax response error, error_code: %d, error_message: %s", minimaxResp.BaseResp.StatusCode, minimaxResp.BaseResp.StatusMsg)
+		return nil, fmt.Errorf("minimax response error, error_code: %d, error_message: %s", minimaxResp.BaseResp.StatusCode, minimaxResp.BaseResp.StatusMsg)
 	}
-	response := m.responseV2ToOpenAI(minimaxResp)
-	return types.ActionContinue, replaceJsonResponseBody(response, log)
+	response := m.responseProToOpenAI(minimaxResp)
+	return json.Marshal(response)
 }

-// minimaxChatCompletionV2Request 表示ChatCompletion V2请求的结构体
-type minimaxChatCompletionV2Request struct {
+// minimaxChatCompletionProRequest represents the structure of a chat completion Pro request.
+type minimaxChatCompletionProRequest struct {
 	Model             string                  `json:"model"`
 	Stream            bool                    `json:"stream,omitempty"`
 	TokensToGenerate  int64                   `json:"tokens_to_generate,omitempty"`
 	Temperature       float64                 `json:"temperature,omitempty"`
 	TopP              float64                 `json:"top_p,omitempty"`
-	MaskSensitiveInfo bool                    `json:"mask_sensitive_info"` // 是否开启隐私信息打码,默认true
+	MaskSensitiveInfo bool                    `json:"mask_sensitive_info"` // Whether to mask sensitive information, defaults to true.
 	Messages          []minimaxMessage        `json:"messages"`
 	BotSettings       []minimaxBotSetting     `json:"bot_setting"`
 	ReplyConstraints  minimaxReplyConstraints `json:"reply_constraints"`
 }

-// minimaxMessage 表示对话中的消息
+// minimaxMessage represents a message in the conversation.
 type minimaxMessage struct {
 	SenderType string `json:"sender_type"`
 	SenderName string `json:"sender_name"`
 	Text       string `json:"text"`
 }

-// minimaxBotSetting 表示机器人的设置
+// minimaxBotSetting represents the bot's settings.
 type minimaxBotSetting struct {
 	BotName string `json:"bot_name"`
 	Content string `json:"content"`
 }

-// minimaxReplyConstraints 表示模型回复要求
+// minimaxReplyConstraints represents requirements for model replies.
 type minimaxReplyConstraints struct {
 	SenderType string `json:"sender_type"`
 	SenderName string `json:"sender_name"`
 }

-// minimaxChatCompletionV2Resp Minimax Chat Completion V2响应结构体
-type minimaxChatCompletionV2Resp struct {
-	Created             int64           `json:"created"`
-	Model               string          `json:"model"`
-	Reply               string          `json:"reply"`
-	InputSensitive      bool            `json:"input_sensitive,omitempty"`
-	InputSensitiveType  int64           `json:"input_sensitive_type,omitempty"`
-	OutputSensitive     bool            `json:"output_sensitive,omitempty"`
-	OutputSensitiveType int64           `json:"output_sensitive_type,omitempty"`
-	Choices             []minimaxChoice `json:"choices,omitempty"`
-	Usage               minimaxUsage    `json:"usage,omitempty"`
-	Id                  string          `json:"id"`
-	BaseResp            minimaxBaseResp `json:"base_resp"`
+// minimaxChatCompletionProResp represents the structure of a Minimax Chat Completion Pro response.
+type minimaxChatCompletionProResp struct {
+	Created         int64           `json:"created"`
+	Model           string          `json:"model"`
+	Reply           string          `json:"reply"`
+	InputSensitive  bool            `json:"input_sensitive,omitempty"`
+	OutputSensitive bool            `json:"output_sensitive,omitempty"`
+	Choices         []minimaxChoice `json:"choices,omitempty"`
+	Usage           minimaxUsage    `json:"usage,omitempty"`
+	Id              string          `json:"id"`
+	BaseResp        minimaxBaseResp `json:"base_resp"`
 }

-// minimaxBaseResp 包含错误状态码和详情
+// minimaxBaseResp contains error status code and details.
 type minimaxBaseResp struct {
 	StatusCode int64  `json:"status_code"`
 	StatusMsg  string `json:"status_msg"`
 }

-// minimaxChoice 结果选项
+// minimaxChoice represents a result option.
 type minimaxChoice struct {
 	Messages     []minimaxMessage `json:"messages"`
 	Index        int64            `json:"index"`
 	FinishReason string           `json:"finish_reason"`
 }

-// minimaxUsage 令牌使用情况
+// minimaxUsage represents token usage statistics.
 type minimaxUsage struct {
-	TotalTokens int64 `json:"total_tokens"`
+	TotalTokens      int64 `json:"total_tokens"`
+	PromptTokens     int64 `json:"prompt_tokens"`
+	CompletionTokens int64 `json:"completion_tokens"`
 }

 func (m *minimaxProvider) parseModel(body []byte) (string, error) {
@@ -316,7 +280,7 @@ func (m *minimaxProvider) parseModel(body []byte) (string, error) {
 	return model, nil
 }

-func (m *minimaxProvider) setBotSettings(request *minimaxChatCompletionV2Request, botSettingContent string) {
+func (m *minimaxProvider) setBotSettings(request *minimaxChatCompletionProRequest, botSettingContent string) {
 	if len(request.BotSettings) == 0 {
 		request.BotSettings = []minimaxBotSetting{
 			{
@@ -338,7 +302,7 @@ func (m *minimaxProvider) setBotSettings(request *minimaxChatCompletionV2Request
 	}
 }

-func (m *minimaxProvider) buildMinimaxChatCompletionV2Request(request *chatCompletionRequest, botSettingContent string) *minimaxChatCompletionV2Request {
+func (m *minimaxProvider) buildMinimaxChatCompletionProRequest(request *chatCompletionRequest, botSettingContent string) *minimaxChatCompletionProRequest {
 	var messages []minimaxMessage
 	var botSetting []minimaxBotSetting
 	var botName string
@@ -377,7 +341,7 @@ func (m *minimaxProvider) buildMinimaxChatCompletionV2Request(request *chatCompl
 		SenderType: senderTypeBot,
 		SenderName: determineName(botName, defaultBotName),
 	}
-	result := &minimaxChatCompletionV2Request{
+	result := &minimaxChatCompletionProRequest{
 		Model:             request.Model,
 		Stream:            request.Stream,
 		TokensToGenerate:  int64(request.MaxTokens),
@@ -393,7 +357,7 @@ func (m *minimaxProvider) buildMinimaxChatCompletionV2Request(request *chatCompl
 	return result
 }

-func (m *minimaxProvider) responseV2ToOpenAI(response *minimaxChatCompletionV2Resp) *chatCompletionResponse {
+func (m *minimaxProvider) responseProToOpenAI(response *minimaxChatCompletionProResp) *chatCompletionResponse {
 	var choices []chatCompletionChoice
 	messageIndex := 0
 	for _, choice := range response.Choices {
@@ -418,7 +382,9 @@ func (m *minimaxProvider) responseV2ToOpenAI(response *minimaxChatCompletionV2Re
 		Model:   response.Model,
 		Choices: choices,
 		Usage: usage{
-			TotalTokens: int(response.Usage.TotalTokens),
+			TotalTokens:      int(response.Usage.TotalTokens),
+			PromptTokens:     int(response.Usage.PromptTokens),
+			CompletionTokens: int(response.Usage.CompletionTokens),
 		},
 	}
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/mistral.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/mistral.go
@@ -2,10 +2,11 @@ package provider

 import (
 	"errors"
+	"net/http"
+
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
-	"net/http"
 )

 const (
@@ -37,12 +38,12 @@ func (m *mistralProvider) GetProviderType() string {
 	return providerTypeMistral
 }

-func (m *mistralProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (m *mistralProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
-	return types.ActionContinue, nil
+	return nil
 }

 func (m *mistralProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go
@@ -3,12 +3,15 @@ package provider
 import (
 	"errors"
 	"fmt"
+	"net/http"
+	"strings"
+
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
 	"github.com/tidwall/gjson"
-	"net/http"
+	"github.com/tidwall/sjson"
 )

 // moonshotProvider is the provider for Moonshot AI service.
@@ -53,12 +56,12 @@ func (m *moonshotProvider) GetProviderType() string {
 	return providerTypeMoonshot
 }

-func (m *moonshotProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (m *moonshotProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
-	return types.ActionContinue, nil
+	return nil
 }

 func (m *moonshotProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
@@ -91,12 +94,12 @@ func (m *moonshotProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiNam
 		}()
 		if err != nil {
 			log.Errorf("failed to load context file: %v", err)
-			_ = util.SendResponse(500, "ai-proxy.moonshot.load_ctx_failed", util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
+			util.ErrorHandler("ai-proxy.moonshot.load_ctx_failed", fmt.Errorf("failed to load context file: %v", err))
 			return
 		}
 		err = m.performChatCompletion(ctx, content, request, log)
 		if err != nil {
-			_ = util.SendResponse(500, "ai-proxy.moonshot.insert_ctx_failed", util.MimeTypeTextPlain, fmt.Sprintf("failed to perform chat completion: %v", err))
+			util.ErrorHandler("ai-proxy.moonshot.insert_ctx_failed", fmt.Errorf("failed to perform chat completion: %v", err))
 		}
 	}, log)
 	if err == nil {
@@ -149,3 +152,99 @@ func (m *moonshotProvider) sendRequest(method, path, body, apiKey string, callba
 		return errors.New("unsupported method: " + method)
 	}
 }
+
+// OnStreamingResponseBody re-frames the SSE stream returned by Moonshot, one event at a time.
+//
+// chunk is appended to whatever an earlier call left under ctxKeyStreamingBody and the result
+// is scanned byte by byte: every "key: value" line is fed into the current event via setValue,
+// and an empty line terminates the event, which is then handed to convertStreamEvent and
+// written to the returned buffer. Bytes of an event that is not terminated yet are stored back
+// under ctxKeyStreamingBody for the next call; otherwise that context entry is cleared.
+//
+// isLastChunk is not consulted. The returned error is always nil: an event that fails
+// conversion is logged by convertStreamEvent and left out of the output.
+func (m *moonshotProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
+	receivedBody := chunk
+	if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
+		receivedBody = append(bufferedStreamingBody, chunk...)
+	}
+
+	eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1
+
+	defer func() {
+		if eventStartIndex >= 0 && eventStartIndex < len(receivedBody) {
+			// Just in case the received chunk is not a complete event.
+			ctx.SetContext(ctxKeyStreamingBody, receivedBody[eventStartIndex:])
+		} else {
+			ctx.SetContext(ctxKeyStreamingBody, nil)
+		}
+	}()
+
+	var responseBuilder strings.Builder
+	currentKey := ""
+	currentEvent := &streamEvent{}
+	i, length := 0, len(receivedBody)
+	for i = 0; i < length; i++ {
+		ch := receivedBody[i]
+		if ch != '\n' {
+			if lineStartIndex == -1 {
+				if eventStartIndex == -1 {
+					eventStartIndex = i
+				}
+				lineStartIndex = i
+				valueStartIndex = -1
+			}
+			if valueStartIndex == -1 {
+				if ch == ':' {
+					// The key keeps its trailing ':' (it spans up to valueStartIndex).
+					valueStartIndex = i + 1
+					currentKey = string(receivedBody[lineStartIndex:valueStartIndex])
+				}
+			} else if valueStartIndex == i && ch == ' ' {
+				// Skip leading spaces in data.
+				valueStartIndex = i + 1
+			}
+			continue
+		}
+
+		if lineStartIndex != -1 {
+			// A line without ':' never sets valueStartIndex; slicing from -1 would
+			// panic, so such a line is ignored instead of being turned into a value.
+			if valueStartIndex != -1 {
+				value := string(receivedBody[valueStartIndex:i])
+				currentEvent.setValue(currentKey, value)
+			}
+		} else {
+			// Extra new line. The current event is complete.
+			log.Debugf("processing event: %v", currentEvent)
+			m.convertStreamEvent(&responseBuilder, currentEvent, log)
+			// Reset event parsing state.
+			eventStartIndex = -1
+			currentEvent = &streamEvent{}
+		}
+
+		// Reset line parsing state.
+		lineStartIndex = -1
+		valueStartIndex = -1
+		currentKey = ""
+	}
+
+	modifiedResponseChunk := responseBuilder.String()
+	log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
+	return []byte(modifiedResponseChunk), nil
+}
+
+// convertStreamEvent writes one parsed SSE event to responseBuilder. When the event data
+// carries a "choices.0.usage" field, that field is removed and its raw JSON is re-attached
+// as a top-level "usage" field before the event is written. Events whose Data equals
+// streamEndDataValue, and events without that field, are written unchanged.
+// If the JSON rewrite fails, the error is logged and returned and nothing is written.
+func (m *moonshotProvider) convertStreamEvent(responseBuilder *strings.Builder, event *streamEvent, log wrapper.Log) error {
+	const nestedUsagePath = "choices.0.usage"
+
+	if event.Data != streamEndDataValue {
+		if nestedUsage := gjson.Get(event.Data, nestedUsagePath); nestedUsage.Exists() {
+			rewritten, err := sjson.Delete(event.Data, nestedUsagePath)
+			if err == nil {
+				rewritten, err = sjson.SetRaw(rewritten, "usage", nestedUsage.Raw)
+			}
+			if err != nil {
+				log.Errorf("convert usage event error: %v", err)
+				return err
+			}
+			event.Data = rewritten
+		}
+	}
+
+	m.appendStreamEvent(responseBuilder, event)
+	return nil
+}
+
+// appendStreamEvent writes the event to responseBuilder as streamDataItemKey followed by
+// the event's Data and a blank line ("\n\n").
+func (m *moonshotProvider) appendStreamEvent(responseBuilder *strings.Builder, event *streamEvent) {
+	responseBuilder.WriteString(streamDataItemKey + event.Data + "\n\n")
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/ollama.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/ollama.go
@@ -3,10 +3,11 @@ package provider
 import (
 	"errors"
 	"fmt"
+	"net/http"
+
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
-	"net/http"
 )

 // ollamaProvider is the provider for Ollama service.
@@ -48,12 +49,12 @@ func (m *ollamaProvider) GetProviderType() string {
 	return providerTypeOllama
 }

-func (m *ollamaProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (m *ollamaProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
-	return types.ActionContinue, nil
+	return nil
 }

 func (m *ollamaProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/openai.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/openai.go
@@ -57,9 +57,9 @@ func (m *openaiProvider) GetProviderType() string {
 	return providerTypeOpenAI
 }

-func (m *openaiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (m *openaiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
-	return types.ActionContinue, nil
+	return nil
 }

 func (m *openaiProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
@@ -46,6 +46,7 @@ const (
 	providerTypeCohere     = "cohere"
 	providerTypeDoubao     = "doubao"
 	providerTypeCoze       = "coze"
+	providerTypeTogetherAI = "together-ai"

 	protocolOpenAI   = "openai"
 	protocolOriginal = "original"
@@ -58,7 +59,9 @@ const (
 	finishReasonLength = "length"

 	ctxKeyIncrementalStreaming = "incrementalStreaming"
-	ctxKeyApiName              = "apiKey"
+	ctxKeyApiKey               = "apiKey"
+	CtxKeyApiName              = "apiName"
+	ctxKeyIsStreaming          = "isStreaming"
 	ctxKeyStreamingBody        = "streamingBody"
 	ctxKeyOriginalRequestModel = "originalRequestModel"
 	ctxKeyFinalRequestModel    = "finalRequestModel"
@@ -106,6 +109,7 @@ var (
 		providerTypeCohere:     &cohereProviderInitializer{},
 		providerTypeDoubao:     &doubaoProviderInitializer{},
 		providerTypeCoze:       &cozeProviderInitializer{},
+		providerTypeTogetherAI: &togetherAIProviderInitializer{},
 	}
 )

@@ -113,42 +117,48 @@ type Provider interface {
 	GetProviderType() string
 }

-type ApiNameHandler interface {
-	GetApiName(path string) ApiName
-}
-
 type RequestHeadersHandler interface {
-	OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error)
-}
-
-type TransformRequestHeadersHandler interface {
-	TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log)
+	OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error
 }

 type RequestBodyHandler interface {
 	OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error)
 }

+type StreamingResponseBodyHandler interface {
+	OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error)
+}
+
+type ApiNameHandler interface {
+	GetApiName(path string) ApiName
+}
+
+type TransformRequestHeadersHandler interface {
+	TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log)
+}
+
 type TransformRequestBodyHandler interface {
 	TransformRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error)
 }

 // TransformRequestBodyHeadersHandler allows to transform request headers based on the request body.
-// Some providers (e.g. baidu, gemini) transform request headers (e.g., path) based on the request body (e.g., model).
+// Some providers (e.g. gemini) transform request headers (e.g., path) based on the request body (e.g., model).
 type TransformRequestBodyHeadersHandler interface {
 	TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header, log wrapper.Log) ([]byte, error)
 }

-type ResponseHeadersHandler interface {
-	OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error)
+type TransformResponseHeadersHandler interface {
+	TransformResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log)
 }

-type StreamingResponseBodyHandler interface {
-	OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error)
+type TransformResponseBodyHandler interface {
+	TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error)
 }

-type ResponseBodyHandler interface {
-	OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error)
+// TickFuncHandler allows the provider to execute a function periodically
+// Use case: the maximum expiration time of baidu apiToken is 24 hours, need to refresh periodically
+type TickFuncHandler interface {
+	GetTickFunc(log wrapper.Log) (tickPeriod int64, tickFunc func())
 }

 type ProviderConfig struct {
@@ -167,6 +177,9 @@ type ProviderConfig struct {
 	// @Title zh-CN apiToken 故障切换
 	// @Description zh-CN 当 apiToken 不可用时移出 apiTokens 列表，对移除的 apiToken 进行健康检查，当重新可用后加回 apiTokens 列表
 	failover *failover `required:"false" yaml:"failover" json:"failover"`
+	// @Title zh-CN 失败请求重试
+	// @Description zh-CN 对失败的请求立即进行重试
+	retryOnFailure *retryOnFailure `required:"false" yaml:"retryOnFailure" json:"retryOnFailure"`
 	// @Title zh-CN 基于OpenAI协议的自定义后端URL
 	// @Description zh-CN 仅适用于支持 openai 协议的服务。
 	openaiCustomUrl string `required:"false" yaml:"openaiCustomUrl" json:"openaiCustomUrl"`
@@ -182,6 +195,9 @@ type ProviderConfig struct {
 	// @Title zh-CN 启用通义千问搜索服务
 	// @Description zh-CN 仅适用于通义千问服务，表示是否启用通义千问的互联网搜索功能。
 	qwenEnableSearch bool `required:"false" yaml:"qwenEnableSearch" json:"qwenEnableSearch"`
+	// @Title zh-CN 通义千问服务域名
+	// @Description zh-CN 仅适用于通义千问服务，默认转发域名为 dashscope.aliyuncs.com, 当使用金融云服务时，可以设置为 dashscope-finance.aliyuncs.com
+	qwenDomain string `required:"false" yaml:"qwenDomain" json:"qwenDomain"`
 	// @Title zh-CN 开启通义千问兼容模式
 	// @Description zh-CN 启用通义千问兼容模式后，将调用千问的兼容模式接口，同时对请求/响应不做修改。
 	qwenEnableCompatible bool `required:"false" yaml:"qwenEnableCompatible" json:"qwenEnableCompatible"`
@@ -197,8 +213,11 @@ type ProviderConfig struct {
 	// @Title zh-CN hunyuan api id for authorization
 	// @Description zh-CN 仅适用于Hun Yuan AI服务鉴权
 	hunyuanAuthId string `required:"false" yaml:"hunyuanAuthId" json:"hunyuanAuthId"`
+	// @Title zh-CN minimax API type
+	// @Description zh-CN 仅适用于 minimax 服务。minimax API 类型，v2 和 pro 中选填一项，默认值为 v2
+	minimaxApiType string `required:"false" yaml:"minimaxApiType" json:"minimaxApiType"`
 	// @Title zh-CN minimax group id
-	// @Description zh-CN 仅适用于minimax使用ChatCompletion Pro接口的模型
+	// @Description zh-CN 仅适用于 minimax 服务。minimax API 类型为 pro 时必填
 	minimaxGroupId string `required:"false" yaml:"minimaxGroupId" json:"minimaxGroupId"`
 	// @Title zh-CN 模型名称映射表
 	// @Description zh-CN 用于将请求中的模型名称映射为目标AI服务商支持的模型名称。支持通过“*”来配置全局映射
@@ -227,6 +246,17 @@ type ProviderConfig struct {
 	// @Title zh-CN 自定义大模型参数配置
 	// @Description zh-CN 用于填充或者覆盖大模型调用时的参数
 	customSettings []CustomSetting
+	// @Title zh-CN Baidu 的 Access Key 和 Secret Key，中间用 : 分隔，用于申请 apiToken
+	baiduAccessKeyAndSecret []string `required:"false" yaml:"baiduAccessKeyAndSecret" json:"baiduAccessKeyAndSecret"`
+	// @Title zh-CN 请求刷新百度 apiToken 服务名称
+	baiduApiTokenServiceName string `required:"false" yaml:"baiduApiTokenServiceName" json:"baiduApiTokenServiceName"`
+	// @Title zh-CN 请求刷新百度 apiToken 服务域名
+	baiduApiTokenServiceHost string `required:"false" yaml:"baiduApiTokenServiceHost" json:"baiduApiTokenServiceHost"`
+	// @Title zh-CN 请求刷新百度 apiToken 服务端口
+	baiduApiTokenServicePort int64 `required:"false" yaml:"baiduApiTokenServicePort" json:"baiduApiTokenServicePort"`
+	// @Title zh-CN 是否使用全局的 apiToken
+	// @Description zh-CN 如果没有启用 apiToken failover，但是 apiToken 的状态又需要在多个 Wasm VM 中同步时需要将该参数设置为 true，例如 Baidu 的 apiToken 需要定时刷新
+	useGlobalApiToken bool `required:"false" yaml:"useGlobalApiToken" json:"useGlobalApiToken"`
 }

 func (c *ProviderConfig) GetId() string {
@@ -261,6 +291,10 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 	}
 	c.qwenEnableSearch = json.Get("qwenEnableSearch").Bool()
 	c.qwenEnableCompatible = json.Get("qwenEnableCompatible").Bool()
+	c.qwenDomain = json.Get("qwenDomain").String()
+	if c.qwenDomain != "" {
+		// TODO: validate the domain, if not valid, set to default
+	}
 	c.ollamaServerHost = json.Get("ollamaServerHost").String()
 	c.ollamaServerPort = uint32(json.Get("ollamaServerPort").Uint())
 	c.modelMapping = make(map[string]string)
@@ -279,6 +313,7 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 	c.claudeVersion = json.Get("claudeVersion").String()
 	c.hunyuanAuthId = json.Get("hunyuanAuthId").String()
 	c.hunyuanAuthKey = json.Get("hunyuanAuthKey").String()
+	c.minimaxApiType = json.Get("minimaxApiType").String()
 	c.minimaxGroupId = json.Get("minimaxGroupId").String()
 	c.cloudflareAccountId = json.Get("cloudflareAccountId").String()
 	if c.typ == providerTypeGemini {
@@ -321,6 +356,27 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 	if failoverJson.Exists() {
 		c.failover.FromJson(failoverJson)
 	}
+
+	retryOnFailureJson := json.Get("retryOnFailure")
+	c.retryOnFailure = &retryOnFailure{
+		enabled: false,
+	}
+	if retryOnFailureJson.Exists() {
+		c.retryOnFailure.FromJson(retryOnFailureJson)
+	}
+
+	for _, accessKeyAndSecret := range json.Get("baiduAccessKeyAndSecret").Array() {
+		c.baiduAccessKeyAndSecret = append(c.baiduAccessKeyAndSecret, accessKeyAndSecret.String())
+	}
+	c.baiduApiTokenServiceName = json.Get("baiduApiTokenServiceName").String()
+	c.baiduApiTokenServiceHost = json.Get("baiduApiTokenServiceHost").String()
+	if c.baiduApiTokenServiceHost == "" {
+		c.baiduApiTokenServiceHost = baiduApiTokenDomain
+	}
+	c.baiduApiTokenServicePort = json.Get("baiduApiTokenServicePort").Int()
+	if c.baiduApiTokenServicePort == 0 {
+		c.baiduApiTokenServicePort = baiduApiTokenPort
+	}
 }

 func (c *ProviderConfig) Validate() error {
@@ -356,10 +412,10 @@ func (c *ProviderConfig) Validate() error {
 }

 func (c *ProviderConfig) GetOrSetTokenWithContext(ctx wrapper.HttpContext) string {
-	ctxApiKey := ctx.GetContext(ctxKeyApiName)
+	ctxApiKey := ctx.GetContext(ctxKeyApiKey)
 	if ctxApiKey == nil {
 		ctxApiKey = c.GetRandomToken()
-		ctx.SetContext(ctxKeyApiName, ctxApiKey)
+		ctx.SetContext(ctxKeyApiKey, ctxApiKey)
 	}
 	return ctxApiKey.(string)
 }
@@ -403,6 +459,9 @@ func (c *ProviderConfig) parseRequestAndMapModel(ctx wrapper.HttpContext, reques
 		streaming := req.Stream
 		if streaming {
 			_ = proxywasm.ReplaceHttpRequestHeader("Accept", "text/event-stream")
+			ctx.SetContext(ctxKeyIsStreaming, true)
+		} else {
+			ctx.SetContext(ctxKeyIsStreaming, false)
 		}

 		return c.setRequestModel(ctx, req, log)
@@ -497,9 +556,9 @@ func (c *ProviderConfig) handleRequestBody(
 	if handler, ok := provider.(TransformRequestBodyHandler); ok {
 		body, err = handler.TransformRequestBody(ctx, apiName, body, log)
 	} else if handler, ok := provider.(TransformRequestBodyHeadersHandler); ok {
-		headers := util.GetOriginalHttpHeaders()
+		headers := util.GetOriginalRequestHeaders()
 		body, err = handler.TransformRequestBodyHeaders(ctx, apiName, body, headers, log)
-		util.ReplaceOriginalHttpHeaders(headers)
+		util.ReplaceRequestHeaders(headers)
 	} else {
 		body, err = c.defaultTransformRequestBody(ctx, apiName, body, log)
 	}
@@ -508,9 +567,14 @@ func (c *ProviderConfig) handleRequestBody(
 		return types.ActionContinue, err
 	}

+	// If retryOnFailure is enabled, save the transformed body to the context in case of retry
+	if c.isRetryOnFailureEnabled() {
+		ctx.SetContext(ctxRequestBody, body)
+	}
+
 	if apiName == ApiNameChatCompletion {
 		if c.context == nil {
-			return types.ActionContinue, replaceHttpJsonRequestBody(body, log)
+			return types.ActionContinue, replaceRequestBody(body, log)
 		}
 		err = contextCache.GetContextFromFile(ctx, provider, body, log)

@@ -519,14 +583,14 @@ func (c *ProviderConfig) handleRequestBody(
 		}
 		return types.ActionContinue, err
 	}
-	return types.ActionContinue, replaceHttpJsonRequestBody(body, log)
+	return types.ActionContinue, replaceRequestBody(body, log)
 }

 func (c *ProviderConfig) handleRequestHeaders(provider Provider, ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) {
+	headers := util.GetOriginalRequestHeaders()
 	if handler, ok := provider.(TransformRequestHeadersHandler); ok {
-		originalHeaders := util.GetOriginalHttpHeaders()
-		handler.TransformRequestHeaders(ctx, apiName, originalHeaders, log)
-		util.ReplaceOriginalHttpHeaders(originalHeaders)
+		handler.TransformRequestHeaders(ctx, apiName, headers, log)
+		util.ReplaceRequestHeaders(headers)
 	}
 }

@@ -542,3 +606,11 @@ func (c *ProviderConfig) defaultTransformRequestBody(ctx wrapper.HttpContext, ap
 	}
 	return json.Marshal(request)
 }
+
+// DefaultTransformResponseHeaders applies the stock response-header handling.
+// With the "original" protocol the response body is not read at all; for any other
+// protocol the Content-Length header is removed from headers (presumably because the
+// body may be rewritten to a different length — TODO confirm).
+func (c *ProviderConfig) DefaultTransformResponseHeaders(ctx wrapper.HttpContext, headers http.Header) {
+	if c.protocol != protocolOriginal {
+		headers.Del("Content-Length")
+		return
+	}
+	ctx.DontReadResponseBody()
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
@@ -23,10 +23,11 @@ import (
 const (
 	qwenResultFormatMessage = "message"

-	qwenDomain                   = "dashscope.aliyuncs.com"
+	qwenDefaultDomain            = "dashscope.aliyuncs.com"
 	qwenChatCompletionPath       = "/api/v1/services/aigc/text-generation/generation"
 	qwenTextEmbeddingPath        = "/api/v1/services/embeddings/text-embedding/text-embedding"
 	qwenCompatiblePath           = "/compatible-mode/v1/chat/completions"
+	qwenBailianPath              = "/api/v1/apps"
 	qwenMultimodalGenerationPath = "/api/v1/services/aigc/multimodal-generation/generation"

 	qwenTopPMin = 0.000001
@@ -64,19 +65,21 @@ type qwenProvider struct {
 }

 func (m *qwenProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestHostHeader(headers, qwenDomain)
+	if m.config.qwenDomain != "" {
+		util.OverwriteRequestHostHeader(headers, m.config.qwenDomain)
+	} else {
+		util.OverwriteRequestHostHeader(headers, qwenDefaultDomain)
+	}
 	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+m.config.GetApiTokenInUse(ctx))

-	if m.config.qwenEnableCompatible {
+	if m.config.IsOriginal() {
+	} else if m.config.qwenEnableCompatible {
 		util.OverwriteRequestPathHeader(headers, qwenCompatiblePath)
 	} else if apiName == ApiNameChatCompletion {
 		util.OverwriteRequestPathHeader(headers, qwenChatCompletionPath)
 	} else if apiName == ApiNameEmbeddings {
 		util.OverwriteRequestPathHeader(headers, qwenTextEmbeddingPath)
 	}
-
-	headers.Del("Accept-Encoding")
-	headers.Del("Content-Length")
 }

 func (m *qwenProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header, log wrapper.Log) ([]byte, error) {
@@ -91,20 +94,19 @@ func (m *qwenProvider) GetProviderType() string {
 	return providerTypeQwen
 }

-func (m *qwenProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (m *qwenProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion && apiName != ApiNameEmbeddings {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}

 	m.config.handleRequestHeaders(m, ctx, apiName, log)

 	if m.config.protocol == protocolOriginal {
 		ctx.DontReadRequestBody()
-		return types.ActionContinue, nil
+		return nil
 	}

-	// Delay the header processing to allow changing streaming mode in OnRequestBody
-	return types.HeaderStopIteration, nil
+	return nil
 }

 func (m *qwenProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
@@ -158,11 +160,11 @@ func (m *qwenProvider) onChatCompletionRequestBody(ctx wrapper.HttpContext, body

 	streaming := request.Stream
 	if streaming {
-		_ = proxywasm.ReplaceHttpRequestHeader("Accept", "text/event-stream")
-		_ = proxywasm.ReplaceHttpRequestHeader("X-DashScope-SSE", "enable")
+		headers.Set("Accept", "text/event-stream")
+		headers.Set("X-DashScope-SSE", "enable")
 	} else {
-		_ = proxywasm.ReplaceHttpRequestHeader("Accept", "*/*")
-		_ = proxywasm.RemoveHttpRequestHeader("X-DashScope-SSE")
+		headers.Set("Accept", "*/*")
+		headers.Del("X-DashScope-SSE")
 	}

 	return m.buildQwenTextGenerationRequest(ctx, request, streaming)
@@ -181,16 +183,6 @@ func (m *qwenProvider) onEmbeddingsRequestBody(ctx wrapper.HttpContext, body []b
 	return json.Marshal(qwenRequest)
 }

-func (m *qwenProvider) OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
-	if m.config.protocol == protocolOriginal {
-		ctx.DontReadResponseBody()
-		return types.ActionContinue, nil
-	}
-
-	_ = proxywasm.RemoveHttpResponseHeader("Content-Length")
-	return types.ActionContinue, nil
-}
-
 func (m *qwenProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
 	if m.config.qwenEnableCompatible || name != ApiNameChatCompletion {
 		return chunk, nil
@@ -276,9 +268,9 @@ func (m *qwenProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name Api
 	return []byte(modifiedResponseChunk), nil
 }

-func (m *qwenProvider) OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
+func (m *qwenProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
 	if m.config.qwenEnableCompatible {
-		return types.ActionContinue, nil
+		return body, nil
 	}
 	if apiName == ApiNameChatCompletion {
 		return m.onChatCompletionResponseBody(ctx, body, log)
@@ -286,25 +278,25 @@ func (m *qwenProvider) OnResponseBody(ctx wrapper.HttpContext, apiName ApiName,
 	if apiName == ApiNameEmbeddings {
 		return m.onEmbeddingsResponseBody(ctx, body, log)
 	}
-	return types.ActionContinue, errUnsupportedApiName
+	return nil, errUnsupportedApiName
 }

-func (m *qwenProvider) onChatCompletionResponseBody(ctx wrapper.HttpContext, body []byte, log wrapper.Log) (types.Action, error) {
+func (m *qwenProvider) onChatCompletionResponseBody(ctx wrapper.HttpContext, body []byte, log wrapper.Log) ([]byte, error) {
 	qwenResponse := &qwenTextGenResponse{}
 	if err := json.Unmarshal(body, qwenResponse); err != nil {
-		return types.ActionContinue, fmt.Errorf("unable to unmarshal Qwen response: %v", err)
+		return nil, fmt.Errorf("unable to unmarshal Qwen response: %v", err)
 	}
 	response := m.buildChatCompletionResponse(ctx, qwenResponse)
-	return types.ActionContinue, replaceJsonResponseBody(response, log)
+	return json.Marshal(response)
 }

-func (m *qwenProvider) onEmbeddingsResponseBody(ctx wrapper.HttpContext, body []byte, log wrapper.Log) (types.Action, error) {
+func (m *qwenProvider) onEmbeddingsResponseBody(ctx wrapper.HttpContext, body []byte, log wrapper.Log) ([]byte, error) {
 	qwenResponse := &qwenTextEmbeddingResponse{}
 	if err := json.Unmarshal(body, qwenResponse); err != nil {
-		return types.ActionContinue, fmt.Errorf("unable to unmarshal Qwen response: %v", err)
+		return nil, fmt.Errorf("unable to unmarshal Qwen response: %v", err)
 	}
 	response := m.buildEmbeddingsResponse(ctx, qwenResponse)
-	return types.ActionContinue, replaceJsonResponseBody(response, log)
+	return json.Marshal(response)
 }

 func (m *qwenProvider) buildQwenTextGenerationRequest(ctx wrapper.HttpContext, origRequest *chatCompletionRequest, streaming bool) ([]byte, error) {
@@ -758,6 +750,7 @@ func (m *qwenProvider) GetApiName(path string) ApiName {
 	switch {
 	case strings.Contains(path, qwenChatCompletionPath),
 		strings.Contains(path, qwenMultimodalGenerationPath),
+		strings.Contains(path, qwenBailianPath),
 		strings.Contains(path, qwenCompatiblePath):
 		return ApiNameChatCompletion
 	case strings.Contains(path, qwenTextEmbeddingPath):
--- a/plugins/wasm-go/extensions/ai-proxy/provider/request_helper.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/request_helper.go
@@ -37,7 +37,7 @@ func replaceJsonRequestBody(request interface{}, log wrapper.Log) error {
 	return err
 }

-func replaceHttpJsonRequestBody(body []byte, log wrapper.Log) error {
+func replaceRequestBody(body []byte, log wrapper.Log) error {
 	log.Debugf("request body: %s", string(body))
 	err := proxywasm.ReplaceHttpRequestBody(body)
 	if err != nil {
@@ -65,15 +65,11 @@ func insertContextMessage(request *chatCompletionRequest, content string) {
 	}
 }

-func replaceJsonResponseBody(response interface{}, log wrapper.Log) error {
-	body, err := json.Marshal(response)
-	if err != nil {
-		return fmt.Errorf("unable to marshal response: %v", err)
-	}
+func ReplaceResponseBody(body []byte, log wrapper.Log) error {
 	log.Debugf("response body: %s", string(body))
-	err = proxywasm.ReplaceHttpResponseBody(body)
+	err := proxywasm.ReplaceHttpResponseBody(body)
 	if err != nil {
 		return fmt.Errorf("unable to replace the original response body: %v", err)
 	}
-	return err
+	return nil
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/retry.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/retry.go
@@ -0,0 +1,141 @@
+package provider
+
+import (
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
+	"github.com/tidwall/gjson"
+	"net/http"
+)
+
+const (
+	ctxRequestBody = "requestBody"
+	ctxRetryCount  = "retryCount"
+)
+
+type retryOnFailure struct {
+	// @Title en-US Whether to enable request retry
+	enabled bool `required:"false" yaml:"enabled" json:"enabled"`
+	// @Title en-US Number of retries
+	maxRetries int64 `required:"false" yaml:"maxRetries" json:"maxRetries"`
+	// @Title en-US Retry timeout
+	retryTimeout int64 `required:"false" yaml:"retryTimeout" json:"retryTimeout"`
+}
+
+// FromJson populates the retry settings from the given JSON configuration node.
+//
+// "enabled" is read as a boolean. "maxRetries" falls back to 1 and
+// "retryTimeout" falls back to 30 * 1000 whenever the configured value is
+// missing, zero or negative.
+func (r *retryOnFailure) FromJson(json gjson.Result) {
+	r.enabled = json.Get("enabled").Bool()
+
+	r.maxRetries = json.Get("maxRetries").Int()
+	// A negative count is as meaningless as an unset one, so treat both alike.
+	if r.maxRetries <= 0 {
+		r.maxRetries = 1
+	}
+
+	r.retryTimeout = json.Get("retryTimeout").Int()
+	// sendRetryRequest converts this value to uint32; a negative number would
+	// wrap around to a huge timeout, so it is replaced by the default instead.
+	if r.retryTimeout <= 0 {
+		r.retryTimeout = 30 * 1000
+	}
+}
+
+// isRetryOnFailureEnabled reports whether the retryOnFailure section of this
+// provider configuration has its enabled flag set.
+func (c *ProviderConfig) isRetryOnFailureEnabled() bool {
+	retryEnabled := c.retryOnFailure.enabled
+	return retryEnabled
+}
+
+func (c *ProviderConfig) retryFailedRequest(activeProvider Provider, ctx wrapper.HttpContext, log wrapper.Log) {
+	log.Debugf("Retry failed request: provider=%s", activeProvider.GetProviderType())
+	retryClient := createRetryClient(ctx)
+	apiName, _ := ctx.GetContext(CtxKeyApiName).(ApiName)
+	ctx.SetContext(ctxRetryCount, 1)
+	c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, log)
+}
+
+func (c *ProviderConfig) transformResponseHeadersAndBody(ctx wrapper.HttpContext, activeProvider Provider, apiName ApiName, headers http.Header, body []byte, log wrapper.Log) ([][2]string, []byte) {
+	if handler, ok := activeProvider.(TransformResponseHeadersHandler); ok {
+		handler.TransformResponseHeaders(ctx, apiName, headers, log)
+	} else {
+		c.DefaultTransformResponseHeaders(ctx, headers)
+	}
+
+	if handler, ok := activeProvider.(TransformResponseBodyHandler); ok {
+		var err error
+		body, err = handler.TransformResponseBody(ctx, apiName, body, log)
+		if err != nil {
+			log.Errorf("Failed to transform response body: %v", err)
+		}
+	}
+
+	return util.HeaderToSlice(headers), body
+}
+
+// retryCall is the response callback of a retry request.
+//
+// On status 200 the response is transformed and sent to the client, and the
+// retry sequence ends. On any other status the attempt counter stored in the
+// HTTP context is incremented; while it does not exceed maxRetries another
+// retry is sent, otherwise the original response is resumed.
+//
+// The counter is read with an unchecked type assertion, so the context value
+// under ctxRetryCount must be an int (retryFailedRequest stores one).
+func (c *ProviderConfig) retryCall(
+	ctx wrapper.HttpContext, log wrapper.Log, activeProvider Provider,
+	apiName ApiName, statusCode int, responseHeaders http.Header, responseBody []byte,
+	retryClient *wrapper.ClusterClient[wrapper.RouteCluster]) {
+
+	retryCount := ctx.GetContext(ctxRetryCount).(int)
+	log.Debugf("Sent retry request: %d/%d", retryCount, c.retryOnFailure.maxRetries)
+
+	if statusCode == 200 {
+		log.Debugf("Retry request succeeded")
+		headers, body := c.transformResponseHeadersAndBody(ctx, activeProvider, apiName, responseHeaders, responseBody, log)
+		proxywasm.SendHttpResponse(200, headers, body, -1)
+		// The client has been answered: stop here so that no further retry is
+		// sent and the original response is not resumed afterwards.
+		return
+	}
+	log.Debugf("The retry request still failed, status: %d, responseHeaders: %v, responseBody: %s", statusCode, responseHeaders, string(responseBody))
+
+	retryCount++
+	if retryCount <= int(c.retryOnFailure.maxRetries) {
+		ctx.SetContext(ctxRetryCount, retryCount)
+		c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, log)
+	} else {
+		log.Debugf("Reached the maximum retry count: %d", c.retryOnFailure.maxRetries)
+		proxywasm.ResumeHttpResponse()
+	}
+}
+
+// sendRetryRequest issues one retry attempt as a POST through retryClient.
+//
+// Headers and body come from getRetryRequestHeadersAndBody, the path from
+// getRetryPath, and the timeout from retryOnFailure.retryTimeout. The response
+// is handed to retryCall. If the request cannot be dispatched, the error is
+// logged and the original response is resumed.
+func (c *ProviderConfig) sendRetryRequest(
+	ctx wrapper.HttpContext, apiName ApiName, activeProvider Provider,
+	retryClient *wrapper.ClusterClient[wrapper.RouteCluster], log wrapper.Log) {
+
+	headers, payload := c.getRetryRequestHeadersAndBody(ctx, activeProvider, apiName, log)
+	target := getRetryPath(ctx)
+	timeout := uint32(c.retryOnFailure.retryTimeout)
+
+	// Forward every response of this attempt to retryCall.
+	onResponse := func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+		c.retryCall(ctx, log, activeProvider, apiName, statusCode, responseHeaders, responseBody, retryClient)
+	}
+
+	if err := retryClient.Post(target, util.HeaderToSlice(headers), payload, onResponse, timeout); err != nil {
+		log.Errorf("Failed to send retry request: %v", err)
+		proxywasm.ResumeHttpResponse()
+	}
+}
+
+// createRetryClient builds the cluster client used to send retry requests.
+//
+// The host is taken from wrapper.GetRequestHost; when that returns an empty
+// string, the value stored in the HTTP context under ctxRequestHost is used
+// instead (unchecked type assertion: it must be a string).
+func createRetryClient(ctx wrapper.HttpContext) *wrapper.ClusterClient[wrapper.RouteCluster] {
+	target := wrapper.GetRequestHost()
+	if len(target) == 0 {
+		target = ctx.GetContext(ctxRequestHost).(string)
+	}
+	return wrapper.NewClusterClient(wrapper.RouteCluster{Host: target})
+}
+
+// getRetryPath returns the path to use for a retry request.
+//
+// It prefers wrapper.GetRequestPath; when that returns an empty string, the
+// value stored in the HTTP context under ctxRequestPath is returned instead
+// (unchecked type assertion: it must be a string).
+func getRetryPath(ctx wrapper.HttpContext) string {
+	if current := wrapper.GetRequestPath(); current != "" {
+		return current
+	}
+	return ctx.GetContext(ctxRequestPath).(string)
+}
+
+// getRetryRequestHeadersAndBody assembles the headers and body of a retry
+// request.
+//
+// The headers start from a JSON Content-Type and are then rewritten by the
+// provider when it implements TransformRequestHeadersHandler. The body is the
+// value stored in the HTTP context under ctxRequestBody (unchecked type
+// assertion: it must be a []byte).
+func (c *ProviderConfig) getRetryRequestHeadersAndBody(ctx wrapper.HttpContext, activeProvider Provider, apiName ApiName, log wrapper.Log) (http.Header, []byte) {
+	// A retry may go out with a different apiToken, so select the token
+	// before the headers are generated.
+	c.SetApiTokenInUse(ctx, log)
+
+	headers := make(http.Header)
+	headers["Content-Type"] = []string{"application/json"}
+	if transformer, ok := activeProvider.(TransformRequestHeadersHandler); ok {
+		transformer.TransformRequestHeaders(ctx, apiName, headers, log)
+	}
+
+	return headers, ctx.GetContext(ctxRequestBody).([]byte)
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/spark.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/spark.go
@@ -9,7 +9,6 @@ import (

 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
-	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
 )

@@ -67,12 +66,12 @@ func (p *sparkProvider) GetProviderType() string {
 	return providerTypeSpark
 }

-func (p *sparkProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+func (p *sparkProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
 	if apiName != ApiNameChatCompletion {
-		return types.ActionContinue, errUnsupportedApiName
+		return errUnsupportedApiName
 	}
 	p.config.handleRequestHeaders(p, ctx, apiName, log)
-	return types.ActionContinue, nil
+	return nil
 }

 func (p *sparkProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
@@ -82,21 +81,16 @@ func (p *sparkProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName,
 	return p.config.handleRequestBody(p, p.contextCache, ctx, apiName, body, log)
 }

-func (p *sparkProvider) OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
-	_ = proxywasm.RemoveHttpResponseHeader("Content-Length")
-	return types.ActionContinue, nil
-}
-
-func (p *sparkProvider) OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
+func (p *sparkProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
 	sparkResponse := &sparkResponse{}
 	if err := json.Unmarshal(body, sparkResponse); err != nil {
-		return types.ActionContinue, fmt.Errorf("unable to unmarshal spark response: %v", err)
+		return nil, fmt.Errorf("unable to unmarshal spark response: %v", err)
 	}
 	if sparkResponse.Code != 0 {
-		return types.ActionContinue, fmt.Errorf("spark response error, error_code: %d, error_message: %s", sparkResponse.Code, sparkResponse.Message)
+		return nil, fmt.Errorf("spark response error, error_code: %d, error_message: %s", sparkResponse.Code, sparkResponse.Message)
 	}
 	response := p.responseSpark2OpenAI(ctx, sparkResponse)
-	return types.ActionContinue, replaceJsonResponseBody(response, log)
+	return json.Marshal(response)
 }

 func (p *sparkProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
@@ -177,6 +171,4 @@ func (p *sparkProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName
 	util.OverwriteRequestPathHeader(headers, sparkChatCompletionPath)
 	util.OverwriteRequestHostHeader(headers, sparkHost)
 	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+p.config.GetApiTokenInUse(ctx))
-	headers.Del("Accept-Encoding")
-	headers.Del("Content-Length")
 }
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
澄潭	9e418dafd9	release 2.0.6-rc.3 (#1680 )	2025-01-15 20:47:20 +08:00
澄潭	95523a1bc7	Fix istio lds cache (#1679 )	2025-01-15 20:44:13 +08:00
澄潭	dcd8466127	Update build-and-test-plugin.yaml	2025-01-15 20:19:58 +08:00
澄潭	cceae6ad2a	update cpp wasm plugins (#1675 )	2025-01-15 19:15:11 +08:00
zty98751	32f9a5ff32	fix istio commit	2025-01-15 15:29:44 +08:00
澄潭	6f95297b80	Release 2.0.6-rc.2 (#1671 )	2025-01-14 20:10:53 +08:00
Kent Dong	95426d5ccf	fix: Fix a typo in the README files of ai-statistics plugin (#1670 )	2025-01-14 13:39:55 +08:00
澄潭	a05b6b1e9d	add ai_log field (#1669 )	2025-01-14 10:03:24 +08:00
Jun	d0628344da	add higress architecture doc (#1662 )	2025-01-14 09:48:32 +08:00
韩贤涛	a1bf315b13	fix: resolve blocking issue with minimax responses in ai-proxy (#1663 )	2025-01-14 09:43:19 +08:00
mamba	b3d9123d59	[frontend-gray] 微前端灰度场景,支持 IncludePathPrefixes字段 (#1666 )	2025-01-13 16:24:51 +08:00
rinfx	817061c6cc	remove dependency for ai-statistic (#1660 )	2025-01-10 13:43:29 +08:00
rinfx	ea0d5e7564	Improve ai plugins (#1657 ) Co-authored-by: Kent Dong <ch3cho@qq.com>	2025-01-09 22:04:51 +08:00
澄潭	2a89c3bb70	Optimize wasmplugin proto (#1656 )	2025-01-09 13:19:46 +08:00
johnlanni	a570c72504	Update Chart.lock	2025-01-08 17:14:27 +08:00
澄潭	ab1316dfe1	rel: Release 2.0.6-rc.1 (#1653 )	2025-01-08 17:08:09 +08:00
澄潭	e97448b71b	Update metrics & enable lds cache (#1650 )	2025-01-08 16:49:23 +08:00
澄潭	6820a06a99	fix tls version annotation (#1652 )	2025-01-08 15:31:39 +08:00
澄潭	4733af849d	Update README.md	2025-01-08 11:30:29 +08:00
yunmaoQu	1c2330e33b	feat: add TLS version annotation support for per-rule configuration (#1592 ) Co-authored-by: 澄潭 <zty98751@alibaba-inc.com>	2025-01-06 21:29:09 +08:00
澄潭	61fef0ecf8	Release 2.0.5 (#1646 )	2025-01-06 19:42:18 +08:00
澄潭	d29b8d7ca8	fix ai proxy checkStream (#1645 )	2025-01-06 15:30:02 +08:00
澄潭	2501895b66	ai-cache update body buffer limit size (#1644 )	2025-01-06 14:53:29 +08:00
Kent Dong	187a7b5408	fix: Enlarge the default retry timeout in ai-proxy (#1640 )	2025-01-03 11:19:40 +08:00
Jingze	00be491d02	feat: support github provider for oidc wasm plugin (#1639 )	2025-01-02 10:01:54 +08:00
ayanami-desu	2d74c48e8a	Add cohere embedding for ai-cache (#1572 )	2024-12-27 17:48:44 +08:00
澄潭	6dc4d43df5	optimize ai cache (#1626 )	2024-12-27 10:10:57 +08:00
rinfx	2a4e55d46f	move oidcHandler from global to pluginconfig (#1601 )	2024-12-26 19:15:20 +08:00
Se7en	579c986915	feat: retry failed request (#1590 )	2024-12-26 18:30:50 +08:00
Kent Dong	380717ae3d	fix: Make opa listen to all IPs (#1621 )	2024-12-26 17:41:28 +08:00
Kent Dong	8f3723f554	feat: Support setting gateway.unprivilegedPortSupported manually (#1616 )	2024-12-23 19:45:47 +08:00
VinciWu557	909cc0f088	feat: AI 代理 Wasm 插件接入 Together AI (#1617 )	2024-12-23 15:39:56 +08:00
007gzs	4eaf204737	Enhance the capabilities of the AI Intent plugin (#1605 )	2024-12-20 10:25:17 +08:00
澄潭	748bcb083a	redis wrapper support lazy init and database options (#1602 )	2024-12-19 16:22:56 +08:00
澄潭	39c007d045	optimize ai proxy (#1603 )	2024-12-19 16:22:35 +08:00
rinfx	d74d327b68	bugfix: cannot parse content if one streaming body has multi chunks (#1606 )	2024-12-19 16:21:57 +08:00
澄潭	be27726721	Update CODEOWNERS	2024-12-19 14:36:11 +08:00
澄潭	34cc1c0632	Update README.md	2024-12-18 17:02:28 +08:00
澄潭	5694475872	Update README.md	2024-12-18 16:59:03 +08:00
rinfx	2f5709a93e	qwen bailian compatible bug fix (#1597 )	2024-12-17 16:57:31 +08:00
StarryNight	2a200cdd42	AI proxy return unified status in header phase (#1588 )	2024-12-16 18:41:38 +08:00
rinfx	ec39d56731	AI observability upgrade (#1587 ) Co-authored-by: Kent Dong <ch3cho@qq.com>	2024-12-16 10:27:49 +08:00
韩贤涛	8544fa604d	feat: support choosing chatCompletionV2 or chatCompletionPro API for minimax provider (#1593 )	2024-12-15 15:12:00 +08:00
mirror	0ba63e5dd4	fix: default port of static service in ai-cache plugin (#1591 )	2024-12-13 19:03:26 +08:00
mirror	441408c593	docs: fix typos in ai-quota document (#1589 )	2024-12-13 08:56:43 +08:00
duxin40	be57960c22	Support OpenAI embedding. (#1542 )	2024-12-11 11:42:51 +08:00
rinfx	f32020068a	bugfix and extend ai log (#1576 )	2024-12-09 20:39:13 +08:00
澄潭	1a8fce48f0	Update release-hgctl.yaml	2024-12-06 14:01:18 +08:00
澄潭	85c7b1f501	rel: Release 2.0.4 (#1571 )	2024-12-06 13:52:03 +08:00
pepesi	8f660211e3	feat: ai-proxy support custom error handler by cover util.ErrorHandler (#1537 )	2024-12-06 11:47:50 +08:00
rinfx	433227323d	extension mechanism for custom logs and span attributes (#1451 )	2024-12-05 18:39:00 +08:00
pepesi	b36e5ea26b	feat: allow cover api-version when use ai-proxy azure provider (#1535 )	2024-12-05 13:41:02 +08:00
rinfx	ce66ff68ce	solve aliyun lvwang content length limit problem (#1569 )	2024-12-05 13:39:20 +08:00
pepesi	d026f0fca5	feat: ai-proxy support dashscope-finance (#1554 )	2024-12-05 11:48:09 +08:00
rinfx	22790aa149	fix moonshot usage compatible problem (#1568 )	2024-12-05 11:35:25 +08:00
澄潭	7ce6d7aba1	fix xds cache (#1559 )	2024-12-04 00:55:29 +08:00
Se7en	e705a0344f	fix: qwen stream issue (#1564 )	2024-12-03 13:10:47 +08:00
澄潭	d6094974c2	update ai proxy go mod (#1556 )	2024-12-02 14:41:55 +08:00
mamba	6187be97e5	fix: 🐛 frontend-grayurl 解析不正确导致路由失败 (#1550 )	2024-11-29 13:09:05 +08:00
澄潭	bb64b43f23	set concurrency argument of proxy by cpu limit/request (#1552 )	2024-11-28 16:55:57 +08:00
澄潭	ca7458cf1c	Optimize the overall log output (#1549 )	2024-11-27 20:44:34 +08:00
Se7en	ee2dd76ae1	feat: migrate baidu provider to v2 api (#1527 )	2024-11-27 20:12:00 +08:00
pepesi	8154cf95f1	feat: support custom log (#1521 )	2024-11-27 20:11:29 +08:00
澄潭	a7593381e1	fix ai fallback (#1541 )	2024-11-25 16:48:59 +08:00