fix higress-console version in helm chart

fix: The mcp to rest capability of the mcp server supports returning status without returning a body from the backend, and instead responds via sse (#2445 )
release 2.1.5-rc.1 (#2446 )
2026-02-25 21:21:01 +08:00 · 2025-06-18 09:15:46 +08:00 · 2025-06-17 21:26:38 +08:00 · 2025-06-17 21:23:42 +08:00 · 2025-06-17 17:22:46 +08:00 · 2025-06-17 15:40:13 +08:00
159 changed files with 10123 additions and 2377 deletions
--- a/.github/workflows/helm-docs.yaml
+++ b/.github/workflows/helm-docs.yaml
@@ -39,126 +39,3 @@ jobs:
          fi
          git diff --exit-code
          rm -f ./helm-docs
-
-  translate-readme:
-    needs: helm
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Install dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y jq
-
-      - name: Compare README.md
-        id: compare_readme
-        run: |
-          cd ./helm/higress
-          
-          BASE_BRANCH=${GITHUB_BASE_REF:-main} 
-          git fetch origin $BASE_BRANCH
-          
-          if git diff --quiet origin/$BASE_BRANCH -- README.md; then
-            echo "README.md has no local changes compared to $BASE_BRANCH. Skipping translation."
-            echo "skip_translation=true" >> $GITHUB_ENV
-          else
-            echo "README.md has local changes compared to $BASE_BRANCH. Proceeding with translation."
-            echo "skip_translation=false" >> $GITHUB_ENV
-            echo "--------- diff ---------"
-            git diff origin/$BASE_BRANCH -- README.md
-            echo "------------------------"
-          fi
-
-      - name: Translate README.md to Chinese
-        if: env.skip_translation == 'false'
-        env:
-          API_URL: ${{ secrets.HIGRESS_OPENAI_API_URL }}
-          API_KEY: ${{ secrets.HIGRESS_OPENAI_API_KEY }}
-          API_MODEL: ${{ secrets.HIGRESS_OPENAI_API_MODEL }}
-        run: |
-          cat << 'EOF' > translate_readme.py
-          import os
-          import json
-          import requests
-
-          API_URL = os.environ["API_URL"]
-          API_KEY = os.environ["API_KEY"]
-          API_MODEL = os.environ["API_MODEL"]
-          README_PATH = "./helm/higress/README.md"
-          OUTPUT_PATH = "./helm/higress/README.zh.md"
-
-          def stream_translation(api_url, api_key, payload):
-              headers = {
-                  "Content-Type": "application/json",
-                  "Authorization": f"Bearer {api_key}",
-              }
-              response = requests.post(api_url, headers=headers, json=payload, stream=True)
-              response.raise_for_status()
-
-              with open(OUTPUT_PATH, "w", encoding="utf-8") as out_file:
-                  for line in response.iter_lines(decode_unicode=True):
-                      if line.strip() == "" or not line.startswith("data: "):
-                          continue
-                      data = line[6:]
-                      if data.strip() == "[DONE]":
-                          break
-                      try:
-                          chunk = json.loads(data)
-                          content = chunk["choices"][0]["delta"].get("content", "")
-                          if content:
-                              out_file.write(content)
-                      except Exception as e:
-                          print("Error parsing chunk:", e)
-
-          def main():
-              if not os.path.exists(README_PATH):
-                  print("README.md not found!")
-                  return
-
-              with open(README_PATH, "r", encoding="utf-8") as f:
-                  content = f.read()
-
-              payload = {
-                  "model": API_MODEL,
-                  "messages": [
-                      {
-                          "role": "system",
-                          "content": "You are a translation assistant that translates English Markdown text to Chinese. Preserve original Markdown formatting and line breaks."
-                      },
-                      {
-                          "role": "user",
-                          "content": content
-                      }
-                  ],
-                  "temperature": 0.3,
-                  "stream": True
-              }
-
-              print("Streaming translation started...")
-              stream_translation(API_URL, API_KEY, payload)
-              print(f"Translation completed and saved to {OUTPUT_PATH}.")
-
-          if __name__ == "__main__":
-              main()
-          EOF
-          
-          python3 translate_readme.py
-          rm -rf translate_readme.py
-
-      - name: Create Pull Request
-        if: env.skip_translation == 'false'
-        uses: peter-evans/create-pull-request@v7
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-          commit-message: "Update helm translated README.zh.md"
-          branch: update-helm-readme-zh
-          title: "Update helm translated README.zh.md"
-          body: |
-            This PR updates the translated README.zh.md file.
-
-            - Automatically generated by GitHub Actions
-          labels: translation, automated
-          base: main
--- a/.github/workflows/release-crd.yaml
+++ b/.github/workflows/release-crd.yaml
@@ -17,7 +17,7 @@ jobs:
        cat helm/core/crds/customresourcedefinitions.gen.yaml helm/core/crds/istio-envoyfilter.yaml > crd.yaml

    - name: Upload hgctl packages to the GitHub release
-      uses: softprops/action-gh-release@v2
+      uses: softprops/action-gh-release@da05d552573ad5aba039eaac05058a918a7bf631
      if: startsWith(github.ref, 'refs/tags/')
      with:
        files: |
--- a/.github/workflows/release-hgctl.yaml
+++ b/.github/workflows/release-hgctl.yaml
@@ -26,7 +26,7 @@ jobs:
        zip -q -r hgctl_${{ env.HGCTL_VERSION }}_windows_arm64.zip out/windows_arm64/

    - name: Upload hgctl packages to the GitHub release
-      uses: softprops/action-gh-release@v2
+      uses: softprops/action-gh-release@da05d552573ad5aba039eaac05058a918a7bf631
      if: startsWith(github.ref, 'refs/tags/')
      with:
        files: |
@@ -51,7 +51,7 @@ jobs:
        tar -zcvf hgctl_${{ env.HGCTL_VERSION }}_darwin_arm64.tar.gz out/darwin_arm64/

    - name: Upload hgctl packages to the GitHub release
-      uses: softprops/action-gh-release@v2
+      uses: softprops/action-gh-release@da05d552573ad5aba039eaac05058a918a7bf631
      if: startsWith(github.ref, 'refs/tags/')
      with:
        files: |
@@ -73,7 +73,7 @@ jobs:
        tar -zcvf hgctl_${{ env.HGCTL_VERSION }}_darwin_amd64.tar.gz out/darwin_amd64/

    - name: Upload hgctl packages to the GitHub release
-      uses: softprops/action-gh-release@v2
+      uses: softprops/action-gh-release@da05d552573ad5aba039eaac05058a918a7bf631
      if: startsWith(github.ref, 'refs/tags/')
      with:
        files: |
--- a/.github/workflows/sync-crds.yaml
+++ b/.github/workflows/sync-crds.yaml
@@ -0,0 +1,41 @@
+# Copies the generated CRD manifest into the Helm chart and opens a PR with the result.
+name: "Sync CRDs to Helm Chart"
+
+on:
+  workflow_dispatch: ~
+  push:
+    branches: [ main ]
+    paths:
+      - 'api/kubernetes/customresourcedefinitions.gen.yaml'
+
+jobs:
+  sync-crds:
+    name: Sync CRDs
+    runs-on: ubuntu-latest
+    # create-pull-request pushes a branch and opens a PR with GITHUB_TOKEN, so grant both scopes explicitly.
+    permissions:
+      contents: write
+      pull-requests: write
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Copy the CRD YAML File to Helm Folder
+        run: |
+          cp api/kubernetes/customresourcedefinitions.gen.yaml helm/core/crds/customresourcedefinitions.gen.yaml
+
+      - name: Create Pull Request
+        uses: peter-evans/create-pull-request@v7
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          commit-message: "Update CRD file in the helm folder"
+          branch: sync-crds
+          title: "Update CRD file in the helm folder"
+          body: |
+            This PR updates CRD file in the helm folder.
+
+            - Automatically copied by GitHub Actions
+          labels: crds, automated
+          base: main
--- a/.github/workflows/translate-readme.yaml
+++ b/.github/workflows/translate-readme.yaml
@@ -0,0 +1,147 @@
+# Translates helm/higress/README.md into README.zh.md and opens a PR with the result.
+name: "Translate Helm README"
+
+on:
+  workflow_dispatch: ~
+  push:
+    branches: [ main ]
+    paths:
+      - 'helm/higress/README.md'
+
+jobs:
+  translate-readme:
+    runs-on: ubuntu-latest
+    # create-pull-request pushes a branch and opens a PR with GITHUB_TOKEN, so grant both scopes explicitly.
+    permissions:
+      contents: write
+      pull-requests: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y jq
+
+      - name: Compare README.md
+        id: compare_readme
+        env:
+          # Commit the branch pointed at before this push; empty for workflow_dispatch runs.
+          BEFORE_SHA: ${{ github.event.before }}
+        run: |
+          cd ./helm/higress
+
+          # On a push to main the checkout already equals origin/main, so diffing
+          # against the branch tip would never find changes. Diff against the
+          # pre-push commit when it is available, and fall back to the base branch.
+          BASE_BRANCH=${GITHUB_BASE_REF:-main}
+          if [ -n "$BEFORE_SHA" ] && git fetch --depth=1 origin "$BEFORE_SHA"; then
+            BASE_REF="$BEFORE_SHA"
+          else
+            git fetch origin "$BASE_BRANCH"
+            BASE_REF="origin/$BASE_BRANCH"
+          fi
+
+          if git diff --quiet "$BASE_REF" -- README.md; then
+            echo "README.md has no local changes compared to $BASE_REF. Skipping translation."
+            echo "skip_translation=true" >> $GITHUB_ENV
+          else
+            echo "README.md has local changes compared to $BASE_REF. Proceeding with translation."
+            echo "skip_translation=false" >> $GITHUB_ENV
+            echo "--------- diff ---------"
+            git diff "$BASE_REF" -- README.md
+            echo "------------------------"
+          fi
+
+      - name: Translate README.md to Chinese
+        if: env.skip_translation == 'false'
+        env:
+          API_URL: ${{ secrets.HIGRESS_OPENAI_API_URL }}
+          API_KEY: ${{ secrets.HIGRESS_OPENAI_API_KEY }}
+          API_MODEL: ${{ secrets.HIGRESS_OPENAI_API_MODEL }}
+        run: |
+          cat << 'EOF' > translate_readme.py
+          import os
+          import json
+          import requests
+
+          API_URL = os.environ["API_URL"]
+          API_KEY = os.environ["API_KEY"]
+          API_MODEL = os.environ["API_MODEL"]
+          README_PATH = "./helm/higress/README.md"
+          OUTPUT_PATH = "./helm/higress/README.zh.md"
+
+          def stream_translation(api_url, api_key, payload):
+              headers = {
+                  "Content-Type": "application/json",
+                  "Authorization": f"Bearer {api_key}",
+              }
+              response = requests.post(api_url, headers=headers, json=payload, stream=True)
+              response.raise_for_status()
+
+              with open(OUTPUT_PATH, "w", encoding="utf-8") as out_file:
+                  for line in response.iter_lines(decode_unicode=True):
+                      if line.strip() == "" or not line.startswith("data: "):
+                          continue
+                      data = line[6:]
+                      if data.strip() == "[DONE]":
+                          break
+                      try:
+                          chunk = json.loads(data)
+                          content = chunk["choices"][0]["delta"].get("content", "")
+                          if content:
+                              out_file.write(content)
+                      except Exception as e:
+                          print("Error parsing chunk:", e)
+
+          def main():
+              if not os.path.exists(README_PATH):
+                  print("README.md not found!")
+                  return
+
+              with open(README_PATH, "r", encoding="utf-8") as f:
+                  content = f.read()
+
+              payload = {
+                  "model": API_MODEL,
+                  "messages": [
+                      {
+                          "role": "system",
+                          "content": "You are a translation assistant that translates English Markdown text to Chinese. Preserve original Markdown formatting and line breaks."
+                      },
+                      {
+                          "role": "user",
+                          "content": content
+                      }
+                  ],
+                  "temperature": 0.3,
+                  "stream": True
+              }
+
+              print("Streaming translation started...")
+              stream_translation(API_URL, API_KEY, payload)
+              print(f"Translation completed and saved to {OUTPUT_PATH}.")
+
+          if __name__ == "__main__":
+              main()
+          EOF
+          
+          python3 translate_readme.py
+          rm -rf translate_readme.py
+
+      - name: Create Pull Request
+        if: env.skip_translation == 'false'
+        uses: peter-evans/create-pull-request@v7
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          commit-message: "Update helm translated README.zh.md"
+          branch: update-helm-readme-zh
+          title: "Update helm translated README.zh.md"
+          body: |
+            This PR updates the translated README.zh.md file.
+
+            - Automatically generated by GitHub Actions
+          labels: translation, automated
+          base: main
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -33,6 +33,7 @@ header:
    - 'hgctl/cmd/hgctl/config/testdata/config'
    - 'hgctl/pkg/manifests'
    - 'pkg/ingress/kube/gateway/istio/testdata'
+    - 'release-notes/**'

  comment: on-failure
 dependency:
--- a/Makefile.core.mk
+++ b/Makefile.core.mk
@@ -144,7 +144,7 @@ docker-buildx-push: clean-env docker.higress-buildx
 export PARENT_GIT_TAG:=$(shell cat VERSION)
 export PARENT_GIT_REVISION:=$(TAG)

-export ENVOY_PACKAGE_URL_PATTERN?=https://github.com/higress-group/proxy/releases/download/v2.1.5/envoy-symbol-ARCH.tar.gz
+export ENVOY_PACKAGE_URL_PATTERN?=https://github.com/higress-group/proxy/releases/download/v2.1.7/envoy-symbol-ARCH.tar.gz

 build-envoy: prebuild
 	./tools/hack/build-envoy.sh
@@ -191,6 +191,7 @@ install: pre-install
 	cd helm/higress; helm dependency build
 	helm install higress helm/higress -n higress-system --create-namespace --set 'global.local=true'

+HIGRESS_LATEST_IMAGE_TAG ?= latest
 ENVOY_LATEST_IMAGE_TAG ?= 958467a353d411ae3f06e03b096bfd342cddb2c6
 ISTIO_LATEST_IMAGE_TAG ?= d9c728d3b01f64855e012b08d136e306f1160397

@@ -268,10 +269,26 @@ higress-conformance-test-clean: $(tools/kind) delete-cluster
 .PHONY: higress-wasmplugin-test-prepare
 higress-wasmplugin-test-prepare: $(tools/kind) delete-cluster create-cluster docker-build kube-load-image install-dev-wasmplugin

+# higress-wasmplugin-test-prepare-skip-docker-build prepares the environment for higress wasmplugin tests without building the higress docker image; it exports TAG as HIGRESS_LATEST_IMAGE_TAG before invoking the sub-makes.
+.PHONY: higress-wasmplugin-test-prepare-skip-docker-build
+higress-wasmplugin-test-prepare-skip-docker-build: $(tools/kind) delete-cluster create-cluster prebuild
+	@export TAG="$(HIGRESS_LATEST_IMAGE_TAG)" && \
+	$(MAKE) kube-load-image && \
+	$(MAKE) install-dev-wasmplugin
+
 # higress-wasmplugin-test runs ingress wasmplugin tests.
 .PHONY: higress-wasmplugin-test
 higress-wasmplugin-test: $(tools/kind) delete-cluster create-cluster docker-build kube-load-image install-dev-wasmplugin run-higress-e2e-test-wasmplugin delete-cluster

+# higress-wasmplugin-test-skip-docker-build runs ingress wasmplugin tests without building the higress docker image; it exports TAG as HIGRESS_LATEST_IMAGE_TAG before invoking the sub-makes.
+.PHONY: higress-wasmplugin-test-skip-docker-build
+higress-wasmplugin-test-skip-docker-build: $(tools/kind) delete-cluster create-cluster prebuild
+	@export TAG="$(HIGRESS_LATEST_IMAGE_TAG)" && \
+	$(MAKE) kube-load-image && \
+	$(MAKE) install-dev-wasmplugin && \
+	$(MAKE) run-higress-e2e-test-wasmplugin && \
+	$(MAKE) delete-cluster
+
 # higress-wasmplugin-test-clean cleans the environment for higress wasmplugin tests.
 .PHONY: higress-wasmplugin-test-clean
 higress-wasmplugin-test-clean: $(tools/kind) delete-cluster
@@ -290,8 +307,12 @@ delete-cluster: $(tools/kind) ## Delete kind cluster.
 # dubbo-provider-demo和nacos-standlone-rc3的镜像已经上传到阿里云镜像库，第一次需要先拉到本地
 # docker pull registry.cn-hangzhou.aliyuncs.com/hinsteny/dubbo-provider-demo:0.0.1
 # docker pull registry.cn-hangzhou.aliyuncs.com/hinsteny/nacos-standlone-rc3:1.0.0-RC3
+# If TAG equals HIGRESS_LATEST_IMAGE_TAG, we are skipping the higress docker image build, so the image must be pulled first.
 .PHONY: kube-load-image
 kube-load-image: $(tools/kind) ## Install the Higress image to a kind cluster using the provided $IMAGE and $TAG.
+	@if [ "$(TAG)" = "$(HIGRESS_LATEST_IMAGE_TAG)" ]; then \
+		tools/hack/docker-pull-image.sh higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/higress $(TAG); \
+	fi
 	tools/hack/kind-load-image.sh higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/higress $(TAG)
 	tools/hack/docker-pull-image.sh higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/pilot $(ISTIO_LATEST_IMAGE_TAG)
 	tools/hack/docker-pull-image.sh higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/gateway $(ENVOY_LATEST_IMAGE_TAG)
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
    
 [![Build Status](https://github.com/alibaba/higress/actions/workflows/build-and-test.yaml/badge.svg?branch=main)](https://github.com/alibaba/higress/actions)
 [![license](https://img.shields.io/github/license/alibaba/higress.svg)](https://www.apache.org/licenses/LICENSE-2.0.html)
-[![discord](https://img.shields.io/discord/1364956090566971515?color=5865F2&label=discord&labelColor=black&logo=discord&logoColor=white&style=flat-square)](https://discord.gg/reymxYM5)
+[![discord](https://img.shields.io/discord/1364956090566971515?color=5865F2&label=discord&labelColor=black&logo=discord&logoColor=white&style=flat-square)](https://discord.gg/tSbww9VDaM)

 <a href="https://trendshift.io/repositories/10918" target="_blank"><img src="https://trendshift.io/api/badge/repositories/10918" alt="alibaba%2Fhigress | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a> <a href="https://www.producthunt.com/posts/higress?embed=true&utm_source=badge-featured&utm_medium=badge&utm_souce=badge-higress" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/featured.svg?post_id=951287&theme=light&t=1745492822283" alt="Higress - Global&#0032;APIs&#0032;as&#0032;MCP&#0032;powered&#0032;by&#0032;AI&#0032;Gateway | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>

@@ -24,7 +24,7 @@
   English | <a href="README_ZH.md">中文<a/> | <a href="README_JP.md">日本語<a/>
 </p>

-## Test What is Higress?
+## What is Higress?

 Higress is a cloud-native API gateway based on Istio and Envoy, which can be extended with Wasm plugins written in Go/Rust/JS. It provides dozens of ready-to-use general-purpose plugins and an out-of-the-box console (try the [demo here](http://demo.higress.io/)).

@@ -69,6 +69,10 @@ Port descriptions:

 > All Higress Docker images use Higress's own image repository and are not affected by Docker Hub rate limits.
 > In addition, the submission and updates of the images are protected by a security scanning mechanism (powered by Alibaba Cloud ACR), making them very secure for use in production environments.
+> 
+> If you experience a timeout when pulling images from `higress-registry.cn-hangzhou.cr.aliyuncs.com`, you can try replacing it with the following Docker registry mirror:
+> 
+> **Southeast Asia**: `higress-registry.ap-southeast-7.cr.aliyuncs.com`

 For other installation methods such as Helm deployment under K8s, please refer to the official [Quick Start documentation](https://higress.io/en-us/docs/user/quickstart).

@@ -143,7 +147,7 @@ For other installation methods such as Helm deployment under K8s, please refer t

 Join our Discord community! This is where you can connect with developers and other enthusiastic users of Higress.

-[![discord](https://img.shields.io/discord/1364956090566971515?color=5865F2&label=discord&labelColor=black&logo=discord&logoColor=white&style=for-the-badge)](https://discord.gg/reymxYM5)
+[![discord](https://img.shields.io/discord/1364956090566971515?color=5865F2&label=discord&labelColor=black&logo=discord&logoColor=white&style=for-the-badge)](https://discord.gg/tSbww9VDaM)


 ### Thanks
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-v2.1.3
+v2.1.5-rc.1
--- a/api/kubernetes/customresourcedefinitions.gen.yaml
+++ b/api/kubernetes/customresourcedefinitions.gen.yaml
@@ -250,6 +250,10 @@ spec:
              registries:
                items:
                  properties:
+                    allowMcpServers:
+                      items:
+                        type: string
+                      type: array
                    authSecretName:
                      type: string
                    consulDatacenter:
@@ -265,12 +269,23 @@ spec:
                      type: string
                    enableMCPServer:
                      type: boolean
+                    enableScopeMcpServers:
+                      type: boolean
                    mcpServerBaseUrl:
                      type: string
                    mcpServerExportDomains:
                      items:
                        type: string
                      type: array
+                    metadata:
+                      additionalProperties:
+                        properties:
+                          innerMap:
+                            additionalProperties:
+                              type: string
+                            type: object
+                        type: object
+                      type: object
                    nacosAccessKey:
                      type: string
                    nacosAddressServer:
--- a/api/networking/v1/mcp_bridge.pb.go
+++ b/api/networking/v1/mcp_bridge.pb.go
@@ -111,28 +111,31 @@ type RegistryConfig struct {
 	sizeCache     protoimpl.SizeCache
 	unknownFields protoimpl.UnknownFields

-	Type                   string              `protobuf:"bytes,1,opt,name=type,proto3" json:"type,omitempty"`
-	Name                   string              `protobuf:"bytes,2,opt,name=name,proto3" json:"name,omitempty"`
-	Domain                 string              `protobuf:"bytes,3,opt,name=domain,proto3" json:"domain,omitempty"`
-	Port                   uint32              `protobuf:"varint,4,opt,name=port,proto3" json:"port,omitempty"`
-	NacosAddressServer     string              `protobuf:"bytes,5,opt,name=nacosAddressServer,proto3" json:"nacosAddressServer,omitempty"`
-	NacosAccessKey         string              `protobuf:"bytes,6,opt,name=nacosAccessKey,proto3" json:"nacosAccessKey,omitempty"`
-	NacosSecretKey         string              `protobuf:"bytes,7,opt,name=nacosSecretKey,proto3" json:"nacosSecretKey,omitempty"`
-	NacosNamespaceId       string              `protobuf:"bytes,8,opt,name=nacosNamespaceId,proto3" json:"nacosNamespaceId,omitempty"`
-	NacosNamespace         string              `protobuf:"bytes,9,opt,name=nacosNamespace,proto3" json:"nacosNamespace,omitempty"`
-	NacosGroups            []string            `protobuf:"bytes,10,rep,name=nacosGroups,proto3" json:"nacosGroups,omitempty"`
-	NacosRefreshInterval   int64               `protobuf:"varint,11,opt,name=nacosRefreshInterval,proto3" json:"nacosRefreshInterval,omitempty"`
-	ConsulNamespace        string              `protobuf:"bytes,12,opt,name=consulNamespace,proto3" json:"consulNamespace,omitempty"`
-	ZkServicesPath         []string            `protobuf:"bytes,13,rep,name=zkServicesPath,proto3" json:"zkServicesPath,omitempty"`
-	ConsulDatacenter       string              `protobuf:"bytes,14,opt,name=consulDatacenter,proto3" json:"consulDatacenter,omitempty"`
-	ConsulServiceTag       string              `protobuf:"bytes,15,opt,name=consulServiceTag,proto3" json:"consulServiceTag,omitempty"`
-	ConsulRefreshInterval  int64               `protobuf:"varint,16,opt,name=consulRefreshInterval,proto3" json:"consulRefreshInterval,omitempty"`
-	AuthSecretName         string              `protobuf:"bytes,17,opt,name=authSecretName,proto3" json:"authSecretName,omitempty"`
-	Protocol               string              `protobuf:"bytes,18,opt,name=protocol,proto3" json:"protocol,omitempty"`
-	Sni                    string              `protobuf:"bytes,19,opt,name=sni,proto3" json:"sni,omitempty"`
-	McpServerExportDomains []string            `protobuf:"bytes,20,rep,name=mcpServerExportDomains,proto3" json:"mcpServerExportDomains,omitempty"`
-	McpServerBaseUrl       string              `protobuf:"bytes,21,opt,name=mcpServerBaseUrl,proto3" json:"mcpServerBaseUrl,omitempty"`
-	EnableMCPServer        *wrappers.BoolValue `protobuf:"bytes,22,opt,name=enableMCPServer,proto3" json:"enableMCPServer,omitempty"`
+	Type                   string               `protobuf:"bytes,1,opt,name=type,proto3" json:"type,omitempty"`
+	Name                   string               `protobuf:"bytes,2,opt,name=name,proto3" json:"name,omitempty"`
+	Domain                 string               `protobuf:"bytes,3,opt,name=domain,proto3" json:"domain,omitempty"`
+	Port                   uint32               `protobuf:"varint,4,opt,name=port,proto3" json:"port,omitempty"`
+	NacosAddressServer     string               `protobuf:"bytes,5,opt,name=nacosAddressServer,proto3" json:"nacosAddressServer,omitempty"`
+	NacosAccessKey         string               `protobuf:"bytes,6,opt,name=nacosAccessKey,proto3" json:"nacosAccessKey,omitempty"`
+	NacosSecretKey         string               `protobuf:"bytes,7,opt,name=nacosSecretKey,proto3" json:"nacosSecretKey,omitempty"`
+	NacosNamespaceId       string               `protobuf:"bytes,8,opt,name=nacosNamespaceId,proto3" json:"nacosNamespaceId,omitempty"`
+	NacosNamespace         string               `protobuf:"bytes,9,opt,name=nacosNamespace,proto3" json:"nacosNamespace,omitempty"`
+	NacosGroups            []string             `protobuf:"bytes,10,rep,name=nacosGroups,proto3" json:"nacosGroups,omitempty"`
+	NacosRefreshInterval   int64                `protobuf:"varint,11,opt,name=nacosRefreshInterval,proto3" json:"nacosRefreshInterval,omitempty"`
+	ConsulNamespace        string               `protobuf:"bytes,12,opt,name=consulNamespace,proto3" json:"consulNamespace,omitempty"`
+	ZkServicesPath         []string             `protobuf:"bytes,13,rep,name=zkServicesPath,proto3" json:"zkServicesPath,omitempty"`
+	ConsulDatacenter       string               `protobuf:"bytes,14,opt,name=consulDatacenter,proto3" json:"consulDatacenter,omitempty"`
+	ConsulServiceTag       string               `protobuf:"bytes,15,opt,name=consulServiceTag,proto3" json:"consulServiceTag,omitempty"`
+	ConsulRefreshInterval  int64                `protobuf:"varint,16,opt,name=consulRefreshInterval,proto3" json:"consulRefreshInterval,omitempty"`
+	AuthSecretName         string               `protobuf:"bytes,17,opt,name=authSecretName,proto3" json:"authSecretName,omitempty"`
+	Protocol               string               `protobuf:"bytes,18,opt,name=protocol,proto3" json:"protocol,omitempty"`
+	Sni                    string               `protobuf:"bytes,19,opt,name=sni,proto3" json:"sni,omitempty"`
+	McpServerExportDomains []string             `protobuf:"bytes,20,rep,name=mcpServerExportDomains,proto3" json:"mcpServerExportDomains,omitempty"`
+	McpServerBaseUrl       string               `protobuf:"bytes,21,opt,name=mcpServerBaseUrl,proto3" json:"mcpServerBaseUrl,omitempty"`
+	EnableMCPServer        *wrappers.BoolValue  `protobuf:"bytes,22,opt,name=enableMCPServer,proto3" json:"enableMCPServer,omitempty"`
+	EnableScopeMcpServers  *wrappers.BoolValue  `protobuf:"bytes,23,opt,name=enableScopeMcpServers,proto3" json:"enableScopeMcpServers,omitempty"`
+	AllowMcpServers        []string             `protobuf:"bytes,24,rep,name=allowMcpServers,proto3" json:"allowMcpServers,omitempty"`
+	Metadata               map[string]*InnerMap `protobuf:"bytes,25,rep,name=metadata,proto3" json:"metadata,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
 }

 func (x *RegistryConfig) Reset() {
@@ -321,6 +324,74 @@ func (x *RegistryConfig) GetEnableMCPServer() *wrappers.BoolValue {
 	return nil
 }

+func (x *RegistryConfig) GetEnableScopeMcpServers() *wrappers.BoolValue {
+	if x != nil {
+		return x.EnableScopeMcpServers
+	}
+	return nil
+}
+
+func (x *RegistryConfig) GetAllowMcpServers() []string {
+	if x != nil {
+		return x.AllowMcpServers
+	}
+	return nil
+}
+
+func (x *RegistryConfig) GetMetadata() map[string]*InnerMap {
+	if x != nil {
+		return x.Metadata
+	}
+	return nil
+}
+
+type InnerMap struct {
+	state         protoimpl.MessageState
+	sizeCache     protoimpl.SizeCache
+	unknownFields protoimpl.UnknownFields
+
+	InnerMap map[string]string `protobuf:"bytes,1,rep,name=inner_map,json=innerMap,proto3" json:"inner_map,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
+}
+
+func (x *InnerMap) Reset() {
+	*x = InnerMap{}
+	if protoimpl.UnsafeEnabled {
+		mi := &file_networking_v1_mcp_bridge_proto_msgTypes[2]
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		ms.StoreMessageInfo(mi)
+	}
+}
+
+func (x *InnerMap) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*InnerMap) ProtoMessage() {}
+
+func (x *InnerMap) ProtoReflect() protoreflect.Message {
+	mi := &file_networking_v1_mcp_bridge_proto_msgTypes[2]
+	if protoimpl.UnsafeEnabled && x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use InnerMap.ProtoReflect.Descriptor instead.
+func (*InnerMap) Descriptor() ([]byte, []int) {
+	return file_networking_v1_mcp_bridge_proto_rawDescGZIP(), []int{2}
+}
+
+func (x *InnerMap) GetInnerMap() map[string]string {
+	if x != nil {
+		return x.InnerMap
+	}
+	return nil
+}
+
 var File_networking_v1_mcp_bridge_proto protoreflect.FileDescriptor

 var file_networking_v1_mcp_bridge_proto_rawDesc = []byte{
@@ -338,7 +409,7 @@ var file_networking_v1_mcp_bridge_proto_rawDesc = []byte{
 	0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x68, 0x69, 0x67, 0x72, 0x65, 0x73,
 	0x73, 0x2e, 0x6e, 0x65, 0x74, 0x77, 0x6f, 0x72, 0x6b, 0x69, 0x6e, 0x67, 0x2e, 0x76, 0x31, 0x2e,
 	0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x79, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x52, 0x0a,
-	0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x69, 0x65, 0x73, 0x22, 0xfd, 0x06, 0x0a, 0x0e, 0x52,
+	0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x69, 0x65, 0x73, 0x22, 0xa8, 0x09, 0x0a, 0x0e, 0x52,
 	0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x79, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x17, 0x0a,
 	0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, 0x41, 0x02,
 	0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02,
@@ -394,11 +465,39 @@ var file_networking_v1_mcp_bridge_proto_rawDesc = []byte{
 	0x50, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x18, 0x16, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e,
 	0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e,
 	0x42, 0x6f, 0x6f, 0x6c, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x0f, 0x65, 0x6e, 0x61, 0x62, 0x6c,
-	0x65, 0x4d, 0x43, 0x50, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x42, 0x2e, 0x5a, 0x2c, 0x67, 0x69,
-	0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x61, 0x6c, 0x69, 0x62, 0x61, 0x62, 0x61,
-	0x2f, 0x68, 0x69, 0x67, 0x72, 0x65, 0x73, 0x73, 0x2f, 0x61, 0x70, 0x69, 0x2f, 0x6e, 0x65, 0x74,
-	0x77, 0x6f, 0x72, 0x6b, 0x69, 0x6e, 0x67, 0x2f, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74,
-	0x6f, 0x33,
+	0x65, 0x4d, 0x43, 0x50, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x12, 0x50, 0x0a, 0x15, 0x65, 0x6e,
+	0x61, 0x62, 0x6c, 0x65, 0x53, 0x63, 0x6f, 0x70, 0x65, 0x4d, 0x63, 0x70, 0x53, 0x65, 0x72, 0x76,
+	0x65, 0x72, 0x73, 0x18, 0x17, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67,
+	0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x42, 0x6f, 0x6f, 0x6c,
+	0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x15, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x53, 0x63, 0x6f,
+	0x70, 0x65, 0x4d, 0x63, 0x70, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x73, 0x12, 0x28, 0x0a, 0x0f,
+	0x61, 0x6c, 0x6c, 0x6f, 0x77, 0x4d, 0x63, 0x70, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x73, 0x18,
+	0x18, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0f, 0x61, 0x6c, 0x6c, 0x6f, 0x77, 0x4d, 0x63, 0x70, 0x53,
+	0x65, 0x72, 0x76, 0x65, 0x72, 0x73, 0x12, 0x4f, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61,
+	0x74, 0x61, 0x18, 0x19, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x33, 0x2e, 0x68, 0x69, 0x67, 0x72, 0x65,
+	0x73, 0x73, 0x2e, 0x6e, 0x65, 0x74, 0x77, 0x6f, 0x72, 0x6b, 0x69, 0x6e, 0x67, 0x2e, 0x76, 0x31,
+	0x2e, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x79, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e,
+	0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x6d,
+	0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x1a, 0x5c, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x61, 0x64,
+	0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18,
+	0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x35, 0x0a, 0x05, 0x76, 0x61,
+	0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x68, 0x69, 0x67, 0x72,
+	0x65, 0x73, 0x73, 0x2e, 0x6e, 0x65, 0x74, 0x77, 0x6f, 0x72, 0x6b, 0x69, 0x6e, 0x67, 0x2e, 0x76,
+	0x31, 0x2e, 0x49, 0x6e, 0x6e, 0x65, 0x72, 0x4d, 0x61, 0x70, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75,
+	0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x93, 0x01, 0x0a, 0x08, 0x49, 0x6e, 0x6e, 0x65, 0x72, 0x4d,
+	0x61, 0x70, 0x12, 0x4a, 0x0a, 0x09, 0x69, 0x6e, 0x6e, 0x65, 0x72, 0x5f, 0x6d, 0x61, 0x70, 0x18,
+	0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2d, 0x2e, 0x68, 0x69, 0x67, 0x72, 0x65, 0x73, 0x73, 0x2e,
+	0x6e, 0x65, 0x74, 0x77, 0x6f, 0x72, 0x6b, 0x69, 0x6e, 0x67, 0x2e, 0x76, 0x31, 0x2e, 0x49, 0x6e,
+	0x6e, 0x65, 0x72, 0x4d, 0x61, 0x70, 0x2e, 0x49, 0x6e, 0x6e, 0x65, 0x72, 0x4d, 0x61, 0x70, 0x45,
+	0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x69, 0x6e, 0x6e, 0x65, 0x72, 0x4d, 0x61, 0x70, 0x1a, 0x3b,
+	0x0a, 0x0d, 0x49, 0x6e, 0x6e, 0x65, 0x72, 0x4d, 0x61, 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12,
+	0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65,
+	0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09,
+	0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x42, 0x2e, 0x5a, 0x2c, 0x67,
+	0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x61, 0x6c, 0x69, 0x62, 0x61, 0x62,
+	0x61, 0x2f, 0x68, 0x69, 0x67, 0x72, 0x65, 0x73, 0x73, 0x2f, 0x61, 0x70, 0x69, 0x2f, 0x6e, 0x65,
+	0x74, 0x77, 0x6f, 0x72, 0x6b, 0x69, 0x6e, 0x67, 0x2f, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f,
+	0x74, 0x6f, 0x33,
 }

 var (
@@ -413,20 +512,27 @@ func file_networking_v1_mcp_bridge_proto_rawDescGZIP() []byte {
 	return file_networking_v1_mcp_bridge_proto_rawDescData
 }

-var file_networking_v1_mcp_bridge_proto_msgTypes = make([]protoimpl.MessageInfo, 2)
+var file_networking_v1_mcp_bridge_proto_msgTypes = make([]protoimpl.MessageInfo, 5)
 var file_networking_v1_mcp_bridge_proto_goTypes = []interface{}{
 	(*McpBridge)(nil),          // 0: higress.networking.v1.McpBridge
 	(*RegistryConfig)(nil),     // 1: higress.networking.v1.RegistryConfig
-	(*wrappers.BoolValue)(nil), // 2: google.protobuf.BoolValue
+	(*InnerMap)(nil),           // 2: higress.networking.v1.InnerMap
+	nil,                        // 3: higress.networking.v1.RegistryConfig.MetadataEntry
+	nil,                        // 4: higress.networking.v1.InnerMap.InnerMapEntry
+	(*wrappers.BoolValue)(nil), // 5: google.protobuf.BoolValue
 }
 var file_networking_v1_mcp_bridge_proto_depIdxs = []int32{
 	1, // 0: higress.networking.v1.McpBridge.registries:type_name -> higress.networking.v1.RegistryConfig
-	2, // 1: higress.networking.v1.RegistryConfig.enableMCPServer:type_name -> google.protobuf.BoolValue
-	2, // [2:2] is the sub-list for method output_type
-	2, // [2:2] is the sub-list for method input_type
-	2, // [2:2] is the sub-list for extension type_name
-	2, // [2:2] is the sub-list for extension extendee
-	0, // [0:2] is the sub-list for field type_name
+	5, // 1: higress.networking.v1.RegistryConfig.enableMCPServer:type_name -> google.protobuf.BoolValue
+	5, // 2: higress.networking.v1.RegistryConfig.enableScopeMcpServers:type_name -> google.protobuf.BoolValue
+	3, // 3: higress.networking.v1.RegistryConfig.metadata:type_name -> higress.networking.v1.RegistryConfig.MetadataEntry
+	4, // 4: higress.networking.v1.InnerMap.inner_map:type_name -> higress.networking.v1.InnerMap.InnerMapEntry
+	2, // 5: higress.networking.v1.RegistryConfig.MetadataEntry.value:type_name -> higress.networking.v1.InnerMap
+	6, // [6:6] is the sub-list for method output_type
+	6, // [6:6] is the sub-list for method input_type
+	6, // [6:6] is the sub-list for extension type_name
+	6, // [6:6] is the sub-list for extension extendee
+	0, // [0:6] is the sub-list for field type_name
 }

 func init() { file_networking_v1_mcp_bridge_proto_init() }
@@ -459,6 +565,18 @@ func file_networking_v1_mcp_bridge_proto_init() {
 				return nil
 			}
 		}
+		file_networking_v1_mcp_bridge_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} {
+			switch v := v.(*InnerMap); i {
+			case 0:
+				return &v.state
+			case 1:
+				return &v.sizeCache
+			case 2:
+				return &v.unknownFields
+			default:
+				return nil
+			}
+		}
 	}
 	type x struct{}
 	out := protoimpl.TypeBuilder{
@@ -466,7 +584,7 @@ func file_networking_v1_mcp_bridge_proto_init() {
 			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
 			RawDescriptor: file_networking_v1_mcp_bridge_proto_rawDesc,
 			NumEnums:      0,
-			NumMessages:   2,
+			NumMessages:   5,
 			NumExtensions: 0,
 			NumServices:   0,
 		},
--- a/api/networking/v1/mcp_bridge.proto
+++ b/api/networking/v1/mcp_bridge.proto
@@ -71,4 +71,11 @@ message RegistryConfig {
  repeated string mcpServerExportDomains = 20;
  string mcpServerBaseUrl = 21;
  google.protobuf.BoolValue enableMCPServer = 22;
+  google.protobuf.BoolValue enableScopeMcpServers = 23;
+  repeated string allowMcpServers = 24;
+  map<string, InnerMap> metadata = 25;
 }
+
+// InnerMap wraps a string-to-string map in a message so that it can be used as
+// the value type of another map field (see RegistryConfig.metadata); proto3 map
+// values cannot themselves be maps.
+message InnerMap {
+  // Key/value pairs carried by this entry.
+  map<string, string> inner_map = 1;
+}
--- a/api/networking/v1/mcp_bridge_deepcopy.gen.go
+++ b/api/networking/v1/mcp_bridge_deepcopy.gen.go
@@ -46,3 +46,24 @@ func (in *RegistryConfig) DeepCopy() *RegistryConfig {
 func (in *RegistryConfig) DeepCopyInterface() interface{} {
 	return in.DeepCopy()
 }
+
+// DeepCopyInto supports using InnerMap within kubernetes types, where deepcopy-gen is used.
+func (in *InnerMap) DeepCopyInto(out *InnerMap) {
+	p := proto.Clone(in).(*InnerMap)
+	*out = *p
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InnerMap. Required by controller-gen.
+func (in *InnerMap) DeepCopy() *InnerMap {
+	if in == nil {
+		return nil
+	}
+	out := new(InnerMap)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInterface is an autogenerated deepcopy function, copying the receiver, creating a new InnerMap. Required by controller-gen.
+func (in *InnerMap) DeepCopyInterface() interface{} {
+	return in.DeepCopy()
+}
--- a/api/networking/v1/mcp_bridge_json.gen.go
+++ b/api/networking/v1/mcp_bridge_json.gen.go
@@ -28,6 +28,17 @@ func (this *RegistryConfig) UnmarshalJSON(b []byte) error {
 	return McpBridgeUnmarshaler.Unmarshal(bytes.NewReader(b), this)
 }

+// MarshalJSON is a custom marshaler for InnerMap
+func (this *InnerMap) MarshalJSON() ([]byte, error) {
+	str, err := McpBridgeMarshaler.MarshalToString(this)
+	return []byte(str), err
+}
+
+// UnmarshalJSON is a custom unmarshaler for InnerMap
+func (this *InnerMap) UnmarshalJSON(b []byte) error {
+	return McpBridgeUnmarshaler.Unmarshal(bytes.NewReader(b), this)
+}
+
 var (
 	McpBridgeMarshaler   = &jsonpb.Marshaler{}
 	McpBridgeUnmarshaler = &jsonpb.Unmarshaler{AllowUnknownFields: true}
--- a/envoy/envoy
+++ b/envoy/envoy
--- a/go.mod
+++ b/go.mod
@@ -31,7 +31,7 @@ require (
 	github.com/hudl/fargo v1.4.0
 	github.com/mholt/acmez v1.2.0
 	github.com/nacos-group/nacos-sdk-go v1.0.8
-	github.com/nacos-group/nacos-sdk-go/v2 v2.1.2
+	github.com/nacos-group/nacos-sdk-go/v2 v2.3.2
 	github.com/onsi/gomega v1.27.10
 	github.com/spf13/cobra v1.8.0
 	github.com/spf13/pflag v1.0.5
@@ -202,6 +202,7 @@ require (
 	github.com/spaolacci/murmur3 v1.1.0 // indirect
 	github.com/spf13/cast v1.5.1 // indirect
 	github.com/stoewer/go-strcase v1.3.0 // indirect
+	github.com/stretchr/objx v0.5.0 // indirect
 	github.com/tetratelabs/wazero v1.7.3 // indirect
 	github.com/tidwall/match v1.1.1 // indirect
 	github.com/tidwall/pretty v1.2.0 // indirect
@@ -274,6 +275,5 @@ replace github.com/caddyserver/certmagic => github.com/2456868764/certmagic v1.0

 replace (
 	github.com/dubbogo/gost => github.com/johnlanni/gost v1.11.23-0.20220713132522-0967a24036c6
-	github.com/nacos-group/nacos-sdk-go/v2 => github.com/luoxiner/nacos-sdk-go/v2 v2.2.9-60
 	golang.org/x/exp => golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1
 )
--- a/go.sum
+++ b/go.sum
@@ -1434,8 +1434,6 @@ github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhn
 github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE=
 github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM=
 github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4=
-github.com/luoxiner/nacos-sdk-go/v2 v2.2.9-60 h1:FA/azfz2nSkMc1XR8LeqhcAiA/2/sOMcyBGYCTUc+Cs=
-github.com/luoxiner/nacos-sdk-go/v2 v2.2.9-60/go.mod h1:9FKXl6FqOiVmm72i8kADtbeK71egyG9y3uRDBg41tpQ=
 github.com/lyft/protoc-gen-star v0.6.1/go.mod h1:TGAoBVkt8w7MPG72TrKIu85MIdXwDuzJYeZuUPFPNwA=
 github.com/lyft/protoc-gen-star/v2 v2.0.1/go.mod h1:RcCdONR2ScXaYnQC5tUzxzlpA3WVYF7/opLeUgcQs/o=
 github.com/lyft/protoc-gen-star/v2 v2.0.3/go.mod h1:amey7yeodaJhXSbf/TlLvWiqQfLOSpEk//mLlc+axEk=
@@ -1525,6 +1523,8 @@ github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRW
 github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
 github.com/nacos-group/nacos-sdk-go v1.0.8 h1:8pEm05Cdav9sQgJSv5kyvlgfz0SzFUUGI3pWX6SiSnM=
 github.com/nacos-group/nacos-sdk-go v1.0.8/go.mod h1:hlAPn3UdzlxIlSILAyOXKxjFSvDJ9oLzTJ9hLAK1KzA=
+github.com/nacos-group/nacos-sdk-go/v2 v2.3.2 h1:9QB2nCJzT5wkTVlxNYl3XL/7+G6p2USMi2gQh/ouQQo=
+github.com/nacos-group/nacos-sdk-go/v2 v2.3.2/go.mod h1:9FKXl6FqOiVmm72i8kADtbeK71egyG9y3uRDBg41tpQ=
 github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg=
 github.com/nats-io/jwt v0.3.2/go.mod h1:/euKqTS1ZD+zzjYrY7pseZrTtWQSjujC7xjPc8wL6eU=
 github.com/nats-io/nats-server/v2 v2.1.2/go.mod h1:Afk+wRZqkMQs/p45uXdrVLuab3gwv3Z8C4HTBu8GD/k=
--- a/helm/core/Chart.yaml
+++ b/helm/core/Chart.yaml
@@ -1,5 +1,5 @@
 apiVersion: v2
-appVersion: 2.1.3
+appVersion: 2.1.5-rc.1
 description: Helm chart for deploying higress gateways
 icon: https://higress.io/img/higress_logo_small.png
 home: http://higress.io/
@@ -15,4 +15,4 @@ dependencies:
    repository: "file://../redis"
    version: 0.0.1
 type: application
-version: 2.1.3
+version: 2.1.5-rc.1
--- a/helm/core/crds/customresourcedefinitions.gen.yaml
+++ b/helm/core/crds/customresourcedefinitions.gen.yaml
@@ -250,6 +250,10 @@ spec:
              registries:
                items:
                  properties:
+                    allowMcpServers:
+                      items:
+                        type: string
+                      type: array
                    authSecretName:
                      type: string
                    consulDatacenter:
@@ -263,6 +267,25 @@ spec:
                      type: string
                    domain:
                      type: string
+                    enableMCPServer:
+                      type: boolean
+                    enableScopeMcpServers:
+                      type: boolean
+                    mcpServerBaseUrl:
+                      type: string
+                    mcpServerExportDomains:
+                      items:
+                        type: string
+                      type: array
+                    metadata:
+                      additionalProperties:
+                        properties:
+                          innerMap:
+                            additionalProperties:
+                              type: string
+                            type: object
+                        type: object
+                      type: object
                    nacosAccessKey:
                      type: string
                    nacosAddressServer:
--- a/helm/core/templates/_helpers.tpl
+++ b/helm/core/templates/_helpers.tpl
@@ -113,3 +113,36 @@ kind: VMPodScrape
 {{- fail "unexpected gateway.metrics.provider" -}}
 {{- end -}}
 {{- end -}}
+
+{{/*
+Name used for the plugin server resources: .Values.pluginServer.name, or
+"higress-plugin-server" when that value is empty.
+*/}}
+{{- define "pluginServer.name" -}}
+{{- .Values.pluginServer.name | default "higress-plugin-server" -}}
+{{- end }}
+
+{{/*
+Chart name and version joined by "-", with "+" replaced by "_", truncated to
+63 characters and with a trailing "-" removed. Used as the helm.sh/chart label value.
+*/}}
+{{- define "pluginServer.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Full label set for plugin server resources: the chart label, the selector
+labels, the app version (only when .Chart.AppVersion is set), the managing
+release service and the resource name.
+*/}}
+{{- define "pluginServer.labels" -}}
+helm.sh/chart: {{ include "pluginServer.chart" . }}
+{{ include "pluginServer.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+app.kubernetes.io/name: {{ include "pluginServer.name" . }}
+{{- end }}
+
+{{/*
+Selector labels ("app" and "higress") for the plugin server.
+For each of the two keys:
+  - key absent from .Values.pluginServer.labels: the label defaults to pluginServer.name;
+  - key present with a non-empty value: that value is emitted, quoted;
+  - key present with an empty value: the label is omitted entirely.
+NOTE(review): emptying both keys yields an empty selector - confirm that is never intended.
+*/}}
+{{- define "pluginServer.selectorLabels" -}}
+{{- if hasKey .Values.pluginServer.labels "app" }}
+{{- with .Values.pluginServer.labels.app }}app: {{.|quote}}
+{{- end}}
+{{- else }}app: {{ include "pluginServer.name" . }}
+{{- end }}
+{{- if hasKey .Values.pluginServer.labels "higress" }}
+{{- with .Values.pluginServer.labels.higress }}
+higress: {{.|quote}}
+{{- end}}
+{{- else }}
+higress: {{ include "pluginServer.name" . }}
+{{- end }}
+{{- end }}
--- a/helm/core/templates/plugin-server-deployment.yaml
+++ b/helm/core/templates/plugin-server-deployment.yaml
@@ -0,0 +1,39 @@
+{{- if .Values.global.enablePluginServer }}
+{{- /*
+Deployment of the Higress plugin server; rendered only when
+global.enablePluginServer is true.
+  - metadata.labels carries the same pluginServer.labels set as the Service.
+  - Pod labels are pluginServer.podLabels followed by the selector labels.
+  - The image is <pluginServer.hub or global.hub>/<pluginServer.image or
+    "plugin-server">:<pluginServer.tag or "1.0.0">.
+  - resources is rendered verbatim from pluginServer.resources, so partial
+    overrides and additional resource names are passed through, and the
+    section is omitted when the value is empty.
+*/}}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "pluginServer.name" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "pluginServer.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.pluginServer.replicas }}
+  selector:
+    matchLabels:
+      {{- include "pluginServer.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      labels:
+        {{- with .Values.pluginServer.podLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+        {{- include "pluginServer.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.pluginServer.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: {{ .Chart.Name }}
+          image: {{ .Values.pluginServer.hub | default .Values.global.hub }}/{{ .Values.pluginServer.image | default "plugin-server" }}:{{ .Values.pluginServer.tag | default "1.0.0" }}
+          {{- if .Values.global.imagePullPolicy }}
+          imagePullPolicy: {{ .Values.global.imagePullPolicy }}
+          {{- end }}
+          ports:
+            - containerPort: 8080
+          {{- with .Values.pluginServer.resources }}
+          resources:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+{{- end }}
--- a/helm/core/templates/plugin-server-service.yaml
+++ b/helm/core/templates/plugin-server-service.yaml
@@ -0,0 +1,16 @@
+{{- if .Values.global.enablePluginServer }}
+{{- /*
+Service in front of the plugin server pods; rendered only when
+global.enablePluginServer is true. It exposes pluginServer.service.port and
+forwards to port 8080, the containerPort declared by the plugin server
+Deployment template. Pods are selected with pluginServer.selectorLabels.
+*/}}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "pluginServer.name" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "pluginServer.labels" . | nindent 4 }}
+spec:
+  ports:
+    - protocol: TCP
+      port: {{ .Values.pluginServer.service.port }}
+      targetPort: 8080
+  selector:
+    {{- include "pluginServer.selectorLabels" . | nindent 4 }}
+{{- end }}
--- a/helm/core/values.yaml
+++ b/helm/core/values.yaml
@@ -11,6 +11,7 @@ global:
  enableSRDS: true
  # -- Whether to enable Redis(redis-stack-server) for Higress, default is false.
  enableRedis: false
+  enablePluginServer: false
  onDemandRDS: false
  hostRDSMergeSubset: false
  onlyPushRouteCluster: true
@@ -767,4 +768,31 @@ redis:
      accessModes:
        - ReadWriteOnce
      # -- Persistent Volume size
-      size: 1Gi
+      size: 1Gi
+
+# Settings for the optional plugin server; its Deployment and Service are only
+# rendered when global.enablePluginServer is true.
+pluginServer:
+  name: "higress-plugin-server"
+  # -- Number of Higress Plugin Server pods, 2 recommended for high availability
+  replicas: 2
+  image: plugin-server
+
+  # Image registry/repository prefix; the Deployment template falls back to
+  # global.hub when this is empty.
+  hub: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress
+  # Image tag; the Deployment template falls back to "1.0.0" when this is empty.
+  tag: ""
+
+  imagePullSecrets: []
+
+  # Optional "app" / "higress" keys set here override the corresponding
+  # selector labels, which otherwise default to the plugin server name.
+  labels: {}
+  # -- Labels to apply to the pod
+  podLabels: {}
+
+  # Plugin-server Service configuration
+  service:
+    # Port exposed by the Service. The container (target) port is fixed at
+    # 8080 in the templates and is not configurable here.
+    port: 80
+
+  resources:
+    requests:
+      cpu: 200m
+      memory: 128Mi
+    limits:
+      cpu: 500m
+      memory: 256Mi
--- a/helm/higress/Chart.lock
+++ b/helm/higress/Chart.lock
@@ -1,9 +1,9 @@
 dependencies:
 - name: higress-core
  repository: file://../core
-  version: 2.1.3
+  version: 2.1.5-rc.1
 - name: higress-console
  repository: https://higress.io/helm-charts/
-  version: 2.1.3
-digest: sha256:c7307d5398c3c1178758c5372bd1aa4cb8dee7beeab3832d3e9ce0a04d1adc23
-generated: "2025-05-09T15:29:50.616179+08:00"
+  version: 2.1.4
+digest: sha256:6dbbfb24eabe0927a167c11896799ea20c7f8590aa2889b853dc9a210d075d3a
+generated: "2025-06-18T09:15:09.621898+08:00"
--- a/helm/higress/Chart.yaml
+++ b/helm/higress/Chart.yaml
@@ -1,5 +1,5 @@
 apiVersion: v2
-appVersion: 2.1.3
+appVersion: 2.1.5-rc.1
 description: Helm chart for deploying Higress gateways
 icon: https://higress.io/img/higress_logo_small.png
 home: http://higress.io/
@@ -12,9 +12,9 @@ sources:
 dependencies:
 - name: higress-core
  repository: "file://../core"
-  version: 2.1.3
+  version: 2.1.5-rc.1
 - name: higress-console
  repository: "https://higress.io/helm-charts/"
-  version: 2.1.3
+  version: 2.1.4
 type: application
-version: 2.1.3
+version: 2.1.5-rc.1
--- a/helm/higress/README.md
+++ b/helm/higress/README.md
@@ -165,6 +165,7 @@ The command removes all the Kubernetes components associated with the chart and
 | global.enableIPv6 | bool | `false` |  |
 | global.enableIstioAPI | bool | `true` | If true, Higress Controller will monitor istio resources as well |
 | global.enableLDSCache | bool | `false` |  |
+| global.enablePluginServer | bool | `false` |  |
 | global.enableProxyProtocol | bool | `false` |  |
 | global.enablePushAllMCPClusters | bool | `true` |  |
 | global.enableRedis | bool | `false` | Whether to enable Redis(redis-stack-server) for Higress, default is false. |
@@ -273,6 +274,19 @@ The command removes all the Kubernetes components associated with the chart and
 | pilot.serviceAnnotations | object | `{}` |  |
 | pilot.tag | string | `""` |  |
 | pilot.traceSampling | float | `1` |  |
+| pluginServer.hub | string | `"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress"` |  |
+| pluginServer.image | string | `"plugin-server"` |  |
+| pluginServer.imagePullSecrets | list | `[]` |  |
+| pluginServer.labels | object | `{}` |  |
+| pluginServer.name | string | `"higress-plugin-server"` |  |
+| pluginServer.podLabels | object | `{}` | Labels to apply to the pod |
+| pluginServer.replicas | int | `2` | Number of Higress Plugin Server pods, 2 recommended for high availability |
+| pluginServer.resources.limits.cpu | string | `"500m"` |  |
+| pluginServer.resources.limits.memory | string | `"256Mi"` |  |
+| pluginServer.resources.requests.cpu | string | `"200m"` |  |
+| pluginServer.resources.requests.memory | string | `"128Mi"` |  |
+| pluginServer.service.port | int | `80` |  |
+| pluginServer.tag | string | `""` |  |
 | redis.redis.affinity | object | `{}` | Affinity for Redis |
 | redis.redis.image | string | `"redis-stack-server"` | Specify the image |
 | redis.redis.name | string | `"redis-stack-server"` |  |
--- a/istio/istio
+++ b/istio/istio
--- a/pkg/ingress/config/ingress_config.go
+++ b/pkg/ingress/config/ingress_config.go
@@ -63,6 +63,7 @@ import (
 	"github.com/alibaba/higress/pkg/ingress/kube/ingress"
 	"github.com/alibaba/higress/pkg/ingress/kube/ingressv1"
 	"github.com/alibaba/higress/pkg/ingress/kube/mcpbridge"
+	"github.com/alibaba/higress/pkg/ingress/kube/mcpserver"
 	"github.com/alibaba/higress/pkg/ingress/kube/secret"
 	"github.com/alibaba/higress/pkg/ingress/kube/util"
 	"github.com/alibaba/higress/pkg/ingress/kube/wasmplugin"
@@ -158,6 +159,8 @@ type IngressConfig struct {

 	// secretConfigMgr manages secret dependencies
 	secretConfigMgr *SecretConfigMgr
+
+	mcpServerCache mcpserver.McpServerCache
 }

 // getSecretValue implements the getValue function for secret references
@@ -224,6 +227,7 @@ func NewIngressConfig(localKubeClient kube.Client, xdsUpdater istiomodel.XDSUpda

 	higressConfigController := configmap.NewController(localKubeClient, clusterId, namespace)
 	config.configmapMgr = configmap.NewConfigmapMgr(xdsUpdater, namespace, higressConfigController, higressConfigController.Lister())
+	config.configmapMgr.RegisterMcpServerProvider(&config.mcpServerCache)

 	httpsConfigMgr, _ := cert.NewConfigMgr(namespace, localKubeClient.Kube())
 	config.httpsConfigMgr = httpsConfigMgr
@@ -421,6 +425,10 @@ func (m *IngressConfig) createWrapperConfigs(configs []config.Config) []common.W
 	m.watchedSecretSet = globalContext.WatchedSecrets
 	m.mutex.Unlock()

+	if m.mcpServerCache.SetMcpServers(globalContext.McpServers) {
+		m.notifyXDSFullUpdate(mcpserver.GvkMcpServer, "mcp-server-annotation-change", nil)
+	}
+
 	return wrapperConfigs
 }

@@ -590,7 +598,7 @@ func (m *IngressConfig) convertVirtualService(configs []common.WrapperConfig) []
 			Spec: vs,
 		})
 	}
-	// add vs from naco3 for mcp server
+	// add vs from nacos3 for mcp server
 	if m.RegistryReconciler != nil {
 		allConfigsFromMcp := m.RegistryReconciler.GetAllConfigs(gvk.VirtualService)
 		for _, cfg := range allConfigsFromMcp {
@@ -794,23 +802,38 @@ func (m *IngressConfig) convertDestinationRule(configs []common.WrapperConfig) [
 			if !exist {
 				destinationRules[serviceName] = destinationRuleWrapper
 			} else if dr.DestinationRule.TrafficPolicy != nil {
-				if dr.DestinationRule.TrafficPolicy.LoadBalancer == nil &&
-					destinationRuleWrapper.DestinationRule.TrafficPolicy.LoadBalancer != nil {
-					dr.DestinationRule.TrafficPolicy.LoadBalancer = destinationRuleWrapper.DestinationRule.TrafficPolicy.LoadBalancer
-				}
-				portTrafficPolicy := destinationRuleWrapper.DestinationRule.TrafficPolicy.PortLevelSettings[0]
-				portUpdated := false
-				for _, policy := range dr.DestinationRule.TrafficPolicy.PortLevelSettings {
-					if policy.Port.Number == portTrafficPolicy.Port.Number {
-						policy.Tls = portTrafficPolicy.Tls
-						portUpdated = true
-						break
+				// If the service is referenced by an SSE-type MCP server, a source-IP-based consistent hashing policy needs to be configured.
+				// That policy is generated by the MCP server watcher; it is merged here only when the service has no LoadBalancer settings (or no LbPolicy) of its own.
+				if destinationRuleWrapper.DestinationRule.TrafficPolicy != nil && destinationRuleWrapper.DestinationRule.TrafficPolicy.LoadBalancer != nil {
+					if dr.DestinationRule.TrafficPolicy.LoadBalancer == nil {
+						dr.DestinationRule.TrafficPolicy.LoadBalancer = destinationRuleWrapper.DestinationRule.TrafficPolicy.LoadBalancer
+					} else if dr.DestinationRule.TrafficPolicy.LoadBalancer.LbPolicy == nil {
+						dr.DestinationRule.TrafficPolicy.LoadBalancer.LbPolicy = destinationRuleWrapper.DestinationRule.TrafficPolicy.LoadBalancer.LbPolicy
 					}
 				}
-				if portUpdated {
-					continue
+				// If the service is referenced by an HTTPS-type MCP server, a client-side simple-mode TLS policy needs to be configured.
+				// That policy is generated by the MCP server watcher; it is merged here only when the service has no TLS settings of its own.
+				if dr.DestinationRule.TrafficPolicy.Tls == nil && destinationRuleWrapper.DestinationRule.TrafficPolicy != nil &&
+					destinationRuleWrapper.DestinationRule.TrafficPolicy.Tls != nil {
+					dr.DestinationRule.TrafficPolicy.Tls = destinationRuleWrapper.DestinationRule.TrafficPolicy.Tls
+				}
+				// Directly inherit or override the port policy (if it exists)
+				if len(destinationRuleWrapper.DestinationRule.TrafficPolicy.PortLevelSettings) > 0 {
+					portTrafficPolicy := destinationRuleWrapper.DestinationRule.TrafficPolicy.PortLevelSettings[0]
+					portUpdated := false
+					for _, policy := range dr.DestinationRule.TrafficPolicy.PortLevelSettings {
+						if policy.Port.Number == portTrafficPolicy.Port.Number {
+							policy.Tls = portTrafficPolicy.Tls
+							policy.LoadBalancer = portTrafficPolicy.LoadBalancer
+							portUpdated = true
+							break
+						}
+					}
+					if portUpdated {
+						continue
+					}
+					dr.DestinationRule.TrafficPolicy.PortLevelSettings = append(dr.DestinationRule.TrafficPolicy.PortLevelSettings, portTrafficPolicy)
 				}
-				dr.DestinationRule.TrafficPolicy.PortLevelSettings = append(dr.DestinationRule.TrafficPolicy.PortLevelSettings, portTrafficPolicy)
 			}
 		}
 	}
@@ -1208,9 +1231,9 @@ func (m *IngressConfig) AddOrUpdateMcpBridge(clusterNamespacedName util.ClusterN
 				f(config.Config{Meta: efMetadata}, config.Config{Meta: efMetadata}, istiomodel.EventUpdate)
 			}
 		}, m.localKubeClient, m.namespace, m.clusterId.String())
+		m.configmapMgr.RegisterMcpServerProvider(m.RegistryReconciler)
 	}
 	reconciler := m.RegistryReconciler
-	m.configmapMgr.SetMcpReconciler(m.RegistryReconciler)
 	err = reconciler.Reconcile(mcpbridge)
 	if err != nil {
 		IngressLog.Errorf("Mcpbridge reconcile failed, err:%v", err)
@@ -1776,3 +1799,19 @@ func (m *IngressConfig) Patch(config.Config, config.PatchFunc) (string, error) {
 func (m *IngressConfig) Delete(config.GroupVersionKind, string, string, *string) error {
 	return common.ErrUnsupportedOp
 }
+
+func (m *IngressConfig) notifyXDSFullUpdate(gvk config.GroupVersionKind, reason istiomodel.TriggerReason, updatedConfigName *util.ClusterNamespacedName) {
+	var configsUpdated map[istiomodel.ConfigKey]struct{}
+	if updatedConfigName != nil {
+		configsUpdated = map[istiomodel.ConfigKey]struct{}{{
+			Kind:      kind.MustFromGVK(gvk),
+			Name:      updatedConfigName.Name,
+			Namespace: updatedConfigName.Namespace,
+		}: {}}
+	}
+	m.XDSUpdater.ConfigUpdate(&istiomodel.PushRequest{
+		Full:           true,
+		ConfigsUpdated: configsUpdated,
+		Reason:         istiomodel.NewReasonStats(reason),
+	})
+}
--- a/pkg/ingress/kube/annotations/annotations.go
+++ b/pkg/ingress/kube/annotations/annotations.go
@@ -21,6 +21,8 @@ import (
 	"istio.io/istio/pkg/cluster"
 	"istio.io/istio/pkg/util/sets"
 	listersv1 "k8s.io/client-go/listers/core/v1"
+
+	"github.com/alibaba/higress/pkg/ingress/kube/mcpserver"
 )

 type GlobalContext struct {
@@ -30,6 +32,8 @@ type GlobalContext struct {
 	ClusterSecretLister map[cluster.ID]listersv1.SecretLister

 	ClusterServiceList map[cluster.ID]listersv1.ServiceLister
+
+	McpServers []*mcpserver.McpServer
 }

 type Meta struct {
@@ -169,6 +173,7 @@ func NewAnnotationHandlerManager() AnnotationHandler {
 			match{},
 			headerControl{},
 			http2rpc{},
+			mcpServer{},
 		},
 		gatewayHandlers: []GatewayHandler{
 			downstreamTLS{},
--- a/pkg/ingress/kube/annotations/mcpserver.go
+++ b/pkg/ingress/kube/annotations/mcpserver.go
@@ -0,0 +1,94 @@
+// Copyright (c) 2023 Alibaba Group Holding Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package annotations
+
+import (
+	"strings"
+
+	"github.com/alibaba/higress/pkg/ingress/kube/mcpserver"
+	"github.com/alibaba/higress/pkg/ingress/log"
+)
+
+const (
+	enableMcpServer            = "mcp-server"
+	mcpServerMatchRuleDomains  = "mcp-server-match-rule-domains"
+	mcpServerMatchRuleType     = "mcp-server-match-rule-type"
+	mcpServerMatchRuleValue    = "mcp-server-match-rule-value"
+	mcpServerUpstreamType      = "mcp-server-upstream-type"
+	mcpServerEnablePathRewrite = "mcp-server-enable-path-rewrite"
+	mcpServerPathRewritePrefix = "mcp-server-path-rewrite-prefix"
+)
+
+// Compile-time check that mcpServer implements the Parser interface.
+var (
+	_ Parser = &mcpServer{}
+)
+
+type mcpServer struct{}
+
+// Parse builds an McpServer from the mcp-server annotations of an ingress and
+// appends it to globalContext.McpServers.
+//
+// Nothing is recorded when globalContext is nil, when the mcp-server
+// annotation is absent or false, when the match rule type annotation is empty
+// or unsupported, or when the upstream type annotation is set to an
+// unsupported value; the last three cases are logged with the ingress key and
+// the offending annotation name. Parse always returns nil.
+func (a mcpServer) Parse(annotations Annotations, config *Ingress, globalContext *GlobalContext) error {
+	if globalContext == nil {
+		return nil
+	}
+
+	enabled, _ := annotations.ParseBoolASAP(enableMcpServer)
+	if !enabled {
+		return nil
+	}
+
+	// Only needed once the feature is known to be enabled.
+	ingressKey := config.Namespace + "/" + config.Name
+
+	// A nil slice means "match all domains" (annotation absent, empty or "*").
+	var matchRuleDomains []string
+	rawMatchRuleDomains, _ := annotations.ParseStringASAP(mcpServerMatchRuleDomains)
+	if rawMatchRuleDomains != "" && rawMatchRuleDomains != "*" {
+		for _, domain := range strings.Split(rawMatchRuleDomains, ",") {
+			// Tolerate "a.com, b.com" and stray commas: surrounding whitespace
+			// would otherwise become part of the domain, and empty entries are
+			// not usable domains.
+			if domain = strings.TrimSpace(domain); domain != "" {
+				matchRuleDomains = append(matchRuleDomains, domain)
+			}
+		}
+	}
+
+	matchRuleType, _ := annotations.ParseStringASAP(mcpServerMatchRuleType)
+	if matchRuleType == "" {
+		log.IngressLog.Errorf("ingress %s: annotation %s is empty", ingressKey, mcpServerMatchRuleType)
+		return nil
+	}
+	if !mcpserver.ValidPathMatchTypes[matchRuleType] {
+		log.IngressLog.Errorf("ingress %s: annotation %s value %s is not supported", ingressKey, mcpServerMatchRuleType, matchRuleType)
+		return nil
+	}
+
+	matchRuleValue, _ := annotations.ParseStringASAP(mcpServerMatchRuleValue)
+
+	// The upstream type is optional; it is validated only when set.
+	upstreamType, _ := annotations.ParseStringASAP(mcpServerUpstreamType)
+	if upstreamType != "" && !mcpserver.ValidUpstreamTypes[upstreamType] {
+		log.IngressLog.Errorf("ingress %s: annotation %s value %s is not supported", ingressKey, mcpServerUpstreamType, upstreamType)
+		return nil
+	}
+
+	enablePathRewrite, _ := annotations.ParseBoolASAP(mcpServerEnablePathRewrite)
+	pathRewritePrefix, _ := annotations.ParseStringASAP(mcpServerPathRewritePrefix)
+
+	globalContext.McpServers = append(globalContext.McpServers, &mcpserver.McpServer{
+		Name:              ingressKey,
+		Domains:           matchRuleDomains,
+		PathMatchType:     matchRuleType,
+		PathMatchValue:    matchRuleValue,
+		UpstreamType:      upstreamType,
+		EnablePathRewrite: enablePathRewrite,
+		PathRewritePrefix: pathRewritePrefix,
+	})
+
+	return nil
+}
--- a/pkg/ingress/kube/annotations/mcpserver_test.go
+++ b/pkg/ingress/kube/annotations/mcpserver_test.go
@@ -0,0 +1,257 @@
+// Copyright (c) 2025 Alibaba Group Holding Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package annotations
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+
+	"github.com/alibaba/higress/pkg/ingress/kube/mcpserver"
+)
+
+// TestMCPServer_Parse runs mcpServer.Parse against a table of annotation sets and
+// checks which McpServer, if any, is appended to GlobalContext.McpServers.
+// A nil expect means that Parse must not register anything.
+func TestMCPServer_Parse(t *testing.T) {
+	parser := mcpServer{}
+	testCases := []struct {
+		skip   bool
+		input  Annotations
+		expect *mcpserver.McpServer
+	}{
+		{
+			// No annotation
+			input:  Annotations{},
+			expect: nil,
+		},
+		{
+			// Not enabled
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "false",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "prefix",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "",
+			},
+			expect: nil,
+		},
+		{
+			// Enabled but no match rule type
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "",
+			},
+			expect: nil,
+		},
+		{
+			// Enabled but empty match rule type
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "",
+			},
+			expect: nil,
+		},
+		{
+			// Enabled but bad match rule type
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "bad-type",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "",
+			},
+			expect: nil,
+		},
+		{
+			// Enabled but bad upstream type
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "prefix",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "bad-type",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "",
+			},
+			expect: nil,
+		},
+		{
+			// Enabled and rewrite not enabled
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "prefix",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "false",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "/",
+			},
+			expect: &mcpserver.McpServer{
+				Name:              "default/route",
+				Domains:           []string{"www.foo.com"},
+				PathMatchType:     "prefix",
+				PathMatchValue:    "/mcp",
+				UpstreamType:      "rest",
+				EnablePathRewrite: false,
+				PathRewritePrefix: "/",
+			},
+		},
+		{
+			// Enabled and rewrite not enabled and empty domain
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "prefix",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "false",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "/",
+			},
+			expect: &mcpserver.McpServer{
+				Name:              "default/route",
+				Domains:           nil,
+				PathMatchType:     "prefix",
+				PathMatchValue:    "/mcp",
+				UpstreamType:      "rest",
+				EnablePathRewrite: false,
+				PathRewritePrefix: "/",
+			},
+		},
+		{
+			// Enabled and rewrite not enabled and wildcard domain
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "*",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "prefix",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "false",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "/",
+			},
+			expect: &mcpserver.McpServer{
+				Name:              "default/route",
+				Domains:           nil,
+				PathMatchType:     "prefix",
+				PathMatchValue:    "/mcp",
+				UpstreamType:      "rest",
+				EnablePathRewrite: false,
+				PathRewritePrefix: "/",
+			},
+		},
+		{
+			// Enabled and rewrite enabled with root
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "prefix",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "true",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "/",
+			},
+			expect: &mcpserver.McpServer{
+				Name:              "default/route",
+				Domains:           []string{"www.foo.com"},
+				PathMatchType:     "prefix",
+				PathMatchValue:    "/mcp",
+				UpstreamType:      "rest",
+				EnablePathRewrite: true,
+				PathRewritePrefix: "/",
+			},
+		},
+		{
+			// Enabled and rewrite enabled with non-root prefix
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "prefix",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "true",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "/mcp-api",
+			},
+			expect: &mcpserver.McpServer{
+				Name:              "default/route",
+				Domains:           []string{"www.foo.com"},
+				PathMatchType:     "prefix",
+				PathMatchValue:    "/mcp",
+				UpstreamType:      "rest",
+				EnablePathRewrite: true,
+				PathRewritePrefix: "/mcp-api",
+			},
+		},
+		{
+			// Enabled and multiple domains
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com,www.bar.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "exact",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "sse",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "true",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "/",
+			},
+			expect: &mcpserver.McpServer{
+				Name:              "default/route",
+				Domains:           []string{"www.foo.com", "www.bar.com"},
+				PathMatchType:     "exact",
+				PathMatchValue:    "/mcp",
+				UpstreamType:      "sse",
+				EnablePathRewrite: true,
+				PathRewritePrefix: "/",
+			},
+		},
+	}
+
+	for _, tt := range testCases {
+		// Skip only this case; returning here would silently drop every case after it.
+		if tt.skip {
+			continue
+		}
+
+		t.Run("", func(t *testing.T) {
+			config := &Ingress{Meta: Meta{
+				Namespace: "default",
+				Name:      "route",
+			}}
+			// A fresh context per case, so at most one server can be registered.
+			globalContext := &GlobalContext{}
+			_ = parser.Parse(tt.input, config, globalContext)
+			if tt.expect == nil {
+				if len(globalContext.McpServers) != 0 {
+					t.Fatalf("globalContext.McpServers is not empty: %v", globalContext.McpServers)
+				}
+				return
+			}
+
+			if len(globalContext.McpServers) != 1 {
+				t.Fatalf("globalContext.McpServers length is not 1: %v", globalContext.McpServers)
+			}
+
+			if diff := cmp.Diff(tt.expect, globalContext.McpServers[0]); diff != "" {
+				t.Fatalf("TestMCPServer_Parse() mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
--- a/pkg/ingress/kube/configmap/controller.go
+++ b/pkg/ingress/kube/configmap/controller.go
@@ -18,7 +18,6 @@ import (
 	"reflect"
 	"sync/atomic"

-	"github.com/alibaba/higress/registry/reconcile"
 	"istio.io/istio/pilot/pkg/model"
 	"istio.io/istio/pkg/cluster"
 	"istio.io/istio/pkg/config"
@@ -33,6 +32,7 @@ import (
 	"sigs.k8s.io/yaml"

 	"github.com/alibaba/higress/pkg/ingress/kube/controller"
+	"github.com/alibaba/higress/pkg/ingress/kube/mcpserver"
 	"github.com/alibaba/higress/pkg/ingress/kube/util"
 	. "github.com/alibaba/higress/pkg/ingress/log"
 )
@@ -59,7 +59,6 @@ type ItemController interface {
 	ValidHigressConfig(higressConfig *HigressConfig) error
 	ConstructEnvoyFilters() ([]*config.Config, error)
 	RegisterItemEventHandler(eventHandler ItemEventHandler)
-	RegisterMcpReconciler(reconciler *reconcile.Reconciler)
 }

 type ConfigmapMgr struct {
@@ -113,9 +112,11 @@ func (c *ConfigmapMgr) GetHigressConfig() *HigressConfig {
 	return nil
 }

-func (c *ConfigmapMgr) SetMcpReconciler(reconciler *reconcile.Reconciler) {
+// RegisterMcpServerProvider passes provider to every item controller that
+// implements mcpserver.McpRouteProviderAware; controllers that do not implement
+// the interface are skipped.
+func (c *ConfigmapMgr) RegisterMcpServerProvider(provider mcpserver.McpServerProvider) {
 	for _, itemController := range c.ItemControllers {
-		itemController.RegisterMcpReconciler(reconciler)
+		if mcpRouteProviderAware, ok := itemController.(mcpserver.McpRouteProviderAware); ok {
+			mcpRouteProviderAware.RegisterMcpServerProvider(provider)
+		}
 	}
 }

--- a/pkg/ingress/kube/configmap/global.go
+++ b/pkg/ingress/kube/configmap/global.go
@@ -21,7 +21,6 @@ import (

 	"github.com/alibaba/higress/pkg/ingress/kube/util"
 	. "github.com/alibaba/higress/pkg/ingress/log"
-	"github.com/alibaba/higress/registry/reconcile"
 	networking "istio.io/api/networking/v1alpha3"
 	"istio.io/istio/pkg/config"
 	"istio.io/istio/pkg/config/schema/gvk"
@@ -377,9 +376,6 @@ func (g *GlobalOptionController) RegisterItemEventHandler(eventHandler ItemEvent
 	g.eventHandler = eventHandler
 }

-func (g *GlobalOptionController) RegisterMcpReconciler(reconciler *reconcile.Reconciler) {
-}
-
 // generateDownstreamEnvoyFilter generates the downstream envoy filter.
 func (g *GlobalOptionController) generateDownstreamEnvoyFilter(downstreamValueStruct string, bufferLimitStruct string, routeTimeoutStruct string, namespace string) []*networking.EnvoyFilter_EnvoyConfigObjectPatch {
 	var downstreamConfig []*networking.EnvoyFilter_EnvoyConfigObjectPatch
--- a/pkg/ingress/kube/configmap/gzip.go
+++ b/pkg/ingress/kube/configmap/gzip.go
@@ -23,7 +23,6 @@ import (

 	"github.com/alibaba/higress/pkg/ingress/kube/util"
 	. "github.com/alibaba/higress/pkg/ingress/log"
-	"github.com/alibaba/higress/registry/reconcile"
 	networking "istio.io/api/networking/v1alpha3"
 	"istio.io/istio/pkg/config"
 	"istio.io/istio/pkg/config/schema/gvk"
@@ -292,9 +291,6 @@ func (g *GzipController) RegisterItemEventHandler(eventHandler ItemEventHandler)
 	g.eventHandler = eventHandler
 }

-func (g *GzipController) RegisterMcpReconciler(reconciler *reconcile.Reconciler) {
-}
-
 func (g *GzipController) constructGzipStruct(gzip *Gzip, namespace string) string {
 	gzipConfig := ""
 	contentType := ""
--- a/pkg/ingress/kube/configmap/mcp_server.go
+++ b/pkg/ingress/kube/configmap/mcp_server.go
@@ -22,12 +22,13 @@ import (
 	"strings"
 	"sync/atomic"

-	"github.com/alibaba/higress/pkg/ingress/kube/util"
-	. "github.com/alibaba/higress/pkg/ingress/log"
-	"github.com/alibaba/higress/registry/reconcile"
 	networking "istio.io/api/networking/v1alpha3"
 	"istio.io/istio/pkg/config"
 	"istio.io/istio/pkg/config/schema/gvk"
+
+	"github.com/alibaba/higress/pkg/ingress/kube/mcpserver"
+	"github.com/alibaba/higress/pkg/ingress/kube/util"
+	. "github.com/alibaba/higress/pkg/ingress/log"
 )

 // RedisConfig defines the configuration for Redis connection
@@ -232,18 +233,19 @@ func deepCopyMcpServer(mcp *McpServer) (*McpServer, error) {
 }

 type McpServerController struct {
-	Namespace    string
-	mcpServer    atomic.Value
-	Name         string
-	eventHandler ItemEventHandler
-	reconciler   *reconcile.Reconciler
+	Namespace          string
+	mcpServer          atomic.Value
+	Name               string
+	eventHandler       ItemEventHandler
+	// mcpServerProviders is used as a set of registered providers; the servers
+	// they return are merged into the match list built for the session filter.
+	mcpServerProviders map[mcpserver.McpServerProvider]bool
 }

 func NewMcpServerController(namespace string) *McpServerController {
 	mcpController := &McpServerController{
-		Namespace: namespace,
-		mcpServer: atomic.Value{},
-		Name:      "mcpServer",
+		Namespace:          namespace,
+		Name:               "mcpServer",
+		mcpServer:          atomic.Value{},
+		mcpServerProviders: make(map[mcpserver.McpServerProvider]bool),
 	}
 	mcpController.SetMcpServer(NewDefaultMcpServer())
 	return mcpController
@@ -310,8 +312,11 @@ func (m *McpServerController) RegisterItemEventHandler(eventHandler ItemEventHan
 	m.eventHandler = eventHandler
 }

-func (m *McpServerController) RegisterMcpReconciler(reconciler *reconcile.Reconciler) {
-	m.reconciler = reconciler
+// RegisterMcpServerProvider adds provider to the set of MCP server providers,
+// creating the set first if it has not been initialized.
+// NOTE(review): the map is written here and ranged over in
+// constructMcpSessionStruct without visible locking — confirm that registration
+// finishes before envoy filters can be constructed concurrently.
+func (m *McpServerController) RegisterMcpServerProvider(provider mcpserver.McpServerProvider) {
+	if m.mcpServerProviders == nil {
+		m.mcpServerProviders = make(map[mcpserver.McpServerProvider]bool)
+	}
+	m.mcpServerProviders[provider] = true
 }

 func (m *McpServerController) ConstructEnvoyFilters() ([]*config.Config, error) {
@@ -406,10 +411,36 @@ func (m *McpServerController) ConstructEnvoyFilters() ([]*config.Config, error)

 func (m *McpServerController) constructMcpSessionStruct(mcp *McpServer) string {
 	// Build match_list configuration
-	matchList := "[]"
-	var matchConfigs []string
-	if len(mcp.MatchList) > 0 {
-		for _, rule := range mcp.MatchList {
+	var matchList []*MatchRule
+	matchList = append(matchList, mcp.MatchList...)
+	for provider, _ := range m.mcpServerProviders {
+		servers := provider.GetMcpServers()
+		if len(servers) == 0 {
+			continue
+		}
+		for _, server := range servers {
+			matchRuleDomain := ""
+			if len(server.Domains) != 0 {
+				if len(server.Domains) > 1 {
+					matchRuleDomain = fmt.Sprintf("(%s)", strings.Join(server.Domains, "|"))
+				} else {
+					matchRuleDomain = server.Domains[0]
+				}
+			}
+			matchList = append(matchList, &MatchRule{
+				MatchRuleDomain:   matchRuleDomain,
+				MatchRuleType:     server.PathMatchType,
+				MatchRulePath:     server.PathMatchValue,
+				UpstreamType:      server.UpstreamType,
+				EnablePathRewrite: server.EnablePathRewrite,
+				PathRewritePrefix: server.PathRewritePrefix,
+			})
+		}
+	}
+	matchListConfig := "[]"
+	if len(matchList) > 0 {
+		matchConfigs := make([]string, 0, len(matchList))
+		for _, rule := range matchList {
 			matchConfigs = append(matchConfigs, fmt.Sprintf(`{
 				"match_rule_domain": "%s",
 				"match_rule_path": "%s",
@@ -419,28 +450,9 @@ func (m *McpServerController) constructMcpSessionStruct(mcp *McpServer) string {
 				"path_rewrite_prefix": "%s"
 			}`, rule.MatchRuleDomain, rule.MatchRulePath, rule.MatchRuleType, rule.UpstreamType, rule.EnablePathRewrite, rule.PathRewritePrefix))
 		}
+		matchListConfig = fmt.Sprintf("[%s]", strings.Join(matchConfigs, ","))
 	}

-	if m.reconciler != nil {
-		vsFromMcp := m.reconciler.GetAllConfigs(gvk.VirtualService)
-		for _, c := range vsFromMcp {
-			vs := c.Spec.(*networking.VirtualService)
-			var host string
-			if len(vs.Hosts) > 1 {
-				host = fmt.Sprintf("(%s)", strings.Join(vs.Hosts, "|"))
-			} else {
-				host = vs.Hosts[0]
-			}
-			path := vs.Http[0].Match[0].Uri.GetPrefix()
-			matchConfigs = append(matchConfigs, fmt.Sprintf(`{
-				"match_rule_domain": "%s",
-				"match_rule_path": "%s",
-				"match_rule_type": "prefix"
-			}`, host, path))
-		}
-	}
-	matchList = fmt.Sprintf("[%s]", strings.Join(matchConfigs, ","))
-
 	// Build redis configuration
 	redisConfig := "null"
 	if mcp.Redis != nil {
@@ -492,7 +504,7 @@ func (m *McpServerController) constructMcpSessionStruct(mcp *McpServer) string {
 		redisConfig,
 		rateLimitConfig,
 		mcp.SSEPathSuffix,
-		matchList,
+		matchListConfig,
 		mcp.EnableUserLevelServer)
 }

--- a/pkg/ingress/kube/configmap/tracing.go
+++ b/pkg/ingress/kube/configmap/tracing.go
@@ -21,7 +21,6 @@ import (
 	"reflect"
 	"sync/atomic"

-	"github.com/alibaba/higress/registry/reconcile"
 	"istio.io/istio/pkg/config"
 	"istio.io/istio/pkg/config/schema/gvk"

@@ -238,9 +237,6 @@ func (t *TracingController) RegisterItemEventHandler(eventHandler ItemEventHandl
 	t.eventHandler = eventHandler
 }

-func (t *TracingController) RegisterMcpReconciler(reconciler *reconcile.Reconciler) {
-}
-
 func (t *TracingController) ConstructEnvoyFilters() ([]*config.Config, error) {
 	configs := make([]*config.Config, 0)
 	tracing := t.GetTracing()
--- a/pkg/ingress/kube/mcpserver/model.go
+++ b/pkg/ingress/kube/mcpserver/model.go
@@ -0,0 +1,60 @@
+// Copyright (c) 2025 Alibaba Group Holding Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package mcpserver
+
+import (
+	"istio.io/istio/pkg/config"
+)
+
+var (
+	// GvkMcpServer is the GroupVersionKind for kind McpServer in group
+	// networking.higress.io, version v1alpha1.
+	GvkMcpServer = config.GroupVersionKind{Group: "networking.higress.io", Version: "v1alpha1", Kind: "McpServer"}
+)
+
+const (
+	// Upstream types; these are the keys of ValidUpstreamTypes.
+	UpstreamTypeRest       string = "rest"
+	UpstreamTypeSSE        string = "sse"
+	UpstreamTypeStreamable string = "streamable"
+
+	// Path match types; these are the keys of ValidPathMatchTypes.
+	ExactMatchType    string = "exact"
+	PrefixMatchType   string = "prefix"
+	SuffixMatchType   string = "suffix"
+	ContainsMatchType string = "contains"
+	RegexMatchType    string = "regex"
+)
+
+var (
+	// ValidUpstreamTypes is the set of supported upstream types. Looking up an
+	// unknown type yields false, so the map can be indexed directly as a check.
+	ValidUpstreamTypes = map[string]bool{
+		UpstreamTypeRest:       true,
+		UpstreamTypeSSE:        true,
+		UpstreamTypeStreamable: true,
+	}
+	// ValidPathMatchTypes is the set of supported path match types, used the
+	// same way as ValidUpstreamTypes.
+	ValidPathMatchTypes = map[string]bool{
+		ExactMatchType:    true,
+		PrefixMatchType:   true,
+		SuffixMatchType:   true,
+		ContainsMatchType: true,
+		RegexMatchType:    true,
+	}
+)
+
+// McpServer describes one MCP server match rule: which domains and path it
+// matches, the upstream type, and the optional path rewrite settings.
+type McpServer struct {
+	// Name identifies the server; McpServerCache sorts its list by this field.
+	Name string `json:"name,omitempty"`
+	// Domains lists the domains to match. An empty list matches all domains.
+	Domains []string `json:"domains,omitempty"`
+	// PathMatchType is expected to be one of the keys of ValidPathMatchTypes.
+	PathMatchType string `json:"path_match_type,omitempty"`
+	// PathMatchValue is the path value matched according to PathMatchType.
+	PathMatchValue string `json:"path_match_value,omitempty"`
+	// UpstreamType is empty or one of the keys of ValidUpstreamTypes.
+	UpstreamType string `json:"upstream_type,omitempty"`
+	// EnablePathRewrite and PathRewritePrefix are copied as-is into the
+	// generated match rule.
+	EnablePathRewrite bool   `json:"enable_path_rewrite,omitempty"`
+	PathRewritePrefix string `json:"path_rewrite_prefix,omitempty"`
+}
--- a/pkg/ingress/kube/mcpserver/provider.go
+++ b/pkg/ingress/kube/mcpserver/provider.go
@@ -0,0 +1,70 @@
+// Copyright (c) 2025 Alibaba Group Holding Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package mcpserver
+
+import (
+	"reflect"
+	"slices"
+	"strings"
+	"sync"
+)
+
+// McpServerProvider is implemented by components that can supply a list of
+// MCP servers, such as McpServerCache.
+type McpServerProvider interface {
+	// GetMcpServers returns the MCP servers currently known to the provider.
+	GetMcpServers() []*McpServer
+}
+
+// McpRouteProviderAware is implemented by components that want to be handed
+// McpServerProvider instances.
+type McpRouteProviderAware interface {
+	// RegisterMcpServerProvider hands a provider to the implementing component.
+	RegisterMcpServerProvider(provider McpServerProvider)
+}
+
+// McpServerCache holds a list of MCP servers behind a read-write mutex. Its
+// GetMcpServers method satisfies McpServerProvider.
+type McpServerCache struct {
+	// mcpServers is the cached list; SetMcpServers stores it sorted by Name.
+	mcpServers []*McpServer
+	// mutex guards mcpServers.
+	mutex sync.RWMutex
+}
+
+// GetMcpServers returns the currently cached list of MCP servers.
+// The slice is returned as stored, without copying.
+func (c *McpServerCache) GetMcpServers() []*McpServer {
+	c.mutex.RLock()
+	servers := c.mcpServers
+	c.mutex.RUnlock()
+	return servers
+}
+
+// SetMcpServers replaces the cached list with a copy of mcpServers sorted by Name
+// in ascending order, and reports whether the cached list changed.
+//
+// The input slice itself is not reordered. When the sorted list is element-wise
+// deeply equal to the cached one, the cache is left untouched and false is returned.
+func (c *McpServerCache) SetMcpServers(mcpServers []*McpServer) bool {
+	c.mutex.Lock()
+	defer c.mutex.Unlock()
+
+	// Sort a copy so the caller's slice keeps its order. Ordering by name makes the
+	// comparison below independent of the order in which servers were supplied.
+	sortedMcpServers := make([]*McpServer, 0, len(mcpServers))
+	sortedMcpServers = append(sortedMcpServers, mcpServers...)
+	slices.SortFunc(sortedMcpServers, func(a, b *McpServer) int {
+		return strings.Compare(a.Name, b.Name)
+	})
+
+	// EqualFunc compares lengths first, so a nil cache and an empty input count as
+	// unchanged.
+	unchanged := slices.EqualFunc(c.mcpServers, sortedMcpServers, func(a, b *McpServer) bool {
+		return reflect.DeepEqual(a, b)
+	})
+	if unchanged {
+		return false
+	}
+
+	c.mcpServers = sortedMcpServers
+	return true
+}
--- a/pkg/ingress/kube/mcpserver/provider_test.go
+++ b/pkg/ingress/kube/mcpserver/provider_test.go
@@ -0,0 +1,654 @@
+// Copyright (c) 2025 Alibaba Group Holding Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package mcpserver
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+)
+
+func TestMcpServerCache_GetSet(t *testing.T) {
+	testCases := []struct {
+		name    string
+		skip    bool
+		init    []*McpServer
+		input   []*McpServer
+		expect  []*McpServer
+		changed bool
+	}{
+		{
+			name:    "nil",
+			init:    nil,
+			input:   nil,
+			changed: false,
+			expect:  nil,
+		},
+		{
+			name: "nil to non-nil",
+			init: nil,
+			input: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			changed: true,
+			expect: []*McpServer{
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+			},
+		},
+		{
+			name: "non-nil to non-nil (length increase)",
+			init: []*McpServer{
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+			},
+			input: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			changed: true,
+			expect: []*McpServer{
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+			},
+		},
+		{
+			name: "non-nil to non-nil (length decrease)",
+			init: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			input: []*McpServer{
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+			},
+			changed: true,
+			expect: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+			},
+		},
+		{
+			name: "non-nil to non-nil (length unchanged + name field changed)",
+			init: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			input: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3-1",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			changed: true,
+			expect: []*McpServer{
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3-1",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+			},
+		},
+		{
+			name: "non-nil to non-nil (length unchanged + non-name field changed)",
+			init: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			input: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar-2.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test4",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			changed: true,
+			expect: []*McpServer{
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar-2.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test4",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+			},
+		},
+		{
+			name: "non-nil to non-nil (content unchanged + order unchanged)",
+			init: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			input: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			changed: false,
+			expect: []*McpServer{
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+			},
+		},
+		{
+			name: "non-nil to non-nil (content unchanged + order changed)",
+			init: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			input: []*McpServer{
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+			},
+			changed: false,
+			expect: []*McpServer{
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+			},
+		},
+	}
+
+	for _, tt := range testCases {
+		if tt.skip {
+			continue
+		}
+		t.Run(tt.name, func(t *testing.T) {
+			provider := &McpServerCache{}
+
+			if provider.GetMcpServers() != nil {
+				t.Fatalf("GetMcpServers doesn't return nil before testing.")
+			}
+
+			_ = provider.SetMcpServers(tt.init)
+
+			changed := provider.SetMcpServers(tt.input)
+			if changed != tt.changed {
+				t.Fatalf("actual changed %t != expect changed %t", changed, tt.changed)
+				return
+			}
+
+			actual := provider.GetMcpServers()
+
+			if len(actual) != len(tt.expect) {
+				t.Fatalf("actual length %d != expect length %d", len(actual), len(tt.expect))
+			}
+			for i := range actual {
+				if diff := cmp.Diff(tt.expect[i], actual[i]); diff != "" {
+					t.Fatalf("TestMcpServerCache_GetSet() mismatch (-want +got):\n%s", diff)
+				}
+			}
+		})
+	}
+}
--- a/plugins/golang-filter/mcp-session/common/sse.go
+++ b/plugins/golang-filter/mcp-session/common/sse.go
@@ -136,7 +136,7 @@ func (s *SSEServer) HandleSSE(cb api.FilterCallbackHandler, stopChan chan struct
 	}

 	// Send the initial endpoint event
-	initialEvent := fmt.Sprintf("event: endpoint\ndata: %s\r\n\r\n", messageEndpoint)
+	initialEvent := fmt.Sprintf("event: endpoint\ndata: %s\n\n", messageEndpoint)
 	err = s.redisClient.Publish(channel, initialEvent)
 	if err != nil {
 		api.LogErrorf("Failed to send initial event: %v", err)
@@ -210,7 +210,7 @@ func (s *SSEServer) HandleMessage(w http.ResponseWriter, r *http.Request, body j
 	var status int
 	// Only send response if there is one (not for notifications)
 	if response != nil {
-		if sessionID != ""{
+		if sessionID != ""  {
 			w.WriteHeader(http.StatusAccepted)
 			status = http.StatusAccepted
 		} else {
--- a/plugins/golang-filter/mcp-session/filter.go
+++ b/plugins/golang-filter/mcp-session/filter.go
@@ -143,50 +143,9 @@ func (f *filter) processMcpRequestHeadersForRestUpstream(header api.RequestHeade
 func (f *filter) processMcpRequestHeadersForSSEUpstream(header api.RequestHeaderMap, endStream bool) api.StatusType {
 	// We don't need to process the request body for SSE upstream.
 	f.skipRequestBody = true
-	f.rewritePathForSSEUpstream(header)
 	return api.Continue
 }

-func (f *filter) rewritePathForSSEUpstream(header api.RequestHeaderMap) {
-	matchedRule := f.matchedRule
-	if !matchedRule.EnablePathRewrite || matchedRule.MatchRuleType != common.PrefixMatch {
-		// No rewrite required, so we don't need to process the response body, either.
-		f.skipResponseBody = true
-		return
-	}
-
-	path := f.req.URL.Path
-	if !strings.HasPrefix(path, matchedRule.MatchRulePath) {
-		api.LogWarnf("Unexpected: Path %s does not match the configured prefix %s", path, matchedRule.MatchRulePath)
-		return
-	}
-
-	rewrittenPath := path[len(matchedRule.MatchRulePath):]
-
-	if rewrittenPath == "" {
-		rewrittenPath = matchedRule.PathRewritePrefix
-	} else {
-		rewritePrefixHasTrailingSlash := strings.HasSuffix(matchedRule.PathRewritePrefix, "/")
-		pathSuffixHasLeadingSlash := strings.HasPrefix(rewrittenPath, "/")
-		if rewritePrefixHasTrailingSlash != pathSuffixHasLeadingSlash {
-			// One has, the other doesn't have.
-			rewrittenPath = matchedRule.PathRewritePrefix + rewrittenPath
-		} else if pathSuffixHasLeadingSlash {
-			// Both have.
-			rewrittenPath = matchedRule.PathRewritePrefix + rewrittenPath[1:]
-		} else {
-			// Neither have.
-			rewrittenPath = matchedRule.PathRewritePrefix + "/" + rewrittenPath
-		}
-	}
-
-	if f.req.URL.RawQuery != "" {
-		rewrittenPath = rewrittenPath + "?" + f.req.URL.RawQuery
-	}
-
-	header.SetPath(rewrittenPath)
-}
-
 // DecodeData might be called multiple times during handling the request body.
 // The endStream is true when handling the last piece of the body.
 func (f *filter) DecodeData(buffer api.BufferInstance, endStream bool) api.StatusType {
@@ -322,20 +281,7 @@ func (f *filter) encodeDataFromSSEUpstream(buffer api.BufferInstance, endStream
 	bufferBytes := buffer.Bytes()
 	bufferData := string(bufferBytes)

-	err, lineBreak := f.findSSELineBreak(bufferData)
-	if err != nil {
-		api.LogWarnf("Failed to find line break in SSE data: %v", err)
-		f.needProcess = false
-		return api.Continue
-	}
-	if lineBreak == "" {
-		// Have not found any line break. Need to buffer and check again.
-		return api.StopAndBuffer
-	}
-
-	api.LogDebugf("Line break sequence: %v", []byte(lineBreak))
-
-	err, endpointUrl := f.findEndpointUrl(bufferData, lineBreak)
+	err, endpointUrl := f.findEndpointUrl(bufferData)
 	if err != nil {
 		api.LogWarnf("Failed to find endpoint URL in SSE data: %v", err)
 		f.needProcess = false
@@ -412,7 +358,7 @@ func (f *filter) rewriteEndpointUrl(endpointUrl string) (bool, string) {
 	return true, endpointUrl
 }

-func (f *filter) findSSELineBreak(bufferData string) (error, string) {
+func (f *filter) findNextLineBreak(bufferData string) (error, string) {
 	// See https://html.spec.whatwg.org/multipage/server-sent-events.html
 	crIndex := strings.IndexAny(bufferData, "\r")
 	lfIndex := strings.IndexAny(bufferData, "\n")
@@ -422,11 +368,20 @@ func (f *filter) findSSELineBreak(bufferData string) (error, string) {
 	}
 	lineBreak := ""
 	if crIndex != -1 && lfIndex != -1 {
-		if crIndex+1 != lfIndex {
-			// Found both line breaks, but they are not adjacent. Skip body processing.
-			return errors.New("found non-adjacent CR and LF"), ""
+		if crIndex < lfIndex {
+			if crIndex+1 == lfIndex {
+				lineBreak = "\r\n"
+			} else {
+				lineBreak = "\r"
+			}
+		} else {
+			if crIndex == lfIndex+1 {
+				// Found unexpected "\n\r". Skip body processing.
+				return errors.New("found unexpected LF+CR"), ""
+			} else {
+				lineBreak = "\n"
+			}
 		}
-		lineBreak = "\r\n"
 	} else if crIndex != -1 {
 		lineBreak = "\r"
 	} else {
@@ -435,12 +390,21 @@ func (f *filter) findSSELineBreak(bufferData string) (error, string) {
 	return nil, lineBreak
 }

-func (f *filter) findEndpointUrl(bufferData, lineBreak string) (error, string) {
+func (f *filter) findEndpointUrl(bufferData string) (error, string) {
 	eventIndex := strings.Index(bufferData, "event:")
 	if eventIndex == -1 {
 		return nil, ""
 	}
 	bufferData = bufferData[eventIndex:]
+	err, lineBreak := f.findNextLineBreak(bufferData)
+	if err != nil {
+		return fmt.Errorf("failed to find endpoint URL in SSE data: %v", err), ""
+	}
+	if lineBreak == "" {
+		// No line break found yet, so the buffered event line is still incomplete.
+		return nil, ""
+	}
+	api.LogDebugf("event line break sequence: %v", []byte(lineBreak))
 	eventEndIndex := strings.Index(bufferData, lineBreak)
 	if eventEndIndex == -1 {
 		return nil, ""
@@ -450,6 +414,15 @@ func (f *filter) findEndpointUrl(bufferData, lineBreak string) (error, string) {
 		return fmt.Errorf("the initial event [%s] is not an endpoint event. Skip processing", eventName), ""
 	}
 	bufferData = bufferData[eventEndIndex+len(lineBreak):]
+	err, lineBreak = f.findNextLineBreak(bufferData)
+	if err != nil {
+		return fmt.Errorf("failed to find endpoint URL in SSE data: %v", err), ""
+	}
+	if lineBreak == "" {
+		// No line break found yet, so the buffered data line is still incomplete.
+		return nil, ""
+	}
+	api.LogDebugf("data line break sequence: %v", []byte(lineBreak))
 	dataEndIndex := strings.Index(bufferData, lineBreak)
 	if dataEndIndex == -1 {
 		// Data received not enough.
--- a/plugins/wasm-cpp/.bazelrc
+++ b/plugins/wasm-cpp/.bazelrc
@@ -4,4 +4,6 @@ build:gcc --cxxopt=-std=c++17
 build:clang --action_env=CC=clang --action_env=CXX=clang++
 build:clang --action_env=BAZEL_COMPILER=clang
 build:clang --linkopt=-fuse-ld=lld
-build:clang --cxxopt=-std=c++17
+build:clang --cxxopt=-std=c++17
+
+build --incompatible_use_platforms_repo_for_constraints=false
--- a/plugins/wasm-cpp/.bazelversion
+++ b/plugins/wasm-cpp/.bazelversion
@@ -1 +1 @@
-5.4.0
+6.0.0
--- a/plugins/wasm-cpp/WORKSPACE
+++ b/plugins/wasm-cpp/WORKSPACE
@@ -1,6 +1,13 @@
 workspace(name = "istio_ecosystem_wasm_extensions")

 load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+http_archive(
+    name = "platforms",
+    url = "https://github.com/bazelbuild/platforms/releases/download/0.0.9/platforms-0.0.9.tar.gz",
+    sha256 = "5eda539c841265031c2f82d8ae7a3a6490bd62176e0c038fc469eabf91f6149b",
+)
+
 load("//bazel:third_party.bzl", "wasm_extension_dependency")

 wasm_extension_dependency()
@@ -16,9 +23,9 @@ load("@io_bazel_rules_docker//repositories:deps.bzl", container_deps = "deps")

 container_deps()

-PROXY_WASM_CPP_SDK_SHA = "eaec483b5b3c7bcb89fd208b5a1fa5d79d626f61"
+PROXY_WASM_CPP_SDK_SHA = "0ceca8c81dddc4c9875cf0cb997454764905658c"

-PROXY_WASM_CPP_SDK_SHA256 = "1140bc8114d75db56a6ca6b18423d4df50d988d40b4cec929a1eb246cf5a4a3d"
+PROXY_WASM_CPP_SDK_SHA256 = "cb010b242d49fb02b39124421b6acb69bd4ece64fb6299ba3f98f3b36eef7004"

 http_archive(
    name = "proxy_wasm_cpp_sdk",
--- a/plugins/wasm-cpp/extensions/key_auth/plugin.cc
+++ b/plugins/wasm-cpp/extensions/key_auth/plugin.cc
@@ -202,7 +202,7 @@ bool PluginRootContext::parsePluginConfig(const json& configuration,
          }
          item = consumer.find("keys");
          if (item == consumer.end()) {
-            LOG_WARN("not found keys configuration for consumer " + c.name + ", will use global configuration to extract keys");
+            LOG_DEBUG("not found keys configuration for consumer " + c.name + ", will use global configuration to extract keys");
            need_global_keys = true;
          } else {
            c.keys = std::vector<std::string>{OriginalAuthKey};
--- a/plugins/wasm-cpp/extensions/model_mapper/README.md
+++ b/plugins/wasm-cpp/extensions/model_mapper/README.md
@@ -7,7 +7,7 @@
 | -----------          | --------------- | ----------------------- | ------                   | -------------------------------------------                                                                                                                                                                                                                  |
 | `modelKey`           | string          | 选填                    | model                    | 请求body中model参数的位置                                                                                                                                                                                                                                    |
 | `modelMapping`       | map of string   | 选填                    | -                        | AI 模型映射表，用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>1. 支持前缀匹配。例如用 "gpt-3-*" 匹配所有名称以“gpt-3-”开头的模型；<br/>2. 支持使用 "*" 为键来配置通用兜底映射关系；<br/>3. 如果映射的目标名称为空字符串 ""，则表示保留原模型名称。 |
-| `enableOnPathSuffix` | array of string | 选填                    | ["/v1/chat/completions"] | 只对这些特定路径后缀的请求生效                                                                                                                                                                                                                               |
+| `enableOnPathSuffix` | array of string | 选填                    | ["/completions","/embeddings","/images/generations","/audio/speech","/fine_tuning/jobs","/moderations","/image-synthesis","/video-synthesis"] | 只对这些特定路径后缀的请求生效 |


 ## 效果说明
--- a/plugins/wasm-cpp/extensions/model_mapper/README_EN.md
+++ b/plugins/wasm-cpp/extensions/model_mapper/README_EN.md
@@ -7,7 +7,7 @@ The `model-mapper` plugin implements the functionality of routing based on the m
 | -----------          | --------------- | -----------------------            | ------                          | -------------------------------------------                                                                                                                                                                                                                  |
 | `modelKey`           | string          | Optional                           | model                           | The location of the model parameter in the request body.                                                                                                                                                                                                            |
 | `modelMapping`       | map of string   | Optional                           | -                               | AI model mapping table, used to map the model names in the request to the model names supported by the service provider.<br/>1. Supports prefix matching. For example, use "gpt-3-*" to match all models whose names start with “gpt-3-”;<br/>2. Supports using "*" as the key to configure a generic fallback mapping relationship;<br/>3. If the target name in the mapping is an empty string "", it means to keep the original model name. |
-| `enableOnPathSuffix` | array of string | Optional                           | ["/v1/chat/completions"]        | Only applies to requests with these specific path suffixes.                                                                                                                                           |
+| `enableOnPathSuffix` | array of string | Optional                           | ["/completions","/embeddings","/images/generations","/audio/speech","/fine_tuning/jobs","/moderations","/image-synthesis","/video-synthesis"]        | Only applies to requests with these specific path suffixes.                                                                                                                                           |

 ## Runtime Properties

--- a/plugins/wasm-cpp/extensions/model_mapper/plugin.h
+++ b/plugins/wasm-cpp/extensions/model_mapper/plugin.h
@@ -43,7 +43,8 @@ struct ModelMapperConfigRule {
  std::string default_model_mapping_;
  std::vector<std::string> enable_on_path_suffix_ = {
      "/completions",  "/embeddings",       "/images/generations",
-      "/audio/speech", "/fine_tuning/jobs", "/moderations"};
+      "/audio/speech", "/fine_tuning/jobs", "/moderations",
+      "/image-synthesis", "/video-synthesis"};
 };

 // PluginRootContext is the root context for all streams processed by the
--- a/plugins/wasm-cpp/extensions/model_router/README.md
+++ b/plugins/wasm-cpp/extensions/model_router/README.md
@@ -8,7 +8,7 @@
 | `modelKey`           | string          | 选填                    | model                    | 请求body中model参数的位置                             |
 | `addProviderHeader`  | string          | 选填                    | -                        | 从model参数中解析出的provider名字放到哪个请求header中 |
 | `modelToHeader`      | string          | 选填                    | -                        | 直接将model参数放到哪个请求header中                   |
-| `enableOnPathSuffix` | array of string | 选填                    | ["/completions","/embeddings","/images/generations","/audio/speech","/fine_tuning/jobs","/moderations"] | 只对这些特定路径后缀的请求生效，可以配置为 "*" 以匹配所有路径 |
+| `enableOnPathSuffix` | array of string | 选填                    | ["/completions","/embeddings","/images/generations","/audio/speech","/fine_tuning/jobs","/moderations","/image-synthesis","/video-synthesis"] | 只对这些特定路径后缀的请求生效，可以配置为 "*" 以匹配所有路径 |

 ## 运行属性

--- a/plugins/wasm-cpp/extensions/model_router/README_EN.md
+++ b/plugins/wasm-cpp/extensions/model_router/README_EN.md
@@ -8,7 +8,7 @@ The `model-router` plugin implements routing functionality based on the model pa
 | `modelKey`           | string          | Optional                | model                    | Location of the model parameter in the request body          |
 | `addProviderHeader`  | string          | Optional                | -                        | Which request header to add the provider name parsed from the model parameter |
 | `modelToHeader`      | string          | Optional                | -                        | Which request header to directly add the model parameter to  |
-| `enableOnPathSuffix` | array of string | Optional                | ["/v1/chat/completions"] | Only effective for requests with these specific path suffixes, can be configured as "*" to match all paths |
+| `enableOnPathSuffix` | array of string | Optional                | ["/completions","/embeddings","/images/generations","/audio/speech","/fine_tuning/jobs","/moderations","/image-synthesis","/video-synthesis"] | Only effective for requests with these specific path suffixes, can be configured as "*" to match all paths |

 ## Runtime Properties

--- a/plugins/wasm-cpp/extensions/model_router/plugin.h
+++ b/plugins/wasm-cpp/extensions/model_router/plugin.h
@@ -49,7 +49,8 @@ struct ModelRouterConfigRule {
  std::string model_to_header_;
  std::vector<std::string> enable_on_path_suffix_ = {
      "/completions",  "/embeddings",       "/images/generations",
-      "/audio/speech", "/fine_tuning/jobs", "/moderations"};
+      "/audio/speech", "/fine_tuning/jobs", "/moderations",
+      "/image-synthesis", "/video-synthesis"};
 };

 class PluginContext;
--- a/plugins/wasm-go/extensions/ai-cache/config/config.go
+++ b/plugins/wasm-go/extensions/ai-cache/config/config.go
@@ -90,6 +90,8 @@ func (c *PluginConfig) FromJson(json gjson.Result, log wrapper.Log) {

 	if json.Get("enableSemanticCache").Exists() {
 		c.EnableSemanticCache = json.Get("enableSemanticCache").Bool()
+	} else if c.GetVectorProvider() == nil {
+		c.EnableSemanticCache = false // set default value to false when no vector provider is configured
 	} else {
 		c.EnableSemanticCache = true // set default value to true
 	}
--- a/plugins/wasm-go/extensions/ai-proxy/README.md
+++ b/plugins/wasm-go/extensions/ai-proxy/README.md
@@ -1,6 +1,6 @@
 ---
 title: AI 代理
-keywords: [ AI网关, AI代理 ]
+keywords: [AI网关, AI代理]
 description: AI 代理插件配置参考
 ---

@@ -20,53 +20,49 @@ description: AI 代理插件配置参考
 插件执行阶段：`默认阶段`
 插件执行优先级：`100`

-
 ## 配置字段

 ### 基本配置

-| 名称         | 数据类型   | 填写要求 | 默认值 | 描述               |
-|------------|--------|------|-----|------------------|
-| `provider` | object | 必填   | -   | 配置目标 AI 服务提供商的信息 |
+| 名称       | 数据类型 | 填写要求 | 默认值 | 描述                         |
+| ---------- | -------- | -------- | ------ | ---------------------------- |
+| `provider` | object   | 必填     | -      | 配置目标 AI 服务提供商的信息 |

 `provider`的配置字段说明如下：

-| 名称               | 数据类型        | 填写要求 | 默认值 | 描述                                                                                                                                                                                                                                        |
-|------------------| --------------- | -------- | ------ |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `type`           | string          | 必填     | -      | AI 服务提供商名称                                                                                                                                                                                                                                |
-| `apiTokens`      | array of string | 非必填   | -      | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token，插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。                                                                                                                                                                     |
-| `timeout`        | number          | 非必填   | -      | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000，即 2 分钟。此项配置目前仅用于获取上下文信息，并不影响实际转发大模型请求。                                                                                                                                                                    |
-| `modelMapping`   | map of string   | 非必填   | -      | AI 模型映射表，用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>1. 支持前缀匹配。例如用 "gpt-3-\*" 匹配所有名称以“gpt-3-”开头的模型；<br/>2. 支持使用 "\*" 为键来配置通用兜底映射关系；<br/>3. 如果映射的目标名称为空字符串 ""，则表示保留原模型名称。                                                                               |
-| `protocol`       | string          | 非必填   | -      | 插件对外提供的 API 接口契约。目前支持以下取值：openai（默认值，使用 OpenAI 的接口契约）、original（使用目标服务提供商的原始接口契约）                                                                                                                                                          |
-| `context`        | object          | 非必填   | -      | 配置 AI 对话上下文信息                                                                                                                                                                                                                             |
-| `customSettings` | array of customSetting | 非必填   | -      | 为AI请求指定覆盖或者填充参数                                                                                                                                                                                                                           |
-| `failover`       | object | 非必填   | -      | 配置 apiToken 的 failover 策略，当 apiToken 不可用时，将其移出 apiToken 列表，待健康检测通过后重新添加回 apiToken 列表                                                                                                                                                      |
-| `retryOnFailure` | object | 非必填   | -      | 当请求失败时立即进行重试                                                                                                                                                                                                                              |  
-| `reasoningContentMode`       | string          | 非必填   | -      | 如何处理大模型服务返回的推理内容。目前支持以下取值：passthrough（正常输出推理内容）、ignore（不输出推理内容）、concat（将推理内容拼接在常规输出内容之前）。默认为 passthrough。仅支持通义千问服务。                                                                                                                            |
-| `capabilities`       | map of string | 非必填   | -      | 部分provider的部分ai能力原生兼容openai/v1格式，不需要重写，可以直接转发，通过此配置项指定来开启转发, key表示的是采用的厂商协议能力，values表示的真实的厂商该能力的api path, 厂商协议能力当前支持: openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank |
+| 名称                   | 数据类型               | 填写要求 | 默认值 | 描述                                                                                                                                                                                                                                                                                                                                                                                                                                     |
+| ---------------------- | ---------------------- | -------- | ------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `type`                 | string                 | 必填     | -      | AI 服务提供商名称                                                                                                                                                                                                                                                                                                                                                                                                                        |
+| `apiTokens`            | array of string        | 非必填   | -      | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token，插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。                                                                                                                                                                                                                                                                                                               |
+| `timeout`              | number                 | 非必填   | -      | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000，即 2 分钟。此项配置目前仅用于获取上下文信息，并不影响实际转发大模型请求。                                                                                                                                                                                                                                                                                                           |
+| `modelMapping`         | map of string          | 非必填   | -      | AI 模型映射表，用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>1. 支持前缀匹配。例如用 "gpt-3-\*" 匹配所有名称以“gpt-3-”开头的模型；<br/>2. 支持使用 "\*" 为键来配置通用兜底映射关系；<br/>3. 如果映射的目标名称为空字符串 ""，则表示保留原模型名称。<br/>4. 支持以 `~` 前缀使用正则匹配。例如用 "~gpt(.\*)" 匹配所有以 "gpt" 开头的模型并支持在目标模型中使用 capture group 引用匹配到的内容。示例: "~gpt(.\*): openai/gpt\$1" |
+| `protocol`             | string                 | 非必填   | -      | 插件对外提供的 API 接口契约。目前支持以下取值：openai（默认值，使用 OpenAI 的接口契约）、original（使用目标服务提供商的原始接口契约）                                                                                                                                                                                                                                                                                                    |
+| `context`              | object                 | 非必填   | -      | 配置 AI 对话上下文信息                                                                                                                                                                                                                                                                                                                                                                                                                   |
+| `customSettings`       | array of customSetting | 非必填   | -      | 为 AI 请求指定覆盖或者填充参数                                                                                                                                                                                                                                                                                                                                                                                                           |
+| `failover`             | object                 | 非必填   | -      | 配置 apiToken 的 failover 策略，当 apiToken 不可用时，将其移出 apiToken 列表，待健康检测通过后重新添加回 apiToken 列表                                                                                                                                                                                                                                                                                                                   |
+| `retryOnFailure`       | object                 | 非必填   | -      | 当请求失败时立即进行重试                                                                                                                                                                                                                                                                                                                                                                                                                 |
+| `reasoningContentMode` | string                 | 非必填   | -      | 如何处理大模型服务返回的推理内容。目前支持以下取值：passthrough（正常输出推理内容）、ignore（不输出推理内容）、concat（将推理内容拼接在常规输出内容之前）。默认为 passthrough。仅支持通义千问服务。                                                                                                                                                                                                                                      |
+| `capabilities`         | map of string          | 非必填   | -      | 部分 provider 的部分 ai 能力原生兼容 openai/v1 格式，不需要重写，可以直接转发，通过此配置项指定来开启转发, key 表示的是采用的厂商协议能力，values 表示的真实的厂商该能力的 api path, 厂商协议能力当前支持: openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank                                                                                                           |

 `context`的配置字段说明如下：

-| 名称            | 数据类型   | 填写要求 | 默认值 | 描述                               |
-|---------------|--------|------|-----|----------------------------------|
-| `fileUrl`     | string | 必填   | -   | 保存 AI 对话上下文的文件 URL。仅支持纯文本类型的文件内容 |
-| `serviceName` | string | 必填   | -   | URL 所对应的 Higress 后端服务完整名称        |
-| `servicePort` | number | 必填   | -   | URL 所对应的 Higress 后端服务访问端口        |
-
+| 名称          | 数据类型 | 填写要求 | 默认值 | 描述                                                     |
+| ------------- | -------- | -------- | ------ | -------------------------------------------------------- |
+| `fileUrl`     | string   | 必填     | -      | 保存 AI 对话上下文的文件 URL。仅支持纯文本类型的文件内容 |
+| `serviceName` | string   | 必填     | -      | URL 所对应的 Higress 后端服务完整名称                    |
+| `servicePort` | number   | 必填     | -      | URL 所对应的 Higress 后端服务访问端口                    |

 `customSettings`的配置字段说明如下：

 | 名称        | 数据类型              | 填写要求 | 默认值 | 描述                                                                                                                         |
 | ----------- | --------------------- | -------- | ------ | ---------------------------------------------------------------------------------------------------------------------------- |
 | `name`      | string                | 必填     | -      | 想要设置的参数的名称，例如`max_tokens`                                                                                       |
-| `value`     | string/int/float/bool | 必填     | -      | 想要设置的参数的值，例如0                                                                                                    |
+| `value`     | string/int/float/bool | 必填     | -      | 想要设置的参数的值，例如 0                                                                                                   |
 | `mode`      | string                | 非必填   | "auto" | 参数设置的模式，可以设置为"auto"或者"raw"，如果为"auto"则会自动根据协议对参数名做改写，如果为"raw"则不会有任何改写和限制检查 |
-| `overwrite` | bool                  | 非必填   | true   | 如果为false则只在用户没有设置这个参数时填充参数，否则会直接覆盖用户原有的参数设置                                            |
-
-
-custom-setting会遵循如下表格，根据`name`和协议来替换对应的字段，用户需要填写表格中`settingName`列中存在的值。例如用户将`name`设置为`max_tokens`，在openai协议中会替换`max_tokens`，在gemini中会替换`maxOutputTokens`。
-`none`表示该协议不支持此参数。如果`name`不在此表格中或者对应协议不支持此参数，同时没有设置raw模式，则配置不会生效。
+| `overwrite` | bool                  | 非必填   | true   | 如果为 false 则只在用户没有设置这个参数时填充参数，否则会直接覆盖用户原有的参数设置                                          |

+custom-setting 会遵循如下表格，根据`name`和协议来替换对应的字段，用户需要填写表格中`settingName`列中存在的值。例如用户将`name`设置为`max_tokens`，在 openai 协议中会替换`max_tokens`，在 gemini 中会替换`maxOutputTokens`。
+`none`表示该协议不支持此参数。如果`name`不在此表格中或者对应协议不支持此参数，同时没有设置 raw 模式，则配置不会生效。

 | settingName | openai      | baidu             | spark       | qwen        | gemini          | hunyuan     | claude      | minimax            |
 | ----------- | ----------- | ----------------- | ----------- | ----------- | --------------- | ----------- | ----------- | ------------------ |
@@ -76,32 +72,31 @@ custom-setting会遵循如下表格，根据`name`和协议来替换对应的字
 | top_k       | none        | none              | top_k       | none        | topK            | none        | top_k       | none               |
 | seed        | seed        | none              | none        | seed        | none            | none        | none        | none               |

-如果启用了raw模式，custom-setting会直接用输入的`name`和`value`去更改请求中的json内容，而不对参数名称做任何限制和修改。
-对于大多数协议，custom-setting都会在json内容的根路径修改或者填充参数。对于`qwen`协议，ai-proxy会在json的`parameters`子路径下做配置。对于`gemini`协议，则会在`generation_config`子路径下做配置。
+如果启用了 raw 模式，custom-setting 会直接用输入的`name`和`value`去更改请求中的 json 内容，而不对参数名称做任何限制和修改。
+对于大多数协议，custom-setting 都会在 json 内容的根路径修改或者填充参数。对于`qwen`协议，ai-proxy 会在 json 的`parameters`子路径下做配置。对于`gemini`协议，则会在`generation_config`子路径下做配置。

 `failover` 的配置字段说明如下：

-| 名称               | 数据类型   | 填写要求            | 默认值   | 描述                                |
-|------------------|--------|-----------------|-------|-----------------------------------|
-| enabled | bool   | 非必填             | false | 是否启用 apiToken 的 failover 机制       |
-| failureThreshold | int    | 非必填             | 3     | 触发 failover 连续请求失败的阈值（次数）         |
-| successThreshold | int    | 非必填             | 1     | 健康检测的成功阈值（次数）                     |
-| healthCheckInterval | int    | 非必填             | 5000  | 健康检测的间隔时间，单位毫秒                    |
-| healthCheckTimeout | int    | 非必填             | 5000  | 健康检测的超时时间，单位毫秒                    |
-| healthCheckModel | string | 启用 failover 时必填 |      | 健康检测使用的模型                         |
-| failoverOnStatus | array of string | 非必填    | ["4.*", "5.*"]     | 需要进行 failover 的原始请求的状态码，支持正则表达式匹配 |
+| 名称                | 数据类型        | 填写要求             | 默认值         | 描述                                                     |
+| ------------------- | --------------- | -------------------- | -------------- | -------------------------------------------------------- |
+| enabled             | bool            | 非必填               | false          | 是否启用 apiToken 的 failover 机制                       |
+| failureThreshold    | int             | 非必填               | 3              | 触发 failover 连续请求失败的阈值（次数）                 |
+| successThreshold    | int             | 非必填               | 1              | 健康检测的成功阈值（次数）                               |
+| healthCheckInterval | int             | 非必填               | 5000           | 健康检测的间隔时间，单位毫秒                             |
+| healthCheckTimeout  | int             | 非必填               | 5000           | 健康检测的超时时间，单位毫秒                             |
+| healthCheckModel    | string          | 启用 failover 时必填 |                | 健康检测使用的模型                                       |
+| failoverOnStatus    | array of string | 非必填               | ["4.*", "5.*"] | 需要进行 failover 的原始请求的状态码，支持正则表达式匹配 |

 `retryOnFailure` 的配置字段说明如下：

 目前仅支持对非流式请求进行重试。

-
-| 名称               | 数据类型   | 填写要求   | 默认值   | 描述                        |
-|------------------|--------|--------|-------|---------------------------|
-| enabled | bool   | 非必填    | false | 是否启用失败请求重试                |
-| maxRetries | int    | 非必填    | 1     | 最大重试次数                    |
-| retryTimeout | int    | 非必填    | 30000 | 重试超时时间，单位毫秒               |
-| retryOnStatus | array of string | 非必填    | ["4.*", "5.*"]     | 需要进行重试的原始请求的状态码，支持正则表达式匹配 |
+| 名称          | 数据类型        | 填写要求 | 默认值         | 描述                                               |
+| ------------- | --------------- | -------- | -------------- | -------------------------------------------------- |
+| enabled       | bool            | 非必填   | false          | 是否启用失败请求重试                               |
+| maxRetries    | int             | 非必填   | 1              | 最大重试次数                                       |
+| retryTimeout  | int             | 非必填   | 30000          | 重试超时时间，单位毫秒                             |
+| retryOnStatus | array of string | 非必填   | ["4.*", "5.*"] | 需要进行重试的原始请求的状态码，支持正则表达式匹配 |

 ### 提供商特有配置

@@ -109,19 +104,18 @@ custom-setting会遵循如下表格，根据`name`和协议来替换对应的字

 OpenAI 所对应的 `type` 为 `openai`。它特有的配置字段如下:

-| 名称              | 数据类型 | 填写要求 | 默认值 | 描述                                                                          |
-|-------------------|----------|----------|--------|-------------------------------------------------------------------------------|
-| `openaiCustomUrl` | string   | 非必填   | -      | 基于OpenAI协议的自定义后端URL，例如: www.example.com/myai/v1/chat/completions |
-| `responseJsonSchema` | object | 非必填 | - | 预先定义OpenAI响应需满足的Json Schema, 注意目前仅特定的几种模型支持该用法|
-
+| 名称                 | 数据类型 | 填写要求 | 默认值 | 描述                                                                               |
+| -------------------- | -------- | -------- | ------ | ---------------------------------------------------------------------------------- |
+| `openaiCustomUrl`    | string   | 非必填   | -      | 基于 OpenAI 协议的自定义后端 URL，例如: `www.example.com/myai/v1/chat/completions` |
+| `responseJsonSchema` | object   | 非必填   | -      | 预先定义 OpenAI 响应需满足的 Json Schema, 注意目前仅特定的几种模型支持该用法       |

 #### Azure OpenAI

 Azure OpenAI 所对应的 `type` 为 `azure`。它特有的配置字段如下：

-| 名称                | 数据类型   | 填写要求 | 默认值 | 描述                                           |
-|-------------------|--------|------|-----|----------------------------------------------|
-| `azureServiceUrl` | string | 必填   | -   | Azure OpenAI 服务的 URL，须包含 `api-version` 查询参数。 |
+| 名称              | 数据类型 | 填写要求 | 默认值 | 描述                                                     |
+| ----------------- | -------- | -------- | ------ | -------------------------------------------------------- |
+| `azureServiceUrl` | string   | 必填     | -      | Azure OpenAI 服务的 URL，须包含 `api-version` 查询参数。 |

 **注意：** Azure OpenAI 只支持配置一个 API Token。

@@ -129,19 +123,19 @@ Azure OpenAI 所对应的 `type` 为 `azure`。它特有的配置字段如下：

 月之暗面所对应的 `type` 为 `moonshot`。它特有的配置字段如下：

-| 名称               | 数据类型   | 填写要求 | 默认值 | 描述                                                          |
-|------------------|--------|------|-----|-------------------------------------------------------------|
-| `moonshotFileId` | string | 非必填  | -   | 通过文件接口上传至月之暗面的文件 ID，其内容将被用做 AI 对话的上下文。不可与 `context` 字段同时配置。 |
+| 名称             | 数据类型 | 填写要求 | 默认值 | 描述                                                                                                 |
+| ---------------- | -------- | -------- | ------ | ---------------------------------------------------------------------------------------------------- |
+| `moonshotFileId` | string   | 非必填   | -      | 通过文件接口上传至月之暗面的文件 ID，其内容将被用做 AI 对话的上下文。不可与 `context` 字段同时配置。 |

 #### 通义千问（Qwen）

 通义千问所对应的 `type` 为 `qwen`。它特有的配置字段如下：

-| 名称                   | 数据类型        | 填写要求 | 默认值 | 描述                                                         |
-| ---------------------- | --------------- | -------- | ------ | ------------------------------------------------------------ |
-| `qwenEnableSearch`     | boolean         | 非必填   | -      | 是否启用通义千问内置的互联网搜索功能。                       |
-| `qwenFileIds`          | array of string | 非必填   | -      | 通过文件接口上传至Dashscope的文件 ID，其内容将被用做 AI 对话的上下文。不可与 `context` 字段同时配置。 |
-| `qwenEnableCompatible` | boolean         | 非必填   | false  | 开启通义千问兼容模式。启用通义千问兼容模式后，将调用千问的兼容模式接口，同时对请求/响应不做修改。 |
+| 名称                   | 数据类型        | 填写要求 | 默认值 | 描述                                                                                                    |
+| ---------------------- | --------------- | -------- | ------ | ------------------------------------------------------------------------------------------------------- |
+| `qwenEnableSearch`     | boolean         | 非必填   | -      | 是否启用通义千问内置的互联网搜索功能。                                                                  |
+| `qwenFileIds`          | array of string | 非必填   | -      | 通过文件接口上传至 Dashscope 的文件 ID，其内容将被用做 AI 对话的上下文。不可与 `context` 字段同时配置。 |
+| `qwenEnableCompatible` | boolean         | 非必填   | false  | 开启通义千问兼容模式。启用通义千问兼容模式后，将调用千问的兼容模式接口，同时对请求/响应不做修改。       |

 #### 百川智能 (Baichuan AI)

@@ -151,13 +145,13 @@ Azure OpenAI 所对应的 `type` 为 `azure`。它特有的配置字段如下：

 零一万物所对应的 `type` 为 `yi`。它并无特有的配置字段。

-#### 智谱AI（Zhipu AI）
+#### 智谱 AI（Zhipu AI）

-智谱AI所对应的 `type` 为 `zhipuai`。它并无特有的配置字段。
+智谱 AI 所对应的 `type` 为 `zhipuai`。它并无特有的配置字段。

 #### DeepSeek（DeepSeek）

-DeepSeek所对应的 `type` 为 `deepseek`。它并无特有的配置字段。
+DeepSeek 所对应的 `type` 为 `deepseek`。它并无特有的配置字段。

 #### Groq

@@ -167,13 +161,13 @@ Groq 所对应的 `type` 为 `groq`。它并无特有的配置字段。

 文心一言所对应的 `type` 为 `baidu`。它并无特有的配置字段。

-#### 360智脑
+#### 360 智脑

-360智脑所对应的 `type` 为 `ai360`。它并无特有的配置字段。
+360 智脑所对应的 `type` 为 `ai360`。它并无特有的配置字段。

-#### GitHub模型
+#### GitHub 模型

-GitHub模型所对应的 `type` 为 `github`。它并无特有的配置字段。
+GitHub 模型所对应的 `type` 为 `github`。它并无特有的配置字段。

 #### Mistral

@@ -181,38 +175,38 @@ Mistral 所对应的 `type` 为 `mistral`。它并无特有的配置字段。

 #### MiniMax

-MiniMax所对应的 `type` 为 `minimax`。它特有的配置字段如下：
+MiniMax 所对应的 `type` 为 `minimax`。它特有的配置字段如下：

-| 名称             | 数据类型 | 填写要求                       | 默认值 | 描述                                                             |
-| ---------------- | -------- | ------------------------------ | ------ |----------------------------------------------------------------|
-| `minimaxApiType` | string   | v2 和 pro 中选填一项           | v2     | v2 代表 ChatCompletion v2 API，pro 代表 ChatCompletion Pro API      |
+| 名称             | 数据类型 | 填写要求                       | 默认值 | 描述                                                                    |
+| ---------------- | -------- | ------------------------------ | ------ | ----------------------------------------------------------------------- |
+| `minimaxApiType` | string   | v2 和 pro 中选填一项           | v2     | v2 代表 ChatCompletion v2 API，pro 代表 ChatCompletion Pro API          |
 | `minimaxGroupId` | string   | `minimaxApiType` 为 pro 时必填 | -      | `minimaxApiType` 为 pro 时使用 ChatCompletion Pro API，需要设置 groupID |

 #### Anthropic Claude

 Anthropic Claude 所对应的 `type` 为 `claude`。它特有的配置字段如下：

-| 名称        | 数据类型   | 填写要求 | 默认值 | 描述                               |
-|-----------|--------|------|-----|----------------------------------|
-| `claudeVersion` | string | 可选   | -   | Claude 服务的 API 版本，默认为 2023-06-01 |
+| 名称            | 数据类型 | 填写要求 | 默认值 | 描述                                      |
+| --------------- | -------- | -------- | ------ | ----------------------------------------- |
+| `claudeVersion` | string   | 可选     | -      | Claude 服务的 API 版本，默认为 2023-06-01 |

 #### Ollama

 Ollama 所对应的 `type` 为 `ollama`。它特有的配置字段如下：

-| 名称                | 数据类型   | 填写要求 | 默认值 | 描述                                           |
-|-------------------|--------|------|-----|----------------------------------------------|
-| `ollamaServerHost` | string | 必填   | -   | Ollama 服务器的主机地址 |
-| `ollamaServerPort` | number | 必填   | -   | Ollama 服务器的端口号，默认为11434 |
+| 名称               | 数据类型 | 填写要求 | 默认值 | 描述                                |
+| ------------------ | -------- | -------- | ------ | ----------------------------------- |
+| `ollamaServerHost` | string   | 必填     | -      | Ollama 服务器的主机地址             |
+| `ollamaServerPort` | number   | 必填     | -      | Ollama 服务器的端口号，默认为 11434 |

 #### 混元

 混元所对应的 `type` 为 `hunyuan`。它特有的配置字段如下：

-| 名称                | 数据类型   | 填写要求 | 默认值 | 描述                                           |
-|-------------------|--------|------|-----|----------------------------------------------|
-| `hunyuanAuthId` | string | 必填   | -   | 混元用于v3版本认证的id |
-| `hunyuanAuthKey` | string | 必填   | -   | 混元用于v3版本认证的key |
+| 名称             | 数据类型 | 填写要求 | 默认值 | 描述                       |
+| ---------------- | -------- | -------- | ------ | -------------------------- |
+| `hunyuanAuthId`  | string   | 必填     | -      | 混元用于 v3 版本认证的 id  |
+| `hunyuanAuthKey` | string   | 必填     | -      | 混元用于 v3 版本认证的 key |

 #### 阶跃星辰 (Stepfun)

@@ -222,23 +216,24 @@ Ollama 所对应的 `type` 为 `ollama`。它特有的配置字段如下：

 Cloudflare Workers AI 所对应的 `type` 为 `cloudflare`。它特有的配置字段如下：

-| 名称                | 数据类型   | 填写要求 | 默认值 | 描述                                                                                                                         |
-|-------------------|--------|------|-----|----------------------------------------------------------------------------------------------------------------------------|
-| `cloudflareAccountId` | string | 必填   | -   | [Cloudflare Account ID](https://developers.cloudflare.com/workers-ai/get-started/rest-api/#1-get-api-token-and-account-id) |
+| 名称                  | 数据类型 | 填写要求 | 默认值 | 描述                                                                                                                       |
+| --------------------- | -------- | -------- | ------ | -------------------------------------------------------------------------------------------------------------------------- |
+| `cloudflareAccountId` | string   | 必填     | -      | [Cloudflare Account ID](https://developers.cloudflare.com/workers-ai/get-started/rest-api/#1-get-api-token-and-account-id) |

 #### 星火 (Spark)

 星火所对应的 `type` 为 `spark`。它并无特有的配置字段。

-讯飞星火认知大模型的`apiTokens`字段值为`APIKey:APISecret`。即填入自己的APIKey与APISecret，并以`:`分隔。
+讯飞星火认知大模型的`apiTokens`字段值为`APIKey:APISecret`。即填入自己的 APIKey 与 APISecret，并以`:`分隔。

 #### Gemini

 Gemini 所对应的 `type` 为 `gemini`。它特有的配置字段如下：

-| 名称                  | 数据类型 | 填写要求 | 默认值 | 描述                                                                                              |
-| --------------------- | -------- | -------- |-----|-------------------------------------------------------------------------------------------------|
-| `geminiSafetySetting` | map of string   | 非必填     | -   | Gemini AI内容过滤和安全级别设定。参考[Safety settings](https://ai.google.dev/gemini-api/docs/safety-settings) |
+| 名称                  | 数据类型      | 填写要求 | 默认值   | 描述                                                                                                                                       |
+| --------------------- | ------------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
+| `geminiSafetySetting` | map of string | 非必填   | -        | Gemini AI 内容过滤和安全级别设定。参考[Safety settings](https://ai.google.dev/gemini-api/docs/safety-settings)                             |
+| `apiVersion`          | string        | 非必填   | `v1beta` | 用于指定 API 的版本，可选择 `v1` 或 `v1beta`。版本差异请参考 [API versions explained](https://ai.google.dev/gemini-api/docs/api-versions)。 |

 #### DeepL

@@ -253,18 +248,42 @@ DeepL 所对应的 `type` 为 `deepl`。它特有的配置字段如下：
 Cohere 所对应的 `type` 为 `cohere`。它并无特有的配置字段。

 #### Together-AI
+
 Together-AI 所对应的 `type` 为 `together-ai`。它并无特有的配置字段。

 #### Dify
+
 Dify 所对应的 `type` 为 `dify`。它特有的配置字段如下:

-| 名称 | 数据类型 | 填写要求 | 默认值 | 描述                         |
-| -- | -------- |------| ------ | ---------------------------- |
-| `difyApiUrl` | string   | 非必填  | -      | dify私有化部署的url |
-| `botType` | string   | 非必填  | -      | dify的应用类型，Chat/Completion/Agent/Workflow |
-| `inputVariable` | string   | 非必填  | -      | dify中应用类型为workflow时需要设置输入变量，当botType为workflow时一起使用 |
-| `outputVariable` | string   | 非必填  | -      | dify中应用类型为workflow时需要设置输出变量，当botType为workflow时一起使用 |
+| 名称             | 数据类型 | 填写要求 | 默认值 | 描述                                                                             |
+| ---------------- | -------- | -------- | ------ | -------------------------------------------------------------------------------- |
+| `difyApiUrl`     | string   | 非必填   | -      | dify 私有化部署的 url                                                            |
+| `botType`        | string   | 非必填   | -      | dify 的应用类型，Chat/Completion/Agent/Workflow                                  |
+| `inputVariable`  | string   | 非必填   | -      | dify 中应用类型为 workflow 时需要设置输入变量，当 botType 为 workflow 时一起使用 |
+| `outputVariable` | string   | 非必填   | -      | dify 中应用类型为 workflow 时需要设置输出变量，当 botType 为 workflow 时一起使用 |

+#### Google Vertex AI
+
+Google Vertex AI 所对应的 `type` 为 `vertex`。它特有的配置字段如下：
+
+| 名称                         | 数据类型       | 填写要求   | 默认值    | 描述                                                                            |
+|-----------------------------|---------------|--------|--------|-------------------------------------------------------------------------------|
+| `vertexAuthKey`             | string        | 必填     | -      | 用于认证的 Google Service Account JSON Key，格式为 PEM 编码的 PKCS#8 私钥和 client_email 等信息 |
+| `vertexRegion`              | string        | 必填     | -      | Google Cloud 区域（如 us-central1, europe-west4 等），用于构建 Vertex API 地址             |
+| `vertexProjectId`           | string        | 必填     | -      | Google Cloud 项目 ID，用于标识目标 GCP 项目                                              |
+| `vertexAuthServiceName`     | string        | 必填     | -      | 用于 OAuth2 认证的服务名称，该服务用于访问 oauth2.googleapis.com                               |
+| `vertexGeminiSafetySetting` | map of string | 非必填    | -      | Gemini 模型的内容安全过滤设置。                                                           |
+| `vertexTokenRefreshAhead`   | number        | 非必填    | -      | Vertex access token 的提前刷新时间（单位：秒）                                          |
+
+#### AWS Bedrock
+
+AWS Bedrock 所对应的 `type` 为 `bedrock`。它特有的配置字段如下：
+
+| 名称             | 数据类型   | 填写要求 | 默认值 | 描述                           |
+|----------------|--------|------|-----|------------------------------|
+| `awsAccessKey` | string | 必填   | -   | AWS Access Key，用于身份认证        |
+| `awsSecretKey` | string | 必填   | -   | AWS Secret Access Key，用于身份认证 |
+| `awsRegion`    | string | 必填   | -   | AWS 区域，例如：us-east-1          |

 ## 用法示例

@@ -376,20 +395,20 @@ provider:
 provider:
  type: qwen
  apiTokens:
-    - "YOUR_QWEN_API_TOKEN"
+    - 'YOUR_QWEN_API_TOKEN'
  modelMapping:
-    'gpt-3': "qwen-turbo"
-    'gpt-35-turbo': "qwen-plus"
-    'gpt-4-turbo': "qwen-max"
-    'gpt-4-*': "qwen-max"
-    'gpt-4o': "qwen-vl-plus"
+    'gpt-3': 'qwen-turbo'
+    'gpt-35-turbo': 'qwen-plus'
+    'gpt-4-turbo': 'qwen-max'
+    'gpt-4-*': 'qwen-max'
+    'gpt-4o': 'qwen-vl-plus'
    'text-embedding-v1': 'text-embedding-v1'
-    '*': "qwen-turbo"
+    '*': 'qwen-turbo'
 ```

 **AI 对话请求示例**

-URL: http://your-domain/v1/chat/completions
+URL: <http://your-domain/v1/chat/completions>

 请求示例：

@@ -434,7 +453,7 @@ URL: http://your-domain/v1/chat/completions

 **多模态模型 API 请求示例（适用于 `qwen-vl-plus` 和 `qwen-vl-max` 模型）**

-URL: http://your-domain/v1/chat/completions
+URL: <http://your-domain/v1/chat/completions>

 请求示例：

@@ -493,7 +512,7 @@ URL: http://your-domain/v1/chat/completions

 **文本向量请求示例**

-URL: http://your-domain/v1/embeddings
+URL: <http://your-domain/v1/embeddings>

 请求示例：

@@ -606,12 +625,12 @@ provider:
 provider:
  type: qwen
  apiTokens:
-    - "YOUR_QWEN_API_TOKEN"
+    - 'YOUR_QWEN_API_TOKEN'
  modelMapping:
-    "*": "qwen-long" # 通义千问的文件上下文只能在 qwen-long 模型下使用
+    '*': 'qwen-long' # 通义千问的文件上下文只能在 qwen-long 模型下使用
  qwenFileIds:
-    - "file-fe-xxx"
-    - "file-fe-yyy"
+    - 'file-fe-xxx'
+    - 'file-fe-yyy'
 ```

 **请求示例**
@@ -653,7 +672,7 @@ provider:
 }
 ```

-### 使用original协议代理百炼智能体应用
+### 使用 original 协议代理百炼智能体应用

 **配置信息**

@@ -661,17 +680,18 @@ provider:
 provider:
  type: qwen
  apiTokens:
-    - "YOUR_DASHSCOPE_API_TOKEN"
+    - 'YOUR_DASHSCOPE_API_TOKEN'
  protocol: original
 ```

 **请求实例**
+
 ```json
 {
  "input": {
    "prompt": "介绍一下Dubbo"
  },
-  "parameters":  {},
+  "parameters": {},
  "debug": {}
 }
 ```
@@ -789,7 +809,7 @@ provider:
 provider:
  type: groq
  apiTokens:
-    - "YOUR_GROQ_API_TOKEN"
+    - 'YOUR_GROQ_API_TOKEN'
 ```

 **请求示例**
@@ -848,8 +868,8 @@ provider:
 provider:
  type: claude
  apiTokens:
-    - "YOUR_CLAUDE_API_TOKEN"
-  version: "2023-06-01"
+    - 'YOUR_CLAUDE_API_TOKEN'
+  version: '2023-06-01'
 ```

 **请求示例**
@@ -899,14 +919,14 @@ provider:

 ```yaml
 provider:
-  type: "hunyuan"
-  hunyuanAuthKey: "<YOUR AUTH KEY>"
+  type: 'hunyuan'
+  hunyuanAuthKey: '<YOUR AUTH KEY>'
  apiTokens:
-    - ""
-  hunyuanAuthId: "<YOUR AUTH ID>"
+    - ''
+  hunyuanAuthId: '<YOUR AUTH ID>'
  timeout: 1200000
  modelMapping:
-    "*": "hunyuan-lite"
+    '*': 'hunyuan-lite'
 ```

 **请求示例**
@@ -967,10 +987,10 @@ curl --location 'http://<your higress domain>/v1/chat/completions' \
 provider:
  type: baidu
  apiTokens:
-    - "YOUR_BAIDU_API_TOKEN"
+    - 'YOUR_BAIDU_API_TOKEN'
  modelMapping:
-    'gpt-3': "ERNIE-4.0"
-    '*': "ERNIE-4.0"
+    'gpt-3': 'ERNIE-4.0'
+    '*': 'ERNIE-4.0'
 ```

 **请求示例**
@@ -1014,7 +1034,7 @@ provider:
 }
 ```

-### 使用 OpenAI 协议代理MiniMax服务
+### 使用 OpenAI 协议代理 MiniMax 服务

 **配置信息**

@@ -1022,11 +1042,11 @@ provider:
 provider:
  type: minimax
  apiTokens:
-    - "YOUR_MINIMAX_API_TOKEN"
+    - 'YOUR_MINIMAX_API_TOKEN'
  modelMapping:
-    "gpt-3": "abab6.5s-chat"
-    "gpt-4": "abab6.5g-chat"
-    "*": "abab6.5t-chat"
+    'gpt-3': 'abab6.5s-chat'
+    'gpt-4': 'abab6.5g-chat'
+    '*': 'abab6.5t-chat'
 ```

 **请求示例**
@@ -1090,12 +1110,12 @@ provider:
 provider:
  type: github
  apiTokens:
-    - "YOUR_GITHUB_ACCESS_TOKEN"
+    - 'YOUR_GITHUB_ACCESS_TOKEN'
  modelMapping:
-    "gpt-4o": "gpt-4o"
-    "gpt-4": "Phi-3.5-MoE-instruct"
-    "gpt-3.5": "cohere-command-r-08-2024"
-    "text-embedding-3-large": "text-embedding-3-large"
+    'gpt-4o': 'gpt-4o'
+    'gpt-4': 'Phi-3.5-MoE-instruct'
+    'gpt-3.5': 'cohere-command-r-08-2024'
+    'text-embedding-3-large': 'text-embedding-3-large'
 ```

 **请求示例**
@@ -1121,6 +1141,7 @@ provider:
 ```

 **响应示例**
+
 ```json
 {
  "choices": [
@@ -1183,7 +1204,7 @@ provider:
 }
 ```

-### 使用 OpenAI 协议代理360智脑服务
+### 使用 OpenAI 协议代理 360 智脑服务

 **配置信息**

@@ -1191,13 +1212,13 @@ provider:
 provider:
  type: ai360
  apiTokens:
-    - "YOUR_360_API_TOKEN"
+    - 'YOUR_360_API_TOKEN'
  modelMapping:
-    "gpt-4o": "360gpt-turbo-responsibility-8k"
-    "gpt-4": "360gpt2-pro"
-    "gpt-3.5": "360gpt-turbo"
-    "text-embedding-3-small": "embedding_s1_v1.2"
-    "*": "360gpt-pro"
+    'gpt-4o': '360gpt-turbo-responsibility-8k'
+    'gpt-4': '360gpt2-pro'
+    'gpt-3.5': '360gpt-turbo'
+    'text-embedding-3-small': 'embedding_s1_v1.2'
+    '*': '360gpt-pro'
 ```

 **请求示例**
@@ -1257,14 +1278,14 @@ provider:

 **文本向量请求示例**

-URL: http://your-domain/v1/embeddings
+URL: <http://your-domain/v1/embeddings>

 请求示例：

 ```json
 {
-  "input":["你好"],
-  "model":"text-embedding-3-small"
+  "input": ["你好"],
+  "model": "text-embedding-3-small"
 }
 ```

@@ -1305,10 +1326,10 @@ URL: http://your-domain/v1/embeddings
 provider:
  type: cloudflare
  apiTokens:
-    - "YOUR_WORKERS_AI_API_TOKEN"
-  cloudflareAccountId: "YOUR_CLOUDFLARE_ACCOUNT_ID"
+    - 'YOUR_WORKERS_AI_API_TOKEN'
+  cloudflareAccountId: 'YOUR_CLOUDFLARE_ACCOUNT_ID'
  modelMapping:
-    "*": "@cf/meta/llama-3-8b-instruct"
+    '*': '@cf/meta/llama-3-8b-instruct'
 ```

 **请求示例**
@@ -1348,7 +1369,7 @@ provider:
 }
 ```

-### 使用 OpenAI 协议代理Spark服务
+### 使用 OpenAI 协议代理 Spark 服务

 **配置信息**

@@ -1356,11 +1377,11 @@ provider:
 provider:
  type: spark
  apiTokens:
-    - "APIKey:APISecret"
+    - 'APIKey:APISecret'
  modelMapping:
-    "gpt-4o": "generalv3.5"
-    "gpt-4": "generalv3"
-    "*": "general"
+    'gpt-4o': 'generalv3.5'
+    'gpt-4': 'generalv3'
+    '*': 'general'
 ```

 **请求示例**
@@ -1474,8 +1495,8 @@ provider:
 provider:
  type: deepl
  apiTokens:
-    - "YOUR_DEEPL_API_TOKEN"
-  targetLang: "ZH"
+    - 'YOUR_DEEPL_API_TOKEN'
+  targetLang: 'ZH'
 ```

 **请求示例**
@@ -1500,6 +1521,7 @@ provider:
 ```

 **响应示例**
+
 ```json
 {
  "choices": [
@@ -1522,16 +1544,18 @@ provider:
 ### 使用 OpenAI 协议代理 Together-AI 服务

 **配置信息**
+
 ```yaml
 provider:
  type: together-ai
  apiTokens:
-    - "YOUR_TOGETHER_AI_API_TOKEN"
+    - 'YOUR_TOGETHER_AI_API_TOKEN'
  modelMapping:
-    "*": "Qwen/Qwen2.5-72B-Instruct-Turbo"
+    '*': 'Qwen/Qwen2.5-72B-Instruct-Turbo'
 ```

 **请求示例**
+
 ```json
 {
  "model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
@@ -1545,6 +1569,7 @@ provider:
 ```

 **响应示例**
+
 ```json
 {
  "id": "8f5809d54b73efac",
@@ -1576,16 +1601,18 @@ provider:
 ### 使用 OpenAI 协议代理 Dify 服务

 **配置信息**
+
 ```yaml
 provider:
  type: dify
  apiTokens:
-    - "YOUR_DIFY_API_TOKEN"
+    - 'YOUR_DIFY_API_TOKEN'
  modelMapping:
-    "*": "dify"
+    '*': 'dify'
 ```

 **请求示例**
+
 ```json
 {
  "model": "gpt-4-turbo",
@@ -1600,6 +1627,7 @@ provider:
 ```

 **响应示例**
+
 ```json
 {
  "id": "e33fc636-f9e8-4fae-8d5e-fbd0acb09401",
@@ -1624,6 +1652,121 @@ provider:
 }
 ```

+### 使用 OpenAI 协议代理 Google Vertex 服务
+
+**配置信息**
+
+```yaml
+provider:
+  type: vertex
+  vertexAuthKey: |
+    {
+      "type": "service_account",
+      "project_id": "your-project-id",
+      "private_key_id": "your-private-key-id",
+      "private_key": "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n",
+      "client_email": "your-service-account@your-project.iam.gserviceaccount.com",
+      "token_uri": "https://oauth2.googleapis.com/token"
+    }
+  vertexRegion: us-central1
+  vertexProjectId: your-project-id
+  vertexAuthServiceName: your-auth-service-name
+```
+
+**请求示例**
+
+```json
+{
+  "model": "gemini-2.0-flash-001",
+  "messages": [
+    {
+      "role": "user",
+      "content": "你好，你是谁？"
+    }
+  ],
+  "stream": false
+}
+```
+
+**响应示例**
+
+```json
+{
+  "id": "chatcmpl-0000000000000",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "你好！我是 Vertex AI 提供的 Gemini 模型，由 Google 开发的人工智能助手。我可以回答问题、提供信息和帮助完成各种任务。有什么我可以帮您的吗？"
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "created": 1729986750,
+  "model": "gemini-2.0-flash-001",
+  "object": "chat.completion",
+  "usage": {
+    "prompt_tokens": 15,
+    "completion_tokens": 43,
+    "total_tokens": 58
+  }
+}
+```
+
+### 使用 OpenAI 协议代理 AWS Bedrock 服务
+
+**配置信息**
+
+```yaml
+provider:
+  type: bedrock
+  awsAccessKey: "YOUR_AWS_ACCESS_KEY_ID"
+  awsSecretKey: "YOUR_AWS_SECRET_ACCESS_KEY"
+  awsRegion: "YOUR_AWS_REGION"
+```
+
+**请求示例**
+
+```json
+{
+  "model": "arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-3-5-haiku-20241022-v1:0",
+  "messages": [
+    {
+      "role": "user",
+      "content": "你好，你是谁？"
+    }
+  ],
+  "stream": false
+}
+```
+
+**响应示例**
+
+```json
+{
+  "id": "dc5812e2-6a62-49d6-829e-5c327b15e4e2",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "你好!我是Claude,一个由Anthropic开发的AI助手。很高兴认识你!我的目标是以诚实、有益且有意义的方式与人类交流。我会尽力提供准确和有帮助的信息,同时保持诚实和正直。请问我今天能为你做些什么呢?"
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "created": 1749657608,
+  "model": "arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-3-5-haiku-20241022-v1:0",
+  "object": "chat.completion",
+  "usage": {
+    "prompt_tokens": 16,
+    "completion_tokens": 101,
+    "total_tokens": 117
+  }
+}
+```
+

 ## 完整配置示例

@@ -1643,7 +1786,7 @@ spec:
        provider:
          type: groq
          apiTokens:
-            - "YOUR_API_TOKEN"
+            - 'YOUR_API_TOKEN'
      ingress:
        - groq
  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
@@ -1655,7 +1798,7 @@ metadata:
    higress.io/backend-protocol: HTTPS
    higress.io/destination: groq.dns
    higress.io/proxy-ssl-name: api.groq.com
-    higress.io/proxy-ssl-server-name: "on"
+    higress.io/proxy-ssl-server-name: 'on'
  labels:
    higress.io/resource-definer: higress
  name: groq
@@ -1716,7 +1859,7 @@ services:
    networks:
      - higress-net
    ports:
-      - "10000:10000"
+      - '10000:10000'
    volumes:
      - ./envoy.yaml:/etc/envoy/envoy.yaml
      - ./plugin.wasm:/etc/envoy/plugin.wasm
@@ -1745,7 +1888,7 @@ static_resources:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
-                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
+                '@type': type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                scheme_header_transformation:
                  scheme_to_overwrite: https
                stat_prefix: ingress_http
@@ -1753,23 +1896,23 @@ static_resources:
                access_log:
                  - name: envoy.access_loggers.stdout
                    typed_config:
-                      "@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog
+                      '@type': type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog
                # Modify as required
                route_config:
                  name: local_route
                  virtual_hosts:
                    - name: local_service
-                      domains: [ "*" ]
+                      domains: ['*']
                      routes:
                        - match:
-                            prefix: "/"
+                            prefix: '/'
                          route:
                            cluster: claude
                            timeout: 300s
                http_filters:
                  - name: claude
                    typed_config:
-                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
+                      '@type': type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
@@ -1780,7 +1923,7 @@ static_resources:
                              local:
                                filename: /etc/envoy/plugin.wasm
                          configuration:
-                            "@type": "type.googleapis.com/google.protobuf.StringValue"
+                            '@type': 'type.googleapis.com/google.protobuf.StringValue'
                            value: | # 插件配置
                              {
                                "provider": {
@@ -1809,8 +1952,8 @@ static_resources:
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
-          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
-          "sni": "api.anthropic.com"
+          '@type': type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
+          'sni': 'api.anthropic.com'
 ```

 访问示例：
--- a/plugins/wasm-go/extensions/ai-proxy/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-proxy/README_EN.md
@@ -208,6 +208,28 @@ For DeepL, the corresponding `type` is `deepl`. Its unique configuration field i
 | ------------ | --------- | ----------- | ------- | ------------------------------------ |
 | `targetLang` | string    | Required    | -       | The target language required by the DeepL translation service |

+#### Google Vertex AI
+For Vertex, the corresponding `type` is `vertex`. Its unique configuration fields are:
+
+| Name                        | Data Type     | Requirement   | Default | Description                                                                                                                                                 |
+|-----------------------------|---------------|---------------| ------ |-------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `vertexAuthKey`             | string        | Required      | -      | Google Service Account JSON Key used for authentication. It must contain a PEM-encoded PKCS#8 private key along with client_email and other information     |
+| `vertexRegion`              | string        | Required      | -      | Google Cloud region (e.g., us-central1, europe-west4) used to build the Vertex API address                                                                  |
+| `vertexProjectId`           | string        | Required      | -      | Google Cloud Project ID, used to identify the target GCP project                                                                                            |
+| `vertexAuthServiceName`     | string        | Required      | -      | Service name for OAuth2 authentication, used to access oauth2.googleapis.com                                                                                |
+| `vertexGeminiSafetySetting` | map of string | Optional      | -      | Gemini model content safety filtering settings.                                                                                                             |
+| `vertexTokenRefreshAhead`   | number        | Optional      | -      | Vertex access token refresh ahead time in seconds                                                                                                           |
+
+#### AWS Bedrock
+
+For AWS Bedrock, the corresponding `type` is `bedrock`. Its unique configuration fields are:
+
+| Name           | Data Type | Requirement | Default | Description                                   |
+|----------------|-----------|-------------|---------|-----------------------------------------------|
+| `awsAccessKey` | string    | Required    | -       | AWS Access Key used for authentication        |
+| `awsSecretKey` | string    | Required    | -       | AWS Secret Access Key used for authentication |
+| `awsRegion`    | string    | Required    | -       | AWS region, e.g., us-east-1                   |
+
 ## Usage Examples

 ### Using OpenAI Protocol Proxy for Azure OpenAI Service
@@ -1411,6 +1433,113 @@ provider:
 }
 ```

+### Utilizing OpenAI Protocol Proxy for Google Vertex Services
+**Configuration Information**
+```yaml
+provider:
+  type: vertex
+  vertexAuthKey: |
+    {
+      "type": "service_account",
+      "project_id": "your-project-id",
+      "private_key_id": "your-private-key-id",
+      "private_key": "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n",
+      "client_email": "your-service-account@your-project.iam.gserviceaccount.com",
+      "token_uri": "https://oauth2.googleapis.com/token"
+    }
+  vertexRegion: us-central1
+  vertexProjectId: your-project-id
+  vertexAuthServiceName: your-auth-service-name
+```
+
+**Request Example**
+```json
+{
+  "model": "gemini-2.0-flash-001",
+  "messages": [
+    {
+      "role": "user",
+      "content": "Who are you?"
+    }
+  ],
+  "stream": false
+}
+```
+
+**Response Example**
+```json
+{
+  "id": "chatcmpl-0000000000000",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "Hello! I am the Gemini model provided by Vertex AI, developed by Google. I can answer questions, provide information, and assist in completing various tasks. How can I help you today?"
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "created": 1729986750,
+  "model": "gemini-2.0-flash-001",
+  "object": "chat.completion",
+  "usage": {
+    "prompt_tokens": 15,
+    "completion_tokens": 43,
+    "total_tokens": 58
+  }
+}
+```
+
+### Utilizing OpenAI Protocol Proxy for AWS Bedrock Services
+**Configuration Information**
+```yaml
+provider:
+  type: bedrock
+  awsAccessKey: "YOUR_AWS_ACCESS_KEY_ID"
+  awsSecretKey: "YOUR_AWS_SECRET_ACCESS_KEY"
+  awsRegion: "YOUR_AWS_REGION"
+```
+
+**Request Example**
+```json
+{
+  "model": "arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-3-5-haiku-20241022-v1:0",
+  "messages": [
+    {
+      "role": "user",
+      "content": "who are you"
+    }
+  ],
+  "stream": false
+}
+```
+
+**Response Example**
+```json
+{
+  "id": "d52da49d-daf3-49d9-a105-0b527481fe14",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "I'm Claude, an AI created by Anthropic. I aim to be helpful, honest, and harmless. I won't pretend to be human, and I'll always try to be direct and truthful about what I am and what I can do."
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "created": 1749659050,
+  "model": "arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-3-5-haiku-20241022-v1:0",
+  "object": "chat.completion",
+  "usage": {
+    "prompt_tokens": 10,
+    "completion_tokens": 57,
+    "total_tokens": 67
+  }
+}
+```
+
 ## Full Configuration Example

 ### Kubernetes Example
--- a/plugins/wasm-go/extensions/ai-proxy/main.go
+++ b/plugins/wasm-go/extensions/ai-proxy/main.go
@@ -352,15 +352,60 @@ func getApiName(path string) provider.ApiName {
 	if strings.HasSuffix(path, "/v1/images/generations") {
 		return provider.ApiNameImageGeneration
 	}
+	if strings.HasSuffix(path, "/v1/images/variations") {
+		return provider.ApiNameImageVariation
+	}
+	if strings.HasSuffix(path, "/v1/images/edits") {
+		return provider.ApiNameImageEdit
+	}
 	if strings.HasSuffix(path, "/v1/batches") {
 		return provider.ApiNameBatches
 	}
+	if util.RegRetrieveBatchPath.MatchString(path) {
+		return provider.ApiNameRetrieveBatch
+	}
+	if util.RegCancelBatchPath.MatchString(path) {
+		return provider.ApiNameCancelBatch
+	}
 	if strings.HasSuffix(path, "/v1/files") {
 		return provider.ApiNameFiles
 	}
+	if util.RegRetrieveFilePath.MatchString(path) {
+		return provider.ApiNameRetrieveFile
+	}
+	if util.RegRetrieveFileContentPath.MatchString(path) {
+		return provider.ApiNameRetrieveFileContent
+	}
 	if strings.HasSuffix(path, "/v1/models") {
 		return provider.ApiNameModels
 	}
+	if strings.HasSuffix(path, "/v1/fine_tuning/jobs") {
+		return provider.ApiNameFineTuningJobs
+	}
+	if util.RegRetrieveFineTuningJobPath.MatchString(path) {
+		return provider.ApiNameFineTuningRetrieveJob
+	}
+	if util.RegRetrieveFineTuningJobEventsPath.MatchString(path) {
+		return provider.PathOpenAIFineTuningJobEvents
+	}
+	if util.RegRetrieveFineTuningJobCheckpointsPath.MatchString(path) {
+		return provider.PathOpenAIFineTuningJobCheckpoints
+	}
+	if util.RegCancelFineTuningJobPath.MatchString(path) {
+		return provider.ApiNameFineTuningCancelJob
+	}
+	if util.RegResumeFineTuningJobPath.MatchString(path) {
+		return provider.ApiNameFineTuningResumeJob
+	}
+	if util.RegPauseFineTuningJobPath.MatchString(path) {
+		return provider.ApiNameFineTuningPauseJob
+	}
+	if util.RegFineTuningCheckpointPermissionPath.MatchString(path) {
+		return provider.ApiNameFineTuningCheckpointPermissions
+	}
+	if util.RegDeleteFineTuningCheckpointPermissionPath.MatchString(path) {
+		return provider.PathOpenAIFineDeleteTuningCheckpointPermission
+	}
 	// cohere style
 	if strings.HasSuffix(path, "/v1/rerank") {
 		return provider.ApiNameCohereV1Rerank
--- a/plugins/wasm-go/extensions/ai-proxy/provider/bedrock.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/bedrock.go
@@ -13,6 +13,7 @@ import (
 	"hash/crc32"
 	"io"
 	"net/http"
+	"net/url"
 	"strconv"
 	"strings"
 	"time"
@@ -22,6 +23,8 @@ import (
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
 )

 const (
@@ -39,8 +42,7 @@ const (
 	requestIdHeader        = "X-Amzn-Requestid"
 )

-type bedrockProviderInitializer struct {
-}
+type bedrockProviderInitializer struct{}

 func (b *bedrockProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if len(config.awsAccessKey) == 0 || len(config.awsSecretKey) == 0 {
@@ -101,7 +103,7 @@ func (b *bedrockProvider) convertEventFromBedrockToOpenAI(ctx wrapper.HttpContex
 		chatChoice.Delta = &chatMessage{Content: bedrockEvent.Delta.Text}
 	}
 	if bedrockEvent.StopReason != nil {
-		chatChoice.FinishReason = stopReasonBedrock2OpenAI(*bedrockEvent.StopReason)
+		chatChoice.FinishReason = util.Ptr(stopReasonBedrock2OpenAI(*bedrockEvent.StopReason))
 	}
 	choices = append(choices, chatChoice)
 	requestId := ctx.GetStringContext(requestIdHeader, "")
@@ -115,7 +117,7 @@ func (b *bedrockProvider) convertEventFromBedrockToOpenAI(ctx wrapper.HttpContex
 	}
 	if bedrockEvent.Usage != nil {
 		openAIFormattedChunk.Choices = choices[:0]
-		openAIFormattedChunk.Usage = usage{
+		openAIFormattedChunk.Usage = &usage{
 			CompletionTokens: bedrockEvent.Usage.OutputTokens,
 			PromptTokens:     bedrockEvent.Usage.InputTokens,
 			TotalTokens:      bedrockEvent.Usage.TotalTokens,
@@ -607,6 +609,11 @@ func (b *bedrockProvider) insertHttpContextMessage(body []byte, content string,
 }

 func (b *bedrockProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header) ([]byte, error) {
+	if gjson.GetBytes(body, "model").Exists() {
+		rawModel := gjson.GetBytes(body, "model").String()
+		encodedModel := url.QueryEscape(rawModel)
+		body, _ = sjson.SetBytes(body, "model", encodedModel)
+	}
 	switch apiName {
 	case ApiNameChatCompletion:
 		return b.onChatCompletionRequestBody(ctx, body, headers)
@@ -748,18 +755,19 @@ func (b *bedrockProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, b
 				Role:    bedrockResponse.Output.Message.Role,
 				Content: outputContent,
 			},
-			FinishReason: stopReasonBedrock2OpenAI(bedrockResponse.StopReason),
+			FinishReason: util.Ptr(stopReasonBedrock2OpenAI(bedrockResponse.StopReason)),
 		},
 	}
 	requestId := ctx.GetStringContext(requestIdHeader, "")
+	modelId, _ := url.QueryUnescape(ctx.GetStringContext(ctxKeyFinalRequestModel, ""))
 	return &chatCompletionResponse{
 		Id:                requestId,
 		Created:           time.Now().UnixMilli() / 1000,
-		Model:             ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
+		Model:             modelId,
 		SystemFingerprint: "",
 		Object:            objectChatCompletion,
 		Choices:           choices,
-		Usage: usage{
+		Usage: &usage{
 			PromptTokens:     bedrockResponse.Usage.InputTokens,
 			CompletionTokens: bedrockResponse.Usage.OutputTokens,
 			TotalTokens:      bedrockResponse.Usage.TotalTokens,
@@ -900,8 +908,8 @@ func (b *bedrockProvider) setAuthHeaders(body []byte, headers http.Header) {
 }

 func (b *bedrockProvider) generateSignature(path, amzDate, dateStamp string, body []byte) string {
+	path = encodeSigV4Path(path)
 	hashedPayload := sha256Hex(body)
-	path = urlEncoding(path)

 	endpoint := fmt.Sprintf(bedrockDefaultDomain, b.config.awsRegion)
 	canonicalHeaders := fmt.Sprintf("host:%s\nx-amz-date:%s\n", endpoint, amzDate)
@@ -918,14 +926,15 @@ func (b *bedrockProvider) generateSignature(path, amzDate, dateStamp string, bod
 	return signature
 }

-func urlEncoding(rawStr string) string {
-	encodedStr := strings.ReplaceAll(rawStr, ":", "%3A")
-	encodedStr = strings.ReplaceAll(encodedStr, "+", "%2B")
-	encodedStr = strings.ReplaceAll(encodedStr, "=", "%3D")
-	encodedStr = strings.ReplaceAll(encodedStr, "&", "%26")
-	encodedStr = strings.ReplaceAll(encodedStr, "$", "%24")
-	encodedStr = strings.ReplaceAll(encodedStr, "@", "%40")
-	return encodedStr
+func encodeSigV4Path(path string) string {
+	segments := strings.Split(path, "/")
+	for i, seg := range segments {
+		if seg == "" {
+			continue
+		}
+		segments[i] = url.PathEscape(seg)
+	}
+	return strings.Join(segments, "/")
 }

 func getSignatureKey(key, dateStamp, region, service string) []byte {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/claude.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/claude.go
@@ -19,22 +19,55 @@ const (
 	claudeDomain             = "api.anthropic.com"
 	claudeChatCompletionPath = "/v1/messages"
 	claudeCompletionPath     = "/v1/complete"
-	defaultVersion           = "2023-06-01"
-	defaultMaxTokens         = 4096
+	claudeDefaultVersion     = "2023-06-01"
+	claudeDefaultMaxTokens   = 4096
 )

 type claudeProviderInitializer struct{}

+type claudeTool struct {
+	Name        string                 `json:"name"`
+	Description string                 `json:"description,omitempty"`
+	InputSchema map[string]interface{} `json:"input_schema,omitempty"`
+}
+
+type claudeToolChoice struct {
+	Type                   string `json:"type"`
+	Name                   string `json:"name,omitempty"`
+	DisableParallelToolUse bool   `json:"disable_parallel_tool_use,omitempty"`
+}
+
+type claudeChatMessage struct {
+	Role    string `json:"role"`
+	Content any    `json:"content"`
+}
+
+type claudeChatMessageContentSource struct {
+	Type      string `json:"type"`
+	MediaType string `json:"media_type,omitempty"`
+	Data      string `json:"data,omitempty"`
+	Url       string `json:"url,omitempty"`
+	FileId    string `json:"file_id,omitempty"`
+}
+
+type claudeChatMessageContent struct {
+	Type   string                          `json:"type"`
+	Text   string                          `json:"text,omitempty"`
+	Source *claudeChatMessageContentSource `json:"source,omitempty"`
+}
 type claudeTextGenRequest struct {
-	Model         string        `json:"model"`
-	Messages      []chatMessage `json:"messages"`
-	System        string        `json:"system,omitempty"`
-	MaxTokens     int           `json:"max_tokens,omitempty"`
-	StopSequences []string      `json:"stop_sequences,omitempty"`
-	Stream        bool          `json:"stream,omitempty"`
-	Temperature   float64       `json:"temperature,omitempty"`
-	TopP          float64       `json:"top_p,omitempty"`
-	TopK          int           `json:"top_k,omitempty"`
+	Model         string              `json:"model"`
+	Messages      []claudeChatMessage `json:"messages"`
+	System        string              `json:"system,omitempty"`
+	MaxTokens     int                 `json:"max_tokens,omitempty"`
+	StopSequences []string            `json:"stop_sequences,omitempty"`
+	Stream        bool                `json:"stream,omitempty"`
+	Temperature   float64             `json:"temperature,omitempty"`
+	TopP          float64             `json:"top_p,omitempty"`
+	TopK          int                 `json:"top_k,omitempty"`
+	ToolChoice    *claudeToolChoice   `json:"tool_choice,omitempty"`
+	Tools         []claudeTool        `json:"tools,omitempty"`
+	ServiceTier   string              `json:"service_tier,omitempty"`
 }

 type claudeTextGenResponse struct {
@@ -50,13 +83,14 @@ type claudeTextGenResponse struct {
 }

 type claudeTextGenContent struct {
-	Type string `json:"type"`
+	Type string `json:"type,omitempty"`
 	Text string `json:"text,omitempty"`
 }

 type claudeTextGenUsage struct {
-	InputTokens  int `json:"input_tokens"`
-	OutputTokens int `json:"output_tokens"`
+	InputTokens  int    `json:"input_tokens,omitempty"`
+	OutputTokens int    `json:"output_tokens,omitempty"`
+	ServiceTier  string `json:"service_tier,omitempty"`
 }

 type claudeTextGenError struct {
@@ -65,12 +99,12 @@ type claudeTextGenError struct {
 }

 type claudeTextGenStreamResponse struct {
-	Type         string                `json:"type"`
-	Message      claudeTextGenResponse `json:"message"`
-	Index        int                   `json:"index"`
-	ContentBlock *claudeTextGenContent `json:"content_block"`
-	Delta        *claudeTextGenDelta   `json:"delta"`
-	Usage        claudeTextGenUsage    `json:"usage"`
+	Type         string                 `json:"type"`
+	Message      *claudeTextGenResponse `json:"message,omitempty"`
+	Index        int                    `json:"index,omitempty"`
+	ContentBlock *claudeTextGenContent  `json:"content_block,omitempty"`
+	Delta        *claudeTextGenDelta    `json:"delta,omitempty"`
+	Usage        *claudeTextGenUsage    `json:"usage,omitempty"`
 }

 type claudeTextGenDelta struct {
@@ -93,6 +127,7 @@ func (c *claudeProviderInitializer) DefaultCapabilities() map[string]string {
 		string(ApiNameCompletion):     claudeCompletionPath,
 		// docs: https://docs.anthropic.com/en/docs/build-with-claude/embeddings#voyage-http-api
 		string(ApiNameEmbeddings): PathOpenAIEmbeddings,
+		string(ApiNameModels):     PathOpenAIModels,
 	}
 }

@@ -107,6 +142,10 @@ func (c *claudeProviderInitializer) CreateProvider(config ProviderConfig) (Provi
 type claudeProvider struct {
 	config       ProviderConfig
 	contextCache *contextCache
+
+	messageId   string
+	usage       usage
+	serviceTier string
 }

 func (c *claudeProvider) GetProviderType() string {
@@ -124,16 +163,16 @@ func (c *claudeProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiNam

 	headers.Set("x-api-key", c.config.GetApiTokenInUse(ctx))

-	if c.config.claudeVersion == "" {
-		c.config.claudeVersion = defaultVersion
+	if c.config.apiVersion == "" {
+		c.config.apiVersion = claudeDefaultVersion
 	}

-	headers.Set("anthropic-version", c.config.claudeVersion)
+	headers.Set("anthropic-version", c.config.apiVersion)
 }

 func (c *claudeProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) (types.Action, error) {
 	if !c.config.isSupportedAPI(apiName) {
-		return types.ActionContinue, errUnsupportedApiName
+		return types.ActionContinue, nil
 	}
 	return c.config.handleRequestBody(c, c.contextCache, ctx, apiName, body)
 }
@@ -205,14 +244,15 @@ func (c *claudeProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name A
 func (c *claudeProvider) buildClaudeTextGenRequest(origRequest *chatCompletionRequest) *claudeTextGenRequest {
 	claudeRequest := claudeTextGenRequest{
 		Model:         origRequest.Model,
-		MaxTokens:     origRequest.MaxTokens,
+		MaxTokens:     origRequest.getMaxTokens(),
 		StopSequences: origRequest.Stop,
 		Stream:        origRequest.Stream,
 		Temperature:   origRequest.Temperature,
 		TopP:          origRequest.TopP,
+		// ServiceTier:   origRequest.ServiceTier,
 	}
 	if claudeRequest.MaxTokens == 0 {
-		claudeRequest.MaxTokens = defaultMaxTokens
+		claudeRequest.MaxTokens = claudeDefaultMaxTokens
 	}

 	for _, message := range origRequest.Messages {
@@ -220,12 +260,80 @@ func (c *claudeProvider) buildClaudeTextGenRequest(origRequest *chatCompletionRe
 			claudeRequest.System = message.StringContent()
 			continue
 		}
-		claudeMessage := chatMessage{
-			Role:    message.Role,
-			Content: message.Content,
+
+		claudeMessage := claudeChatMessage{
+			Role: message.Role,
+		}
+		if message.IsStringContent() {
+			claudeMessage.Content = message.StringContent()
+		} else {
+			chatMessageContents := make([]claudeChatMessageContent, 0)
+			for _, messageContent := range message.ParseContent() {
+				switch messageContent.Type {
+				case contentTypeText:
+					chatMessageContents = append(chatMessageContents, claudeChatMessageContent{
+						Type: contentTypeText,
+						Text: messageContent.Text,
+					})
+				case contentTypeImageUrl:
+					if strings.HasPrefix(messageContent.ImageUrl.Url, "data:") {
+						parts := strings.SplitN(messageContent.ImageUrl.Url, ";", 2)
+						if len(parts) != 2 {
+							log.Errorf("invalid image url format: %s", messageContent.ImageUrl.Url)
+							continue
+						}
+						chatMessageContents = append(chatMessageContents, claudeChatMessageContent{
+							Type: "image",
+							Source: &claudeChatMessageContentSource{
+								Type:      "base64",
+								MediaType: strings.TrimPrefix(parts[0], "data:"),
+								Data:      strings.TrimPrefix(parts[1], "base64,"),
+							},
+						})
+					} else {
+						chatMessageContents = append(chatMessageContents, claudeChatMessageContent{
+							Type: "image",
+							Source: &claudeChatMessageContentSource{
+								Type: "url",
+								Url:  messageContent.ImageUrl.Url,
+							},
+						})
+					}
+				case contentTypeFile:
+					chatMessageContents = append(chatMessageContents, claudeChatMessageContent{
+						Type: "file",
+						Source: &claudeChatMessageContentSource{
+							Type:   "url",
+							FileId: messageContent.File.FileId,
+						},
+					})
+				default:
+					log.Errorf("Unsupported content type: %s", messageContent.Type)
+					continue
+				}
+			}
+			claudeMessage.Content = chatMessageContents
 		}
 		claudeRequest.Messages = append(claudeRequest.Messages, claudeMessage)
 	}
+
+	for _, tool := range origRequest.Tools {
+		claudeTool := claudeTool{
+			Name:        tool.Function.Name,
+			Description: tool.Function.Description,
+			InputSchema: tool.Function.Parameters,
+		}
+		claudeRequest.Tools = append(claudeRequest.Tools, claudeTool)
+	}
+
+	if tc := origRequest.getToolChoiceObject(); tc != nil {
+		claudeRequest.ToolChoice = &claudeToolChoice{
+			Name:                   tc.Function.Name,
+			Type:                   tc.Type,
+			DisableParallelToolUse: !origRequest.ParallelToolCalls,
+		}
+	}
+
 	return &claudeRequest
 }

@@ -233,7 +341,7 @@ func (c *claudeProvider) responseClaude2OpenAI(ctx wrapper.HttpContext, origResp
 	choice := chatCompletionChoice{
 		Index:        0,
 		Message:      &chatMessage{Role: roleAssistant, Content: origResponse.Content[0].Text},
-		FinishReason: stopReasonClaude2OpenAI(origResponse.StopReason),
+		FinishReason: util.Ptr(stopReasonClaude2OpenAI(origResponse.StopReason)),
 	}

 	return &chatCompletionResponse{
@@ -243,7 +351,7 @@ func (c *claudeProvider) responseClaude2OpenAI(ctx wrapper.HttpContext, origResp
 		SystemFingerprint: "",
 		Object:            objectChatCompletion,
 		Choices:           []chatCompletionChoice{choice},
-		Usage: usage{
+		Usage: &usage{
 			PromptTokens:     origResponse.Usage.InputTokens,
 			CompletionTokens: origResponse.Usage.OutputTokens,
 			TotalTokens:      origResponse.Usage.InputTokens + origResponse.Usage.OutputTokens,
@@ -270,27 +378,50 @@ func stopReasonClaude2OpenAI(reason *string) string {
 func (c *claudeProvider) streamResponseClaude2OpenAI(ctx wrapper.HttpContext, origResponse *claudeTextGenStreamResponse) *chatCompletionResponse {
 	switch origResponse.Type {
 	case "message_start":
+		c.messageId = origResponse.Message.Id
+		c.usage = usage{
+			PromptTokens:     origResponse.Message.Usage.InputTokens,
+			CompletionTokens: origResponse.Message.Usage.OutputTokens,
+		}
+		c.serviceTier = origResponse.Message.Usage.ServiceTier
 		choice := chatCompletionChoice{
-			Index: 0,
+			Index: origResponse.Index,
 			Delta: &chatMessage{Role: roleAssistant, Content: ""},
 		}
-		return createChatCompletionResponse(ctx, origResponse, choice)
+		return c.createChatCompletionResponse(ctx, origResponse, choice)

 	case "content_block_delta":
 		choice := chatCompletionChoice{
-			Index: 0,
+			Index: origResponse.Index,
 			Delta: &chatMessage{Content: origResponse.Delta.Text},
 		}
-		return createChatCompletionResponse(ctx, origResponse, choice)
+		return c.createChatCompletionResponse(ctx, origResponse, choice)

 	case "message_delta":
+		c.usage.CompletionTokens += origResponse.Usage.OutputTokens
+		c.usage.TotalTokens = c.usage.PromptTokens + c.usage.CompletionTokens
+
 		choice := chatCompletionChoice{
-			Index:        0,
+			Index:        origResponse.Index,
 			Delta:        &chatMessage{},
-			FinishReason: stopReasonClaude2OpenAI(origResponse.Delta.StopReason),
+			FinishReason: util.Ptr(stopReasonClaude2OpenAI(origResponse.Delta.StopReason)),
 		}
-		return createChatCompletionResponse(ctx, origResponse, choice)
-	case "content_block_stop", "message_stop":
+		return c.createChatCompletionResponse(ctx, origResponse, choice)
+	case "message_stop":
+		return &chatCompletionResponse{
+			Id:          c.messageId,
+			Created:     time.Now().UnixMilli() / 1000,
+			Model:       ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
+			Object:      objectChatCompletionChunk,
+			Choices:     []chatCompletionChoice{},
+			ServiceTier: c.serviceTier,
+			Usage: &usage{
+				PromptTokens:     c.usage.PromptTokens,
+				CompletionTokens: c.usage.CompletionTokens,
+				TotalTokens:      c.usage.TotalTokens,
+			},
+		}
+	case "content_block_stop", "ping", "content_block_start":
 		log.Debugf("skip processing response type: %s", origResponse.Type)
 		return nil
 	default:
@@ -299,13 +430,14 @@ func (c *claudeProvider) streamResponseClaude2OpenAI(ctx wrapper.HttpContext, or
 	}
 }

-func createChatCompletionResponse(ctx wrapper.HttpContext, response *claudeTextGenStreamResponse, choice chatCompletionChoice) *chatCompletionResponse {
+func (c *claudeProvider) createChatCompletionResponse(ctx wrapper.HttpContext, response *claudeTextGenStreamResponse, choice chatCompletionChoice) *chatCompletionResponse {
 	return &chatCompletionResponse{
-		Id:      response.Message.Id,
-		Created: time.Now().UnixMilli() / 1000,
-		Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
-		Object:  objectChatCompletionChunk,
-		Choices: []chatCompletionChoice{choice},
+		Id:          c.messageId,
+		Created:     time.Now().UnixMilli() / 1000,
+		Model:       ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
+		Object:      objectChatCompletionChunk,
+		Choices:     []chatCompletionChoice{choice},
+		ServiceTier: c.serviceTier,
 	}
 }

@@ -332,5 +464,14 @@ func (c *claudeProvider) GetApiName(path string) ApiName {
 	if strings.Contains(path, claudeChatCompletionPath) {
 		return ApiNameChatCompletion
 	}
+	if strings.Contains(path, claudeCompletionPath) {
+		return ApiNameCompletion
+	}
+	if strings.Contains(path, PathOpenAIModels) {
+		return ApiNameModels
+	}
+	if strings.Contains(path, PathOpenAIEmbeddings) {
+		return ApiNameEmbeddings
+	}
 	return ""
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/dify.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/dify.go
@@ -6,13 +6,13 @@ import (
 	"fmt"
 	"net/http"
 	"strings"
-	"time"
+
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"

 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/log"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
-	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
-	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
 )

 const (
@@ -116,34 +116,34 @@ func (d *difyProvider) responseDify2OpenAI(ctx wrapper.HttpContext, response *Di
 		choice = chatCompletionChoice{
 			Index:        0,
 			Message:      &chatMessage{Role: roleAssistant, Content: response.Answer},
-			FinishReason: finishReasonStop,
+			FinishReason: util.Ptr(finishReasonStop),
 		}
-		//response header中增加conversationId字段
+		// response header中增加conversationId字段
 		_ = proxywasm.ReplaceHttpResponseHeader("ConversationId", response.ConversationId)
 		id = response.ConversationId
 	case BotTypeCompletion:
 		choice = chatCompletionChoice{
 			Index:        0,
 			Message:      &chatMessage{Role: roleAssistant, Content: response.Answer},
-			FinishReason: finishReasonStop,
+			FinishReason: util.Ptr(finishReasonStop),
 		}
 		id = response.MessageId
 	case BotTypeWorkflow:
 		choice = chatCompletionChoice{
 			Index:        0,
 			Message:      &chatMessage{Role: roleAssistant, Content: response.Data.Outputs[d.config.outputVariable]},
-			FinishReason: finishReasonStop,
+			FinishReason: util.Ptr(finishReasonStop),
 		}
 		id = response.Data.WorkflowId
 	}
 	return &chatCompletionResponse{
 		Id:                id,
-		Created:           time.Now().UnixMilli() / 1000,
+		Created:           response.CreatedAt,
 		Model:             ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
 		SystemFingerprint: "",
 		Object:            objectChatCompletion,
 		Choices:           []chatCompletionChoice{choice},
-		Usage:             response.MetaData.Usage,
+		Usage:             &response.MetaData.Usage,
 	}
 }

@@ -188,7 +188,7 @@ func (d *difyProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name Api
 func (d *difyProvider) streamResponseDify2OpenAI(ctx wrapper.HttpContext, response *DifyChunkChatResponse) *chatCompletionResponse {
 	var choice chatCompletionChoice
 	var id string
-	var responseUsage usage
+	var responseUsage *usage
 	switch d.config.botType {
 	case BotTypeChat, BotTypeAgent:
 		choice = chatCompletionChoice{
@@ -211,9 +211,9 @@ func (d *difyProvider) streamResponseDify2OpenAI(ctx wrapper.HttpContext, respon
 		id = response.Data.WorkflowId
 	}
 	if response.Event == "message_end" || response.Event == "workflow_finished" {
-		choice.FinishReason = finishReasonStop
+		choice.FinishReason = util.Ptr(finishReasonStop)
 		if response.Event == "message_end" {
-			responseUsage = usage{
+			responseUsage = &usage{
 				PromptTokens:     response.MetaData.Usage.PromptTokens,
 				CompletionTokens: response.MetaData.Usage.CompletionTokens,
 				TotalTokens:      response.MetaData.Usage.TotalTokens,
@@ -222,7 +222,7 @@ func (d *difyProvider) streamResponseDify2OpenAI(ctx wrapper.HttpContext, respon
 	}
 	return &chatCompletionResponse{
 		Id:                id,
-		Created:           time.Now().UnixMilli() / 1000,
+		Created:           response.CreatedAt,
 		Model:             ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
 		SystemFingerprint: "",
 		Object:            objectChatCompletionChunk,
@@ -309,7 +309,7 @@ type DifyChatResponse struct {
 	ConversationId string       `json:"conversation_id"`
 	MessageId      string       `json:"message_id"`
 	Answer         string       `json:"answer"`
-	CreateAt       int64        `json:"create_at"`
+	CreatedAt      int64        `json:"created_at"`
 	Data           DifyData     `json:"data"`
 	MetaData       DifyMetaData `json:"metadata"`
 }
@@ -319,6 +319,7 @@ type DifyChunkChatResponse struct {
 	ConversationId string       `json:"conversation_id"`
 	MessageId      string       `json:"message_id"`
 	Answer         string       `json:"answer"`
+	CreatedAt      int64        `json:"created_at"`
 	Data           DifyData     `json:"data"`
 	MetaData       DifyMetaData `json:"metadata"`
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/doubao.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/doubao.go
@@ -11,9 +11,10 @@ import (
 )

 const (
-	doubaoDomain             = "ark.cn-beijing.volces.com"
-	doubaoChatCompletionPath = "/api/v3/chat/completions"
-	doubaoEmbeddingsPath     = "/api/v3/embeddings"
+	doubaoDomain              = "ark.cn-beijing.volces.com"
+	doubaoChatCompletionPath  = "/api/v3/chat/completions"
+	doubaoEmbeddingsPath      = "/api/v3/embeddings"
+	doubaoImageGenerationPath = "/api/v3/images/generations"
 )

 type doubaoProviderInitializer struct{}
@@ -27,8 +28,9 @@ func (m *doubaoProviderInitializer) ValidateConfig(config *ProviderConfig) error

 func (m *doubaoProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion): doubaoChatCompletionPath,
-		string(ApiNameEmbeddings):     doubaoEmbeddingsPath,
+		string(ApiNameChatCompletion):  doubaoChatCompletionPath,
+		string(ApiNameEmbeddings):      doubaoEmbeddingsPath,
+		string(ApiNameImageGeneration): doubaoImageGenerationPath,
 	}
 }

@@ -75,5 +77,8 @@ func (m *doubaoProvider) GetApiName(path string) ApiName {
 	if strings.Contains(path, doubaoEmbeddingsPath) {
 		return ApiNameEmbeddings
 	}
+	if strings.Contains(path, doubaoImageGenerationPath) {
+		return ApiNameImageGeneration
+	}
 	return ""
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/gemini.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/gemini.go
@@ -19,14 +19,16 @@ import (

 const (
 	geminiApiKeyHeader             = "x-goog-api-key"
+	geminiDefaultApiVersion        = "v1beta" // 可选: v1, v1beta
 	geminiDomain                   = "generativelanguage.googleapis.com"
 	geminiChatCompletionPath       = "generateContent"
 	geminiChatCompletionStreamPath = "streamGenerateContent?alt=sse"
 	geminiEmbeddingPath            = "batchEmbedContents"
+	geminiModelsPath               = "models"
+	geminiImageGenerationPath      = "predict"
 )

-type geminiProviderInitializer struct {
-}
+type geminiProviderInitializer struct{}

 func (g *geminiProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
@@ -37,8 +39,10 @@ func (g *geminiProviderInitializer) ValidateConfig(config *ProviderConfig) error

 func (g *geminiProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion): "",
-		string(ApiNameEmbeddings):     "",
+		string(ApiNameChatCompletion):  "",
+		string(ApiNameEmbeddings):      "",
+		string(ApiNameModels):          "",
+		string(ApiNameImageGeneration): "",
 	}
 }

@@ -79,11 +83,38 @@ func (g *geminiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName,
 }

 func (g *geminiProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header) ([]byte, error) {
-	if apiName == ApiNameChatCompletion {
+	switch apiName {
+	case ApiNameChatCompletion:
 		return g.onChatCompletionRequestBody(ctx, body, headers)
-	} else {
+	case ApiNameEmbeddings:
 		return g.onEmbeddingsRequestBody(ctx, body, headers)
+	case ApiNameImageGeneration:
+		return g.onImageGenerationRequestBody(ctx, body, headers)
 	}
+	return body, nil
+}
+
+func (g *geminiProvider) onImageGenerationRequestBody(ctx wrapper.HttpContext, body []byte, headers http.Header) ([]byte, error) {
+	request := &imageGenerationRequest{}
+	if err := g.config.parseRequestAndMapModel(ctx, request, body); err != nil {
+		return nil, err
+	}
+	path := g.getRequestPath(ApiNameImageGeneration, request.Model, false)
+	log.Debugf("request path:%s", path)
+	util.OverwriteRequestPathHeader(headers, path)
+	geminiRequest := g.buildGeminiImageGenerationRequest(request)
+	return json.Marshal(geminiRequest)
+}
+
+func (g *geminiProvider) buildGeminiImageGenerationRequest(request *imageGenerationRequest) *geminiImageGenerationRequest {
+	geminiRequest := &geminiImageGenerationRequest{
+		Instances: []geminiImageGenerationInstance{{Prompt: request.Prompt}},
+		Parameters: &geminiImageGenerationParameters{
+			SampleCount: request.N,
+		},
+	}
+
+	return geminiRequest
 }

 func (g *geminiProvider) onChatCompletionRequestBody(ctx wrapper.HttpContext, body []byte, headers http.Header) ([]byte, error) {
@@ -112,7 +143,7 @@ func (g *geminiProvider) onEmbeddingsRequestBody(ctx wrapper.HttpContext, body [
 }

 func (g *geminiProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool) ([]byte, error) {
-	log.Infof("chunk body:%s", string(chunk))
+	log.Debugf("chunk body:%s", string(chunk))
 	if isLastChunk || len(chunk) == 0 {
 		return nil, nil
 	}
@@ -148,14 +179,43 @@ func (g *geminiProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name A
 }

 func (g *geminiProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) ([]byte, error) {
-	if apiName == ApiNameChatCompletion {
+	switch apiName {
+	case ApiNameChatCompletion:
 		return g.onChatCompletionResponseBody(ctx, body)
-	} else {
+	case ApiNameEmbeddings:
 		return g.onEmbeddingsResponseBody(ctx, body)
+	case ApiNameImageGeneration:
+		return g.onImageGenerationResponseBody(ctx, body)
+	default:
+		return body, nil
 	}
 }

+func (g *geminiProvider) onImageGenerationResponseBody(ctx wrapper.HttpContext, body []byte) ([]byte, error) {
+	geminiResponse := &geminiImageGenerationResponse{}
+	if err := json.Unmarshal(body, geminiResponse); err != nil {
+		return nil, fmt.Errorf("unable to unmarshal gemini image generation response: %v", err)
+	}
+	response := g.buildImageGenerationResponse(ctx, geminiResponse)
+	return json.Marshal(response)
+}
+
+func (g *geminiProvider) buildImageGenerationResponse(ctx wrapper.HttpContext, geminiResponse *geminiImageGenerationResponse) *imageGenerationResponse {
+	data := make([]imageGenerationData, len(geminiResponse.Predictions))
+	for i, prediction := range geminiResponse.Predictions {
+		data[i] = imageGenerationData{
+			B64: prediction.BytesBase64Encoded,
+		}
+	}
+	response := &imageGenerationResponse{
+		Created: time.Now().UnixMilli() / 1000,
+		Data:    data,
+	}
+	return response
+}
+
 func (g *geminiProvider) onChatCompletionResponseBody(ctx wrapper.HttpContext, body []byte) ([]byte, error) {
+	log.Debugf("chat completion response body:%s", string(body))
 	geminiResponse := &geminiChatResponse{}
 	if err := json.Unmarshal(body, geminiResponse); err != nil {
 		return nil, fmt.Errorf("unable to unmarshal gemini chat response: %v", err)
@@ -181,26 +241,37 @@ func (g *geminiProvider) onEmbeddingsResponseBody(ctx wrapper.HttpContext, body
 	return json.Marshal(response)
 }

-func (g *geminiProvider) getRequestPath(apiName ApiName, geminiModel string, stream bool) string {
+func (g *geminiProvider) getRequestPath(apiName ApiName, model string, stream bool) string {
 	action := ""
-	if apiName == ApiNameEmbeddings {
-		action = geminiEmbeddingPath
-	} else if stream {
-		action = geminiChatCompletionStreamPath
-	} else {
-		action = geminiChatCompletionPath
+	if g.config.apiVersion == "" {
+		g.config.apiVersion = geminiDefaultApiVersion
 	}
-	return fmt.Sprintf("/v1/models/%s:%s", geminiModel, action)
+	switch apiName {
+	case ApiNameModels:
+		return fmt.Sprintf("/%s/%s", g.config.apiVersion, geminiModelsPath)
+	case ApiNameEmbeddings:
+		action = geminiEmbeddingPath
+	case ApiNameChatCompletion:
+		if stream {
+			action = geminiChatCompletionStreamPath
+		} else {
+			action = geminiChatCompletionPath
+		}
+	case ApiNameImageGeneration:
+		action = geminiImageGenerationPath
+	}
+	return fmt.Sprintf("/%s/models/%s:%s", g.config.apiVersion, model, action)
 }

-type geminiChatRequest struct {
+type geminiGenerationContentRequest struct {
 	// Model and Stream are only used when using the gemini original protocol
-	Model            string                     `json:"model,omitempty"`
-	Stream           bool                       `json:"stream,omitempty"`
-	Contents         []geminiChatContent        `json:"contents"`
-	SafetySettings   []geminiChatSafetySetting  `json:"safety_settings,omitempty"`
-	GenerationConfig geminiChatGenerationConfig `json:"generation_config,omitempty"`
-	Tools            []geminiChatTools          `json:"tools,omitempty"`
+	Model             string                     `json:"model,omitempty"`
+	Stream            bool                       `json:"stream,omitempty"`
+	Contents          []geminiChatContent        `json:"contents"`
+	SystemInstruction *geminiChatContent         `json:"system_instruction,omitempty"`
+	SafetySettings    []geminiChatSafetySetting  `json:"safetySettings,omitempty"`
+	GenerationConfig  geminiChatGenerationConfig `json:"generationConfig,omitempty"`
+	Tools             []geminiChatTools          `json:"tools,omitempty"`
 }

 type geminiChatContent struct {
@@ -213,13 +284,26 @@ type geminiChatSafetySetting struct {
 	Threshold string `json:"threshold"`
 }

+type geminiThinkingConfig struct {
+	IncludeThoughts bool  `json:"includeThoughts,omitempty"`
+	ThinkingBudget  int64 `json:"thinkingBudget,omitempty"`
+}
+
 type geminiChatGenerationConfig struct {
-	Temperature     float64  `json:"temperature,omitempty"`
-	TopP            float64  `json:"topP,omitempty"`
-	TopK            float64  `json:"topK,omitempty"`
-	MaxOutputTokens int      `json:"maxOutputTokens,omitempty"`
-	CandidateCount  int      `json:"candidateCount,omitempty"`
-	StopSequences   []string `json:"stopSequences,omitempty"`
+	Temperature        float64               `json:"temperature,omitempty"`
+	TopP               float64               `json:"topP,omitempty"`
+	TopK               int64                 `json:"topK,omitempty"`
+	Seed               int64                 `json:"seed,omitempty"`
+	Logprobs           bool                  `json:"logprobs,omitempty"`
+	MaxOutputTokens    int                   `json:"maxOutputTokens,omitempty"`
+	CandidateCount     int                   `json:"candidateCount,omitempty"`
+	StopSequences      []string              `json:"stopSequences,omitempty"`
+	PresencePenalty    int64                 `json:"presencePenalty,omitempty"`
+	FrequencyPenalty   int64                 `json:"frequencyPenalty,omitempty"`
+	ResponseModalities []string              `json:"responseModalities,omitempty"`
+	NegativePrompt     string                `json:"negativePrompt,omitempty"`
+	ThinkingConfig     *geminiThinkingConfig `json:"thinkingConfig,omitempty"`
+	MediaResolution    string                `json:"mediaResolution,omitempty"`
 }

 type geminiChatTools struct {
@@ -242,25 +326,52 @@ type geminiFunctionCall struct {
 	Arguments    any    `json:"args"`
 }

-func (g *geminiProvider) buildGeminiChatRequest(request *chatCompletionRequest) *geminiChatRequest {
+// geminiImageGenerationRequest is the request body for generate image using Imagen 3
+type geminiImageGenerationRequest struct {
+	Instances  []geminiImageGenerationInstance  `json:"instances"`
+	Parameters *geminiImageGenerationParameters `json:"parameters,omitempty"`
+}
+
+type geminiImageGenerationInstance struct {
+	Prompt string `json:"prompt"`
+}
+
+type geminiImageGenerationParameters struct {
+	SampleCount int    `json:"sampleCount,omitempty"`
+	AspectRatio string `json:"aspectRatio,omitempty"`
+}
+
+type geminiImageGenerationPrediction struct {
+	BytesBase64Encoded string `json:"bytesBase64Encoded"`
+	MimeType           string `json:"mimeType"`
+}
+
+type geminiImageGenerationResponse struct {
+	Predictions []geminiImageGenerationPrediction `json:"predictions"`
+}
+
+func (g *geminiProvider) buildGeminiChatRequest(request *chatCompletionRequest) *geminiGenerationContentRequest {
 	var safetySettings []geminiChatSafetySetting
-	{
-	}
 	for category, threshold := range g.config.geminiSafetySetting {
 		safetySettings = append(safetySettings, geminiChatSafetySetting{
 			Category:  category,
 			Threshold: threshold,
 		})
 	}
-	geminiRequest := geminiChatRequest{
+	geminiRequest := geminiGenerationContentRequest{
 		Contents:       make([]geminiChatContent, 0, len(request.Messages)),
 		SafetySettings: safetySettings,
 		GenerationConfig: geminiChatGenerationConfig{
-			Temperature:     request.Temperature,
-			TopP:            request.TopP,
-			MaxOutputTokens: request.MaxTokens,
+			Temperature:        request.Temperature,
+			TopP:               request.TopP,
+			MaxOutputTokens:    request.MaxTokens,
+			PresencePenalty:    int64(request.PresencePenalty),
+			FrequencyPenalty:   int64(request.FrequencyPenalty),
+			Logprobs:           request.Logprobs,
+			ResponseModalities: request.Modalities,
 		},
 	}
+
 	if request.Tools != nil {
 		functions := make([]function, 0, len(request.Tools))
 		for _, tool := range request.Tools {
@@ -272,7 +383,7 @@ func (g *geminiProvider) buildGeminiChatRequest(request *chatCompletionRequest)
 			},
 		}
 	}
-	shouldAddDummyModelMessage := false
+	// shouldAddDummyModelMessage := false
 	for _, message := range request.Messages {
 		content := geminiChatContent{
 			Role: message.Role,
@@ -284,32 +395,22 @@ func (g *geminiProvider) buildGeminiChatRequest(request *chatCompletionRequest)
 		}

 		// there's no assistant role in gemini and API shall vomit if role is not user or model
-		if content.Role == roleAssistant {
+		switch content.Role {
+		case roleSystem:
+			content.Role = ""
+			geminiRequest.SystemInstruction = &content
+			continue
+		case roleAssistant:
 			content.Role = "model"
-		} else if content.Role == roleSystem { // converting system prompt to prompt from user for the same reason
-			content.Role = roleUser
-			shouldAddDummyModelMessage = true
 		}
 		geminiRequest.Contents = append(geminiRequest.Contents, content)

-		// if a system message is the last message, we need to add a dummy model message to make gemini happy
-		if shouldAddDummyModelMessage {
-			geminiRequest.Contents = append(geminiRequest.Contents, geminiChatContent{
-				Role: "model",
-				Parts: []geminiPart{
-					{
-						Text: "Okay",
-					},
-				},
-			})
-			shouldAddDummyModelMessage = false
-		}
 	}

 	return &geminiRequest
 }

-func (g *geminiProvider) setSystemContent(request *geminiChatRequest, content string) {
+func (g *geminiProvider) setSystemContent(request *geminiGenerationContentRequest, content string) {
 	systemContents := []geminiChatContent{{
 		Role: roleUser,
 		Parts: []geminiPart{
@@ -399,32 +500,34 @@ func (g *geminiProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, re
 		Object:  objectChatCompletion,
 		Created: time.Now().UnixMilli() / 1000,
 		Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
-		Choices: make([]chatCompletionChoice, 0, len(response.Candidates)),
-		Usage: usage{
+		Usage: &usage{
 			PromptTokens:     response.UsageMetadata.PromptTokenCount,
 			CompletionTokens: response.UsageMetadata.CandidatesTokenCount,
 			TotalTokens:      response.UsageMetadata.TotalTokenCount,
 		},
 	}
-	for i, candidate := range response.Candidates {
-		choice := chatCompletionChoice{
-			Index: i,
-			Message: &chatMessage{
-				Role: roleAssistant,
-			},
-			FinishReason: finishReasonStop,
-		}
-		if len(candidate.Content.Parts) > 0 {
-			if candidate.Content.Parts[0].FunctionCall != nil {
-				choice.Message.ToolCalls = g.buildToolCalls(&candidate)
-			} else {
-				choice.Message.Content = candidate.Content.Parts[0].Text
+	choiceIndex := 0
+	for _, candidate := range response.Candidates {
+		for _, part := range candidate.Content.Parts {
+			choice := chatCompletionChoice{
+				Index: choiceIndex,
+				Message: &chatMessage{
+					Role: roleAssistant,
+				},
+				FinishReason: util.Ptr(finishReasonStop),
 			}
-		} else {
-			choice.Message.Content = ""
-			choice.FinishReason = candidate.FinishReason
+			if part.FunctionCall != nil {
+				choice.Message.ToolCalls = g.buildToolCalls(&candidate)
+			} else if part.InlineData != nil {
+				choice.Message.Content = part.InlineData.Data
+			} else {
+				choice.Message.Content = part.Text
+			}
+
+			choice.FinishReason = util.Ptr(strings.ToLower(candidate.FinishReason))
+			fullTextResponse.Choices = append(fullTextResponse.Choices, choice)
+			choiceIndex += 1
 		}
-		fullTextResponse.Choices = append(fullTextResponse.Choices, choice)
 	}
 	return &fullTextResponse
 }
@@ -457,6 +560,9 @@ func (g *geminiProvider) buildChatCompletionStreamResponse(ctx wrapper.HttpConte
 	var choice chatCompletionChoice
 	if len(geminiResp.Candidates) > 0 && len(geminiResp.Candidates[0].Content.Parts) > 0 {
 		choice.Delta = &chatMessage{Content: geminiResp.Candidates[0].Content.Parts[0].Text}
+		if geminiResp.Candidates[0].FinishReason != "" {
+			choice.FinishReason = util.Ptr(strings.ToLower(geminiResp.Candidates[0].FinishReason))
+		}
 	}
 	streamResponse := chatCompletionResponse{
 		Id:      fmt.Sprintf("chatcmpl-%s", uuid.New().String()),
@@ -464,7 +570,7 @@ func (g *geminiProvider) buildChatCompletionStreamResponse(ctx wrapper.HttpConte
 		Created: time.Now().UnixMilli() / 1000,
 		Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
 		Choices: []chatCompletionChoice{choice},
-		Usage: usage{
+		Usage: &usage{
 			PromptTokens:     geminiResp.UsageMetadata.PromptTokenCount,
 			CompletionTokens: geminiResp.UsageMetadata.CandidatesTokenCount,
 			TotalTokens:      geminiResp.UsageMetadata.TotalTokenCount,
@@ -512,5 +618,8 @@ func (g *geminiProvider) GetApiName(path string) ApiName {
 	if strings.Contains(path, geminiEmbeddingPath) {
 		return ApiNameEmbeddings
 	}
+	if strings.Contains(path, geminiImageGenerationPath) {
+		return ApiNameImageGeneration
+	}
 	return ""
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/hunyuan.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/hunyuan.go
@@ -42,13 +42,10 @@ const (
 	hunyuanAuthIdLen  = 36

 	// docs: https://cloud.tencent.com/document/product/1729/111007
-	hunyuanOpenAiDomain      = "api.hunyuan.cloud.tencent.com"
-	hunyuanOpenAiRequestPath = "/v1/chat/completions"
-	hunyuanOpenAiEmbeddings  = "/v1/embeddings"
+	hunyuanOpenAiDomain = "api.hunyuan.cloud.tencent.com"
 )

-type hunyuanProviderInitializer struct {
-}
+type hunyuanProviderInitializer struct{}

 // ref: https://console.cloud.tencent.com/api/explorer?Product=hunyuan&Version=2023-09-01&Action=ChatCompletions
 type hunyuanTextGenRequest struct {
@@ -105,8 +102,8 @@ func (m *hunyuanProviderInitializer) ValidateConfig(config *ProviderConfig) erro

 func (m *hunyuanProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion): hunyuanOpenAiRequestPath,
-		string(ApiNameEmbeddings):     hunyuanOpenAiEmbeddings,
+		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
+		string(ApiNameEmbeddings):     PathOpenAIEmbeddings,
 	}
 }

@@ -324,7 +321,7 @@ func (m *hunyuanProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name
 	}

 	// hunyuan的流式返回:
-	//data: {"Note":"以上内容为AI生成，不代表开发者立场，请勿删除或修改本标记","Choices":[{"Delta":{"Role":"assistant","Content":"有助于"},"FinishReason":""}],"Created":1716359713,"Id":"086b6b19-8b2c-4def-a65c-db6a7bc86acd","Usage":{"PromptTokens":7,"CompletionTokens":145,"TotalTokens":152}}
+	// data: {"Note":"以上内容为AI生成，不代表开发者立场，请勿删除或修改本标记","Choices":[{"Delta":{"Role":"assistant","Content":"有助于"},"FinishReason":""}],"Created":1716359713,"Id":"086b6b19-8b2c-4def-a65c-db6a7bc86acd","Usage":{"PromptTokens":7,"CompletionTokens":145,"TotalTokens":152}}

 	// openai的流式返回
 	// data: {"id": "chatcmpl-7QyqpwdfhqwajicIEznoc6Q47XAyW", "object": "chat.completion.chunk", "created": 1677664795, "model": "gpt-3.5-turbo-0613", "choices": [{"delta": {"content": "The "}, "index": 0, "finish_reason": null}]}
@@ -338,7 +335,7 @@ func (m *hunyuanProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name
 	}

 	// 初始化处理下标，以及将要返回的处理过的chunks
-	var newEventPivot = -1
+	newEventPivot := -1
 	var outputBuffer []byte

 	// 从buffer区取出若干完整的chunk，将其转为openAI格式后返回
@@ -390,7 +387,7 @@ func (m *hunyuanProvider) convertChunkFromHunyuanToOpenAI(ctx wrapper.HttpContex
 		Model:             ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
 		SystemFingerprint: "",
 		Object:            objectChatCompletionChunk,
-		Usage: usage{
+		Usage: &usage{
 			PromptTokens:     hunyuanFormattedChunk.Usage.PromptTokens,
 			CompletionTokens: hunyuanFormattedChunk.Usage.CompletionTokens,
 			TotalTokens:      hunyuanFormattedChunk.Usage.TotalTokens,
@@ -403,7 +400,7 @@ func (m *hunyuanProvider) convertChunkFromHunyuanToOpenAI(ctx wrapper.HttpContex
 	if hunyuanFormattedChunk.Choices[0].FinishReason == hunyuanStreamEndMark {
 		// log.Debugf("@@@ --- 最后chunk: ")
 		openAIFormattedChunk.Choices = append(openAIFormattedChunk.Choices, chatCompletionChoice{
-			FinishReason: hunyuanFormattedChunk.Choices[0].FinishReason,
+			FinishReason: util.Ptr(hunyuanFormattedChunk.Choices[0].FinishReason),
 		})
 	} else {
 		deltaMsg := chatMessage{
@@ -451,7 +448,6 @@ func (m *hunyuanProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName
 }

 func (m *hunyuanProvider) insertContextMessageIntoHunyuanRequest(request *hunyuanTextGenRequest, content string) {
-
 	fileMessage := hunyuanChatMessage{
 		Role:    roleSystem,
 		Content: content,
@@ -499,7 +495,7 @@ func (m *hunyuanProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, h
 				Content:   choice.Message.Content,
 				ToolCalls: nil,
 			},
-			FinishReason: choice.FinishReason,
+			FinishReason: util.Ptr(choice.FinishReason),
 		})
 	}
 	return &chatCompletionResponse{
@@ -509,7 +505,7 @@ func (m *hunyuanProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, h
 		SystemFingerprint: "",
 		Object:            objectChatCompletion,
 		Choices:           choices,
-		Usage: usage{
+		Usage: &usage{
 			PromptTokens:     hunyuanResponse.Response.Usage.PromptTokens,
 			CompletionTokens: hunyuanResponse.Response.Usage.CompletionTokens,
 			TotalTokens:      hunyuanResponse.Response.Usage.TotalTokens,
--- a/plugins/wasm-go/extensions/ai-proxy/provider/minimax.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/minimax.go
@@ -36,8 +36,7 @@ const (
 	defaultSenderName        string = "小明"
 )

-type minimaxProviderInitializer struct {
-}
+type minimaxProviderInitializer struct{}

 func (m *minimaxProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	// If using the chat completion Pro API, a group ID must be set.
@@ -368,7 +367,7 @@ func (m *minimaxProvider) responseProToOpenAI(response *minimaxChatCompletionPro
 				Content: message.Text,
 			}
 			choices = append(choices, chatCompletionChoice{
-				FinishReason: choice.FinishReason,
+				FinishReason: util.Ptr(choice.FinishReason),
 				Index:        messageIndex,
 				Message:      message,
 			})
@@ -381,7 +380,7 @@ func (m *minimaxProvider) responseProToOpenAI(response *minimaxChatCompletionPro
 		Created: response.Created,
 		Model:   response.Model,
 		Choices: choices,
-		Usage: usage{
+		Usage: &usage{
 			TotalTokens:      int(response.Usage.TotalTokens),
 			PromptTokens:     int(response.Usage.PromptTokens),
 			CompletionTokens: int(response.Usage.CompletionTokens),
--- a/plugins/wasm-go/extensions/ai-proxy/provider/model.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/model.go
@@ -20,8 +20,10 @@ const (

 	httpStatus200 = "200"

-	contentTypeText     = "text"
-	contentTypeImageUrl = "image_url"
+	contentTypeText       = "text"
+	contentTypeImageUrl   = "image_url"
+	contentTypeInputAudio = "input_audio"
+	contentTypeFile       = "file"

 	reasoningStartTag = "<think>"
 	reasoningEndTag   = "</think>"
@@ -53,11 +55,40 @@ type chatCompletionRequest struct {
 	Temperature         float64                `json:"temperature,omitempty"`
 	TopP                float64                `json:"top_p,omitempty"`
 	Tools               []tool                 `json:"tools,omitempty"`
-	ToolChoice          *toolChoice            `json:"tool_choice,omitempty"`
+	ToolChoice          interface{}            `json:"tool_choice,omitempty"`
 	ParallelToolCalls   bool                   `json:"parallel_tool_calls,omitempty"`
 	User                string                 `json:"user,omitempty"`
 }

+// getMaxTokens reports the completion token limit carried by the request:
+// MaxCompletionTokens when it is positive, otherwise MaxTokens.
+func (c *chatCompletionRequest) getMaxTokens() int {
+	limit := c.MaxTokens
+	if c.MaxCompletionTokens > 0 {
+		limit = c.MaxCompletionTokens
+	}
+	return limit
+}
+
+// getToolChoiceString returns ToolChoice when its dynamic type is string.
+// A nil ToolChoice or any other dynamic type yields the empty string.
+func (c *chatCompletionRequest) getToolChoiceString() string {
+	// A failed assertion (including on a nil interface) leaves the zero value "".
+	choice, _ := c.ToolChoice.(string)
+	return choice
+}
+
+func (c *chatCompletionRequest) getToolChoiceObject() *toolChoice {
+	if c.ToolChoice == nil {
+		return nil
+	}
+
+	if tc, ok := c.ToolChoice.(*toolChoice); ok {
+		return tc
+	}
+	return nil
+}
+
 type CompletionRequest struct {
 	Model            string         `json:"model"`
 	Prompt           string         `json:"prompt"`
@@ -107,15 +138,15 @@ type chatCompletionResponse struct {
 	ServiceTier       string                 `json:"service_tier,omitempty"`
 	SystemFingerprint string                 `json:"system_fingerprint,omitempty"`
 	Object            string                 `json:"object,omitempty"`
-	Usage             usage                  `json:"usage,omitempty"`
+	Usage             *usage                 `json:"usage"`
 }

 type chatCompletionChoice struct {
 	Index        int                    `json:"index"`
 	Message      *chatMessage           `json:"message,omitempty"`
 	Delta        *chatMessage           `json:"delta,omitempty"`
-	FinishReason string                 `json:"finish_reason,omitempty"`
-	Logprobs     map[string]interface{} `json:"logprobs,omitempty"`
+	FinishReason *string                `json:"finish_reason"`
+	Logprobs     map[string]interface{} `json:"logprobs"`
 }

 type usage struct {
@@ -200,13 +231,26 @@ func (m *chatMessage) handleStreamingReasoningContent(ctx wrapper.HttpContext, r
 	}
 }

-type messageContent struct {
-	Type     string    `json:"type,omitempty"`
-	Text     string    `json:"text"`
-	ImageUrl *imageUrl `json:"image_url,omitempty"`
+type chatMessageContent struct {
+	Type       string                      `json:"type,omitempty"`
+	Text       string                      `json:"text"`
+	ImageUrl   *chatMessageContentImageUrl `json:"image_url,omitempty"`
+	File       *chatMessageContentFile     `json:"file,omitempty"`
+	InputAudio *chatMessageContentAudio    `json:"input_audio,omitempty"`
 }

-type imageUrl struct {
+// chatMessageContentAudio is the payload of an "input_audio" content part.
+// NOTE(review): Data is presumably the encoded audio and Format its encoding
+// name, following the OpenAI chat API — confirm against callers.
+type chatMessageContentAudio struct {
+	Data   string `json:"data"`
+	Format string `json:"format"`
+}
+
+// chatMessageContentFile is the payload of a "file" content part.
+// Every field is omitted from the JSON output when it is empty.
+type chatMessageContentFile struct {
+	FileData string `json:"file_data,omitempty"`
+	FileId   string `json:"file_id,omitempty"`
+	FileName string `json:"file_name,omitempty"`
+}
+
+type chatMessageContentImageUrl struct {
 	Url    string `json:"url,omitempty"`
 	Detail string `json:"detail,omitempty"`
 }
@@ -266,11 +310,11 @@ func (m *chatMessage) StringContent() string {
 	return ""
 }

-func (m *chatMessage) ParseContent() []messageContent {
-	var contentList []messageContent
+func (m *chatMessage) ParseContent() []chatMessageContent {
+	var contentList []chatMessageContent
 	content, ok := m.Content.(string)
 	if ok {
-		contentList = append(contentList, messageContent{
+		contentList = append(contentList, chatMessageContent{
 			Type: contentTypeText,
 			Text: content,
 		})
@@ -286,18 +330,43 @@ func (m *chatMessage) ParseContent() []messageContent {
 			switch contentMap["type"] {
 			case contentTypeText:
 				if subStr, ok := contentMap[contentTypeText].(string); ok {
-					contentList = append(contentList, messageContent{
+					contentList = append(contentList, chatMessageContent{
 						Type: contentTypeText,
 						Text: subStr,
 					})
 				}
 			case contentTypeImageUrl:
 				if subObj, ok := contentMap[contentTypeImageUrl].(map[string]any); ok {
-					contentList = append(contentList, messageContent{
+					msg := chatMessageContent{
 						Type: contentTypeImageUrl,
-						ImageUrl: &imageUrl{
+						ImageUrl: &chatMessageContentImageUrl{
 							Url: subObj["url"].(string),
 						},
+					}
+					if detail, ok := subObj["detail"].(string); ok {
+						msg.ImageUrl.Detail = detail
+					}
+					contentList = append(contentList, msg)
+				}
+			case contentTypeInputAudio:
+				if subObj, ok := contentMap[contentTypeInputAudio].(map[string]any); ok {
+					contentList = append(contentList, chatMessageContent{
+						Type: contentTypeInputAudio,
+						InputAudio: &chatMessageContentAudio{
+							Data:   subObj["data"].(string),
+							Format: subObj["format"].(string),
+						},
+					})
+				}
+			case contentTypeFile:
+				if subObj, ok := contentMap[contentTypeFile].(map[string]any); ok {
+					contentList = append(contentList, chatMessageContent{
+						Type: contentTypeFile,
+						File: &chatMessageContentFile{
+							FileId: subObj["file_id"].(string),
+							// FileName: subObj["file_name"].(string),
+							// FileData: subObj["file_data"].(string),
+						},
 					})
 				}
 			}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go
@@ -18,13 +18,10 @@ import (
 // moonshotProvider is the provider for Moonshot AI service.

 const (
-	moonshotDomain             = "api.moonshot.cn"
-	moonshotChatCompletionPath = "/v1/chat/completions"
-	moonshotModelsPath         = "/v1/models"
+	moonshotDomain = "api.moonshot.cn"
 )

-type moonshotProviderInitializer struct {
-}
+type moonshotProviderInitializer struct{}

 func (m *moonshotProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.moonshotFileId != "" && config.context != nil {
@@ -38,8 +35,8 @@ func (m *moonshotProviderInitializer) ValidateConfig(config *ProviderConfig) err

 func (m *moonshotProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion): moonshotChatCompletionPath,
-		string(ApiNameModels):         moonshotModelsPath,
+		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
+		string(ApiNameModels):         PathOpenAIModels,
 	}
 }

--- a/plugins/wasm-go/extensions/ai-proxy/provider/openai.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/openai.go
@@ -15,17 +15,10 @@ import (
 // openaiProvider is the provider for OpenAI service.

 const (
-	defaultOpenaiDomain             = "api.openai.com"
-	defaultOpenaiChatCompletionPath = "/v1/chat/completions"
-	defaultOpenaiCompletionPath     = "/v1/completions"
-	defaultOpenaiEmbeddingsPath     = "/v1/embeddings"
-	defaultOpenaiAudioSpeech        = "/v1/audio/speech"
-	defaultOpenaiImageGeneration    = "/v1/images/generations"
-	defaultOpenaiModels             = "/v1/models"
+	defaultOpenaiDomain = "api.openai.com"
 )

-type openaiProviderInitializer struct {
-}
+type openaiProviderInitializer struct{}

 func (m *openaiProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	return nil
@@ -33,20 +26,45 @@ func (m *openaiProviderInitializer) ValidateConfig(config *ProviderConfig) error

 func (m *openaiProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameCompletion):      defaultOpenaiCompletionPath,
-		string(ApiNameChatCompletion):  defaultOpenaiChatCompletionPath,
-		string(ApiNameEmbeddings):      defaultOpenaiEmbeddingsPath,
-		string(ApiNameImageGeneration): defaultOpenaiImageGeneration,
-		string(ApiNameAudioSpeech):     defaultOpenaiAudioSpeech,
-		string(ApiNameModels):          defaultOpenaiModels,
+		string(ApiNameCompletion):                           PathOpenAICompletions,
+		string(ApiNameChatCompletion):                       PathOpenAIChatCompletions,
+		string(ApiNameEmbeddings):                           PathOpenAIEmbeddings,
+		string(ApiNameImageGeneration):                      PathOpenAIImageGeneration,
+		string(ApiNameImageEdit):                            PathOpenAIImageEdit,
+		string(ApiNameImageVariation):                       PathOpenAIImageVariation,
+		string(ApiNameAudioSpeech):                          PathOpenAIAudioSpeech,
+		string(ApiNameModels):                               PathOpenAIModels,
+		string(ApiNameFiles):                                PathOpenAIFiles,
+		string(ApiNameRetrieveFile):                         PathOpenAIRetrieveFile,
+		string(ApiNameRetrieveFileContent):                  PathOpenAIRetrieveFileContent,
+		string(ApiNameBatches):                              PathOpenAIBatches,
+		string(ApiNameRetrieveBatch):                        PathOpenAIRetrieveBatch,
+		string(ApiNameCancelBatch):                          PathOpenAICancelBatch,
+		string(ApiNameResponses):                            PathOpenAIResponses,
+		string(ApiNameFineTuningJobs):                       PathOpenAIFineTuningJobs,
+		string(ApiNameFineTuningRetrieveJob):                PathOpenAIFineTuningRetrieveJob,
+		string(ApiNameFineTuningJobEvents):                  PathOpenAIFineTuningJobEvents,
+		string(ApiNameFineTuningJobCheckpoints):             PathOpenAIFineTuningJobCheckpoints,
+		string(ApiNameFineTuningCancelJob):                  PathOpenAIFineTuningCancelJob,
+		string(ApiNameFineTuningResumeJob):                  PathOpenAIFineTuningResumeJob,
+		string(ApiNameFineTuningPauseJob):                   PathOpenAIFineTuningPauseJob,
+		string(ApiNameFineTuningCheckpointPermissions):      PathOpenAIFineTuningCheckpointPermissions,
+		string(ApiNameDeleteFineTuningCheckpointPermission): PathOpenAIFineDeleteTuningCheckpointPermission,
 	}
 }

+// isDirectPath checks if the path is a known standard OpenAI interface path.
 func isDirectPath(path string) bool {
 	return strings.HasSuffix(path, "/completions") ||
 		strings.HasSuffix(path, "/embeddings") ||
 		strings.HasSuffix(path, "/audio/speech") ||
-		strings.HasSuffix(path, "/images/generations")
+		strings.HasSuffix(path, "/images/generations") ||
+		strings.HasSuffix(path, "/images/variations") ||
+		strings.HasSuffix(path, "/images/edits") ||
+		strings.HasSuffix(path, "/models") ||
+		strings.HasSuffix(path, "/responses") ||
+		strings.HasSuffix(path, "/fine_tuning/jobs") ||
+		strings.HasSuffix(path, "/fine_tuning/checkpoints")
 }

 func (m *openaiProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
@@ -100,15 +118,14 @@ func (m *openaiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiNa
 }

 func (m *openaiProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header) {
-	if m.customPath != "" {
-		if m.isDirectCustomPath || apiName == "" {
-			util.OverwriteRequestPathHeader(headers, m.customPath)
-		} else {
-			util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
-		}
-	} else {
+	if m.isDirectCustomPath {
+		util.OverwriteRequestPathHeader(headers, m.customPath)
+	}
+
+	if apiName != "" {
 		util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 	}
+
 	if m.customDomain != "" {
 		util.OverwriteRequestHostHeader(headers, m.customDomain)
 	} else {
@@ -121,7 +138,7 @@ func (m *openaiProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiNam
 }

 func (m *openaiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !m.config.needToProcessRequestBody(apiName) {
 		// We don't need to process the request body for other APIs.
 		return types.ActionContinue, nil
 	}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"math/rand"
 	"net/http"
+	"regexp"
 	"strings"

 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
@@ -16,32 +17,70 @@ import (
 	"github.com/tidwall/sjson"
 )

-type ApiName string
-type Pointcut string
+type (
+	ApiName  string
+	Pointcut string
+)

 const (

 	// ApiName 格式 {vendor}/{version}/{apitype}
 	// 表示遵循 厂商/版本/接口类型 的格式
 	// 目前openai是事实意义上的标准，但是也有其他厂商存在其他任务的一些可能的标准，比如cohere的rerank
-	ApiNameCompletion      ApiName = "openai/v1/completions"
-	ApiNameChatCompletion  ApiName = "openai/v1/chatcompletions"
-	ApiNameEmbeddings      ApiName = "openai/v1/embeddings"
-	ApiNameImageGeneration ApiName = "openai/v1/imagegeneration"
-	ApiNameAudioSpeech     ApiName = "openai/v1/audiospeech"
-	ApiNameFiles           ApiName = "openai/v1/files"
-	ApiNameBatches         ApiName = "openai/v1/batches"
-	ApiNameModels          ApiName = "openai/v1/models"
+	ApiNameCompletion                           ApiName = "openai/v1/completions"
+	ApiNameChatCompletion                       ApiName = "openai/v1/chatcompletions"
+	ApiNameEmbeddings                           ApiName = "openai/v1/embeddings"
+	ApiNameImageGeneration                      ApiName = "openai/v1/imagegeneration"
+	ApiNameImageEdit                            ApiName = "openai/v1/imageedit"
+	ApiNameImageVariation                       ApiName = "openai/v1/imagevariation"
+	ApiNameAudioSpeech                          ApiName = "openai/v1/audiospeech"
+	ApiNameFiles                                ApiName = "openai/v1/files"
+	ApiNameRetrieveFile                         ApiName = "openai/v1/retrievefile"
+	ApiNameRetrieveFileContent                  ApiName = "openai/v1/retrievefilecontent"
+	ApiNameBatches                              ApiName = "openai/v1/batches"
+	ApiNameRetrieveBatch                        ApiName = "openai/v1/retrievebatch"
+	ApiNameCancelBatch                          ApiName = "openai/v1/cancelbatch"
+	ApiNameModels                               ApiName = "openai/v1/models"
+	ApiNameResponses                            ApiName = "openai/v1/responses"
+	ApiNameFineTuningJobs                       ApiName = "openai/v1/fine-tuningjobs"
+	ApiNameFineTuningRetrieveJob                ApiName = "openai/v1/retrievefine-tuningjob"
+	ApiNameFineTuningJobEvents                  ApiName = "openai/v1/fine-tuningjobsevents"
+	ApiNameFineTuningJobCheckpoints             ApiName = "openai/v1/fine-tuningjobcheckpoints"
+	ApiNameFineTuningCancelJob                  ApiName = "openai/v1/cancelfine-tuningjob"
+	ApiNameFineTuningResumeJob                  ApiName = "openai/v1/resumefine-tuningjob"
+	ApiNameFineTuningPauseJob                   ApiName = "openai/v1/pausefine-tuningjob"
+	ApiNameFineTuningCheckpointPermissions      ApiName = "openai/v1/fine-tuningjobcheckpointpermissions"
+	ApiNameDeleteFineTuningCheckpointPermission ApiName = "openai/v1/deletefine-tuningjobcheckpointpermission"

-	PathOpenAICompletions     = "/v1/completions"
-	PathOpenAIChatCompletions = "/v1/chat/completions"
-	PathOpenAIEmbeddings      = "/v1/embeddings"
-	PathOpenAIFiles           = "/v1/files"
-	PathOpenAIBatches         = "/v1/batches"
-	PathOpenAIModels          = "/v1/models"
+	PathOpenAICompletions                          = "/v1/completions"
+	PathOpenAIChatCompletions                      = "/v1/chat/completions"
+	PathOpenAIEmbeddings                           = "/v1/embeddings"
+	PathOpenAIFiles                                = "/v1/files"
+	PathOpenAIRetrieveFile                         = "/v1/files/{file_id}"
+	PathOpenAIRetrieveFileContent                  = "/v1/files/{file_id}/content"
+	PathOpenAIBatches                              = "/v1/batches"
+	PathOpenAIRetrieveBatch                        = "/v1/batches/{batch_id}"
+	PathOpenAICancelBatch                          = "/v1/batches/{batch_id}/cancel"
+	PathOpenAIModels                               = "/v1/models"
+	PathOpenAIImageGeneration                      = "/v1/images/generations"
+	PathOpenAIImageEdit                            = "/v1/images/edits"
+	PathOpenAIImageVariation                       = "/v1/images/variations"
+	PathOpenAIAudioSpeech                          = "/v1/audio/speech"
+	PathOpenAIResponses                            = "/v1/responses"
+	PathOpenAIFineTuningJobs                       = "/v1/fine_tuning/jobs"
+	PathOpenAIFineTuningRetrieveJob                = "/v1/fine_tuning/jobs/{fine_tuning_job_id}"
+	PathOpenAIFineTuningJobEvents                  = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/events"
+	PathOpenAIFineTuningJobCheckpoints             = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints"
+	PathOpenAIFineTuningCancelJob                  = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/cancel"
+	PathOpenAIFineTuningResumeJob                  = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/resume"
+	PathOpenAIFineTuningPauseJob                   = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/pause"
+	PathOpenAIFineTuningCheckpointPermissions      = "/v1/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions"
+	PathOpenAIFineDeleteTuningCheckpointPermission = "/v1/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions/{permission_id}"

 	// TODO: 以下是一些非标准的API名称，需要进一步确认是否支持
 	ApiNameCohereV1Rerank ApiName = "cohere/v1/rerank"
+	ApiNameQwenAsyncAIGC  ApiName = "api/v1/services/aigc"
+	ApiNameQwenAsyncTask  ApiName = "api/v1/tasks/"

 	providerTypeMoonshot   = "moonshot"
 	providerTypeAzure      = "azure"
@@ -71,6 +110,7 @@ const (
 	providerTypeTogetherAI = "together-ai"
 	providerTypeDify       = "dify"
 	providerTypeBedrock    = "bedrock"
+	providerTypeVertex     = "vertex"

 	protocolOpenAI   = "openai"
 	protocolOriginal = "original"
@@ -142,6 +182,7 @@ var (
 		providerTypeTogetherAI: &togetherAIProviderInitializer{},
 		providerTypeDify:       &difyProviderInitializer{},
 		providerTypeBedrock:    &bedrockProviderInitializer{},
+		providerTypeVertex:     &vertexProviderInitializer{},
 	}
 )

@@ -271,14 +312,29 @@ type ProviderConfig struct {
 	// @Description zh-CN 配置一个外部获取对话上下文的文件来源，用于在AI请求中补充对话上下文
 	context *ContextConfig `required:"false" yaml:"context" json:"context"`
 	// @Title zh-CN 版本
-	// @Description zh-CN 请求AI服务的版本，目前仅适用于Claude AI服务
-	claudeVersion string `required:"false" yaml:"version" json:"version"`
+	// @Description zh-CN 请求AI服务的版本，目前仅适用于 Gemini 和 Claude AI服务
+	apiVersion string `required:"false" yaml:"apiVersion" json:"apiVersion"`
 	// @Title zh-CN Cloudflare Account ID
 	// @Description zh-CN 仅适用于 Cloudflare Workers AI 服务。参考：https://developers.cloudflare.com/workers-ai/get-started/rest-api/#2-run-a-model-via-api
 	cloudflareAccountId string `required:"false" yaml:"cloudflareAccountId" json:"cloudflareAccountId"`
 	// @Title zh-CN Gemini AI内容过滤和安全级别设定
 	// @Description zh-CN 仅适用于 Gemini AI 服务。参考：https://ai.google.dev/gemini-api/docs/safety-settings
 	geminiSafetySetting map[string]string `required:"false" yaml:"geminiSafetySetting" json:"geminiSafetySetting"`
+	// @Title zh-CN Vertex AI访问区域
+	// @Description zh-CN 仅适用于Vertex AI服务。如需查看支持的区域的完整列表，请参阅https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations?hl=zh-cn#available-regions
+	vertexRegion string `required:"false" yaml:"vertexRegion" json:"vertexRegion"`
+	// @Title zh-CN Vertex AI项目Id
+	// @Description zh-CN 仅适用于Vertex AI服务。创建和管理项目请参阅https://cloud.google.com/resource-manager/docs/creating-managing-projects?hl=zh-cn#identifiers
+	vertexProjectId string `required:"false" yaml:"vertexProjectId" json:"vertexProjectId"`
+	// @Title zh-CN Vertex 认证秘钥
+	// @Description zh-CN 用于Google服务账号认证的完整JSON密钥文件内容，获取可参考https://cloud.google.com/iam/docs/keys-create-delete?hl=zh-cn#iam-service-account-keys-create-console
+	vertexAuthKey string `required:"false" yaml:"vertexAuthKey" json:"vertexAuthKey"`
+	// @Title zh-CN Vertex 认证服务名
+	// @Description zh-CN 用于Google服务账号认证的服务,DNS类型的服务名
+	vertexAuthServiceName string `required:"false" yaml:"vertexAuthServiceName" json:"vertexAuthServiceName"`
+	// @Title zh-CN Vertex token刷新提前时间
+	// @Description zh-CN 用于Google服务账号认证，access token过期时间判定提前刷新，单位为秒，默认值为60秒
+	vertexTokenRefreshAhead int64 `required:"false" yaml:"vertexTokenRefreshAhead" json:"vertexTokenRefreshAhead"`
 	// @Title zh-CN 翻译服务需指定的目标语种
 	// @Description zh-CN 翻译结果的语种，目前仅适用于DeepL服务。
 	targetLang string `required:"false" yaml:"targetLang" json:"targetLang"`
@@ -356,7 +412,13 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 		c.context = &ContextConfig{}
 		c.context.FromJson(contextJson)
 	}
-	c.claudeVersion = json.Get("claudeVersion").String()
+
+	// 这里获取 claudeVersion 字段，与结构体中定义 yaml/json 的 tag 不一致
+	c.apiVersion = json.Get("claudeVersion").String()
+	if c.apiVersion == "" {
+		// Fall back to the apiVersion key so other models can be configured; this key matches the yaml/json tag on the struct field.
+		c.apiVersion = json.Get("apiVersion").String()
+	}
 	c.hunyuanAuthId = json.Get("hunyuanAuthId").String()
 	c.hunyuanAuthKey = json.Get("hunyuanAuthKey").String()
 	c.awsAccessKey = json.Get("awsAccessKey").String()
@@ -365,12 +427,20 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 	c.minimaxApiType = json.Get("minimaxApiType").String()
 	c.minimaxGroupId = json.Get("minimaxGroupId").String()
 	c.cloudflareAccountId = json.Get("cloudflareAccountId").String()
-	if c.typ == providerTypeGemini {
+	if c.typ == providerTypeGemini || c.typ == providerTypeVertex {
 		c.geminiSafetySetting = make(map[string]string)
 		for k, v := range json.Get("geminiSafetySetting").Map() {
 			c.geminiSafetySetting[k] = v.String()
 		}
 	}
+	c.vertexRegion = json.Get("vertexRegion").String()
+	c.vertexProjectId = json.Get("vertexProjectId").String()
+	c.vertexAuthKey = json.Get("vertexAuthKey").String()
+	c.vertexAuthServiceName = json.Get("vertexAuthServiceName").String()
+	c.vertexTokenRefreshAhead = json.Get("vertexTokenRefreshAhead").Int()
+	if c.vertexTokenRefreshAhead == 0 {
+		c.vertexTokenRefreshAhead = 60
+	}
 	c.targetLang = json.Get("targetLang").String()

 	if schemaValue, ok := json.Get("responseJsonSchema").Value().(map[string]interface{}); ok {
@@ -439,6 +509,8 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 		case string(ApiNameChatCompletion),
 			string(ApiNameEmbeddings),
 			string(ApiNameImageGeneration),
+			string(ApiNameImageVariation),
+			string(ApiNameImageEdit),
 			string(ApiNameAudioSpeech),
 			string(ApiNameCohereV1Rerank):
 			c.capabilities[capability] = pathJson.String()
@@ -596,13 +668,25 @@ func doGetMappedModel(model string, modelMapping map[string]string) string {
 	}

 	for k, v := range modelMapping {
-		if k == wildcard || !strings.HasSuffix(k, wildcard) {
+		if k == wildcard {
 			continue
 		}
-		k = strings.TrimSuffix(k, wildcard)
-		if strings.HasPrefix(model, k) {
-			log.Debugf("model [%s] is mapped to [%s] via prefix [%s]", model, v, k)
-			return v
+		if strings.HasSuffix(k, wildcard) {
+			k = strings.TrimSuffix(k, wildcard)
+			if strings.HasPrefix(model, k) {
+				log.Debugf("model [%s] is mapped to [%s] via prefix [%s]", model, v, k)
+				return v
+			}
+		}
+
+		if strings.HasPrefix(k, "~") {
+			k = strings.TrimPrefix(k, "~")
+			re := regexp.MustCompile(k)
+			if re.MatchString(model) {
+				v = re.ReplaceAllString(model, v)
+				log.Debugf("model [%s] is mapped to [%s] via regex [%s]", model, v, k)
+				return v
+			}
 		}
 	}

@@ -703,7 +787,8 @@ func (c *ProviderConfig) setDefaultCapabilities(capabilities map[string]string)
 }

 func (c *ProviderConfig) handleRequestBody(
-	provider Provider, contextCache *contextCache, ctx wrapper.HttpContext, apiName ApiName, body []byte) (types.Action, error) {
+	provider Provider, contextCache *contextCache, ctx wrapper.HttpContext, apiName ApiName, body []byte,
+) (types.Action, error) {
 	// use original protocol
 	if c.IsOriginal() {
 		return types.ActionContinue, nil
@@ -771,3 +856,19 @@ func (c *ProviderConfig) DefaultTransformResponseHeaders(ctx wrapper.HttpContext
 		headers.Del("Content-Length")
 	}
 }
+
+// needToProcessRequestBody reports whether apiName is one of the APIs listed
+// in the switch below. It returns false for every other value, including the
+// empty name. The receiver's fields are not consulted.
+func (c *ProviderConfig) needToProcessRequestBody(apiName ApiName) bool {
+	switch apiName {
+	case ApiNameChatCompletion, ApiNameCompletion, ApiNameEmbeddings:
+		return true
+	case ApiNameImageGeneration, ApiNameImageEdit, ApiNameImageVariation:
+		return true
+	case ApiNameAudioSpeech, ApiNameFineTuningJobs, ApiNameResponses:
+		return true
+	default:
+		return false
+	}
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
@@ -23,13 +23,22 @@ import (
 const (
 	qwenResultFormatMessage = "message"

-	qwenDefaultDomain               = "dashscope.aliyuncs.com"
-	qwenChatCompletionPath          = "/api/v1/services/aigc/text-generation/generation"
-	qwenTextEmbeddingPath           = "/api/v1/services/embeddings/text-embedding/text-embedding"
-	qwenChatCompatiblePath          = "/compatible-mode/v1/chat/completions"
-	qwenTextEmbeddingCompatiblePath = "/compatible-mode/v1/embeddings"
-	qwenBailianPath                 = "/api/v1/apps"
-	qwenMultimodalGenerationPath    = "/api/v1/services/aigc/multimodal-generation/generation"
+	qwenDefaultDomain                     = "dashscope.aliyuncs.com"
+	qwenChatCompletionPath                = "/api/v1/services/aigc/text-generation/generation"
+	qwenTextEmbeddingPath                 = "/api/v1/services/embeddings/text-embedding/text-embedding"
+	qwenCompatibleChatCompletionPath      = "/compatible-mode/v1/chat/completions"
+	qwenCompatibleCompletionsPath         = "/compatible-mode/v1/completions"
+	qwenCompatibleTextEmbeddingPath       = "/compatible-mode/v1/embeddings"
+	qwenCompatibleFilesPath               = "/compatible-mode/v1/files"
+	qwenCompatibleRetrieveFilePath        = "/compatible-mode/v1/files/{file_id}"
+	qwenCompatibleRetrieveFileContentPath = "/compatible-mode/v1/files/{file_id}/content"
+	qwenCompatibleBatchesPath             = "/compatible-mode/v1/batches"
+	qwenCompatibleRetrieveBatchPath       = "/compatible-mode/v1/batches/{batch_id}"
+	qwenBailianPath                       = "/api/v1/apps"
+	qwenMultimodalGenerationPath          = "/api/v1/services/aigc/multimodal-generation/generation"
+
+	qwenAsyncAIGCPath                     = "/api/v1/services/aigc/"
+	qwenAsyncTaskPath                     = "/api/v1/tasks/"

 	qwenTopPMin = 0.000001
 	qwenTopPMax = 0.999999
@@ -40,8 +49,7 @@ const (
 	qwenVlModelPrefixName = "qwen-vl"
 )

-type qwenProviderInitializer struct {
-}
+type qwenProviderInitializer struct{}

 func (m *qwenProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if len(config.qwenFileIds) != 0 && config.context != nil {
@@ -56,13 +64,21 @@ func (m *qwenProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 func (m *qwenProviderInitializer) DefaultCapabilities(qwenEnableCompatible bool) map[string]string {
 	if qwenEnableCompatible {
 		return map[string]string{
-			string(ApiNameChatCompletion): qwenChatCompatiblePath,
-			string(ApiNameEmbeddings):     qwenTextEmbeddingCompatiblePath,
+			string(ApiNameChatCompletion):      qwenCompatibleChatCompletionPath,
+			string(ApiNameEmbeddings):          qwenCompatibleTextEmbeddingPath,
+			string(ApiNameCompletion):          qwenCompatibleCompletionsPath,
+			string(ApiNameFiles):               qwenCompatibleFilesPath,
+			string(ApiNameRetrieveFile):        qwenCompatibleRetrieveFilePath,
+			string(ApiNameRetrieveFileContent): qwenCompatibleRetrieveFileContentPath,
+			string(ApiNameBatches):             qwenCompatibleBatchesPath,
+			string(ApiNameRetrieveBatch):       qwenCompatibleRetrieveBatchPath,
 		}
 	} else {
 		return map[string]string{
 			string(ApiNameChatCompletion): qwenChatCompletionPath,
 			string(ApiNameEmbeddings):     qwenTextEmbeddingPath,
+			string(ApiNameQwenAsyncAIGC): qwenAsyncAIGCPath,
+			string(ApiNameQwenAsyncTask): qwenAsyncTaskPath,
 		}
 	}
 }
@@ -291,7 +307,7 @@ func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwen
 		message := qwenMessageToChatMessage(qwenChoice.Message, m.config.reasoningContentMode)
 		choices = append(choices, chatCompletionChoice{
 			Message:      &message,
-			FinishReason: qwenChoice.FinishReason,
+			FinishReason: util.Ptr(qwenChoice.FinishReason),
 		})
 	}
 	return &chatCompletionResponse{
@@ -301,7 +317,7 @@ func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwen
 		SystemFingerprint: "",
 		Object:            objectChatCompletion,
 		Choices:           choices,
-		Usage: usage{
+		Usage: &usage{
 			PromptTokens:     qwenResponse.Usage.InputTokens,
 			CompletionTokens: qwenResponse.Usage.OutputTokens,
 			TotalTokens:      qwenResponse.Usage.TotalTokens,
@@ -402,11 +418,11 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont

 	if finished {
 		finishResponse := *&baseMessage
-		finishResponse.Choices = append(finishResponse.Choices, chatCompletionChoice{Delta: &chatMessage{}, FinishReason: qwenChoice.FinishReason})
+		finishResponse.Choices = append(finishResponse.Choices, chatCompletionChoice{Delta: &chatMessage{}, FinishReason: util.Ptr(qwenChoice.FinishReason)})

 		usageResponse := *&baseMessage
 		usageResponse.Choices = []chatCompletionChoice{{Delta: &chatMessage{}}}
-		usageResponse.Usage = usage{
+		usageResponse.Usage = &usage{
 			PromptTokens:     qwenResponse.Usage.InputTokens,
 			CompletionTokens: qwenResponse.Usage.OutputTokens,
 			TotalTokens:      qwenResponse.Usage.TotalTokens,
@@ -673,11 +689,15 @@ func (m *qwenProvider) GetApiName(path string) ApiName {
 	case strings.Contains(path, qwenChatCompletionPath),
 		strings.Contains(path, qwenMultimodalGenerationPath),
 		strings.Contains(path, qwenBailianPath),
-		strings.Contains(path, qwenChatCompatiblePath):
+		strings.Contains(path, qwenCompatibleChatCompletionPath):
 		return ApiNameChatCompletion
 	case strings.Contains(path, qwenTextEmbeddingPath),
-		strings.Contains(path, qwenTextEmbeddingCompatiblePath):
+		strings.Contains(path, qwenCompatibleTextEmbeddingPath):
 		return ApiNameEmbeddings
+	case strings.Contains(path, qwenAsyncAIGCPath):
+		return ApiNameQwenAsyncAIGC
+	case strings.Contains(path, qwenAsyncTaskPath):
+		return ApiNameQwenAsyncTask
 	default:
 		return ""
 	}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/spark.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/spark.go
@@ -15,12 +15,10 @@ import (

 // sparkProvider is the provider for SparkLLM AI service.
 const (
-	sparkHost               = "spark-api-open.xf-yun.com"
-	sparkChatCompletionPath = "/v1/chat/completions"
+	sparkHost = "spark-api-open.xf-yun.com"
 )

-type sparkProviderInitializer struct {
-}
+type sparkProviderInitializer struct{}

 type sparkProvider struct {
 	config       ProviderConfig
@@ -58,7 +56,7 @@ func (i *sparkProviderInitializer) ValidateConfig(config *ProviderConfig) error

 func (i *sparkProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion): sparkChatCompletionPath,
+		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
 	}
 }

@@ -152,7 +150,7 @@ func (p *sparkProvider) responseSpark2OpenAI(ctx wrapper.HttpContext, response *
 		Object:  objectChatCompletion,
 		Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
 		Choices: choices,
-		Usage:   response.Usage,
+		Usage:   &response.Usage,
 	}
 }

@@ -170,7 +168,7 @@ func (p *sparkProvider) streamResponseSpark2OpenAI(ctx wrapper.HttpContext, resp
 		Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
 		Object:  objectChatCompletion,
 		Choices: choices,
-		Usage:   response.Usage,
+		Usage:   &response.Usage,
 	}
 }

--- a/plugins/wasm-go/extensions/ai-proxy/provider/stepfun.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/stepfun.go
@@ -10,12 +10,10 @@ import (
 )

 const (
-	stepfunDomain             = "api.stepfun.com"
-	stepfunChatCompletionPath = "/v1/chat/completions"
+	stepfunDomain = "api.stepfun.com"
 )

-type stepfunProviderInitializer struct {
-}
+type stepfunProviderInitializer struct{}

 func (m *stepfunProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
@@ -27,7 +25,7 @@ func (m *stepfunProviderInitializer) ValidateConfig(config *ProviderConfig) erro
 func (m *stepfunProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
 		// stepfun的chat接口path和OpenAI的chat接口一样
-		string(ApiNameChatCompletion): stepfunChatCompletionPath,
+		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
 	}
 }

--- a/plugins/wasm-go/extensions/ai-proxy/provider/together_ai.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/together_ai.go
@@ -11,8 +11,7 @@ import (
 )

 const (
-	togetherAIDomain         = "api.together.xyz"
-	togetherAICompletionPath = "/v1/chat/completions"
+	togetherAIDomain = "api.together.xyz"
 )

 type togetherAIProviderInitializer struct{}
@@ -26,7 +25,7 @@ func (m *togetherAIProviderInitializer) ValidateConfig(config *ProviderConfig) e

 func (m *togetherAIProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion): togetherAICompletionPath,
+		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
 	}
 }

@@ -67,7 +66,7 @@ func (m *togetherAIProvider) TransformRequestHeaders(ctx wrapper.HttpContext, ap
 }

 func (m *togetherAIProvider) GetApiName(path string) ApiName {
-	if strings.Contains(path, togetherAICompletionPath) {
+	if strings.Contains(path, PathOpenAIChatCompletions) {
 		return ApiNameChatCompletion
 	}
 	return ""
--- a/plugins/wasm-go/extensions/ai-proxy/provider/vertex.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/vertex.go
@@ -0,0 +1,667 @@
+package provider
+
+import (
+	"crypto"
+	"crypto/rsa"
+	"crypto/sha256"
+	"crypto/x509"
+	"encoding/base64"
+	"encoding/json"
+	"encoding/pem"
+	"errors"
+	"fmt"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/log"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+	"github.com/tidwall/gjson"
+)
+
+// Hosts, path template and action names used when talking to Vertex AI.
+const (
+	// vertexAuthDomain is the host of the cluster client that getAccessToken posts "/token" to.
+	vertexAuthDomain = "oauth2.googleapis.com"
+	// vertexDomain is the request host template; "{REGION}" is replaced with the configured region.
+	vertexDomain     = "{REGION}-aiplatform.googleapis.com"
+	// /v1/projects/{PROJECT_ID}/locations/{REGION}/publishers/google/models/{MODEL_ID}:{ACTION}
+	vertexPathTemplate               = "/v1/projects/%s/locations/%s/publishers/google/models/%s:%s"
+	// Action suffixes substituted into the last placeholder of vertexPathTemplate.
+	vertexChatCompletionAction       = "generateContent"
+	vertexChatCompletionStreamAction = "streamGenerateContent?alt=sse"
+	vertexEmbeddingAction            = "predict"
+)
+
+// vertexProviderInitializer validates vertex provider configuration and
+// creates vertexProvider instances.
+type vertexProviderInitializer struct{}
+
+// ValidateConfig returns an error when a setting required by the vertex
+// provider is empty: the auth key, the region or project id, or the auth
+// service name. It returns nil when all of them are set.
+func (v *vertexProviderInitializer) ValidateConfig(config *ProviderConfig) error {
+	switch {
+	case config.vertexAuthKey == "":
+		return errors.New("missing vertexAuthKey in vertex provider config")
+	case config.vertexRegion == "", config.vertexProjectId == "":
+		return errors.New("missing vertexRegion or vertexProjectId in vertex provider config")
+	case config.vertexAuthServiceName == "":
+		return errors.New("missing vertexAuthServiceName in vertex provider config")
+	default:
+		return nil
+	}
+}
+
+// DefaultCapabilities maps every API supported by the vertex provider (chat
+// completion and embeddings) to the shared Vertex path template.
+func (v *vertexProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string, 2)
+	for _, apiName := range []ApiName{ApiNameChatCompletion, ApiNameEmbeddings} {
+		capabilities[string(apiName)] = vertexPathTemplate
+	}
+	return capabilities
+}
+
+// CreateProvider applies the default capabilities to config and returns a
+// vertexProvider holding that config, a cluster client for the token
+// endpoint host, and a context cache built from the config.
+func (v *vertexProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(v.DefaultCapabilities())
+	authCluster := wrapper.DnsCluster{
+		Domain:      vertexAuthDomain,
+		ServiceName: config.vertexAuthServiceName,
+		Port:        443,
+	}
+	vp := &vertexProvider{
+		config:       config,
+		client:       wrapper.NewClusterClient(authCluster),
+		contextCache: createContextCache(&config),
+	}
+	return vp, nil
+}
+
+// vertexProvider adapts OpenAI-style chat completion and embeddings requests
+// to the Vertex AI API.
+type vertexProvider struct {
+	// client posts the token request to the auth cluster (see getAccessToken).
+	client       wrapper.HttpClient
+	// config is the provider configuration (region, project id, auth key, ...).
+	config       ProviderConfig
+	// contextCache is created from the config in CreateProvider.
+	contextCache *contextCache
+}
+
+// GetProviderType returns the provider type identifier for vertex.
+func (v *vertexProvider) GetProviderType() string {
+	return providerTypeVertex
+}
+
+// GetApiName infers the API from the action suffix of a Vertex request path.
+// Paths ending in a chat (streaming or not) action map to chat completion,
+// paths ending in the predict action map to embeddings, anything else to "".
+func (v *vertexProvider) GetApiName(path string) ApiName {
+	switch {
+	case strings.HasSuffix(path, vertexChatCompletionAction),
+		strings.HasSuffix(path, vertexChatCompletionStreamAction):
+		return ApiNameChatCompletion
+	case strings.HasSuffix(path, vertexEmbeddingAction):
+		return ApiNameEmbeddings
+	default:
+		return ""
+	}
+}
+
+// OnRequestHeaders delegates header processing to the shared
+// handleRequestHeaders helper on the provider config. It always returns nil.
+func (v *vertexProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName) error {
+	v.config.handleRequestHeaders(v, ctx, apiName)
+	return nil
+}
+
+// TransformRequestHeaders overwrites the request host with the Vertex domain
+// of the configured region.
+func (v *vertexProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header) {
+	util.OverwriteRequestHostHeader(headers, strings.Replace(vertexDomain, "{REGION}", v.config.vertexRegion, 1))
+}
+
+// getToken makes sure the request carries a bearer token.
+//
+// When a usable token is found in the shared cache it is written to the
+// Authorization header and (true, nil) is returned. Otherwise a JWT is built
+// from the configured service account key and the token request is started;
+// in that case cached is false and the header is set later by the callback in
+// getAccessToken. A non-nil error means the token request could not be started.
+func (v *vertexProvider) getToken() (cached bool, err error) {
+	// A cache read error is treated like a miss: fall through and fetch a new token.
+	if token, cacheErr := v.getCachedAccessToken(v.buildTokenKey()); cacheErr == nil && token != "" {
+		_ = proxywasm.ReplaceHttpRequestHeader("Authorization", "Bearer "+token)
+		return true, nil
+	}
+
+	var key ServiceAccountKey
+	if unmarshalErr := json.Unmarshal([]byte(v.config.vertexAuthKey), &key); unmarshalErr != nil {
+		return false, fmt.Errorf("[vertex]: unable to unmarshal auth key json: %v", unmarshalErr)
+	}
+	if key.ClientEmail == "" || key.PrivateKey == "" || key.TokenURI == "" {
+		return false, fmt.Errorf("[vertex]: missing auth params")
+	}
+
+	jwtToken, err := createJWT(&key)
+	if err != nil {
+		log.Errorf("[vertex]: unable to create JWT token: %v", err)
+		return false, err
+	}
+
+	if err = v.getAccessToken(jwtToken); err != nil {
+		log.Errorf("[vertex]: unable to get access token: %v", err)
+		return false, err
+	}
+	return false, nil
+}
+
+// OnRequestBody converts the OpenAI-style request into a Vertex request and
+// attaches an access token.
+//
+// Unsupported APIs yield errUnsupportedApiName; in original-protocol mode the
+// request is passed through untouched. Otherwise the body and headers are
+// transformed and written back, then a token is obtained:
+//   - cached token: the header is already set, processing continues;
+//   - token request started: the request is paused until the token callback
+//     resumes it;
+//   - any failure: the error is returned with ActionContinue.
+func (v *vertexProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) (types.Action, error) {
+	if !v.config.isSupportedAPI(apiName) {
+		return types.ActionContinue, errUnsupportedApiName
+	}
+	if v.config.IsOriginal() {
+		return types.ActionContinue, nil
+	}
+	headers := util.GetOriginalRequestHeaders()
+	body, err := v.TransformRequestBodyHeaders(ctx, apiName, body, headers)
+	// Check the error before touching the request: on failure body is nil and
+	// headers may be only partially rewritten, so neither must be written back.
+	if err != nil {
+		return types.ActionContinue, err
+	}
+	util.ReplaceRequestHeaders(headers)
+	_ = proxywasm.ReplaceHttpRequestBody(body)
+
+	cached, err := v.getToken()
+	if err != nil {
+		return types.ActionContinue, err
+	}
+	if cached {
+		return types.ActionContinue, nil
+	}
+	// The token is being fetched asynchronously; wait for the callback.
+	return types.ActionPause, nil
+}
+
+// TransformRequestBodyHeaders rewrites the request body and path header for
+// Vertex. Chat completion requests use the chat conversion; every other API
+// name is handled as an embeddings request.
+func (v *vertexProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header) ([]byte, error) {
+	if apiName != ApiNameChatCompletion {
+		return v.onEmbeddingsRequestBody(ctx, body, headers)
+	}
+	return v.onChatCompletionRequestBody(ctx, body, headers)
+}
+
+// onChatCompletionRequestBody parses the chat completion request (applying
+// model mapping), points the path header at the matching Vertex action and
+// returns the request re-encoded in Vertex format.
+func (v *vertexProvider) onChatCompletionRequestBody(ctx wrapper.HttpContext, body []byte, headers http.Header) ([]byte, error) {
+	var request chatCompletionRequest
+	if err := v.config.parseRequestAndMapModel(ctx, &request, body); err != nil {
+		return nil, err
+	}
+	util.OverwriteRequestPathHeader(headers, v.getRequestPath(ApiNameChatCompletion, request.Model, request.Stream))
+	return json.Marshal(v.buildVertexChatRequest(&request))
+}
+
+// onEmbeddingsRequestBody parses the embeddings request (applying model
+// mapping), points the path header at the Vertex predict action and returns
+// the request re-encoded in Vertex format.
+func (v *vertexProvider) onEmbeddingsRequestBody(ctx wrapper.HttpContext, body []byte, headers http.Header) ([]byte, error) {
+	var request embeddingsRequest
+	err := v.config.parseRequestAndMapModel(ctx, &request, body)
+	if err != nil {
+		return nil, err
+	}
+	util.OverwriteRequestPathHeader(headers, v.getRequestPath(ApiNameEmbeddings, request.Model, false))
+	return json.Marshal(v.buildEmbeddingRequest(&request))
+}
+
+// OnStreamingResponseBody converts one chunk of a Vertex SSE stream into
+// OpenAI-style chat completion chunks.
+//
+// The last chunk and empty chunks produce no output. Chunks of APIs other
+// than chat completion are returned unchanged. For chat completion, every
+// SSE "data:" line is decoded as a Vertex response and re-emitted as one
+// event; lines that are not data fields, or whose payload is empty, are
+// skipped. A payload that cannot be decoded is logged and skipped; a failure
+// to encode the converted response aborts with that error.
+func (v *vertexProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool) ([]byte, error) {
+	log.Infof("[vertexProvider] receive chunk body: %s", string(chunk))
+	if isLastChunk || len(chunk) == 0 {
+		return nil, nil
+	}
+	if name != ApiNameChatCompletion {
+		return chunk, nil
+	}
+	// SSE data field name; the space after the colon is optional, so it is
+	// removed by trimming rather than by slicing a fixed number of bytes.
+	const sseDataPrefix = "data:"
+	responseBuilder := &strings.Builder{}
+	for _, line := range strings.Split(string(chunk), "\n") {
+		if !strings.HasPrefix(line, sseDataPrefix) {
+			// blank line, comment, or another SSE field such as "event:"
+			continue
+		}
+		// TrimSpace also drops a trailing "\r" from CRLF-terminated lines.
+		payload := strings.TrimSpace(strings.TrimPrefix(line, sseDataPrefix))
+		if payload == "" {
+			continue
+		}
+		var vertexResp vertexChatResponse
+		if err := json.Unmarshal([]byte(payload), &vertexResp); err != nil {
+			log.Errorf("unable to unmarshal vertex response: %v", err)
+			continue
+		}
+		response := v.buildChatCompletionStreamResponse(ctx, &vertexResp)
+		responseBody, err := json.Marshal(response)
+		if err != nil {
+			log.Errorf("unable to marshal response: %v", err)
+			return nil, err
+		}
+		v.appendResponse(responseBuilder, string(responseBody))
+	}
+	modifiedResponseChunk := responseBuilder.String()
+	log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
+	return []byte(modifiedResponseChunk), nil
+}
+
+// TransformResponseBody converts a non-streaming Vertex response body into
+// the OpenAI-style format. Chat completion uses the chat conversion; every
+// other API name is handled as an embeddings response.
+func (v *vertexProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) ([]byte, error) {
+	if apiName != ApiNameChatCompletion {
+		return v.onEmbeddingsResponseBody(ctx, body)
+	}
+	return v.onChatCompletionResponseBody(ctx, body)
+}
+
+// onChatCompletionResponseBody decodes a Vertex chat response and returns it
+// re-encoded as an OpenAI-style chat completion response.
+func (v *vertexProvider) onChatCompletionResponseBody(ctx wrapper.HttpContext, body []byte) ([]byte, error) {
+	var decoded vertexChatResponse
+	err := json.Unmarshal(body, &decoded)
+	if err != nil {
+		return nil, fmt.Errorf("unable to unmarshal vertex chat response: %v", err)
+	}
+	return json.Marshal(v.buildChatCompletionResponse(ctx, &decoded))
+}
+
+// buildChatCompletionResponse maps a Vertex chat response to an OpenAI-style
+// chat completion response. Each candidate becomes one assistant choice whose
+// content is the text of the candidate's first part, or "" when it has no
+// parts. Token usage is copied from the Vertex usage metadata.
+func (v *vertexProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, response *vertexChatResponse) *chatCompletionResponse {
+	choices := make([]chatCompletionChoice, 0, len(response.Candidates))
+	for _, candidate := range response.Candidates {
+		text := ""
+		if parts := candidate.Content.Parts; len(parts) > 0 {
+			text = parts[0].Text
+		}
+		choice := chatCompletionChoice{
+			Index:        candidate.Index,
+			Message:      &chatMessage{Role: roleAssistant},
+			FinishReason: util.Ptr(candidate.FinishReason),
+		}
+		choice.Message.Content = text
+		choices = append(choices, choice)
+	}
+
+	meta := response.UsageMetadata
+	return &chatCompletionResponse{
+		Id:      response.ResponseId,
+		Object:  objectChatCompletion,
+		Created: time.Now().UnixMilli() / 1000,
+		Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
+		Choices: choices,
+		Usage: &usage{
+			PromptTokens:     meta.PromptTokenCount,
+			CompletionTokens: meta.CandidatesTokenCount,
+			TotalTokens:      meta.TotalTokenCount,
+		},
+	}
+}
+
+// onEmbeddingsResponseBody decodes a Vertex embeddings response and returns
+// it re-encoded as an OpenAI-style embeddings response.
+func (v *vertexProvider) onEmbeddingsResponseBody(ctx wrapper.HttpContext, body []byte) ([]byte, error) {
+	var decoded vertexEmbeddingResponse
+	err := json.Unmarshal(body, &decoded)
+	if err != nil {
+		return nil, fmt.Errorf("unable to unmarshal vertex embeddings response: %v", err)
+	}
+	return json.Marshal(v.buildEmbeddingsResponse(ctx, &decoded))
+}
+
+// buildEmbeddingsResponse maps a Vertex predict response to an OpenAI-style
+// embeddings response.
+//
+// Each prediction becomes one embedding entry whose Index is the position of
+// the prediction in the response, so entries can be matched to their inputs.
+// TotalTokens is the sum of the per-prediction token counts; predictions
+// without statistics contribute nothing. The model name is read from the
+// request context and defaults to "" when absent.
+func (v *vertexProvider) buildEmbeddingsResponse(ctx wrapper.HttpContext, vertexResp *vertexEmbeddingResponse) *embeddingsResponse {
+	// GetStringContext (as used by the chat builders) avoids the panic an
+	// unchecked type assertion would raise when the context value is missing.
+	response := embeddingsResponse{
+		Object: "list",
+		Data:   make([]embedding, 0, len(vertexResp.Predictions)),
+		Model:  ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
+	}
+	totalTokens := 0
+	for i, item := range vertexResp.Predictions {
+		response.Data = append(response.Data, embedding{
+			Object:    `embedding`,
+			Index:     i,
+			Embedding: item.Embeddings.Values,
+		})
+		if item.Embeddings.Statistics != nil {
+			totalTokens += item.Embeddings.Statistics.TokenCount
+		}
+	}
+	response.Usage.TotalTokens = totalTokens
+	return &response
+}
+
+// buildChatCompletionStreamResponse maps one Vertex streaming response to an
+// OpenAI-style chat completion chunk with a single choice. The choice carries
+// a delta only when the first candidate has at least one part; the delta
+// content is that part's text. Token usage is copied from the usage metadata.
+func (v *vertexProvider) buildChatCompletionStreamResponse(ctx wrapper.HttpContext, vertexResp *vertexChatResponse) *chatCompletionResponse {
+	choice := chatCompletionChoice{}
+	if candidates := vertexResp.Candidates; len(candidates) > 0 {
+		if parts := candidates[0].Content.Parts; len(parts) > 0 {
+			choice.Delta = &chatMessage{Content: parts[0].Text}
+		}
+	}
+
+	meta := vertexResp.UsageMetadata
+	return &chatCompletionResponse{
+		Id:      vertexResp.ResponseId,
+		Object:  objectChatCompletionChunk,
+		Created: time.Now().UnixMilli() / 1000,
+		Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
+		Choices: []chatCompletionChoice{choice},
+		Usage: &usage{
+			PromptTokens:     meta.PromptTokenCount,
+			CompletionTokens: meta.CandidatesTokenCount,
+			TotalTokens:      meta.TotalTokenCount,
+		},
+	}
+}
+
+// appendResponse writes responseBody to the builder as one stream event: the
+// data item key, a space, the body, and a terminating blank line.
+func (v *vertexProvider) appendResponse(responseBuilder *strings.Builder, responseBody string) {
+	fmt.Fprintf(responseBuilder, "%s %s\n\n", streamDataItemKey, responseBody)
+}
+
+// getRequestPath builds the Vertex request path for the given model from the
+// configured project id and region. Embeddings use the predict action; chat
+// uses the streaming or non-streaming generate action depending on stream.
+func (v *vertexProvider) getRequestPath(apiName ApiName, modelId string, stream bool) string {
+	var action string
+	switch {
+	case apiName == ApiNameEmbeddings:
+		action = vertexEmbeddingAction
+	case stream:
+		action = vertexChatCompletionStreamAction
+	default:
+		action = vertexChatCompletionAction
+	}
+	return fmt.Sprintf(vertexPathTemplate, v.config.vertexProjectId, v.config.vertexRegion, modelId, action)
+}
+
+// buildVertexChatRequest converts an OpenAI-style chat completion request
+// into a Vertex chat request.
+//
+// Safety settings come from the provider config; temperature, top-p and max
+// tokens are copied into the generation config; tool functions, when present,
+// are sent as a single tool with all function declarations. Messages are
+// converted one by one: the assistant role becomes "model", and a system
+// message is sent as a user message immediately followed by a dummy "Okay"
+// model message.
+func (v *vertexProvider) buildVertexChatRequest(request *chatCompletionRequest) *vertexChatRequest {
+	settings := make([]vertexChatSafetySetting, 0)
+	for category, threshold := range v.config.geminiSafetySetting {
+		settings = append(settings, vertexChatSafetySetting{Category: category, Threshold: threshold})
+	}
+
+	result := &vertexChatRequest{
+		Contents:       make([]vertexChatContent, 0),
+		SafetySettings: settings,
+		GenerationConfig: vertexChatGenerationConfig{
+			Temperature:     request.Temperature,
+			TopP:            request.TopP,
+			MaxOutputTokens: request.MaxTokens,
+		},
+	}
+
+	if request.Tools != nil {
+		declarations := make([]function, 0, len(request.Tools))
+		for _, tool := range request.Tools {
+			declarations = append(declarations, tool.Function)
+		}
+		result.Tools = []vertexTool{{FunctionDeclarations: declarations}}
+	}
+
+	for _, message := range request.Messages {
+		role := message.Role
+		fromSystem := false
+		// vertex only accepts the roles "user" and "model"
+		if role == roleAssistant {
+			role = "model"
+		} else if role == roleSystem {
+			// a system prompt is sent as a user prompt for the same reason
+			role = roleUser
+			fromSystem = true
+		}
+		result.Contents = append(result.Contents, vertexChatContent{
+			Role:  role,
+			Parts: []vertexPart{{Text: message.StringContent()}},
+		})
+
+		// every converted system message is answered by a dummy model message
+		if fromSystem {
+			result.Contents = append(result.Contents, vertexChatContent{
+				Role:  "model",
+				Parts: []vertexPart{{Text: "Okay"}},
+			})
+		}
+	}
+
+	return result
+}
+
+// buildEmbeddingRequest converts an OpenAI-style embeddings request into a
+// Vertex embedding request with one instance per parsed input.
+func (v *vertexProvider) buildEmbeddingRequest(request *embeddingsRequest) *vertexEmbeddingRequest {
+	inputs := request.ParseInput()
+	instances := make([]vertexEmbeddingInstance, 0, len(inputs))
+	for _, input := range inputs {
+		instances = append(instances, vertexEmbeddingInstance{Content: input})
+	}
+	return &vertexEmbeddingRequest{Instances: instances}
+}
+
+// vertexChatRequest is the JSON body sent to Vertex for chat completion.
+// buildVertexChatRequest fills Contents, SafetySettings, GenerationConfig and
+// Tools; the other fields are not set in this file.
+type vertexChatRequest struct {
+	CachedContent     string                     `json:"cachedContent,omitempty"`
+	Contents          []vertexChatContent        `json:"contents"`
+	SystemInstruction *vertexSystemInstruction   `json:"systemInstruction,omitempty"`
+	Tools             []vertexTool               `json:"tools,omitempty"`
+	SafetySettings    []vertexChatSafetySetting  `json:"safetySettings,omitempty"`
+	GenerationConfig  vertexChatGenerationConfig `json:"generationConfig,omitempty"`
+	Labels            map[string]string          `json:"labels,omitempty"`
+}
+
+// vertexChatContent is one message of a conversation, in requests and responses.
+type vertexChatContent struct {
+	// The producer of the content. Must be either 'user' or 'model'.
+	Role  string       `json:"role,omitempty"`
+	Parts []vertexPart `json:"parts"`
+}
+
+// vertexPart is one piece of a message: text, inline data, or a file reference.
+type vertexPart struct {
+	Text       string    `json:"text,omitempty"`
+	InlineData *blob     `json:"inlineData,omitempty"`
+	FileData   *fileData `json:"fileData,omitempty"`
+}
+
+// blob is inline data together with its MIME type.
+type blob struct {
+	MimeType string `json:"mimeType"`
+	Data     string `json:"data"`
+}
+
+// fileData references a file by URI together with its MIME type.
+type fileData struct {
+	MimeType string `json:"mimeType"`
+	FileUri  string `json:"fileUri"`
+}
+
+// vertexSystemInstruction is the type of the request's optional
+// systemInstruction field.
+type vertexSystemInstruction struct {
+	Role  string       `json:"role"`
+	Parts []vertexPart `json:"parts"`
+}
+
+// vertexTool wraps the function declarations offered to the model;
+// buildVertexChatRequest stores the functions of the request tools here.
+type vertexTool struct {
+	FunctionDeclarations any `json:"functionDeclarations"`
+}
+
+// vertexChatSafetySetting is one category/threshold pair of the safety settings.
+type vertexChatSafetySetting struct {
+	Category  string `json:"category"`
+	Threshold string `json:"threshold"`
+}
+
+// vertexChatGenerationConfig holds the sampling parameters of a chat request.
+// Every field is tagged omitempty, so a zero value (for example a temperature
+// of 0) is not serialized.
+type vertexChatGenerationConfig struct {
+	Temperature     float64 `json:"temperature,omitempty"`
+	TopP            float64 `json:"topP,omitempty"`
+	TopK            int     `json:"topK,omitempty"`
+	CandidateCount  int     `json:"candidateCount,omitempty"`
+	MaxOutputTokens int     `json:"maxOutputTokens,omitempty"`
+}
+
+// vertexEmbeddingRequest is the JSON body sent to Vertex for embeddings.
+// buildEmbeddingRequest only fills Instances.
+type vertexEmbeddingRequest struct {
+	Instances  []vertexEmbeddingInstance `json:"instances"`
+	Parameters *vertexEmbeddingParams    `json:"parameters,omitempty"`
+}
+
+// vertexEmbeddingInstance is one input to embed. task_type has no omitempty
+// tag, so it is serialized even when empty.
+type vertexEmbeddingInstance struct {
+	TaskType string `json:"task_type"`
+	Title    string `json:"title,omitempty"`
+	Content  string `json:"content"`
+}
+
+// vertexEmbeddingParams is the type of the request's optional parameters field.
+type vertexEmbeddingParams struct {
+	AutoTruncate bool `json:"autoTruncate,omitempty"`
+}
+
+// vertexChatResponse is the decoded Vertex chat response; the same type is
+// used for complete responses and for each streamed event.
+type vertexChatResponse struct {
+	Candidates     []vertexChatCandidate    `json:"candidates"`
+	ResponseId     string                   `json:"responseId,omitempty"`
+	PromptFeedback vertexChatPromptFeedback `json:"promptFeedback"`
+	UsageMetadata  vertexUsageMetadata      `json:"usageMetadata"`
+}
+
+// vertexChatCandidate is one generated answer within a chat response.
+type vertexChatCandidate struct {
+	Content       vertexChatContent        `json:"content"`
+	FinishReason  string                   `json:"finishReason"`
+	Index         int                      `json:"index"`
+	SafetyRatings []vertexChatSafetyRating `json:"safetyRatings"`
+}
+
+// vertexChatSafetyRating is one category/probability pair of a safety rating.
+type vertexChatSafetyRating struct {
+	Category    string `json:"category"`
+	Probability string `json:"probability"`
+}
+
+// vertexChatPromptFeedback carries the safety ratings reported for the prompt.
+type vertexChatPromptFeedback struct {
+	SafetyRatings []vertexChatSafetyRating `json:"safetyRatings"`
+}
+
+// vertexUsageMetadata holds the token counts of a chat response; they are
+// copied into the usage section of the converted response.
+type vertexUsageMetadata struct {
+	PromptTokenCount     int `json:"promptTokenCount,omitempty"`
+	CandidatesTokenCount int `json:"candidatesTokenCount,omitempty"`
+	TotalTokenCount      int `json:"totalTokenCount,omitempty"`
+}
+
+// vertexEmbeddingResponse is the decoded Vertex embeddings (predict) response.
+type vertexEmbeddingResponse struct {
+	Predictions []vertexPredictions `json:"predictions"`
+}
+
+// vertexPredictions is one prediction: the embedding values plus optional
+// statistics about the embedded input.
+type vertexPredictions struct {
+	Embeddings struct {
+		Values     []float64         `json:"values"`
+		Statistics *vertexStatistics `json:"statistics,omitempty"`
+	} `json:"embeddings"`
+}
+
+// vertexStatistics reports the token count of an embedded input and whether
+// it was truncated.
+type vertexStatistics struct {
+	TokenCount int  `json:"token_count"`
+	Truncated  bool `json:"truncated"`
+}
+
+// ServiceAccountKey holds the fields of the service account key JSON
+// (config.vertexAuthKey) that are needed to sign a JWT and request a token.
+type ServiceAccountKey struct {
+	ClientEmail  string `json:"client_email"`
+	PrivateKeyID string `json:"private_key_id"`
+	// PrivateKey is a PEM block; createJWT parses it as a PKCS#8 private key.
+	PrivateKey   string `json:"private_key"`
+	TokenURI     string `json:"token_uri"`
+}
+
+// createJWT builds and signs (RS256) the JWT assertion used to request an
+// access token for the given service account key.
+//
+// The claims use the client email as issuer, the token URI as audience, the
+// cloud-platform scope and a lifetime of one hour. An error is returned when
+// the private key is not a PEM block, cannot be parsed as PKCS#8, is not an
+// RSA key, or when signing fails.
+func createJWT(key *ServiceAccountKey) (string, error) {
+	// Parse the PEM-encoded RSA private key.
+	block, _ := pem.Decode([]byte(key.PrivateKey))
+	if block == nil {
+		return "", fmt.Errorf("invalid PEM block")
+	}
+	parsedKey, err := x509.ParsePKCS8PrivateKey(block.Bytes)
+	if err != nil {
+		return "", err
+	}
+	// PKCS#8 can also carry non-RSA keys (e.g. ECDSA, Ed25519); reject them
+	// with an error instead of panicking on the type assertion.
+	rsaKey, ok := parsedKey.(*rsa.PrivateKey)
+	if !ok {
+		return "", fmt.Errorf("unsupported private key type %T, an RSA key is required", parsedKey)
+	}
+
+	// Build the JWT header.
+	jwtHeader := map[string]string{
+		"alg": "RS256",
+		"typ": "JWT",
+		"kid": key.PrivateKeyID,
+	}
+	headerJSON, err := json.Marshal(jwtHeader)
+	if err != nil {
+		return "", err
+	}
+	headerB64 := base64.RawURLEncoding.EncodeToString(headerJSON)
+
+	// Build the JWT claims.
+	now := time.Now().Unix()
+	claims := map[string]interface{}{
+		"iss":   key.ClientEmail,
+		"scope": "https://www.googleapis.com/auth/cloud-platform",
+		"aud":   key.TokenURI,
+		"iat":   now,
+		"exp":   now + 3600, // valid for one hour
+	}
+	claimsJSON, err := json.Marshal(claims)
+	if err != nil {
+		return "", err
+	}
+	claimsB64 := base64.RawURLEncoding.EncodeToString(claimsJSON)
+
+	// Sign "<header>.<claims>" over its SHA-256 digest.
+	signingInput := fmt.Sprintf("%s.%s", headerB64, claimsB64)
+	hashed := sha256.Sum256([]byte(signingInput))
+	signature, err := rsaKey.Sign(nil, hashed[:], crypto.SHA256)
+	if err != nil {
+		return "", err
+	}
+	sigB64 := base64.RawURLEncoding.EncodeToString(signature)
+
+	return fmt.Sprintf("%s.%s.%s", headerB64, claimsB64, sigB64), nil
+}
+
+// getAccessToken starts the exchange of the signed JWT for an access token
+// by posting to "/token" on the auth cluster client.
+//
+// The returned error only reports whether the request could be dispatched.
+// The result is handled in the callback, which always resumes the paused
+// request when it finishes: on success the Authorization header is set and
+// the token is stored in the shared cache together with its expiry time; on
+// a non-200 status or a response without access_token the error handler is
+// invoked and nothing is cached.
+func (v *vertexProvider) getAccessToken(jwtToken string) error {
+	headers := [][2]string{
+		{"Content-Type", "application/x-www-form-urlencoded"},
+	}
+	reqBody := "grant_type=urn:ietf:params:oauth:grant-type:jwt-bearer&assertion=" + jwtToken
+	err := v.client.Post("/token", headers, []byte(reqBody), func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+		responseString := string(responseBody)
+		defer func() {
+			_ = proxywasm.ResumeHttpRequest()
+		}()
+		if statusCode != http.StatusOK {
+			log.Errorf("failed to create vertex access key, status: %d body: %s", statusCode, responseString)
+			_ = util.ErrorHandler("ai-proxy.vertex.load_ak_failed", fmt.Errorf("failed to load vertex ak"))
+			return
+		}
+		responseJson := gjson.Parse(responseString)
+		accessToken := responseJson.Get("access_token").String()
+		// Without a token the request would go out with an empty bearer
+		// credential and an empty token would be cached; treat it as a failure.
+		if accessToken == "" {
+			log.Errorf("failed to create vertex access key, status: %d, access_token missing in response", statusCode)
+			_ = util.ErrorHandler("ai-proxy.vertex.load_ak_failed", fmt.Errorf("failed to load vertex ak"))
+			return
+		}
+		_ = proxywasm.ReplaceHttpRequestHeader("Authorization", "Bearer "+accessToken)
+
+		// Fall back to one hour when the response carries no expires_in.
+		expiresIn := int64(3600)
+		if expiresInVal := responseJson.Get("expires_in"); expiresInVal.Exists() {
+			expiresIn = expiresInVal.Int()
+		}
+		expireTime := time.Now().Add(time.Duration(expiresIn) * time.Second).Unix()
+		keyName := v.buildTokenKey()
+		err := setCachedAccessToken(keyName, accessToken, expireTime)
+		if err != nil {
+			// Caching is best effort: the current request already has its token.
+			log.Errorf("[vertex]: unable to cache access token: %v", err)
+		}
+	}, v.config.timeout)
+	return err
+}
+
+// buildTokenKey returns the shared-data key under which the access token for
+// the configured region and project is cached.
+func (v *vertexProvider) buildTokenKey() string {
+	return fmt.Sprintf("vertex-%s-%s-access-token", v.config.vertexRegion, v.config.vertexProjectId)
+}
+
+// cachedAccessToken is the JSON value stored in shared data for a token.
+type cachedAccessToken struct {
+	Token    string `json:"token"`
+	// ExpireAt is the expiry time in Unix seconds.
+	ExpireAt int64  `json:"expireAt"`
+}
+
+// getCachedAccessToken reads the token stored under key in shared data.
+//
+// It returns the token only while it remains valid for longer than the
+// configured refresh-ahead interval. A missing entry, empty data or a token
+// too close to expiry yields ("", nil); read and decode failures are
+// returned as errors.
+func (v *vertexProvider) getCachedAccessToken(key string) (string, error) {
+	raw, _, err := proxywasm.GetSharedData(key)
+	switch {
+	case err != nil && errors.Is(err, types.ErrorStatusNotFound):
+		return "", nil
+	case err != nil:
+		return "", err
+	case raw == nil:
+		return "", nil
+	}
+
+	var entry cachedAccessToken
+	if err = json.Unmarshal(raw, &entry); err != nil {
+		return "", err
+	}
+
+	// The token must outlive "now" by more than the refresh-ahead margin.
+	threshold := time.Now().Unix() + v.config.vertexTokenRefreshAhead
+	if entry.ExpireAt > threshold {
+		return entry.Token, nil
+	}
+	return "", nil
+}
+
+// setCachedAccessToken stores the token and its expiry time (Unix seconds)
+// under key in shared data. The current CAS value of the entry is read first
+// and passed to the write; a not-found entry is not an error.
+func setCachedAccessToken(key string, accessToken string, expireTime int64) error {
+	_, cas, err := proxywasm.GetSharedData(key)
+	if err != nil && !errors.Is(err, types.ErrorStatusNotFound) {
+		return err
+	}
+
+	payload, err := json.Marshal(cachedAccessToken{Token: accessToken, ExpireAt: expireTime})
+	if err != nil {
+		return err
+	}
+	return proxywasm.SetSharedData(key, payload, cas)
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/yi.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/yi.go
@@ -10,12 +10,10 @@ import (
 )

 const (
-	yiDomain             = "api.lingyiwanwu.com"
-	yiChatCompletionPath = "/v1/chat/completions"
+	yiDomain = "api.lingyiwanwu.com"
 )

-type yiProviderInitializer struct {
-}
+type yiProviderInitializer struct{}

 func (m *yiProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
@@ -26,7 +24,7 @@ func (m *yiProviderInitializer) ValidateConfig(config *ProviderConfig) error {

 func (m *yiProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion): yiChatCompletionPath,
+		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
 	}
 }

--- a/plugins/wasm-go/extensions/ai-proxy/util/http.go
+++ b/plugins/wasm-go/extensions/ai-proxy/util/http.go
@@ -2,7 +2,10 @@ package util

 import (
 	"net/http"
+	"regexp"
+	"strings"

+	"github.com/alibaba/higress/plugins/wasm-go/pkg/log"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 )

@@ -13,6 +16,21 @@ const (
 	MimeTypeApplicationJson = "application/json"
 )

+// Patterns for request paths that embed a resource id. Each pattern is
+// anchored at the end of the path, allows any prefix before "/v1/", and
+// exposes the id (a single path segment) through a named capture group.
+var (
+	RegRetrieveBatchPath                        = regexp.MustCompile(`^.*/v1/batches/(?P<batch_id>[^/]+)$`)
+	RegCancelBatchPath                          = regexp.MustCompile(`^.*/v1/batches/(?P<batch_id>[^/]+)/cancel$`)
+	RegRetrieveFilePath                         = regexp.MustCompile(`^.*/v1/files/(?P<file_id>[^/]+)$`)
+	RegRetrieveFileContentPath                  = regexp.MustCompile(`^.*/v1/files/(?P<file_id>[^/]+)/content$`)
+	RegRetrieveFineTuningJobPath                = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)$`)
+	RegRetrieveFineTuningJobEventsPath          = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)/events$`)
+	RegRetrieveFineTuningJobCheckpointsPath     = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)/checkpoints$`)
+	RegCancelFineTuningJobPath                  = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)/cancel$`)
+	RegResumeFineTuningJobPath                  = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)/resume$`)
+	RegPauseFineTuningJobPath                   = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)/pause$`)
+	RegFineTuningCheckpointPermissionPath       = regexp.MustCompile(`^.*/v1/fine_tuning/checkpoints/(?P<fine_tuned_model_checkpoint>[^/]+)/permissions$`)
+	RegDeleteFineTuningCheckpointPermissionPath = regexp.MustCompile(`^.*/v1/fine_tuning/checkpoints/(?P<fine_tuned_model_checkpoint>[^/]+)/permissions/(?P<permission_id>[^/]+)$`)
+)
+
 type ErrorHandlerFunc func(statusCodeDetails string, err error) error

 var ErrorHandler ErrorHandlerFunc = func(statusCodeDetails string, err error) error {
@@ -62,10 +80,45 @@ func OverwriteRequestPathHeaderByCapability(headers http.Header, apiName string,
 	if !exist {
 		return
 	}
-	if originPath, err := proxywasm.GetHttpRequestHeader(":path"); err == nil {
+	originPath, err := proxywasm.GetHttpRequestHeader(":path")
+	if err == nil {
 		headers.Set("X-ENVOY-ORIGINAL-PATH", originPath)
 	}
+	/**
+	这里实现不太优雅，理应通过 apiName 来判断使用哪个正则替换
+	但 ApiName 定义在 provider 中， 而 provider 中又引用了 util
+	会导致循环引用
+	**/
+	if strings.Contains(mappedPath, "{") && strings.Contains(mappedPath, "}") {
+		replacements := []struct {
+			regx *regexp.Regexp
+			key  string
+		}{
+			{RegRetrieveFilePath, "file_id"},
+			{RegRetrieveFileContentPath, "file_id"},
+			{RegRetrieveBatchPath, "batch_id"},
+			{RegCancelBatchPath, "batch_id"},
+		}
+
+		for _, r := range replacements {
+			if r.regx.MatchString(originPath) {
+				subMatch := r.regx.FindStringSubmatch(originPath)
+				if subMatch == nil {
+					continue
+				}
+				index := r.regx.SubexpIndex(r.key)
+				if index < 0 || index >= len(subMatch) {
+					continue
+				}
+				id := subMatch[index]
+				mappedPath = r.regx.ReplaceAllStringFunc(mappedPath, func(s string) string {
+					return strings.Replace(s, "{"+r.key+"}", id, 1)
+				})
+			}
+		}
+	}
 	headers.Set(":path", mappedPath)
+	log.Debugf("[OverwriteRequestPath] originPath=%s, mappedPath=%s", originPath, mappedPath)
 }

 func OverwriteRequestAuthorizationHeader(headers http.Header, credential string) {
--- a/plugins/wasm-go/extensions/ai-proxy/util/ptr.go
+++ b/plugins/wasm-go/extensions/ai-proxy/util/ptr.go
@@ -0,0 +1,5 @@
+package util
+
+// Ptr returns a pointer to a fresh copy of v. It is handy for taking the
+// address of a literal or of a value that must not be aliased.
+func Ptr[T any](v T) *T {
+	p := new(T)
+	*p = v
+	return p
+}
--- a/plugins/wasm-go/extensions/ai-search/main.go
+++ b/plugins/wasm-go/extensions/ai-search/main.go
@@ -289,6 +289,7 @@ func onHttpRequestHeaders(ctx wrapper.HttpContext, config Config, log wrapper.Lo
 	}
 	ctx.SetRequestBodyBufferLimit(DEFAULT_MAX_BODY_BYTES)
 	_ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
+	_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
 	return types.ActionContinue
 }

@@ -362,7 +363,8 @@ func onHttpRequestBody(ctx wrapper.HttpContext, config Config, body []byte, log
 			}, rewriteBody,
 			func(statusCode int, responseHeaders http.Header, responseBody []byte) {
 				if statusCode != http.StatusOK {
-					log.Errorf("search rewrite failed, status: %d", statusCode)
+					log.Errorf("search rewrite failed, status: %d, request url: %s, request cluster: %s, search rewrite model: %s",
+						statusCode, searchRewrite.url, searchRewrite.client.ClusterName(), searchRewrite.modelName)
 					// After a rewrite failure, no further search is performed, thus quickly identifying the failure.
 					proxywasm.ResumeHttpRequest()
 					return
--- a/plugins/wasm-go/extensions/ai-statistics/README.md
+++ b/plugins/wasm-go/extensions/ai-statistics/README.md
@@ -23,6 +23,7 @@ description: AI可观测配置参考
 | 名称             | 数据类型  | 填写要求 | 默认值 | 描述                     |
 |----------------|-------|------|-----|------------------------|
 | `attributes` | []Attribute | 非必填  | -   | 用户希望记录在log/span中的信息 |
+| `disable_openai_usage` | bool | 非必填  | false   | 使用非 OpenAI 兼容协议时，model、token 的返回格式不标准；配置为 true 时将跳过 model/token 用量的提取与统计，可以避免报错 |

 Attribute 配置说明:

--- a/plugins/wasm-go/extensions/ai-statistics/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-statistics/README_EN.md
@@ -22,7 +22,8 @@ Users can also expand observable values through configuration:

 | Name             | Type  | Required | Default | Description |
 |----------------|-------|------|-----|------------------------|
-| `attributes` | []Attribute | required  | -   | Information that the user wants to record in log/span |
+| `attributes` | []Attribute | optional  | -   | Information that the user wants to record in log/span |
+| `disable_openai_usage` | bool | optional  | false   | With a non-OpenAI-compatible protocol, the model and token usage fields are non-standard. Setting this to true skips extracting and recording model/token usage, which avoids the resulting errors. |

 Attribute Configuration instructions:

--- a/plugins/wasm-go/extensions/ai-statistics/main.go
+++ b/plugins/wasm-go/extensions/ai-statistics/main.go
@@ -92,6 +92,8 @@ type AIStatisticsConfig struct {
 	attributes []Attribute
 	// If there exist attributes extracted from streaming body, chunks should be buffered
 	shouldBufferStreamingBody bool
+	// If disableOpenaiUsage is true, model/input_token/output_token logs will be skipped
+	disableOpenaiUsage bool
 }

 func generateMetricName(route, cluster, model, consumer, metricName string) string {
@@ -160,6 +162,10 @@ func parseConfig(configJson gjson.Result, config *AIStatisticsConfig, log wrappe
 	}
 	// Metric settings
 	config.counterMetrics = make(map[string]proxywasm.MetricCounter)
+
+	// Parse openai usage config setting.
+	config.disableOpenaiUsage = configJson.Get("disable_openai_usage").Bool()
+
 	return nil
 }

@@ -264,15 +270,17 @@ func onHttpStreamingBody(ctx wrapper.HttpContext, config AIStatisticsConfig, dat
 	}

 	// Set information about this request
-	if model, inputToken, outputToken, ok := getUsage(data); ok {
-		ctx.SetUserAttribute(Model, model)
-		ctx.SetUserAttribute(InputToken, inputToken)
-		ctx.SetUserAttribute(OutputToken, outputToken)
-		// Set span attributes for ARMS.
-		setSpanAttribute(ArmsModelName, model, log)
-		setSpanAttribute(ArmsInputToken, inputToken, log)
-		setSpanAttribute(ArmsOutputToken, outputToken, log)
-		setSpanAttribute(ArmsTotalToken, inputToken+outputToken, log)
+	if !config.disableOpenaiUsage {
+		if model, inputToken, outputToken, ok := getUsage(data); ok {
+			ctx.SetUserAttribute(Model, model)
+			ctx.SetUserAttribute(InputToken, inputToken)
+			ctx.SetUserAttribute(OutputToken, outputToken)
+			// Set span attributes for ARMS.
+			setSpanAttribute(ArmsModelName, model, log)
+			setSpanAttribute(ArmsInputToken, inputToken, log)
+			setSpanAttribute(ArmsOutputToken, outputToken, log)
+			setSpanAttribute(ArmsTotalToken, inputToken+outputToken, log)
+		}
 	}
 	// If the end of the stream is reached, record metrics/logs/spans.
 	if endOfStream {
@@ -311,15 +319,17 @@ func onHttpResponseBody(ctx wrapper.HttpContext, config AIStatisticsConfig, body
 	}

 	// Set information about this request
-	if model, inputToken, outputToken, ok := getUsage(body); ok {
-		ctx.SetUserAttribute(Model, model)
-		ctx.SetUserAttribute(InputToken, inputToken)
-		ctx.SetUserAttribute(OutputToken, outputToken)
-		// Set span attributes for ARMS.
-		setSpanAttribute(ArmsModelName, model, log)
-		setSpanAttribute(ArmsInputToken, inputToken, log)
-		setSpanAttribute(ArmsOutputToken, outputToken, log)
-		setSpanAttribute(ArmsTotalToken, inputToken+outputToken, log)
+	if !config.disableOpenaiUsage {
+		if model, inputToken, outputToken, ok := getUsage(body); ok {
+			ctx.SetUserAttribute(Model, model)
+			ctx.SetUserAttribute(InputToken, inputToken)
+			ctx.SetUserAttribute(OutputToken, outputToken)
+			// Set span attributes for ARMS.
+			setSpanAttribute(ArmsModelName, model, log)
+			setSpanAttribute(ArmsInputToken, inputToken, log)
+			setSpanAttribute(ArmsOutputToken, outputToken, log)
+			setSpanAttribute(ArmsTotalToken, inputToken+outputToken, log)
+		}
 	}

 	// Set user defined log & span attributes.
@@ -471,6 +481,11 @@ func writeMetric(ctx wrapper.HttpContext, config AIStatisticsConfig, log wrapper
 		log.Warnf("ClusterName typd assert failed, skip metric record")
 		return
 	}
+	
+	if config.disableOpenaiUsage {
+		return
+	} 
+
 	if ctx.GetUserAttribute(Model) == nil || ctx.GetUserAttribute(InputToken) == nil || ctx.GetUserAttribute(OutputToken) == nil {
 		log.Warnf("get usage information failed, skip metric record")
 		return
--- a/plugins/wasm-go/extensions/ai-token-ratelimit/config.go
+++ b/plugins/wasm-go/extensions/ai-token-ratelimit/config.go
@@ -142,7 +142,7 @@ func parseClusterKeyRateLimitConfig(json gjson.Result, config *ClusterKeyRateLim
 		config.rejectedCode = DefaultRejectedCode
 	}
 	rejectedMsg := json.Get("rejected_msg")
-	if rejectedCode.Exists() {
+	if rejectedMsg.Exists() {
 		config.rejectedMsg = rejectedMsg.String()
 	} else {
 		config.rejectedMsg = DefaultRejectedMsg
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md
@@ -6,8 +6,12 @@ description: 基于 Key 集群限流插件配置参考

 ## 功能说明

-`cluster-key-rate-limit` 插件基于 Redis 实现集群限流，适用于需要跨多个 Higress Gateway 实例实现全局一致速率限制的场景。
-限流所使用的 Key 可以来源于 URL 参数、HTTP 请求头、客户端 IP 地址、消费者名称或 Cookie 中的 Key。
+`cluster-key-rate-limit` 插件基于 Redis 实现**集群级限流**，适用于需要跨多个 Higress Gateway 实例进行**全局一致速率限制**的场景。
+
+支持两种限流模式：
+
+- **规则级全局限流**：基于相同的 `rule_name` 和 `global_threshold` 配置，对自定义规则组设置全局限流阈值
+- **Key 级动态限流**：根据请求中的动态 Key（如 URL 参数、请求头、客户端 IP、Consumer 名称或 Cookie 字段）进行分组限流

 ## 运行属性

@@ -19,12 +23,22 @@ description: 基于 Key 集群限流插件配置参考
 | 配置项                  | 类型   | 必填 | 默认值 | 说明                                                                          |
 | ----------------------- | ------ | ---- | ------ |-----------------------------------------------------------------------------|
 | rule_name               | string | 是 | - | 限流规则名称，根据限流规则名称 + 限流类型 + 限流 key 名称 + 限流 key 对应的实际值来拼装 redis key             |
-| rule_items | array of object | 是   | -                 | 限流规则项，按照 rule_items 下的排列顺序，匹配第一个 rule_item 后命中限流规则，后续规则将被忽略                 |
+| global_threshold | Object | 否，`global_threshold` 或 `rule_items` 选填一项 | - | 对整个自定义规则组进行限流 |
+| rule_items | array of object | 否，`global_threshold` 或 `rule_items` 选填一项 | -                 | 限流规则项，按照 rule_items 下的排列顺序，匹配第一个 rule_item 后命中限流规则，后续规则将被忽略                 |
 | show_limit_quota_header | bool | 否 | false | 响应头中是否显示 `X-RateLimit-Limit`（限制的总请求数）和 `X-RateLimit-Remaining`（剩余还可以发送的请求数） |
 | rejected_code           | int | 否 | 429 | 请求被限流时，返回的 HTTP 状态码                                                         |
 | rejected_msg            | string | 否 | Too many requests | 请求被限流时，返回的响应体                                                               |
 | redis                   | object          | 是                                                           | -                 | redis 相关配置                                                                  |

+`global_threshold` 的配置字段说明。
+
+| 配置项           | 类型 | 必填                                                         | 默认值 | 说明               |
+| ---------------- | ---- | ------------------------------------------------------------ | ------ | ------------------ |
+| query_per_second | int  | 否，`query_per_second`,`query_per_minute`,`query_per_hour`,`query_per_day` 中选填一项 | -      | 允许每秒请求次数   |
+| query_per_minute | int  | 否，`query_per_second`,`query_per_minute`,`query_per_hour`,`query_per_day` 中选填一项 | -      | 允许每分钟请求次数 |
+| query_per_hour   | int  | 否，`query_per_second`,`query_per_minute`,`query_per_hour`,`query_per_day` 中选填一项 | -      | 允许每小时请求次数 |
+| query_per_day    | int  | 否，`query_per_second`,`query_per_minute`,`query_per_hour`,`query_per_day` 中选填一项 | -      | 允许每天请求次数   |
+
 `rule_items` 中每一项的配置字段说明。

 | 配置项                | 类型            | 必填                   | 默认值 | 说明                                                                                                                                                       |
@@ -63,28 +77,39 @@ description: 基于 Key 集群限流插件配置参考

 ## 配置示例

+### 自定义规则组全局限流
+
+```yaml
+rule_name: routeA-global-limit-rule
+global_threshold:
+  query_per_minute: 1000 # 自定义规则组每分钟最多1000次请求
+redis:
+  service_name: redis.static
+show_limit_quota_header: true
+```
+
 ### 识别请求参数 apikey，进行区别限流

 ```yaml
-rule_name: default_rule
+rule_name: routeA-request-param-limit-rule
 rule_items:
- limit_by_param: apikey
-  limit_keys:
-  - key: 9a342114-ba8a-11ec-b1bf-00163e1250b5
-    query_per_minute: 10
-  - key: a6a6d7f2-ba8a-11ec-bec2-00163e1250b5
-    query_per_hour: 100
- limit_by_per_param: apikey
-  limit_keys:
-  # 正则表达式，匹配以 a 开头的所有字符串，每个 apikey 对应的请求 10qds
-  - key: "regexp:^a.*"
-    query_per_second: 10
-  # 正则表达式，匹配以 b 开头的所有字符串，每个 apikey 对应的请求 100qd
-  - key: "regexp:^b.*"
-    query_per_minute: 100
-  # 兜底用，匹配所有请求，每个 apikey 对应的请求 1000qdh
-  - key: "*"
-    query_per_hour: 1000
+  - limit_by_param: apikey
+    limit_keys:
+      - key: 9a342114-ba8a-11ec-b1bf-00163e1250b5
+        query_per_minute: 10
+      - key: a6a6d7f2-ba8a-11ec-bec2-00163e1250b5
+        query_per_hour: 100
+  - limit_by_per_param: apikey
+    limit_keys:
+      # 正则表达式，匹配以 a 开头的所有字符串，每个 apikey 对应的请求 10qps
+      - key: "regexp:^a.*"
+        query_per_second: 10
+      # 正则表达式，匹配以 b 开头的所有字符串，每个 apikey 对应的请求 100qpm
+      - key: "regexp:^b.*"
+        query_per_minute: 100
+      # 兜底用，匹配所有请求，每个 apikey 对应的请求 1000qph
+      - key: "*"
+        query_per_hour: 1000
 redis:
  service_name: redis.static
 show_limit_quota_header: true
@@ -93,25 +118,25 @@ show_limit_quota_header: true
 ### 识别请求头 x-ca-key，进行区别限流

 ```yaml
-rule_name: default_rule
+rule_name: routeA-request-header-limit-rule
 rule_items:
- limit_by_header: x-ca-key
-  limit_keys:
-  - key: 102234
-    query_per_minute: 10
-  - key: 308239
-    query_per_hour: 10
- limit_by_per_header: x-ca-key
-  limit_keys:
-  # 正则表达式，匹配以 a 开头的所有字符串，每个 apikey 对应的请求 10qds
-  - key: "regexp:^a.*"
-    query_per_second: 10
-  # 正则表达式，匹配以b开头的所有字符串，每个 apikey 对应的请求 100qd
-  - key: "regexp:^b.*"
-    query_per_minute: 100
-  # 兜底用，匹配所有请求，每个 apikey 对应的请求 1000qdh
-  - key: "*"
-    query_per_hour: 1000            
+  - limit_by_header: x-ca-key
+    limit_keys:
+      - key: 102234
+        query_per_minute: 10
+      - key: 308239
+        query_per_hour: 10
+  - limit_by_per_header: x-ca-key
+    limit_keys:
+      # 正则表达式，匹配以 a 开头的所有字符串，每个 x-ca-key 对应的请求 10qps
+      - key: "regexp:^a.*"
+        query_per_second: 10
+      # 正则表达式，匹配以 b 开头的所有字符串，每个 x-ca-key 对应的请求 100qpm
+      - key: "regexp:^b.*"
+        query_per_minute: 100
+      # 兜底用，匹配所有请求，每个 x-ca-key 对应的请求 1000qph
+      - key: "*"
+        query_per_hour: 1000
 redis:
  service_name: redis.static
 show_limit_quota_header: true
@@ -120,19 +145,19 @@ show_limit_quota_header: true
 ### 根据请求头 x-forwarded-for 获取对端 IP，进行区别限流

 ```yaml
-rule_name: default_rule
+rule_name: routeA-client-ip-limit-rule
 rule_items:
- limit_by_per_ip: from-header-x-forwarded-for
-  limit_keys:
-  # 精确 IP
-  - key: 1.1.1.1
-    query_per_day: 10
-  # IP 段，符合这个 IP 段的 IP，每个 IP 100qpd
-  - key: 1.1.1.0/24
-    query_per_day: 100
-  # 兜底用，即默认每个 IP 1000 qpd
-  - key: 0.0.0.0/0
-    query_per_day: 1000
+  - limit_by_per_ip: from-header-x-forwarded-for
+    limit_keys:
+      # 精确 IP
+      - key: 1.1.1.1
+        query_per_day: 10
+      # IP 段，符合这个 IP 段的 IP，每个 IP 100qpd
+      - key: 1.1.1.0/24
+        query_per_day: 100
+      # 兜底用，即默认每个 IP 1000 qpd
+      - key: 0.0.0.0/0
+        query_per_day: 1000
 redis:
  service_name: redis.static
 show_limit_quota_header: true
@@ -141,25 +166,25 @@ show_limit_quota_header: true
 ### 识别 consumer，进行区别限流

 ```yaml
-rule_name: default_rule
+rule_name: routeA-consumer-limit-rule
 rule_items:
- limit_by_consumer: ''
-  limit_keys:
-  - key: consumer1
-    query_per_second: 10
-  - key: consumer2
-    query_per_hour: 100
- limit_by_per_consumer: ''
-  limit_keys:
-  # 正则表达式，匹配以 a 开头的所有字符串，每个 consumer 对应的请求 10qds
-  - key: "regexp:^a.*"
-    query_per_second: 10
-  # 正则表达式，匹配以 b 开头的所有字符串，每个 consumer 对应的请求 100qd
-  - key: "regexp:^b.*"
-    query_per_minute: 100
-  # 兜底用，匹配所有请求，每个 consumer 对应的请求 1000qdh
-  - key: "*"
-    query_per_hour: 1000     
+  - limit_by_consumer: ''
+    limit_keys:
+      - key: consumer1
+        query_per_second: 10
+      - key: consumer2
+        query_per_hour: 100
+  - limit_by_per_consumer: ''
+    limit_keys:
+      # 正则表达式，匹配以 a 开头的所有字符串，每个 consumer 对应的请求 10qps
+      - key: "regexp:^a.*"
+        query_per_second: 10
+      # 正则表达式，匹配以 b 开头的所有字符串，每个 consumer 对应的请求 100qpm
+      - key: "regexp:^b.*"
+        query_per_minute: 100
+      # 兜底用，匹配所有请求，每个 consumer 对应的请求 1000qph
+      - key: "*"
+        query_per_hour: 1000
 redis:
  service_name: redis.static
 show_limit_quota_header: true 
@@ -168,7 +193,7 @@ show_limit_quota_header: true
 ### 识别 Cookie 中的键值对，进行区别限流

 ```yaml
-rule_name: default_rule
+rule_name: routeA-cookie-limit-rule
 rule_items:
  - limit_by_cookie: key1
    limit_keys:
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md
@@ -1,164 +1,201 @@
 ---
-title: Key-Based Cluster Rate Limiting
-keywords: [higress, rate-limit]
-description: Configuration reference for the Key-Based Cluster Rate Limiting plugin
+title: Cluster Rate Limiting Based on Key  
+keywords: [higress, rate-limit]  
+description: Configuration reference for the Key-based cluster rate limiting plugin
+
 ---
+
 ## Function Description
-The `cluster-key-rate-limit` plugin implements cluster rate limiting based on Redis, suitable for scenarios that require global consistent rate limiting across multiple Higress Gateway instances. 

-The Key used for rate limiting can originate from URL parameters, HTTP request headers, client IP addresses, consumer names, or keys in cookies. 
+The `cluster-key-rate-limit` plugin implements **cluster-level rate limiting** based on Redis, suitable for scenarios
+requiring **globally consistent rate limiting across multiple Higress Gateway instances**.

-## Execution Attributes
-Plugin Execution Phase: `default phase`  
-Plugin Execution Priority: `20` 
+It supports two rate limiting modes:

-## Configuration Description
-| Configuration Item        | Type          | Required | Default Value | Description                                                                               |
-|---------------------------|---------------|----------|---------------|-------------------------------------------------------------------------------------------|
-| rule_name                 | string        | Yes      | -             | The name of the rate limiting rule. The Redis key is constructed using rule name + rate limit type + limit key name + actual value of the limit key.         |
-| rule_items                | array of object| Yes     | -             | Rate limiting rule items. The first matching `rule_item` based on the order under `rule_items` will trigger the rate limiting, and subsequent rules will be ignored.                 |
-| show_limit_quota_header   | bool          | No       | false         | Whether to display `X-RateLimit-Limit` (total requests allowed) and `X-RateLimit-Remaining` (remaining requests that can be sent) in the response headers. |
-| rejected_code             | int           | No       | 429           | HTTP status code returned when a request is rate limited.                                                          |
-| rejected_msg              | string        | No       | Too many requests | Response body returned when a request is rate limited.                                                               |
-| redis                     | object        | Yes      | -             | Redis related configuration.                                                                  |
+- **Rule-Level Global Rate Limiting**: Applies a unified rate limit threshold to custom rule groups based on identical `rule_name` and `global_threshold` configurations.
+- **Key-Level Dynamic Rate Limiting**: Groups and limits requests by dynamic keys extracted from requests, such as URL parameters, request headers, client IPs, consumer names, or cookie fields.

-Description of configuration fields for each item in `rule_items`.
-| Configuration Item        | Type          | Required               | Default Value | Description                                                                                           |
-|---------------------------|---------------|------------------------|---------------|-------------------------------------------------------------------------------------------------------|
-| limit_by_header           | string        | No, one of `limit_by_*` | -             | The name of the HTTP request header from which to retrieve the rate limiting key value.               |
-| limit_by_param            | string        | No, one of `limit_by_*` | -             | The name of the URL parameter from which to retrieve the rate limiting key value.                     |
-| limit_by_consumer         | string        | No, one of `limit_by_*` | -             | Applies rate limiting based on consumer name without needing to add an actual value.                  |
-| limit_by_cookie           | string        | No, one of `limit_by_*` | -             | The name of the key in the Cookie from which to retrieve the rate limiting key value.                |
-| limit_by_per_header       | string        | No, one of `limit_by_*` | -             | Matches specific HTTP request headers according to the rules and calculates rate limits for each header. The `limit_keys` configuration supports regular expressions or `*`. |
-| limit_by_per_param        | string        | No, one of `limit_by_*` | -             | Matches specific URL parameters according to the rules and calculates rate limits for each parameter. The `limit_keys` configuration supports regular expressions or `*`. |
-| limit_by_per_consumer     | string        | No, one of `limit_by_*` | -             | Matches specific consumers according to the rules and calculates rate limits for each consumer. The `limit_keys` configuration supports regular expressions or `*`. |
-| limit_by_per_cookie       | string        | No, one of `limit_by_*` | -             | Matches specific cookies according to the rules and calculates rate limits for each cookie. The `limit_keys` configuration supports regular expressions or `*`. |
-| limit_by_per_ip           | string        | No, one of `limit_by_*` | -             | Matches specific IPs according to the rules and calculates rate limits for each IP. Retrieve via IP parameter name from request headers, defined as `from-header-{header name}`, e.g., `from-header-x-forwarded-for`. To get the remote socket IP directly, use `from-remote-addr`. |
-| limit_keys                | array of object | Yes                    | -             | Configures the limit counts after matching key values.                                               |
+## Operational Attributes

-Description of configuration fields for each item in `limit_keys`.
-| Configuration Item        | Type          | Required                                                         | Default Value | Description                                                        |
-|---------------------------|---------------|------------------------------------------------------------------|---------------|--------------------------------------------------------------------|
-| key                       | string        | Yes                                                              | -             | Matched key value; types `limit_by_per_header`, `limit_by_per_param`, `limit_by_per_consumer`, `limit_by_per_cookie` support regular expression configurations (starting with regexp: followed by a regular expression) or `*` (representing all), e.g., `regexp:^d.*` (all strings starting with d); `limit_by_per_ip` supports configuring IP addresses or IP segments. |
-| query_per_second          | int           | No, one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day` is optional. | -             | Allowed number of requests per second.                           |
-| query_per_minute          | int           | No, one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day` is optional. | -             | Allowed number of requests per minute.                           |
-| query_per_hour            | int           | No, one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day` is optional. | -             | Allowed number of requests per hour.                             |
-| query_per_day             | int           | No, one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day` is optional. | -             | Allowed number of requests per day.                              |
+- **Plugin execution phase**: `Default phase`
+- **Plugin execution priority**: `20`

-Description of configuration fields for each item in `redis`.
-| Configuration Item | Type   | Required | Default Value                          | Description                                                                                                     |
-|--------------------|--------|----------|----------------------------------------|-----------------------------------------------------------------------------------------------------------------|
-| service_name       | string | Required | -                                      | Full FQDN name of the Redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local. |
-| service_port       | int    | No       | 80 for static services; otherwise 6379 | Service port for the Redis service.                                                                             |
-| username           | string | No       | -                                      | Redis username.                                                                                                 |
-| password           | string | No       | -                                      | Redis password.                                                                                                 |
-| timeout            | int    | No       | 1000                                   | Redis connection timeout in milliseconds.                                                                       |
-| database           | int    | No       | 0                                      | The database ID used, for example, configured as 1, corresponds to `SELECT 1`.                                  |
+## Configuration Instructions

+| Configuration Item       | Type          | Required                                  | Default Value       | Description                                                                |  
+|--------------------------|---------------|-------------------------------------------|---------------------|----------------------------------------------------------------------------|  
+| rule_name                | string        | Yes                                       | -                   | Name of the rate limiting rule. Used to construct the Redis key in the format: `rule_name:rate_limit_type:key_name:key_value`. |  
+| global_threshold         | object        | No (choose either `global_threshold` or `rule_items`) | -                 | Applies rate limiting to the entire custom rule group. |  
+| rule_items               | array of object | No (choose either `global_threshold` or `rule_items`) | -               | Rate limiting rule items. Rules are matched in the order of the array; once the first matching rule is hit, subsequent rules are ignored. |  
+| show_limit_quota_header  | bool          | No                                        | false             | Whether to display `X-RateLimit-Limit` (total allowed requests) and `X-RateLimit-Remaining` (remaining allowed requests) in the response header. |  
+| rejected_code            | int           | No                                        | 429               | HTTP status code returned when a request is rate-limited.                  |  
+| rejected_msg             | string        | No                                        | Too many requests | Response body returned when a request is rate-limited.                      |  
+| redis                    | object        | Yes                                       | -                   | Configuration for Redis.                                                   |  
+
+### Configuration Fields for `global_threshold`
+
+| Configuration Item       | Type | Required                                 | Default Value | Description                          |  
+|--------------------------|------|------------------------------------------|---------------|--------------------------------------|  
+| query_per_second         | int  | No (choose one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day`) | -           | Allowed requests per second.         |  
+| query_per_minute         | int  | No (choose one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day`) | -           | Allowed requests per minute.         |  
+| query_per_hour           | int  | No (choose one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day`) | -           | Allowed requests per hour.           |  
+| query_per_day            | int  | No (choose one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day`) | -           | Allowed requests per day.            |  
+
+### Configuration Fields for `rule_items`
+
+| Configuration Item            | Type          | Required                          | Default Value | Description                                                                 |  
+|-------------------------------|---------------|-----------------------------------|---------------|-----------------------------------------------------------------------------|  
+| limit_by_header               | string        | No (choose one of `limit_by_*` fields) | -           | Configures the HTTP request header name to extract the rate limiting key.   |  
+| limit_by_param                | string        | No (choose one of `limit_by_*` fields) | -           | Configures the URL parameter name to extract the rate limiting key.        |  
+| limit_by_consumer             | string        | No (choose one of `limit_by_*` fields) | -           | Rate limits based on the consumer name (no need to add a specific value).   |  
+| limit_by_cookie               | string        | No (choose one of `limit_by_*` fields) | -           | Configures the Cookie key name to extract the rate limiting key.           |  
+| limit_by_per_header           | string        | No (choose one of `limit_by_*` fields) | -           | Matches specific HTTP headers by rule and calculates rate limits for each header. Supports regular expressions (starting with `regexp:`) or `*` for the `limit_keys` configuration. |  
+| limit_by_per_param            | string        | No (choose one of `limit_by_*` fields) | -           | Matches specific URL parameters by rule and calculates rate limits for each parameter. Supports regular expressions (starting with `regexp:`) or `*` for the `limit_keys` configuration. |  
+| limit_by_per_consumer         | string        | No (choose one of `limit_by_*` fields) | -           | Matches specific consumers by rule and calculates rate limits for each consumer. Supports regular expressions (starting with `regexp:`) or `*` for the `limit_keys` configuration (no need to add a specific value for the consumer name). |  
+| limit_by_per_cookie           | string        | No (choose one of `limit_by_*` fields) | -           | Matches specific Cookies by rule and calculates rate limits for each Cookie value. Supports regular expressions (starting with `regexp:`) or `*` for the `limit_keys` configuration. |  
+| limit_by_per_ip               | string        | No (choose one of `limit_by_*` fields) | -           | Matches specific IPs by rule and calculates rate limits for each IP. The IP can be extracted from a request header (formatted as `from-header-<header_name>`, e.g., `from-header-x-forwarded-for`) or directly from the peer socket IP (configured as `from-remote-addr`). |  
+| limit_keys                    | array of object | Yes                               | -           | Configures the rate limits for matched key values.                          |  
+
+### Configuration Fields for `limit_keys`
+
+| Configuration Item       | Type   | Required                                 | Default Value | Description                                                                 |  
+|--------------------------|--------|------------------------------------------|---------------|-----------------------------------------------------------------------------|  
+| key                      | string | Yes                                      | -             | The matched key value. For `limit_by_per_header`, `limit_by_per_param`, `limit_by_per_consumer`, and `limit_by_per_cookie` types, supports regular expressions (prefixed with `regexp:`) or `*` (wildcard for all). Example regular expression: `regexp:^d.*` (matches all strings starting with `d`). For `limit_by_per_ip`, supports IP addresses or CIDR blocks. |  
+| query_per_second         | int    | No (choose one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day`) | -           | Allowed requests per second.                                                |  
+| query_per_minute         | int    | No (choose one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day`) | -           | Allowed requests per minute.                                                |  
+| query_per_hour           | int    | No (choose one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day`) | -           | Allowed requests per hour.                                                  |  
+| query_per_day            | int    | No (choose one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day`) | -           | Allowed requests per day.                                                   |  
+
+### Configuration Fields for `redis`
+
+| Configuration Item   | Type   | Required | Default Value                                                     | Description                                                                 |  
+|----------------------|--------|----------|-------------------------------------------------------------------|-----------------------------------------------------------------------------|  
+| service_name         | string | Yes      | -                                                                 | The fully qualified domain name (FQDN) of the Redis service, including the service type (e.g., `my-redis.dns`, `redis.my-ns.svc.cluster.local`). |  
+| service_port         | int    | No       | 80 (for static services), 6379 for other services                  | The port of the Redis service.                                              |  
+| username             | string | No       | -                                                                 | Redis username for authentication.                                          |  
+| password             | string | No       | -                                                                 | Redis password for authentication.                                          |  
+| timeout              | int    | No       | 1000 (milliseconds)                                               | Redis connection timeout in milliseconds.                                  |  
+| database             | int    | No       | 0                                                                 | The ID of the Redis database to use (e.g., configuring `1` corresponds to `SELECT 1`). |  

 ## Configuration Examples

-### Distinguish rate limiting based on the request parameter apikey
-```yaml
-rule_name: default_rule
-rule_items:
- limit_by_param: apikey
-  limit_keys:
-  - key: 9a342114-ba8a-11ec-b1bf-00163e1250b5
-    query_per_minute: 10
-  - key: a6a6d7f2-ba8a-11ec-bec2-00163e1250b5
-    query_per_hour: 100
- limit_by_per_param: apikey
-  limit_keys:
-  # Regular expression, matches all strings starting with a, each apikey corresponds to 10qds.
-  - key: "regexp:^a.*"
-    query_per_second: 10
-  # Regular expression, matches all strings starting with b, each apikey corresponds to 100qd.
-  - key: "regexp:^b.*"
-    query_per_minute: 100
-  # As a fallback, matches all requests, each apikey corresponds to 1000qdh.
-  - key: "*"
-    query_per_hour: 1000
+### Global Rate Limiting for Custom Rule Group
+
+```yaml  
+rule_name: routeA-global-limit-rule
+global_threshold:
+  query_per_minute: 1000 # Maximum 1000 requests per minute for this rule group
 redis:
  service_name: redis.static
 show_limit_quota_header: true
 ```

-### Distinguish rate limiting based on the header x-ca-key
-```yaml
-rule_name: default_rule
+### Rate Limiting by Request Parameter `apikey`
+
+```yaml  
+rule_name: routeA-request-param-limit-rule
 rule_items:
- limit_by_header: x-ca-key
-  limit_keys:
-  - key: 102234
-    query_per_minute: 10
-  - key: 308239
-    query_per_hour: 10
- limit_by_per_header: x-ca-key
-  limit_keys:
-  # Regular expression, matches all strings starting with a, each apikey corresponds to 10qds.
-  - key: "regexp:^a.*"
-    query_per_second: 10
-  # Regular expression, matches all strings starting with b, each apikey corresponds to 100qd.
-  - key: "regexp:^b.*"
-    query_per_minute: 100
-  # As a fallback, matches all requests, each apikey corresponds to 1000qdh.
-  - key: "*"
-    query_per_hour: 1000
+  - limit_by_param: apikey
+    limit_keys:
+      - key: 9a342114-ba8a-11ec-b1bf-00163e1250b5
+        query_per_minute: 10
+      - key: a6a6d7f2-ba8a-11ec-bec2-00163e1250b5
+        query_per_hour: 100
+  - limit_by_per_param: apikey
+    limit_keys:
+      # Regular expression to match all strings starting with "a"; 10 requests per second for each apikey  
+      - key: "regexp:^a.*"
+        query_per_second: 10
+      # Regular expression to match all strings starting with "b"; 100 requests per minute for each apikey  
+      - key: "regexp:^b.*"
+        query_per_minute: 100
+      # Fallback rule to match all requests; 1000 requests per hour for each apikey  
+      - key: "*"
+        query_per_hour: 1000
 redis:
  service_name: redis.static
 show_limit_quota_header: true
 ```

-### Distinguish rate limiting based on the client IP from the request header x-forwarded-for
-```yaml
-rule_name: default_rule
+### Rate Limiting by Request Header `x-ca-key`
+
+```yaml  
+rule_name: routeA-request-header-limit-rule
 rule_items:
- limit_by_per_ip: from-header-x-forwarded-for
-  limit_keys:
-  # Exact IP
-  - key: 1.1.1.1
-    query_per_day: 10
-  # IP segment, for IPs matching this segment, each IP corresponds to 100qpd.
-  - key: 1.1.1.0/24
-    query_per_day: 100
-  # As a fallback, defaults to 1000 qpd for each IP.
-  - key: 0.0.0.0/0
-    query_per_day: 1000
+  - limit_by_header: x-ca-key
+    limit_keys:
+      - key: 102234
+        query_per_minute: 10
+      - key: 308239
+        query_per_hour: 10
+  - limit_by_per_header: x-ca-key
+    limit_keys:
+      # Regular expression to match all strings starting with "a"; 10 requests per second for each key  
+      - key: "regexp:^a.*"
+        query_per_second: 10
+      # Regular expression to match all strings starting with "b"; 100 requests per minute for each key  
+      - key: "regexp:^b.*"
+        query_per_minute: 100
+      # Fallback rule to match all requests; 1000 requests per hour for each key  
+      - key: "*"
+        query_per_hour: 1000
 redis:
  service_name: redis.static
 show_limit_quota_header: true
 ```

-### Distinguish rate limiting based on consumers
-```yaml
-rule_name: default_rule
+### Rate Limiting by Client IP Extracted from `x-forwarded-for` Header
+
+```yaml  
+rule_name: routeA-client-ip-limit-rule
 rule_items:
- limit_by_consumer: ''
-  limit_keys:
-  - key: consumer1
-    query_per_second: 10
-  - key: consumer2
-    query_per_hour: 100
- limit_by_per_consumer: ''
-  limit_keys:
-  # Regular expression, matches all strings starting with a, each consumer corresponds to 10qds.
-  - key: "regexp:^a.*"
-    query_per_second: 10
-  # Regular expression, matches all strings starting with b, each consumer corresponds to 100qd.
-  - key: "regexp:^b.*"
-    query_per_minute: 100
-  # As a fallback, matches all requests, each consumer corresponds to 1000qdh.
-  - key: "*"
-    query_per_hour: 1000
+  - limit_by_per_ip: from-header-x-forwarded-for
+    limit_keys:
+      # Exact IP match  
+      - key: 1.1.1.1
+        query_per_day: 10
+      # CIDR block match; 100 requests per day for each IP in the block  
+      - key: 1.1.1.0/24
+        query_per_day: 100
+      # Fallback rule for all IPs; 1000 requests per day for each IP  
+      - key: 0.0.0.0/0
+        query_per_day: 1000
 redis:
  service_name: redis.static
 show_limit_quota_header: true
 ```

-### Distinguish rate limiting based on key-value pairs in cookies
-```yaml
-rule_name: default_rule
+### Rate Limiting by Consumer
+
+```yaml  
+rule_name: routeA-consumer-limit-rule
+rule_items:
+  - limit_by_consumer: ''
+    limit_keys:
+      - key: consumer1
+        query_per_second: 10
+      - key: consumer2
+        query_per_hour: 100
+  - limit_by_per_consumer: ''
+    limit_keys:
+      # Regular expression to match all consumer names starting with "a"; 10 requests per second for each consumer  
+      - key: "regexp:^a.*"
+        query_per_second: 10
+      # Regular expression to match all consumer names starting with "b"; 100 requests per minute for each consumer  
+      - key: "regexp:^b.*"
+        query_per_minute: 100
+      # Fallback rule to match all consumers; 1000 requests per hour for each consumer  
+      - key: "*"
+        query_per_hour: 1000
+redis:
+  service_name: redis.static
+show_limit_quota_header: true
+```
+
+### Rate Limiting by Cookie Value
+
+```yaml  
+rule_name: routeA-cookie-limit-rule
 rule_items:
  - limit_by_cookie: key1
    limit_keys:
@@ -168,13 +205,13 @@ rule_items:
        query_per_hour: 100
  - limit_by_per_cookie: key1
    limit_keys:
-      # Regular expression, matches all strings starting with a, each cookie's value corresponds to 10qds.
+      # Regular expression to match all cookie values starting with "a"; 10 requests per second for each value  
      - key: "regexp:^a.*"
        query_per_second: 10
-      # Regular expression, matches all strings starting with b, each cookie's value corresponds to 100qd.
+      # Regular expression to match all cookie values starting with "b"; 100 requests per minute for each value  
      - key: "regexp:^b.*"
        query_per_minute: 100
-      # As a fallback, matches all requests, each cookie's value corresponds to 1000qdh.
+      # Fallback rule to match all cookie values; 1000 requests per hour for each value  
      - key: "*"
        query_per_hour: 1000
 rejected_code: 200
@@ -182,4 +219,4 @@ rejected_msg: '{"code":-1,"msg":"Too many requests"}'
 redis:
  service_name: redis.static
 show_limit_quota_header: true
-```
+```
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go
@@ -1,303 +0,0 @@
-package main
-
-import (
-	"errors"
-	"fmt"
-	"strings"
-
-	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
-	"github.com/tidwall/gjson"
-	re "github.com/wasilibs/go-re2"
-	"github.com/zmap/go-iptree/iptree"
-)
-
-// 限流规则项类型
-type limitRuleItemType string
-
-// 限流配置项key类型
-type limitConfigItemType string
-
-const (
-	limitByHeaderType      limitRuleItemType = "limit_by_header"
-	limitByParamType       limitRuleItemType = "limit_by_param"
-	limitByConsumerType    limitRuleItemType = "limit_by_consumer"
-	limitByCookieType      limitRuleItemType = "limit_by_cookie"
-	limitByPerHeaderType   limitRuleItemType = "limit_by_per_header"
-	limitByPerParamType    limitRuleItemType = "limit_by_per_param"
-	limitByPerConsumerType limitRuleItemType = "limit_by_per_consumer"
-	limitByPerCookieType   limitRuleItemType = "limit_by_per_cookie"
-	limitByPerIpType       limitRuleItemType = "limit_by_per_ip"
-
-	exactType  limitConfigItemType = "exact"  // 精确匹配
-	regexpType limitConfigItemType = "regexp" // 正则表达式
-	allType    limitConfigItemType = "*"      // 匹配所有情况
-	ipNetType  limitConfigItemType = "ipNet"  // ip段
-
-	RemoteAddrSourceType = "remote-addr"
-	HeaderSourceType     = "header"
-
-	DefaultRejectedCode uint32 = 429
-	DefaultRejectedMsg  string = "Too many requests"
-
-	Second           int64 = 1
-	SecondsPerMinute       = 60 * Second
-	SecondsPerHour         = 60 * SecondsPerMinute
-	SecondsPerDay          = 24 * SecondsPerHour
-)
-
-var timeWindows = map[string]int64{
-	"query_per_second": Second,
-	"query_per_minute": SecondsPerMinute,
-	"query_per_hour":   SecondsPerHour,
-	"query_per_day":    SecondsPerDay,
-}
-
-type ClusterKeyRateLimitConfig struct {
-	ruleName             string          // 限流规则名称
-	ruleItems            []LimitRuleItem // 限流规则项
-	showLimitQuotaHeader bool            // 响应头中是否显示X-RateLimit-Limit和X-RateLimit-Remaining
-	rejectedCode         uint32          // 当请求超过阈值被拒绝时,返回的HTTP状态码
-	rejectedMsg          string          // 当请求超过阈值被拒绝时,返回的响应体
-	redisClient          wrapper.RedisClient
-}
-
-type LimitRuleItem struct {
-	limitType    limitRuleItemType // 限流类型
-	key          string            // 根据该key值进行限流,limit_by_consumer和limit_by_per_consumer两种类型为ConsumerHeader,其他类型为对应的key值
-	limitByPerIp LimitByPerIp      // 对端ip地址或ip段
-	configItems  []LimitConfigItem // 限流配置项
-}
-
-type LimitByPerIp struct {
-	sourceType string // ip来源类型
-	headerName string // 根据该请求头获取客户端ip
-}
-
-type LimitConfigItem struct {
-	configType limitConfigItemType // 限流配置项key类型
-	key        string              // 限流key
-	ipNet      *iptree.IPTree      // 限流key转换的ip地址或者ip段,仅用于itemType为ipNetType
-	regexp     *re.Regexp          // 正则表达式,仅用于itemType为regexpType
-	count      int64               // 指定时间窗口内的总请求数量阈值
-	timeWindow int64               // 时间窗口大小
-}
-
-func initRedisClusterClient(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
-	redisConfig := json.Get("redis")
-	if !redisConfig.Exists() {
-		return errors.New("missing redis in config")
-	}
-	serviceName := redisConfig.Get("service_name").String()
-	if serviceName == "" {
-		return errors.New("redis service name must not be empty")
-	}
-	servicePort := int(redisConfig.Get("service_port").Int())
-	if servicePort == 0 {
-		if strings.HasSuffix(serviceName, ".static") {
-			// use default logic port which is 80 for static service
-			servicePort = 80
-		} else {
-			servicePort = 6379
-		}
-	}
-	username := redisConfig.Get("username").String()
-	password := redisConfig.Get("password").String()
-	timeout := int(redisConfig.Get("timeout").Int())
-	if timeout == 0 {
-		timeout = 1000
-	}
-	config.redisClient = wrapper.NewRedisClusterClient(wrapper.FQDNCluster{
-		FQDN: serviceName,
-		Port: int64(servicePort),
-	})
-	database := int(redisConfig.Get("database").Int())
-	return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database))
-}
-
-func parseClusterKeyRateLimitConfig(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
-	ruleName := json.Get("rule_name")
-	if !ruleName.Exists() {
-		return errors.New("missing rule_name in config")
-	}
-	config.ruleName = ruleName.String()
-
-	// 初始化ruleItems
-	err := initRuleItems(json, config)
-	if err != nil {
-		return err
-	}
-
-	showLimitQuotaHeader := json.Get("show_limit_quota_header")
-	if showLimitQuotaHeader.Exists() {
-		config.showLimitQuotaHeader = showLimitQuotaHeader.Bool()
-	}
-
-	rejectedCode := json.Get("rejected_code")
-	if rejectedCode.Exists() {
-		config.rejectedCode = uint32(rejectedCode.Uint())
-	} else {
-		config.rejectedCode = DefaultRejectedCode
-	}
-	rejectedMsg := json.Get("rejected_msg")
-	if rejectedCode.Exists() {
-		config.rejectedMsg = rejectedMsg.String()
-	} else {
-		config.rejectedMsg = DefaultRejectedMsg
-	}
-	return nil
-}
-
-func initRuleItems(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
-	ruleItemsResult := json.Get("rule_items")
-	if !ruleItemsResult.Exists() {
-		return errors.New("missing rule_items in config")
-	}
-	if len(ruleItemsResult.Array()) == 0 {
-		return errors.New("config rule_items cannot be empty")
-	}
-	var ruleItems []LimitRuleItem
-	for _, item := range ruleItemsResult.Array() {
-		var ruleItem LimitRuleItem
-
-		// 根据配置区分限流类型
-		var limitType limitRuleItemType
-		setLimitByKeyIfExists := func(field gjson.Result, limitTypeStr limitRuleItemType) {
-			if field.Exists() && field.String() != "" {
-				ruleItem.key = field.String()
-				limitType = limitTypeStr
-			}
-		}
-		setLimitByKeyIfExists(item.Get("limit_by_header"), limitByHeaderType)
-		setLimitByKeyIfExists(item.Get("limit_by_param"), limitByParamType)
-		setLimitByKeyIfExists(item.Get("limit_by_cookie"), limitByCookieType)
-		setLimitByKeyIfExists(item.Get("limit_by_per_header"), limitByPerHeaderType)
-		setLimitByKeyIfExists(item.Get("limit_by_per_param"), limitByPerParamType)
-		setLimitByKeyIfExists(item.Get("limit_by_per_cookie"), limitByPerCookieType)
-
-		limitByConsumer := item.Get("limit_by_consumer")
-		if limitByConsumer.Exists() {
-			ruleItem.key = ConsumerHeader
-			limitType = limitByConsumerType
-		}
-		limitByPerConsumer := item.Get("limit_by_per_consumer")
-		if limitByPerConsumer.Exists() {
-			ruleItem.key = ConsumerHeader
-			limitType = limitByPerConsumerType
-		}
-
-		limitByPerIpResult := item.Get("limit_by_per_ip")
-		if limitByPerIpResult.Exists() && limitByPerIpResult.String() != "" {
-			limitByPerIp := limitByPerIpResult.String()
-			ruleItem.key = limitByPerIp
-			if strings.HasPrefix(limitByPerIp, "from-header-") {
-				headerName := limitByPerIp[len("from-header-"):]
-				if headerName == "" {
-					return errors.New("limit_by_per_ip parse error: empty after 'from-header-'")
-				}
-				ruleItem.limitByPerIp = LimitByPerIp{
-					sourceType: HeaderSourceType,
-					headerName: headerName,
-				}
-			} else if limitByPerIp == "from-remote-addr" {
-				ruleItem.limitByPerIp = LimitByPerIp{
-					sourceType: RemoteAddrSourceType,
-					headerName: "",
-				}
-			} else {
-				return errors.New("the 'limit_by_per_ip' restriction must start with 'from-header-' or be exactly 'from-remote-addr'")
-			}
-			limitType = limitByPerIpType
-		}
-
-		if limitType == "" {
-			return errors.New("only one of 'limit_by_header' and 'limit_by_param' and 'limit_by_consumer' and 'limit_by_cookie' and 'limit_by_per_header' and 'limit_by_per_param' and 'limit_by_per_consumer' and 'limit_by_per_cookie' and 'limit_by_per_ip' can be set")
-		}
-		ruleItem.limitType = limitType
-
-		// 初始化configItems
-		err := initConfigItems(item, &ruleItem)
-		if err != nil {
-			return err
-		}
-
-		ruleItems = append(ruleItems, ruleItem)
-	}
-	config.ruleItems = ruleItems
-	return nil
-}
-
-func initConfigItems(json gjson.Result, rule *LimitRuleItem) error {
-	limitKeys := json.Get("limit_keys")
-	if !limitKeys.Exists() {
-		return errors.New("missing limit_keys in config")
-	}
-	if len(limitKeys.Array()) == 0 {
-		return errors.New("config limit_keys cannot be empty")
-	}
-	var configItems []LimitConfigItem
-	for _, item := range limitKeys.Array() {
-		key := item.Get("key")
-		if !key.Exists() || key.String() == "" {
-			return errors.New("limit_keys key is required")
-		}
-
-		var (
-			itemKey  = key.String()
-			itemType limitConfigItemType
-			ipNet    *iptree.IPTree
-			regexp   *re.Regexp
-		)
-		if rule.limitType == limitByPerIpType {
-			var err error
-			ipNet, err = parseIPNet(itemKey)
-			if err != nil {
-				return fmt.Errorf("failed to parse IPNet for key '%s': %w", itemKey, err)
-			}
-			itemType = ipNetType
-		} else if rule.limitType == limitByPerHeaderType ||
-			rule.limitType == limitByPerParamType ||
-			rule.limitType == limitByPerConsumerType ||
-			rule.limitType == limitByPerCookieType {
-			if itemKey == "*" {
-				itemType = allType
-			} else if strings.HasPrefix(itemKey, "regexp:") {
-				regexpStr := itemKey[len("regexp:"):]
-				var err error
-				regexp, err = re.Compile(regexpStr)
-				if err != nil {
-					return fmt.Errorf("failed to compile regex for key '%s': %w", itemKey, err)
-				}
-				itemType = regexpType
-			} else {
-				return fmt.Errorf("the '%s' restriction must start with 'regexp:' or be exactly '*'", rule.limitType)
-			}
-		} else {
-			itemType = exactType
-		}
-
-		if configItem, err := createConfigItemFromRate(item, itemType, itemKey, ipNet, regexp); err != nil {
-			return err
-		} else if configItem != nil {
-			configItems = append(configItems, *configItem)
-		}
-	}
-	rule.configItems = configItems
-	return nil
-}
-
-func createConfigItemFromRate(item gjson.Result, itemType limitConfigItemType, key string, ipNet *iptree.IPTree, regexp *re.Regexp) (*LimitConfigItem, error) {
-	for timeWindowKey, duration := range timeWindows {
-		q := item.Get(timeWindowKey)
-		if q.Exists() && q.Int() > 0 {
-			return &LimitConfigItem{
-				configType: itemType,
-				key:        key,
-				ipNet:      ipNet,
-				regexp:     regexp,
-				count:      q.Int(),
-				timeWindow: duration,
-			}, nil
-		}
-	}
-	return nil, errors.New("one of 'query_per_second', 'query_per_minute', 'query_per_hour', or 'query_per_day' must be set for key: " + key)
-}
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/config/config.go
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/config/config.go
@@ -0,0 +1,357 @@
+package config
+
+import (
+	"errors"
+	"fmt"
+	"strings"
+
+	"cluster-key-rate-limit/util"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
+	re "github.com/wasilibs/go-re2"
+	"github.com/zmap/go-iptree/iptree"
+)
+
+// LimitRuleItemType is the type of a rate-limit rule item.
+type LimitRuleItemType string
+
+// LimitConfigItemType is the key-matching type of a rate-limit config item.
+type LimitConfigItemType string
+
+const (
+	LimitByHeaderType      LimitRuleItemType = "limit_by_header"
+	LimitByParamType       LimitRuleItemType = "limit_by_param"
+	LimitByConsumerType    LimitRuleItemType = "limit_by_consumer"
+	LimitByCookieType      LimitRuleItemType = "limit_by_cookie"
+	LimitByPerHeaderType   LimitRuleItemType = "limit_by_per_header"
+	LimitByPerParamType    LimitRuleItemType = "limit_by_per_param"
+	LimitByPerConsumerType LimitRuleItemType = "limit_by_per_consumer"
+	LimitByPerCookieType   LimitRuleItemType = "limit_by_per_cookie"
+	LimitByPerIpType       LimitRuleItemType = "limit_by_per_ip"
+
+	ExactType  LimitConfigItemType = "exact"  // exact match
+	RegexpType LimitConfigItemType = "regexp" // regular expression
+	AllType    LimitConfigItemType = "*"      // matches everything
+	IpNetType  LimitConfigItemType = "ipNet"  // IP range
+
+	ConsumerHeader = "x-mse-consumer" // request header from which LimitByConsumer reads the consumer name
+
+	RemoteAddrSourceType = "remote-addr"
+	HeaderSourceType     = "header"
+
+	DefaultRejectedCode uint32 = 429
+	DefaultRejectedMsg  string = "Too many requests"
+
+	Second           int64 = 1
+	SecondsPerMinute       = 60 * Second
+	SecondsPerHour         = 60 * SecondsPerMinute
+	SecondsPerDay          = 24 * SecondsPerHour
+)
+
+var timeWindows = map[string]int64{ // config key -> time-window length in seconds
+	"query_per_second": Second,
+	"query_per_minute": SecondsPerMinute,
+	"query_per_hour":   SecondsPerHour,
+	"query_per_day":    SecondsPerDay,
+}
+
+type ClusterKeyRateLimitConfig struct { // parsed plugin configuration plus the Redis client
+	RuleName             string           // rate-limit rule name
+	GlobalThreshold      *GlobalThreshold // global rate-limit configuration
+	RuleItems            []LimitRuleItem  // rate-limit rule items
+	ShowLimitQuotaHeader bool             // whether to show X-RateLimit-Limit and X-RateLimit-Remaining in response headers
+	RejectedCode         uint32           // HTTP status code returned when a request is rejected for exceeding the threshold
+	RejectedMsg          string           // response body returned when a request is rejected for exceeding the threshold
+	RedisClient          wrapper.RedisClient
+}
+
+type GlobalThreshold struct { // threshold used in global rate-limit mode
+	Count      int64 // number of requests within the time window
+	TimeWindow int64 // time-window size (seconds)
+}
+
+type LimitRuleItem struct { // one parsed entry of rule_items
+	LimitType    LimitRuleItemType // rate-limit type
+	Key          string            // key to rate-limit on: ConsumerHeader for limit_by_consumer and limit_by_per_consumer, otherwise the configured key value
+	LimitByPerIp LimitByPerIp      // how the client IP is obtained for limit_by_per_ip rules
+	ConfigItems  []LimitConfigItem // rate-limit config items
+}
+
+type LimitByPerIp struct { // source of the client IP for limit_by_per_ip rules
+	SourceType string // IP source type (HeaderSourceType or RemoteAddrSourceType)
+	HeaderName string // request header from which the client IP is read
+}
+
+type LimitConfigItem struct { // one parsed entry of limit_keys
+	ConfigType LimitConfigItemType // key-matching type of this config item
+	Key        string              // rate-limit key
+	IpNet      *iptree.IPTree      // IP address or IP range parsed from the key; only used when ConfigType is IpNetType
+	Regexp     *re.Regexp          // compiled regular expression; only used when ConfigType is RegexpType
+	Count      int64               // total request-count threshold within the time window
+	TimeWindow int64               // time-window size
+}
+
+func InitRedisClusterClient(json gjson.Result, config *ClusterKeyRateLimitConfig) error { // reads the "redis" section, stores a new cluster client in config.RedisClient and returns the result of its Init call
+	redisConfig := json.Get("redis")
+	if !redisConfig.Exists() {
+		return errors.New("missing redis in config")
+	}
+
+	serviceName := redisConfig.Get("service_name").String()
+	if serviceName == "" {
+		return errors.New("redis service name must not be empty")
+	}
+
+	servicePort := int(redisConfig.Get("service_port").Int())
+	if servicePort == 0 { // service_port absent or 0: fall back to a default port
+		if strings.HasSuffix(serviceName, ".static") {
+			// use default logic port which is 80 for static service
+			servicePort = 80
+		} else {
+			servicePort = 6379
+		}
+	}
+
+	username := redisConfig.Get("username").String()
+	password := redisConfig.Get("password").String()
+	timeout := int(redisConfig.Get("timeout").Int())
+	if timeout == 0 { // timeout absent or 0: default to 1000
+		timeout = 1000
+	}
+
+	config.RedisClient = wrapper.NewRedisClusterClient(wrapper.FQDNCluster{
+		FQDN: serviceName,
+		Port: int64(servicePort),
+	})
+	database := int(redisConfig.Get("database").Int())
+	return config.RedisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database))
+}
+
+func ParseClusterKeyRateLimitConfig(json gjson.Result, config *ClusterKeyRateLimitConfig) error { // fills config from json: rule name, limit rules, quota-header flag and rejection code/message
+	ruleName := json.Get("rule_name")
+	if !ruleName.Exists() {
+		return errors.New("missing rule_name in config")
+	}
+	config.RuleName = ruleName.String()
+
+	// Initialize the rate-limit rules
+	if err := initLimitRule(json, config); err != nil {
+		return err
+	}
+
+	showLimitQuotaHeader := json.Get("show_limit_quota_header")
+	if showLimitQuotaHeader.Exists() {
+		config.ShowLimitQuotaHeader = showLimitQuotaHeader.Bool()
+	}
+
+	rejectedCode := json.Get("rejected_code")
+	if rejectedCode.Exists() {
+		config.RejectedCode = uint32(rejectedCode.Uint())
+	} else {
+		config.RejectedCode = DefaultRejectedCode
+	}
+
+	rejectedMsg := json.Get("rejected_msg")
+	if rejectedMsg.Exists() {
+		config.RejectedMsg = rejectedMsg.String()
+	} else {
+		config.RejectedMsg = DefaultRejectedMsg
+	}
+	return nil
+}
+
+func initLimitRule(json gjson.Result, config *ClusterKeyRateLimitConfig) error { // parses exactly one of 'global_threshold' or 'rule_items' into config
+	globalThresholdResult := json.Get("global_threshold")
+	ruleItemsResult := json.Get("rule_items")
+
+	hasGlobal := globalThresholdResult.Exists()
+	hasRule := ruleItemsResult.Exists()
+	if !hasGlobal && !hasRule {
+		return errors.New("at least one of 'global_threshold' or 'rule_items' must be set")
+	} else if hasGlobal && hasRule {
+		return errors.New("'global_threshold' and 'rule_items' cannot be set at the same time")
+	}
+
+	// Handle the global rate-limit configuration
+	if hasGlobal {
+		threshold, err := parseGlobalThreshold(globalThresholdResult)
+		if err != nil {
+			return fmt.Errorf("failed to parse global_threshold: %w", err)
+		}
+		config.GlobalThreshold = threshold
+		return nil
+	}
+
+	// Handle the conditional rate-limit rules
+	items := ruleItemsResult.Array()
+	if len(items) == 0 {
+		return errors.New("config rule_items cannot be empty")
+	}
+
+	var ruleItems []LimitRuleItem
+	for _, item := range items {
+		ruleItem, err := parseLimitRuleItem(item)
+		if err != nil {
+			return fmt.Errorf("failed to parse rule_item in rule_items: %w", err)
+		}
+		ruleItems = append(ruleItems, *ruleItem)
+	}
+	config.RuleItems = ruleItems
+	return nil
+}
+
+// parseGlobalThreshold builds the global threshold from the first positive
+// query_per_* value in item; it returns an error when none is set.
+func parseGlobalThreshold(item gjson.Result) (*GlobalThreshold, error) {
+	// Probe keys in a fixed order: Go randomizes map iteration, so ranging over
+	// timeWindows picked an arbitrary window when several keys were configured.
+	for _, windowKey := range []string{"query_per_second", "query_per_minute", "query_per_hour", "query_per_day"} {
+		if q := item.Get(windowKey); q.Exists() && q.Int() > 0 {
+			return &GlobalThreshold{Count: q.Int(), TimeWindow: timeWindows[windowKey]}, nil
+		}
+	}
+	return nil, errors.New("one of 'query_per_second', 'query_per_minute', 'query_per_hour', or 'query_per_day' must be set for global_threshold")
+}
+
+func parseLimitRuleItem(item gjson.Result) (*LimitRuleItem, error) { // parses one rule_items entry: its limit type, key, per-IP source and limit_keys
+	var ruleItem LimitRuleItem
+	// Determine the limit type from the config; each matching key below overwrites the previous one, so the last match wins
+	var limitType LimitRuleItemType
+
+	trySetLimitType := func(field gjson.Result, limitTypeStr LimitRuleItemType) {
+		if field.Exists() && field.String() != "" {
+			ruleItem.Key = field.String()
+			limitType = limitTypeStr
+		}
+	}
+	trySetLimitType(item.Get("limit_by_header"), LimitByHeaderType)
+	trySetLimitType(item.Get("limit_by_param"), LimitByParamType)
+	trySetLimitType(item.Get("limit_by_cookie"), LimitByCookieType)
+	trySetLimitType(item.Get("limit_by_per_header"), LimitByPerHeaderType)
+	trySetLimitType(item.Get("limit_by_per_param"), LimitByPerParamType)
+	trySetLimitType(item.Get("limit_by_per_cookie"), LimitByPerCookieType)
+
+	limitByConsumer := item.Get("limit_by_consumer")
+	if limitByConsumer.Exists() { // only presence matters; the configured value is not read
+		ruleItem.Key = ConsumerHeader
+		limitType = LimitByConsumerType
+	}
+	limitByPerConsumer := item.Get("limit_by_per_consumer")
+	if limitByPerConsumer.Exists() { // only presence matters; the configured value is not read
+		ruleItem.Key = ConsumerHeader
+		limitType = LimitByPerConsumerType
+	}
+
+	limitByPerIpResult := item.Get("limit_by_per_ip")
+	if limitByPerIpResult.Exists() && limitByPerIpResult.String() != "" {
+		limitByPerIp := limitByPerIpResult.String()
+		ruleItem.Key = limitByPerIp
+		if strings.HasPrefix(limitByPerIp, "from-header-") {
+			headerName := limitByPerIp[len("from-header-"):]
+			if headerName == "" {
+				return nil, errors.New("limit_by_per_ip parse error: empty after 'from-header-'")
+			}
+			ruleItem.LimitByPerIp = LimitByPerIp{
+				SourceType: HeaderSourceType,
+				HeaderName: headerName,
+			}
+		} else if limitByPerIp == "from-remote-addr" {
+			ruleItem.LimitByPerIp = LimitByPerIp{
+				SourceType: RemoteAddrSourceType,
+				HeaderName: "",
+			}
+		} else {
+			return nil, errors.New("the 'limit_by_per_ip' restriction must start with 'from-header-' or be exactly 'from-remote-addr'")
+		}
+		limitType = LimitByPerIpType
+	}
+
+	if limitType == "" { // reached when none of the limit_by_* keys matched
+		return nil, errors.New("only one of 'limit_by_header' and 'limit_by_param' and 'limit_by_consumer' and 'limit_by_cookie' and 'limit_by_per_header' and 'limit_by_per_param' and 'limit_by_per_consumer' and 'limit_by_per_cookie' and 'limit_by_per_ip' can be set")
+	}
+	ruleItem.LimitType = limitType
+
+	// Initialize configItems
+	err := initConfigItems(item, &ruleItem)
+	if err != nil {
+		return nil, fmt.Errorf("failed to init config items: %w", err)
+	}
+
+	return &ruleItem, nil
+}
+
+func initConfigItems(json gjson.Result, rule *LimitRuleItem) error { // parses the non-empty 'limit_keys' array of a rule item into rule.ConfigItems
+	limitKeys := json.Get("limit_keys")
+	if !limitKeys.Exists() {
+		return errors.New("missing limit_keys in config")
+	}
+	if len(limitKeys.Array()) == 0 {
+		return errors.New("config limit_keys cannot be empty")
+	}
+	var configItems []LimitConfigItem
+	for _, item := range limitKeys.Array() {
+		key := item.Get("key")
+		if !key.Exists() || key.String() == "" {
+			return errors.New("limit_keys key is required")
+		}
+
+		var (
+			itemKey  = key.String()
+			itemType LimitConfigItemType
+			ipNet    *iptree.IPTree
+			regexp   *re.Regexp
+		)
+		if rule.LimitType == LimitByPerIpType { // per-IP rules: the key is handed to util.ParseIPNet
+			var err error
+			ipNet, err = util.ParseIPNet(itemKey)
+			if err != nil {
+				return fmt.Errorf("failed to parse IPNet for key '%s': %w", itemKey, err)
+			}
+			itemType = IpNetType
+		} else if rule.LimitType == LimitByPerHeaderType ||
+			rule.LimitType == LimitByPerParamType ||
+			rule.LimitType == LimitByPerConsumerType ||
+			rule.LimitType == LimitByPerCookieType {
+			if itemKey == "*" {
+				itemType = AllType
+			} else if strings.HasPrefix(itemKey, "regexp:") {
+				regexpStr := itemKey[len("regexp:"):]
+				var err error
+				regexp, err = re.Compile(regexpStr)
+				if err != nil {
+					return fmt.Errorf("failed to compile regex for key '%s': %w", itemKey, err)
+				}
+				itemType = RegexpType
+			} else {
+				return fmt.Errorf("the '%s' restriction must start with 'regexp:' or be exactly '*'", rule.LimitType)
+			}
+		} else { // remaining limit types use the key as an exact-match value
+			itemType = ExactType
+		}
+
+		if configItem, err := createConfigItemFromRate(item, itemType, itemKey, ipNet, regexp); err != nil {
+			return err
+		} else if configItem != nil {
+			configItems = append(configItems, *configItem)
+		}
+	}
+	rule.ConfigItems = configItems
+	return nil
+}
+
+// createConfigItemFromRate builds the LimitConfigItem for one limit_keys entry
+// from the first positive query_per_* value in item, carrying over the match
+// type, key, IP tree and regexp; it returns an error when no such value is set.
+func createConfigItemFromRate(item gjson.Result, itemType LimitConfigItemType, key string, ipNet *iptree.IPTree, regexp *re.Regexp) (*LimitConfigItem, error) {
+	// Probe keys in a fixed order: Go randomizes map iteration, so ranging over
+	// timeWindows picked an arbitrary window when several keys were configured.
+	for _, windowKey := range []string{"query_per_second", "query_per_minute", "query_per_hour", "query_per_day"} {
+		if q := item.Get(windowKey); q.Exists() && q.Int() > 0 {
+			return &LimitConfigItem{
+				ConfigType: itemType, Key: key, IpNet: ipNet, Regexp: regexp,
+				Count: q.Int(), TimeWindow: timeWindows[windowKey],
+			}, nil
+		}
+	}
+	return nil, errors.New("one of 'query_per_second', 'query_per_minute', 'query_per_hour', or 'query_per_day' must be set for key: " + key)
+}
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/config/config_test.go
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/config/config_test.go
@@ -0,0 +1,211 @@
+package config
+
+import (
+	"errors"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/tidwall/gjson"
+)
+
+func TestParseClusterKeyRateLimitConfig(t *testing.T) { // table-driven: each case parses tt.json and checks either the error text or the resulting config
+	tests := []struct {
+		name        string
+		json        string
+		expected    ClusterKeyRateLimitConfig
+		expectedErr error
+	}{
+		{
+			name:        "MissingRuleName",
+			json:        `{}`,
+			expectedErr: errors.New("missing rule_name in config"),
+		},
+		{
+			name: "GlobalThreshold_QueryPerSecond",
+			json: `{
+				"rule_name": "global-route-limit",
+				"global_threshold": {
+					"query_per_second": 100
+				}
+			}`,
+			expected: ClusterKeyRateLimitConfig{
+				RuleName: "global-route-limit",
+				GlobalThreshold: &GlobalThreshold{
+					Count:      100,
+					TimeWindow: Second,
+				},
+				RejectedCode: DefaultRejectedCode,
+				RejectedMsg:  DefaultRejectedMsg,
+			},
+		},
+		{
+			name: "GlobalThreshold_QueryPerMinute",
+			json: `{
+				"rule_name": "global-route-limit",
+				"global_threshold": {
+					"query_per_minute": 1000
+				}
+			}`,
+			expected: ClusterKeyRateLimitConfig{
+				RuleName: "global-route-limit",
+				GlobalThreshold: &GlobalThreshold{
+					Count:      1000,
+					TimeWindow: SecondsPerMinute,
+				},
+				RejectedCode: DefaultRejectedCode,
+				RejectedMsg:  DefaultRejectedMsg,
+			},
+		},
+		{
+			name: "RuleItems_SingleRule",
+			json: `{
+				"rule_name": "rule-based-limit",
+				"rule_items": [
+					{
+						"limit_by_header": "x-test",
+						"limit_keys": [
+							{"key": "key1", "query_per_second": 10}
+						]
+					}
+				]
+			}`,
+			expected: ClusterKeyRateLimitConfig{
+				RuleName: "rule-based-limit",
+				RuleItems: []LimitRuleItem{
+					{
+						LimitType: LimitByHeaderType,
+						Key:       "x-test",
+						ConfigItems: []LimitConfigItem{
+							{
+								ConfigType: ExactType,
+								Key:        "key1",
+								Count:      10,
+								TimeWindow: Second,
+							},
+						},
+					},
+				},
+				RejectedCode: DefaultRejectedCode,
+				RejectedMsg:  DefaultRejectedMsg,
+			},
+		},
+		{
+			name: "RuleItems_MultipleRules",
+			json: `{
+				"rule_name": "multi-rule-limit",
+				"rule_items": [
+					{
+						"limit_by_param": "user_id",
+						"limit_keys": [
+							{"key": "123", "query_per_hour": 50}
+						]
+					},
+					{
+						"limit_by_per_cookie": "session_id",
+						"limit_keys": [
+							{"key": "*", "query_per_day": 100}
+						]
+					}
+				]
+			}`,
+			expected: ClusterKeyRateLimitConfig{
+				RuleName: "multi-rule-limit",
+				RuleItems: []LimitRuleItem{
+					{
+						LimitType: LimitByParamType,
+						Key:       "user_id",
+						ConfigItems: []LimitConfigItem{
+							{
+								ConfigType: ExactType,
+								Key:        "123",
+								Count:      50,
+								TimeWindow: SecondsPerHour,
+							},
+						},
+					},
+					{
+						LimitType: LimitByPerCookieType,
+						Key:       "session_id",
+						ConfigItems: []LimitConfigItem{
+							{
+								ConfigType: AllType,
+								Key:        "*",
+								Count:      100,
+								TimeWindow: SecondsPerDay,
+							},
+						},
+					},
+				},
+				RejectedCode: DefaultRejectedCode,
+				RejectedMsg:  DefaultRejectedMsg,
+			},
+		},
+		{
+			name: "Conflict_GlobalThresholdAndRuleItems",
+			json: `{
+				"rule_name": "test-conflict",
+				"global_threshold": {"query_per_second": 100},
+				"rule_items": [{"limit_by_header": "x-test"}]
+			}`,
+			expectedErr: errors.New("'global_threshold' and 'rule_items' cannot be set at the same time"),
+		},
+		{
+			name: "Missing_GlobalThresholdAndRuleItems",
+			json: `{
+				"rule_name": "test-missing"
+			}`,
+			expectedErr: errors.New("at least one of 'global_threshold' or 'rule_items' must be set"),
+		},
+		{
+			name: "Custom_RejectedCodeAndMessage",
+			json: `{
+				"rule_name": "custom-reject",
+				"rejected_code": 403,
+				"rejected_msg": "Forbidden",
+				"global_threshold": {"query_per_second": 100}
+			}`,
+			expected: ClusterKeyRateLimitConfig{
+				RuleName: "custom-reject",
+				GlobalThreshold: &GlobalThreshold{
+					Count:      100,
+					TimeWindow: Second,
+				},
+				RejectedCode: 403,
+				RejectedMsg:  "Forbidden",
+			},
+		},
+		{
+			name: "ShowLimitQuotaHeader_Enabled",
+			json: `{
+				"rule_name": "show-header",
+				"show_limit_quota_header": true,
+				"global_threshold": {"query_per_second": 100}
+			}`,
+			expected: ClusterKeyRateLimitConfig{
+				RuleName: "show-header",
+				GlobalThreshold: &GlobalThreshold{
+					Count:      100,
+					TimeWindow: Second,
+				},
+				ShowLimitQuotaHeader: true,
+				RejectedCode:         DefaultRejectedCode,
+				RejectedMsg:          DefaultRejectedMsg,
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var config ClusterKeyRateLimitConfig
+			result := gjson.Parse(tt.json)
+			err := ParseClusterKeyRateLimitConfig(result, &config)
+
+			if tt.expectedErr != nil { // error cases compare only the error message text
+				assert.EqualError(t, err, tt.expectedErr.Error())
+			} else {
+				assert.NoError(t, err)
+				assert.Equal(t, tt.expected, config)
+			}
+		})
+	}
+}
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/go.mod
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/go.mod
@@ -7,6 +7,7 @@ replace github.com/alibaba/higress/plugins/wasm-go => ../..
 require (
 	github.com/alibaba/higress/plugins/wasm-go v0.0.0
 	github.com/higress-group/proxy-wasm-go-sdk v1.0.0
+	github.com/stretchr/testify v1.8.4
 	github.com/tidwall/gjson v1.17.3
 	github.com/tidwall/resp v0.1.1
 	github.com/wasilibs/go-re2 v1.5.3
@@ -15,10 +16,14 @@ require (

 require (
 	github.com/asergeyev/nradix v0.0.0-20170505151046-3872ab85bb56 // indirect
+	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/google/uuid v1.3.0 // indirect
 	github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 // indirect
 	github.com/magefile/mage v1.14.0 // indirect
+	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/tetratelabs/wazero v1.7.1 // indirect
 	github.com/tidwall/match v1.1.1 // indirect
 	github.com/tidwall/pretty v1.2.0 // indirect
+	github.com/tidwall/sjson v1.2.5 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/go.sum
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/go.sum
@@ -1,6 +1,7 @@
 github.com/asergeyev/nradix v0.0.0-20170505151046-3872ab85bb56 h1:Wi5Tgn8K+jDcBYL+dIMS1+qXYH2r7tpRAyBgqrWfQtw=
 github.com/asergeyev/nradix v0.0.0-20170505151046-3872ab85bb56/go.mod h1:8BhOLuqtSuT5NZtZMwfvEibi09RO3u79uqfHZzfDTR4=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
 github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
@@ -10,9 +11,12 @@ github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsef
 github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
 github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
 github.com/tetratelabs/wazero v1.7.1 h1:QtSfd6KLc41DIMpDYlJdoMc6k7QTN246DM2+n2Y/Dx8=
 github.com/tetratelabs/wazero v1.7.1/go.mod h1:ytl6Zuh20R/eROuyDaGPkp82O9C/DJfXAwJfQ3X6/7Y=
+github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
 github.com/tidwall/gjson v1.17.3 h1:bwWLZU7icoKRG+C+0PNwIKC6FCJO/Q3p2pZvuP0jN94=
 github.com/tidwall/gjson v1.17.3/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
 github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
@@ -21,9 +25,14 @@ github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
 github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
 github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE=
 github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0=
+github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
+github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
 github.com/wasilibs/go-re2 v1.5.3 h1:wiuTcgDZdLhu8NG8oqF5sF5Q3yIU14lPAvXqeYzDK3g=
 github.com/wasilibs/go-re2 v1.5.3/go.mod h1:PzpVPsBdFC7vM8QJbbEnOeTmwA0DGE783d/Gex8eCV8=
 github.com/wasilibs/nottinygc v0.4.0 h1:h1TJMihMC4neN6Zq+WKpLxgd9xCFMw7O9ETLwY2exJQ=
 github.com/zmap/go-iptree v0.0.0-20210731043055-d4e632617837 h1:DjHnADS2r2zynZ3WkCFAQ+PNYngMSNceRROi0pO6c3M=
 github.com/zmap/go-iptree v0.0.0-20210731043055-d4e632617837/go.mod h1:9vp0bxqozzQwcjBwenEXfKVq8+mYbwHkQ1NF9Ap0DMw=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/main.go
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/main.go
@@ -21,6 +21,9 @@ import (
 	"strconv"
 	"strings"

+	"cluster-key-rate-limit/config"
+	"cluster-key-rate-limit/util"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/log"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
@@ -31,15 +34,20 @@ import (
 func main() {
 	wrapper.SetCtx(
 		"cluster-key-rate-limit",
-		wrapper.ParseConfigBy(parseConfig),
-		wrapper.ProcessRequestHeadersBy(onHttpRequestHeaders),
-		wrapper.ProcessResponseHeadersBy(onHttpResponseHeaders),
+		wrapper.ParseConfig(parseConfig),
+		wrapper.ProcessRequestHeaders(onHttpRequestHeaders),
+		wrapper.ProcessResponseHeaders(onHttpResponseHeaders),
 	)
 }

 const (
-	ClusterRateLimitFormat string = "higress-cluster-key-rate-limit:%s:%s:%d:%d:%s:%s" // redis key为前缀:限流规则名称:限流类型:时间窗口:窗口内限流数:限流key名称:限流key对应的实际值
-	FixedWindowScript      string = `
+	// ClusterKeyPrefix 集群限流插件在 Redis 中 key 的统一前缀
+	ClusterKeyPrefix = "higress-cluster-key-rate-limit"
+	// ClusterGlobalRateLimitFormat  全局限流模式 redis key 为 ClusterKeyPrefix:限流规则名称:global_threshold:时间窗口:窗口内限流数
+	ClusterGlobalRateLimitFormat = ClusterKeyPrefix + ":%s:global_threshold:%d:%d"
+	// ClusterRateLimitFormat 规则限流模式 redis key 为 ClusterKeyPrefix:限流规则名称:限流类型:时间窗口:窗口内限流数:限流key名称:限流key对应的实际值
+	ClusterRateLimitFormat = ClusterKeyPrefix + ":%s:%s:%d:%d:%s:%s"
+	FixedWindowScript      = `
    	local ttl = redis.call('ttl', KEYS[1])
    	if ttl < 0 then
        	redis.call('set', KEYS[1], ARGV[1] - 1, 'EX', ARGV[2])
@@ -48,14 +56,13 @@ const (
    	return {ARGV[1], redis.call('incrby', KEYS[1], -1), ttl}
 	`

-	LimitContextKey string = "LimitContext" // 限流上下文信息
+	LimitContextKey = "LimitContext" // 限流上下文信息

-	ConsumerHeader string = "x-mse-consumer" // LimitByConsumer从该request header获取consumer的名字
-	CookieHeader   string = "cookie"
+	CookieHeader = "cookie"

-	RateLimitLimitHeader     string = "X-RateLimit-Limit"     // 限制的总请求数
-	RateLimitRemainingHeader string = "X-RateLimit-Remaining" // 剩余还可以发送的请求数
-	RateLimitResetHeader     string = "X-RateLimit-Reset"     // 限流重置时间（触发限流时返回）
+	RateLimitLimitHeader     = "X-RateLimit-Limit"     // 限制的总请求数
+	RateLimitRemainingHeader = "X-RateLimit-Remaining" // 剩余还可以发送的请求数
+	RateLimitResetHeader     = "X-RateLimit-Reset"     // 限流重置时间（触发限流时返回）
 )

 type LimitContext struct {
@@ -64,31 +71,43 @@ type LimitContext struct {
 	reset     int
 }

-func parseConfig(json gjson.Result, config *ClusterKeyRateLimitConfig, log wrapper.Log) error {
-	err := initRedisClusterClient(json, config)
+func parseConfig(json gjson.Result, cfg *config.ClusterKeyRateLimitConfig) error {
+	err := config.InitRedisClusterClient(json, cfg)
 	if err != nil {
 		return err
 	}
-	err = parseClusterKeyRateLimitConfig(json, config)
+	err = config.ParseClusterKeyRateLimitConfig(json, cfg)
 	if err != nil {
 		return err
 	}
 	return nil
 }

-func onHttpRequestHeaders(ctx wrapper.HttpContext, config ClusterKeyRateLimitConfig, log wrapper.Log) types.Action {
-	// 判断是否命中限流规则
-	val, ruleItem, configItem := checkRequestAgainstLimitRule(ctx, config.ruleItems, log)
-	if ruleItem == nil || configItem == nil {
-		return types.ActionContinue
+func onHttpRequestHeaders(ctx wrapper.HttpContext, config config.ClusterKeyRateLimitConfig) types.Action {
+	limitKey, count, timeWindow := "", int64(0), int64(0)
+
+	if config.GlobalThreshold != nil {
+		// 全局限流模式
+		limitKey = fmt.Sprintf(ClusterGlobalRateLimitFormat, config.RuleName, config.GlobalThreshold.TimeWindow, config.GlobalThreshold.Count)
+		count = config.GlobalThreshold.Count
+		timeWindow = config.GlobalThreshold.TimeWindow
+	} else {
+		// 规则限流模式
+		val, ruleItem, configItem := checkRequestAgainstLimitRule(ctx, config.RuleItems)
+		if ruleItem == nil || configItem == nil {
+			// 没有匹配到限流规则直接返回
+			return types.ActionContinue
+		}
+
+		limitKey = fmt.Sprintf(ClusterRateLimitFormat, config.RuleName, ruleItem.LimitType, configItem.TimeWindow, configItem.Count, ruleItem.Key, val)
+		count = configItem.Count
+		timeWindow = configItem.TimeWindow
 	}

-	// 构建redis限流key和参数
-	limitKey := fmt.Sprintf(ClusterRateLimitFormat, config.ruleName, ruleItem.limitType, configItem.timeWindow, configItem.count, ruleItem.key, val)
-	keys := []interface{}{limitKey}
-	args := []interface{}{configItem.count, configItem.timeWindow}
 	// 执行限流逻辑
-	err := config.redisClient.Eval(FixedWindowScript, 1, keys, args, func(response resp.Value) {
+	keys := []interface{}{limitKey}
+	args := []interface{}{count, timeWindow}
+	err := config.RedisClient.Eval(FixedWindowScript, 1, keys, args, func(response resp.Value) {
 		resultArray := response.Array()
 		if len(resultArray) != 3 {
 			log.Errorf("redis response parse error, response: %v", response)
@@ -108,6 +127,7 @@ func onHttpRequestHeaders(ctx wrapper.HttpContext, config ClusterKeyRateLimitCon
 			proxywasm.ResumeHttpRequest()
 		}
 	})
+
 	if err != nil {
 		log.Errorf("redis call failed: %v", err)
 		return types.ActionContinue
@@ -115,79 +135,81 @@ func onHttpRequestHeaders(ctx wrapper.HttpContext, config ClusterKeyRateLimitCon
 	return types.ActionPause
 }

-func onHttpResponseHeaders(ctx wrapper.HttpContext, config ClusterKeyRateLimitConfig, log wrapper.Log) types.Action {
+func onHttpResponseHeaders(ctx wrapper.HttpContext, config config.ClusterKeyRateLimitConfig) types.Action {
 	limitContext, ok := ctx.GetContext(LimitContextKey).(LimitContext)
 	if !ok {
 		return types.ActionContinue
 	}
-	if config.showLimitQuotaHeader {
+	if config.ShowLimitQuotaHeader {
 		_ = proxywasm.ReplaceHttpResponseHeader(RateLimitLimitHeader, strconv.Itoa(limitContext.count))
 		_ = proxywasm.ReplaceHttpResponseHeader(RateLimitRemainingHeader, strconv.Itoa(limitContext.remaining))
 	}
 	return types.ActionContinue
 }

-func checkRequestAgainstLimitRule(ctx wrapper.HttpContext, ruleItems []LimitRuleItem, log wrapper.Log) (string, *LimitRuleItem, *LimitConfigItem) {
-	for _, rule := range ruleItems {
-		val, ruleItem, configItem := hitRateRuleItem(ctx, rule, log)
-		if ruleItem != nil && configItem != nil {
-			return val, ruleItem, configItem
+func checkRequestAgainstLimitRule(ctx wrapper.HttpContext, ruleItems []config.LimitRuleItem) (string, *config.LimitRuleItem, *config.LimitConfigItem) {
+	if len(ruleItems) > 0 {
+		for _, rule := range ruleItems {
+			val, ruleItem, configItem := hitRateRuleItem(ctx, rule)
+			if ruleItem != nil && configItem != nil {
+				return val, ruleItem, configItem
+			}
 		}
 	}
 	return "", nil, nil
 }

-func hitRateRuleItem(ctx wrapper.HttpContext, rule LimitRuleItem, log wrapper.Log) (string, *LimitRuleItem, *LimitConfigItem) {
-	switch rule.limitType {
+func hitRateRuleItem(ctx wrapper.HttpContext, rule config.LimitRuleItem) (string, *config.LimitRuleItem, *config.LimitConfigItem) {
+	switch rule.LimitType {
 	// 根据HTTP请求头限流
-	case limitByHeaderType, limitByPerHeaderType:
-		val, err := proxywasm.GetHttpRequestHeader(rule.key)
+	case config.LimitByHeaderType, config.LimitByPerHeaderType:
+		val, err := proxywasm.GetHttpRequestHeader(rule.Key)
 		if err != nil {
-			return logDebugAndReturnEmpty(log, "failed to get request header %s: %v", rule.key, err)
+			return logDebugAndReturnEmpty("failed to get request header %s: %v", rule.Key, err)
 		}
-		return val, &rule, findMatchingItem(rule.limitType, rule.configItems, val)
+		return val, &rule, findMatchingItem(rule.LimitType, rule.ConfigItems, val)
 	// 根据HTTP请求参数限流
-	case limitByParamType, limitByPerParamType:
+	case config.LimitByParamType, config.LimitByPerParamType:
 		parse, err := url.Parse(ctx.Path())
 		if err != nil {
-			return logDebugAndReturnEmpty(log, "failed to parse request path: %v", err)
+			return logDebugAndReturnEmpty("failed to parse request path: %v", err)
 		}
 		query, err := url.ParseQuery(parse.RawQuery)
 		if err != nil {
-			return logDebugAndReturnEmpty(log, "failed to parse query params: %v", err)
+			return logDebugAndReturnEmpty("failed to parse query params: %v", err)
 		}
-		val, ok := query[rule.key]
+		val, ok := query[rule.Key]
 		if !ok {
-			return logDebugAndReturnEmpty(log, "request param %s is empty", rule.key)
+			return logDebugAndReturnEmpty("request param %s is empty", rule.Key)
 		}
-		return val[0], &rule, findMatchingItem(rule.limitType, rule.configItems, val[0])
+		return val[0], &rule, findMatchingItem(rule.LimitType, rule.ConfigItems, val[0])
 	// 根据consumer限流
-	case limitByConsumerType, limitByPerConsumerType:
-		val, err := proxywasm.GetHttpRequestHeader(ConsumerHeader)
+	case config.LimitByConsumerType, config.LimitByPerConsumerType:
+		val, err := proxywasm.GetHttpRequestHeader(config.ConsumerHeader)
 		if err != nil {
-			return logDebugAndReturnEmpty(log, "failed to get request header %s: %v", ConsumerHeader, err)
+			return logDebugAndReturnEmpty("failed to get request header %s: %v", config.ConsumerHeader, err)
 		}
-		return val, &rule, findMatchingItem(rule.limitType, rule.configItems, val)
+		return val, &rule, findMatchingItem(rule.LimitType, rule.ConfigItems, val)
 	// 根据cookie中key值限流
-	case limitByCookieType, limitByPerCookieType:
+	case config.LimitByCookieType, config.LimitByPerCookieType:
 		cookie, err := proxywasm.GetHttpRequestHeader(CookieHeader)
 		if err != nil {
-			return logDebugAndReturnEmpty(log, "failed to get request cookie : %v", err)
+			return logDebugAndReturnEmpty("failed to get request cookie : %v", err)
 		}
-		val := extractCookieValueByKey(cookie, rule.key)
+		val := util.ExtractCookieValueByKey(cookie, rule.Key)
 		if val == "" {
-			return logDebugAndReturnEmpty(log, "cookie key '%s' extracted from cookie '%s' is empty.", rule.key, cookie)
+			return logDebugAndReturnEmpty("cookie key '%s' extracted from cookie '%s' is empty.", rule.Key, cookie)
 		}
-		return val, &rule, findMatchingItem(rule.limitType, rule.configItems, val)
+		return val, &rule, findMatchingItem(rule.LimitType, rule.ConfigItems, val)
 	// 根据客户端IP限流
-	case limitByPerIpType:
+	case config.LimitByPerIpType:
 		realIp, err := getDownStreamIp(rule)
 		if err != nil {
 			log.Warnf("failed to get down stream ip: %v", err)
 			return "", &rule, nil
 		}
-		for _, item := range rule.configItems {
-			if _, found, _ := item.ipNet.Get(realIp); !found {
+		for _, item := range rule.ConfigItems {
+			if _, found, _ := item.IpNet.Get(realIp); !found {
 				continue
 			}
 			return realIp.String(), &rule, &item
@@ -196,37 +218,37 @@ func hitRateRuleItem(ctx wrapper.HttpContext, rule LimitRuleItem, log wrapper.Lo
 	return "", nil, nil
 }

-func logDebugAndReturnEmpty(log wrapper.Log, errMsg string, args ...interface{}) (string, *LimitRuleItem, *LimitConfigItem) {
+func logDebugAndReturnEmpty(errMsg string, args ...interface{}) (string, *config.LimitRuleItem, *config.LimitConfigItem) {
 	log.Debugf(errMsg, args...)
 	return "", nil, nil
 }

-func findMatchingItem(limitType limitRuleItemType, items []LimitConfigItem, key string) *LimitConfigItem {
+func findMatchingItem(limitType config.LimitRuleItemType, items []config.LimitConfigItem, key string) *config.LimitConfigItem {
 	for _, item := range items {
 		// per类型,检查allType和regexpType
-		if limitType == limitByPerHeaderType ||
-			limitType == limitByPerParamType ||
-			limitType == limitByPerConsumerType ||
-			limitType == limitByPerCookieType {
-			if item.configType == allType || (item.configType == regexpType && item.regexp.MatchString(key)) {
+		if limitType == config.LimitByPerHeaderType ||
+			limitType == config.LimitByPerParamType ||
+			limitType == config.LimitByPerConsumerType ||
+			limitType == config.LimitByPerCookieType {
+			if item.ConfigType == config.AllType || (item.ConfigType == config.RegexpType && item.Regexp.MatchString(key)) {
 				return &item
 			}
 		}
 		// 其他类型,直接比较key
-		if item.key == key {
+		if item.Key == key {
 			return &item
 		}
 	}
 	return nil
 }

-func getDownStreamIp(rule LimitRuleItem) (net.IP, error) {
+func getDownStreamIp(rule config.LimitRuleItem) (net.IP, error) {
 	var (
 		realIpStr string
 		err       error
 	)
-	if rule.limitByPerIp.sourceType == HeaderSourceType {
-		realIpStr, err = proxywasm.GetHttpRequestHeader(rule.limitByPerIp.headerName)
+	if rule.LimitByPerIp.SourceType == config.HeaderSourceType {
+		realIpStr, err = proxywasm.GetHttpRequestHeader(rule.LimitByPerIp.HeaderName)
 		if err == nil {
 			realIpStr = strings.Split(strings.Trim(realIpStr, " "), ",")[0]
 		}
@@ -238,7 +260,7 @@ func getDownStreamIp(rule LimitRuleItem) (net.IP, error) {
 	if err != nil {
 		return nil, err
 	}
-	ip := parseIP(realIpStr)
+	ip := util.ParseIP(realIpStr)
 	realIP := net.ParseIP(ip)
 	if realIP == nil {
 		return nil, fmt.Errorf("invalid ip[%s]", ip)
@@ -246,13 +268,13 @@ func getDownStreamIp(rule LimitRuleItem) (net.IP, error) {
 	return realIP, nil
 }

-func rejected(config ClusterKeyRateLimitConfig, context LimitContext) {
+func rejected(config config.ClusterKeyRateLimitConfig, context LimitContext) {
 	headers := make(map[string][]string)
 	headers[RateLimitResetHeader] = []string{strconv.Itoa(context.reset)}
-	if config.showLimitQuotaHeader {
+	if config.ShowLimitQuotaHeader {
 		headers[RateLimitLimitHeader] = []string{strconv.Itoa(context.count)}
 		headers[RateLimitRemainingHeader] = []string{strconv.Itoa(0)}
 	}
 	_ = proxywasm.SendHttpResponseWithDetail(
-		config.rejectedCode, "cluster-key-rate-limit.rejected", reconvertHeaders(headers), []byte(config.rejectedMsg), -1)
+		config.RejectedCode, "cluster-key-rate-limit.rejected", util.ReconvertHeaders(headers), []byte(config.RejectedMsg), -1)
 }
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/util/utils.go
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/util/utils.go
@@ -1,4 +1,4 @@
-package main
+package util

 import (
 	"fmt"
@@ -8,8 +8,8 @@ import (
 	"github.com/zmap/go-iptree/iptree"
 )

-// parseIPNet 解析Ip段配置
-func parseIPNet(key string) (*iptree.IPTree, error) {
+// ParseIPNet 解析Ip段配置
+func ParseIPNet(key string) (*iptree.IPTree, error) {
 	tree := iptree.New()
 	err := tree.AddByString(key, 0)
 	if err != nil {
@@ -18,8 +18,8 @@ func parseIPNet(key string) (*iptree.IPTree, error) {
 	return tree, nil
 }

-// parseIP 解析IP
-func parseIP(source string) string {
+// ParseIP 解析IP
+func ParseIP(source string) string {
 	if strings.Contains(source, ".") {
 		// parse ipv4
 		return strings.Split(source, ":")[0]
@@ -31,8 +31,8 @@ func parseIP(source string) string {
 	return source
 }

-// reconvertHeaders headers: map[string][]string -> [][2]string
-func reconvertHeaders(hs map[string][]string) [][2]string {
+// ReconvertHeaders headers: map[string][]string -> [][2]string
+func ReconvertHeaders(hs map[string][]string) [][2]string {
 	var ret [][2]string
 	for k, vs := range hs {
 		for _, v := range vs {
@@ -45,8 +45,8 @@ func reconvertHeaders(hs map[string][]string) [][2]string {
 	return ret
 }

-// extractCookieValueByKey 从cookie中提取key对应的value
-func extractCookieValueByKey(cookie string, key string) (value string) {
+// ExtractCookieValueByKey 从cookie中提取key对应的value
+func ExtractCookieValueByKey(cookie string, key string) (value string) {
 	pairs := strings.Split(cookie, ";")
 	for _, pair := range pairs {
 		pair = strings.TrimSpace(pair)
--- a/plugins/wasm-go/extensions/ext-auth/config/config.go
+++ b/plugins/wasm-go/extensions/ext-auth/config/config.go
@@ -7,6 +7,7 @@ import (
 	"strings"

 	"ext-auth/expr"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/log"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/tidwall/gjson"
 )
@@ -56,12 +57,12 @@ type AuthorizationResponse struct {
 	AllowedClientHeaders   expr.Matcher
 }

-func ParseConfig(json gjson.Result, config *ExtAuthConfig, log wrapper.Log) error {
+func ParseConfig(json gjson.Result, config *ExtAuthConfig) error {
 	httpServiceConfig := json.Get("http_service")
 	if !httpServiceConfig.Exists() {
 		return errors.New("missing http_service in config")
 	}
-	if err := parseHttpServiceConfig(httpServiceConfig, config, log); err != nil {
+	if err := parseHttpServiceConfig(httpServiceConfig, config); err != nil {
 		return err
 	}

@@ -88,10 +89,10 @@ func ParseConfig(json gjson.Result, config *ExtAuthConfig, log wrapper.Log) erro
 	return nil
 }

-func parseHttpServiceConfig(json gjson.Result, config *ExtAuthConfig, log wrapper.Log) error {
+func parseHttpServiceConfig(json gjson.Result, config *ExtAuthConfig) error {
 	var httpService HttpService

-	if err := parseEndpointConfig(json, &httpService, log); err != nil {
+	if err := parseEndpointConfig(json, &httpService); err != nil {
 		return err
 	}

@@ -114,7 +115,7 @@ func parseHttpServiceConfig(json gjson.Result, config *ExtAuthConfig, log wrappe
 	return nil
 }

-func parseEndpointConfig(json gjson.Result, httpService *HttpService, log wrapper.Log) error {
+func parseEndpointConfig(json gjson.Result, httpService *HttpService) error {
 	endpointMode := json.Get("endpoint_mode").String()
 	if endpointMode == "" {
 		endpointMode = EndpointModeEnvoy
--- a/plugins/wasm-go/extensions/ext-auth/config/config_test.go
+++ b/plugins/wasm-go/extensions/ext-auth/config/config_test.go
@@ -403,7 +403,7 @@ func TestParseConfig(t *testing.T) {
 		t.Run(tt.name, func(t *testing.T) {
 			var config ExtAuthConfig
 			result := gjson.Parse(tt.json)
-			err := ParseConfig(result, &config, &wrapper.DefaultLog{})
+			err := ParseConfig(result, &config)

 			if tt.expectedErr != "" {
 				assert.EqualError(t, err, tt.expectedErr)
--- a/plugins/wasm-go/extensions/ext-auth/go.mod
+++ b/plugins/wasm-go/extensions/ext-auth/go.mod
@@ -22,6 +22,7 @@ require (
 	github.com/tidwall/match v1.1.1 // indirect
 	github.com/tidwall/pretty v1.2.0 // indirect
 	github.com/tidwall/resp v0.1.1 // indirect
+	github.com/tidwall/sjson v1.2.5 // indirect
 	golang.org/x/sys v0.21.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
--- a/plugins/wasm-go/extensions/ext-auth/go.sum
+++ b/plugins/wasm-go/extensions/ext-auth/go.sum
@@ -14,6 +14,7 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
 github.com/tetratelabs/wazero v1.7.2 h1:1+z5nXJNwMLPAWaTePFi49SSTL0IMx/i3Fg8Yc25GDc=
 github.com/tetratelabs/wazero v1.7.2/go.mod h1:ytl6Zuh20R/eROuyDaGPkp82O9C/DJfXAwJfQ3X6/7Y=
+github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
 github.com/tidwall/gjson v1.17.3 h1:bwWLZU7icoKRG+C+0PNwIKC6FCJO/Q3p2pZvuP0jN94=
 github.com/tidwall/gjson v1.17.3/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
 github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
@@ -22,6 +23,8 @@ github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
 github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
 github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE=
 github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0=
+github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
+github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
 github.com/wasilibs/go-re2 v1.6.0 h1:CLlhDebt38wtl/zz4ww+hkXBMcxjrKFvTDXzFW2VOz8=
 github.com/wasilibs/go-re2 v1.6.0/go.mod h1:prArCyErsypRBI/jFAFJEbzyHzjABKqkzlidF0SNA04=
 github.com/wasilibs/nottinygc v0.4.0 h1:h1TJMihMC4neN6Zq+WKpLxgd9xCFMw7O9ETLwY2exJQ=
--- a/plugins/wasm-go/extensions/ext-auth/main.go
+++ b/plugins/wasm-go/extensions/ext-auth/main.go
@@ -20,6 +20,7 @@ import (

 	"ext-auth/config"
 	"ext-auth/util"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/log"

 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
@@ -29,9 +30,9 @@ import (
 func main() {
 	wrapper.SetCtx(
 		"ext-auth",
-		wrapper.ParseConfigBy(config.ParseConfig),
-		wrapper.ProcessRequestHeadersBy(onHttpRequestHeaders),
-		wrapper.ProcessRequestBodyBy(onHttpRequestBody),
+		wrapper.ParseConfig(config.ParseConfig),
+		wrapper.ProcessRequestHeaders(onHttpRequestHeaders),
+		wrapper.ProcessRequestBody(onHttpRequestBody),
 	)
 }

@@ -50,7 +51,7 @@ const (
 	HeaderXForwardedHost   = "x-forwarded-host"
 )

-func onHttpRequestHeaders(ctx wrapper.HttpContext, config config.ExtAuthConfig, log wrapper.Log) types.Action {
+func onHttpRequestHeaders(ctx wrapper.HttpContext, config config.ExtAuthConfig) types.Action {
 	// If the request's domain and path match the MatchRules, skip authentication
 	if config.MatchRules.IsAllowedByMode(ctx.Host(), ctx.Method(), wrapper.GetRequestPathWithoutQuery()) {
 		ctx.DontReadRequestBody()
@@ -70,17 +71,17 @@ func onHttpRequestHeaders(ctx wrapper.HttpContext, config config.ExtAuthConfig,
 	}

 	ctx.DontReadRequestBody()
-	return checkExtAuth(ctx, config, nil, log, types.HeaderStopAllIterationAndWatermark)
+	return checkExtAuth(ctx, config, nil, types.HeaderStopAllIterationAndWatermark)
 }

-func onHttpRequestBody(ctx wrapper.HttpContext, config config.ExtAuthConfig, body []byte, log wrapper.Log) types.Action {
+func onHttpRequestBody(ctx wrapper.HttpContext, config config.ExtAuthConfig, body []byte) types.Action {
 	if config.HttpService.AuthorizationRequest.WithRequestBody {
-		return checkExtAuth(ctx, config, body, log, types.DataStopIterationAndBuffer)
+		return checkExtAuth(ctx, config, body, types.DataStopIterationAndBuffer)
 	}
 	return types.ActionContinue
 }

-func checkExtAuth(ctx wrapper.HttpContext, cfg config.ExtAuthConfig, body []byte, log wrapper.Log, pauseAction types.Action) types.Action {
+func checkExtAuth(ctx wrapper.HttpContext, cfg config.ExtAuthConfig, body []byte, pauseAction types.Action) types.Action {
 	httpServiceConfig := cfg.HttpService

 	extAuthReqHeaders := buildExtAuthRequestHeaders(ctx, cfg)
--- a/plugins/wasm-go/extensions/frontend-gray/config/config.go
+++ b/plugins/wasm-go/extensions/frontend-gray/config/config.go
@@ -75,6 +75,8 @@ type GrayConfig struct {
 	SkippedByHeaders    map[string]string
 	IndexPaths          []string
 	GrayWeight          int
+	// 表示uniqueGrayTag配置项是否被用户自定义设置
+	UniqueGrayTagConfigured bool
 }

 func isValidName(s string) bool {
@@ -134,6 +136,8 @@ func JsonToGrayConfig(json gjson.Result, grayConfig *GrayConfig) error {
 	grayConfig.GraySubKey = json.Get("graySubKey").String()
 	grayConfig.BackendGrayTag = GetWithDefault(json, "backendGrayTag", "x-mse-tag")
 	grayConfig.UniqueGrayTag = GetWithDefault(json, "uniqueGrayTag", "x-higress-uid")
+	// 判断 uniqueGrayTag 是否被配置
+	grayConfig.UniqueGrayTagConfigured = json.Get("uniqueGrayTag").Exists()
 	grayConfig.StoreMaxAge = 60 * 60 * 24 * 365 // 默认一年
 	storeMaxAge, err := strconv.Atoi(GetWithDefault(json, "StoreMaxAge", strconv.Itoa(grayConfig.StoreMaxAge)))
 	if err != nil {
--- a/plugins/wasm-go/extensions/frontend-gray/go.mod
+++ b/plugins/wasm-go/extensions/frontend-gray/go.mod
@@ -7,6 +7,7 @@ replace github.com/alibaba/higress/plugins/wasm-go => ../..
 require (
 	github.com/alibaba/higress/plugins/wasm-go v1.4.3-0.20240727022514-bccfbde62188
 	github.com/bmatcuk/doublestar/v4 v4.6.1
+	github.com/google/uuid v1.3.0
 	github.com/higress-group/proxy-wasm-go-sdk v1.0.0
 	github.com/stretchr/testify v1.9.0
 	github.com/tidwall/gjson v1.17.3
@@ -14,12 +15,12 @@ require (

 require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
-	github.com/google/uuid v1.3.0 // indirect
 	github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 // indirect
 	github.com/magefile/mage v1.14.0 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/tidwall/match v1.1.1 // indirect
 	github.com/tidwall/pretty v1.2.0 // indirect
 	github.com/tidwall/resp v0.1.1 // indirect
+	github.com/tidwall/sjson v1.2.5 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
--- a/plugins/wasm-go/extensions/frontend-gray/go.sum
+++ b/plugins/wasm-go/extensions/frontend-gray/go.sum
@@ -13,12 +13,13 @@ github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKE
 github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0=
 github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
 github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
+github.com/pmezard/go-difflib v1.0.0+incompatible/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/pmezard/go-difflib v1.0.0+incompatible/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
 github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
 github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
 github.com/tidwall/gjson v1.17.0 h1:/Jocvlh98kcTfpN2+JzGQWQcqrPQwDrVEMApx/M5ZwM=
 github.com/tidwall/gjson v1.17.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
 github.com/tidwall/gjson v1.17.3 h1:bwWLZU7icoKRG+C+0PNwIKC6FCJO/Q3p2pZvuP0jN94=
@@ -29,6 +30,8 @@ github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
 github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
 github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE=
 github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0=
+github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
+github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/plugins/wasm-go/extensions/frontend-gray/main.go
+++ b/plugins/wasm-go/extensions/frontend-gray/main.go
@@ -63,7 +63,7 @@ func onHttpRequestHeaders(ctx wrapper.HttpContext, grayConfig config.GrayConfig)
 	}
 	frontendVersion := util.GetCookieValue(cookie, config.XHigressTag)

-	if grayConfig.GrayWeight > 0 {
+	if grayConfig.UniqueGrayTagConfigured || grayConfig.GrayWeight > 0 {
 		ctx.SetContext(grayConfig.UniqueGrayTag, util.GetGrayWeightUniqueId(cookie, grayConfig.UniqueGrayTag))
 	}

@@ -179,13 +179,13 @@ func onHttpResponseHeader(ctx wrapper.HttpContext, grayConfig config.GrayConfig)
 	// 前端版本
 	frontendVersion, isFrontendVersionOk := ctx.GetContext(config.PreHigressVersion).(string)
 	if isFrontendVersionOk {
-		proxywasm.AddHttpResponseHeader("Set-Cookie", fmt.Sprintf("%s=%s; Max-Age=%d; Path=/;", config.XHigressTag, frontendVersion, grayConfig.StoreMaxAge))
+		proxywasm.AddHttpResponseHeader("Set-Cookie", fmt.Sprintf("%s=%s; Max-Age=%d; Path=/; HttpOnly; Secure", config.XHigressTag, frontendVersion, grayConfig.StoreMaxAge))
 	}
 	// 设置GrayWeight 唯一值
-	if grayConfig.GrayWeight > 0 {
+	if grayConfig.UniqueGrayTagConfigured || grayConfig.GrayWeight > 0 {
 		uniqueId, isUniqueIdOk := ctx.GetContext(grayConfig.UniqueGrayTag).(string)
 		if isUniqueIdOk {
-			proxywasm.AddHttpResponseHeader("Set-Cookie", fmt.Sprintf("%s=%s; Max-Age=%d; Path=/;", grayConfig.UniqueGrayTag, uniqueId, grayConfig.StoreMaxAge))
+			proxywasm.AddHttpResponseHeader("Set-Cookie", fmt.Sprintf("%s=%s; Max-Age=%d; Path=/; HttpOnly; Secure", grayConfig.UniqueGrayTag, uniqueId, grayConfig.StoreMaxAge))
 		}
 	}
 	// 设置后端的版本
@@ -194,9 +194,9 @@ func onHttpResponseHeader(ctx wrapper.HttpContext, grayConfig config.GrayConfig)
 		if isBackVersionOk {
 			if backendVersion == "" {
 				// 删除后端灰度版本
-				proxywasm.AddHttpResponseHeader("Set-Cookie", fmt.Sprintf("%s=%s; Expires=Thu, 01 Jan 1970 00:00:00 GMT; Path=/;", grayConfig.BackendGrayTag, backendVersion))
+				proxywasm.AddHttpResponseHeader("Set-Cookie", fmt.Sprintf("%s=%s; Expires=Thu, 01 Jan 1970 00:00:00 GMT; Path=/; HttpOnly; Secure", grayConfig.BackendGrayTag, backendVersion))
 			} else {
-				proxywasm.AddHttpResponseHeader("Set-Cookie", fmt.Sprintf("%s=%s; Max-Age=%d; Path=/;", grayConfig.BackendGrayTag, backendVersion, grayConfig.StoreMaxAge))
+				proxywasm.AddHttpResponseHeader("Set-Cookie", fmt.Sprintf("%s=%s; Max-Age=%d; Path=/; HttpOnly; Secure", grayConfig.BackendGrayTag, backendVersion, grayConfig.StoreMaxAge))
 			}
 		}
 	}
--- a/plugins/wasm-go/extensions/log-request-response/README.md
+++ b/plugins/wasm-go/extensions/log-request-response/README.md
@@ -0,0 +1,154 @@
+# log-request-response 插件
+
+这个插件用于在 Higress 的访问日志中添加以下信息：
+
+- HTTP 请求头（添加为 `%FILTER_STATE(wasm.log-request-headers:PLAIN)%`）
+- POST、PUT、PATCH 请求的请求体内容（添加为 `%FILTER_STATE(wasm.log-request-body:PLAIN)%`）
+- 响应头（添加为 `%FILTER_STATE(wasm.log-response-headers:PLAIN)%`）
+- 响应体内容（添加为 `%FILTER_STATE(wasm.log-response-body:PLAIN)%`）
+
+## 配置参数
+
+在 Higress 控制台配置该插件时，使用以下结构化的 YAML 配置：
+
+```yaml
+# 请求相关配置
+request:
+  # 请求头配置
+  headers:
+    # 是否记录请求头（默认：false）
+    enabled: true
+  # 请求体配置
+  body:
+    # 是否记录请求体内容（默认：false）
+    enabled: true
+    # 最大记录长度限制，单位字节（默认：10KB）
+    maxSize: 10240
+    # 需要记录请求体的内容类型（默认包含常见的内容类型）
+    contentTypes:
+      - application/json
+      - application/xml
+      - application/x-www-form-urlencoded
+      - text/plain
+
+# 响应相关配置
+response:
+  # 响应头配置
+  headers:
+    # 是否记录响应头（默认：false）
+    enabled: true
+  # 响应体配置
+  body:
+    # 是否记录响应体内容（默认：false）
+    enabled: true
+    # 最大记录长度限制，单位字节（默认：10KB）
+    maxSize: 10240
+    # 需要记录响应体的内容类型（默认包含常见的内容类型）
+    contentTypes:
+      - application/json
+      - application/xml
+      - text/plain
+      - text/html
+```
+
+## 工作原理
+
+1. 请求处理时，插件会根据配置决定是否记录请求头和请求体
+2. 只有当请求方法为 POST、PUT 或 PATCH，且内容类型在配置的 `request.body.contentTypes` 列表中时，才会记录请求体
+3. 响应处理时，插件会根据配置决定是否记录响应头和响应体
+4. 只有当响应的内容类型在配置的 `response.body.contentTypes` 列表中时，才会记录响应体
+5. 所有记录的内容都会被限制在配置的 `maxSize` 指定的大小内
+6. 插件对请求体和响应体都使用流式处理方式，不会阻止或修改原始内容传递
+7. 记录的内容会被存储在 Envoy 的 Filter State 中，可以通过访问日志配置获取
+
+## 编译方法
+
+```bash
+# 先整理依赖
+go mod tidy
+
+# 编译
+tinygo build -o main.wasm -scheduler=none -target=wasi -gc=custom -tags="custommalloc nottinygc_finalizer" ./main.go
+```
+
+## 访问日志配置
+
+要在 Higress 访问日志中显示插件添加的 Filter State 数据，需要修改 Higress 的访问日志配置。编辑 ConfigMap：
+
+```bash
+kubectl edit cm -n higress-system higress-config
+```
+
+在 `envoyAccessLogService.config.accessLog` 下的 `format` 字段中添加以下内容：
+
+```json
+{
+  "request_headers": "%FILTER_STATE(wasm.log-request-headers:PLAIN)%",
+  "request_body": "%FILTER_STATE(wasm.log-request-body:PLAIN)%",
+  "response_headers": "%FILTER_STATE(wasm.log-response-headers:PLAIN)%",
+  "response_body": "%FILTER_STATE(wasm.log-response-body:PLAIN)%"
+}
+```
+
+完整的访问日志配置可能会像这样（添加到现有配置中）：
+
+```yaml
+mesh:
+  accessLogFile: "/dev/stdout"
+  accessLogFormat: |
+    {
+      "authority": "%REQ(:AUTHORITY)%",
+      "bytes_received": "%BYTES_RECEIVED%",
+      "bytes_sent": "%BYTES_SENT%",
+      "downstream_local_address": "%DOWNSTREAM_LOCAL_ADDRESS%",
+      "downstream_remote_address": "%DOWNSTREAM_REMOTE_ADDRESS%",
+      "duration": "%DURATION%",
+      "method": "%REQ(:METHOD)%",
+      "path": "%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%",
+      "protocol": "%PROTOCOL%",
+      "request_id": "%REQ(X-REQUEST-ID)%",
+      "requested_server_name": "%REQUESTED_SERVER_NAME%",
+      "response_code": "%RESPONSE_CODE%",
+      "response_flags": "%RESPONSE_FLAGS%",
+      "route_name": "%ROUTE_NAME%",
+      "start_time": "%START_TIME%",
+      "trace_id": "%REQ(X-B3-TRACEID)%",
+      "upstream_cluster": "%UPSTREAM_CLUSTER%",
+      "upstream_host": "%UPSTREAM_HOST%",
+      "upstream_local_address": "%UPSTREAM_LOCAL_ADDRESS%",
+      "upstream_service_time": "%RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)%",
+      "upstream_transport_failure_reason": "%UPSTREAM_TRANSPORT_FAILURE_REASON%",
+      "user_agent": "%REQ(USER-AGENT)%",
+      "x_forwarded_for": "%REQ(X-FORWARDED-FOR)%",
+      "request_headers": "%FILTER_STATE(wasm.log-request-headers:PLAIN)%",
+      "request_body": "%FILTER_STATE(wasm.log-request-body:PLAIN)%",
+      "response_headers": "%FILTER_STATE(wasm.log-response-headers:PLAIN)%",
+      "response_body": "%FILTER_STATE(wasm.log-response-body:PLAIN)%"
+    }
+```
+
+## 日志输出示例
+
+配置完成后，Higress 的访问日志中将包含这些额外的字段（取决于您的配置启用了哪些选项）：
+
+```json
+{
+  "authority": "example.com",
+  "method": "POST",
+  "path": "/api/users",
+  "response_code": 200,
+  "request_headers": "{\"host\":\"example.com\",\"path\":\"/api/users\",\"method\":\"POST\",\"content-type\":\"application/json\"}",
+  "request_body": "{\"name\":\"测试用户\",\"email\":\"test@example.com\"}",
+  "response_headers": "{\"content-type\":\"application/json\",\"status\":\"200\"}",
+  "response_body": "{\"id\":123,\"status\":\"success\"}"
+}
+```
+
+## 注意事项
+
+1. 所有日志记录选项默认都是关闭的（false），需要明确启用才会记录相应内容
+2. 对于大型请求体或响应体，可以通过 `request.body.maxSize` 和 `response.body.maxSize` 参数限制记录的长度，以避免日志过大
+3. 插件使用流式处理方式处理请求体和响应体，不会对原始内容产生任何影响
+4. 只有指定内容类型的 POST、PUT、PATCH 请求才会记录请求体内容
+5. 只有指定内容类型的响应才会记录响应体内容
+6. 请确保合理配置该插件，避免记录敏感信息到日志中
--- a/plugins/wasm-go/extensions/log-request-response/VERSION
+++ b/plugins/wasm-go/extensions/log-request-response/VERSION
@@ -0,0 +1 @@
+1.0.0
--- a/plugins/wasm-go/extensions/log-request-response/docker-compose.yaml
+++ b/plugins/wasm-go/extensions/log-request-response/docker-compose.yaml
@@ -0,0 +1,25 @@
+services:
+  envoy:
+    image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/gateway:v2.1.3
+    entrypoint: /usr/local/bin/envoy
+    # 注意这里对wasm开启了debug级别日志，正式部署时则默认info级别
+    command: -c /etc/envoy/envoy.yaml --component-log-level wasm:debug
+    depends_on:
+      - httpbin
+    networks:
+      - wasmtest
+    ports:
+      - "10000:10000"
+    volumes:
+      - ./envoy.yaml:/etc/envoy/envoy.yaml
+      - ./main.wasm:/etc/envoy/main.wasm
+
+  httpbin:
+    image: kennethreitz/httpbin:latest
+    networks:
+      - wasmtest
+    ports:
+      - "12345:80"
+
+networks:
+  wasmtest: {}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
johnlanni	272d693df3	fix higress-console version in helm chart	2025-06-18 09:15:46 +08:00
澄潭	69bc800198	fix: The mcp to rest capability of the mcp server supports returning status without returning a body from the backend, and instead responds via sse (#2445 )	2025-06-17 21:26:38 +08:00
澄潭	1daaa4b880	release 2.1.5-rc.1 (#2446 )	2025-06-17 21:23:42 +08:00
澄潭	6e31a7b67c	update envoy and istio (#2440 )	2025-06-17 17:22:46 +08:00
澄潭	91f070906a	feat: add mcp-router plugin (#2409 )	2025-06-17 15:40:13 +08:00
澄潭	e3aeddcc24	add release-notes of 2.1.4 (#2433 )	2025-06-17 14:41:14 +08:00
woody	926913f0e7	feat(ai-proxy): add support for OpenAI Fine-Tuning API (#2424 )	2025-06-17 13:44:00 +08:00
mirror	c471bb2003	feat: add default route support for wanx image&video synthesis (#2431 )	2025-06-17 13:43:26 +08:00
澄潭	0b9256617e	fix: When configuring an MCP server for SSE forwarding, the controller may crash (#2423 )	2025-06-16 16:08:39 +08:00
hourmoneys	2670ecbf8e	feat: Add AI-based bidding information tool MCP service (#2343 )	2025-06-16 10:14:46 +08:00
mirror	7040e4bd34	feat: support for wanxiang image/video generation in ai-proxy & ai-statistics (#2378 )	2025-06-16 09:39:37 +08:00
xuruidong	de8a4d0b03	docs: fix broken link in mcp-servers README_zh.md (#2418 )	2025-06-15 22:14:10 +08:00
Xijun Dai	b33a3a4d2e	fix(ai-proxy): fix gemini provider missing finishReason (#2408 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com> Co-authored-by: Se7en <chengzw258@163.com>	2025-06-13 21:51:44 +08:00
澄潭	087cb48fc5	opt: unify the `end-of-line` markers in the MCP session filter. (#2403 )	2025-06-12 18:58:56 +08:00
hourmoneys	95f32002d2	add mcp-server doc (#2327 )	2025-06-12 17:14:39 +08:00
Xijun Dai	fb8dd819e9	feat(ai-proxy): Adjust the streaming response structure to keep it consistent with the openai (#2391 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com>	2025-06-12 16:25:35 +08:00
EricaLiu	86934b3203	fix: fix const McpStreamableProtocol spell mistake (#2405 )	2025-06-12 15:35:39 +08:00
HaoJie Liu	38068ee43d	fix(ai-proxy): fix bedrock Sigv4 mismatch (#2402 )	2025-06-12 10:46:02 +08:00
EricaLiu	d81573e0d2	fix: change auto generate se namespace to mcp (#2398 )	2025-06-11 20:30:48 +08:00
tangchang	312b80f91d	feat: Plugin server supports k8s deployment and configures the default download URL of the plugin(#2232 , #2280,#2312) (#2389 ) Co-authored-by: xujingfeng <jingfeng.xjf@alibaba-inc.com> Co-authored-by: 澄潭 <zty98751@alibaba-inc.com>	2025-06-11 12:20:09 +08:00
zty98751	e42e6eeee6	split translae-readme from helm-docs action	2025-06-11 09:52:41 +08:00
澄潭	9f5067d22f	Update release-hgctl.yaml	2025-06-10 22:21:42 +08:00
澄潭	6af9587372	Update release-crd.yaml	2025-06-10 22:21:00 +08:00
johnlanni	5812c1e734	release 2.1.4	2025-06-10 20:58:22 +08:00
github-actions[bot]	bafbe7972d	Update CRD file in the helm folder (#2392 ) Co-authored-by: CH3CHO <2909796+CH3CHO@users.noreply.github.com>	2025-06-10 20:29:23 +08:00
Kent Dong	f3fbf7d6c8	fix: Support mixing line breaks in a single SSE response (#2344 )	2025-06-10 20:21:04 +08:00
EricaLiu	1666dfb01c	fix : fix credential process logic for nacos mcp util and add ut for it (#2394 )	2025-06-10 20:03:45 +08:00
EricaLiu	d2f09fe8c5	fix: refactored mcp server auto discovery logic and fix some issue (#2382 ) Co-authored-by: johnlanni <zty98751@alibaba-inc.com>	2025-06-10 17:11:34 +08:00
Xijun Dai	69d877c116	feat(ai-proxy): 添加 Claude 图片理解与 Tools 调用能力 \|\| feat(ai-proxy): Add Claude image understanding and Tools calling capabilities (#2385 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com>	2025-06-10 15:11:18 +08:00
澄潭	5bc0058779	add upstream override wasm abi (#2387 )	2025-06-10 14:20:02 +08:00
HaoJie Liu	d4e114b152	feat(ai-proxy): support Google Cloud Vertex (#2119 ) Co-authored-by: Kent Dong <ch3cho@qq.com>	2025-06-09 18:11:30 +08:00
Xijun Dai	e674c780c6	feat(ai-proxy): add models & image generation support for gemini (#2380 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com> Co-authored-by: Kent Dong <ch3cho@qq.com>	2025-06-08 15:25:22 +08:00
mamba	26cd6837d5	feat(frontend-gray): Add uniqueGrayTag configuration detection (#2371 ) Co-authored-by: rinfx <yucheng.lxr@alibaba-inc.com>	2025-06-07 15:35:28 +08:00
Xijun Dai	5674d91a10	feat(ai-proxy): 修复 openai 配置 openaiCustomUrl 之后, 对不支持 Api 透传路径错误的问题 \|\| feat(ai-proxy): Fixed the issue that the API pass-through path error does not support openaiCustomUrl after openai is configured. (#2364 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com>	2025-06-06 17:02:56 +08:00
澄潭	c78b4aaba3	Update README.md	2025-06-05 13:36:33 +08:00
澄潭	0e4e8da9c1	Update README.md	2025-06-05 13:35:58 +08:00
澄潭	c9ec8a12bb	Update README.md	2025-06-05 12:00:59 +08:00
澄潭	7484bcea62	Update README.md	2025-06-05 12:00:08 +08:00
Xijun Dai	896780b60e	feat(ai-proxy): add modelMapping regexp support (#2358 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com>	2025-06-03 22:29:17 +08:00
澄潭	7b1ae49cd4	fix content-length header not remove in ai-search plugin (#2363 )	2025-06-03 20:40:14 +08:00
VinciWu557	ee26baf054	feat: support dify ai-proxy e2e test \|\| feat: support diify ai-proxy e2e test (#2319 )	2025-06-03 19:31:58 +08:00
Xijun Dai	33fc47cefb	feat(ai-proxy): add batches & files support (#2355 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com>	2025-06-03 09:42:36 +08:00
澄潭	19946d46ca	Update README.md	2025-05-30 17:24:28 +08:00
mirror	52d0212698	fix: set "EnableSemanticCachefalse" to false when no vector configured in ai-cache (#2351 )	2025-05-30 13:38:06 +08:00
Xijun Dai	a73c33f1da	feat(ai-proxy): support OpenAI-compatible image and audio model Mapping (#2341 )	2025-05-30 12:16:52 +08:00
韩贤涛	69b755a10d	feat: cluster-key-rate-limit support setting global rate limit thresholds for routes (#2262 )	2025-05-29 09:57:10 +08:00
johnlanni	52464c0e06	fix empty authority rewrite in mcp-server plugin	2025-05-28 19:56:16 +08:00
澄潭	d7d5d1c571	Update README.md	2025-05-28 15:31:12 +08:00
johnlanni	ea948ee818	add more info log in mcp-server	2025-05-28 10:30:35 +08:00
Xijun Dai	767f51adce	feat(ai-proxy): add doubao Image Generation support (#2331 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com>	2025-05-27 18:59:07 +08:00
HaoJie Liu	168cb04c61	fix(ai-proxy): URL encode model name in Bedrock requests (#2321 )	2025-05-27 16:06:52 +08:00
johnlanni	323aabf72b	rm .tgitconfig	2025-05-27 07:14:13 +08:00
澄潭	b8d75598ed	Update mcp-server.yaml	2025-05-26 16:51:03 +08:00
johnlanni	b37649a62f	update README of shebao-tools mcp server	2025-05-26 16:31:14 +08:00
澄潭	76f76a70ab	add info log of ai-search plugin (#2323 )	2025-05-26 16:23:59 +08:00
澄潭	647c961f51	Update README.md	2025-05-26 16:12:52 +08:00
澄潭	5a5a72a9f8	Update README.md	2025-05-26 16:09:30 +08:00
Kent Dong	ffcf5df28a	feat: Refactor mcpServer.matchList config generation logic (#2207 )	2025-05-26 15:26:44 +08:00
Se7en	ec83623614	feat: allow skipping higress dev image build during wasmplugin e2e tests (#2264 )	2025-05-26 10:20:05 +08:00
Kent Dong	bf5be07d74	feat: Add a github action to copy CRD definitions from api folder to helm folder (#2268 )	2025-05-26 10:10:56 +08:00
hourmoneys	f6bb5d7729	add mcp service shebao tools (#2303 )	2025-05-23 17:27:15 +08:00
Whitea	031ae21caa	feat(mcp-server): add HackMD mcp server (#2260 )	2025-05-22 16:53:01 +08:00
Forgottener	fa3c5ea0fc	feat: Supports recording request header, request body, response header and response body information in the access log (#2265 )	2025-05-21 16:15:05 +08:00
澄潭	93436db13c	fix proxy-wasm-cpp-sdk (#2281 )	2025-05-21 13:59:27 +08:00
xujingfeng	be2c6f8a4a	fix: modify log level WARN -> DEBUG in key-auth plugin (#2275 )	2025-05-20 13:52:17 +08:00
EricaLiu	c768973e47	Fix : add fail strategy for wasmplugin generated by mcp server (#2237 )	2025-05-15 16:28:37 +08:00
澄潭	8ec65ed377	mcp server support API auth through OAS3 `security schemes` \|\| mcp server support API auth through OAS3 `security schemes` (#2241 )	2025-05-15 15:48:27 +08:00
@@ -1 +1 @@
 .4.0
 .0.0