fix higress-console version in helm chart

fix: The MCP-to-REST capability of the MCP server supports returning status without returning a body from the backend, and instead responds via SSE (#2445)
release 2.1.5-rc.1 (#2446)
2026-02-25 13:10:50 +08:00 · 2025-06-18 09:15:46 +08:00 · 2025-06-17 21:26:38 +08:00 · 2025-06-17 21:23:42 +08:00 · 2025-06-17 17:22:46 +08:00 · 2025-06-17 15:40:13 +08:00
175 changed files with 10978 additions and 2459 deletions
--- a/.github/workflows/helm-docs.yaml
+++ b/.github/workflows/helm-docs.yaml
@@ -6,11 +6,13 @@ on:
      - "*"
    paths:
      - 'helm/**'
+      - '!helm/higress/README.zh.md'
  workflow_dispatch: ~
  push:
    branches: [ main ]
    paths:
      - 'helm/**'
+      - '!helm/higress/README.zh.md'

 jobs:
  helm:
@@ -31,96 +33,9 @@ jobs:
        run: |
          GOBIN=$PWD GO111MODULE=on go install github.com/norwoodj/helm-docs/cmd/helm-docs@v1.14.2
          ./helm-docs -c ${GITHUB_WORKSPACE}/helm/higress -f ../core/values.yaml
-          DIFF=$(git diff ${GITHUB_WORKSPACE}/helm/higress/*md)
+          DIFF=$(git diff ${GITHUB_WORKSPACE}/helm/higress/README.md)
          if [ ! -z "$DIFF" ]; then
            echo "Please use helm-docs in your clone, of your fork, of the project, and commit a updated README.md for the chart."
          fi
          git diff --exit-code
          rm -f ./helm-docs
-
-  translate-readme:
-    needs: helm
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Install dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y jq
-
-      - name: Compare README.md
-        id: compare_readme
-        run: |
-          cd ./helm/higress
-          BASE_BRANCH=main
-          UPSTREAM_REPO=https://github.com/alibaba/higress.git
-
-          TEMP_DIR=$(mktemp -d)
-          git clone --depth 1 --branch $BASE_BRANCH $UPSTREAM_REPO $TEMP_DIR
-
-          if diff -q "$TEMP_DIR/README.md" README.md > /dev/null; then
-            echo "README.md has no changes in comparison to base branch. Skipping translation."
-            echo "skip_translation=true" >> $GITHUB_ENV
-          else
-            echo "README.md has changed in comparison to base branch. Proceeding with translation."
-            echo "skip_translation=false" >> $GITHUB_ENV
-          fi
-
-      - name: Translate README.md to Chinese
-        if: env.skip_translation == 'false'
-        env:
-          API_URL: ${{ secrets.HIGRESS_OPENAI_API_URL }}
-          API_KEY: ${{ secrets.HIGRESS_OPENAI_API_KEY }}
-          API_MODEL: ${{ secrets.HIGRESS_OPENAI_API_MODEL }}
-        run: |
-          cd ./helm/higress
-          FILE_CONTENT=$(cat README.md)
-
-          PAYLOAD=$(jq -n \
-            --arg model "$API_MODEL" \
-            --arg content "$FILE_CONTENT" \
-            '{
-              model: $model,
-              messages: [
-                {"role": "system", "content": "You are a translation assistant that translates English Markdown text to Chinese."},
-                {"role": "user", "content": $content}
-              ],
-              temperature: 1.1,
-              stream: false
-            }')
-
-          RESPONSE=$(curl -s -X POST "$API_URL" \
-            -H "Content-Type: application/json" \
-            -H "Authorization: Bearer $API_KEY" \
-            -d "$PAYLOAD")
-
-          echo "Response: $RESPONSE"
-
-          echo "$RESPONSE" | jq -c -r '.choices[] | .message.content' > README.zh.new.md
-
-          if [ -f "README.zh.new.md" ]; then
-            echo "Translation completed and saved to README.zh.new.md."
-          else
-            echo "Translation failed or no content returned!"
-            exit 1
-          fi
-
-          mv README.zh.new.md README.zh.md
-
-      - name: Create Pull Request
-        if: env.skip_translation == 'false'
-        uses: peter-evans/create-pull-request@v7
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-          commit-message: "Update helm translated README.zh.md"
-          branch: update-helm-readme-zh
-          title: "Update helm translated README.zh.md"
-          body: |
-            This PR updates the translated README.zh.md file.
-
-            - Automatically generated by GitHub Actions
-          labels: translation, automated
-          base: main
--- a/.github/workflows/release-crd.yaml
+++ b/.github/workflows/release-crd.yaml
@@ -17,7 +17,7 @@ jobs:
        cat helm/core/crds/customresourcedefinitions.gen.yaml helm/core/crds/istio-envoyfilter.yaml > crd.yaml

    - name: Upload hgctl packages to the GitHub release
-      uses: softprops/action-gh-release@v2
+      uses: softprops/action-gh-release@da05d552573ad5aba039eaac05058a918a7bf631
      if: startsWith(github.ref, 'refs/tags/')
      with:
        files: |
--- a/.github/workflows/release-hgctl.yaml
+++ b/.github/workflows/release-hgctl.yaml
@@ -26,7 +26,7 @@ jobs:
        zip -q -r hgctl_${{ env.HGCTL_VERSION }}_windows_arm64.zip out/windows_arm64/

    - name: Upload hgctl packages to the GitHub release
-      uses: softprops/action-gh-release@v2
+      uses: softprops/action-gh-release@da05d552573ad5aba039eaac05058a918a7bf631
      if: startsWith(github.ref, 'refs/tags/')
      with:
        files: |
@@ -51,7 +51,7 @@ jobs:
        tar -zcvf hgctl_${{ env.HGCTL_VERSION }}_darwin_arm64.tar.gz out/darwin_arm64/

    - name: Upload hgctl packages to the GitHub release
-      uses: softprops/action-gh-release@v2
+      uses: softprops/action-gh-release@da05d552573ad5aba039eaac05058a918a7bf631
      if: startsWith(github.ref, 'refs/tags/')
      with:
        files: |
@@ -73,7 +73,7 @@ jobs:
        tar -zcvf hgctl_${{ env.HGCTL_VERSION }}_darwin_amd64.tar.gz out/darwin_amd64/

    - name: Upload hgctl packages to the GitHub release
-      uses: softprops/action-gh-release@v2
+      uses: softprops/action-gh-release@da05d552573ad5aba039eaac05058a918a7bf631
      if: startsWith(github.ref, 'refs/tags/')
      with:
        files: |
--- a/.github/workflows/sync-crds.yaml
+++ b/.github/workflows/sync-crds.yaml
@@ -0,0 +1,41 @@
+# Copies the generated CRD definitions into the Helm chart and proposes the
+# result as a pull request whenever the generated source file changes on main.
+name: 'Sync CRDs to Helm Chart'
+
+on:
+  workflow_dispatch: null
+  push:
+    branches:
+      - main
+    paths:
+      - api/kubernetes/customresourcedefinitions.gen.yaml
+
+jobs:
+  sync-crds:
+    name: Sync CRDs
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      # Overwrite the chart's CRD file with the generated one.
+      - name: Copy the CRD YAML File to Helm Folder
+        run: |
+          cp api/kubernetes/customresourcedefinitions.gen.yaml helm/core/crds/customresourcedefinitions.gen.yaml
+
+      # Hands the working-tree change to create-pull-request (branch sync-crds, base main).
+      - name: Create Pull Request
+        uses: peter-evans/create-pull-request@v7
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          commit-message: 'Update CRD file in the helm folder'
+          branch: sync-crds
+          title: 'Update CRD file in the helm folder'
+          body: |
+            This PR updates CRD file in the helm folder.
+
+            - Automatically copied by GitHub Actions
+          labels: crds, automated
+          base: main
--- a/.github/workflows/translate-readme.yaml
+++ b/.github/workflows/translate-readme.yaml
@@ -0,0 +1,154 @@
+# Translates helm/higress/README.md into Chinese through an OpenAI-compatible
+# chat-completions endpoint and opens a pull request with the result.
+name: "Translate Helm README"
+
+on:
+  workflow_dispatch: ~
+  push:
+    branches: [ main ]
+    paths:
+      - 'helm/higress/README.md'
+
+jobs:
+  translate-readme:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y jq
+
+      # Records skip_translation in the job environment for the later steps.
+      # NOTE(review): on a push to main the checked-out README.md normally
+      # matches origin/main, so this check would always skip the translation;
+      # confirm which revision the comparison is meant to use.
+      - name: Compare README.md
+        id: compare_readme
+        run: |
+          cd ./helm/higress
+
+          BASE_BRANCH=${GITHUB_BASE_REF:-main}
+          git fetch origin "$BASE_BRANCH"
+
+          if git diff --quiet "origin/$BASE_BRANCH" -- README.md; then
+            echo "README.md has no local changes compared to $BASE_BRANCH. Skipping translation."
+            echo "skip_translation=true" >> "$GITHUB_ENV"
+          else
+            echo "README.md has local changes compared to $BASE_BRANCH. Proceeding with translation."
+            echo "skip_translation=false" >> "$GITHUB_ENV"
+            echo "--------- diff ---------"
+            git diff "origin/$BASE_BRANCH" -- README.md
+            echo "------------------------"
+          fi
+
+      # The script is written to a temporary file, run, and removed again.
+      - name: Translate README.md to Chinese
+        if: env.skip_translation == 'false'
+        env:
+          API_URL: ${{ secrets.HIGRESS_OPENAI_API_URL }}
+          API_KEY: ${{ secrets.HIGRESS_OPENAI_API_KEY }}
+          API_MODEL: ${{ secrets.HIGRESS_OPENAI_API_MODEL }}
+        run: |
+          cat << 'EOF' > translate_readme.py
+          import os
+          import sys
+          import json
+          import requests
+
+          API_URL = os.environ["API_URL"]
+          API_KEY = os.environ["API_KEY"]
+          API_MODEL = os.environ["API_MODEL"]
+          README_PATH = "./helm/higress/README.md"
+          OUTPUT_PATH = "./helm/higress/README.zh.md"
+
+          def stream_translation(api_url, api_key, payload):
+              """POST the payload and return the streamed translation text."""
+              headers = {
+                  "Content-Type": "application/json",
+                  "Authorization": f"Bearer {api_key}",
+              }
+              # Bound the connect/read waits so a stalled endpoint cannot hang the job.
+              response = requests.post(
+                  api_url, headers=headers, json=payload, stream=True, timeout=(10, 300)
+              )
+              response.raise_for_status()
+              # Without an explicit charset, requests decodes text/* bodies as
+              # ISO-8859-1, which would garble the Chinese output.
+              response.encoding = "utf-8"
+
+              pieces = []
+              for line in response.iter_lines(decode_unicode=True):
+                  if line.strip() == "" or not line.startswith("data: "):
+                      continue
+                  data = line[6:]
+                  if data.strip() == "[DONE]":
+                      break
+                  try:
+                      chunk = json.loads(data)
+                      content = chunk["choices"][0]["delta"].get("content", "")
+                      if content:
+                          pieces.append(content)
+                  except Exception as e:
+                      print("Error parsing chunk:", e)
+              return "".join(pieces)
+
+          def main():
+              if not os.path.exists(README_PATH):
+                  print("README.md not found!")
+                  sys.exit(1)
+
+              with open(README_PATH, "r", encoding="utf-8") as f:
+                  content = f.read()
+
+              payload = {
+                  "model": API_MODEL,
+                  "messages": [
+                      {
+                          "role": "system",
+                          "content": "You are a translation assistant that translates English Markdown text to Chinese. Preserve original Markdown formatting and line breaks."
+                      },
+                      {
+                          "role": "user",
+                          "content": content
+                      }
+                  ],
+                  "temperature": 0.3,
+                  "stream": True
+              }
+
+              print("Streaming translation started...")
+              translated = stream_translation(API_URL, API_KEY, payload)
+              # Only replace README.zh.md when the endpoint returned text, so a
+              # failed request cannot blank the existing translation.
+              if not translated.strip():
+                  print("Translation failed or no content returned!")
+                  sys.exit(1)
+              with open(OUTPUT_PATH, "w", encoding="utf-8") as out_file:
+                  out_file.write(translated)
+              print(f"Translation completed and saved to {OUTPUT_PATH}.")
+
+          if __name__ == "__main__":
+              main()
+          EOF
+
+          python3 translate_readme.py
+          rm -f translate_readme.py
+
+      - name: Create Pull Request
+        if: env.skip_translation == 'false'
+        uses: peter-evans/create-pull-request@v7
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          commit-message: "Update helm translated README.zh.md"
+          branch: update-helm-readme-zh
+          title: "Update helm translated README.zh.md"
+          body: |
+            This PR updates the translated README.zh.md file.
+
+            - Automatically generated by GitHub Actions
+          labels: translation, automated
+          base: main
--- a/.github/workflows/translate-test.yml
+++ b/.github/workflows/translate-test.yml
@@ -0,0 +1,33 @@
+# Runs the translate action when issues, pull requests, discussions, or their
+# comments are created or edited.
+name: 'Translate GitHub content into English'
+on:
+  issues:
+    types: [opened, edited]
+  issue_comment:
+    types: [created, edited]
+  discussion:
+    types: [created, edited]
+  discussion_comment:
+    types: [created, edited]
+  pull_request_target:
+    types: [opened, edited]
+  pull_request_review_comment:
+    types: [created, edited]
+
+jobs:
+  translate:
+    permissions:
+      issues: write
+      discussions: write
+      pull-requests: write
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      # NOTE(review): this third-party action is referenced by a mutable branch
+      # and runs with write permissions; consider pinning it to a commit SHA.
+      - uses: lizheming/github-translate-action@main
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          APPEND_TRANSLATION: true
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -33,6 +33,7 @@ header:
    - 'hgctl/cmd/hgctl/config/testdata/config'
    - 'hgctl/pkg/manifests'
    - 'pkg/ingress/kube/gateway/istio/testdata'
+    - 'release-notes/**'

  comment: on-failure
 dependency:
--- a/Makefile.core.mk
+++ b/Makefile.core.mk
@@ -144,7 +144,7 @@ docker-buildx-push: clean-env docker.higress-buildx
 export PARENT_GIT_TAG:=$(shell cat VERSION)
 export PARENT_GIT_REVISION:=$(TAG)

-export ENVOY_PACKAGE_URL_PATTERN?=https://github.com/higress-group/proxy/releases/download/v2.1.5/envoy-symbol-ARCH.tar.gz
+export ENVOY_PACKAGE_URL_PATTERN?=https://github.com/higress-group/proxy/releases/download/v2.1.7/envoy-symbol-ARCH.tar.gz

 build-envoy: prebuild
 	./tools/hack/build-envoy.sh
@@ -191,6 +191,7 @@ install: pre-install
 	cd helm/higress; helm dependency build
 	helm install higress helm/higress -n higress-system --create-namespace --set 'global.local=true'

+HIGRESS_LATEST_IMAGE_TAG ?= latest
 ENVOY_LATEST_IMAGE_TAG ?= 958467a353d411ae3f06e03b096bfd342cddb2c6
 ISTIO_LATEST_IMAGE_TAG ?= d9c728d3b01f64855e012b08d136e306f1160397

@@ -268,10 +269,26 @@ higress-conformance-test-clean: $(tools/kind) delete-cluster
 .PHONY: higress-wasmplugin-test-prepare
 higress-wasmplugin-test-prepare: $(tools/kind) delete-cluster create-cluster docker-build kube-load-image install-dev-wasmplugin

+# higress-wasmplugin-test-prepare-skip-docker-build prepares the environment for higress wasmplugin tests
+# without building the higress docker image: it exports TAG as $(HIGRESS_LATEST_IMAGE_TAG) and then runs
+# the kube-load-image and install-dev-wasmplugin targets in sub-makes.
+.PHONY: higress-wasmplugin-test-prepare-skip-docker-build
+higress-wasmplugin-test-prepare-skip-docker-build: $(tools/kind) delete-cluster create-cluster prebuild
+	@export TAG="$(HIGRESS_LATEST_IMAGE_TAG)" && \
+	$(MAKE) kube-load-image && \
+	$(MAKE) install-dev-wasmplugin
+
 # higress-wasmplugin-test runs ingress wasmplugin tests.
 .PHONY: higress-wasmplugin-test
 higress-wasmplugin-test: $(tools/kind) delete-cluster create-cluster docker-build kube-load-image install-dev-wasmplugin run-higress-e2e-test-wasmplugin delete-cluster

+# higress-wasmplugin-test-skip-docker-build runs ingress wasmplugin tests without building the higress
+# docker image: it exports TAG as $(HIGRESS_LATEST_IMAGE_TAG), then loads the image, installs the dev
+# wasmplugin setup, runs the e2e tests, and deletes the cluster, each in a sub-make.
+.PHONY: higress-wasmplugin-test-skip-docker-build
+higress-wasmplugin-test-skip-docker-build: $(tools/kind) delete-cluster create-cluster prebuild
+	@export TAG="$(HIGRESS_LATEST_IMAGE_TAG)" && \
+	$(MAKE) kube-load-image && \
+	$(MAKE) install-dev-wasmplugin && \
+	$(MAKE) run-higress-e2e-test-wasmplugin && \
+	$(MAKE) delete-cluster
+
 # higress-wasmplugin-test-clean cleans the environment for higress wasmplugin tests.
 .PHONY: higress-wasmplugin-test-clean
 higress-wasmplugin-test-clean: $(tools/kind) delete-cluster
@@ -290,8 +307,12 @@ delete-cluster: $(tools/kind) ## Delete kind cluster.
 # The dubbo-provider-demo and nacos-standlone-rc3 images have been uploaded to the Alibaba Cloud image registry; pull them locally before the first run.
 # docker pull registry.cn-hangzhou.aliyuncs.com/hinsteny/dubbo-provider-demo:0.0.1
 # docker pull registry.cn-hangzhou.aliyuncs.com/hinsteny/nacos-standlone-rc3:1.0.0-RC3
+# If TAG equals HIGRESS_LATEST_IMAGE_TAG, the higress docker image build was skipped, so the image must be pulled first.
 .PHONY: kube-load-image
 kube-load-image: $(tools/kind) ## Install the Higress image to a kind cluster using the provided $IMAGE and $TAG.
+	@if [ "$(TAG)" = "$(HIGRESS_LATEST_IMAGE_TAG)" ]; then \
+		tools/hack/docker-pull-image.sh higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/higress $(TAG); \
+	fi
 	tools/hack/kind-load-image.sh higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/higress $(TAG)
 	tools/hack/docker-pull-image.sh higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/pilot $(ISTIO_LATEST_IMAGE_TAG)
 	tools/hack/docker-pull-image.sh higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/gateway $(ENVOY_LATEST_IMAGE_TAG)
--- a/README.md
+++ b/README.md
@@ -10,6 +10,7 @@
    
 [![Build Status](https://github.com/alibaba/higress/actions/workflows/build-and-test.yaml/badge.svg?branch=main)](https://github.com/alibaba/higress/actions)
 [![license](https://img.shields.io/github/license/alibaba/higress.svg)](https://www.apache.org/licenses/LICENSE-2.0.html)
+[![discord](https://img.shields.io/discord/1364956090566971515?color=5865F2&label=discord&labelColor=black&logo=discord&logoColor=white&style=flat-square)](https://discord.gg/tSbww9VDaM)

 <a href="https://trendshift.io/repositories/10918" target="_blank"><img src="https://trendshift.io/api/badge/repositories/10918" alt="alibaba%2Fhigress | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a> <a href="https://www.producthunt.com/posts/higress?embed=true&utm_source=badge-featured&utm_medium=badge&utm_souce=badge-higress" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/featured.svg?post_id=951287&theme=light&t=1745492822283" alt="Higress - Global&#0032;APIs&#0032;as&#0032;MCP&#0032;powered&#0032;by&#0032;AI&#0032;Gateway | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>

@@ -68,6 +69,10 @@ Port descriptions:

 > All Higress Docker images use Higress's own image repository and are not affected by Docker Hub rate limits.
 > In addition, the submission and updates of the images are protected by a security scanning mechanism (powered by Alibaba Cloud ACR), making them very secure for use in production environments.
+> 
+> If you experience a timeout when pulling images from `higress-registry.cn-hangzhou.cr.aliyuncs.com`, you can try replacing it with the following Docker registry mirror:
+> 
+> **Southeast Asia**: `higress-registry.ap-southeast-7.cr.aliyuncs.com`

 For other installation methods such as Helm deployment under K8s, please refer to the official [Quick Start documentation](https://higress.io/en-us/docs/user/quickstart).

@@ -140,7 +145,10 @@ For other installation methods such as Helm deployment under K8s, please refer t

 ## Community

-[Slack](https://w1689142780-euk177225.slack.com/archives/C05GEL4TGTG): to get invited go [here](https://communityinviter.com/apps/w1689142780-euk177225/higress).
+Join our Discord community! This is where you can connect with developers and other enthusiastic users of Higress.
+
+[![discord](https://img.shields.io/discord/1364956090566971515?color=5865F2&label=discord&labelColor=black&logo=discord&logoColor=white&style=for-the-badge)](https://discord.gg/tSbww9VDaM)
+

 ### Thanks

--- a/2
+++ b/2
@@ -1 +1 @@
-v2.1.2
+v2.1.5-rc.1
--- a/api/kubernetes/customresourcedefinitions.gen.yaml
+++ b/api/kubernetes/customresourcedefinitions.gen.yaml
@@ -250,6 +250,10 @@ spec:
              registries:
                items:
                  properties:
+                    allowMcpServers:
+                      items:
+                        type: string
+                      type: array
                    authSecretName:
                      type: string
                    consulDatacenter:
@@ -265,12 +269,23 @@ spec:
                      type: string
                    enableMCPServer:
                      type: boolean
+                    enableScopeMcpServers:
+                      type: boolean
                    mcpServerBaseUrl:
                      type: string
                    mcpServerExportDomains:
                      items:
                        type: string
                      type: array
+                    metadata:
+                      additionalProperties:
+                        properties:
+                          innerMap:
+                            additionalProperties:
+                              type: string
+                            type: object
+                        type: object
+                      type: object
                    nacosAccessKey:
                      type: string
                    nacosAddressServer:
--- a/api/networking/v1/mcp_bridge.pb.go
+++ b/api/networking/v1/mcp_bridge.pb.go
@@ -111,28 +111,31 @@ type RegistryConfig struct {
 	sizeCache     protoimpl.SizeCache
 	unknownFields protoimpl.UnknownFields

-	Type                   string              `protobuf:"bytes,1,opt,name=type,proto3" json:"type,omitempty"`
-	Name                   string              `protobuf:"bytes,2,opt,name=name,proto3" json:"name,omitempty"`
-	Domain                 string              `protobuf:"bytes,3,opt,name=domain,proto3" json:"domain,omitempty"`
-	Port                   uint32              `protobuf:"varint,4,opt,name=port,proto3" json:"port,omitempty"`
-	NacosAddressServer     string              `protobuf:"bytes,5,opt,name=nacosAddressServer,proto3" json:"nacosAddressServer,omitempty"`
-	NacosAccessKey         string              `protobuf:"bytes,6,opt,name=nacosAccessKey,proto3" json:"nacosAccessKey,omitempty"`
-	NacosSecretKey         string              `protobuf:"bytes,7,opt,name=nacosSecretKey,proto3" json:"nacosSecretKey,omitempty"`
-	NacosNamespaceId       string              `protobuf:"bytes,8,opt,name=nacosNamespaceId,proto3" json:"nacosNamespaceId,omitempty"`
-	NacosNamespace         string              `protobuf:"bytes,9,opt,name=nacosNamespace,proto3" json:"nacosNamespace,omitempty"`
-	NacosGroups            []string            `protobuf:"bytes,10,rep,name=nacosGroups,proto3" json:"nacosGroups,omitempty"`
-	NacosRefreshInterval   int64               `protobuf:"varint,11,opt,name=nacosRefreshInterval,proto3" json:"nacosRefreshInterval,omitempty"`
-	ConsulNamespace        string              `protobuf:"bytes,12,opt,name=consulNamespace,proto3" json:"consulNamespace,omitempty"`
-	ZkServicesPath         []string            `protobuf:"bytes,13,rep,name=zkServicesPath,proto3" json:"zkServicesPath,omitempty"`
-	ConsulDatacenter       string              `protobuf:"bytes,14,opt,name=consulDatacenter,proto3" json:"consulDatacenter,omitempty"`
-	ConsulServiceTag       string              `protobuf:"bytes,15,opt,name=consulServiceTag,proto3" json:"consulServiceTag,omitempty"`
-	ConsulRefreshInterval  int64               `protobuf:"varint,16,opt,name=consulRefreshInterval,proto3" json:"consulRefreshInterval,omitempty"`
-	AuthSecretName         string              `protobuf:"bytes,17,opt,name=authSecretName,proto3" json:"authSecretName,omitempty"`
-	Protocol               string              `protobuf:"bytes,18,opt,name=protocol,proto3" json:"protocol,omitempty"`
-	Sni                    string              `protobuf:"bytes,19,opt,name=sni,proto3" json:"sni,omitempty"`
-	McpServerExportDomains []string            `protobuf:"bytes,20,rep,name=mcpServerExportDomains,proto3" json:"mcpServerExportDomains,omitempty"`
-	McpServerBaseUrl       string              `protobuf:"bytes,21,opt,name=mcpServerBaseUrl,proto3" json:"mcpServerBaseUrl,omitempty"`
-	EnableMCPServer        *wrappers.BoolValue `protobuf:"bytes,22,opt,name=enableMCPServer,proto3" json:"enableMCPServer,omitempty"`
+	Type                   string               `protobuf:"bytes,1,opt,name=type,proto3" json:"type,omitempty"`
+	Name                   string               `protobuf:"bytes,2,opt,name=name,proto3" json:"name,omitempty"`
+	Domain                 string               `protobuf:"bytes,3,opt,name=domain,proto3" json:"domain,omitempty"`
+	Port                   uint32               `protobuf:"varint,4,opt,name=port,proto3" json:"port,omitempty"`
+	NacosAddressServer     string               `protobuf:"bytes,5,opt,name=nacosAddressServer,proto3" json:"nacosAddressServer,omitempty"`
+	NacosAccessKey         string               `protobuf:"bytes,6,opt,name=nacosAccessKey,proto3" json:"nacosAccessKey,omitempty"`
+	NacosSecretKey         string               `protobuf:"bytes,7,opt,name=nacosSecretKey,proto3" json:"nacosSecretKey,omitempty"`
+	NacosNamespaceId       string               `protobuf:"bytes,8,opt,name=nacosNamespaceId,proto3" json:"nacosNamespaceId,omitempty"`
+	NacosNamespace         string               `protobuf:"bytes,9,opt,name=nacosNamespace,proto3" json:"nacosNamespace,omitempty"`
+	NacosGroups            []string             `protobuf:"bytes,10,rep,name=nacosGroups,proto3" json:"nacosGroups,omitempty"`
+	NacosRefreshInterval   int64                `protobuf:"varint,11,opt,name=nacosRefreshInterval,proto3" json:"nacosRefreshInterval,omitempty"`
+	ConsulNamespace        string               `protobuf:"bytes,12,opt,name=consulNamespace,proto3" json:"consulNamespace,omitempty"`
+	ZkServicesPath         []string             `protobuf:"bytes,13,rep,name=zkServicesPath,proto3" json:"zkServicesPath,omitempty"`
+	ConsulDatacenter       string               `protobuf:"bytes,14,opt,name=consulDatacenter,proto3" json:"consulDatacenter,omitempty"`
+	ConsulServiceTag       string               `protobuf:"bytes,15,opt,name=consulServiceTag,proto3" json:"consulServiceTag,omitempty"`
+	ConsulRefreshInterval  int64                `protobuf:"varint,16,opt,name=consulRefreshInterval,proto3" json:"consulRefreshInterval,omitempty"`
+	AuthSecretName         string               `protobuf:"bytes,17,opt,name=authSecretName,proto3" json:"authSecretName,omitempty"`
+	Protocol               string               `protobuf:"bytes,18,opt,name=protocol,proto3" json:"protocol,omitempty"`
+	Sni                    string               `protobuf:"bytes,19,opt,name=sni,proto3" json:"sni,omitempty"`
+	McpServerExportDomains []string             `protobuf:"bytes,20,rep,name=mcpServerExportDomains,proto3" json:"mcpServerExportDomains,omitempty"`
+	McpServerBaseUrl       string               `protobuf:"bytes,21,opt,name=mcpServerBaseUrl,proto3" json:"mcpServerBaseUrl,omitempty"`
+	EnableMCPServer        *wrappers.BoolValue  `protobuf:"bytes,22,opt,name=enableMCPServer,proto3" json:"enableMCPServer,omitempty"`
+	EnableScopeMcpServers  *wrappers.BoolValue  `protobuf:"bytes,23,opt,name=enableScopeMcpServers,proto3" json:"enableScopeMcpServers,omitempty"`
+	AllowMcpServers        []string             `protobuf:"bytes,24,rep,name=allowMcpServers,proto3" json:"allowMcpServers,omitempty"`
+	Metadata               map[string]*InnerMap `protobuf:"bytes,25,rep,name=metadata,proto3" json:"metadata,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
 }

 func (x *RegistryConfig) Reset() {
@@ -321,6 +324,74 @@ func (x *RegistryConfig) GetEnableMCPServer() *wrappers.BoolValue {
 	return nil
 }

+// GetEnableScopeMcpServers returns the EnableScopeMcpServers field, or nil
+// when the receiver is nil.
+func (x *RegistryConfig) GetEnableScopeMcpServers() *wrappers.BoolValue {
+	if x != nil {
+		return x.EnableScopeMcpServers
+	}
+	return nil
+}
+
+// GetAllowMcpServers returns the AllowMcpServers slice, or nil when the
+// receiver is nil.
+func (x *RegistryConfig) GetAllowMcpServers() []string {
+	if x != nil {
+		return x.AllowMcpServers
+	}
+	return nil
+}
+
+// GetMetadata returns the Metadata map, or nil when the receiver is nil.
+func (x *RegistryConfig) GetMetadata() map[string]*InnerMap {
+	if x != nil {
+		return x.Metadata
+	}
+	return nil
+}
+
+// InnerMap is a message holding a single string-to-string map field
+// (protobuf field 1, named inner_map).
+type InnerMap struct {
+	state         protoimpl.MessageState
+	sizeCache     protoimpl.SizeCache
+	unknownFields protoimpl.UnknownFields
+
+	InnerMap map[string]string `protobuf:"bytes,1,rep,name=inner_map,json=innerMap,proto3" json:"inner_map,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
+}
+
+// Reset replaces *x with a zero InnerMap and, when protoimpl.UnsafeEnabled is
+// set, stores the message info for message type index 2 on it again.
+func (x *InnerMap) Reset() {
+	*x = InnerMap{}
+	if protoimpl.UnsafeEnabled {
+		mi := &file_networking_v1_mcp_bridge_proto_msgTypes[2]
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		ms.StoreMessageInfo(mi)
+	}
+}
+
+// String returns the string form produced by protoimpl.X.MessageStringOf.
+func (x *InnerMap) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+// ProtoMessage has an empty body; presumably it only marks InnerMap as a
+// protobuf message type.
+func (*InnerMap) ProtoMessage() {}
+
+// ProtoReflect returns the reflection view of x. On the unsafe path with a
+// non-nil receiver it stores the message info on first use and returns the
+// message state; otherwise it falls back to mi.MessageOf(x).
+func (x *InnerMap) ProtoReflect() protoreflect.Message {
+	mi := &file_networking_v1_mcp_bridge_proto_msgTypes[2]
+	if protoimpl.UnsafeEnabled && x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use InnerMap.ProtoReflect.Descriptor instead.
+func (*InnerMap) Descriptor() ([]byte, []int) {
+	return file_networking_v1_mcp_bridge_proto_rawDescGZIP(), []int{2}
+}
+
+// GetInnerMap returns the InnerMap field, or nil when the receiver is nil.
+func (x *InnerMap) GetInnerMap() map[string]string {
+	if x != nil {
+		return x.InnerMap
+	}
+	return nil
+}
+
 var File_networking_v1_mcp_bridge_proto protoreflect.FileDescriptor

 var file_networking_v1_mcp_bridge_proto_rawDesc = []byte{
@@ -338,7 +409,7 @@ var file_networking_v1_mcp_bridge_proto_rawDesc = []byte{
 	0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x68, 0x69, 0x67, 0x72, 0x65, 0x73,
 	0x73, 0x2e, 0x6e, 0x65, 0x74, 0x77, 0x6f, 0x72, 0x6b, 0x69, 0x6e, 0x67, 0x2e, 0x76, 0x31, 0x2e,
 	0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x79, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x52, 0x0a,
-	0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x69, 0x65, 0x73, 0x22, 0xfd, 0x06, 0x0a, 0x0e, 0x52,
+	0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x69, 0x65, 0x73, 0x22, 0xa8, 0x09, 0x0a, 0x0e, 0x52,
 	0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x79, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x17, 0x0a,
 	0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x42, 0x03, 0xe0, 0x41, 0x02,
 	0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02,
@@ -394,11 +465,39 @@ var file_networking_v1_mcp_bridge_proto_rawDesc = []byte{
 	0x50, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x18, 0x16, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e,
 	0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e,
 	0x42, 0x6f, 0x6f, 0x6c, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x0f, 0x65, 0x6e, 0x61, 0x62, 0x6c,
-	0x65, 0x4d, 0x43, 0x50, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x42, 0x2e, 0x5a, 0x2c, 0x67, 0x69,
-	0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x61, 0x6c, 0x69, 0x62, 0x61, 0x62, 0x61,
-	0x2f, 0x68, 0x69, 0x67, 0x72, 0x65, 0x73, 0x73, 0x2f, 0x61, 0x70, 0x69, 0x2f, 0x6e, 0x65, 0x74,
-	0x77, 0x6f, 0x72, 0x6b, 0x69, 0x6e, 0x67, 0x2f, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74,
-	0x6f, 0x33,
+	0x65, 0x4d, 0x43, 0x50, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x12, 0x50, 0x0a, 0x15, 0x65, 0x6e,
+	0x61, 0x62, 0x6c, 0x65, 0x53, 0x63, 0x6f, 0x70, 0x65, 0x4d, 0x63, 0x70, 0x53, 0x65, 0x72, 0x76,
+	0x65, 0x72, 0x73, 0x18, 0x17, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67,
+	0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x42, 0x6f, 0x6f, 0x6c,
+	0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x15, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x53, 0x63, 0x6f,
+	0x70, 0x65, 0x4d, 0x63, 0x70, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x73, 0x12, 0x28, 0x0a, 0x0f,
+	0x61, 0x6c, 0x6c, 0x6f, 0x77, 0x4d, 0x63, 0x70, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x73, 0x18,
+	0x18, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0f, 0x61, 0x6c, 0x6c, 0x6f, 0x77, 0x4d, 0x63, 0x70, 0x53,
+	0x65, 0x72, 0x76, 0x65, 0x72, 0x73, 0x12, 0x4f, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61,
+	0x74, 0x61, 0x18, 0x19, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x33, 0x2e, 0x68, 0x69, 0x67, 0x72, 0x65,
+	0x73, 0x73, 0x2e, 0x6e, 0x65, 0x74, 0x77, 0x6f, 0x72, 0x6b, 0x69, 0x6e, 0x67, 0x2e, 0x76, 0x31,
+	0x2e, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x79, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e,
+	0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x6d,
+	0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x1a, 0x5c, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x61, 0x64,
+	0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18,
+	0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x35, 0x0a, 0x05, 0x76, 0x61,
+	0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x68, 0x69, 0x67, 0x72,
+	0x65, 0x73, 0x73, 0x2e, 0x6e, 0x65, 0x74, 0x77, 0x6f, 0x72, 0x6b, 0x69, 0x6e, 0x67, 0x2e, 0x76,
+	0x31, 0x2e, 0x49, 0x6e, 0x6e, 0x65, 0x72, 0x4d, 0x61, 0x70, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75,
+	0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x93, 0x01, 0x0a, 0x08, 0x49, 0x6e, 0x6e, 0x65, 0x72, 0x4d,
+	0x61, 0x70, 0x12, 0x4a, 0x0a, 0x09, 0x69, 0x6e, 0x6e, 0x65, 0x72, 0x5f, 0x6d, 0x61, 0x70, 0x18,
+	0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2d, 0x2e, 0x68, 0x69, 0x67, 0x72, 0x65, 0x73, 0x73, 0x2e,
+	0x6e, 0x65, 0x74, 0x77, 0x6f, 0x72, 0x6b, 0x69, 0x6e, 0x67, 0x2e, 0x76, 0x31, 0x2e, 0x49, 0x6e,
+	0x6e, 0x65, 0x72, 0x4d, 0x61, 0x70, 0x2e, 0x49, 0x6e, 0x6e, 0x65, 0x72, 0x4d, 0x61, 0x70, 0x45,
+	0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x69, 0x6e, 0x6e, 0x65, 0x72, 0x4d, 0x61, 0x70, 0x1a, 0x3b,
+	0x0a, 0x0d, 0x49, 0x6e, 0x6e, 0x65, 0x72, 0x4d, 0x61, 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12,
+	0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65,
+	0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09,
+	0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x42, 0x2e, 0x5a, 0x2c, 0x67,
+	0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x61, 0x6c, 0x69, 0x62, 0x61, 0x62,
+	0x61, 0x2f, 0x68, 0x69, 0x67, 0x72, 0x65, 0x73, 0x73, 0x2f, 0x61, 0x70, 0x69, 0x2f, 0x6e, 0x65,
+	0x74, 0x77, 0x6f, 0x72, 0x6b, 0x69, 0x6e, 0x67, 0x2f, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f,
+	0x74, 0x6f, 0x33,
 }

 var (
@@ -413,20 +512,27 @@ func file_networking_v1_mcp_bridge_proto_rawDescGZIP() []byte {
 	return file_networking_v1_mcp_bridge_proto_rawDescData
 }

-var file_networking_v1_mcp_bridge_proto_msgTypes = make([]protoimpl.MessageInfo, 2)
+var file_networking_v1_mcp_bridge_proto_msgTypes = make([]protoimpl.MessageInfo, 5)
 var file_networking_v1_mcp_bridge_proto_goTypes = []interface{}{
 	(*McpBridge)(nil),          // 0: higress.networking.v1.McpBridge
 	(*RegistryConfig)(nil),     // 1: higress.networking.v1.RegistryConfig
-	(*wrappers.BoolValue)(nil), // 2: google.protobuf.BoolValue
+	(*InnerMap)(nil),           // 2: higress.networking.v1.InnerMap
+	nil,                        // 3: higress.networking.v1.RegistryConfig.MetadataEntry
+	nil,                        // 4: higress.networking.v1.InnerMap.InnerMapEntry
+	(*wrappers.BoolValue)(nil), // 5: google.protobuf.BoolValue
 }
 var file_networking_v1_mcp_bridge_proto_depIdxs = []int32{
 	1, // 0: higress.networking.v1.McpBridge.registries:type_name -> higress.networking.v1.RegistryConfig
-	2, // 1: higress.networking.v1.RegistryConfig.enableMCPServer:type_name -> google.protobuf.BoolValue
-	2, // [2:2] is the sub-list for method output_type
-	2, // [2:2] is the sub-list for method input_type
-	2, // [2:2] is the sub-list for extension type_name
-	2, // [2:2] is the sub-list for extension extendee
-	0, // [0:2] is the sub-list for field type_name
+	5, // 1: higress.networking.v1.RegistryConfig.enableMCPServer:type_name -> google.protobuf.BoolValue
+	5, // 2: higress.networking.v1.RegistryConfig.enableScopeMcpServers:type_name -> google.protobuf.BoolValue
+	3, // 3: higress.networking.v1.RegistryConfig.metadata:type_name -> higress.networking.v1.RegistryConfig.MetadataEntry
+	4, // 4: higress.networking.v1.InnerMap.inner_map:type_name -> higress.networking.v1.InnerMap.InnerMapEntry
+	2, // 5: higress.networking.v1.RegistryConfig.MetadataEntry.value:type_name -> higress.networking.v1.InnerMap
+	6, // [6:6] is the sub-list for method output_type
+	6, // [6:6] is the sub-list for method input_type
+	6, // [6:6] is the sub-list for extension type_name
+	6, // [6:6] is the sub-list for extension extendee
+	0, // [0:6] is the sub-list for field type_name
 }

 func init() { file_networking_v1_mcp_bridge_proto_init() }
@@ -459,6 +565,18 @@ func file_networking_v1_mcp_bridge_proto_init() {
 				return nil
 			}
 		}
+		file_networking_v1_mcp_bridge_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} {
+			switch v := v.(*InnerMap); i {
+			case 0:
+				return &v.state
+			case 1:
+				return &v.sizeCache
+			case 2:
+				return &v.unknownFields
+			default:
+				return nil
+			}
+		}
 	}
 	type x struct{}
 	out := protoimpl.TypeBuilder{
@@ -466,7 +584,7 @@ func file_networking_v1_mcp_bridge_proto_init() {
 			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
 			RawDescriptor: file_networking_v1_mcp_bridge_proto_rawDesc,
 			NumEnums:      0,
-			NumMessages:   2,
+			NumMessages:   5,
 			NumExtensions: 0,
 			NumServices:   0,
 		},
--- a/api/networking/v1/mcp_bridge.proto
+++ b/api/networking/v1/mcp_bridge.proto
@@ -71,4 +71,11 @@ message RegistryConfig {
  repeated string mcpServerExportDomains = 20;
  string mcpServerBaseUrl = 21;
  google.protobuf.BoolValue enableMCPServer = 22;
+  google.protobuf.BoolValue enableScopeMcpServers = 23;
+  repeated string allowMcpServers = 24;
+  map<string, InnerMap> metadata = 25;
 }
+
+message InnerMap {
+  map<string, string> inner_map = 1;
+}
--- a/api/networking/v1/mcp_bridge_deepcopy.gen.go
+++ b/api/networking/v1/mcp_bridge_deepcopy.gen.go
@@ -46,3 +46,24 @@ func (in *RegistryConfig) DeepCopy() *RegistryConfig {
 func (in *RegistryConfig) DeepCopyInterface() interface{} {
 	return in.DeepCopy()
 }
+
+// DeepCopyInto supports using InnerMap within kubernetes types, where deepcopy-gen is used.
+func (in *InnerMap) DeepCopyInto(out *InnerMap) {
+	p := proto.Clone(in).(*InnerMap)
+	*out = *p
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InnerMap. Required by controller-gen.
+func (in *InnerMap) DeepCopy() *InnerMap {
+	if in == nil {
+		return nil
+	}
+	out := new(InnerMap)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInterface is an autogenerated deepcopy function, copying the receiver, creating a new InnerMap. Required by controller-gen.
+func (in *InnerMap) DeepCopyInterface() interface{} {
+	return in.DeepCopy()
+}
--- a/api/networking/v1/mcp_bridge_json.gen.go
+++ b/api/networking/v1/mcp_bridge_json.gen.go
@@ -28,6 +28,17 @@ func (this *RegistryConfig) UnmarshalJSON(b []byte) error {
 	return McpBridgeUnmarshaler.Unmarshal(bytes.NewReader(b), this)
 }

+// MarshalJSON is a custom marshaler for InnerMap
+func (this *InnerMap) MarshalJSON() ([]byte, error) {
+	str, err := McpBridgeMarshaler.MarshalToString(this)
+	return []byte(str), err
+}
+
+// UnmarshalJSON is a custom unmarshaler for InnerMap
+func (this *InnerMap) UnmarshalJSON(b []byte) error {
+	return McpBridgeUnmarshaler.Unmarshal(bytes.NewReader(b), this)
+}
+
 var (
 	McpBridgeMarshaler   = &jsonpb.Marshaler{}
 	McpBridgeUnmarshaler = &jsonpb.Unmarshaler{AllowUnknownFields: true}
--- a/envoy/envoy
+++ b/envoy/envoy
--- a/go.mod
+++ b/go.mod
@@ -31,7 +31,7 @@ require (
 	github.com/hudl/fargo v1.4.0
 	github.com/mholt/acmez v1.2.0
 	github.com/nacos-group/nacos-sdk-go v1.0.8
-	github.com/nacos-group/nacos-sdk-go/v2 v2.1.2
+	github.com/nacos-group/nacos-sdk-go/v2 v2.3.2
 	github.com/onsi/gomega v1.27.10
 	github.com/spf13/cobra v1.8.0
 	github.com/spf13/pflag v1.0.5
@@ -202,6 +202,7 @@ require (
 	github.com/spaolacci/murmur3 v1.1.0 // indirect
 	github.com/spf13/cast v1.5.1 // indirect
 	github.com/stoewer/go-strcase v1.3.0 // indirect
+	github.com/stretchr/objx v0.5.0 // indirect
 	github.com/tetratelabs/wazero v1.7.3 // indirect
 	github.com/tidwall/match v1.1.1 // indirect
 	github.com/tidwall/pretty v1.2.0 // indirect
@@ -274,6 +275,5 @@ replace github.com/caddyserver/certmagic => github.com/2456868764/certmagic v1.0

 replace (
 	github.com/dubbogo/gost => github.com/johnlanni/gost v1.11.23-0.20220713132522-0967a24036c6
-	github.com/nacos-group/nacos-sdk-go/v2 => github.com/luoxiner/nacos-sdk-go/v2 v2.2.9-60
 	golang.org/x/exp => golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1
 )
--- a/go.sum
+++ b/go.sum
@@ -1434,8 +1434,6 @@ github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhn
 github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE=
 github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM=
 github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4=
-github.com/luoxiner/nacos-sdk-go/v2 v2.2.9-60 h1:FA/azfz2nSkMc1XR8LeqhcAiA/2/sOMcyBGYCTUc+Cs=
-github.com/luoxiner/nacos-sdk-go/v2 v2.2.9-60/go.mod h1:9FKXl6FqOiVmm72i8kADtbeK71egyG9y3uRDBg41tpQ=
 github.com/lyft/protoc-gen-star v0.6.1/go.mod h1:TGAoBVkt8w7MPG72TrKIu85MIdXwDuzJYeZuUPFPNwA=
 github.com/lyft/protoc-gen-star/v2 v2.0.1/go.mod h1:RcCdONR2ScXaYnQC5tUzxzlpA3WVYF7/opLeUgcQs/o=
 github.com/lyft/protoc-gen-star/v2 v2.0.3/go.mod h1:amey7yeodaJhXSbf/TlLvWiqQfLOSpEk//mLlc+axEk=
@@ -1525,6 +1523,8 @@ github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRW
 github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
 github.com/nacos-group/nacos-sdk-go v1.0.8 h1:8pEm05Cdav9sQgJSv5kyvlgfz0SzFUUGI3pWX6SiSnM=
 github.com/nacos-group/nacos-sdk-go v1.0.8/go.mod h1:hlAPn3UdzlxIlSILAyOXKxjFSvDJ9oLzTJ9hLAK1KzA=
+github.com/nacos-group/nacos-sdk-go/v2 v2.3.2 h1:9QB2nCJzT5wkTVlxNYl3XL/7+G6p2USMi2gQh/ouQQo=
+github.com/nacos-group/nacos-sdk-go/v2 v2.3.2/go.mod h1:9FKXl6FqOiVmm72i8kADtbeK71egyG9y3uRDBg41tpQ=
 github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg=
 github.com/nats-io/jwt v0.3.2/go.mod h1:/euKqTS1ZD+zzjYrY7pseZrTtWQSjujC7xjPc8wL6eU=
 github.com/nats-io/nats-server/v2 v2.1.2/go.mod h1:Afk+wRZqkMQs/p45uXdrVLuab3gwv3Z8C4HTBu8GD/k=
--- a/helm/core/Chart.yaml
+++ b/helm/core/Chart.yaml
@@ -1,5 +1,5 @@
 apiVersion: v2
-appVersion: 2.1.2
+appVersion: 2.1.5-rc.1
 description: Helm chart for deploying higress gateways
 icon: https://higress.io/img/higress_logo_small.png
 home: http://higress.io/
@@ -15,4 +15,4 @@ dependencies:
    repository: "file://../redis"
    version: 0.0.1
 type: application
-version: 2.1.2
+version: 2.1.5-rc.1
--- a/helm/core/crds/customresourcedefinitions.gen.yaml
+++ b/helm/core/crds/customresourcedefinitions.gen.yaml
@@ -250,6 +250,10 @@ spec:
              registries:
                items:
                  properties:
+                    allowMcpServers:
+                      items:
+                        type: string
+                      type: array
                    authSecretName:
                      type: string
                    consulDatacenter:
@@ -263,6 +267,25 @@ spec:
                      type: string
                    domain:
                      type: string
+                    enableMCPServer:
+                      type: boolean
+                    enableScopeMcpServers:
+                      type: boolean
+                    mcpServerBaseUrl:
+                      type: string
+                    mcpServerExportDomains:
+                      items:
+                        type: string
+                      type: array
+                    metadata:
+                      additionalProperties:
+                        properties:
+                          innerMap:
+                            additionalProperties:
+                              type: string
+                            type: object
+                        type: object
+                      type: object
                    nacosAccessKey:
                      type: string
                    nacosAddressServer:
--- a/helm/core/templates/_helpers.tpl
+++ b/helm/core/templates/_helpers.tpl
@@ -113,3 +113,36 @@ kind: VMPodScrape
 {{- fail "unexpected gateway.metrics.provider" -}}
 {{- end -}}
 {{- end -}}
+
+{{- define "pluginServer.name" -}}
+{{- .Values.pluginServer.name | default "higress-plugin-server" -}}
+{{- end }}
+
+{{- define "pluginServer.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{- define "pluginServer.labels" -}}
+helm.sh/chart: {{ include "pluginServer.chart" . }}
+{{ include "pluginServer.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+app.kubernetes.io/name: {{ include "pluginServer.name" . }}
+{{- end }}
+
+{{- define "pluginServer.selectorLabels" -}}
+{{- if hasKey .Values.pluginServer.labels "app" }}
+{{- with .Values.pluginServer.labels.app }}app: {{.|quote}}
+{{- end}}
+{{- else }}app: {{ include "pluginServer.name" . }}
+{{- end }}
+{{- if hasKey .Values.pluginServer.labels "higress" }}
+{{- with .Values.pluginServer.labels.higress }}
+higress: {{.|quote}}
+{{- end}}
+{{- else }}
+higress: {{ include "pluginServer.name" . }}
+{{- end }}
+{{- end }}
--- a/helm/core/templates/plugin-server-deployment.yaml
+++ b/helm/core/templates/plugin-server-deployment.yaml
@@ -0,0 +1,39 @@
+{{- if .Values.global.enablePluginServer }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "pluginServer.name" . }}
+  namespace: {{ .Release.Namespace }}
+spec:
+  replicas: {{ .Values.pluginServer.replicas }}
+  selector:
+    matchLabels:
+      {{- include "pluginServer.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      labels:
+        {{- with .Values.pluginServer.podLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+        {{- include "pluginServer.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.pluginServer.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: {{ .Chart.Name }}
+          image: {{ .Values.pluginServer.hub | default .Values.global.hub }}/{{ .Values.pluginServer.image | default "plugin-server" }}:{{ .Values.pluginServer.tag | default "1.0.0" }}
+          {{- if .Values.global.imagePullPolicy }}
+          imagePullPolicy: {{ .Values.global.imagePullPolicy }}
+          {{- end }}
+          ports:
+            - containerPort: 8080
+          resources:
+            requests:
+              cpu: {{ .Values.pluginServer.resources.requests.cpu }}
+              memory: {{ .Values.pluginServer.resources.requests.memory }}
+            limits:
+              cpu: {{ .Values.pluginServer.resources.limits.cpu }}
+              memory: {{ .Values.pluginServer.resources.limits.memory }}
+{{- end }}
--- a/helm/core/templates/plugin-server-service.yaml
+++ b/helm/core/templates/plugin-server-service.yaml
@@ -0,0 +1,16 @@
+{{- if .Values.global.enablePluginServer }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "pluginServer.name" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "pluginServer.labels" . | nindent 4 }}
+spec:
+  ports:
+    - protocol: TCP
+      port: {{ .Values.pluginServer.service.port }}
+      targetPort: 8080
+  selector:
+    {{- include "pluginServer.selectorLabels" . | nindent 4 }}
+{{- end }}
--- a/helm/core/values.yaml
+++ b/helm/core/values.yaml
@@ -11,6 +11,7 @@ global:
  enableSRDS: true
  # -- Whether to enable Redis(redis-stack-server) for Higress, default is false.
  enableRedis: false
+  enablePluginServer: false
  onDemandRDS: false
  hostRDSMergeSubset: false
  onlyPushRouteCluster: true
@@ -767,4 +768,31 @@ redis:
      accessModes:
        - ReadWriteOnce
      # -- Persistent Volume size
-      size: 1Gi
+      size: 1Gi
+
+pluginServer:
+  name: "higress-plugin-server"
+  # -- Number of Higress Plugin Server pods, 2 recommended for high availability
+  replicas: 2
+  image: plugin-server
+
+  hub: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress
+  tag: ""
+
+  imagePullSecrets: []
+
+  labels: {}
+  # -- Labels to apply to the pod
+  podLabels: {}
+
+  # Plugin server Service configuration
+  service:
+    port: 80  # Port exposed by the Service; traffic is forwarded to the container's fixed targetPort 8080
+
+  resources:
+    requests:
+      cpu: 200m
+      memory: 128Mi
+    limits:
+      cpu: 500m
+      memory: 256Mi
--- a/helm/higress/Chart.lock
+++ b/helm/higress/Chart.lock
@@ -1,9 +1,9 @@
 dependencies:
 - name: higress-core
  repository: file://../core
-  version: 2.1.2
+  version: 2.1.5-rc.1
 - name: higress-console
  repository: https://higress.io/helm-charts/
-  version: 2.1.2
-digest: sha256:7612de239141ca0d27400f7d5b9a786acd98826f511e2e3ed65ccd9d2c9f1700
-generated: "2025-04-29T20:52:39.996652+08:00"
+  version: 2.1.4
+digest: sha256:6dbbfb24eabe0927a167c11896799ea20c7f8590aa2889b853dc9a210d075d3a
+generated: "2025-06-18T09:15:09.621898+08:00"
--- a/helm/higress/Chart.yaml
+++ b/helm/higress/Chart.yaml
@@ -1,5 +1,5 @@
 apiVersion: v2
-appVersion: 2.1.2
+appVersion: 2.1.5-rc.1
 description: Helm chart for deploying Higress gateways
 icon: https://higress.io/img/higress_logo_small.png
 home: http://higress.io/
@@ -12,9 +12,9 @@ sources:
 dependencies:
 - name: higress-core
  repository: "file://../core"
-  version: 2.1.2
+  version: 2.1.5-rc.1
 - name: higress-console
  repository: "https://higress.io/helm-charts/"
-  version: 2.1.2
+  version: 2.1.4
 type: application
-version: 2.1.2
+version: 2.1.5-rc.1
--- a/helm/higress/README.md
+++ b/helm/higress/README.md
@@ -165,6 +165,7 @@ The command removes all the Kubernetes components associated with the chart and
 | global.enableIPv6 | bool | `false` |  |
 | global.enableIstioAPI | bool | `true` | If true, Higress Controller will monitor istio resources as well |
 | global.enableLDSCache | bool | `false` |  |
+| global.enablePluginServer | bool | `false` |  |
 | global.enableProxyProtocol | bool | `false` |  |
 | global.enablePushAllMCPClusters | bool | `true` |  |
 | global.enableRedis | bool | `false` | Whether to enable Redis(redis-stack-server) for Higress, default is false. |
@@ -273,6 +274,19 @@ The command removes all the Kubernetes components associated with the chart and
 | pilot.serviceAnnotations | object | `{}` |  |
 | pilot.tag | string | `""` |  |
 | pilot.traceSampling | float | `1` |  |
+| pluginServer.hub | string | `"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress"` |  |
+| pluginServer.image | string | `"plugin-server"` |  |
+| pluginServer.imagePullSecrets | list | `[]` |  |
+| pluginServer.labels | object | `{}` |  |
+| pluginServer.name | string | `"higress-plugin-server"` |  |
+| pluginServer.podLabels | object | `{}` | Labels to apply to the pod |
+| pluginServer.replicas | int | `2` | Number of Higress Plugin Server pods, 2 recommended for high availability |
+| pluginServer.resources.limits.cpu | string | `"500m"` |  |
+| pluginServer.resources.limits.memory | string | `"256Mi"` |  |
+| pluginServer.resources.requests.cpu | string | `"200m"` |  |
+| pluginServer.resources.requests.memory | string | `"128Mi"` |  |
+| pluginServer.service.port | int | `80` |  |
+| pluginServer.tag | string | `""` |  |
 | redis.redis.affinity | object | `{}` | Affinity for Redis |
 | redis.redis.image | string | `"redis-stack-server"` | Specify the image |
 | redis.redis.name | string | `"redis-stack-server"` |  |
--- a/helm/higress/README.zh.md
+++ b/helm/higress/README.zh.md
@@ -1,8 +1,8 @@
-## Higress for Kubernetes
+## 适用于 Kubernetes 的 Higress

 Higress 是基于阿里巴巴内部网关实践的云原生 API 网关。

-通过基于 Istio 和 Envoy，Higress 实现了流量网关、微服务网关和安全网关的三重网关架构的集成，从而大大降低了部署、运维的成本。
+通过 Istio 和 Envoy 的支持，Higress 实现了流量网关、微服务网关和安全网关三种架构的融合，从而极大地减少了部署、运维的成本。

 ## 设置仓库信息

@@ -13,7 +13,7 @@ helm repo update

 ## 安装

-使用名为 `higress` 的版本来安装 chart：
+使用 Helm 安装名为 `higress` 的组件：

 ```console
 helm install higress -n higress-system higress.io/higress --create-namespace --render-subchart-notes
@@ -21,60 +21,130 @@ helm install higress -n higress-system higress.io/higress --create-namespace --r

 ## 卸载

-卸载删除 higress 部署：
+删除名称为 higress 的安装：

 ```console
 helm delete higress -n higress-system
 ```

-该命令会删除与 chart 相关的所有 Kubernetes 组件并删除发行版。
+该命令将删除与该 chart 关联的所有 Kubernetes 组件并删除该发行版。

 ## 参数

-## 配置值
+## 配置值

-| 键名 | 类型 | 默认值 | 描述 |
-|------|------|---------|-------------|
-| clusterName | string | `""` |  |
-| controller.affinity | object | `{}` |  |
-| controller.automaticHttps.email | string | `""` |  |
-| controller.automaticHttps.enabled | bool | `true` |  |
-| controller.autoscaling.enabled | bool | `false` |  |
-| controller.autoscaling.maxReplicas | int | `5` |  |
-| controller.autoscaling.minReplicas | int | `1` |  |
-| controller.autoscaling.targetCPUUtilizationPercentage | int | `80` |  |
-| controller.env | object | `{}` |  |
-| controller.hub | string | `"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress"` |  |
-| controller.image | string | `"higress"` |  |
-| controller.imagePullSecrets | list | `[]` |  |
-| controller.labels | object | `{}` |  |
-| controller.name | string | `"higress-controller"` |  |
-| controller.nodeSelector | object | `{}` |  |
-| controller.podAnnotations | object | `{}` |  |
-| controller.podLabels | object | `{}` | 应用到 pod 上的标签 |
-| controller.podSecurityContext | object | `{}` |  |
-| controller.ports[0].name | string | `"http"` |  |
-| controller.ports[0].port | int | `8888` |  |
-| controller.ports[0].protocol | string | `"TCP"` |  |
-| controller.ports[0].targetPort | int | `8888` |  |
-| controller.probe.httpGet.path | string | `"/ready"` |  |
-| controller.probe.httpGet.port | int | `8888` |  |
-| controller.probe.initialDelaySeconds | int | `1` |  |
-| controller.probe.periodSeconds | int | `3` |  |
-| controller.probe.timeoutSeconds | int | `5` |  |
-| controller.rbac.create | bool | `true` |  |
-| controller.replicas | int | `1` | Higress Controller pods 的数量 |
-| controller.resources.limits.cpu | string | `"1000m"` |  |
-| controller.resources.limits.memory | string | `"2048Mi"` |  |
-| controller.resources.requests.cpu | string | `"500m"` |  |
-| controller.resources.requests.memory | string | `"2048Mi"` |  |
-| gateway.metrics.enabled | bool | `false` | 如果为 true，则为gateway创建PodMonitor或VMPodScrape |
-| gateway.metrics.provider | string | `monitoring.coreos.com` | CustomResourceDefinition 的提供商组名，可以是 monitoring.coreos.com 或 operator.victoriametrics.com |
-| gateway.readinessFailureThreshold | int | `30` | 成功进行探针测试前连续失败探针的最大次数。 |
-| global MeshNetworks | object | `{}` |  |
-| global.tracer.datadog.address | string | `"$(HOST_IP):8126"` | 提交给 Datadog agent 的 Host:Port 。|
-| redis.redis.persistence.enabled | bool | `false` | 启用 Redis 持久性，默认为 false |
-| redis.redis.persistence.size | string | `"1Gi"` | Persistent Volume 大小 |
-| redis.redis.service.port | int | `6379` | Exporter service 端口 |
-| tracing.skywalking.port | int | `11800` |  |
-| upstream.connectionBufferLimits | int | `10485760` | 上游连接缓冲限制（字节）|
+| 键 | 类型 | 默认值 | 描述 |
+|----|------|---------|-------------|
+| clusterName | string | `""` | 集群名 |
+| controller.affinity | object | `{}` | 控制器亲和性设置 |
+| controller.automaticHttps.email | string | `""` | 自动 HTTPS 所需的邮件 |
+| controller.automaticHttps.enabled | bool | `true` | 是否启用自动 HTTPS 功能 |
+| controller.autoscaling.enabled | bool | `false` | 是否启用控制器的自动扩展功能 |
+| controller.autoscaling.maxReplicas | int | `5` | 最大副本数 |
+| controller.autoscaling.minReplicas | int | `1` | 最小副本数 |
+| controller.autoscaling.targetCPUUtilizationPercentage | int | `80` | 目标 CPU 使用率百分比 |
+| controller.env | object | `{}` | 环境变量 |
+| controller.hub | string | `"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress"` | 镜像仓库地址 |
+| controller.image | string | `"higress"` | 镜像名称 |
+| controller.imagePullSecrets | list | `[]` | 镜像拉取密钥列表 |
+| controller.labels | object | `{}` | 标签 |
+| controller.name | string | `"higress-controller"` | 控制器名称 |
+| controller.nodeSelector | object | `{}` | 节点选择器 |
+| controller.podAnnotations | object | `{}` | Pod 注解 |
+| controller.podLabels | object | `{}` | 应用到 Pod 上的标签 |
+| controller.podSecurityContext | object | `{}` | Pod 安全上下文 |
+| controller.ports[0].name | string | `"http"` | 端口名称 |
+| controller.ports[0].port | int | `8888` | 端口编号 |
+| controller.ports[0].protocol | string | `"TCP"` | 协议类型 |
+| controller.ports[0].targetPort | int | `8888` | 目标端口 |
+| controller.ports[1].name | string | `"http-solver"` | 端口名称 |
+| controller.ports[1].port | int | `8889` | 端口编号 |
+| controller.ports[1].protocol | string | `"TCP"` | 协议类型 |
+| controller.ports[1].targetPort | int | `8889` | 目标端口 |
+| controller.ports[2].name | string | `"grpc"` | 端口名称 |
+| controller.ports[2].port | int | `15051` | 端口编号 |
+| controller.ports[2].protocol | string | `"TCP"` | 协议类型 |
+| controller.ports[2].targetPort | int | `15051` | 目标端口 |
+| controller.probe.httpGet.path | string | `"/ready"` | 健康检查路径 |
+| controller.probe.httpGet.port | int | `8888` | 健康检查端口 |
+| controller.probe.initialDelaySeconds | int | `1` | 初始延迟秒数 |
+| controller.probe.periodSeconds | int | `3` | 健康检查间隔秒数 |
+| controller.probe.timeoutSeconds | int | `5` | 超时秒数 |
+| controller.rbac.create | bool | `true` | 是否创建 RBAC 相关资源 |
+| controller.replicas | int | `1` | Higress 控制器 Pod 的数量 |
+| controller.resources.limits.cpu | string | `"1000m"` | CPU 上限 |
+| controller.resources.limits.memory | string | `"2048Mi"` | 内存上限 |
+| controller.resources.requests.cpu | string | `"500m"` | CPU 请求量 |
+| controller.resources.requests.memory | string | `"2048Mi"` | 内存请求量 |
+| controller.securityContext | object | `{}` | 安全上下文 |
+| controller.service.type | string | `"ClusterIP"` | 服务类型 |
+| controller.serviceAccount.annotations | object | `{}` | 添加到服务帐户的注解 |
+| controller.serviceAccount.create | bool | `true` | 是否创建服务帐户 |
+| controller.serviceAccount.name | string | `""` | 如果未设置且 create 为 true，则从 fullname 模板生成名称 |
+| controller.tag | string | `""` | 镜像标签 |
+| controller.tolerations | list | `[]` | 容忍度列表 |
+| downstream.connectionBufferLimits | int | `32768` | 下游连接缓冲区限制（字节） |
+| downstream.http2.initialConnectionWindowSize | int | `1048576` | HTTP/2 初始连接窗口大小 |
+| downstream.http2.initialStreamWindowSize | int | `65535` | 流初始窗口大小 |
+| downstream.http2.maxConcurrentStreams | int | `100` | 并发流最大数量 |
+| downstream.idleTimeout | int | `180` | 空闲超时时间（秒） |
+| downstream.maxRequestHeadersKb | int | `60` | 最大请求头大小（KB） |
+| downstream.routeTimeout | int | `0` | 路由超时时间 |
+| gateway.affinity | object | `{}` | 网关的节点亲和性 |
+| gateway.annotations | object | `{}` | 应用于所有资源的注解 |
+| gateway.autoscaling.enabled | bool | `false` | 启用网关的自动扩展功能 |
+| gateway.autoscaling.maxReplicas | int | `5` | 最大副本数 |
+| gateway.autoscaling.minReplicas | int | `1` | 最小副本数 |
+| gateway.autoscaling.targetCPUUtilizationPercentage | int | `80` | CPU 使用率的目标百分比 |
+| gateway.containerSecurityContext | string | `nil` | 网关容器的安全配置上下文 |
+| gateway.env | object | `{}` | Pod 环境变量 |
+| gateway.hostNetwork | bool | `false` | 是否使用主机网络 |
+| gateway.httpPort | int | `80` | HTTP 服务端口 |
+| gateway.httpsPort | int | `443` | HTTPS 服务端口 |
+| gateway.hub | string | `"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress"` | 网关镜像的基础域名 |
+| gateway.image | string | `"gateway"` |  |
+| gateway.kind | string | `"Deployment"` | 部署类型 |
+| gateway.labels | object | `{}` | 应用于所有资源的标签 |
+| gateway.metrics.enabled | bool | `false` | 启用网关度量收集 |
+| gateway.metrics.honorLabels | bool | `false` | 是否合并现有标签 |
+| gateway.metrics.interval | string | `""` | 度量间隔时间 |
+| gateway.metrics.provider | string | `"monitoring.coreos.com"` | 定义监控提供者 |
+| gateway.metrics.rawSpec | object | `{}` | 额外的度量规范 |
+| gateway.metrics.relabelConfigs | list | `[]` | 重新标签配置 |
+| gateway.metrics.relabelings | list | `[]` | 重新标签项 |
+| gateway.metrics.scrapeTimeout | string | `""` | 抓取的超时时间 |
+| gateway.name | string | `"higress-gateway"` | 网关名称 |
+| gateway.networkGateway | string | `""` | 网络网关指定 |
+| gateway.nodeSelector | object | `{}` | 节点选择器 |
+| gateway.replicas | int | `2` | Higress Gateway pod 的数量 |
+| gateway.resources.limits.cpu | string | `"2000m"` | 容器资源限制的 CPU |
+| gateway.resources.limits.memory | string | `"2048Mi"` | 容器资源限制的内存 |
+| gateway.resources.requests.cpu | string | `"2000m"` | 容器资源请求的 CPU |
+| gateway.resources.requests.memory | string | `"2048Mi"` | 容器资源请求的内存 |
+| gateway.revision | string | `""` | 网关所属版本声明 |
+| gateway.rollingMaxSurge | string | `"100%"` | 最大激增数目百分比 |
+| gateway.rollingMaxUnavailable | string | `"25%"` | 最大不可用比例 |
+| gateway.readinessFailureThreshold | int | `30` | 成功尝试之前连续失败的最大探测次数 |
+| gateway.readinessInitialDelaySeconds | int | `1` | 开始就绪探测前的初始延迟秒数 |
+| gateway.readinessPeriodSeconds | int | `2` | 就绪探测间隔秒数 |
+| gateway.readinessSuccessThreshold | int | `1` | 探测被视为成功所需的最小连续成功次数 |
+| gateway.readinessTimeoutSeconds | int | `3` | 就绪探测超时秒数 |
+| gateway.securityContext | string | `nil` | 网关 Pod 的安全上下文 |
+| gateway.service.annotations | object | `{}` | 应用于服务的注解 |
+| gateway.service.externalTrafficPolicy | string | `""` | 外部路由策略 |
+| gateway.service.loadBalancerClass | string | `""` | 负载均衡器类别 |
+| gateway.service.loadBalancerIP | string | `""` | 负载均衡器 IP 地址 |
+| gateway.service.loadBalancerSourceRanges | list | `[]` | 允许访问负载均衡器的 CIDR 范围 |
+| gateway.service.ports[0].name | string | `"http2"` | 服务定义的端口名称 |
+| gateway.service.ports[0].port | int | `80` | 服务端口 |
+| gateway.service.ports[0].protocol | string | `"TCP"` | 协议 |
+| gateway.service.ports[0].targetPort | int | `80` | 目标端口 |
+| gateway.service.ports[1].name | string | `"https"` | 服务定义的端口名称 |
+| gateway.service.ports[1].port | int | `443` | 服务端口 |
+| gateway.service.ports[1].protocol | string | `"TCP"` | 协议 |
+| gateway.service.ports[1].targetPort | int | `443` | 目标端口 |
+| gateway.service.type | string | `"LoadBalancer"` | 服务类型 |
+| global.disableAlpnH2 | bool | `false` | 设置是否禁用 ALPN 中的 http/2 |
+| ... | ... | ... | ... |
+
+由于内容较多，其余参数请参考英文版 [README.md](./README.md) 中的完整参数表。
--- a/istio/istio
+++ b/istio/istio
--- a/pkg/ingress/config/ingress_config.go
+++ b/pkg/ingress/config/ingress_config.go
@@ -63,6 +63,7 @@ import (
 	"github.com/alibaba/higress/pkg/ingress/kube/ingress"
 	"github.com/alibaba/higress/pkg/ingress/kube/ingressv1"
 	"github.com/alibaba/higress/pkg/ingress/kube/mcpbridge"
+	"github.com/alibaba/higress/pkg/ingress/kube/mcpserver"
 	"github.com/alibaba/higress/pkg/ingress/kube/secret"
 	"github.com/alibaba/higress/pkg/ingress/kube/util"
 	"github.com/alibaba/higress/pkg/ingress/kube/wasmplugin"
@@ -158,6 +159,8 @@ type IngressConfig struct {

 	// secretConfigMgr manages secret dependencies
 	secretConfigMgr *SecretConfigMgr
+
+	mcpServerCache mcpserver.McpServerCache
 }

 // getSecretValue implements the getValue function for secret references
@@ -224,6 +227,7 @@ func NewIngressConfig(localKubeClient kube.Client, xdsUpdater istiomodel.XDSUpda

 	higressConfigController := configmap.NewController(localKubeClient, clusterId, namespace)
 	config.configmapMgr = configmap.NewConfigmapMgr(xdsUpdater, namespace, higressConfigController, higressConfigController.Lister())
+	config.configmapMgr.RegisterMcpServerProvider(&config.mcpServerCache)

 	httpsConfigMgr, _ := cert.NewConfigMgr(namespace, localKubeClient.Kube())
 	config.httpsConfigMgr = httpsConfigMgr
@@ -421,6 +425,10 @@ func (m *IngressConfig) createWrapperConfigs(configs []config.Config) []common.W
 	m.watchedSecretSet = globalContext.WatchedSecrets
 	m.mutex.Unlock()

+	if m.mcpServerCache.SetMcpServers(globalContext.McpServers) {
+		m.notifyXDSFullUpdate(mcpserver.GvkMcpServer, "mcp-server-annotation-change", nil)
+	}
+
 	return wrapperConfigs
 }

@@ -590,7 +598,7 @@ func (m *IngressConfig) convertVirtualService(configs []common.WrapperConfig) []
 			Spec: vs,
 		})
 	}
-	// add vs from naco3 for mcp server
+	// add vs from nacos3 for mcp server
 	if m.RegistryReconciler != nil {
 		allConfigsFromMcp := m.RegistryReconciler.GetAllConfigs(gvk.VirtualService)
 		for _, cfg := range allConfigsFromMcp {
@@ -794,23 +802,38 @@ func (m *IngressConfig) convertDestinationRule(configs []common.WrapperConfig) [
 			if !exist {
 				destinationRules[serviceName] = destinationRuleWrapper
 			} else if dr.DestinationRule.TrafficPolicy != nil {
-				if dr.DestinationRule.TrafficPolicy.LoadBalancer == nil &&
-					destinationRuleWrapper.DestinationRule.TrafficPolicy.LoadBalancer != nil {
-					dr.DestinationRule.TrafficPolicy.LoadBalancer = destinationRuleWrapper.DestinationRule.TrafficPolicy.LoadBalancer
-				}
-				portTrafficPolicy := destinationRuleWrapper.DestinationRule.TrafficPolicy.PortLevelSettings[0]
-				portUpdated := false
-				for _, policy := range dr.DestinationRule.TrafficPolicy.PortLevelSettings {
-					if policy.Port.Number == portTrafficPolicy.Port.Number {
-						policy.Tls = portTrafficPolicy.Tls
-						portUpdated = true
-						break
+				// If the service is referenced by an SSE-type MCP server, a source-IP-based consistent hashing policy needs to be configured.
+				// The consistent hashing policy is generated by the MCP server watcher; it is merged in when the service does not have its own LoadBalancer settings.
+				if destinationRuleWrapper.DestinationRule.TrafficPolicy != nil && destinationRuleWrapper.DestinationRule.TrafficPolicy.LoadBalancer != nil {
+					if dr.DestinationRule.TrafficPolicy.LoadBalancer == nil {
+						dr.DestinationRule.TrafficPolicy.LoadBalancer = destinationRuleWrapper.DestinationRule.TrafficPolicy.LoadBalancer
+					} else if dr.DestinationRule.TrafficPolicy.LoadBalancer.LbPolicy == nil {
+						dr.DestinationRule.TrafficPolicy.LoadBalancer.LbPolicy = destinationRuleWrapper.DestinationRule.TrafficPolicy.LoadBalancer.LbPolicy
 					}
 				}
-				if portUpdated {
-					continue
+				// If the service is referenced by an HTTPS-type MCP server, a client-side simple-mode TLS policy needs to be configured.
+				// The simple-mode TLS policy is generated by the MCP server watcher; it is merged in when the service does not have its own TLS settings.
+				if dr.DestinationRule.TrafficPolicy.Tls == nil && destinationRuleWrapper.DestinationRule.TrafficPolicy != nil &&
+					destinationRuleWrapper.DestinationRule.TrafficPolicy.Tls != nil {
+					dr.DestinationRule.TrafficPolicy.Tls = destinationRuleWrapper.DestinationRule.TrafficPolicy.Tls
+				}
+				// Directly inherit or override the port policy (if it exists)
+				if len(destinationRuleWrapper.DestinationRule.TrafficPolicy.PortLevelSettings) > 0 {
+					portTrafficPolicy := destinationRuleWrapper.DestinationRule.TrafficPolicy.PortLevelSettings[0]
+					portUpdated := false
+					for _, policy := range dr.DestinationRule.TrafficPolicy.PortLevelSettings {
+						if policy.Port.Number == portTrafficPolicy.Port.Number {
+							policy.Tls = portTrafficPolicy.Tls
+							policy.LoadBalancer = portTrafficPolicy.LoadBalancer
+							portUpdated = true
+							break
+						}
+					}
+					if portUpdated {
+						continue
+					}
+					dr.DestinationRule.TrafficPolicy.PortLevelSettings = append(dr.DestinationRule.TrafficPolicy.PortLevelSettings, portTrafficPolicy)
 				}
-				dr.DestinationRule.TrafficPolicy.PortLevelSettings = append(dr.DestinationRule.TrafficPolicy.PortLevelSettings, portTrafficPolicy)
 			}
 		}
 	}
@@ -1208,9 +1231,9 @@ func (m *IngressConfig) AddOrUpdateMcpBridge(clusterNamespacedName util.ClusterN
 				f(config.Config{Meta: efMetadata}, config.Config{Meta: efMetadata}, istiomodel.EventUpdate)
 			}
 		}, m.localKubeClient, m.namespace, m.clusterId.String())
+		m.configmapMgr.RegisterMcpServerProvider(m.RegistryReconciler)
 	}
 	reconciler := m.RegistryReconciler
-	m.configmapMgr.SetMcpReconciler(m.RegistryReconciler)
 	err = reconciler.Reconcile(mcpbridge)
 	if err != nil {
 		IngressLog.Errorf("Mcpbridge reconcile failed, err:%v", err)
@@ -1776,3 +1799,19 @@ func (m *IngressConfig) Patch(config.Config, config.PatchFunc) (string, error) {
 func (m *IngressConfig) Delete(config.GroupVersionKind, string, string, *string) error {
 	return common.ErrUnsupportedOp
 }
+
+// notifyXDSFullUpdate asks the XDS updater for a full push with the given
+// trigger reason. When updatedConfigName is non-nil, the push request is
+// scoped to that single config (identified by the kind derived from gvk plus
+// the config's name and namespace); otherwise ConfigsUpdated is left nil.
+func (m *IngressConfig) notifyXDSFullUpdate(gvk config.GroupVersionKind, reason istiomodel.TriggerReason, updatedConfigName *util.ClusterNamespacedName) {
+	var updatedKeys map[istiomodel.ConfigKey]struct{}
+	if updatedConfigName != nil {
+		key := istiomodel.ConfigKey{
+			Kind:      kind.MustFromGVK(gvk),
+			Name:      updatedConfigName.Name,
+			Namespace: updatedConfigName.Namespace,
+		}
+		updatedKeys = map[istiomodel.ConfigKey]struct{}{key: {}}
+	}
+
+	pushRequest := &istiomodel.PushRequest{
+		Full:           true,
+		ConfigsUpdated: updatedKeys,
+		Reason:         istiomodel.NewReasonStats(reason),
+	}
+	m.XDSUpdater.ConfigUpdate(pushRequest)
+}
--- a/pkg/ingress/kube/annotations/annotations.go
+++ b/pkg/ingress/kube/annotations/annotations.go
@@ -21,6 +21,8 @@ import (
 	"istio.io/istio/pkg/cluster"
 	"istio.io/istio/pkg/util/sets"
 	listersv1 "k8s.io/client-go/listers/core/v1"
+
+	"github.com/alibaba/higress/pkg/ingress/kube/mcpserver"
 )

 type GlobalContext struct {
@@ -30,6 +32,8 @@ type GlobalContext struct {
 	ClusterSecretLister map[cluster.ID]listersv1.SecretLister

 	ClusterServiceList map[cluster.ID]listersv1.ServiceLister
+
+	McpServers []*mcpserver.McpServer
 }

 type Meta struct {
@@ -169,6 +173,7 @@ func NewAnnotationHandlerManager() AnnotationHandler {
 			match{},
 			headerControl{},
 			http2rpc{},
+			mcpServer{},
 		},
 		gatewayHandlers: []GatewayHandler{
 			downstreamTLS{},
--- a/pkg/ingress/kube/annotations/mcpserver.go
+++ b/pkg/ingress/kube/annotations/mcpserver.go
@@ -0,0 +1,94 @@
+// Copyright (c) 2023 Alibaba Group Holding Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package annotations
+
+import (
+	"strings"
+
+	"github.com/alibaba/higress/pkg/ingress/kube/mcpserver"
+	"github.com/alibaba/higress/pkg/ingress/log"
+)
+
+// Annotation names read by mcpServer.Parse through Annotations.ParseBoolASAP
+// and Annotations.ParseStringASAP. The tests wrap them with
+// buildHigressAnnotationKey, so these are the names without the annotation
+// domain prefix.
+const (
+	// enableMcpServer switches MCP server handling on for the ingress.
+	enableMcpServer            = "mcp-server"
+	// mcpServerMatchRuleDomains is a comma-separated domain list; empty or "*" matches all domains.
+	mcpServerMatchRuleDomains  = "mcp-server-match-rule-domains"
+	// mcpServerMatchRuleType is the path match type; it must be a key of mcpserver.ValidPathMatchTypes.
+	mcpServerMatchRuleType     = "mcp-server-match-rule-type"
+	// mcpServerMatchRuleValue is the path value the match type is applied to.
+	mcpServerMatchRuleValue    = "mcp-server-match-rule-value"
+	// mcpServerUpstreamType is optional; when set it must be a key of mcpserver.ValidUpstreamTypes.
+	mcpServerUpstreamType      = "mcp-server-upstream-type"
+	// mcpServerEnablePathRewrite toggles request path rewriting for the matched route.
+	mcpServerEnablePathRewrite = "mcp-server-enable-path-rewrite"
+	// mcpServerPathRewritePrefix is the prefix the request path is rewritten to.
+	mcpServerPathRewritePrefix = "mcp-server-path-rewrite-prefix"
+)
+
+// Compile-time assertion that mcpServer implements the Parser interface.
+var (
+	_ Parser = &mcpServer{}
+)
+
+// mcpServer is the stateless annotation handler that collects MCP server
+// settings declared on an ingress into GlobalContext.McpServers.
+type mcpServer struct{}
+
+// Parse reads the mcp-server annotations of an ingress and, when the feature
+// is enabled and the settings are valid, appends the resulting
+// mcpserver.McpServer (named "<namespace>/<name>") to globalContext.McpServers.
+//
+// Missing or unsupported settings are logged and the ingress is skipped; the
+// method always returns nil. A nil globalContext makes the call a no-op.
+func (a mcpServer) Parse(annotations Annotations, config *Ingress, globalContext *GlobalContext) error {
+	if globalContext == nil {
+		return nil
+	}
+
+	enabled, _ := annotations.ParseBoolASAP(enableMcpServer)
+	if !enabled {
+		return nil
+	}
+
+	ingressKey := config.Namespace + "/" + config.Name
+
+	// An empty value or "*" means "match all domains", represented by a nil slice.
+	var matchRuleDomains []string
+	rawMatchRuleDomains, _ := annotations.ParseStringASAP(mcpServerMatchRuleDomains)
+	if trimmed := strings.TrimSpace(rawMatchRuleDomains); trimmed != "" && trimmed != "*" {
+		for _, domain := range strings.Split(trimmed, ",") {
+			// Tolerate values such as "a.com, b.com" or a trailing comma instead of
+			// producing domains with surrounding spaces or empty entries.
+			if domain = strings.TrimSpace(domain); domain != "" {
+				matchRuleDomains = append(matchRuleDomains, domain)
+			}
+		}
+	}
+
+	// The match rule type is mandatory and must be one of the supported types.
+	matchRuleType, _ := annotations.ParseStringASAP(mcpServerMatchRuleType)
+	if matchRuleType == "" {
+		log.IngressLog.Errorf("ingress %s: %s is empty", ingressKey, mcpServerMatchRuleType)
+		return nil
+	}
+	if !mcpserver.ValidPathMatchTypes[matchRuleType] {
+		log.IngressLog.Errorf("ingress %s: %s %s is not supported", ingressKey, mcpServerMatchRuleType, matchRuleType)
+		return nil
+	}
+
+	matchRuleValue, _ := annotations.ParseStringASAP(mcpServerMatchRuleValue)
+
+	// The upstream type is optional; only a non-empty value is validated.
+	upstreamType, _ := annotations.ParseStringASAP(mcpServerUpstreamType)
+	if upstreamType != "" && !mcpserver.ValidUpstreamTypes[upstreamType] {
+		log.IngressLog.Errorf("ingress %s: %s %s is not supported", ingressKey, mcpServerUpstreamType, upstreamType)
+		return nil
+	}
+
+	enablePathRewrite, _ := annotations.ParseBoolASAP(mcpServerEnablePathRewrite)
+	pathRewritePrefix, _ := annotations.ParseStringASAP(mcpServerPathRewritePrefix)
+
+	globalContext.McpServers = append(globalContext.McpServers, &mcpserver.McpServer{
+		Name:              ingressKey,
+		Domains:           matchRuleDomains,
+		PathMatchType:     matchRuleType,
+		PathMatchValue:    matchRuleValue,
+		UpstreamType:      upstreamType,
+		EnablePathRewrite: enablePathRewrite,
+		PathRewritePrefix: pathRewritePrefix,
+	})
+
+	return nil
+}
--- a/pkg/ingress/kube/annotations/mcpserver_test.go
+++ b/pkg/ingress/kube/annotations/mcpserver_test.go
@@ -0,0 +1,257 @@
+// Copyright (c) 2025 Alibaba Group Holding Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package annotations
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+
+	"github.com/alibaba/higress/pkg/ingress/kube/mcpserver"
+)
+
+// TestMCPServer_Parse checks that mcpServer.Parse appends exactly one
+// McpServer to the global context for valid annotation sets and nothing for
+// disabled or invalid ones. Each case runs as a named subtest; a case marked
+// with skip is skipped on its own without affecting the remaining cases.
+func TestMCPServer_Parse(t *testing.T) {
+	parser := mcpServer{}
+	testCases := []struct {
+		name   string
+		skip   bool
+		input  Annotations
+		expect *mcpserver.McpServer
+	}{
+		{
+			name:   "no annotation",
+			input:  Annotations{},
+			expect: nil,
+		},
+		{
+			name: "not enabled",
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "false",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "prefix",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "",
+			},
+			expect: nil,
+		},
+		{
+			name: "enabled but no match rule type",
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "",
+			},
+			expect: nil,
+		},
+		{
+			name: "enabled but empty match rule type",
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "",
+			},
+			expect: nil,
+		},
+		{
+			name: "enabled but bad match rule type",
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "bad-type",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "",
+			},
+			expect: nil,
+		},
+		{
+			name: "enabled but bad upstream type",
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "prefix",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "bad-type",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "",
+			},
+			expect: nil,
+		},
+		{
+			name: "enabled and rewrite not enabled",
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "prefix",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "false",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "/",
+			},
+			expect: &mcpserver.McpServer{
+				Name:              "default/route",
+				Domains:           []string{"www.foo.com"},
+				PathMatchType:     "prefix",
+				PathMatchValue:    "/mcp",
+				UpstreamType:      "rest",
+				EnablePathRewrite: false,
+				PathRewritePrefix: "/",
+			},
+		},
+		{
+			name: "enabled and rewrite not enabled and empty domain",
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "prefix",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "false",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "/",
+			},
+			expect: &mcpserver.McpServer{
+				Name:              "default/route",
+				Domains:           nil,
+				PathMatchType:     "prefix",
+				PathMatchValue:    "/mcp",
+				UpstreamType:      "rest",
+				EnablePathRewrite: false,
+				PathRewritePrefix: "/",
+			},
+		},
+		{
+			name: "enabled and rewrite not enabled and wildcard domain",
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "*",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "prefix",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "false",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "/",
+			},
+			expect: &mcpserver.McpServer{
+				Name:              "default/route",
+				Domains:           nil,
+				PathMatchType:     "prefix",
+				PathMatchValue:    "/mcp",
+				UpstreamType:      "rest",
+				EnablePathRewrite: false,
+				PathRewritePrefix: "/",
+			},
+		},
+		{
+			name: "enabled and rewrite enabled with root",
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "prefix",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "true",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "/",
+			},
+			expect: &mcpserver.McpServer{
+				Name:              "default/route",
+				Domains:           []string{"www.foo.com"},
+				PathMatchType:     "prefix",
+				PathMatchValue:    "/mcp",
+				UpstreamType:      "rest",
+				EnablePathRewrite: true,
+				PathRewritePrefix: "/",
+			},
+		},
+		{
+			name: "enabled and rewrite enabled with non-root prefix",
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "prefix",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "rest",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "true",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "/mcp-api",
+			},
+			expect: &mcpserver.McpServer{
+				Name:              "default/route",
+				Domains:           []string{"www.foo.com"},
+				PathMatchType:     "prefix",
+				PathMatchValue:    "/mcp",
+				UpstreamType:      "rest",
+				EnablePathRewrite: true,
+				PathRewritePrefix: "/mcp-api",
+			},
+		},
+		{
+			name: "enabled and multiple domains",
+			input: Annotations{
+				buildHigressAnnotationKey(enableMcpServer):            "true",
+				buildHigressAnnotationKey(mcpServerMatchRuleDomains):  "www.foo.com,www.bar.com",
+				buildHigressAnnotationKey(mcpServerMatchRuleType):     "exact",
+				buildHigressAnnotationKey(mcpServerMatchRuleValue):    "/mcp",
+				buildHigressAnnotationKey(mcpServerUpstreamType):      "sse",
+				buildHigressAnnotationKey(mcpServerEnablePathRewrite): "true",
+				buildHigressAnnotationKey(mcpServerPathRewritePrefix): "/",
+			},
+			expect: &mcpserver.McpServer{
+				Name:              "default/route",
+				Domains:           []string{"www.foo.com", "www.bar.com"},
+				PathMatchType:     "exact",
+				PathMatchValue:    "/mcp",
+				UpstreamType:      "sse",
+				EnablePathRewrite: true,
+				PathRewritePrefix: "/",
+			},
+		},
+	}
+
+	for _, tt := range testCases {
+		t.Run(tt.name, func(t *testing.T) {
+			// Skip only this case; returning from the outer loop would silently
+			// drop every case that follows it.
+			if tt.skip {
+				t.Skip("test case is marked as skipped")
+			}
+
+			config := &Ingress{Meta: Meta{
+				Namespace: "default",
+				Name:      "route",
+			}}
+			globalContext := &GlobalContext{}
+			_ = parser.Parse(tt.input, config, globalContext)
+			if tt.expect == nil {
+				if len(globalContext.McpServers) != 0 {
+					t.Fatalf("globalContext.McpServers is not empty: %v", globalContext.McpServers)
+				}
+				return
+			}
+
+			if len(globalContext.McpServers) != 1 {
+				t.Fatalf("globalContext.McpServers length is not 1: %v", globalContext.McpServers)
+			}
+
+			if diff := cmp.Diff(tt.expect, globalContext.McpServers[0]); diff != "" {
+				t.Fatalf("TestMCPServer_Parse() mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
--- a/pkg/ingress/kube/configmap/controller.go
+++ b/pkg/ingress/kube/configmap/controller.go
@@ -18,7 +18,6 @@ import (
 	"reflect"
 	"sync/atomic"

-	"github.com/alibaba/higress/registry/reconcile"
 	"istio.io/istio/pilot/pkg/model"
 	"istio.io/istio/pkg/cluster"
 	"istio.io/istio/pkg/config"
@@ -33,6 +32,7 @@ import (
 	"sigs.k8s.io/yaml"

 	"github.com/alibaba/higress/pkg/ingress/kube/controller"
+	"github.com/alibaba/higress/pkg/ingress/kube/mcpserver"
 	"github.com/alibaba/higress/pkg/ingress/kube/util"
 	. "github.com/alibaba/higress/pkg/ingress/log"
 )
@@ -59,7 +59,6 @@ type ItemController interface {
 	ValidHigressConfig(higressConfig *HigressConfig) error
 	ConstructEnvoyFilters() ([]*config.Config, error)
 	RegisterItemEventHandler(eventHandler ItemEventHandler)
-	RegisterMcpReconciler(reconciler *reconcile.Reconciler)
 }

 type ConfigmapMgr struct {
@@ -113,9 +112,11 @@ func (c *ConfigmapMgr) GetHigressConfig() *HigressConfig {
 	return nil
 }

-func (c *ConfigmapMgr) SetMcpReconciler(reconciler *reconcile.Reconciler) {
+// RegisterMcpServerProvider passes the provider to every item controller that
+// implements mcpserver.McpRouteProviderAware; controllers that do not
+// implement it are left untouched.
+func (c *ConfigmapMgr) RegisterMcpServerProvider(provider mcpserver.McpServerProvider) {
 	for _, itemController := range c.ItemControllers {
-		itemController.RegisterMcpReconciler(reconciler)
+		if mcpRouteProviderAware, ok := itemController.(mcpserver.McpRouteProviderAware); ok {
+			mcpRouteProviderAware.RegisterMcpServerProvider(provider)
+		}
 	}
 }

--- a/pkg/ingress/kube/configmap/global.go
+++ b/pkg/ingress/kube/configmap/global.go
@@ -21,7 +21,6 @@ import (

 	"github.com/alibaba/higress/pkg/ingress/kube/util"
 	. "github.com/alibaba/higress/pkg/ingress/log"
-	"github.com/alibaba/higress/registry/reconcile"
 	networking "istio.io/api/networking/v1alpha3"
 	"istio.io/istio/pkg/config"
 	"istio.io/istio/pkg/config/schema/gvk"
@@ -377,9 +376,6 @@ func (g *GlobalOptionController) RegisterItemEventHandler(eventHandler ItemEvent
 	g.eventHandler = eventHandler
 }

-func (g *GlobalOptionController) RegisterMcpReconciler(reconciler *reconcile.Reconciler) {
-}
-
 // generateDownstreamEnvoyFilter generates the downstream envoy filter.
 func (g *GlobalOptionController) generateDownstreamEnvoyFilter(downstreamValueStruct string, bufferLimitStruct string, routeTimeoutStruct string, namespace string) []*networking.EnvoyFilter_EnvoyConfigObjectPatch {
 	var downstreamConfig []*networking.EnvoyFilter_EnvoyConfigObjectPatch
--- a/pkg/ingress/kube/configmap/gzip.go
+++ b/pkg/ingress/kube/configmap/gzip.go
@@ -23,7 +23,6 @@ import (

 	"github.com/alibaba/higress/pkg/ingress/kube/util"
 	. "github.com/alibaba/higress/pkg/ingress/log"
-	"github.com/alibaba/higress/registry/reconcile"
 	networking "istio.io/api/networking/v1alpha3"
 	"istio.io/istio/pkg/config"
 	"istio.io/istio/pkg/config/schema/gvk"
@@ -292,9 +291,6 @@ func (g *GzipController) RegisterItemEventHandler(eventHandler ItemEventHandler)
 	g.eventHandler = eventHandler
 }

-func (g *GzipController) RegisterMcpReconciler(reconciler *reconcile.Reconciler) {
-}
-
 func (g *GzipController) constructGzipStruct(gzip *Gzip, namespace string) string {
 	gzipConfig := ""
 	contentType := ""
--- a/pkg/ingress/kube/configmap/mcp_server.go
+++ b/pkg/ingress/kube/configmap/mcp_server.go
@@ -22,12 +22,13 @@ import (
 	"strings"
 	"sync/atomic"

-	"github.com/alibaba/higress/pkg/ingress/kube/util"
-	. "github.com/alibaba/higress/pkg/ingress/log"
-	"github.com/alibaba/higress/registry/reconcile"
 	networking "istio.io/api/networking/v1alpha3"
 	"istio.io/istio/pkg/config"
 	"istio.io/istio/pkg/config/schema/gvk"
+
+	"github.com/alibaba/higress/pkg/ingress/kube/mcpserver"
+	"github.com/alibaba/higress/pkg/ingress/kube/util"
+	. "github.com/alibaba/higress/pkg/ingress/log"
 )

 // RedisConfig defines the configuration for Redis connection
@@ -56,7 +57,7 @@ type MCPRatelimitConfig struct {
 type SSEServer struct {
 	// The name of the SSE server
 	Name string `json:"name,omitempty"`
-	// The path where the SSE server will be mounted, the full path is (PATH + SsePathSuffix)
+	// The path where the SSE server will be mounted, the full path is (PATH + SSEPathSuffix)
 	Path string `json:"path,omitempty"`
 	// The type of the SSE server
 	Type string `json:"type,omitempty"`
@@ -74,6 +75,12 @@ type MatchRule struct {
 	MatchRulePath string `json:"match_rule_path,omitempty"`
 	// Type of match rule: exact, prefix, suffix, contains, regex
 	MatchRuleType string `json:"match_rule_type,omitempty"`
+	// Type of upstream(s) matched by the rule: rest (default), sse
+	UpstreamType string `json:"upstream_type"`
+	// Enable request path rewrite for matched routes
+	EnablePathRewrite bool `json:"enable_path_rewrite"`
+	// Prefix the request path would be rewritten to.
+	PathRewritePrefix string `json:"path_rewrite_prefix"`
 }

 // McpServer defines the configuration for MCP (Model Context Protocol) server
@@ -83,7 +90,7 @@ type McpServer struct {
 	// Redis Config for MCP server
 	Redis *RedisConfig `json:"redis,omitempty"`
 	// The suffix to be appended to SSE paths, default is "/sse"
-	SsePathSuffix string `json:"sse_path_suffix,omitempty"`
+	SSEPathSuffix string `json:"sse_path_suffix,omitempty"`
 	// List of SSE servers Configs
 	Servers []*SSEServer `json:"servers,omitempty"`
 	// List of match rules for filtering requests
@@ -118,21 +125,32 @@ func validMcpServer(m *McpServer) error {

 	// Validate match rule types
 	if m.MatchList != nil {
-		validTypes := map[string]bool{
+		validMatchRuleTypes := map[string]bool{
 			"exact":    true,
 			"prefix":   true,
 			"suffix":   true,
 			"contains": true,
 			"regex":    true,
 		}
+		validUpstreamTypes := map[string]bool{
+			"rest":       true,
+			"sse":        true,
+			"streamable": true,
+		}

 		for _, rule := range m.MatchList {
 			if rule.MatchRuleType == "" {
 				return errors.New("match_rule_type cannot be empty, must be one of: exact, prefix, suffix, contains, regex")
 			}
-			if !validTypes[rule.MatchRuleType] {
+			if !validMatchRuleTypes[rule.MatchRuleType] {
 				return fmt.Errorf("invalid match_rule_type: %s, must be one of: exact, prefix, suffix, contains, regex", rule.MatchRuleType)
 			}
+			if rule.UpstreamType != "" && !validUpstreamTypes[rule.UpstreamType] {
+				return fmt.Errorf("invalid upstream_type: %s, must be one of: rest, sse, streamable", rule.UpstreamType)
+			}
+			if rule.EnablePathRewrite && rule.UpstreamType != "sse" {
+				return errors.New("path rewrite is only supported for SSE upstream type")
+			}
 		}
 	}

@@ -174,7 +192,7 @@ func deepCopyMcpServer(mcp *McpServer) (*McpServer, error) {
 			WhiteList: mcp.Ratelimit.WhiteList,
 		}
 	}
-	newMcp.SsePathSuffix = mcp.SsePathSuffix
+	newMcp.SSEPathSuffix = mcp.SSEPathSuffix

 	newMcp.EnableUserLevelServer = mcp.EnableUserLevelServer

@@ -201,9 +219,12 @@ func deepCopyMcpServer(mcp *McpServer) (*McpServer, error) {
 		newMcp.MatchList = make([]*MatchRule, len(mcp.MatchList))
 		for i, rule := range mcp.MatchList {
 			newMcp.MatchList[i] = &MatchRule{
-				MatchRuleDomain: rule.MatchRuleDomain,
-				MatchRulePath:   rule.MatchRulePath,
-				MatchRuleType:   rule.MatchRuleType,
+				MatchRuleDomain:   rule.MatchRuleDomain,
+				MatchRulePath:     rule.MatchRulePath,
+				MatchRuleType:     rule.MatchRuleType,
+				UpstreamType:      rule.UpstreamType,
+				EnablePathRewrite: rule.EnablePathRewrite,
+				PathRewritePrefix: rule.PathRewritePrefix,
 			}
 		}
 	}
@@ -212,18 +233,19 @@ func deepCopyMcpServer(mcp *McpServer) (*McpServer, error) {
 }

 type McpServerController struct {
-	Namespace    string
-	mcpServer    atomic.Value
-	Name         string
-	eventHandler ItemEventHandler
-	reconclier   *reconcile.Reconciler
+	Namespace          string
+	mcpServer          atomic.Value
+	Name               string
+	eventHandler       ItemEventHandler
+	// mcpServerProviders is used as a set (the value is always true) of the
+	// providers added through RegisterMcpServerProvider; their servers are
+	// merged into the generated match list.
+	mcpServerProviders map[mcpserver.McpServerProvider]bool
 }

 func NewMcpServerController(namespace string) *McpServerController {
 	mcpController := &McpServerController{
-		Namespace: namespace,
-		mcpServer: atomic.Value{},
-		Name:      "mcpServer",
+		Namespace:          namespace,
+		Name:               "mcpServer",
+		mcpServer:          atomic.Value{},
+		mcpServerProviders: make(map[mcpserver.McpServerProvider]bool),
 	}
 	mcpController.SetMcpServer(NewDefaultMcpServer())
 	return mcpController
@@ -290,8 +312,11 @@ func (m *McpServerController) RegisterItemEventHandler(eventHandler ItemEventHan
 	m.eventHandler = eventHandler
 }

-func (m *McpServerController) RegisterMcpReconciler(reconciler *reconcile.Reconciler) {
-	m.reconclier = reconciler
+// RegisterMcpServerProvider records the provider in the controller's provider
+// set. Registering the same provider value again has no additional effect.
+// NOTE(review): the provider is used as a map key, so its dynamic type must be
+// comparable (e.g. a pointer) — confirm for every implementation.
+func (m *McpServerController) RegisterMcpServerProvider(provider mcpserver.McpServerProvider) {
+	// Guard against controllers that were built without NewMcpServerController.
+	if m.mcpServerProviders == nil {
+		m.mcpServerProviders = make(map[mcpserver.McpServerProvider]bool)
+	}
+	m.mcpServerProviders[provider] = true
 }

 func (m *McpServerController) ConstructEnvoyFilters() ([]*config.Config, error) {
@@ -386,37 +411,47 @@ func (m *McpServerController) ConstructEnvoyFilters() ([]*config.Config, error)

 func (m *McpServerController) constructMcpSessionStruct(mcp *McpServer) string {
 	// Build match_list configuration
-	matchList := "[]"
-	var matchConfigs []string
-	if len(mcp.MatchList) > 0 {
-		for _, rule := range mcp.MatchList {
-			matchConfigs = append(matchConfigs, fmt.Sprintf(`{
-				"match_rule_domain": "%s",
-				"match_rule_path": "%s",
-				"match_rule_type": "%s"
-			}`, rule.MatchRuleDomain, rule.MatchRulePath, rule.MatchRuleType))
+	var matchList []*MatchRule
+	matchList = append(matchList, mcp.MatchList...)
+	for provider, _ := range m.mcpServerProviders {
+		servers := provider.GetMcpServers()
+		if len(servers) == 0 {
+			continue
 		}
-	}
-
-	if m.reconclier != nil {
-		vsFromMcp := m.reconclier.GetAllConfigs(gvk.VirtualService)
-		for _, c := range vsFromMcp {
-			vs := c.Spec.(*networking.VirtualService)
-			var host string
-			if len(vs.Hosts) > 1 {
-				host = fmt.Sprintf("(%s)", strings.Join(vs.Hosts, "|"))
-			} else {
-				host = vs.Hosts[0]
+		for _, server := range servers {
+			matchRuleDomain := ""
+			if len(server.Domains) != 0 {
+				if len(server.Domains) > 1 {
+					matchRuleDomain = fmt.Sprintf("(%s)", strings.Join(server.Domains, "|"))
+				} else {
+					matchRuleDomain = server.Domains[0]
+				}
 			}
-			path := vs.Http[0].Match[0].Uri.GetPrefix()
+			matchList = append(matchList, &MatchRule{
+				MatchRuleDomain:   matchRuleDomain,
+				MatchRuleType:     server.PathMatchType,
+				MatchRulePath:     server.PathMatchValue,
+				UpstreamType:      server.UpstreamType,
+				EnablePathRewrite: server.EnablePathRewrite,
+				PathRewritePrefix: server.PathRewritePrefix,
+			})
+		}
+	}
+	matchListConfig := "[]"
+	if len(matchList) > 0 {
+		matchConfigs := make([]string, 0, len(matchList))
+		for _, rule := range matchList {
 			matchConfigs = append(matchConfigs, fmt.Sprintf(`{
 				"match_rule_domain": "%s",
 				"match_rule_path": "%s",
-				"match_rule_type": "prefix"
-			}`, host, path))
+				"match_rule_type": "%s",
+				"upstream_type": "%s",
+				"enable_path_rewrite": %t,
+				"path_rewrite_prefix": "%s"
+			}`, rule.MatchRuleDomain, rule.MatchRulePath, rule.MatchRuleType, rule.UpstreamType, rule.EnablePathRewrite, rule.PathRewritePrefix))
 		}
+		matchListConfig = fmt.Sprintf("[%s]", strings.Join(matchConfigs, ","))
 	}
-	matchList = fmt.Sprintf("[%s]", strings.Join(matchConfigs, ","))

 	// Build redis configuration
 	redisConfig := "null"
@@ -468,8 +503,8 @@ func (m *McpServerController) constructMcpSessionStruct(mcp *McpServer) string {
 	}`,
 		redisConfig,
 		rateLimitConfig,
-		mcp.SsePathSuffix,
-		matchList,
+		mcp.SSEPathSuffix,
+		matchListConfig,
 		mcp.EnableUserLevelServer)
 }

--- a/pkg/ingress/kube/configmap/mcp_server_test.go
+++ b/pkg/ingress/kube/configmap/mcp_server_test.go
@@ -54,6 +54,61 @@ func Test_validMcpServer(t *testing.T) {
 			},
 			wantErr: nil,
 		},
+		{
+			name: "enabled but bad match_rule_type",
+			mcp: &McpServer{
+				Enable:                true,
+				EnableUserLevelServer: false,
+				Redis:                 nil,
+				MatchList: []*MatchRule{
+					{
+						MatchRuleDomain: "*",
+						MatchRulePath:   "/mcp",
+						MatchRuleType:   "bad-type",
+					},
+				},
+				Servers: []*SSEServer{},
+			},
+			wantErr: errors.New("invalid match_rule_type: bad-type, must be one of: exact, prefix, suffix, contains, regex"),
+		},
+		{
+			name: "enabled but bad upstream_type",
+			mcp: &McpServer{
+				Enable:                true,
+				EnableUserLevelServer: false,
+				Redis:                 nil,
+				MatchList: []*MatchRule{
+					{
+						MatchRuleDomain: "*",
+						MatchRulePath:   "/mcp",
+						MatchRuleType:   "prefix",
+						UpstreamType:    "bad-type",
+					},
+				},
+				Servers: []*SSEServer{},
+			},
+			wantErr: errors.New("invalid upstream_type: bad-type, must be one of: rest, sse, streamable"),
+		},
+		{
+			name: "enabled but path rewrite with unsupported upstream type",
+			mcp: &McpServer{
+				Enable:                true,
+				EnableUserLevelServer: false,
+				Redis:                 nil,
+				MatchList: []*MatchRule{
+					{
+						MatchRuleDomain:   "*",
+						MatchRulePath:     "/mcp",
+						MatchRuleType:     "prefix",
+						UpstreamType:      "rest",
+						EnablePathRewrite: true,
+						PathRewritePrefix: "/",
+					},
+				},
+				Servers: []*SSEServer{},
+			},
+			wantErr: errors.New("path rewrite is only supported for SSE upstream type"),
+		},
 		{
 			name: "enabled with user level server but no redis config",
 			mcp: &McpServer{
@@ -76,7 +131,7 @@ func Test_validMcpServer(t *testing.T) {
 					Password: "password",
 					DB:       0,
 				},
-				SsePathSuffix: "/sse",
+				SSEPathSuffix: "/sse",
 				MatchList: []*MatchRule{
 					{
 						MatchRuleDomain: "*",
@@ -238,7 +293,7 @@ func Test_deepCopyMcpServer(t *testing.T) {
 					Password: "password",
 					DB:       0,
 				},
-				SsePathSuffix: "/sse",
+				SSEPathSuffix: "/sse",
 				MatchList: []*MatchRule{
 					{
 						MatchRuleDomain: "*",
@@ -265,7 +320,7 @@ func Test_deepCopyMcpServer(t *testing.T) {
 					Password: "password",
 					DB:       0,
 				},
-				SsePathSuffix: "/sse",
+				SSEPathSuffix: "/sse",
 				MatchList: []*MatchRule{
 					{
 						MatchRuleDomain: "*",
@@ -581,13 +636,27 @@ func TestMcpServerController_constructMcpSessionStruct(t *testing.T) {
 					Password: "pass",
 					DB:       1,
 				},
-				SsePathSuffix: "/sse",
+				SSEPathSuffix: "/sse",
 				MatchList: []*MatchRule{
 					{
 						MatchRuleDomain: "*",
 						MatchRulePath:   "/test",
 						MatchRuleType:   "exact",
 					},
+					{
+						MatchRuleDomain: "*",
+						MatchRulePath:   "/sse-test-1",
+						MatchRuleType:   "prefix",
+						UpstreamType:    "sse",
+					},
+					{
+						MatchRuleDomain:   "*",
+						MatchRulePath:     "/sse-test-2",
+						MatchRuleType:     "prefix",
+						UpstreamType:      "sse",
+						EnablePathRewrite: true,
+						PathRewritePrefix: "/mcp",
+					},
 				},
 				EnableUserLevelServer: true,
 				Ratelimit: &MCPRatelimitConfig{
@@ -623,7 +692,24 @@ func TestMcpServerController_constructMcpSessionStruct(t *testing.T) {
 								"match_list": [{
 									"match_rule_domain": "*",
 									"match_rule_path": "/test",
-									"match_rule_type": "exact"
+									"match_rule_type": "exact",
+									"upstream_type": "",
+									"enable_path_rewrite": false,
+									"path_rewrite_prefix": ""
+								},{
+									"match_rule_domain": "*",
+									"match_rule_path": "/sse-test-1",
+									"match_rule_type": "prefix",
+									"upstream_type": "sse",
+									"enable_path_rewrite": false,
+									"path_rewrite_prefix": ""
+								},{
+									"match_rule_domain": "*",
+									"match_rule_path": "/sse-test-2",
+									"match_rule_type": "prefix",
+									"upstream_type": "sse",
+									"enable_path_rewrite": true,
+									"path_rewrite_prefix": "/mcp"
 								}],
 								"enable_user_level_server": true
 							}
--- a/pkg/ingress/kube/configmap/tracing.go
+++ b/pkg/ingress/kube/configmap/tracing.go
@@ -21,7 +21,6 @@ import (
 	"reflect"
 	"sync/atomic"

-	"github.com/alibaba/higress/registry/reconcile"
 	"istio.io/istio/pkg/config"
 	"istio.io/istio/pkg/config/schema/gvk"

@@ -238,9 +237,6 @@ func (t *TracingController) RegisterItemEventHandler(eventHandler ItemEventHandl
 	t.eventHandler = eventHandler
 }

-func (t *TracingController) RegisterMcpReconciler(reconciler *reconcile.Reconciler) {
-}
-
 func (t *TracingController) ConstructEnvoyFilters() ([]*config.Config, error) {
 	configs := make([]*config.Config, 0)
 	tracing := t.GetTracing()
--- a/pkg/ingress/kube/mcpserver/model.go
+++ b/pkg/ingress/kube/mcpserver/model.go
@@ -0,0 +1,60 @@
+// Copyright (c) 2025 Alibaba Group Holding Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package mcpserver
+
+import (
+	"istio.io/istio/pkg/config"
+)
+
+var (
+	// GvkMcpServer is the GroupVersionKind of the McpServer resource:
+	// group "networking.higress.io", version "v1alpha1", kind "McpServer".
+	GvkMcpServer = config.GroupVersionKind{Group: "networking.higress.io", Version: "v1alpha1", Kind: "McpServer"}
+)
+
+const (
+	// Upstream type names. These are exactly the keys of ValidUpstreamTypes
+	// and the values the tests store in McpServer.UpstreamType.
+	UpstreamTypeRest       string = "rest"
+	UpstreamTypeSSE        string = "sse"
+	UpstreamTypeStreamable string = "streamable"
+
+	// Path match type names. These are exactly the keys of
+	// ValidPathMatchTypes; the tests store them in McpServer.PathMatchType.
+	ExactMatchType    string = "exact"
+	PrefixMatchType   string = "prefix"
+	SuffixMatchType   string = "suffix"
+	ContainsMatchType string = "contains"
+	RegexMatchType    string = "regex"
+)
+
+var (
+	// ValidUpstreamTypes is a lookup set of the UpstreamType* constants.
+	// Indexing with any other string yields false (the map zero value).
+	ValidUpstreamTypes = map[string]bool{
+		UpstreamTypeRest:       true,
+		UpstreamTypeSSE:        true,
+		UpstreamTypeStreamable: true,
+	}
+	// ValidPathMatchTypes is a lookup set of the *MatchType constants.
+	// Indexing with any other string yields false (the map zero value).
+	ValidPathMatchTypes = map[string]bool{
+		ExactMatchType:    true,
+		PrefixMatchType:   true,
+		SuffixMatchType:   true,
+		ContainsMatchType: true,
+		RegexMatchType:    true,
+	}
+)
+
+// McpServer is one MCP server entry as stored in McpServerCache.
+// Every field is tagged omitempty, so zero values are left out of the
+// JSON encoding.
+type McpServer struct {
+	// Name is the key McpServerCache.SetMcpServers sorts entries by.
+	Name              string   `json:"name,omitempty"`
+	// Domains: presumably the host names this server is matched on;
+	// nil in some test fixtures. TODO confirm semantics with the consumer.
+	Domains           []string `json:"domains,omitempty"`
+	// PathMatchType is expected to be one of the *MatchType constants
+	// (see ValidPathMatchTypes); nothing in this type enforces that.
+	PathMatchType     string   `json:"path_match_type,omitempty"`
+	// PathMatchValue is the path string paired with PathMatchType.
+	PathMatchValue    string   `json:"path_match_value,omitempty"`
+	// UpstreamType is expected to be one of the UpstreamType* constants
+	// (see ValidUpstreamTypes); nothing in this type enforces that.
+	UpstreamType      string   `json:"upstream_type,omitempty"`
+	// EnablePathRewrite / PathRewritePrefix: presumably the prefix is only
+	// applied when the flag is true. NOTE(review): confirm with the consumer.
+	EnablePathRewrite bool     `json:"enable_path_rewrite,omitempty"`
+	PathRewritePrefix string   `json:"path_rewrite_prefix,omitempty"`
+}
--- a/pkg/ingress/kube/mcpserver/provider.go
+++ b/pkg/ingress/kube/mcpserver/provider.go
@@ -0,0 +1,70 @@
+// Copyright (c) 2025 Alibaba Group Holding Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package mcpserver
+
+import (
+	"reflect"
+	"slices"
+	"strings"
+	"sync"
+)
+
+// McpServerProvider is a read-only source of the current McpServer list.
+// *McpServerCache satisfies it.
+type McpServerProvider interface {
+	// GetMcpServers returns the currently known MCP servers.
+	GetMcpServers() []*McpServer
+}
+
+// McpRouteProviderAware is implemented by components that accept a
+// McpServerProvider through RegisterMcpServerProvider.
+type McpRouteProviderAware interface {
+	// RegisterMcpServerProvider hands the provider to the implementer.
+	RegisterMcpServerProvider(provider McpServerProvider)
+}
+
+// McpServerCache holds a list of McpServer entries guarded by an RWMutex.
+// The zero value is ready to use and reports a nil list.
+type McpServerCache struct {
+	// mcpServers is replaced wholesale by SetMcpServers; it is stored
+	// sorted by Name in ascending order.
+	mcpServers []*McpServer
+	// mutex guards mcpServers: read lock in GetMcpServers, write lock in
+	// SetMcpServers.
+	mutex      sync.RWMutex
+}
+
+// GetMcpServers returns the cached server list while holding the read lock.
+// The internal slice is returned as-is (no copy), so callers should treat
+// the slice and the entries it points to as read-only.
+func (c *McpServerCache) GetMcpServers() []*McpServer {
+	c.mutex.RLock()
+	defer c.mutex.RUnlock()
+	return c.mcpServers
+}
+
+// SetMcpServers stores a Name-sorted copy of mcpServers and reports whether
+// the cached list changed. The caller's slice is never reordered. When the
+// sorted input has the same length as the cached list and every position is
+// reflect.DeepEqual to the cached entry, the cache is left untouched and
+// false is returned; otherwise the cache is replaced and true is returned.
+func (c *McpServerCache) SetMcpServers(mcpServers []*McpServer) bool {
+	c.mutex.Lock()
+	defer c.mutex.Unlock()
+
+	// Work on a copy so that sorting does not reorder the caller's slice.
+	sortedMcpServers := make([]*McpServer, 0, len(mcpServers))
+	sortedMcpServers = append(sortedMcpServers, mcpServers...)
+	// Sort the mcp servers by Name in ascending order, so that the
+	// comparison below does not depend on the order of the input.
+	slices.SortFunc(sortedMcpServers, func(a, b *McpServer) int {
+		return strings.Compare(a.Name, b.Name)
+	})
+
+	// Only lists of equal length can be unchanged; compare position by
+	// position. reflect.DeepEqual treats a nil slice and an empty non-nil
+	// slice as different, so Domains going from nil to empty is a change.
+	if len(c.mcpServers) == len(sortedMcpServers) {
+		changed := false
+		for i := range c.mcpServers {
+			if !reflect.DeepEqual(c.mcpServers[i], sortedMcpServers[i]) {
+				changed = true
+				break
+			}
+		}
+		if !changed {
+			return false
+		}
+	}
+
+	c.mcpServers = sortedMcpServers
+	return true
+}
--- a/pkg/ingress/kube/mcpserver/provider_test.go
+++ b/pkg/ingress/kube/mcpserver/provider_test.go
@@ -0,0 +1,654 @@
+// Copyright (c) 2025 Alibaba Group Holding Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package mcpserver
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+)
+
+// TestMcpServerCache_GetSet seeds a fresh McpServerCache with tt.init, then
+// calls SetMcpServers(tt.input) and checks both the returned change flag and
+// the list reported by GetMcpServers. Expected lists are written in ascending
+// Name order because that is the order the cache stores entries in.
+func TestMcpServerCache_GetSet(t *testing.T) {
+	testCases := []struct {
+		name    string
+		skip    bool
+		init    []*McpServer
+		input   []*McpServer
+		expect  []*McpServer
+		changed bool
+	}{
+		{
+			name:    "nil",
+			init:    nil,
+			input:   nil,
+			changed: false,
+			expect:  nil,
+		},
+		{
+			name: "nil to non-nil",
+			init: nil,
+			input: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			changed: true,
+			expect: []*McpServer{
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+			},
+		},
+		{
+			name: "non-nil to non-nil (length increase)",
+			init: []*McpServer{
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+			},
+			input: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			changed: true,
+			expect: []*McpServer{
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+			},
+		},
+		{
+			name: "non-nil to non-nil (length decrease)",
+			init: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			input: []*McpServer{
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+			},
+			changed: true,
+			expect: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+			},
+		},
+		{
+			name: "non-nil to non-nil (length unchanged + name field changed)",
+			init: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			input: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3-1",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			changed: true,
+			expect: []*McpServer{
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3-1",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+			},
+		},
+		{
+			name: "non-nil to non-nil (length unchanged + non-name field changed)",
+			init: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			input: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar-2.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test4",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			changed: true,
+			expect: []*McpServer{
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar-2.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test4",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+			},
+		},
+		{
+			name: "non-nil to non-nil (content unchanged + order unchanged)",
+			init: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			input: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			changed: false,
+			expect: []*McpServer{
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+			},
+		},
+		{
+			name: "non-nil to non-nil (content unchanged + order changed)",
+			init: []*McpServer{
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+			},
+			input: []*McpServer{
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+			},
+			changed: false,
+			expect: []*McpServer{
+				{
+					Name:              "test1",
+					Domains:           nil,
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test1",
+					UpstreamType:      UpstreamTypeRest,
+					EnablePathRewrite: false,
+					PathRewritePrefix: "",
+				},
+				{
+					Name:              "test2",
+					Domains:           []string{"www.foo.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test2",
+					UpstreamType:      UpstreamTypeSSE,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/test",
+				},
+				{
+					Name:              "test3",
+					Domains:           []string{"www.bar.com"},
+					PathMatchType:     ExactMatchType,
+					PathMatchValue:    "/mcp/test3",
+					UpstreamType:      UpstreamTypeStreamable,
+					EnablePathRewrite: true,
+					PathRewritePrefix: "/",
+				},
+			},
+		},
+	}
+
+	for _, tt := range testCases {
+		if tt.skip {
+			continue
+		}
+		t.Run(tt.name, func(t *testing.T) {
+			provider := &McpServerCache{}
+
+			if provider.GetMcpServers() != nil {
+				t.Fatalf("GetMcpServers doesn't return nil before testing.")
+			}
+
+			// Seed the cache; the result of this first call is not under test.
+			_ = provider.SetMcpServers(tt.init)
+
+			changed := provider.SetMcpServers(tt.input)
+			if changed != tt.changed {
+				// Fatalf stops the subtest, so no explicit return is needed.
+				t.Fatalf("actual changed %t != expect changed %t", changed, tt.changed)
+			}
+
+			actual := provider.GetMcpServers()
+
+			// Check the length first so the index-wise comparison below
+			// cannot run past the end of tt.expect.
+			if len(actual) != len(tt.expect) {
+				t.Fatalf("actual length %d != expect length %d", len(actual), len(tt.expect))
+			}
+			for i := range actual {
+				if diff := cmp.Diff(tt.expect[i], actual[i]); diff != "" {
+					t.Fatalf("TestMcpServerCache_GetSet() mismatch (-want +got):\n%s", diff)
+				}
+			}
+		})
+	}
+}
--- a/plugins/golang-filter/README.md
+++ b/plugins/golang-filter/README.md
@@ -20,24 +20,38 @@ Golang HTTP Filter 允许开发者使用 Go 语言编写自定义的 Envoy Filte

 请参考 [Envoy Golang HTTP Filter 示例](https://github.com/envoyproxy/examples/tree/main/golang-http) 了解如何开发和运行一个基本的 Golang Filter。

+## 插件注册
+
+在开发新的 Golang Filter 时，需要在 `main.go` 的 `init()` 函数中注册你的插件。注册时需要提供插件名称、Filter 工厂函数和配置解析器：
+
+```go
+func init() {
+    envoyHttp.RegisterHttpFilterFactoryAndConfigParser(
+        "your-plugin-name",    // 插件名称
+        yourFilterFactory,     // Filter 工厂函数
+        &yourConfigParser{},   // 配置解析器
+    )
+}
+```
+
 ## 配置示例

+多个 Golang Filter 插件可以共同编译到一个 `golang-filter.so` 文件中，通过 `plugin_name` 来指定要使用的插件。配置示例如下：
+
 ```yaml
 http_filters:
 - name: envoy.filters.http.golang
  typed_config:
    "@type": type.googleapis.com/envoy.extensions.filters.http.golang.v3alpha.Config
-    library_id: my-go-filter
-    library_path: "./go-filter.so"
-    plugin_name: my-go-filter
+    library_id: your-plugin-name
+    library_path: "./golang-filter.so"  # 包含多个插件的共享库文件
+    plugin_name: your-plugin-name       # 指定要使用的插件名称，需要与 init() 函数中注册的插件名称保持一致
    plugin_config:
      "@type": type.googleapis.com/xds.type.v3.TypedStruct
      value:
          your_config_here: value
-                  
 ```

-
 ## 快速构建

 使用以下命令可以快速构建 golang filter 插件:
--- a/plugins/golang-filter/README_en.md
+++ b/plugins/golang-filter/README_en.md
@@ -20,16 +20,32 @@ The Golang HTTP Filter allows developers to write custom Envoy Filters using the

 Please refer to [Envoy Golang HTTP Filter Example](https://github.com/envoyproxy/examples/tree/main/golang-http) to learn how to develop and run a basic Golang Filter.

+## Plugin Registration
+
+When developing a new Golang Filter, you need to register your plugin in the `init()` function of `main.go`. The registration requires a plugin name, Filter factory function, and configuration parser:
+
+```go
+func init() {
+    envoyHttp.RegisterHttpFilterFactoryAndConfigParser(
+        "your-plugin-name",    // Plugin name
+        yourFilterFactory,     // Filter factory function
+        &yourConfigParser{},   // Configuration parser
+    )
+}
+```
+
 ## Configuration Example

+Multiple Golang Filter plugins can be compiled into a single `golang-filter.so` file, and the desired plugin can be specified using `plugin_name`. Here's an example configuration:
+
 ```yaml
 http_filters:
 - name: envoy.filters.http.golang
  typed_config:
    "@type": type.googleapis.com/envoy.extensions.filters.http.golang.v3alpha.Config
-    library_id: my-go-filter
-    library_path: "./my-go-filter.so"
-    plugin_name: my-go-filter
+    library_id: your-plugin-name
+    library_path: "./golang-filter.so"  # Shared library file containing multiple plugins
+    plugin_name: your-plugin-name       # Specify which plugin to use, must match the name registered in init()
    plugin_config:
      "@type": type.googleapis.com/xds.type.v3.TypedStruct
      value:
@@ -41,5 +57,5 @@ http_filters:
 Use the following command to quickly build the golang filter plugin:

 ```bash
-GO_FILTER_NAME=mcp-server make build
+make build
 ``` 
--- a/plugins/golang-filter/go.mod
+++ b/plugins/golang-filter/go.mod
@@ -2,7 +2,7 @@ module github.com/alibaba/higress/plugins/golang-filter

 go 1.22

-replace github.com/envoyproxy/envoy => github.com/higress-group/envoy v0.0.0-20250428030521-17cf01d9f644
+replace github.com/envoyproxy/envoy => github.com/higress-group/envoy v0.0.0-20250430151331-2c556780b65c

 replace github.com/mark3labs/mcp-go => github.com/higress-group/mcp-go v0.0.0-20250428145706-792ce64b4b30

--- a/plugins/golang-filter/go.sum
+++ b/plugins/golang-filter/go.sum
@@ -234,8 +234,8 @@ github.com/hashicorp/go-version v1.6.0 h1:feTTfFNnjP967rlCxM/I9g701jU+RN74YKx2mO
 github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
 github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
 github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
-github.com/higress-group/envoy v0.0.0-20250428030521-17cf01d9f644 h1:wiLDdiOT3BcTQSFs8oTMu54GIiPFSwKLuWo5J0Cd9b8=
-github.com/higress-group/envoy v0.0.0-20250428030521-17cf01d9f644/go.mod h1:SU+IJUAfh1kkZtH+u0E1dnwho8AhbGeYMgp5vvjU+Gc=
+github.com/higress-group/envoy v0.0.0-20250430151331-2c556780b65c h1:chAOZk/qEXFhLILWoNucj3X6r9xYnRR+SWFvhsOa2oo=
+github.com/higress-group/envoy v0.0.0-20250430151331-2c556780b65c/go.mod h1:SU+IJUAfh1kkZtH+u0E1dnwho8AhbGeYMgp5vvjU+Gc=
 github.com/higress-group/mcp-go v0.0.0-20250428145706-792ce64b4b30 h1:N4NMq8M1nZyyChPyzn+EUUdHi5asig2uLR5hOyRmsXI=
 github.com/higress-group/mcp-go v0.0.0-20250428145706-792ce64b4b30/go.mod h1:O9gri9UOzthw728vusc2oNu99lVh8cKCajpxNfC90gE=
 github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
--- a/plugins/golang-filter/mcp-session/README.md
+++ b/plugins/golang-filter/mcp-session/README.md
@@ -3,27 +3,22 @@

 ## 概述

-MCP Server 是一个基于 Envoy 的 Golang Filter 插件，用于实现服务器端事件（SSE）和消息通信功能。该插件支持多种数据库类型，并使用 Redis 作为消息队列来实现负载均衡的请求通过对应的SSE连接发送。
+MCP Server 是一个基于 Envoy 的 Golang Filter 插件，提供了统一的 MCP (Model Context Protocol) 服务接口。它支持多种后端服务的集成，包括：

-> **注意**：MCP Server需要 Higress 2.1.0 或更高版本才能使用。
-## 项目结构
-```
-mcp-server/
-├── config.go                # 配置解析相关代码
-├── filter.go                # 请求处理相关代码
-├── internal/                # 内部实现逻辑
-├── servers/                 # MCP 服务器实现
-├── go.mod                   # Go模块依赖定义
-└── go.sum                   # Go模块依赖校验
-```
-## MCP Server开发指南
+- 数据库服务：通过 GORM 支持多种数据库的访问和管理
+- 配置中心：支持 Nacos 配置中心的集成
+- 可扩展性：支持自定义服务器实现，方便集成其他服务
+
+> **注意**：MCP Server 需要 Higress 2.1.0 或更高版本才能使用。
+
+## MCP Server 开发指南

 ```go
 // 在init函数中注册你的服务器
 // 参数1: 服务器名称
 // 参数2: 配置结构体实例
 func init() {
-	internal.GlobalRegistry.RegisterServer("demo", &DemoConfig{})
+	common.GlobalRegistry.RegisterServer("demo", &DemoConfig{})
 }

 // 服务器配置结构体
@@ -43,8 +38,8 @@ func (c *DBConfig) ParseConfig(config map[string]any) error {
 // 创建新的MCP服务器实例
 // serverName: 服务器名称
 // 返回值: MCP服务器实例和可能的错误
-func (c *DBConfig) NewServer(serverName string) (*internal.MCPServer, error) {
-	mcpServer := internal.NewMCPServer(serverName, Version)
+func (c *DBConfig) NewServer(serverName string) (*common.MCPServer, error) {
+	mcpServer := common.NewMCPServer(serverName, Version)
    
 	// 添加工具方法到服务器
 	// mcpServer.AddTool()	
--- a/plugins/golang-filter/mcp-server/README_en.md
+++ b/plugins/golang-filter/mcp-server/README_en.md
@@ -0,0 +1,60 @@
+# MCP Server
+English | [简体中文](./README.md)
+
+## Overview
+
+MCP Server is a Golang Filter plugin based on Envoy that provides a unified MCP (Model Context Protocol) service interface. It supports integration with various backend services, including:
+
+- Database Services: Supports multiple database access and management through GORM
+- Configuration Service: Supports integration with Nacos configuration service
+- Extensibility: Supports custom server implementations for easy integration with other services
+
+> **Note**: MCP Server requires Higress version 2.1.0 or higher to be used.
+
+## MCP Server Development Guide
+
+```go
+// Register your server in the init function
+// Parameter 1: Server name
+// Parameter 2: Configuration struct instance
+func init() {
+	common.GlobalRegistry.RegisterServer("demo", &DemoConfig{})
+}
+
+// Server configuration struct
+type DemoConfig struct {
+	helloworld string
+}
+
+// Parse configuration method
+// Parse and validate configuration items from the config map
+func (c *DemoConfig) ParseConfig(config map[string]any) error {
+	helloworld, ok := config["helloworld"].(string)
+	if !ok { return errors.New("missing helloworld")}
+	c.helloworld = helloworld
+	return nil
+}
+
+// Create a new MCP server instance
+// serverName: Server name
+// Returns: MCP server instance and possible error
+func (c *DemoConfig) NewServer(serverName string) (*common.MCPServer, error) {
+	mcpServer := common.NewMCPServer(serverName, Version)
+    
+	// Add tool methods to the server
+	// mcpServer.AddTool()	
+	
+	// Add resources to the server
+	// mcpServer.AddResource()
+	
+	return mcpServer, nil
+}
+```
+
+**Note**: 
+You need to use underscore imports in config.go to execute the package's init function
+```go
+import (
+	_ "github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/gorm"
+)
+```
--- a/plugins/golang-filter/mcp-session/README_en.md
+++ b/plugins/golang-filter/mcp-session/README_en.md
@@ -1,67 +0,0 @@
-# MCP Server
-English | [简体中文](./README.md)
-
-## Overview
-
-MCP Server is a Golang Filter plugin based on Envoy, designed to implement Server-Sent Events (SSE) and message communication functionality. This plugin supports various database types and uses Redis as a message queue to enable load-balanced requests to be sent through corresponding SSE connections.
-
-> **Note**: MCP Server requires Higress 2.1.0 or higher version.
-
-## Project Structure
-```
-mcp-server/
-├── config.go                # Configuration parsing code
-├── filter.go                # Request processing code
-├── internal/                # Internal implementation logic
-├── servers/                 # MCP server implementation
-├── go.mod                   # Go module dependency definition
-└── go.sum                   # Go module dependency checksum
-```
-
-## MCP Server Development Guide
-
-```go
-// Register your server in the init function
-// Param 1: Server name
-// Param 2: Config struct instance
-func init() {
-	internal.GlobalRegistry.RegisterServer("demo", &DemoConfig{})
-}
-
-// Server configuration struct
-type DemoConfig struct {
-	helloworld string
-}
-
-// Configuration parsing method
-// Parse and validate configuration items from the config map
-func (c *DBConfig) ParseConfig(config map[string]any) error {
-	helloworld, ok := config["helloworld"].(string)
-	if !ok { return errors.New("missing helloworld")}
-	c.helloworld = helloworld
-	return nil
-}
-
-// Create a new MCP server instance
-// serverName: Server name
-// Returns: MCP server instance and possible error
-func (c *DBConfig) NewServer(serverName string) (*internal.MCPServer, error) {
-	mcpServer := internal.NewMCPServer(serverName, Version)
-    
-	// Add tool methods to server
-	// mcpServer.AddTool()	
-	
-	// Add resources to server
-	// mcpServer.AddResource()
-	
-	return mcpServer, nil
-}
-```
-
-**Note**: 
-Need to use underscore import in config.go to execute the package's init function
-```go
-import (
-	_ "github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/gorm"
-)
-```
--- a/plugins/golang-filter/mcp-session/common/match.go
+++ b/plugins/golang-filter/mcp-session/common/match.go
@@ -3,24 +3,36 @@ package common
 import (
 	"regexp"
 	"strings"
+
+	"github.com/envoyproxy/envoy/contrib/golang/common/go/api"
 )

 // RuleType defines the type of matching rule
 type RuleType string

+// UpstreamType defines the type of upstream that a match rule applies to
+type UpstreamType string
+
 const (
 	ExactMatch    RuleType = "exact"
 	PrefixMatch   RuleType = "prefix"
 	SuffixMatch   RuleType = "suffix"
 	ContainsMatch RuleType = "contains"
 	RegexMatch    RuleType = "regex"
+
+	RestUpstream       UpstreamType = "rest"
+	SSEUpstream        UpstreamType = "sse"
+	StreamableUpstream UpstreamType = "streamable"
 )

 // MatchRule defines the structure for a matching rule
 type MatchRule struct {
-	MatchRuleDomain string   `json:"match_rule_domain"` // Domain pattern, supports wildcards
-	MatchRulePath   string   `json:"match_rule_path"`   // Path pattern to match
-	MatchRuleType   RuleType `json:"match_rule_type"`   // Type of match rule
+	MatchRuleDomain   string       `json:"match_rule_domain"`   // Domain pattern, supports wildcards
+	MatchRulePath     string       `json:"match_rule_path"`     // Path pattern to match
+	MatchRuleType     RuleType     `json:"match_rule_type"`     // Type of match rule
+	UpstreamType      UpstreamType `json:"upstream_type"`       // Type of upstream(s) matched by the rule
+	EnablePathRewrite bool         `json:"enable_path_rewrite"` // Enable request path rewrite for matched routes
+	PathRewritePrefix string       `json:"path_rewrite_prefix"` // Prefix the request path would be rewritten to.
 }

 // ParseMatchList parses the match list from the config
@@ -38,6 +50,34 @@ func ParseMatchList(matchListConfig []interface{}) []MatchRule {
 			if ruleType, ok := ruleMap["match_rule_type"].(string); ok {
 				rule.MatchRuleType = RuleType(ruleType)
 			}
+			if upstreamType, ok := ruleMap["upstream_type"].(string); ok {
+				rule.UpstreamType = UpstreamType(upstreamType)
+			}
+			if len(rule.UpstreamType) == 0 {
+				rule.UpstreamType = RestUpstream
+			} else {
+				switch rule.UpstreamType {
+				case RestUpstream, SSEUpstream, StreamableUpstream:
+					break
+				default:
+					api.LogWarnf("Unknown upstream type: %s", rule.UpstreamType)
+				}
+			}
+			if enablePathRewrite, ok := ruleMap["enable_path_rewrite"].(bool); ok {
+				rule.EnablePathRewrite = enablePathRewrite
+			}
+			if pathRewritePrefix, ok := ruleMap["path_rewrite_prefix"].(string); ok {
+				rule.PathRewritePrefix = pathRewritePrefix
+			}
+			if rule.EnablePathRewrite {
+				if rule.UpstreamType != SSEUpstream {
+					api.LogWarnf("Path rewrite is only supported for SSE upstream type")
+				} else if rule.MatchRuleType != PrefixMatch {
+					api.LogWarnf("Path rewrite is only supported for prefix match type")
+				} else if !strings.HasPrefix(rule.PathRewritePrefix, "/") {
+					rule.PathRewritePrefix = "/" + rule.PathRewritePrefix
+				}
+			}
 			matchList = append(matchList, rule)
 		}
 	}
@@ -96,17 +136,17 @@ func matchDomainAndPath(domain, path string, rule MatchRule) bool {

 // IsMatch checks if the request matches any rule in the rule list
 // Returns true if no rules are specified
-func IsMatch(rules []MatchRule, host, path string) bool {
+func IsMatch(rules []MatchRule, host, path string) (bool, MatchRule) {
 	if len(rules) == 0 {
-		return true
+		return true, MatchRule{}
 	}

 	for _, rule := range rules {
 		if matchDomainAndPath(host, path, rule) {
-			return true
+			return true, rule
 		}
 	}
-	return false
+	return false, MatchRule{}
 }

 // MatchDomainList checks if the domain matches any of the domains in the list
--- a/plugins/golang-filter/mcp-session/common/sse.go
+++ b/plugins/golang-filter/mcp-session/common/sse.go
@@ -136,7 +136,7 @@ func (s *SSEServer) HandleSSE(cb api.FilterCallbackHandler, stopChan chan struct
 	}

 	// Send the initial endpoint event
-	initialEvent := fmt.Sprintf("event: endpoint\ndata: %s\r\n\r\n", messageEndpoint)
+	initialEvent := fmt.Sprintf("event: endpoint\ndata: %s\n\n", messageEndpoint)
 	err = s.redisClient.Publish(channel, initialEvent)
 	if err != nil {
 		api.LogErrorf("Failed to send initial event: %v", err)
@@ -210,7 +210,7 @@ func (s *SSEServer) HandleMessage(w http.ResponseWriter, r *http.Request, body j
 	var status int
 	// Only send response if there is one (not for notifications)
 	if response != nil {
-		if sessionID != ""{
+		if sessionID != ""  {
 			w.WriteHeader(http.StatusAccepted)
 			status = http.StatusAccepted
 		} else {
--- a/plugins/golang-filter/mcp-session/filter.go
+++ b/plugins/golang-filter/mcp-session/filter.go
@@ -2,6 +2,7 @@ package mcp_session

 import (
 	"encoding/json"
+	"errors"
 	"fmt"
 	"net/http"
 	"net/url"
@@ -28,10 +29,14 @@ type filter struct {
 	config    *config
 	stopChan  chan struct{}

-	req         *http.Request
-	serverName  string
-	proxyURL    *url.URL
-	neepProcess bool
+	req                *http.Request
+	serverName         string
+	proxyURL           *url.URL
+	matchedRule        common.MatchRule
+	needProcess        bool
+	skipRequestBody    bool
+	skipResponseBody   bool
+	cachedResponseBody []byte

 	userLevelConfig     bool
 	mcpConfigHandler    *handler.MCPConfigHandler
@@ -42,31 +47,33 @@ type filter struct {
 // Callbacks which are called in request path
 // The endStream is true if the request doesn't have body
 func (f *filter) DecodeHeaders(header api.RequestHeaderMap, endStream bool) api.StatusType {
-	url := common.NewRequestURL(header)
-	if url == nil {
+	requestUrl := common.NewRequestURL(header)
+	if requestUrl == nil {
 		return api.Continue
 	}
-	f.path = url.ParsedURL.Path
+	f.path = requestUrl.ParsedURL.Path

 	// Check if request matches any rule in match_list
-	if !common.IsMatch(f.config.matchList, url.Host, f.path) {
-		api.LogDebugf("Request does not match any rule in match_list: %s", url.ParsedURL.String())
+	matched, matchedRule := common.IsMatch(f.config.matchList, requestUrl.Host, f.path)
+	if !matched {
+		api.LogDebugf("Request does not match any rule in match_list: %s", requestUrl.ParsedURL.String())
 		return api.Continue
 	}
-	f.neepProcess = true
+	f.needProcess = true
+	f.matchedRule = matchedRule

 	f.req = &http.Request{
-		Method: url.Method,
-		URL:    url.ParsedURL,
+		Method: requestUrl.Method,
+		URL:    requestUrl.ParsedURL,
 	}

 	if strings.HasSuffix(f.path, ConfigPathSuffix) && f.config.enableUserLevelServer {
-		if !url.InternalIP {
-			api.LogWarnf("Access denied: non-Internal IP address %s", url.ParsedURL.String())
+		if !requestUrl.InternalIP {
+			api.LogWarnf("Access denied: non-Internal IP address %s", requestUrl.ParsedURL.String())
 			f.callbacks.DecoderFilterCallbacks().SendLocalReply(http.StatusForbidden, "", nil, 0, "")
 			return api.LocalReply
 		}
-		if strings.HasSuffix(f.path, ConfigPathSuffix) && url.Method == http.MethodGet {
+		if strings.HasSuffix(f.path, ConfigPathSuffix) && requestUrl.Method == http.MethodGet {
 			api.LogDebugf("Handling config request: %s", f.path)
 			f.mcpConfigHandler.HandleConfigRequest(f.req, []byte{})
 			return api.LocalReply
@@ -79,10 +86,27 @@ func (f *filter) DecodeHeaders(header api.RequestHeaderMap, endStream bool) api.
 		}
 	}

-	if !strings.HasSuffix(url.ParsedURL.Path, GlobalSSEPathSuffix) {
-		f.proxyURL = url.ParsedURL
+	return f.processMcpRequestHeaders(header, endStream)
+}
+
+// processMcpRequestHeaders routes request-header processing to the handler for
+// the matched rule's upstream type. REST and streamable upstreams share one
+// handler; SSE upstreams have their own. For any other upstream type the filter
+// stops processing this request and lets it continue untouched.
+func (f *filter) processMcpRequestHeaders(header api.RequestHeaderMap, endStream bool) api.StatusType {
+	upstreamType := f.matchedRule.UpstreamType
+	if upstreamType == common.SSEUpstream {
+		return f.processMcpRequestHeadersForSSEUpstream(header, endStream)
+	}
+	if upstreamType == common.RestUpstream || upstreamType == common.StreamableUpstream {
+		return f.processMcpRequestHeadersForRestUpstream(header, endStream)
+	}
+	// Unrecognized upstream type: disable further processing for this request.
+	f.needProcess = false
+	return api.Continue
+}
+
+func (f *filter) processMcpRequestHeadersForRestUpstream(header api.RequestHeaderMap, endStream bool) api.StatusType {
+	method := f.req.Method
+	requestUrl := f.req.URL
+	if !strings.HasSuffix(requestUrl.Path, GlobalSSEPathSuffix) {
+		f.proxyURL = requestUrl
 		if f.config.enableUserLevelServer {
-			parts := strings.Split(url.ParsedURL.Path, "/")
+			parts := strings.Split(requestUrl.Path, "/")
 			if len(parts) >= 3 {
 				serverName := parts[1]
 				uid := parts[2]
@@ -102,12 +126,12 @@ func (f *filter) DecodeHeaders(header api.RequestHeaderMap, endStream bool) api.
 		}
 	}

-	if url.Method != http.MethodGet {
+	if method != http.MethodGet {
 		f.callbacks.DecoderFilterCallbacks().SendLocalReply(http.StatusMethodNotAllowed, "Method not allowed", nil, 0, "")
 	} else {
 		f.config.defaultServer = common.NewSSEServer(common.NewMCPServer(DefaultServerName, Version),
 			common.WithSSEEndpoint(GlobalSSEPathSuffix),
-			common.WithMessageEndpoint(strings.TrimSuffix(url.ParsedURL.Path, GlobalSSEPathSuffix)),
+			common.WithMessageEndpoint(strings.TrimSuffix(requestUrl.Path, GlobalSSEPathSuffix)),
 			common.WithRedisClient(f.config.redisClient))
 		f.serverName = f.config.defaultServer.GetServerName()
 		body := "SSE connection create"
@@ -116,10 +140,19 @@ func (f *filter) DecodeHeaders(header api.RequestHeaderMap, endStream bool) api.
 	return api.LocalReply
 }

+// processMcpRequestHeadersForSSEUpstream handles request headers for rules whose
+// upstream type is SSE. It flags the request body as skippable (DecodeData
+// checks this flag) and lets the request continue.
+func (f *filter) processMcpRequestHeadersForSSEUpstream(header api.RequestHeaderMap, endStream bool) api.StatusType {
+	f.skipRequestBody = true // the request body needs no processing for an SSE upstream
+	return api.Continue
+}
+
 // DecodeData might be called multiple times during handling the request body.
 // The endStream is true when handling the last piece of the body.
 func (f *filter) DecodeData(buffer api.BufferInstance, endStream bool) api.StatusType {
-	if !f.neepProcess {
+	if !f.needProcess || f.skipRequestBody {
+		return api.Continue
+	}
+	if f.matchedRule.UpstreamType != common.RestUpstream && f.matchedRule.UpstreamType != common.StreamableUpstream {
 		return api.Continue
 	}
 	if !endStream {
@@ -158,10 +191,17 @@ func (f *filter) DecodeData(buffer api.BufferInstance, endStream bool) api.Statu
 	return api.Continue
 }

-// Callbacks which are called in response path
-// The endStream is true if the response doesn't have body
+// EncodeHeaders is the callback invoked on the response path.
+// The endStream is true if the response doesn't have a body.
 func (f *filter) EncodeHeaders(header api.ResponseHeaderMap, endStream bool) api.StatusType {
-	if !f.neepProcess {
+	if !f.needProcess {
+		return api.Continue
+	}
+	if f.matchedRule.UpstreamType != common.RestUpstream && f.matchedRule.UpstreamType != common.StreamableUpstream {
+		if contentType, ok := header.Get("content-type"); !ok || !strings.HasPrefix(contentType, "text/event-stream") {
+			api.LogDebugf("Skip response body for non-SSE upstream. Content-Type: %s", contentType)
+			f.skipResponseBody = true
+		}
 		return api.Continue
 	}
 	if f.serverName != "" {
@@ -182,7 +222,30 @@ func (f *filter) EncodeHeaders(header api.ResponseHeaderMap, endStream bool) api
 // EncodeData might be called multiple times during handling the response body.
 // The endStream is true when handling the last piece of the body.
 func (f *filter) EncodeData(buffer api.BufferInstance, endStream bool) api.StatusType {
-	if !f.neepProcess {
+	if !f.needProcess || f.skipResponseBody {
+		return api.Continue
+	}
+
+	ret := api.Continue
+	api.LogDebugf("Upstream Type: %s", f.matchedRule.UpstreamType)
+	switch f.matchedRule.UpstreamType {
+	case common.RestUpstream, common.StreamableUpstream:
+		api.LogDebugf("Encoding data from Rest upstream")
+		ret = f.encodeDataFromRestUpstream(buffer, endStream)
+		break
+	case common.SSEUpstream:
+		api.LogDebugf("Encoding data from SSE upstream")
+		ret = f.encodeDataFromSSEUpstream(buffer, endStream)
+		if endStream {
+			// Always continue as long as the stream has ended.
+			ret = api.Continue
+		}
+	}
+	return ret
+}
+
+func (f *filter) encodeDataFromRestUpstream(buffer api.BufferInstance, endStream bool) api.StatusType {
+	if !f.needProcess {
 		return api.Continue
 	}
 	if !endStream {
@@ -207,13 +270,171 @@ func (f *filter) EncodeData(buffer api.BufferInstance, endStream bool) api.Statu
 			f.config.defaultServer.HandleSSE(f.callbacks, f.stopChan)
 			return api.Running
 		} else {
-			buffer.SetString(RedisNotEnabledResponseBody)
+			_ = buffer.SetString(RedisNotEnabledResponseBody)
 			return api.Continue
 		}
 	}
 	return api.Continue
 }

+// encodeDataFromSSEUpstream looks for the initial "endpoint" event in the
+// response data coming from an SSE upstream and, when path rewrite applies,
+// replaces the endpoint URL announced by the upstream with the rewritten one.
+//
+// It returns api.StopAndBuffer while the endpoint event has not been fully
+// received yet. In every other case it returns api.Continue and clears
+// f.needProcess so that the rest of the response is no longer inspected.
+func (f *filter) encodeDataFromSSEUpstream(buffer api.BufferInstance, endStream bool) api.StatusType {
+	bufferData := string(buffer.Bytes())
+
+	err, endpointUrl := f.findEndpointUrl(bufferData)
+	if err != nil {
+		api.LogWarnf("Failed to find endpoint URL in SSE data: %v", err)
+		f.needProcess = false
+		return api.Continue
+	}
+	if endpointUrl == "" {
+		// No endpoint URL found. Need to buffer and check again.
+		return api.StopAndBuffer
+	}
+
+	// Remove query string since we don't need to change it.
+	if queryStringIndex := strings.IndexByte(endpointUrl, '?'); queryStringIndex != -1 {
+		endpointUrl = endpointUrl[:queryStringIndex]
+	}
+
+	if changed, newEndpointUrl := f.rewriteEndpointUrl(endpointUrl); changed {
+		api.LogDebugf("The endpoint URL is changed.\n  Old: %s\n  New: %s", endpointUrl, newEndpointUrl)
+
+		// Only search inside the data field of the endpoint event. Searching the
+		// whole buffer could hit an earlier, unrelated occurrence of the same
+		// text (e.g. in an SSE comment line) and rewrite the wrong bytes.
+		searchStart := 0
+		if eventIndex := strings.Index(bufferData, "event:"); eventIndex != -1 {
+			if dataIndex := strings.Index(bufferData[eventIndex:], "data:"); dataIndex != -1 {
+				searchStart = eventIndex + dataIndex + len("data:")
+			}
+		}
+
+		endpointUrlIndex := strings.Index(bufferData[searchStart:], endpointUrl)
+		if endpointUrlIndex == -1 {
+			api.LogWarnf("Something wrong, the previously found endpoint URL %s not found in the SSE data now", endpointUrl)
+		} else {
+			endpointUrlIndex += searchStart
+			bufferData = bufferData[:endpointUrlIndex] + newEndpointUrl + bufferData[endpointUrlIndex+len(endpointUrl):]
+			_ = buffer.SetString(bufferData)
+		}
+	} else {
+		api.LogDebugf("The endpoint URL %s is not changed", endpointUrl)
+	}
+
+	// The endpoint event has been handled; later chunks need no processing.
+	f.needProcess = false
+	return api.Continue
+}
+
+// rewriteEndpointUrl maps an endpoint URL announced by the upstream back onto
+// the matched rule's path: the rule's PathRewritePrefix at the start of the
+// endpoint path is replaced with the rule's MatchRulePath.
+//
+// Any scheme and host part of endpointUrl is dropped before matching. The
+// returned bool is false (with an empty string) when path rewrite is disabled
+// for the matched rule or when the endpoint path does not start with the
+// rewrite prefix; otherwise it is true and the string is the rewritten path.
+func (f *filter) rewriteEndpointUrl(endpointUrl string) (bool, string) {
+	rule := f.matchedRule
+	if !rule.EnablePathRewrite {
+		return false, ""
+	}
+
+	// Reduce an absolute URL ("scheme://host/path") to its path component.
+	endpointPath := endpointUrl
+	if schemeEnd := strings.Index(endpointPath, "://"); schemeEnd != -1 {
+		hostAndPath := endpointPath[schemeEnd+3:]
+		if pathStart := strings.Index(hostAndPath, "/"); pathStart != -1 {
+			endpointPath = hostAndPath[pathStart:]
+		} else {
+			endpointPath = "/"
+		}
+	}
+
+	if !strings.HasPrefix(endpointPath, rule.PathRewritePrefix) {
+		// The endpoint URL does not match the path rewrite prefix. We are unable to rewrite it back.
+		api.LogWarnf("The endpoint URL %s does not match the path rewrite prefix %s", endpointPath, rule.PathRewritePrefix)
+		return false, ""
+	}
+
+	remainder := endpointPath[len(rule.PathRewritePrefix):]
+	if remainder == "" {
+		return true, rule.MatchRulePath
+	}
+
+	// Join the rule path and the remainder with exactly one slash between them.
+	base := rule.MatchRulePath
+	baseEndsWithSlash := strings.HasSuffix(base, "/")
+	remainderStartsWithSlash := strings.HasPrefix(remainder, "/")
+	switch {
+	case baseEndsWithSlash && remainderStartsWithSlash:
+		// Both sides contribute a slash: drop one.
+		return true, base + remainder[1:]
+	case !baseEndsWithSlash && !remainderStartsWithSlash:
+		// Neither side contributes a slash: add one.
+		return true, base + "/" + remainder
+	default:
+		// Exactly one side contributes the slash.
+		return true, base + remainder
+	}
+}
+
+// findNextLineBreak reports which line-break sequence ("\r\n", "\r" or "\n")
+// comes first in bufferData. It returns an empty string when bufferData holds
+// no line break at all, and an error when the first line break is an LF
+// immediately followed by a CR.
+func (f *filter) findNextLineBreak(bufferData string) (error, string) {
+	// See https://html.spec.whatwg.org/multipage/server-sent-events.html
+	breakIndex := strings.IndexAny(bufferData, "\r\n")
+	if breakIndex == -1 {
+		// No line break found.
+		return nil, ""
+	}
+
+	following := bufferData[breakIndex+1:]
+	if bufferData[breakIndex] == '\r' {
+		if strings.HasPrefix(following, "\n") {
+			return nil, "\r\n"
+		}
+		return nil, "\r"
+	}
+
+	// The first line break character is LF.
+	if strings.HasPrefix(following, "\r") {
+		// Found unexpected "\n\r". Skip body processing.
+		return errors.New("found unexpected LF+CR"), ""
+	}
+	return nil, "\n"
+}
+
+// findEndpointUrl extracts the endpoint URL from the first event found in the
+// given SSE data. The event is expected to be an "event:" line naming the
+// "endpoint" event, directly followed by a "data:" line carrying the URL.
+//
+// Return values (note the error comes first):
+//   - (nil, url) when the endpoint event was found; url is the trimmed data value.
+//   - (nil, "")  when the data received so far is not enough to decide.
+//   - (err, "")  when the first event is not an endpoint event, the line after
+//     it is not a data field, or the line breaks are malformed.
+func (f *filter) findEndpointUrl(bufferData string) (error, string) {
+	eventIndex := strings.Index(bufferData, "event:")
+	if eventIndex == -1 {
+		return nil, ""
+	}
+
+	// splitLine cuts the first line off data. complete is false when no line
+	// break has been received yet, which means the data is not enough.
+	splitLine := func(data, fieldLabel string) (line, rest string, complete bool, err error) {
+		lineBreakErr, lineBreak := f.findNextLineBreak(data)
+		if lineBreakErr != nil {
+			// Wrap with %w so callers can still inspect the underlying error.
+			return "", "", false, fmt.Errorf("failed to find endpoint URL in SSE data: %w", lineBreakErr)
+		}
+		if lineBreak == "" {
+			return "", "", false, nil
+		}
+		api.LogDebugf("%s line break sequence: %v", fieldLabel, []byte(lineBreak))
+		lineEnd := strings.Index(data, lineBreak)
+		if lineEnd == -1 {
+			// Defensive: the line break reported above should always be present.
+			return "", "", false, nil
+		}
+		return data[:lineEnd], data[lineEnd+len(lineBreak):], true, nil
+	}
+
+	eventLine, remaining, complete, err := splitLine(bufferData[eventIndex:], "event")
+	if err != nil || !complete {
+		return err, ""
+	}
+	eventName := strings.TrimSpace(eventLine[len("event:"):])
+	if eventName != "endpoint" {
+		return fmt.Errorf("the initial event [%s] is not an endpoint event. Skip processing", eventName), ""
+	}
+
+	dataLine, _, complete, err := splitLine(remaining, "data")
+	if err != nil || !complete {
+		return err, ""
+	}
+	if !strings.HasPrefix(dataLine, "data:") {
+		return fmt.Errorf("an unexpected non-data field found in the event. Skip processing. Field: %s", dataLine), ""
+	}
+	return nil, strings.TrimSpace(dataLine[len("data:"):])
+}
+
 // OnDestroy stops the goroutine
 func (f *filter) OnDestroy(reason api.DestroyReason) {
 	api.LogDebugf("OnDestroy: reason=%v", reason)
--- a/plugins/wasm-cpp/.bazelrc
+++ b/plugins/wasm-cpp/.bazelrc
@@ -4,4 +4,6 @@ build:gcc --cxxopt=-std=c++17
 build:clang --action_env=CC=clang --action_env=CXX=clang++
 build:clang --action_env=BAZEL_COMPILER=clang
 build:clang --linkopt=-fuse-ld=lld
-build:clang --cxxopt=-std=c++17
+build:clang --cxxopt=-std=c++17
+
+build --incompatible_use_platforms_repo_for_constraints=false
--- a/plugins/wasm-cpp/.bazelversion
+++ b/plugins/wasm-cpp/.bazelversion
@@ -1 +1 @@
-5.4.0
+6.0.0
--- a/plugins/wasm-cpp/WORKSPACE
+++ b/plugins/wasm-cpp/WORKSPACE
@@ -1,6 +1,13 @@
 workspace(name = "istio_ecosystem_wasm_extensions")

 load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+http_archive(
+    name = "platforms",
+    url = "https://github.com/bazelbuild/platforms/releases/download/0.0.9/platforms-0.0.9.tar.gz",
+    sha256 = "5eda539c841265031c2f82d8ae7a3a6490bd62176e0c038fc469eabf91f6149b",
+)
+
 load("//bazel:third_party.bzl", "wasm_extension_dependency")

 wasm_extension_dependency()
@@ -16,9 +23,9 @@ load("@io_bazel_rules_docker//repositories:deps.bzl", container_deps = "deps")

 container_deps()

-PROXY_WASM_CPP_SDK_SHA = "eaec483b5b3c7bcb89fd208b5a1fa5d79d626f61"
+PROXY_WASM_CPP_SDK_SHA = "0ceca8c81dddc4c9875cf0cb997454764905658c"

-PROXY_WASM_CPP_SDK_SHA256 = "1140bc8114d75db56a6ca6b18423d4df50d988d40b4cec929a1eb246cf5a4a3d"
+PROXY_WASM_CPP_SDK_SHA256 = "cb010b242d49fb02b39124421b6acb69bd4ece64fb6299ba3f98f3b36eef7004"

 http_archive(
    name = "proxy_wasm_cpp_sdk",
--- a/plugins/wasm-cpp/extensions/key_auth/plugin.cc
+++ b/plugins/wasm-cpp/extensions/key_auth/plugin.cc
@@ -202,7 +202,7 @@ bool PluginRootContext::parsePluginConfig(const json& configuration,
          }
          item = consumer.find("keys");
          if (item == consumer.end()) {
-            LOG_WARN("not found keys configuration for consumer " + c.name + ", will use global configuration to extract keys");
+            LOG_DEBUG("not found keys configuration for consumer " + c.name + ", will use global configuration to extract keys");
            need_global_keys = true;
          } else {
            c.keys = std::vector<std::string>{OriginalAuthKey};
--- a/plugins/wasm-cpp/extensions/model_mapper/README.md
+++ b/plugins/wasm-cpp/extensions/model_mapper/README.md
@@ -7,7 +7,7 @@
 | -----------          | --------------- | ----------------------- | ------                   | -------------------------------------------                                                                                                                                                                                                                  |
 | `modelKey`           | string          | 选填                    | model                    | 请求body中model参数的位置                                                                                                                                                                                                                                    |
 | `modelMapping`       | map of string   | 选填                    | -                        | AI 模型映射表，用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>1. 支持前缀匹配。例如用 "gpt-3-*" 匹配所有名称以“gpt-3-”开头的模型；<br/>2. 支持使用 "*" 为键来配置通用兜底映射关系；<br/>3. 如果映射的目标名称为空字符串 ""，则表示保留原模型名称。 |
-| `enableOnPathSuffix` | array of string | 选填                    | ["/v1/chat/completions"] | 只对这些特定路径后缀的请求生效                                                                                                                                                                                                                               |
+| `enableOnPathSuffix` | array of string | 选填                    | ["/completions","/embeddings","/images/generations","/audio/speech","/fine_tuning/jobs","/moderations","/image-synthesis","/video-synthesis"] | 只对这些特定路径后缀的请求生效|


 ## 效果说明
--- a/plugins/wasm-cpp/extensions/model_mapper/README_EN.md
+++ b/plugins/wasm-cpp/extensions/model_mapper/README_EN.md
@@ -7,7 +7,7 @@ The `model-mapper` plugin implements the functionality of routing based on the m
 | -----------          | --------------- | -----------------------            | ------                          | -------------------------------------------                                                                                                                                                                                                                  |
 | `modelKey`           | string          | Optional                           | model                           | The location of the model parameter in the request body.                                                                                                                                                                                                            |
 | `modelMapping`       | map of string   | Optional                           | -                               | AI model mapping table, used to map the model names in the request to the model names supported by the service provider.<br/>1. Supports prefix matching. For example, use "gpt-3-*" to match all models whose names start with “gpt-3-”;<br/>2. Supports using "*" as the key to configure a generic fallback mapping relationship;<br/>3. If the target name in the mapping is an empty string "", it means to keep the original model name. |
-| `enableOnPathSuffix` | array of string | Optional                           | ["/v1/chat/completions"]        | Only applies to requests with these specific path suffixes.                                                                                                                                           |
+| `enableOnPathSuffix` | array of string | Optional                           | ["/completions","/embeddings","/images/generations","/audio/speech","/fine_tuning/jobs","/moderations","/image-synthesis","/video-synthesis"]        | Only applies to requests with these specific path suffixes.                                                                                                                                           |

 ## Runtime Properties

--- a/plugins/wasm-cpp/extensions/model_mapper/plugin.h
+++ b/plugins/wasm-cpp/extensions/model_mapper/plugin.h
@@ -43,7 +43,8 @@ struct ModelMapperConfigRule {
  std::string default_model_mapping_;
  std::vector<std::string> enable_on_path_suffix_ = {
      "/completions",  "/embeddings",       "/images/generations",
-      "/audio/speech", "/fine_tuning/jobs", "/moderations"};
+      "/audio/speech", "/fine_tuning/jobs", "/moderations",
+      "/image-synthesis", "/video-synthesis"};
 };

 // PluginRootContext is the root context for all streams processed by the
--- a/plugins/wasm-cpp/extensions/model_router/README.md
+++ b/plugins/wasm-cpp/extensions/model_router/README.md
@@ -8,7 +8,7 @@
 | `modelKey`           | string          | 选填                    | model                    | 请求body中model参数的位置                             |
 | `addProviderHeader`  | string          | 选填                    | -                        | 从model参数中解析出的provider名字放到哪个请求header中 |
 | `modelToHeader`      | string          | 选填                    | -                        | 直接将model参数放到哪个请求header中                   |
-| `enableOnPathSuffix` | array of string | 选填                    | ["/v1/chat/completions"] | 只对这些特定路径后缀的请求生效，可以配置为 "*" 以匹配所有路径 |
+| `enableOnPathSuffix` | array of string | 选填                    | ["/completions","/embeddings","/images/generations","/audio/speech","/fine_tuning/jobs","/moderations","/image-synthesis","/video-synthesis"] | 只对这些特定路径后缀的请求生效，可以配置为 "*" 以匹配所有路径 |

 ## 运行属性

--- a/plugins/wasm-cpp/extensions/model_router/README_EN.md
+++ b/plugins/wasm-cpp/extensions/model_router/README_EN.md
@@ -8,7 +8,7 @@ The `model-router` plugin implements routing functionality based on the model pa
 | `modelKey`           | string          | Optional                | model                    | Location of the model parameter in the request body          |
 | `addProviderHeader`  | string          | Optional                | -                        | Which request header to add the provider name parsed from the model parameter |
 | `modelToHeader`      | string          | Optional                | -                        | Which request header to directly add the model parameter to  |
-| `enableOnPathSuffix` | array of string | Optional                | ["/v1/chat/completions"] | Only effective for requests with these specific path suffixes, can be configured as "*" to match all paths |
+| `enableOnPathSuffix` | array of string | Optional                | ["/completions","/embeddings","/images/generations","/audio/speech","/fine_tuning/jobs","/moderations","/image-synthesis","/video-synthesis"] | Only effective for requests with these specific path suffixes, can be configured as "*" to match all paths |

 ## Runtime Properties

--- a/plugins/wasm-cpp/extensions/model_router/plugin.h
+++ b/plugins/wasm-cpp/extensions/model_router/plugin.h
@@ -49,7 +49,8 @@ struct ModelRouterConfigRule {
  std::string model_to_header_;
  std::vector<std::string> enable_on_path_suffix_ = {
      "/completions",  "/embeddings",       "/images/generations",
-      "/audio/speech", "/fine_tuning/jobs", "/moderations"};
+      "/audio/speech", "/fine_tuning/jobs", "/moderations",
+      "/image-synthesis", "/video-synthesis"};
 };

 class PluginContext;
--- a/plugins/wasm-go/Dockerfile
+++ b/plugins/wasm-go/Dockerfile
@@ -1,5 +1,5 @@
 ARG BUILDER=higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/wasm-go-builder:go1.20.14-tinygo0.29.0-oras1.0.0
-FROM $BUILDER as builder
+FROM $BUILDER AS builder


 ARG GOPROXY
@@ -26,6 +26,6 @@ RUN \
    tinygo build -o /main.wasm -scheduler=none -gc=custom -tags="custommalloc nottinygc_finalizer $EXTRA_TAGS" -target=wasi ./ ; \
  fi

-FROM scratch as output
+FROM scratch AS output

 COPY --from=builder /main.wasm plugin.wasm
--- a/plugins/wasm-go/extensions/ai-cache/config/config.go
+++ b/plugins/wasm-go/extensions/ai-cache/config/config.go
@@ -90,6 +90,8 @@ func (c *PluginConfig) FromJson(json gjson.Result, log wrapper.Log) {

 	if json.Get("enableSemanticCache").Exists() {
 		c.EnableSemanticCache = json.Get("enableSemanticCache").Bool()
+	} else if c.GetVectorProvider() == nil {
+		c.EnableSemanticCache = false	// set value to false when no vector provider 
 	} else {
 		c.EnableSemanticCache = true // set default value to true
 	}
--- a/plugins/wasm-go/extensions/ai-proxy/README.md
+++ b/plugins/wasm-go/extensions/ai-proxy/README.md
@@ -1,6 +1,6 @@
 ---
 title: AI 代理
-keywords: [ AI网关, AI代理 ]
+keywords: [AI网关, AI代理]
 description: AI 代理插件配置参考
 ---

@@ -20,53 +20,49 @@ description: AI 代理插件配置参考
 插件执行阶段：`默认阶段`
 插件执行优先级：`100`

-
 ## 配置字段

 ### 基本配置

-| 名称         | 数据类型   | 填写要求 | 默认值 | 描述               |
-|------------|--------|------|-----|------------------|
-| `provider` | object | 必填   | -   | 配置目标 AI 服务提供商的信息 |
+| 名称       | 数据类型 | 填写要求 | 默认值 | 描述                         |
+| ---------- | -------- | -------- | ------ | ---------------------------- |
+| `provider` | object   | 必填     | -      | 配置目标 AI 服务提供商的信息 |

 `provider`的配置字段说明如下：

-| 名称               | 数据类型        | 填写要求 | 默认值 | 描述                                                                                                                                                                                                                                        |
-|------------------| --------------- | -------- | ------ |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `type`           | string          | 必填     | -      | AI 服务提供商名称                                                                                                                                                                                                                                |
-| `apiTokens`      | array of string | 非必填   | -      | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token，插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。                                                                                                                                                                     |
-| `timeout`        | number          | 非必填   | -      | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000，即 2 分钟。此项配置目前仅用于获取上下文信息，并不影响实际转发大模型请求。                                                                                                                                                                    |
-| `modelMapping`   | map of string   | 非必填   | -      | AI 模型映射表，用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>1. 支持前缀匹配。例如用 "gpt-3-\*" 匹配所有名称以“gpt-3-”开头的模型；<br/>2. 支持使用 "\*" 为键来配置通用兜底映射关系；<br/>3. 如果映射的目标名称为空字符串 ""，则表示保留原模型名称。                                                                               |
-| `protocol`       | string          | 非必填   | -      | 插件对外提供的 API 接口契约。目前支持以下取值：openai（默认值，使用 OpenAI 的接口契约）、original（使用目标服务提供商的原始接口契约）                                                                                                                                                          |
-| `context`        | object          | 非必填   | -      | 配置 AI 对话上下文信息                                                                                                                                                                                                                             |
-| `customSettings` | array of customSetting | 非必填   | -      | 为AI请求指定覆盖或者填充参数                                                                                                                                                                                                                           |
-| `failover`       | object | 非必填   | -      | 配置 apiToken 的 failover 策略，当 apiToken 不可用时，将其移出 apiToken 列表，待健康检测通过后重新添加回 apiToken 列表                                                                                                                                                      |
-| `retryOnFailure` | object | 非必填   | -      | 当请求失败时立即进行重试                                                                                                                                                                                                                              |  
-| `reasoningContentMode`       | string          | 非必填   | -      | 如何处理大模型服务返回的推理内容。目前支持以下取值：passthrough（正常输出推理内容）、ignore（不输出推理内容）、concat（将推理内容拼接在常规输出内容之前）。默认为 passthrough。仅支持通义千问服务。                                                                                                                            |
-| `capabilities`       | map of string | 非必填   | -      | 部分provider的部分ai能力原生兼容openai/v1格式，不需要重写，可以直接转发，通过此配置项指定来开启转发, key表示的是采用的厂商协议能力，values表示的真实的厂商该能力的api path, 厂商协议能力当前支持: openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank |
+| 名称                   | 数据类型               | 填写要求 | 默认值 | 描述                                                                                                                                                                                                                                                                                                                                                                                                                                     |
+| ---------------------- | ---------------------- | -------- | ------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `type`                 | string                 | 必填     | -      | AI 服务提供商名称                                                                                                                                                                                                                                                                                                                                                                                                                        |
+| `apiTokens`            | array of string        | 非必填   | -      | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token，插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。                                                                                                                                                                                                                                                                                                               |
+| `timeout`              | number                 | 非必填   | -      | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000，即 2 分钟。此项配置目前仅用于获取上下文信息，并不影响实际转发大模型请求。                                                                                                                                                                                                                                                                                                           |
+| `modelMapping`         | map of string          | 非必填   | -      | AI 模型映射表，用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>1. 支持前缀匹配。例如用 "gpt-3-\*" 匹配所有名称以“gpt-3-”开头的模型；<br/>2. 支持使用 "\*" 为键来配置通用兜底映射关系；<br/>3. 如果映射的目标名称为空字符串 ""，则表示保留原模型名称。<br/>4. 支持以 `~` 前缀使用正则匹配。例如用 "~gpt(.\*)" 匹配所有以 "gpt" 开头的模型并支持在目标模型中使用 capture group 引用匹配到的内容。示例: "~gpt(.\*): openai/gpt\$1" |
+| `protocol`             | string                 | 非必填   | -      | 插件对外提供的 API 接口契约。目前支持以下取值：openai（默认值，使用 OpenAI 的接口契约）、original（使用目标服务提供商的原始接口契约）                                                                                                                                                                                                                                                                                                    |
+| `context`              | object                 | 非必填   | -      | 配置 AI 对话上下文信息                                                                                                                                                                                                                                                                                                                                                                                                                   |
+| `customSettings`       | array of customSetting | 非必填   | -      | 为 AI 请求指定覆盖或者填充参数                                                                                                                                                                                                                                                                                                                                                                                                           |
+| `failover`             | object                 | 非必填   | -      | 配置 apiToken 的 failover 策略，当 apiToken 不可用时，将其移出 apiToken 列表，待健康检测通过后重新添加回 apiToken 列表                                                                                                                                                                                                                                                                                                                   |
+| `retryOnFailure`       | object                 | 非必填   | -      | 当请求失败时立即进行重试                                                                                                                                                                                                                                                                                                                                                                                                                 |
+| `reasoningContentMode` | string                 | 非必填   | -      | 如何处理大模型服务返回的推理内容。目前支持以下取值：passthrough（正常输出推理内容）、ignore（不输出推理内容）、concat（将推理内容拼接在常规输出内容之前）。默认为 passthrough。仅支持通义千问服务。                                                                                                                                                                                                                                      |
+| `capabilities`         | map of string          | 非必填   | -      | 部分 provider 的部分 ai 能力原生兼容 openai/v1 格式，不需要重写，可以直接转发，通过此配置项指定来开启转发, key 表示的是采用的厂商协议能力，values 表示的真实的厂商该能力的 api path, 厂商协议能力当前支持: openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank                                                                                                           |

 `context`的配置字段说明如下：

-| 名称            | 数据类型   | 填写要求 | 默认值 | 描述                               |
-|---------------|--------|------|-----|----------------------------------|
-| `fileUrl`     | string | 必填   | -   | 保存 AI 对话上下文的文件 URL。仅支持纯文本类型的文件内容 |
-| `serviceName` | string | 必填   | -   | URL 所对应的 Higress 后端服务完整名称        |
-| `servicePort` | number | 必填   | -   | URL 所对应的 Higress 后端服务访问端口        |
-
+| 名称          | 数据类型 | 填写要求 | 默认值 | 描述                                                     |
+| ------------- | -------- | -------- | ------ | -------------------------------------------------------- |
+| `fileUrl`     | string   | 必填     | -      | 保存 AI 对话上下文的文件 URL。仅支持纯文本类型的文件内容 |
+| `serviceName` | string   | 必填     | -      | URL 所对应的 Higress 后端服务完整名称                    |
+| `servicePort` | number   | 必填     | -      | URL 所对应的 Higress 后端服务访问端口                    |

 `customSettings`的配置字段说明如下：

 | 名称        | 数据类型              | 填写要求 | 默认值 | 描述                                                                                                                         |
 | ----------- | --------------------- | -------- | ------ | ---------------------------------------------------------------------------------------------------------------------------- |
 | `name`      | string                | 必填     | -      | 想要设置的参数的名称，例如`max_tokens`                                                                                       |
-| `value`     | string/int/float/bool | 必填     | -      | 想要设置的参数的值，例如0                                                                                                    |
+| `value`     | string/int/float/bool | 必填     | -      | 想要设置的参数的值，例如 0                                                                                                   |
 | `mode`      | string                | 非必填   | "auto" | 参数设置的模式，可以设置为"auto"或者"raw"，如果为"auto"则会自动根据协议对参数名做改写，如果为"raw"则不会有任何改写和限制检查 |
-| `overwrite` | bool                  | 非必填   | true   | 如果为false则只在用户没有设置这个参数时填充参数，否则会直接覆盖用户原有的参数设置                                            |
-
-
-custom-setting会遵循如下表格，根据`name`和协议来替换对应的字段，用户需要填写表格中`settingName`列中存在的值。例如用户将`name`设置为`max_tokens`，在openai协议中会替换`max_tokens`，在gemini中会替换`maxOutputTokens`。
-`none`表示该协议不支持此参数。如果`name`不在此表格中或者对应协议不支持此参数，同时没有设置raw模式，则配置不会生效。
+| `overwrite` | bool                  | 非必填   | true   | 如果为 false 则只在用户没有设置这个参数时填充参数，否则会直接覆盖用户原有的参数设置                                          |

+custom-setting 会遵循如下表格，根据`name`和协议来替换对应的字段，用户需要填写表格中`settingName`列中存在的值。例如用户将`name`设置为`max_tokens`，在 openai 协议中会替换`max_tokens`，在 gemini 中会替换`maxOutputTokens`。
+`none`表示该协议不支持此参数。如果`name`不在此表格中或者对应协议不支持此参数，同时没有设置 raw 模式，则配置不会生效。

 | settingName | openai      | baidu             | spark       | qwen        | gemini          | hunyuan     | claude      | minimax            |
 | ----------- | ----------- | ----------------- | ----------- | ----------- | --------------- | ----------- | ----------- | ------------------ |
@@ -76,32 +72,31 @@ custom-setting会遵循如下表格，根据`name`和协议来替换对应的字
 | top_k       | none        | none              | top_k       | none        | topK            | none        | top_k       | none               |
 | seed        | seed        | none              | none        | seed        | none            | none        | none        | none               |

-如果启用了raw模式，custom-setting会直接用输入的`name`和`value`去更改请求中的json内容，而不对参数名称做任何限制和修改。
-对于大多数协议，custom-setting都会在json内容的根路径修改或者填充参数。对于`qwen`协议，ai-proxy会在json的`parameters`子路径下做配置。对于`gemini`协议，则会在`generation_config`子路径下做配置。
+如果启用了 raw 模式，custom-setting 会直接用输入的`name`和`value`去更改请求中的 json 内容，而不对参数名称做任何限制和修改。
+对于大多数协议，custom-setting 都会在 json 内容的根路径修改或者填充参数。对于`qwen`协议，ai-proxy 会在 json 的`parameters`子路径下做配置。对于`gemini`协议，则会在`generation_config`子路径下做配置。

 `failover` 的配置字段说明如下：

-| 名称               | 数据类型   | 填写要求            | 默认值   | 描述                                |
-|------------------|--------|-----------------|-------|-----------------------------------|
-| enabled | bool   | 非必填             | false | 是否启用 apiToken 的 failover 机制       |
-| failureThreshold | int    | 非必填             | 3     | 触发 failover 连续请求失败的阈值（次数）         |
-| successThreshold | int    | 非必填             | 1     | 健康检测的成功阈值（次数）                     |
-| healthCheckInterval | int    | 非必填             | 5000  | 健康检测的间隔时间，单位毫秒                    |
-| healthCheckTimeout | int    | 非必填             | 5000  | 健康检测的超时时间，单位毫秒                    |
-| healthCheckModel | string | 启用 failover 时必填 |      | 健康检测使用的模型                         |
-| failoverOnStatus | array of string | 非必填    | ["4.*", "5.*"]     | 需要进行 failover 的原始请求的状态码，支持正则表达式匹配 |
+| 名称                | 数据类型        | 填写要求             | 默认值         | 描述                                                     |
+| ------------------- | --------------- | -------------------- | -------------- | -------------------------------------------------------- |
+| enabled             | bool            | 非必填               | false          | 是否启用 apiToken 的 failover 机制                       |
+| failureThreshold    | int             | 非必填               | 3              | 触发 failover 连续请求失败的阈值（次数）                 |
+| successThreshold    | int             | 非必填               | 1              | 健康检测的成功阈值（次数）                               |
+| healthCheckInterval | int             | 非必填               | 5000           | 健康检测的间隔时间，单位毫秒                             |
+| healthCheckTimeout  | int             | 非必填               | 5000           | 健康检测的超时时间，单位毫秒                             |
+| healthCheckModel    | string          | 启用 failover 时必填 |                | 健康检测使用的模型                                       |
+| failoverOnStatus    | array of string | 非必填               | ["4.*", "5.*"] | 需要进行 failover 的原始请求的状态码，支持正则表达式匹配 |

 `retryOnFailure` 的配置字段说明如下：

 目前仅支持对非流式请求进行重试。

-
-| 名称               | 数据类型   | 填写要求   | 默认值   | 描述                        |
-|------------------|--------|--------|-------|---------------------------|
-| enabled | bool   | 非必填    | false | 是否启用失败请求重试                |
-| maxRetries | int    | 非必填    | 1     | 最大重试次数                    |
-| retryTimeout | int    | 非必填    | 30000 | 重试超时时间，单位毫秒               |
-| retryOnStatus | array of string | 非必填    | ["4.*", "5.*"]     | 需要进行重试的原始请求的状态码，支持正则表达式匹配 |
+| 名称          | 数据类型        | 填写要求 | 默认值         | 描述                                               |
+| ------------- | --------------- | -------- | -------------- | -------------------------------------------------- |
+| enabled       | bool            | 非必填   | false          | 是否启用失败请求重试                               |
+| maxRetries    | int             | 非必填   | 1              | 最大重试次数                                       |
+| retryTimeout  | int             | 非必填   | 30000          | 重试超时时间，单位毫秒                             |
+| retryOnStatus | array of string | 非必填   | ["4.*", "5.*"] | 需要进行重试的原始请求的状态码，支持正则表达式匹配 |

 ### 提供商特有配置

@@ -109,19 +104,18 @@ custom-setting会遵循如下表格，根据`name`和协议来替换对应的字

 OpenAI 所对应的 `type` 为 `openai`。它特有的配置字段如下:

-| 名称              | 数据类型 | 填写要求 | 默认值 | 描述                                                                          |
-|-------------------|----------|----------|--------|-------------------------------------------------------------------------------|
-| `openaiCustomUrl` | string   | 非必填   | -      | 基于OpenAI协议的自定义后端URL，例如: www.example.com/myai/v1/chat/completions |
-| `responseJsonSchema` | object | 非必填 | - | 预先定义OpenAI响应需满足的Json Schema, 注意目前仅特定的几种模型支持该用法|
-
+| 名称                 | 数据类型 | 填写要求 | 默认值 | 描述                                                                               |
+| -------------------- | -------- | -------- | ------ | ---------------------------------------------------------------------------------- |
+| `openaiCustomUrl`    | string   | 非必填   | -      | 基于 OpenAI 协议的自定义后端 URL，例如: `www.example.com/myai/v1/chat/completions` |
+| `responseJsonSchema` | object   | 非必填   | -      | 预先定义 OpenAI 响应需满足的 Json Schema, 注意目前仅特定的几种模型支持该用法       |

 #### Azure OpenAI

 Azure OpenAI 所对应的 `type` 为 `azure`。它特有的配置字段如下：

-| 名称                | 数据类型   | 填写要求 | 默认值 | 描述                                           |
-|-------------------|--------|------|-----|----------------------------------------------|
-| `azureServiceUrl` | string | 必填   | -   | Azure OpenAI 服务的 URL，须包含 `api-version` 查询参数。 |
+| 名称              | 数据类型 | 填写要求 | 默认值 | 描述                                                     |
+| ----------------- | -------- | -------- | ------ | -------------------------------------------------------- |
+| `azureServiceUrl` | string   | 必填     | -      | Azure OpenAI 服务的 URL，须包含 `api-version` 查询参数。 |

 **注意：** Azure OpenAI 只支持配置一个 API Token。

@@ -129,19 +123,19 @@ Azure OpenAI 所对应的 `type` 为 `azure`。它特有的配置字段如下：

 月之暗面所对应的 `type` 为 `moonshot`。它特有的配置字段如下：

-| 名称               | 数据类型   | 填写要求 | 默认值 | 描述                                                          |
-|------------------|--------|------|-----|-------------------------------------------------------------|
-| `moonshotFileId` | string | 非必填  | -   | 通过文件接口上传至月之暗面的文件 ID，其内容将被用做 AI 对话的上下文。不可与 `context` 字段同时配置。 |
+| 名称             | 数据类型 | 填写要求 | 默认值 | 描述                                                                                                 |
+| ---------------- | -------- | -------- | ------ | ---------------------------------------------------------------------------------------------------- |
+| `moonshotFileId` | string   | 非必填   | -      | 通过文件接口上传至月之暗面的文件 ID，其内容将被用做 AI 对话的上下文。不可与 `context` 字段同时配置。 |

 #### 通义千问（Qwen）

 通义千问所对应的 `type` 为 `qwen`。它特有的配置字段如下：

-| 名称                   | 数据类型        | 填写要求 | 默认值 | 描述                                                         |
-| ---------------------- | --------------- | -------- | ------ | ------------------------------------------------------------ |
-| `qwenEnableSearch`     | boolean         | 非必填   | -      | 是否启用通义千问内置的互联网搜索功能。                       |
-| `qwenFileIds`          | array of string | 非必填   | -      | 通过文件接口上传至Dashscope的文件 ID，其内容将被用做 AI 对话的上下文。不可与 `context` 字段同时配置。 |
-| `qwenEnableCompatible` | boolean         | 非必填   | false  | 开启通义千问兼容模式。启用通义千问兼容模式后，将调用千问的兼容模式接口，同时对请求/响应不做修改。 |
+| 名称                   | 数据类型        | 填写要求 | 默认值 | 描述                                                                                                    |
+| ---------------------- | --------------- | -------- | ------ | ------------------------------------------------------------------------------------------------------- |
+| `qwenEnableSearch`     | boolean         | 非必填   | -      | 是否启用通义千问内置的互联网搜索功能。                                                                  |
+| `qwenFileIds`          | array of string | 非必填   | -      | 通过文件接口上传至 Dashscope 的文件 ID，其内容将被用做 AI 对话的上下文。不可与 `context` 字段同时配置。 |
+| `qwenEnableCompatible` | boolean         | 非必填   | false  | 开启通义千问兼容模式。启用通义千问兼容模式后，将调用千问的兼容模式接口，同时对请求/响应不做修改。       |

 #### 百川智能 (Baichuan AI)

@@ -151,13 +145,13 @@ Azure OpenAI 所对应的 `type` 为 `azure`。它特有的配置字段如下：

 零一万物所对应的 `type` 为 `yi`。它并无特有的配置字段。

-#### 智谱AI（Zhipu AI）
+#### 智谱 AI（Zhipu AI）

-智谱AI所对应的 `type` 为 `zhipuai`。它并无特有的配置字段。
+智谱 AI 所对应的 `type` 为 `zhipuai`。它并无特有的配置字段。

 #### DeepSeek（DeepSeek）

-DeepSeek所对应的 `type` 为 `deepseek`。它并无特有的配置字段。
+DeepSeek 所对应的 `type` 为 `deepseek`。它并无特有的配置字段。

 #### Groq

@@ -167,13 +161,13 @@ Groq 所对应的 `type` 为 `groq`。它并无特有的配置字段。

 文心一言所对应的 `type` 为 `baidu`。它并无特有的配置字段。

-#### 360智脑
+#### 360 智脑

-360智脑所对应的 `type` 为 `ai360`。它并无特有的配置字段。
+360 智脑所对应的 `type` 为 `ai360`。它并无特有的配置字段。

-#### GitHub模型
+#### GitHub 模型

-GitHub模型所对应的 `type` 为 `github`。它并无特有的配置字段。
+GitHub 模型所对应的 `type` 为 `github`。它并无特有的配置字段。

 #### Mistral

@@ -181,38 +175,38 @@ Mistral 所对应的 `type` 为 `mistral`。它并无特有的配置字段。

 #### MiniMax

-MiniMax所对应的 `type` 为 `minimax`。它特有的配置字段如下：
+MiniMax 所对应的 `type` 为 `minimax`。它特有的配置字段如下：

-| 名称             | 数据类型 | 填写要求                       | 默认值 | 描述                                                             |
-| ---------------- | -------- | ------------------------------ | ------ |----------------------------------------------------------------|
-| `minimaxApiType` | string   | v2 和 pro 中选填一项           | v2     | v2 代表 ChatCompletion v2 API，pro 代表 ChatCompletion Pro API      |
+| 名称             | 数据类型 | 填写要求                       | 默认值 | 描述                                                                    |
+| ---------------- | -------- | ------------------------------ | ------ | ----------------------------------------------------------------------- |
+| `minimaxApiType` | string   | v2 和 pro 中选填一项           | v2     | v2 代表 ChatCompletion v2 API，pro 代表 ChatCompletion Pro API          |
 | `minimaxGroupId` | string   | `minimaxApiType` 为 pro 时必填 | -      | `minimaxApiType` 为 pro 时使用 ChatCompletion Pro API，需要设置 groupID |

 #### Anthropic Claude

 Anthropic Claude 所对应的 `type` 为 `claude`。它特有的配置字段如下：

-| 名称        | 数据类型   | 填写要求 | 默认值 | 描述                               |
-|-----------|--------|------|-----|----------------------------------|
-| `claudeVersion` | string | 可选   | -   | Claude 服务的 API 版本，默认为 2023-06-01 |
+| 名称            | 数据类型 | 填写要求 | 默认值 | 描述                                      |
+| --------------- | -------- | -------- | ------ | ----------------------------------------- |
+| `claudeVersion` | string   | 可选     | -      | Claude 服务的 API 版本，默认为 2023-06-01 |

 #### Ollama

 Ollama 所对应的 `type` 为 `ollama`。它特有的配置字段如下：

-| 名称                | 数据类型   | 填写要求 | 默认值 | 描述                                           |
-|-------------------|--------|------|-----|----------------------------------------------|
-| `ollamaServerHost` | string | 必填   | -   | Ollama 服务器的主机地址 |
-| `ollamaServerPort` | number | 必填   | -   | Ollama 服务器的端口号，默认为11434 |
+| 名称               | 数据类型 | 填写要求 | 默认值 | 描述                                |
+| ------------------ | -------- | -------- | ------ | ----------------------------------- |
+| `ollamaServerHost` | string   | 必填     | -      | Ollama 服务器的主机地址             |
+| `ollamaServerPort` | number   | 必填     | -      | Ollama 服务器的端口号，默认为 11434 |

 #### 混元

 混元所对应的 `type` 为 `hunyuan`。它特有的配置字段如下：

-| 名称                | 数据类型   | 填写要求 | 默认值 | 描述                                           |
-|-------------------|--------|------|-----|----------------------------------------------|
-| `hunyuanAuthId` | string | 必填   | -   | 混元用于v3版本认证的id |
-| `hunyuanAuthKey` | string | 必填   | -   | 混元用于v3版本认证的key |
+| 名称             | 数据类型 | 填写要求 | 默认值 | 描述                       |
+| ---------------- | -------- | -------- | ------ | -------------------------- |
+| `hunyuanAuthId`  | string   | 必填     | -      | 混元用于 v3 版本认证的 id  |
+| `hunyuanAuthKey` | string   | 必填     | -      | 混元用于 v3 版本认证的 key |

 #### 阶跃星辰 (Stepfun)

@@ -222,23 +216,24 @@ Ollama 所对应的 `type` 为 `ollama`。它特有的配置字段如下：

 Cloudflare Workers AI 所对应的 `type` 为 `cloudflare`。它特有的配置字段如下：

-| 名称                | 数据类型   | 填写要求 | 默认值 | 描述                                                                                                                         |
-|-------------------|--------|------|-----|----------------------------------------------------------------------------------------------------------------------------|
-| `cloudflareAccountId` | string | 必填   | -   | [Cloudflare Account ID](https://developers.cloudflare.com/workers-ai/get-started/rest-api/#1-get-api-token-and-account-id) |
+| 名称                  | 数据类型 | 填写要求 | 默认值 | 描述                                                                                                                       |
+| --------------------- | -------- | -------- | ------ | -------------------------------------------------------------------------------------------------------------------------- |
+| `cloudflareAccountId` | string   | 必填     | -      | [Cloudflare Account ID](https://developers.cloudflare.com/workers-ai/get-started/rest-api/#1-get-api-token-and-account-id) |

 #### 星火 (Spark)

 星火所对应的 `type` 为 `spark`。它并无特有的配置字段。

-讯飞星火认知大模型的`apiTokens`字段值为`APIKey:APISecret`。即填入自己的APIKey与APISecret，并以`:`分隔。
+讯飞星火认知大模型的`apiTokens`字段值为`APIKey:APISecret`。即填入自己的 APIKey 与 APISecret，并以`:`分隔。

 #### Gemini

 Gemini 所对应的 `type` 为 `gemini`。它特有的配置字段如下：

-| 名称                  | 数据类型 | 填写要求 | 默认值 | 描述                                                                                              |
-| --------------------- | -------- | -------- |-----|-------------------------------------------------------------------------------------------------|
-| `geminiSafetySetting` | map of string   | 非必填     | -   | Gemini AI内容过滤和安全级别设定。参考[Safety settings](https://ai.google.dev/gemini-api/docs/safety-settings) |
+| 名称                  | 数据类型      | 填写要求 | 默认值   | 描述                                                                                                                                       |
+| --------------------- | ------------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
+| `geminiSafetySetting` | map of string | 非必填   | -        | Gemini AI 内容过滤和安全级别设定。参考[Safety settings](https://ai.google.dev/gemini-api/docs/safety-settings)                             |
+| `apiVersion`          | string        | 非必填   | `v1beta` | 用于指定 API 的版本，可选择 `v1` 或 `v1beta`。版本差异请参考[API versions explained](https://ai.google.dev/gemini-api/docs/api-versions)。   |

 #### DeepL

@@ -253,18 +248,42 @@ DeepL 所对应的 `type` 为 `deepl`。它特有的配置字段如下：
 Cohere 所对应的 `type` 为 `cohere`。它并无特有的配置字段。

 #### Together-AI
+
 Together-AI 所对应的 `type` 为 `together-ai`。它并无特有的配置字段。

 #### Dify
+
 Dify 所对应的 `type` 为 `dify`。它特有的配置字段如下:

-| 名称 | 数据类型 | 填写要求 | 默认值 | 描述                         |
-| -- | -------- |------| ------ | ---------------------------- |
-| `difyApiUrl` | string   | 非必填  | -      | dify私有化部署的url |
-| `botType` | string   | 非必填  | -      | dify的应用类型，Chat/Completion/Agent/Workflow |
-| `inputVariable` | string   | 非必填  | -      | dify中应用类型为workflow时需要设置输入变量，当botType为workflow时一起使用 |
-| `outputVariable` | string   | 非必填  | -      | dify中应用类型为workflow时需要设置输出变量，当botType为workflow时一起使用 |
+| 名称             | 数据类型 | 填写要求 | 默认值 | 描述                                                                             |
+| ---------------- | -------- | -------- | ------ | -------------------------------------------------------------------------------- |
+| `difyApiUrl`     | string   | 非必填   | -      | dify 私有化部署的 url                                                            |
+| `botType`        | string   | 非必填   | -      | dify 的应用类型，Chat/Completion/Agent/Workflow                                  |
+| `inputVariable`  | string   | 非必填   | -      | dify 中应用类型为 workflow 时需要设置输入变量，当 botType 为 workflow 时一起使用 |
+| `outputVariable` | string   | 非必填   | -      | dify 中应用类型为 workflow 时需要设置输出变量，当 botType 为 workflow 时一起使用 |

+#### Google Vertex AI
+
+Google Vertex AI 所对应的 `type` 为 `vertex`。它特有的配置字段如下：
+
+| 名称                         | 数据类型       | 填写要求   | 默认值    | 描述                                                                            |
+|-----------------------------|---------------|--------|--------|-------------------------------------------------------------------------------|
+| `vertexAuthKey`             | string        | 必填     | -      | 用于认证的 Google Service Account JSON Key，格式为 PEM 编码的 PKCS#8 私钥和 client_email 等信息 |
+| `vertexRegion`              | string        | 必填     | -      | Google Cloud 区域（如 us-central1, europe-west4 等），用于构建 Vertex API 地址             |
+| `vertexProjectId`           | string        | 必填     | -      | Google Cloud 项目 ID，用于标识目标 GCP 项目                                              |
+| `vertexAuthServiceName`     | string        | 必填     | -      | 用于 OAuth2 认证的服务名称，该服务用于访问 oauth2.googleapis.com                               |
+| `vertexGeminiSafetySetting` | map of string | 非必填    | -      | Gemini 模型的内容安全过滤设置。                                                           |
+| `vertexTokenRefreshAhead`   | number        | 非必填    | -      | Vertex access token 刷新提前时间（单位：秒）                                           |
+
+#### AWS Bedrock
+
+AWS Bedrock 所对应的 `type` 为 `bedrock`。它特有的配置字段如下：
+
+| 名称             | 数据类型   | 填写要求 | 默认值 | 描述                           |
+|----------------|--------|------|-----|------------------------------|
+| `awsAccessKey` | string | 必填   | -   | AWS Access Key，用于身份认证        |
+| `awsSecretKey` | string | 必填   | -   | AWS Secret Access Key，用于身份认证 |
+| `awsRegion`    | string | 必填   | -   | AWS 区域，例如：us-east-1          |

 ## 用法示例

@@ -376,20 +395,20 @@ provider:
 provider:
  type: qwen
  apiTokens:
-    - "YOUR_QWEN_API_TOKEN"
+    - 'YOUR_QWEN_API_TOKEN'
  modelMapping:
-    'gpt-3': "qwen-turbo"
-    'gpt-35-turbo': "qwen-plus"
-    'gpt-4-turbo': "qwen-max"
-    'gpt-4-*': "qwen-max"
-    'gpt-4o': "qwen-vl-plus"
+    'gpt-3': 'qwen-turbo'
+    'gpt-35-turbo': 'qwen-plus'
+    'gpt-4-turbo': 'qwen-max'
+    'gpt-4-*': 'qwen-max'
+    'gpt-4o': 'qwen-vl-plus'
    'text-embedding-v1': 'text-embedding-v1'
-    '*': "qwen-turbo"
+    '*': 'qwen-turbo'
 ```

 **AI 对话请求示例**

-URL: http://your-domain/v1/chat/completions
+URL: <http://your-domain/v1/chat/completions>

 请求示例：

@@ -434,7 +453,7 @@ URL: http://your-domain/v1/chat/completions

 **多模态模型 API 请求示例（适用于 `qwen-vl-plus` 和 `qwen-vl-max` 模型）**

-URL: http://your-domain/v1/chat/completions
+URL: <http://your-domain/v1/chat/completions>

 请求示例：

@@ -493,7 +512,7 @@ URL: http://your-domain/v1/chat/completions

 **文本向量请求示例**

-URL: http://your-domain/v1/embeddings
+URL: <http://your-domain/v1/embeddings>

 请求示例：

@@ -606,12 +625,12 @@ provider:
 provider:
  type: qwen
  apiTokens:
-    - "YOUR_QWEN_API_TOKEN"
+    - 'YOUR_QWEN_API_TOKEN'
  modelMapping:
-    "*": "qwen-long" # 通义千问的文件上下文只能在 qwen-long 模型下使用
+    '*': 'qwen-long' # 通义千问的文件上下文只能在 qwen-long 模型下使用
  qwenFileIds:
-    - "file-fe-xxx"
-    - "file-fe-yyy"
+    - 'file-fe-xxx'
+    - 'file-fe-yyy'
 ```

 **请求示例**
@@ -653,7 +672,7 @@ provider:
 }
 ```

-### 使用original协议代理百炼智能体应用
+### 使用 original 协议代理百炼智能体应用

 **配置信息**

@@ -661,17 +680,18 @@ provider:
 provider:
  type: qwen
  apiTokens:
-    - "YOUR_DASHSCOPE_API_TOKEN"
+    - 'YOUR_DASHSCOPE_API_TOKEN'
  protocol: original
 ```

 **请求实例**
+
 ```json
 {
  "input": {
    "prompt": "介绍一下Dubbo"
  },
-  "parameters":  {},
+  "parameters": {},
  "debug": {}
 }
 ```
@@ -789,7 +809,7 @@ provider:
 provider:
  type: groq
  apiTokens:
-    - "YOUR_GROQ_API_TOKEN"
+    - 'YOUR_GROQ_API_TOKEN'
 ```

 **请求示例**
@@ -848,8 +868,8 @@ provider:
 provider:
  type: claude
  apiTokens:
-    - "YOUR_CLAUDE_API_TOKEN"
-  version: "2023-06-01"
+    - 'YOUR_CLAUDE_API_TOKEN'
+  version: '2023-06-01'
 ```

 **请求示例**
@@ -899,14 +919,14 @@ provider:

 ```yaml
 provider:
-  type: "hunyuan"
-  hunyuanAuthKey: "<YOUR AUTH KEY>"
+  type: 'hunyuan'
+  hunyuanAuthKey: '<YOUR AUTH KEY>'
  apiTokens:
-    - ""
-  hunyuanAuthId: "<YOUR AUTH ID>"
+    - ''
+  hunyuanAuthId: '<YOUR AUTH ID>'
  timeout: 1200000
  modelMapping:
-    "*": "hunyuan-lite"
+    '*': 'hunyuan-lite'
 ```

 **请求示例**
@@ -967,10 +987,10 @@ curl --location 'http://<your higress domain>/v1/chat/completions' \
 provider:
  type: baidu
  apiTokens:
-    - "YOUR_BAIDU_API_TOKEN"
+    - 'YOUR_BAIDU_API_TOKEN'
  modelMapping:
-    'gpt-3': "ERNIE-4.0"
-    '*': "ERNIE-4.0"
+    'gpt-3': 'ERNIE-4.0'
+    '*': 'ERNIE-4.0'
 ```

 **请求示例**
@@ -1014,7 +1034,7 @@ provider:
 }
 ```

-### 使用 OpenAI 协议代理MiniMax服务
+### 使用 OpenAI 协议代理 MiniMax 服务

 **配置信息**

@@ -1022,11 +1042,11 @@ provider:
 provider:
  type: minimax
  apiTokens:
-    - "YOUR_MINIMAX_API_TOKEN"
+    - 'YOUR_MINIMAX_API_TOKEN'
  modelMapping:
-    "gpt-3": "abab6.5s-chat"
-    "gpt-4": "abab6.5g-chat"
-    "*": "abab6.5t-chat"
+    'gpt-3': 'abab6.5s-chat'
+    'gpt-4': 'abab6.5g-chat'
+    '*': 'abab6.5t-chat'
 ```

 **请求示例**
@@ -1090,12 +1110,12 @@ provider:
 provider:
  type: github
  apiTokens:
-    - "YOUR_GITHUB_ACCESS_TOKEN"
+    - 'YOUR_GITHUB_ACCESS_TOKEN'
  modelMapping:
-    "gpt-4o": "gpt-4o"
-    "gpt-4": "Phi-3.5-MoE-instruct"
-    "gpt-3.5": "cohere-command-r-08-2024"
-    "text-embedding-3-large": "text-embedding-3-large"
+    'gpt-4o': 'gpt-4o'
+    'gpt-4': 'Phi-3.5-MoE-instruct'
+    'gpt-3.5': 'cohere-command-r-08-2024'
+    'text-embedding-3-large': 'text-embedding-3-large'
 ```

 **请求示例**
@@ -1121,6 +1141,7 @@ provider:
 ```

 **响应示例**
+
 ```json
 {
  "choices": [
@@ -1183,7 +1204,7 @@ provider:
 }
 ```

-### 使用 OpenAI 协议代理360智脑服务
+### 使用 OpenAI 协议代理 360 智脑服务

 **配置信息**

@@ -1191,13 +1212,13 @@ provider:
 provider:
  type: ai360
  apiTokens:
-    - "YOUR_360_API_TOKEN"
+    - 'YOUR_360_API_TOKEN'
  modelMapping:
-    "gpt-4o": "360gpt-turbo-responsibility-8k"
-    "gpt-4": "360gpt2-pro"
-    "gpt-3.5": "360gpt-turbo"
-    "text-embedding-3-small": "embedding_s1_v1.2"
-    "*": "360gpt-pro"
+    'gpt-4o': '360gpt-turbo-responsibility-8k'
+    'gpt-4': '360gpt2-pro'
+    'gpt-3.5': '360gpt-turbo'
+    'text-embedding-3-small': 'embedding_s1_v1.2'
+    '*': '360gpt-pro'
 ```

 **请求示例**
@@ -1257,14 +1278,14 @@ provider:

 **文本向量请求示例**

-URL: http://your-domain/v1/embeddings
+URL: <http://your-domain/v1/embeddings>

 请求示例：

 ```json
 {
-  "input":["你好"],
-  "model":"text-embedding-3-small"
+  "input": ["你好"],
+  "model": "text-embedding-3-small"
 }
 ```

@@ -1305,10 +1326,10 @@ URL: http://your-domain/v1/embeddings
 provider:
  type: cloudflare
  apiTokens:
-    - "YOUR_WORKERS_AI_API_TOKEN"
-  cloudflareAccountId: "YOUR_CLOUDFLARE_ACCOUNT_ID"
+    - 'YOUR_WORKERS_AI_API_TOKEN'
+  cloudflareAccountId: 'YOUR_CLOUDFLARE_ACCOUNT_ID'
  modelMapping:
-    "*": "@cf/meta/llama-3-8b-instruct"
+    '*': '@cf/meta/llama-3-8b-instruct'
 ```

 **请求示例**
@@ -1348,7 +1369,7 @@ provider:
 }
 ```

-### 使用 OpenAI 协议代理Spark服务
+### 使用 OpenAI 协议代理 Spark 服务

 **配置信息**

@@ -1356,11 +1377,11 @@ provider:
 provider:
  type: spark
  apiTokens:
-    - "APIKey:APISecret"
+    - 'APIKey:APISecret'
  modelMapping:
-    "gpt-4o": "generalv3.5"
-    "gpt-4": "generalv3"
-    "*": "general"
+    'gpt-4o': 'generalv3.5'
+    'gpt-4': 'generalv3'
+    '*': 'general'
 ```

 **请求示例**
@@ -1474,8 +1495,8 @@ provider:
 provider:
  type: deepl
  apiTokens:
-    - "YOUR_DEEPL_API_TOKEN"
-  targetLang: "ZH"
+    - 'YOUR_DEEPL_API_TOKEN'
+  targetLang: 'ZH'
 ```

 **请求示例**
@@ -1500,6 +1521,7 @@ provider:
 ```

 **响应示例**
+
 ```json
 {
  "choices": [
@@ -1522,16 +1544,18 @@ provider:
 ### 使用 OpenAI 协议代理 Together-AI 服务

 **配置信息**
+
 ```yaml
 provider:
  type: together-ai
  apiTokens:
-    - "YOUR_TOGETHER_AI_API_TOKEN"
+    - 'YOUR_TOGETHER_AI_API_TOKEN'
  modelMapping:
-    "*": "Qwen/Qwen2.5-72B-Instruct-Turbo"
+    '*': 'Qwen/Qwen2.5-72B-Instruct-Turbo'
 ```

 **请求示例**
+
 ```json
 {
  "model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
@@ -1545,6 +1569,7 @@ provider:
 ```

 **响应示例**
+
 ```json
 {
  "id": "8f5809d54b73efac",
@@ -1576,16 +1601,18 @@ provider:
 ### 使用 OpenAI 协议代理 Dify 服务

 **配置信息**
+
 ```yaml
 provider:
  type: dify
  apiTokens:
-    - "YOUR_DIFY_API_TOKEN"
+    - 'YOUR_DIFY_API_TOKEN'
  modelMapping:
-    "*": "dify"
+    '*': 'dify'
 ```

 **请求示例**
+
 ```json
 {
  "model": "gpt-4-turbo",
@@ -1600,6 +1627,7 @@ provider:
 ```

 **响应示例**
+
 ```json
 {
  "id": "e33fc636-f9e8-4fae-8d5e-fbd0acb09401",
@@ -1624,6 +1652,121 @@ provider:
 }
 ```

+### 使用 OpenAI 协议代理 Google Vertex 服务
+
+**配置信息**
+
+```yaml
+provider:
+  type: vertex
+  vertexAuthKey: |
+    {
+      "type": "service_account",
+      "project_id": "your-project-id",
+      "private_key_id": "your-private-key-id",
+      "private_key": "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n",
+      "client_email": "your-service-account@your-project.iam.gserviceaccount.com",
+      "token_uri": "https://oauth2.googleapis.com/token"
+    }
+  vertexRegion: us-central1
+  vertexProjectId: your-project-id
+  vertexAuthServiceName: your-auth-service-name
+```
+
+**请求示例**
+
+```json
+{
+  "model": "gemini-2.0-flash-001",
+  "messages": [
+    {
+      "role": "user",
+      "content": "你好，你是谁？"
+    }
+  ],
+  "stream": false
+}
+```
+
+**响应示例**
+
+```json
+{
+  "id": "chatcmpl-0000000000000",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "你好！我是 Vertex AI 提供的 Gemini 模型，由 Google 开发的人工智能助手。我可以回答问题、提供信息和帮助完成各种任务。有什么我可以帮您的吗？"
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "created": 1729986750,
+  "model": "gemini-2.0-flash-001",
+  "object": "chat.completion",
+  "usage": {
+    "prompt_tokens": 15,
+    "completion_tokens": 43,
+    "total_tokens": 58
+  }
+}
+```
+
+### 使用 OpenAI 协议代理 AWS Bedrock 服务
+
+**配置信息**
+
+```yaml
+provider:
+  type: bedrock
+  awsAccessKey: 'YOUR_AWS_ACCESS_KEY_ID'
+  awsSecretKey: 'YOUR_AWS_SECRET_ACCESS_KEY'
+  awsRegion: 'YOUR_AWS_REGION'
+```
+
+**请求示例**
+
+```json
+{
+  "model": "arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-3-5-haiku-20241022-v1:0",
+  "messages": [
+    {
+      "role": "user",
+      "content": "你好，你是谁？"
+    }
+  ],
+  "stream": false
+}
+```
+
+**响应示例**
+
+```json
+{
+  "id": "dc5812e2-6a62-49d6-829e-5c327b15e4e2",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "你好!我是Claude,一个由Anthropic开发的AI助手。很高兴认识你!我的目标是以诚实、有益且有意义的方式与人类交流。我会尽力提供准确和有帮助的信息,同时保持诚实和正直。请问我今天能为你做些什么呢?"
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "created": 1749657608,
+  "model": "arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-3-5-haiku-20241022-v1:0",
+  "object": "chat.completion",
+  "usage": {
+    "prompt_tokens": 16,
+    "completion_tokens": 101,
+    "total_tokens": 117
+  }
+}
+```
+

 ## 完整配置示例

@@ -1643,7 +1786,7 @@ spec:
        provider:
          type: groq
          apiTokens:
-            - "YOUR_API_TOKEN"
+            - 'YOUR_API_TOKEN'
      ingress:
        - groq
  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
@@ -1655,7 +1798,7 @@ metadata:
    higress.io/backend-protocol: HTTPS
    higress.io/destination: groq.dns
    higress.io/proxy-ssl-name: api.groq.com
-    higress.io/proxy-ssl-server-name: "on"
+    higress.io/proxy-ssl-server-name: 'on'
  labels:
    higress.io/resource-definer: higress
  name: groq
@@ -1716,7 +1859,7 @@ services:
    networks:
      - higress-net
    ports:
-      - "10000:10000"
+      - '10000:10000'
    volumes:
      - ./envoy.yaml:/etc/envoy/envoy.yaml
      - ./plugin.wasm:/etc/envoy/plugin.wasm
@@ -1745,7 +1888,7 @@ static_resources:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
-                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
+                '@type': type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                scheme_header_transformation:
                  scheme_to_overwrite: https
                stat_prefix: ingress_http
@@ -1753,23 +1896,23 @@ static_resources:
                access_log:
                  - name: envoy.access_loggers.stdout
                    typed_config:
-                      "@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog
+                      '@type': type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog
                # Modify as required
                route_config:
                  name: local_route
                  virtual_hosts:
                    - name: local_service
-                      domains: [ "*" ]
+                      domains: ['*']
                      routes:
                        - match:
-                            prefix: "/"
+                            prefix: '/'
                          route:
                            cluster: claude
                            timeout: 300s
                http_filters:
                  - name: claude
                    typed_config:
-                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
+                      '@type': type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
@@ -1780,7 +1923,7 @@ static_resources:
                              local:
                                filename: /etc/envoy/plugin.wasm
                          configuration:
-                            "@type": "type.googleapis.com/google.protobuf.StringValue"
+                            '@type': 'type.googleapis.com/google.protobuf.StringValue'
                            value: | # 插件配置
                              {
                                "provider": {
@@ -1809,8 +1952,8 @@ static_resources:
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
-          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
-          "sni": "api.anthropic.com"
+          '@type': type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
+          'sni': 'api.anthropic.com'
 ```

 访问示例：
--- a/plugins/wasm-go/extensions/ai-proxy/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-proxy/README_EN.md
@@ -208,6 +208,28 @@ For DeepL, the corresponding `type` is `deepl`. Its unique configuration field i
 | ------------ | --------- | ----------- | ------- | ------------------------------------ |
 | `targetLang` | string    | Required    | -       | The target language required by the DeepL translation service |

+#### Google Vertex AI
+For Vertex, the corresponding `type` is `vertex`. Its unique configuration fields are:
+
+| Name                        | Data Type     | Requirement   | Default | Description                                                                                                                                                 |
+|-----------------------------|---------------|---------------| ------ |-------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `vertexAuthKey`             | string        | Required      | -      | Google Service Account JSON Key used for authentication. The format should be PEM encoded PKCS#8 private key along with client_email and other information  |
+| `vertexRegion`              | string        | Required      | -      | Google Cloud region (e.g., us-central1, europe-west4) used to build the Vertex API address                                                                  |
+| `vertexProjectId`           | string        | Required      | -      | Google Cloud Project ID, used to identify the target GCP project                                                                                            |
+| `vertexAuthServiceName`     | string        | Required      | -      | Service name for OAuth2 authentication, used to access oauth2.googleapis.com                                                                                |
+| `vertexGeminiSafetySetting` | map of string | Optional      | -      | Gemini model content safety filtering settings.                                                                                                             |
+| `vertexTokenRefreshAhead`   | number        | Optional      | -      | Vertex access token refresh ahead time in seconds                                                                                                           |
+
+#### AWS Bedrock
+
+For AWS Bedrock, the corresponding `type` is `bedrock`. Its unique configuration fields are:
+
+| Name           | Data Type | Requirement | Default | Description                                   |
+|----------------|-----------|-------------|---------|-----------------------------------------------|
+| `awsAccessKey` | string    | Required    | -       | AWS Access Key used for authentication        |
+| `awsSecretKey` | string    | Required    | -       | AWS Secret Access Key used for authentication |
+| `awsRegion`    | string    | Required    | -       | AWS region, e.g., us-east-1                   |
+
 ## Usage Examples

 ### Using OpenAI Protocol Proxy for Azure OpenAI Service
@@ -1411,6 +1433,113 @@ provider:
 }
 ```

+### Utilizing OpenAI Protocol Proxy for Google Vertex Services
+**Configuration Information**
+```yaml
+provider:
+  type: vertex
+  vertexAuthKey: |
+    {
+      "type": "service_account",
+      "project_id": "your-project-id",
+      "private_key_id": "your-private-key-id",
+      "private_key": "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n",
+      "client_email": "your-service-account@your-project.iam.gserviceaccount.com",
+      "token_uri": "https://oauth2.googleapis.com/token"
+    }
+  vertexRegion: us-central1
+  vertexProjectId: your-project-id
+  vertexAuthServiceName: your-auth-service-name
+```
+
+**Request Example**
+```json
+{
+  "model": "gemini-2.0-flash-001",
+  "messages": [
+    {
+      "role": "user",
+      "content": "Who are you?"
+    }
+  ],
+  "stream": false
+}
+```
+
+**Response Example**
+```json
+{
+  "id": "chatcmpl-0000000000000",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "Hello! I am the Gemini model provided by Vertex AI, developed by Google. I can answer questions, provide information, and assist in completing various tasks. How can I help you today?"
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "created": 1729986750,
+  "model": "gemini-2.0-flash-001",
+  "object": "chat.completion",
+  "usage": {
+    "prompt_tokens": 15,
+    "completion_tokens": 43,
+    "total_tokens": 58
+  }
+}
+```
+
+### Utilizing OpenAI Protocol Proxy for AWS Bedrock Services
+**Configuration Information**
+```yaml
+provider:
+  type: bedrock
+  awsAccessKey: "YOUR_AWS_ACCESS_KEY_ID"
+  awsSecretKey: "YOUR_AWS_SECRET_ACCESS_KEY"
+  awsRegion: "YOUR_AWS_REGION"
+```
+
+**Request Example**
+```json
+{
+  "model": "arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-3-5-haiku-20241022-v1:0",
+  "messages": [
+    {
+      "role": "user",
+      "content": "who are you"
+    }
+  ],
+  "stream": false
+}
+```
+
+**Response Example**
+```json
+{
+  "id": "d52da49d-daf3-49d9-a105-0b527481fe14",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "I'm Claude, an AI created by Anthropic. I aim to be helpful, honest, and harmless. I won't pretend to be human, and I'll always try to be direct and truthful about what I am and what I can do."
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "created": 1749659050,
+  "model": "arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-3-5-haiku-20241022-v1:0",
+  "object": "chat.completion",
+  "usage": {
+    "prompt_tokens": 10,
+    "completion_tokens": 57,
+    "total_tokens": 67
+  }
+}
+```
+
 ## Full Configuration Example

 ### Kubernetes Example
--- a/plugins/wasm-go/extensions/ai-proxy/go.mod
+++ b/plugins/wasm-go/extensions/ai-proxy/go.mod
@@ -11,11 +11,11 @@ require (
 	github.com/higress-group/proxy-wasm-go-sdk v1.0.0
 	github.com/stretchr/testify v1.8.4
 	github.com/tidwall/gjson v1.17.3
+	github.com/wasilibs/go-re2 v1.6.0
 )

 require (
 	github.com/tetratelabs/wazero v1.7.2 // indirect
-	github.com/wasilibs/go-re2 v1.6.0 // indirect
 	golang.org/x/sys v0.21.0 // indirect
 )

--- a/plugins/wasm-go/extensions/ai-proxy/go.sum
+++ b/plugins/wasm-go/extensions/ai-proxy/go.sum
@@ -6,8 +6,6 @@ github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbG
 github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
 github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKEak5MWjBDhWjuHijU=
 github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0=
-github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
-github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
 github.com/magefile/mage v1.15.1-0.20230912152418-9f54e0f83e2a h1:tdPcGgyiH0K+SbsJBBm2oPyEIOTAvLBwD9TuUwVtZho=
 github.com/magefile/mage v1.15.1-0.20230912152418-9f54e0f83e2a/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
@@ -29,6 +27,7 @@ github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
 github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
 github.com/wasilibs/go-re2 v1.6.0 h1:CLlhDebt38wtl/zz4ww+hkXBMcxjrKFvTDXzFW2VOz8=
 github.com/wasilibs/go-re2 v1.6.0/go.mod h1:prArCyErsypRBI/jFAFJEbzyHzjABKqkzlidF0SNA04=
+github.com/wasilibs/nottinygc v0.4.0 h1:h1TJMihMC4neN6Zq+WKpLxgd9xCFMw7O9ETLwY2exJQ=
 golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
 golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
--- a/plugins/wasm-go/extensions/ai-proxy/main.go
+++ b/plugins/wasm-go/extensions/ai-proxy/main.go
@@ -352,12 +352,60 @@ func getApiName(path string) provider.ApiName {
 	if strings.HasSuffix(path, "/v1/images/generations") {
 		return provider.ApiNameImageGeneration
 	}
+	if strings.HasSuffix(path, "/v1/images/variations") {
+		return provider.ApiNameImageVariation
+	}
+	if strings.HasSuffix(path, "/v1/images/edits") {
+		return provider.ApiNameImageEdit
+	}
 	if strings.HasSuffix(path, "/v1/batches") {
 		return provider.ApiNameBatches
 	}
+	if util.RegRetrieveBatchPath.MatchString(path) {
+		return provider.ApiNameRetrieveBatch
+	}
+	if util.RegCancelBatchPath.MatchString(path) {
+		return provider.ApiNameCancelBatch
+	}
 	if strings.HasSuffix(path, "/v1/files") {
 		return provider.ApiNameFiles
 	}
+	if util.RegRetrieveFilePath.MatchString(path) {
+		return provider.ApiNameRetrieveFile
+	}
+	if util.RegRetrieveFileContentPath.MatchString(path) {
+		return provider.ApiNameRetrieveFileContent
+	}
+	if strings.HasSuffix(path, "/v1/models") {
+		return provider.ApiNameModels
+	}
+	if strings.HasSuffix(path, "/v1/fine_tuning/jobs") {
+		return provider.ApiNameFineTuningJobs
+	}
+	if util.RegRetrieveFineTuningJobPath.MatchString(path) {
+		return provider.ApiNameFineTuningRetrieveJob
+	}
+	if util.RegRetrieveFineTuningJobEventsPath.MatchString(path) {
+		return provider.PathOpenAIFineTuningJobEvents
+	}
+	if util.RegRetrieveFineTuningJobCheckpointsPath.MatchString(path) {
+		return provider.PathOpenAIFineTuningJobCheckpoints
+	}
+	if util.RegCancelFineTuningJobPath.MatchString(path) {
+		return provider.ApiNameFineTuningCancelJob
+	}
+	if util.RegResumeFineTuningJobPath.MatchString(path) {
+		return provider.ApiNameFineTuningResumeJob
+	}
+	if util.RegPauseFineTuningJobPath.MatchString(path) {
+		return provider.ApiNameFineTuningPauseJob
+	}
+	if util.RegFineTuningCheckpointPermissionPath.MatchString(path) {
+		return provider.ApiNameFineTuningCheckpointPermissions
+	}
+	if util.RegDeleteFineTuningCheckpointPermissionPath.MatchString(path) {
+		return provider.PathOpenAIFineDeleteTuningCheckpointPermission
+	}
 	// cohere style
 	if strings.HasSuffix(path, "/v1/rerank") {
 		return provider.ApiNameCohereV1Rerank
--- a/plugins/wasm-go/extensions/ai-proxy/provider/bedrock.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/bedrock.go
@@ -13,6 +13,8 @@ import (
 	"hash/crc32"
 	"io"
 	"net/http"
+	"net/url"
+	"strconv"
 	"strings"
 	"time"

@@ -21,6 +23,8 @@ import (
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
 )

 const (
@@ -32,11 +36,13 @@ const (
 	bedrockChatCompletionPath = "/model/%s/converse"
 	// converseStream路径 /model/{modelId}/converse-stream
 	bedrockStreamChatCompletionPath = "/model/%s/converse-stream"
-	bedrockSignedHeaders            = "host;x-amz-date"
+	// invoke_model 路径 /model/{modelId}/invoke
+	bedrockInvokeModelPath = "/model/%s/invoke"
+	bedrockSignedHeaders   = "host;x-amz-date"
+	requestIdHeader        = "X-Amzn-Requestid"
 )

-type bedrockProviderInitializer struct {
-}
+type bedrockProviderInitializer struct{}

 func (b *bedrockProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if len(config.awsAccessKey) == 0 || len(config.awsSecretKey) == 0 {
@@ -50,7 +56,8 @@ func (b *bedrockProviderInitializer) ValidateConfig(config *ProviderConfig) erro

 func (b *bedrockProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion): bedrockChatCompletionPath,
+		string(ApiNameChatCompletion):  bedrockChatCompletionPath,
+		string(ApiNameImageGeneration): bedrockInvokeModelPath,
 	}
 }

@@ -96,10 +103,10 @@ func (b *bedrockProvider) convertEventFromBedrockToOpenAI(ctx wrapper.HttpContex
 		chatChoice.Delta = &chatMessage{Content: bedrockEvent.Delta.Text}
 	}
 	if bedrockEvent.StopReason != nil {
-		chatChoice.FinishReason = stopReasonBedrock2OpenAI(*bedrockEvent.StopReason)
+		chatChoice.FinishReason = util.Ptr(stopReasonBedrock2OpenAI(*bedrockEvent.StopReason))
 	}
 	choices = append(choices, chatChoice)
-	requestId := ctx.GetStringContext("X-Amzn-Requestid", "")
+	requestId := ctx.GetStringContext(requestIdHeader, "")
 	openAIFormattedChunk := &chatCompletionResponse{
 		Id:                requestId,
 		Created:           time.Now().UnixMilli() / 1000,
@@ -110,7 +117,7 @@ func (b *bedrockProvider) convertEventFromBedrockToOpenAI(ctx wrapper.HttpContex
 	}
 	if bedrockEvent.Usage != nil {
 		openAIFormattedChunk.Choices = choices[:0]
-		openAIFormattedChunk.Usage = usage{
+		openAIFormattedChunk.Usage = &usage{
 			CompletionTokens: bedrockEvent.Usage.OutputTokens,
 			PromptTokens:     bedrockEvent.Usage.InputTokens,
 			TotalTokens:      bedrockEvent.Usage.TotalTokens,
@@ -152,6 +159,74 @@ type toolUseBlockDelta struct {
 	Input string `json:"input"`
 }

+type bedrockImageGenerationResponse struct {
+	Images []string `json:"images"`
+	Error  string   `json:"error"`
+}
+
+type bedrockImageGenerationTextToImageParams struct { // serialized as "textToImageParams" in bedrockImageGenerationRequest
+	Text            string  `json:"text"`
+	NegativeText    string  `json:"negativeText,omitempty"`
+	ConditionImage  string  `json:"conditionImage,omitempty"`
+	ControlMode     string  `json:"controlMode,omitempty"`
+	ControlStrength float32 `json:"controlStrength,omitempty"` // JSON key must be "controlStrength", not "controlLength"
+}
+
+type bedrockImageGenerationConfig struct {
+	Width          int     `json:"width"`
+	Height         int     `json:"height"`
+	Quality        string  `json:"quality,omitempty"`
+	CfgScale       float32 `json:"cfgScale,omitempty"`
+	Seed           int     `json:"seed,omitempty"`
+	NumberOfImages int     `json:"numberOfImages"`
+}
+
+type bedrockImageGenerationColorGuidedGenerationParams struct {
+	Colors         []string `json:"colors"`
+	ReferenceImage string   `json:"referenceImage"`
+	Text           string   `json:"text"`
+	NegativeText   string   `json:"negativeText,omitempty"`
+}
+
+type bedrockImageGenerationImageVariationParams struct {
+	Images             []string `json:"images"`
+	SimilarityStrength float32  `json:"similarityStrength"`
+	Text               string   `json:"text"`
+	NegativeText       string   `json:"negativeText,omitempty"`
+}
+
+type bedrockImageGenerationInPaintingParams struct {
+	Image        string `json:"image"`
+	MaskPrompt   string `json:"maskPrompt"`
+	MaskImage    string `json:"maskImage"`
+	Text         string `json:"text"`
+	NegativeText string `json:"negativeText,omitempty"`
+}
+
+type bedrockImageGenerationOutPaintingParams struct {
+	Image           string `json:"image"`
+	MaskPrompt      string `json:"maskPrompt"`
+	MaskImage       string `json:"maskImage"`
+	OutPaintingMode string `json:"outPaintingMode"`
+	Text            string `json:"text"`
+	NegativeText    string `json:"negativeText,omitempty"`
+}
+
+type bedrockImageGenerationBackgroundRemovalParams struct {
+	Image string `json:"image"`
+}
+
+type bedrockImageGenerationRequest struct {
+	TaskType                    string                                             `json:"taskType"`
+	ImageGenerationConfig       *bedrockImageGenerationConfig                      `json:"imageGenerationConfig"`
+	TextToImageParams           *bedrockImageGenerationTextToImageParams           `json:"textToImageParams,omitempty"`
+	ColorGuidedGenerationParams *bedrockImageGenerationColorGuidedGenerationParams `json:"colorGuidedGenerationParams,omitempty"`
+	ImageVariationParams        *bedrockImageGenerationImageVariationParams        `json:"imageVariationParams,omitempty"`
+	InPaintingParams            *bedrockImageGenerationInPaintingParams            `json:"inPaintingParams,omitempty"`
+	OutPaintingParams           *bedrockImageGenerationOutPaintingParams           `json:"outPaintingParams,omitempty"`
+	BackgroundRemovalParams     *bedrockImageGenerationBackgroundRemovalParams     `json:"backgroundRemovalParams,omitempty"`
+}
+
 func extractAmazonEventStreamEvents(ctx wrapper.HttpContext, chunk []byte) []ConverseStreamEvent {
 	body := chunk
 	if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
@@ -489,7 +564,7 @@ func validateCRC(r io.Reader, expect uint32) error {
 }

 func (b *bedrockProvider) TransformResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header) {
-	ctx.SetContext("X-Amzn-Requestid", headers.Get("X-Amzn-Requestid"))
+	ctx.SetContext(requestIdHeader, headers.Get(requestIdHeader))
 	if headers.Get("Content-Type") == "application/vnd.amazon.eventstream" {
 		headers.Set("Content-Type", "text/event-stream; charset=utf-8")
 	}
@@ -534,21 +609,91 @@ func (b *bedrockProvider) insertHttpContextMessage(body []byte, content string,
 }

 func (b *bedrockProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header) ([]byte, error) {
+	if gjson.GetBytes(body, "model").Exists() {
+		rawModel := gjson.GetBytes(body, "model").String()
+		encodedModel := url.QueryEscape(rawModel)
+		body, _ = sjson.SetBytes(body, "model", encodedModel)
+	}
 	switch apiName {
 	case ApiNameChatCompletion:
 		return b.onChatCompletionRequestBody(ctx, body, headers)
+	case ApiNameImageGeneration:
+		return b.onImageGenerationRequestBody(ctx, body, headers)
 	default:
 		return b.config.defaultTransformRequestBody(ctx, apiName, body)
 	}
 }

 func (b *bedrockProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) ([]byte, error) {
-	if apiName == ApiNameChatCompletion {
+	switch apiName {
+	case ApiNameChatCompletion:
 		return b.onChatCompletionResponseBody(ctx, body)
+	case ApiNameImageGeneration:
+		return b.onImageGenerationResponseBody(ctx, body)
 	}
 	return nil, errUnsupportedApiName
 }

+// onImageGenerationResponseBody decodes a Bedrock image generation response body and re-encodes it as an imageGenerationResponse.
+func (b *bedrockProvider) onImageGenerationResponseBody(ctx wrapper.HttpContext, body []byte) ([]byte, error) {
+	bedrockResponse := &bedrockImageGenerationResponse{}
+	if err := json.Unmarshal(body, bedrockResponse); err != nil {
+		log.Errorf("unable to unmarshal bedrock image generation response: %v", err)
+		return nil, fmt.Errorf("unable to unmarshal bedrock image generation response: %v", err)
+	}
+	return json.Marshal(b.buildBedrockImageGenerationResponse(ctx, bedrockResponse))
+}
+
+func (b *bedrockProvider) onImageGenerationRequestBody(ctx wrapper.HttpContext, body []byte, headers http.Header) ([]byte, error) {
+	request := &imageGenerationRequest{}
+	err := b.config.parseRequestAndMapModel(ctx, request, body)
+	if err != nil {
+		return nil, err
+	}
+	headers.Set("Accept", "*/*")
+	util.OverwriteRequestPathHeader(headers, fmt.Sprintf(bedrockInvokeModelPath, request.Model))
+	return b.buildBedrockImageGenerationRequest(request, headers)
+}
+
+// buildBedrockImageGenerationRequest maps origRequest to a Bedrock TEXT_IMAGE invoke body, rewrites the path and sets the auth headers.
+func (b *bedrockProvider) buildBedrockImageGenerationRequest(origRequest *imageGenerationRequest, headers http.Header) ([]byte, error) {
+	width, height := 1024, 1024
+	if pairs := strings.Split(origRequest.Size, "x"); len(pairs) == 2 {
+		w, wErr := strconv.Atoi(pairs[0])
+		h, hErr := strconv.Atoi(pairs[1])
+		if wErr == nil && hErr == nil { // keep the 1024x1024 default instead of sending 0 for an unparseable size
+			width, height = w, h
+		}
+	}
+	request := &bedrockImageGenerationRequest{
+		TaskType:          "TEXT_IMAGE",
+		TextToImageParams: &bedrockImageGenerationTextToImageParams{Text: origRequest.Prompt},
+		ImageGenerationConfig: &bedrockImageGenerationConfig{
+			NumberOfImages: origRequest.N,
+			Width:          width,
+			Height:         height,
+			Quality:        origRequest.Quality,
+		},
+	}
+	util.OverwriteRequestPathHeader(headers, fmt.Sprintf(bedrockInvokeModelPath, origRequest.Model))
+	requestBytes, err := json.Marshal(request)
+	b.setAuthHeaders(requestBytes, headers)
+	return requestBytes, err
+}
+
+func (b *bedrockProvider) buildBedrockImageGenerationResponse(ctx wrapper.HttpContext, bedrockResponse *bedrockImageGenerationResponse) *imageGenerationResponse {
+	data := make([]imageGenerationData, len(bedrockResponse.Images))
+	for i, image := range bedrockResponse.Images {
+		data[i] = imageGenerationData{
+			B64: image,
+		}
+	}
+	return &imageGenerationResponse{
+		Created: time.Now().UnixMilli() / 1000,
+		Data:    data,
+	}
+}
+
 func (b *bedrockProvider) onChatCompletionResponseBody(ctx wrapper.HttpContext, body []byte) ([]byte, error) {
 	bedrockResponse := &bedrockConverseResponse{}
 	if err := json.Unmarshal(body, bedrockResponse); err != nil {
@@ -610,18 +755,19 @@ func (b *bedrockProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, b
 				Role:    bedrockResponse.Output.Message.Role,
 				Content: outputContent,
 			},
-			FinishReason: stopReasonBedrock2OpenAI(bedrockResponse.StopReason),
+			FinishReason: util.Ptr(stopReasonBedrock2OpenAI(bedrockResponse.StopReason)),
 		},
 	}
-	requestId := ctx.GetStringContext("X-Amzn-Requestid", "")
+	requestId := ctx.GetStringContext(requestIdHeader, "")
+	modelId, _ := url.QueryUnescape(ctx.GetStringContext(ctxKeyFinalRequestModel, ""))
 	return &chatCompletionResponse{
 		Id:                requestId,
 		Created:           time.Now().UnixMilli() / 1000,
-		Model:             ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
+		Model:             modelId,
 		SystemFingerprint: "",
 		Object:            objectChatCompletion,
 		Choices:           choices,
-		Usage: usage{
+		Usage: &usage{
 			PromptTokens:     bedrockResponse.Usage.InputTokens,
 			CompletionTokens: bedrockResponse.Usage.OutputTokens,
 			TotalTokens:      bedrockResponse.Usage.TotalTokens,
@@ -762,8 +908,8 @@ func (b *bedrockProvider) setAuthHeaders(body []byte, headers http.Header) {
 }

 func (b *bedrockProvider) generateSignature(path, amzDate, dateStamp string, body []byte) string {
+	path = encodeSigV4Path(path)
 	hashedPayload := sha256Hex(body)
-	path = urlEncoding(path)

 	endpoint := fmt.Sprintf(bedrockDefaultDomain, b.config.awsRegion)
 	canonicalHeaders := fmt.Sprintf("host:%s\nx-amz-date:%s\n", endpoint, amzDate)
@@ -780,14 +926,15 @@ func (b *bedrockProvider) generateSignature(path, amzDate, dateStamp string, bod
 	return signature
 }

-func urlEncoding(rawStr string) string {
-	encodedStr := strings.ReplaceAll(rawStr, ":", "%3A")
-	encodedStr = strings.ReplaceAll(encodedStr, "+", "%2B")
-	encodedStr = strings.ReplaceAll(encodedStr, "=", "%3D")
-	encodedStr = strings.ReplaceAll(encodedStr, "&", "%26")
-	encodedStr = strings.ReplaceAll(encodedStr, "$", "%24")
-	encodedStr = strings.ReplaceAll(encodedStr, "@", "%40")
-	return encodedStr
+// encodeSigV4Path escapes every non-empty "/"-separated segment of path with
+// url.PathEscape and joins the segments back with "/", so the separators
+// themselves (leading, trailing, or doubled) are preserved unchanged.
+// generateSignature applies it to the path before building the signature.
+func encodeSigV4Path(path string) string {
+	parts := strings.Split(path, "/")
+	for idx := range parts {
+		if parts[idx] != "" {
+			parts[idx] = url.PathEscape(parts[idx])
+		}
+	}
+	return strings.Join(parts, "/")
 }

 func getSignatureKey(key, dateStamp, region, service string) []byte {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/claude.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/claude.go
@@ -19,22 +19,55 @@ const (
 	claudeDomain             = "api.anthropic.com"
 	claudeChatCompletionPath = "/v1/messages"
 	claudeCompletionPath     = "/v1/complete"
-	defaultVersion           = "2023-06-01"
-	defaultMaxTokens         = 4096
+	claudeDefaultVersion     = "2023-06-01"
+	claudeDefaultMaxTokens   = 4096
 )

 type claudeProviderInitializer struct{}

+// claudeTool is one entry of claudeTextGenRequest.Tools: a tool name plus an
+// optional description and input schema.
+type claudeTool struct {
+	Name        string                 `json:"name"`
+	Description string                 `json:"description,omitempty"`
+	InputSchema map[string]interface{} `json:"input_schema,omitempty"`
+}
+
+// claudeToolChoice is the value of claudeTextGenRequest.ToolChoice.
+type claudeToolChoice struct {
+	Type                   string `json:"type"`
+	Name                   string `json:"name,omitempty"`
+	DisableParallelToolUse bool   `json:"disable_parallel_tool_use,omitempty"`
+}
+
+// claudeChatMessage is one entry of claudeTextGenRequest.Messages. Content is
+// untyped because buildClaudeTextGenRequest stores either a plain string or a
+// []claudeChatMessageContent in it.
+type claudeChatMessage struct {
+	Role    string `json:"role"`
+	Content any    `json:"content"`
+}
+
+// claudeChatMessageContentSource describes where the payload of a non-text
+// content part comes from. buildClaudeTextGenRequest fills MediaType and Data
+// for "base64" sources, Url for "url" image sources, and FileId for file parts.
+type claudeChatMessageContentSource struct {
+	Type      string `json:"type"`
+	MediaType string `json:"media_type,omitempty"`
+	Data      string `json:"data,omitempty"`
+	Url       string `json:"url,omitempty"`
+	FileId    string `json:"file_id,omitempty"`
+}
+
+// claudeChatMessageContent is one part of a multi-part message: Text is used
+// for text parts, Source for image and file parts.
+type claudeChatMessageContent struct {
+	Type   string                          `json:"type"`
+	Text   string                          `json:"text,omitempty"`
+	Source *claudeChatMessageContentSource `json:"source,omitempty"`
+}
 type claudeTextGenRequest struct {
-	Model         string        `json:"model"`
-	Messages      []chatMessage `json:"messages"`
-	System        string        `json:"system,omitempty"`
-	MaxTokens     int           `json:"max_tokens,omitempty"`
-	StopSequences []string      `json:"stop_sequences,omitempty"`
-	Stream        bool          `json:"stream,omitempty"`
-	Temperature   float64       `json:"temperature,omitempty"`
-	TopP          float64       `json:"top_p,omitempty"`
-	TopK          int           `json:"top_k,omitempty"`
+	Model         string              `json:"model"`
+	Messages      []claudeChatMessage `json:"messages"`
+	System        string              `json:"system,omitempty"`
+	MaxTokens     int                 `json:"max_tokens,omitempty"`
+	StopSequences []string            `json:"stop_sequences,omitempty"`
+	Stream        bool                `json:"stream,omitempty"`
+	Temperature   float64             `json:"temperature,omitempty"`
+	TopP          float64             `json:"top_p,omitempty"`
+	TopK          int                 `json:"top_k,omitempty"`
+	ToolChoice    *claudeToolChoice   `json:"tool_choice,omitempty"`
+	Tools         []claudeTool        `json:"tools,omitempty"`
+	ServiceTier   string              `json:"service_tier,omitempty"`
 }

 type claudeTextGenResponse struct {
@@ -50,13 +83,14 @@ type claudeTextGenResponse struct {
 }

 type claudeTextGenContent struct {
-	Type string `json:"type"`
+	Type string `json:"type,omitempty"`
 	Text string `json:"text,omitempty"`
 }

 type claudeTextGenUsage struct {
-	InputTokens  int `json:"input_tokens"`
-	OutputTokens int `json:"output_tokens"`
+	InputTokens  int    `json:"input_tokens,omitempty"`
+	OutputTokens int    `json:"output_tokens,omitempty"`
+	ServiceTier  string `json:"service_tier,omitempty"`
 }

 type claudeTextGenError struct {
@@ -65,12 +99,12 @@ type claudeTextGenError struct {
 }

 type claudeTextGenStreamResponse struct {
-	Type         string                `json:"type"`
-	Message      claudeTextGenResponse `json:"message"`
-	Index        int                   `json:"index"`
-	ContentBlock *claudeTextGenContent `json:"content_block"`
-	Delta        *claudeTextGenDelta   `json:"delta"`
-	Usage        claudeTextGenUsage    `json:"usage"`
+	Type         string                 `json:"type"`
+	Message      *claudeTextGenResponse `json:"message,omitempty"`
+	Index        int                    `json:"index,omitempty"`
+	ContentBlock *claudeTextGenContent  `json:"content_block,omitempty"`
+	Delta        *claudeTextGenDelta    `json:"delta,omitempty"`
+	Usage        *claudeTextGenUsage    `json:"usage,omitempty"`
 }

 type claudeTextGenDelta struct {
@@ -93,6 +127,7 @@ func (c *claudeProviderInitializer) DefaultCapabilities() map[string]string {
 		string(ApiNameCompletion):     claudeCompletionPath,
 		// docs: https://docs.anthropic.com/en/docs/build-with-claude/embeddings#voyage-http-api
 		string(ApiNameEmbeddings): PathOpenAIEmbeddings,
+		string(ApiNameModels):     PathOpenAIModels,
 	}
 }

@@ -107,6 +142,10 @@ func (c *claudeProviderInitializer) CreateProvider(config ProviderConfig) (Provi
+// claudeProvider holds the provider configuration and context cache, plus
+// values captured while translating a streaming response.
 type claudeProvider struct {
 	config       ProviderConfig
 	contextCache *contextCache
+
+	// The fields below are set by streamResponseClaude2OpenAI when a
+	// "message_start" event arrives and are reused for the chunks built from
+	// later events of that stream.
+	// NOTE(review): they live on the provider, not on the request context; if
+	// one claudeProvider value handles several streams at once, their ids and
+	// token counts would mix — confirm the provider's lifetime.
+	messageId   string
+	usage       usage
+	serviceTier string
 }

 func (c *claudeProvider) GetProviderType() string {
@@ -124,16 +163,16 @@ func (c *claudeProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiNam

 	headers.Set("x-api-key", c.config.GetApiTokenInUse(ctx))

-	if c.config.claudeVersion == "" {
-		c.config.claudeVersion = defaultVersion
+	if c.config.apiVersion == "" {
+		c.config.apiVersion = claudeDefaultVersion
 	}

-	headers.Set("anthropic-version", c.config.claudeVersion)
+	headers.Set("anthropic-version", c.config.apiVersion)
 }

+// OnRequestBody hands the request body to the shared handleRequestBody flow
+// when apiName passes c.config.isSupportedAPI. Otherwise it returns
+// ActionContinue without touching the body.
 func (c *claudeProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) (types.Action, error) {
 	if !c.config.isSupportedAPI(apiName) {
-		return types.ActionContinue, errUnsupportedApiName
+		// An unsupported API is no longer reported as an error here.
+		return types.ActionContinue, nil
 	}
 	return c.config.handleRequestBody(c, c.contextCache, ctx, apiName, body)
 }
@@ -205,14 +244,15 @@ func (c *claudeProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name A
 func (c *claudeProvider) buildClaudeTextGenRequest(origRequest *chatCompletionRequest) *claudeTextGenRequest {
 	claudeRequest := claudeTextGenRequest{
 		Model:         origRequest.Model,
-		MaxTokens:     origRequest.MaxTokens,
+		MaxTokens:     origRequest.getMaxTokens(),
 		StopSequences: origRequest.Stop,
 		Stream:        origRequest.Stream,
 		Temperature:   origRequest.Temperature,
 		TopP:          origRequest.TopP,
+		// ServiceTier:   origRequest.ServiceTier,
 	}
 	if claudeRequest.MaxTokens == 0 {
-		claudeRequest.MaxTokens = defaultMaxTokens
+		claudeRequest.MaxTokens = claudeDefaultMaxTokens
 	}

 	for _, message := range origRequest.Messages {
@@ -220,12 +260,80 @@ func (c *claudeProvider) buildClaudeTextGenRequest(origRequest *chatCompletionRe
 			claudeRequest.System = message.StringContent()
 			continue
 		}
-		claudeMessage := chatMessage{
-			Role:    message.Role,
-			Content: message.Content,
+
+		claudeMessage := claudeChatMessage{
+			Role: message.Role,
+		}
+		if message.IsStringContent() {
+			claudeMessage.Content = message.StringContent()
+		} else {
+			chatMessageContents := make([]claudeChatMessageContent, 0)
+			for _, messageContent := range message.ParseContent() {
+				switch messageContent.Type {
+				case contentTypeText:
+					chatMessageContents = append(chatMessageContents, claudeChatMessageContent{
+						Type: contentTypeText,
+						Text: messageContent.Text,
+					})
+				case contentTypeImageUrl:
+					if strings.HasPrefix(messageContent.ImageUrl.Url, "data:") {
+						parts := strings.SplitN(messageContent.ImageUrl.Url, ";", 2)
+						if len(parts) != 2 {
+							log.Errorf("invalid image url format: %s", messageContent.ImageUrl.Url)
+							continue
+						}
+						chatMessageContents = append(chatMessageContents, claudeChatMessageContent{
+							Type: "image",
+							Source: &claudeChatMessageContentSource{
+								Type:      "base64",
+								MediaType: strings.TrimPrefix(parts[0], "data:"),
+								Data:      strings.TrimPrefix(parts[1], "base64,"),
+							},
+						})
+					} else {
+						chatMessageContents = append(chatMessageContents, claudeChatMessageContent{
+							Type: "image",
+							Source: &claudeChatMessageContentSource{
+								Type: "url",
+								Url:  messageContent.ImageUrl.Url,
+							},
+						})
+					}
+				case contentTypeFile:
+					chatMessageContents = append(chatMessageContents, claudeChatMessageContent{
+						Type: "file",
+						Source: &claudeChatMessageContentSource{
+							Type:   "url",
+							FileId: messageContent.File.FileId,
+						},
+					})
+				default:
+					log.Errorf("Unsupported content type: %s", messageContent.Type)
+					continue
+				}
+			}
+			claudeMessage.Content = chatMessageContents
 		}
 		claudeRequest.Messages = append(claudeRequest.Messages, claudeMessage)
 	}
+
+	for _, tool := range origRequest.Tools {
+		claudeTool := claudeTool{
+			Name:        tool.Function.Name,
+			Description: tool.Function.Description,
+			InputSchema: tool.Function.Parameters,
+		}
+		claudeRequest.Tools = append(claudeRequest.Tools, claudeTool)
+	}
+
+	if tc := origRequest.getToolChoiceObject(); tc != nil {
+		claudeRequest.ToolChoice = &claudeToolChoice{
+			Name:                   tc.Function.Name,
+			Type:                   tc.Type,
+			DisableParallelToolUse: !origRequest.ParallelToolCalls,
+		}
+	}
+
 	return &claudeRequest
 }

@@ -233,7 +341,7 @@ func (c *claudeProvider) responseClaude2OpenAI(ctx wrapper.HttpContext, origResp
 	choice := chatCompletionChoice{
 		Index:        0,
 		Message:      &chatMessage{Role: roleAssistant, Content: origResponse.Content[0].Text},
-		FinishReason: stopReasonClaude2OpenAI(origResponse.StopReason),
+		FinishReason: util.Ptr(stopReasonClaude2OpenAI(origResponse.StopReason)),
 	}

 	return &chatCompletionResponse{
@@ -243,7 +351,7 @@ func (c *claudeProvider) responseClaude2OpenAI(ctx wrapper.HttpContext, origResp
 		SystemFingerprint: "",
 		Object:            objectChatCompletion,
 		Choices:           []chatCompletionChoice{choice},
-		Usage: usage{
+		Usage: &usage{
 			PromptTokens:     origResponse.Usage.InputTokens,
 			CompletionTokens: origResponse.Usage.OutputTokens,
 			TotalTokens:      origResponse.Usage.InputTokens + origResponse.Usage.OutputTokens,
@@ -270,27 +378,50 @@ func stopReasonClaude2OpenAI(reason *string) string {
 func (c *claudeProvider) streamResponseClaude2OpenAI(ctx wrapper.HttpContext, origResponse *claudeTextGenStreamResponse) *chatCompletionResponse {
 	switch origResponse.Type {
 	case "message_start":
+		c.messageId = origResponse.Message.Id
+		c.usage = usage{
+			PromptTokens:     origResponse.Message.Usage.InputTokens,
+			CompletionTokens: origResponse.Message.Usage.OutputTokens,
+		}
+		c.serviceTier = origResponse.Message.Usage.ServiceTier
 		choice := chatCompletionChoice{
-			Index: 0,
+			Index: origResponse.Index,
 			Delta: &chatMessage{Role: roleAssistant, Content: ""},
 		}
-		return createChatCompletionResponse(ctx, origResponse, choice)
+		return c.createChatCompletionResponse(ctx, origResponse, choice)

 	case "content_block_delta":
 		choice := chatCompletionChoice{
-			Index: 0,
+			Index: origResponse.Index,
 			Delta: &chatMessage{Content: origResponse.Delta.Text},
 		}
-		return createChatCompletionResponse(ctx, origResponse, choice)
+		return c.createChatCompletionResponse(ctx, origResponse, choice)

 	case "message_delta":
+		c.usage.CompletionTokens += origResponse.Usage.OutputTokens
+		c.usage.TotalTokens = c.usage.PromptTokens + c.usage.CompletionTokens
+
 		choice := chatCompletionChoice{
-			Index:        0,
+			Index:        origResponse.Index,
 			Delta:        &chatMessage{},
-			FinishReason: stopReasonClaude2OpenAI(origResponse.Delta.StopReason),
+			FinishReason: util.Ptr(stopReasonClaude2OpenAI(origResponse.Delta.StopReason)),
 		}
-		return createChatCompletionResponse(ctx, origResponse, choice)
-	case "content_block_stop", "message_stop":
+		return c.createChatCompletionResponse(ctx, origResponse, choice)
+	case "message_stop":
+		return &chatCompletionResponse{
+			Id:          c.messageId,
+			Created:     time.Now().UnixMilli() / 1000,
+			Model:       ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
+			Object:      objectChatCompletionChunk,
+			Choices:     []chatCompletionChoice{},
+			ServiceTier: c.serviceTier,
+			Usage: &usage{
+				PromptTokens:     c.usage.PromptTokens,
+				CompletionTokens: c.usage.CompletionTokens,
+				TotalTokens:      c.usage.TotalTokens,
+			},
+		}
+	case "content_block_stop", "ping", "content_block_start":
 		log.Debugf("skip processing response type: %s", origResponse.Type)
 		return nil
 	default:
@@ -299,13 +430,14 @@ func (c *claudeProvider) streamResponseClaude2OpenAI(ctx wrapper.HttpContext, or
 	}
 }

-func createChatCompletionResponse(ctx wrapper.HttpContext, response *claudeTextGenStreamResponse, choice chatCompletionChoice) *chatCompletionResponse {
+// createChatCompletionResponse wraps a single choice in a chat completion
+// chunk. The id and service tier come from the values stored on the provider
+// (c.messageId, c.serviceTier) and the model from ctxKeyFinalRequestModel in
+// ctx; the response argument is not read by this version.
+func (c *claudeProvider) createChatCompletionResponse(ctx wrapper.HttpContext, response *claudeTextGenStreamResponse, choice chatCompletionChoice) *chatCompletionResponse {
 	return &chatCompletionResponse{
-		Id:      response.Message.Id,
-		Created: time.Now().UnixMilli() / 1000,
-		Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
-		Object:  objectChatCompletionChunk,
-		Choices: []chatCompletionChoice{choice},
+		Id:          c.messageId,
+		Created:     time.Now().UnixMilli() / 1000,
+		Model:       ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
+		Object:      objectChatCompletionChunk,
+		Choices:     []chatCompletionChoice{choice},
+		ServiceTier: c.serviceTier,
 	}
 }

@@ -332,5 +464,14 @@ func (c *claudeProvider) GetApiName(path string) ApiName {
 	if strings.Contains(path, claudeChatCompletionPath) {
 		return ApiNameChatCompletion
 	}
+	if strings.Contains(path, claudeCompletionPath) {
+		return ApiNameCompletion
+	}
+	if strings.Contains(path, PathOpenAIModels) {
+		return ApiNameModels
+	}
+	if strings.Contains(path, PathOpenAIEmbeddings) {
+		return ApiNameEmbeddings
+	}
 	return ""
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/dify.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/dify.go
@@ -6,13 +6,13 @@ import (
 	"fmt"
 	"net/http"
 	"strings"
-	"time"
+
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"

 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/log"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
-	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
-	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
 )

 const (
@@ -116,34 +116,34 @@ func (d *difyProvider) responseDify2OpenAI(ctx wrapper.HttpContext, response *Di
 		choice = chatCompletionChoice{
 			Index:        0,
 			Message:      &chatMessage{Role: roleAssistant, Content: response.Answer},
-			FinishReason: finishReasonStop,
+			FinishReason: util.Ptr(finishReasonStop),
 		}
-		//response header中增加conversationId字段
+		// response header中增加conversationId字段
 		_ = proxywasm.ReplaceHttpResponseHeader("ConversationId", response.ConversationId)
 		id = response.ConversationId
 	case BotTypeCompletion:
 		choice = chatCompletionChoice{
 			Index:        0,
 			Message:      &chatMessage{Role: roleAssistant, Content: response.Answer},
-			FinishReason: finishReasonStop,
+			FinishReason: util.Ptr(finishReasonStop),
 		}
 		id = response.MessageId
 	case BotTypeWorkflow:
 		choice = chatCompletionChoice{
 			Index:        0,
 			Message:      &chatMessage{Role: roleAssistant, Content: response.Data.Outputs[d.config.outputVariable]},
-			FinishReason: finishReasonStop,
+			FinishReason: util.Ptr(finishReasonStop),
 		}
 		id = response.Data.WorkflowId
 	}
 	return &chatCompletionResponse{
 		Id:                id,
-		Created:           time.Now().UnixMilli() / 1000,
+		Created:           response.CreatedAt,
 		Model:             ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
 		SystemFingerprint: "",
 		Object:            objectChatCompletion,
 		Choices:           []chatCompletionChoice{choice},
-		Usage:             response.MetaData.Usage,
+		Usage:             &response.MetaData.Usage,
 	}
 }

@@ -188,7 +188,7 @@ func (d *difyProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name Api
 func (d *difyProvider) streamResponseDify2OpenAI(ctx wrapper.HttpContext, response *DifyChunkChatResponse) *chatCompletionResponse {
 	var choice chatCompletionChoice
 	var id string
-	var responseUsage usage
+	var responseUsage *usage
 	switch d.config.botType {
 	case BotTypeChat, BotTypeAgent:
 		choice = chatCompletionChoice{
@@ -211,9 +211,9 @@ func (d *difyProvider) streamResponseDify2OpenAI(ctx wrapper.HttpContext, respon
 		id = response.Data.WorkflowId
 	}
 	if response.Event == "message_end" || response.Event == "workflow_finished" {
-		choice.FinishReason = finishReasonStop
+		choice.FinishReason = util.Ptr(finishReasonStop)
 		if response.Event == "message_end" {
-			responseUsage = usage{
+			responseUsage = &usage{
 				PromptTokens:     response.MetaData.Usage.PromptTokens,
 				CompletionTokens: response.MetaData.Usage.CompletionTokens,
 				TotalTokens:      response.MetaData.Usage.TotalTokens,
@@ -222,7 +222,7 @@ func (d *difyProvider) streamResponseDify2OpenAI(ctx wrapper.HttpContext, respon
 	}
 	return &chatCompletionResponse{
 		Id:                id,
-		Created:           time.Now().UnixMilli() / 1000,
+		Created:           response.CreatedAt,
 		Model:             ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
 		SystemFingerprint: "",
 		Object:            objectChatCompletionChunk,
@@ -309,7 +309,7 @@ type DifyChatResponse struct {
 	ConversationId string       `json:"conversation_id"`
 	MessageId      string       `json:"message_id"`
 	Answer         string       `json:"answer"`
-	CreateAt       int64        `json:"create_at"`
+	CreatedAt      int64        `json:"created_at"`
 	Data           DifyData     `json:"data"`
 	MetaData       DifyMetaData `json:"metadata"`
 }
@@ -319,6 +319,7 @@ type DifyChunkChatResponse struct {
 	ConversationId string       `json:"conversation_id"`
 	MessageId      string       `json:"message_id"`
 	Answer         string       `json:"answer"`
+	CreatedAt      int64        `json:"created_at"`
 	Data           DifyData     `json:"data"`
 	MetaData       DifyMetaData `json:"metadata"`
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/doubao.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/doubao.go
@@ -11,9 +11,10 @@ import (
 )

 const (
-	doubaoDomain             = "ark.cn-beijing.volces.com"
-	doubaoChatCompletionPath = "/api/v3/chat/completions"
-	doubaoEmbeddingsPath     = "/api/v3/embeddings"
+	doubaoDomain              = "ark.cn-beijing.volces.com"
+	doubaoChatCompletionPath  = "/api/v3/chat/completions"
+	doubaoEmbeddingsPath      = "/api/v3/embeddings"
+	doubaoImageGenerationPath = "/api/v3/images/generations"
 )

 type doubaoProviderInitializer struct{}
@@ -27,8 +28,9 @@ func (m *doubaoProviderInitializer) ValidateConfig(config *ProviderConfig) error

 func (m *doubaoProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion): doubaoChatCompletionPath,
-		string(ApiNameEmbeddings):     doubaoEmbeddingsPath,
+		string(ApiNameChatCompletion):  doubaoChatCompletionPath,
+		string(ApiNameEmbeddings):      doubaoEmbeddingsPath,
+		string(ApiNameImageGeneration): doubaoImageGenerationPath,
 	}
 }

@@ -75,5 +77,8 @@ func (m *doubaoProvider) GetApiName(path string) ApiName {
 	if strings.Contains(path, doubaoEmbeddingsPath) {
 		return ApiNameEmbeddings
 	}
+	if strings.Contains(path, doubaoImageGenerationPath) {
+		return ApiNameImageGeneration
+	}
 	return ""
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/gemini.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/gemini.go
@@ -19,14 +19,16 @@ import (

 const (
 	geminiApiKeyHeader             = "x-goog-api-key"
+	geminiDefaultApiVersion        = "v1beta" // 可选: v1, v1beta
 	geminiDomain                   = "generativelanguage.googleapis.com"
 	geminiChatCompletionPath       = "generateContent"
 	geminiChatCompletionStreamPath = "streamGenerateContent?alt=sse"
 	geminiEmbeddingPath            = "batchEmbedContents"
+	geminiModelsPath               = "models"
+	geminiImageGenerationPath      = "predict"
 )

-type geminiProviderInitializer struct {
-}
+type geminiProviderInitializer struct{}

 func (g *geminiProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
@@ -36,7 +38,12 @@ func (g *geminiProviderInitializer) ValidateConfig(config *ProviderConfig) error
 }

 func (g *geminiProviderInitializer) DefaultCapabilities() map[string]string {
-	return map[string]string{}
+	return map[string]string{
+		string(ApiNameChatCompletion):  "",
+		string(ApiNameEmbeddings):      "",
+		string(ApiNameModels):          "",
+		string(ApiNameImageGeneration): "",
+	}
 }

 func (g *geminiProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
@@ -65,6 +72,7 @@ func (g *geminiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiNa
 func (g *geminiProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header) {
 	util.OverwriteRequestHostHeader(headers, geminiDomain)
 	headers.Set(geminiApiKeyHeader, g.config.GetApiTokenInUse(ctx))
+	util.OverwriteRequestAuthorizationHeader(headers, "")
 }

 func (g *geminiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) (types.Action, error) {
@@ -75,11 +83,38 @@ func (g *geminiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName,
 }

 func (g *geminiProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header) ([]byte, error) {
-	if apiName == ApiNameChatCompletion {
+	switch apiName {
+	case ApiNameChatCompletion:
 		return g.onChatCompletionRequestBody(ctx, body, headers)
-	} else {
+	case ApiNameEmbeddings:
 		return g.onEmbeddingsRequestBody(ctx, body, headers)
+	case ApiNameImageGeneration:
+		return g.onImageGenerationRequestBody(ctx, body, headers)
 	}
+	return body, nil
+}
+
+// onImageGenerationRequestBody decodes body into an imageGenerationRequest via
+// parseRequestAndMapModel, rewrites the request path to the one returned by
+// getRequestPath for ApiNameImageGeneration and the request's model, and
+// returns the JSON encoding of the request built by
+// buildGeminiImageGenerationRequest. A parse error is returned with a nil body.
+func (g *geminiProvider) onImageGenerationRequestBody(ctx wrapper.HttpContext, body []byte, headers http.Header) ([]byte, error) {
+	var imageRequest imageGenerationRequest
+	if err := g.config.parseRequestAndMapModel(ctx, &imageRequest, body); err != nil {
+		return nil, err
+	}
+
+	targetPath := g.getRequestPath(ApiNameImageGeneration, imageRequest.Model, false)
+	log.Debugf("request path:%s", targetPath)
+	util.OverwriteRequestPathHeader(headers, targetPath)
+
+	return json.Marshal(g.buildGeminiImageGenerationRequest(&imageRequest))
+}
+
+// buildGeminiImageGenerationRequest builds the Gemini image generation request:
+// a single instance carrying request.Prompt, with request.N as the sample
+// count. No other field of the incoming request is copied.
+func (g *geminiProvider) buildGeminiImageGenerationRequest(request *imageGenerationRequest) *geminiImageGenerationRequest {
+	instance := geminiImageGenerationInstance{Prompt: request.Prompt}
+	parameters := &geminiImageGenerationParameters{SampleCount: request.N}
+	return &geminiImageGenerationRequest{
+		Instances:  []geminiImageGenerationInstance{instance},
+		Parameters: parameters,
+	}
 }

 func (g *geminiProvider) onChatCompletionRequestBody(ctx wrapper.HttpContext, body []byte, headers http.Header) ([]byte, error) {
@@ -108,7 +143,7 @@ func (g *geminiProvider) onEmbeddingsRequestBody(ctx wrapper.HttpContext, body [
 }

 func (g *geminiProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool) ([]byte, error) {
-	log.Infof("chunk body:%s", string(chunk))
+	log.Debugf("chunk body:%s", string(chunk))
 	if isLastChunk || len(chunk) == 0 {
 		return nil, nil
 	}
@@ -144,14 +179,43 @@ func (g *geminiProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name A
 }

 func (g *geminiProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) ([]byte, error) {
-	if apiName == ApiNameChatCompletion {
+	switch apiName {
+	case ApiNameChatCompletion:
 		return g.onChatCompletionResponseBody(ctx, body)
-	} else {
+	case ApiNameEmbeddings:
 		return g.onEmbeddingsResponseBody(ctx, body)
+	case ApiNameImageGeneration:
+		return g.onImageGenerationResponseBody(ctx, body)
+	default:
+		return body, nil
 	}
 }

+// onImageGenerationResponseBody decodes body as a geminiImageGenerationResponse
+// and returns the JSON encoding of the imageGenerationResponse built from it
+// by buildImageGenerationResponse. A decode failure is wrapped in an error and
+// returned with a nil body.
+func (g *geminiProvider) onImageGenerationResponseBody(ctx wrapper.HttpContext, body []byte) ([]byte, error) {
+	var upstream geminiImageGenerationResponse
+	err := json.Unmarshal(body, &upstream)
+	if err != nil {
+		return nil, fmt.Errorf("unable to unmarshal gemini image generation response: %v", err)
+	}
+	return json.Marshal(g.buildImageGenerationResponse(ctx, &upstream))
+}
+
+// buildImageGenerationResponse maps a Gemini image generation response onto an
+// imageGenerationResponse: every prediction becomes one data item carrying its
+// BytesBase64Encoded value in B64, and Created is the current Unix time in
+// seconds. ctx is not read.
+func (g *geminiProvider) buildImageGenerationResponse(ctx wrapper.HttpContext, geminiResponse *geminiImageGenerationResponse) *imageGenerationResponse {
+	predictions := geminiResponse.Predictions
+	items := make([]imageGenerationData, 0, len(predictions))
+	for _, item := range predictions {
+		items = append(items, imageGenerationData{B64: item.BytesBase64Encoded})
+	}
+
+	createdAt := time.Now().UnixMilli() / 1000
+	return &imageGenerationResponse{
+		Created: createdAt,
+		Data:    items,
+	}
+}
+
 func (g *geminiProvider) onChatCompletionResponseBody(ctx wrapper.HttpContext, body []byte) ([]byte, error) {
+	log.Debugf("chat completion response body:%s", string(body))
 	geminiResponse := &geminiChatResponse{}
 	if err := json.Unmarshal(body, geminiResponse); err != nil {
 		return nil, fmt.Errorf("unable to unmarshal gemini chat response: %v", err)
@@ -177,26 +241,37 @@ func (g *geminiProvider) onEmbeddingsResponseBody(ctx wrapper.HttpContext, body
 	return json.Marshal(response)
 }

-func (g *geminiProvider) getRequestPath(apiName ApiName, geminiModel string, stream bool) string {
+// getRequestPath returns the upstream path for apiName. ApiNameModels maps to
+// "/<apiVersion>/models"; every other API maps to
+// "/<apiVersion>/models/<model>:<action>", where the action depends on apiName
+// and, for chat completion, on stream. An apiName not listed in the switch
+// leaves the action empty, so the returned path ends with ":".
+// When g.config.apiVersion is empty it is set to geminiDefaultApiVersion here
+// and stays set on g.config afterwards.
+func (g *geminiProvider) getRequestPath(apiName ApiName, model string, stream bool) string {
 	action := ""
-	if apiName == ApiNameEmbeddings {
-		action = geminiEmbeddingPath
-	} else if stream {
-		action = geminiChatCompletionStreamPath
-	} else {
-		action = geminiChatCompletionPath
+	if g.config.apiVersion == "" {
+		g.config.apiVersion = geminiDefaultApiVersion
 	}
-	return fmt.Sprintf("/v1/models/%s:%s", geminiModel, action)
+	switch apiName {
+	case ApiNameModels:
+		// The models listing has no model or action component.
+		return fmt.Sprintf("/%s/%s", g.config.apiVersion, geminiModelsPath)
+	case ApiNameEmbeddings:
+		action = geminiEmbeddingPath
+	case ApiNameChatCompletion:
+		if stream {
+			action = geminiChatCompletionStreamPath
+		} else {
+			action = geminiChatCompletionPath
+		}
+	case ApiNameImageGeneration:
+		action = geminiImageGenerationPath
+	}
+	return fmt.Sprintf("/%s/models/%s:%s", g.config.apiVersion, model, action)
 }

-type geminiChatRequest struct {
+type geminiGenerationContentRequest struct {
 	// Model and Stream are only used when using the gemini original protocol
-	Model            string                     `json:"model,omitempty"`
-	Stream           bool                       `json:"stream,omitempty"`
-	Contents         []geminiChatContent        `json:"contents"`
-	SafetySettings   []geminiChatSafetySetting  `json:"safety_settings,omitempty"`
-	GenerationConfig geminiChatGenerationConfig `json:"generation_config,omitempty"`
-	Tools            []geminiChatTools          `json:"tools,omitempty"`
+	Model             string                     `json:"model,omitempty"`
+	Stream            bool                       `json:"stream,omitempty"`
+	Contents          []geminiChatContent        `json:"contents"`
+	SystemInstruction *geminiChatContent         `json:"system_instruction,omitempty"`
+	SafetySettings    []geminiChatSafetySetting  `json:"safetySettings,omitempty"`
+	GenerationConfig  geminiChatGenerationConfig `json:"generationConfig,omitempty"`
+	Tools             []geminiChatTools          `json:"tools,omitempty"`
 }

 type geminiChatContent struct {
@@ -209,13 +284,26 @@ type geminiChatSafetySetting struct {
 	Threshold string `json:"threshold"`
 }

+// geminiThinkingConfig is the value of the optional thinkingConfig field of
+// geminiChatGenerationConfig.
+// NOTE(review): both fields use omitempty, so IncludeThoughts=false and
+// ThinkingBudget=0 are dropped from the JSON rather than sent explicitly —
+// confirm that an explicit zero budget never needs to reach the API.
+type geminiThinkingConfig struct {
+	IncludeThoughts bool  `json:"includeThoughts,omitempty"`
+	ThinkingBudget  int64 `json:"thinkingBudget,omitempty"`
+}
+
 type geminiChatGenerationConfig struct {
-	Temperature     float64  `json:"temperature,omitempty"`
-	TopP            float64  `json:"topP,omitempty"`
-	TopK            float64  `json:"topK,omitempty"`
-	MaxOutputTokens int      `json:"maxOutputTokens,omitempty"`
-	CandidateCount  int      `json:"candidateCount,omitempty"`
-	StopSequences   []string `json:"stopSequences,omitempty"`
+	Temperature        float64               `json:"temperature,omitempty"`
+	TopP               float64               `json:"topP,omitempty"`
+	TopK               int64                 `json:"topK,omitempty"`
+	Seed               int64                 `json:"seed,omitempty"`
+	Logprobs           bool                  `json:"logprobs,omitempty"`
+	MaxOutputTokens    int                   `json:"maxOutputTokens,omitempty"`
+	CandidateCount     int                   `json:"candidateCount,omitempty"`
+	StopSequences      []string              `json:"stopSequences,omitempty"`
+	PresencePenalty    int64                 `json:"presencePenalty,omitempty"`
+	FrequencyPenalty   int64                 `json:"frequencyPenalty,omitempty"`
+	ResponseModalities []string              `json:"responseModalities,omitempty"`
+	NegativePrompt     string                `json:"negativePrompt,omitempty"`
+	ThinkingConfig     *geminiThinkingConfig `json:"thinkingConfig,omitempty"`
+	MediaResolution    string                `json:"mediaResolution,omitempty"`
 }

 type geminiChatTools struct {
@@ -238,25 +326,52 @@ type geminiFunctionCall struct {
 	Arguments    any    `json:"args"`
 }

-func (g *geminiProvider) buildGeminiChatRequest(request *chatCompletionRequest) *geminiChatRequest {
+// geminiImageGenerationRequest is the request body for generate image using Imagen 3
+type geminiImageGenerationRequest struct {
+	Instances  []geminiImageGenerationInstance  `json:"instances"`
+	Parameters *geminiImageGenerationParameters `json:"parameters,omitempty"`
+}
+
+type geminiImageGenerationInstance struct {
+	Prompt string `json:"prompt"`
+}
+
+type geminiImageGenerationParameters struct {
+	SampleCount int    `json:"sampleCount,omitempty"`
+	AspectRatio string `json:"aspectRatio,omitempty"`
+}
+
+type geminiImageGenerationPrediction struct {
+	BytesBase64Encoded string `json:"bytesBase64Encoded"`
+	MimeType           string `json:"mimeType"`
+}
+
+type geminiImageGenerationResponse struct {
+	Predictions []geminiImageGenerationPrediction `json:"predictions"`
+}
+
+func (g *geminiProvider) buildGeminiChatRequest(request *chatCompletionRequest) *geminiGenerationContentRequest {
 	var safetySettings []geminiChatSafetySetting
-	{
-	}
 	for category, threshold := range g.config.geminiSafetySetting {
 		safetySettings = append(safetySettings, geminiChatSafetySetting{
 			Category:  category,
 			Threshold: threshold,
 		})
 	}
-	geminiRequest := geminiChatRequest{
+	geminiRequest := geminiGenerationContentRequest{
 		Contents:       make([]geminiChatContent, 0, len(request.Messages)),
 		SafetySettings: safetySettings,
 		GenerationConfig: geminiChatGenerationConfig{
-			Temperature:     request.Temperature,
-			TopP:            request.TopP,
-			MaxOutputTokens: request.MaxTokens,
+			Temperature:        request.Temperature,
+			TopP:               request.TopP,
+			MaxOutputTokens:    request.MaxTokens,
+			PresencePenalty:    int64(request.PresencePenalty),
+			FrequencyPenalty:   int64(request.FrequencyPenalty),
+			Logprobs:           request.Logprobs,
+			ResponseModalities: request.Modalities,
 		},
 	}
+
 	if request.Tools != nil {
 		functions := make([]function, 0, len(request.Tools))
 		for _, tool := range request.Tools {
@@ -268,7 +383,7 @@ func (g *geminiProvider) buildGeminiChatRequest(request *chatCompletionRequest)
 			},
 		}
 	}
-	shouldAddDummyModelMessage := false
+	// shouldAddDummyModelMessage := false
 	for _, message := range request.Messages {
 		content := geminiChatContent{
 			Role: message.Role,
@@ -280,32 +395,22 @@ func (g *geminiProvider) buildGeminiChatRequest(request *chatCompletionRequest)
 		}

 		// there's no assistant role in gemini and API shall vomit if role is not user or model
-		if content.Role == roleAssistant {
+		switch content.Role {
+		case roleSystem:
+			content.Role = ""
+			geminiRequest.SystemInstruction = &content
+			continue
+		case roleAssistant:
 			content.Role = "model"
-		} else if content.Role == roleSystem { // converting system prompt to prompt from user for the same reason
-			content.Role = roleUser
-			shouldAddDummyModelMessage = true
 		}
 		geminiRequest.Contents = append(geminiRequest.Contents, content)

-		// if a system message is the last message, we need to add a dummy model message to make gemini happy
-		if shouldAddDummyModelMessage {
-			geminiRequest.Contents = append(geminiRequest.Contents, geminiChatContent{
-				Role: "model",
-				Parts: []geminiPart{
-					{
-						Text: "Okay",
-					},
-				},
-			})
-			shouldAddDummyModelMessage = false
-		}
 	}

 	return &geminiRequest
 }

-func (g *geminiProvider) setSystemContent(request *geminiChatRequest, content string) {
+func (g *geminiProvider) setSystemContent(request *geminiGenerationContentRequest, content string) {
 	systemContents := []geminiChatContent{{
 		Role: roleUser,
 		Parts: []geminiPart{
@@ -395,32 +500,34 @@ func (g *geminiProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, re
 		Object:  objectChatCompletion,
 		Created: time.Now().UnixMilli() / 1000,
 		Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
-		Choices: make([]chatCompletionChoice, 0, len(response.Candidates)),
-		Usage: usage{
+		Usage: &usage{
 			PromptTokens:     response.UsageMetadata.PromptTokenCount,
 			CompletionTokens: response.UsageMetadata.CandidatesTokenCount,
 			TotalTokens:      response.UsageMetadata.TotalTokenCount,
 		},
 	}
-	for i, candidate := range response.Candidates {
-		choice := chatCompletionChoice{
-			Index: i,
-			Message: &chatMessage{
-				Role: roleAssistant,
-			},
-			FinishReason: finishReasonStop,
-		}
-		if len(candidate.Content.Parts) > 0 {
-			if candidate.Content.Parts[0].FunctionCall != nil {
-				choice.Message.ToolCalls = g.buildToolCalls(&candidate)
-			} else {
-				choice.Message.Content = candidate.Content.Parts[0].Text
+	choiceIndex := 0
+	for _, candidate := range response.Candidates {
+		for _, part := range candidate.Content.Parts {
+			choice := chatCompletionChoice{
+				Index: choiceIndex,
+				Message: &chatMessage{
+					Role: roleAssistant,
+				},
+				FinishReason: util.Ptr(finishReasonStop),
 			}
-		} else {
-			choice.Message.Content = ""
-			choice.FinishReason = candidate.FinishReason
+			if part.FunctionCall != nil {
+				choice.Message.ToolCalls = g.buildToolCalls(&candidate)
+			} else if part.InlineData != nil {
+				choice.Message.Content = part.InlineData.Data
+			} else {
+				choice.Message.Content = part.Text
+			}
+
+			choice.FinishReason = util.Ptr(strings.ToLower(candidate.FinishReason))
+			fullTextResponse.Choices = append(fullTextResponse.Choices, choice)
+			choiceIndex += 1
 		}
-		fullTextResponse.Choices = append(fullTextResponse.Choices, choice)
 	}
 	return &fullTextResponse
 }
@@ -453,6 +560,9 @@ func (g *geminiProvider) buildChatCompletionStreamResponse(ctx wrapper.HttpConte
 	var choice chatCompletionChoice
 	if len(geminiResp.Candidates) > 0 && len(geminiResp.Candidates[0].Content.Parts) > 0 {
 		choice.Delta = &chatMessage{Content: geminiResp.Candidates[0].Content.Parts[0].Text}
+		if geminiResp.Candidates[0].FinishReason != "" {
+			choice.FinishReason = util.Ptr(strings.ToLower(geminiResp.Candidates[0].FinishReason))
+		}
 	}
 	streamResponse := chatCompletionResponse{
 		Id:      fmt.Sprintf("chatcmpl-%s", uuid.New().String()),
@@ -460,7 +570,7 @@ func (g *geminiProvider) buildChatCompletionStreamResponse(ctx wrapper.HttpConte
 		Created: time.Now().UnixMilli() / 1000,
 		Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
 		Choices: []chatCompletionChoice{choice},
-		Usage: usage{
+		Usage: &usage{
 			PromptTokens:     geminiResp.UsageMetadata.PromptTokenCount,
 			CompletionTokens: geminiResp.UsageMetadata.CandidatesTokenCount,
 			TotalTokens:      geminiResp.UsageMetadata.TotalTokenCount,
@@ -508,5 +618,8 @@ func (g *geminiProvider) GetApiName(path string) ApiName {
 	if strings.Contains(path, geminiEmbeddingPath) {
 		return ApiNameEmbeddings
 	}
+	if strings.Contains(path, geminiImageGenerationPath) {
+		return ApiNameImageGeneration
+	}
 	return ""
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/hunyuan.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/hunyuan.go
@@ -42,13 +42,10 @@ const (
 	hunyuanAuthIdLen  = 36

 	// docs: https://cloud.tencent.com/document/product/1729/111007
-	hunyuanOpenAiDomain      = "api.hunyuan.cloud.tencent.com"
-	hunyuanOpenAiRequestPath = "/v1/chat/completions"
-	hunyuanOpenAiEmbeddings  = "/v1/embeddings"
+	hunyuanOpenAiDomain = "api.hunyuan.cloud.tencent.com"
 )

-type hunyuanProviderInitializer struct {
-}
+type hunyuanProviderInitializer struct{}

 // ref: https://console.cloud.tencent.com/api/explorer?Product=hunyuan&Version=2023-09-01&Action=ChatCompletions
 type hunyuanTextGenRequest struct {
@@ -105,8 +102,8 @@ func (m *hunyuanProviderInitializer) ValidateConfig(config *ProviderConfig) erro

+// DefaultCapabilities maps the chat-completion and embeddings API names to the
+// shared OpenAI path constants.
 func (m *hunyuanProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion): hunyuanOpenAiRequestPath,
-		string(ApiNameEmbeddings):     hunyuanOpenAiEmbeddings,
+		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
+		string(ApiNameEmbeddings):     PathOpenAIEmbeddings,
 	}
 }

@@ -324,7 +321,7 @@ func (m *hunyuanProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name
 	}

 	// hunyuan的流式返回:
-	//data: {"Note":"以上内容为AI生成，不代表开发者立场，请勿删除或修改本标记","Choices":[{"Delta":{"Role":"assistant","Content":"有助于"},"FinishReason":""}],"Created":1716359713,"Id":"086b6b19-8b2c-4def-a65c-db6a7bc86acd","Usage":{"PromptTokens":7,"CompletionTokens":145,"TotalTokens":152}}
+	// data: {"Note":"以上内容为AI生成，不代表开发者立场，请勿删除或修改本标记","Choices":[{"Delta":{"Role":"assistant","Content":"有助于"},"FinishReason":""}],"Created":1716359713,"Id":"086b6b19-8b2c-4def-a65c-db6a7bc86acd","Usage":{"PromptTokens":7,"CompletionTokens":145,"TotalTokens":152}}

 	// openai的流式返回
 	// data: {"id": "chatcmpl-7QyqpwdfhqwajicIEznoc6Q47XAyW", "object": "chat.completion.chunk", "created": 1677664795, "model": "gpt-3.5-turbo-0613", "choices": [{"delta": {"content": "The "}, "index": 0, "finish_reason": null}]}
@@ -338,7 +335,7 @@ func (m *hunyuanProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name
 	}

 	// 初始化处理下标，以及将要返回的处理过的chunks
-	var newEventPivot = -1
+	newEventPivot := -1
 	var outputBuffer []byte

 	// 从buffer区取出若干完整的chunk，将其转为openAI格式后返回
@@ -390,7 +387,7 @@ func (m *hunyuanProvider) convertChunkFromHunyuanToOpenAI(ctx wrapper.HttpContex
 		Model:             ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
 		SystemFingerprint: "",
 		Object:            objectChatCompletionChunk,
-		Usage: usage{
+		Usage: &usage{
 			PromptTokens:     hunyuanFormattedChunk.Usage.PromptTokens,
 			CompletionTokens: hunyuanFormattedChunk.Usage.CompletionTokens,
 			TotalTokens:      hunyuanFormattedChunk.Usage.TotalTokens,
@@ -403,7 +400,7 @@ func (m *hunyuanProvider) convertChunkFromHunyuanToOpenAI(ctx wrapper.HttpContex
 	if hunyuanFormattedChunk.Choices[0].FinishReason == hunyuanStreamEndMark {
 		// log.Debugf("@@@ --- 最后chunk: ")
 		openAIFormattedChunk.Choices = append(openAIFormattedChunk.Choices, chatCompletionChoice{
-			FinishReason: hunyuanFormattedChunk.Choices[0].FinishReason,
+			FinishReason: util.Ptr(hunyuanFormattedChunk.Choices[0].FinishReason),
 		})
 	} else {
 		deltaMsg := chatMessage{
@@ -451,7 +448,6 @@ func (m *hunyuanProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName
 }

 func (m *hunyuanProvider) insertContextMessageIntoHunyuanRequest(request *hunyuanTextGenRequest, content string) {
-
 	fileMessage := hunyuanChatMessage{
 		Role:    roleSystem,
 		Content: content,
@@ -499,7 +495,7 @@ func (m *hunyuanProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, h
 				Content:   choice.Message.Content,
 				ToolCalls: nil,
 			},
-			FinishReason: choice.FinishReason,
+			FinishReason: util.Ptr(choice.FinishReason),
 		})
 	}
 	return &chatCompletionResponse{
@@ -509,7 +505,7 @@ func (m *hunyuanProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, h
 		SystemFingerprint: "",
 		Object:            objectChatCompletion,
 		Choices:           choices,
-		Usage: usage{
+		Usage: &usage{
 			PromptTokens:     hunyuanResponse.Response.Usage.PromptTokens,
 			CompletionTokens: hunyuanResponse.Response.Usage.CompletionTokens,
 			TotalTokens:      hunyuanResponse.Response.Usage.TotalTokens,
--- a/plugins/wasm-go/extensions/ai-proxy/provider/minimax.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/minimax.go
@@ -36,8 +36,7 @@ const (
 	defaultSenderName        string = "小明"
 )

-type minimaxProviderInitializer struct {
-}
+type minimaxProviderInitializer struct{}

 func (m *minimaxProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	// If using the chat completion Pro API, a group ID must be set.
@@ -368,7 +367,7 @@ func (m *minimaxProvider) responseProToOpenAI(response *minimaxChatCompletionPro
 				Content: message.Text,
 			}
 			choices = append(choices, chatCompletionChoice{
-				FinishReason: choice.FinishReason,
+				FinishReason: util.Ptr(choice.FinishReason),
 				Index:        messageIndex,
 				Message:      message,
 			})
@@ -381,7 +380,7 @@ func (m *minimaxProvider) responseProToOpenAI(response *minimaxChatCompletionPro
 		Created: response.Created,
 		Model:   response.Model,
 		Choices: choices,
-		Usage: usage{
+		Usage: &usage{
 			TotalTokens:      int(response.Usage.TotalTokens),
 			PromptTokens:     int(response.Usage.PromptTokens),
 			CompletionTokens: int(response.Usage.CompletionTokens),
--- a/plugins/wasm-go/extensions/ai-proxy/provider/model.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/model.go
@@ -20,8 +20,10 @@ const (

 	httpStatus200 = "200"

-	contentTypeText     = "text"
-	contentTypeImageUrl = "image_url"
+	contentTypeText       = "text"
+	contentTypeImageUrl   = "image_url"
+	contentTypeInputAudio = "input_audio"
+	contentTypeFile       = "file"

 	reasoningStartTag = "<think>"
 	reasoningEndTag   = "</think>"
@@ -53,11 +55,40 @@ type chatCompletionRequest struct {
 	Temperature         float64                `json:"temperature,omitempty"`
 	TopP                float64                `json:"top_p,omitempty"`
 	Tools               []tool                 `json:"tools,omitempty"`
-	ToolChoice          *toolChoice            `json:"tool_choice,omitempty"`
+	ToolChoice          interface{}            `json:"tool_choice,omitempty"`
 	ParallelToolCalls   bool                   `json:"parallel_tool_calls,omitempty"`
 	User                string                 `json:"user,omitempty"`
 }

+// getMaxTokens returns the effective token limit of the request:
+// MaxCompletionTokens when it is positive, otherwise MaxTokens.
+func (c *chatCompletionRequest) getMaxTokens() int {
+	if c.MaxCompletionTokens <= 0 {
+		return c.MaxTokens
+	}
+	return c.MaxCompletionTokens
+}
+
+// getToolChoiceString returns ToolChoice when it holds a plain string and ""
+// in every other case (nil or a non-string value).
+func (c *chatCompletionRequest) getToolChoiceString() string {
+	// The comma-ok form yields the zero value for nil and for mismatched types.
+	choice, _ := c.ToolChoice.(string)
+	return choice
+}
+
+// getToolChoiceObject returns ToolChoice when it holds a *toolChoice and nil in
+// every other case (nil or a value of another type).
+// NOTE(review): a value decoded by encoding/json into an interface{} field is a
+// map[string]interface{}, which this assertion does not match — confirm how
+// callers populate ToolChoice.
+func (c *chatCompletionRequest) getToolChoiceObject() *toolChoice {
+	// The comma-ok form yields a nil pointer for nil and for mismatched types.
+	choice, _ := c.ToolChoice.(*toolChoice)
+	return choice
+}
+
 type CompletionRequest struct {
 	Model            string         `json:"model"`
 	Prompt           string         `json:"prompt"`
@@ -107,15 +138,15 @@ type chatCompletionResponse struct {
 	ServiceTier       string                 `json:"service_tier,omitempty"`
 	SystemFingerprint string                 `json:"system_fingerprint,omitempty"`
 	Object            string                 `json:"object,omitempty"`
-	Usage             usage                  `json:"usage,omitempty"`
+	Usage             *usage                 `json:"usage"`
 }

+// chatCompletionChoice is one element of chatCompletionResponse.Choices.
+// Message and Delta are omitted when nil; finish_reason and logprobs carry no
+// omitempty tag, so they are always written and appear as null when unset.
 type chatCompletionChoice struct {
 	Index        int                    `json:"index"`
 	Message      *chatMessage           `json:"message,omitempty"`
 	Delta        *chatMessage           `json:"delta,omitempty"`
-	FinishReason string                 `json:"finish_reason,omitempty"`
-	Logprobs     map[string]interface{} `json:"logprobs,omitempty"`
+	FinishReason *string                `json:"finish_reason"`
+	Logprobs     map[string]interface{} `json:"logprobs"`
 }

 type usage struct {
@@ -200,13 +231,26 @@ func (m *chatMessage) handleStreamingReasoningContent(ctx wrapper.HttpContext, r
 	}
 }

-type messageContent struct {
-	Type     string    `json:"type,omitempty"`
-	Text     string    `json:"text"`
-	ImageUrl *imageUrl `json:"image_url,omitempty"`
+// chatMessageContent is a single typed part of a chat message. Type names the
+// kind of part; ImageUrl, File and InputAudio are optional payloads that are
+// omitted from JSON when nil, while text is always written.
+type chatMessageContent struct {
+	Type       string                      `json:"type,omitempty"`
+	Text       string                      `json:"text"`
+	ImageUrl   *chatMessageContentImageUrl `json:"image_url,omitempty"`
+	File       *chatMessageContentFile     `json:"file,omitempty"`
+	InputAudio *chatMessageContentAudio    `json:"input_audio,omitempty"`
 }

-type imageUrl struct {
+// chatMessageContentAudio is the payload of an "input_audio" part.
+type chatMessageContentAudio struct {
+	Data   string `json:"data"`
+	Format string `json:"format"`
+}
+
+// chatMessageContentFile is the payload of a "file" part; every field is
+// optional in the serialized form.
+type chatMessageContentFile struct {
+	FileData string `json:"file_data,omitempty"`
+	FileId   string `json:"file_id,omitempty"`
+	FileName string `json:"file_name,omitempty"`
+}
+
+// chatMessageContentImageUrl is the payload of an "image_url" part.
+type chatMessageContentImageUrl struct {
 	Url    string `json:"url,omitempty"`
 	Detail string `json:"detail,omitempty"`
 }
@@ -266,11 +310,11 @@ func (m *chatMessage) StringContent() string {
 	return ""
 }

-func (m *chatMessage) ParseContent() []messageContent {
-	var contentList []messageContent
+// ParseContent normalizes the message content into a list of typed parts.
+// A plain string becomes a single text part. For structured content the
+// text, image_url, input_audio and file part types are recognized; a part
+// whose mandatory value is missing or not a string is skipped instead of
+// triggering a panic.
+func (m *chatMessage) ParseContent() []chatMessageContent {
+	var contentList []chatMessageContent
 	content, ok := m.Content.(string)
 	if ok {
-		contentList = append(contentList, messageContent{
+		contentList = append(contentList, chatMessageContent{
 			Type: contentTypeText,
 			Text: content,
 		})
@@ -286,18 +330,43 @@ func (m *chatMessage) ParseContent() []messageContent {
 			switch contentMap["type"] {
 			case contentTypeText:
 				if subStr, ok := contentMap[contentTypeText].(string); ok {
-					contentList = append(contentList, messageContent{
+					contentList = append(contentList, chatMessageContent{
 						Type: contentTypeText,
 						Text: subStr,
 					})
 				}
 			case contentTypeImageUrl:
 				if subObj, ok := contentMap[contentTypeImageUrl].(map[string]any); ok {
-					contentList = append(contentList, messageContent{
+					// url is mandatory: leave the switch for a malformed part
+					// rather than panicking on an unchecked type assertion.
+					url, ok := subObj["url"].(string)
+					if !ok {
+						break
+					}
+					msg := chatMessageContent{
 						Type: contentTypeImageUrl,
-						ImageUrl: &imageUrl{
-							Url: subObj["url"].(string),
+						ImageUrl: &chatMessageContentImageUrl{
+							Url: url,
 						},
+					}
+					if detail, ok := subObj["detail"].(string); ok {
+						msg.ImageUrl.Detail = detail
+					}
+					contentList = append(contentList, msg)
+				}
+			case contentTypeInputAudio:
+				if subObj, ok := contentMap[contentTypeInputAudio].(map[string]any); ok {
+					// data is mandatory; format falls back to "" when absent.
+					data, ok := subObj["data"].(string)
+					if !ok {
+						break
+					}
+					format, _ := subObj["format"].(string)
+					contentList = append(contentList, chatMessageContent{
+						Type: contentTypeInputAudio,
+						InputAudio: &chatMessageContentAudio{
+							Data:   data,
+							Format: format,
+						},
+					})
+				}
+			case contentTypeFile:
+				if subObj, ok := contentMap[contentTypeFile].(map[string]any); ok {
+					// file_id may be absent, so it is read with the comma-ok
+					// form and defaults to "".
+					fileId, _ := subObj["file_id"].(string)
+					contentList = append(contentList, chatMessageContent{
+						Type: contentTypeFile,
+						File: &chatMessageContentFile{
+							FileId: fileId,
+							// FileName: subObj["file_name"].(string),
+							// FileData: subObj["file_data"].(string),
+						},
 					})
 				}
 			}
@@ -356,10 +425,39 @@ func (e *StreamEvent) ToHttpString() string {

 // https://platform.openai.com/docs/guides/images
+// imageGenerationRequest is the OpenAI-style image generation request body.
+// Only model and prompt are always serialized; the other fields are omitted
+// when they hold their zero value.
 type imageGenerationRequest struct {
-	Model  string `json:"model"`
-	Prompt string `json:"prompt"`
-	N      int    `json:"n,omitempty"`
-	Size   string `json:"size,omitempty"`
+	Model             string `json:"model"`
+	Prompt            string `json:"prompt"`
+	Background        string `json:"background,omitempty"`
+	Moderation        string `json:"moderation,omitempty"`
+	OutputCompression int    `json:"output_compression,omitempty"`
+	OutputFormat      string `json:"output_format,omitempty"`
+	Quality           string `json:"quality,omitempty"`
+	ResponseFormat    string `json:"response_format,omitempty"`
+	Style             string `json:"style,omitempty"`
+	N                 int    `json:"n,omitempty"`
+	Size              string `json:"size,omitempty"`
+}
+
+// imageGenerationData is one entry of imageGenerationResponse.Data; the image
+// is carried either as a URL or as base64 data under "b64_json".
+type imageGenerationData struct {
+	URL           string `json:"url,omitempty"`
+	B64           string `json:"b64_json,omitempty"`
+	RevisedPrompt string `json:"revised_prompt,omitempty"`
+}
+
+// imageGenerationUsage holds the token accounting of an image generation
+// response, including the text/image split of the input tokens.
+type imageGenerationUsage struct {
+	TotalTokens        int `json:"total_tokens"`
+	InputTokens        int `json:"input_tokens"`
+	OutputTokens       int `json:"output_tokens"`
+	InputTokensDetails struct {
+		TextTokens  int `json:"text_tokens"`
+		ImageTokens int `json:"image_tokens"`
+	} `json:"input_tokens_details"`
+}
+
+// imageGenerationResponse is the OpenAI-style image generation response body;
+// usage is omitted from JSON when nil.
+type imageGenerationResponse struct {
+	Created int64                 `json:"created"`
+	Data    []imageGenerationData `json:"data"`
+	Usage   *imageGenerationUsage `json:"usage,omitempty"`
 }

 // https://platform.openai.com/docs/guides/speech-to-text
--- a/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go
@@ -18,12 +18,10 @@ import (
 // moonshotProvider is the provider for Moonshot AI service.

 const (
-	moonshotDomain             = "api.moonshot.cn"
-	moonshotChatCompletionPath = "/v1/chat/completions"
+	moonshotDomain = "api.moonshot.cn"
 )

-type moonshotProviderInitializer struct {
-}
+type moonshotProviderInitializer struct{}

 func (m *moonshotProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.moonshotFileId != "" && config.context != nil {
@@ -37,7 +35,8 @@ func (m *moonshotProviderInitializer) ValidateConfig(config *ProviderConfig) err

+// DefaultCapabilities maps the chat-completion and models API names to the
+// shared OpenAI path constants.
 func (m *moonshotProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion): moonshotChatCompletionPath,
+		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
+		string(ApiNameModels):         PathOpenAIModels,
 	}
 }

--- a/plugins/wasm-go/extensions/ai-proxy/provider/ollama.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/ollama.go
@@ -30,6 +30,7 @@ func (m *ollamaProviderInitializer) DefaultCapabilities() map[string]string {
 		// ollama的chat接口path和OpenAI的chat接口一样
 		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
 		string(ApiNameEmbeddings):     PathOpenAIEmbeddings,
+		string(ApiNameModels):         PathOpenAIModels,
 	}
 }

--- a/plugins/wasm-go/extensions/ai-proxy/provider/openai.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/openai.go
@@ -15,16 +15,10 @@ import (
 // openaiProvider is the provider for OpenAI service.

 const (
-	defaultOpenaiDomain             = "api.openai.com"
-	defaultOpenaiChatCompletionPath = "/v1/chat/completions"
-	defaultOpenaiCompletionPath     = "/v1/completions"
-	defaultOpenaiEmbeddingsPath     = "/v1/embeddings"
-	defaultOpenaiAudioSpeech        = "/v1/audio/speech"
-	defaultOpenaiImageGeneration    = "/v1/images/generations"
+	defaultOpenaiDomain = "api.openai.com"
 )

-type openaiProviderInitializer struct {
-}
+type openaiProviderInitializer struct{}

 func (m *openaiProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	return nil
@@ -32,19 +26,45 @@ func (m *openaiProviderInitializer) ValidateConfig(config *ProviderConfig) error

+// DefaultCapabilities maps every API name handled by the OpenAI provider to
+// its PathOpenAI* constant. Each key is the string form of an ApiName.
 func (m *openaiProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameCompletion):      defaultOpenaiCompletionPath,
-		string(ApiNameChatCompletion):  defaultOpenaiChatCompletionPath,
-		string(ApiNameEmbeddings):      defaultOpenaiEmbeddingsPath,
-		string(ApiNameImageGeneration): defaultOpenaiImageGeneration,
-		string(ApiNameAudioSpeech):     defaultOpenaiAudioSpeech,
+		string(ApiNameCompletion):                           PathOpenAICompletions,
+		string(ApiNameChatCompletion):                       PathOpenAIChatCompletions,
+		string(ApiNameEmbeddings):                           PathOpenAIEmbeddings,
+		string(ApiNameImageGeneration):                      PathOpenAIImageGeneration,
+		string(ApiNameImageEdit):                            PathOpenAIImageEdit,
+		string(ApiNameImageVariation):                       PathOpenAIImageVariation,
+		string(ApiNameAudioSpeech):                          PathOpenAIAudioSpeech,
+		string(ApiNameModels):                               PathOpenAIModels,
+		string(ApiNameFiles):                                PathOpenAIFiles,
+		string(ApiNameRetrieveFile):                         PathOpenAIRetrieveFile,
+		string(ApiNameRetrieveFileContent):                  PathOpenAIRetrieveFileContent,
+		string(ApiNameBatches):                              PathOpenAIBatches,
+		string(ApiNameRetrieveBatch):                        PathOpenAIRetrieveBatch,
+		string(ApiNameCancelBatch):                          PathOpenAICancelBatch,
+		string(ApiNameResponses):                            PathOpenAIResponses,
+		string(ApiNameFineTuningJobs):                       PathOpenAIFineTuningJobs,
+		string(ApiNameFineTuningRetrieveJob):                PathOpenAIFineTuningRetrieveJob,
+		string(ApiNameFineTuningJobEvents):                  PathOpenAIFineTuningJobEvents,
+		string(ApiNameFineTuningJobCheckpoints):             PathOpenAIFineTuningJobCheckpoints,
+		string(ApiNameFineTuningCancelJob):                  PathOpenAIFineTuningCancelJob,
+		string(ApiNameFineTuningResumeJob):                  PathOpenAIFineTuningResumeJob,
+		string(ApiNameFineTuningPauseJob):                   PathOpenAIFineTuningPauseJob,
+		string(ApiNameFineTuningCheckpointPermissions):      PathOpenAIFineTuningCheckpointPermissions,
+		string(ApiNameDeleteFineTuningCheckpointPermission): PathOpenAIFineDeleteTuningCheckpointPermission,
 	}
 }

+// isDirectPath checks if the path is a known standard OpenAI interface path.
+// Matching is by suffix only, so "/completions" also covers paths ending in
+// "/chat/completions". It returns true when any listed suffix matches.
 func isDirectPath(path string) bool {
 	return strings.HasSuffix(path, "/completions") ||
 		strings.HasSuffix(path, "/embeddings") ||
 		strings.HasSuffix(path, "/audio/speech") ||
-		strings.HasSuffix(path, "/images/generations")
+		strings.HasSuffix(path, "/images/generations") ||
+		strings.HasSuffix(path, "/images/variations") ||
+		strings.HasSuffix(path, "/images/edits") ||
+		strings.HasSuffix(path, "/models") ||
+		strings.HasSuffix(path, "/responses") ||
+		strings.HasSuffix(path, "/fine_tuning/jobs") ||
+		strings.HasSuffix(path, "/fine_tuning/checkpoints")
 }

 func (m *openaiProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
@@ -98,15 +118,14 @@ func (m *openaiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiNa
 }

 func (m *openaiProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header) {
-	if m.customPath != "" {
-		if m.isDirectCustomPath || apiName == "" {
-			util.OverwriteRequestPathHeader(headers, m.customPath)
-		} else {
-			util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
-		}
-	} else {
+	if m.isDirectCustomPath {
+		util.OverwriteRequestPathHeader(headers, m.customPath)
+	}
+
+	if apiName != "" {
 		util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 	}
+
 	if m.customDomain != "" {
 		util.OverwriteRequestHostHeader(headers, m.customDomain)
 	} else {
@@ -119,7 +138,7 @@ func (m *openaiProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiNam
 }

 func (m *openaiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !m.config.needToProcessRequestBody(apiName) {
 		// We don't need to process the request body for other APIs.
 		return types.ActionContinue, nil
 	}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"math/rand"
 	"net/http"
+	"regexp"
 	"strings"

 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
@@ -16,30 +17,70 @@ import (
 	"github.com/tidwall/sjson"
 )

-type ApiName string
-type Pointcut string
+type (
+	ApiName  string
+	Pointcut string
+)

 const (

 	// ApiName 格式 {vendor}/{version}/{apitype}
 	// 表示遵循 厂商/版本/接口类型 的格式
 	// 目前openai是事实意义上的标准，但是也有其他厂商存在其他任务的一些可能的标准，比如cohere的rerank
-	ApiNameCompletion      ApiName = "openai/v1/completions"
-	ApiNameChatCompletion  ApiName = "openai/v1/chatcompletions"
-	ApiNameEmbeddings      ApiName = "openai/v1/embeddings"
-	ApiNameImageGeneration ApiName = "openai/v1/imagegeneration"
-	ApiNameAudioSpeech     ApiName = "openai/v1/audiospeech"
-	ApiNameFiles           ApiName = "openai/v1/files"
-	ApiNameBatches         ApiName = "openai/v1/batches"
+	ApiNameCompletion                           ApiName = "openai/v1/completions"
+	ApiNameChatCompletion                       ApiName = "openai/v1/chatcompletions"
+	ApiNameEmbeddings                           ApiName = "openai/v1/embeddings"
+	ApiNameImageGeneration                      ApiName = "openai/v1/imagegeneration"
+	ApiNameImageEdit                            ApiName = "openai/v1/imageedit"
+	ApiNameImageVariation                       ApiName = "openai/v1/imagevariation"
+	ApiNameAudioSpeech                          ApiName = "openai/v1/audiospeech"
+	ApiNameFiles                                ApiName = "openai/v1/files"
+	ApiNameRetrieveFile                         ApiName = "openai/v1/retrievefile"
+	ApiNameRetrieveFileContent                  ApiName = "openai/v1/retrievefilecontent"
+	ApiNameBatches                              ApiName = "openai/v1/batches"
+	ApiNameRetrieveBatch                        ApiName = "openai/v1/retrievebatch"
+	ApiNameCancelBatch                          ApiName = "openai/v1/cancelbatch"
+	ApiNameModels                               ApiName = "openai/v1/models"
+	ApiNameResponses                            ApiName = "openai/v1/responses"
+	ApiNameFineTuningJobs                       ApiName = "openai/v1/fine-tuningjobs"
+	ApiNameFineTuningRetrieveJob                ApiName = "openai/v1/retrievefine-tuningjob"
+	ApiNameFineTuningJobEvents                  ApiName = "openai/v1/fine-tuningjobsevents"
+	ApiNameFineTuningJobCheckpoints             ApiName = "openai/v1/fine-tuningjobcheckpoints"
+	ApiNameFineTuningCancelJob                  ApiName = "openai/v1/cancelfine-tuningjob"
+	ApiNameFineTuningResumeJob                  ApiName = "openai/v1/resumefine-tuningjob"
+	ApiNameFineTuningPauseJob                   ApiName = "openai/v1/pausefine-tuningjob"
+	ApiNameFineTuningCheckpointPermissions      ApiName = "openai/v1/fine-tuningjobcheckpointpermissions"
+	ApiNameDeleteFineTuningCheckpointPermission ApiName = "openai/v1/deletefine-tuningjobcheckpointpermission"

-	PathOpenAICompletions     = "/v1/completions"
-	PathOpenAIChatCompletions = "/v1/chat/completions"
-	PathOpenAIEmbeddings      = "/v1/embeddings"
-	PathOpenAIFiles           = "/v1/files"
-	PathOpenAIBatches         = "/v1/batches"
+	PathOpenAICompletions                          = "/v1/completions"
+	PathOpenAIChatCompletions                      = "/v1/chat/completions"
+	PathOpenAIEmbeddings                           = "/v1/embeddings"
+	PathOpenAIFiles                                = "/v1/files"
+	PathOpenAIRetrieveFile                         = "/v1/files/{file_id}"
+	PathOpenAIRetrieveFileContent                  = "/v1/files/{file_id}/content"
+	PathOpenAIBatches                              = "/v1/batches"
+	PathOpenAIRetrieveBatch                        = "/v1/batches/{batch_id}"
+	PathOpenAICancelBatch                          = "/v1/batches/{batch_id}/cancel"
+	PathOpenAIModels                               = "/v1/models"
+	PathOpenAIImageGeneration                      = "/v1/images/generations"
+	PathOpenAIImageEdit                            = "/v1/images/edits"
+	PathOpenAIImageVariation                       = "/v1/images/variations"
+	PathOpenAIAudioSpeech                          = "/v1/audio/speech"
+	PathOpenAIResponses                            = "/v1/responses"
+	PathOpenAIFineTuningJobs                       = "/v1/fine_tuning/jobs"
+	PathOpenAIFineTuningRetrieveJob                = "/v1/fine_tuning/jobs/{fine_tuning_job_id}"
+	PathOpenAIFineTuningJobEvents                  = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/events"
+	PathOpenAIFineTuningJobCheckpoints             = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints"
+	PathOpenAIFineTuningCancelJob                  = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/cancel"
+	PathOpenAIFineTuningResumeJob                  = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/resume"
+	PathOpenAIFineTuningPauseJob                   = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/pause"
+	PathOpenAIFineTuningCheckpointPermissions      = "/v1/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions"
+	PathOpenAIFineDeleteTuningCheckpointPermission = "/v1/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions/{permission_id}"

 	// TODO: 以下是一些非标准的API名称，需要进一步确认是否支持
 	ApiNameCohereV1Rerank ApiName = "cohere/v1/rerank"
+	ApiNameQwenAsyncAIGC  ApiName = "api/v1/services/aigc"
+	ApiNameQwenAsyncTask  ApiName = "api/v1/tasks/"

 	providerTypeMoonshot   = "moonshot"
 	providerTypeAzure      = "azure"
@@ -69,6 +110,7 @@ const (
 	providerTypeTogetherAI = "together-ai"
 	providerTypeDify       = "dify"
 	providerTypeBedrock    = "bedrock"
+	providerTypeVertex     = "vertex"

 	protocolOpenAI   = "openai"
 	protocolOriginal = "original"
@@ -140,6 +182,7 @@ var (
 		providerTypeTogetherAI: &togetherAIProviderInitializer{},
 		providerTypeDify:       &difyProviderInitializer{},
 		providerTypeBedrock:    &bedrockProviderInitializer{},
+		providerTypeVertex:     &vertexProviderInitializer{},
 	}
 )

@@ -269,14 +312,29 @@ type ProviderConfig struct {
 	// @Description zh-CN 配置一个外部获取对话上下文的文件来源，用于在AI请求中补充对话上下文
 	context *ContextConfig `required:"false" yaml:"context" json:"context"`
 	// @Title zh-CN 版本
-	// @Description zh-CN 请求AI服务的版本，目前仅适用于Claude AI服务
-	claudeVersion string `required:"false" yaml:"version" json:"version"`
+	// @Description zh-CN 请求AI服务的版本，目前仅适用于 Gemini 和 Claude AI服务
+	apiVersion string `required:"false" yaml:"apiVersion" json:"apiVersion"`
 	// @Title zh-CN Cloudflare Account ID
 	// @Description zh-CN 仅适用于 Cloudflare Workers AI 服务。参考：https://developers.cloudflare.com/workers-ai/get-started/rest-api/#2-run-a-model-via-api
 	cloudflareAccountId string `required:"false" yaml:"cloudflareAccountId" json:"cloudflareAccountId"`
 	// @Title zh-CN Gemini AI内容过滤和安全级别设定
 	// @Description zh-CN 仅适用于 Gemini AI 服务。参考：https://ai.google.dev/gemini-api/docs/safety-settings
 	geminiSafetySetting map[string]string `required:"false" yaml:"geminiSafetySetting" json:"geminiSafetySetting"`
+	// @Title zh-CN Vertex AI访问区域
+	// @Description zh-CN 仅适用于Vertex AI服务。如需查看支持的区域的完整列表，请参阅https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations?hl=zh-cn#available-regions
+	vertexRegion string `required:"false" yaml:"vertexRegion" json:"vertexRegion"`
+	// @Title zh-CN Vertex AI项目Id
+	// @Description zh-CN 仅适用于Vertex AI服务。创建和管理项目请参阅https://cloud.google.com/resource-manager/docs/creating-managing-projects?hl=zh-cn#identifiers
+	vertexProjectId string `required:"false" yaml:"vertexProjectId" json:"vertexProjectId"`
+	// @Title zh-CN Vertex 认证秘钥
+	// @Description zh-CN 用于Google服务账号认证的完整JSON密钥文件内容，获取可参考https://cloud.google.com/iam/docs/keys-create-delete?hl=zh-cn#iam-service-account-keys-create-console
+	vertexAuthKey string `required:"false" yaml:"vertexAuthKey" json:"vertexAuthKey"`
+	// @Title zh-CN Vertex 认证服务名
+	// @Description zh-CN 用于Google服务账号认证的服务,DNS类型的服务名
+	vertexAuthServiceName string `required:"false" yaml:"vertexAuthServiceName" json:"vertexAuthServiceName"`
+	// @Title zh-CN Vertex token刷新提前时间
+	// @Description zh-CN 用于Google服务账号认证，access token过期时间判定提前刷新，单位为秒，默认值为60秒
+	vertexTokenRefreshAhead int64 `required:"false" yaml:"vertexTokenRefreshAhead" json:"vertexTokenRefreshAhead"`
 	// @Title zh-CN 翻译服务需指定的目标语种
 	// @Description zh-CN 翻译结果的语种，目前仅适用于DeepL服务。
 	targetLang string `required:"false" yaml:"targetLang" json:"targetLang"`
@@ -354,7 +412,13 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 		c.context = &ContextConfig{}
 		c.context.FromJson(contextJson)
 	}
-	c.claudeVersion = json.Get("claudeVersion").String()
+
+	// 这里获取 claudeVersion 字段，与结构体中定义 yaml/json 的 tag 不一致
+	c.apiVersion = json.Get("claudeVersion").String()
+	if c.apiVersion == "" {
+		// 增加获取 version 字段，用于适配其他模型的配置，并保持与结构体中定义的 tag 一致
+		c.apiVersion = json.Get("apiVersion").String()
+	}
 	c.hunyuanAuthId = json.Get("hunyuanAuthId").String()
 	c.hunyuanAuthKey = json.Get("hunyuanAuthKey").String()
 	c.awsAccessKey = json.Get("awsAccessKey").String()
@@ -363,12 +427,20 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 	c.minimaxApiType = json.Get("minimaxApiType").String()
 	c.minimaxGroupId = json.Get("minimaxGroupId").String()
 	c.cloudflareAccountId = json.Get("cloudflareAccountId").String()
-	if c.typ == providerTypeGemini {
+	if c.typ == providerTypeGemini || c.typ == providerTypeVertex {
 		c.geminiSafetySetting = make(map[string]string)
 		for k, v := range json.Get("geminiSafetySetting").Map() {
 			c.geminiSafetySetting[k] = v.String()
 		}
 	}
+	c.vertexRegion = json.Get("vertexRegion").String()
+	c.vertexProjectId = json.Get("vertexProjectId").String()
+	c.vertexAuthKey = json.Get("vertexAuthKey").String()
+	c.vertexAuthServiceName = json.Get("vertexAuthServiceName").String()
+	c.vertexTokenRefreshAhead = json.Get("vertexTokenRefreshAhead").Int()
+	if c.vertexTokenRefreshAhead == 0 {
+		c.vertexTokenRefreshAhead = 60
+	}
 	c.targetLang = json.Get("targetLang").String()

 	if schemaValue, ok := json.Get("responseJsonSchema").Value().(map[string]interface{}); ok {
@@ -437,6 +509,8 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 		case string(ApiNameChatCompletion),
 			string(ApiNameEmbeddings),
 			string(ApiNameImageGeneration),
+			string(ApiNameImageVariation),
+			string(ApiNameImageEdit),
 			string(ApiNameAudioSpeech),
 			string(ApiNameCohereV1Rerank):
 			c.capabilities[capability] = pathJson.String()
@@ -532,6 +606,11 @@ func (c *ProviderConfig) parseRequestAndMapModel(ctx wrapper.HttpContext, reques
 			return err
 		}
 		return c.setRequestModel(ctx, req)
+	case *imageGenerationRequest:
+		if err := decodeImageGenerationRequest(body, req); err != nil {
+			return err
+		}
+		return c.setRequestModel(ctx, req)
 	default:
 		return errors.New("unsupported request type")
 	}
@@ -545,6 +624,8 @@ func (c *ProviderConfig) setRequestModel(ctx wrapper.HttpContext, request interf
 		model = &req.Model
 	case *embeddingsRequest:
 		model = &req.Model
+	case *imageGenerationRequest:
+		model = &req.Model
 	default:
 		return errors.New("unsupported request type")
 	}
@@ -587,13 +668,25 @@ func doGetMappedModel(model string, modelMapping map[string]string) string {
 	}

 	for k, v := range modelMapping {
-		if k == wildcard || !strings.HasSuffix(k, wildcard) {
+		if k == wildcard {
 			continue
 		}
-		k = strings.TrimSuffix(k, wildcard)
-		if strings.HasPrefix(model, k) {
-			log.Debugf("model [%s] is mapped to [%s] via prefix [%s]", model, v, k)
-			return v
+		if strings.HasSuffix(k, wildcard) {
+			k = strings.TrimSuffix(k, wildcard)
+			if strings.HasPrefix(model, k) {
+				log.Debugf("model [%s] is mapped to [%s] via prefix [%s]", model, v, k)
+				return v
+			}
+		}
+
+		// Keys starting with "~" are regular expressions matched against the
+		// model name; the mapped value may reference capture groups.
+		if strings.HasPrefix(k, "~") {
+			pattern := strings.TrimPrefix(k, "~")
+			// The pattern comes from user configuration, so compile it without
+			// panicking: an invalid expression is reported and skipped.
+			re, err := regexp.Compile(pattern)
+			if err != nil {
+				log.Errorf("invalid regex [%s] in model mapping: %v", pattern, err)
+				continue
+			}
+			if re.MatchString(model) {
+				mapped := re.ReplaceAllString(model, v)
+				log.Debugf("model [%s] is mapped to [%s] via regex [%s]", model, mapped, pattern)
+				return mapped
+			}
+		}
 	}

@@ -694,7 +787,8 @@ func (c *ProviderConfig) setDefaultCapabilities(capabilities map[string]string)
 }

 func (c *ProviderConfig) handleRequestBody(
-	provider Provider, contextCache *contextCache, ctx wrapper.HttpContext, apiName ApiName, body []byte) (types.Action, error) {
+	provider Provider, contextCache *contextCache, ctx wrapper.HttpContext, apiName ApiName, body []byte,
+) (types.Action, error) {
 	// use original protocol
 	if c.IsOriginal() {
 		return types.ActionContinue, nil
@@ -762,3 +856,19 @@ func (c *ProviderConfig) DefaultTransformResponseHeaders(ctx wrapper.HttpContext
 		headers.Del("Content-Length")
 	}
 }
+
+// needToProcessRequestBody reports whether apiName is one of the APIs listed
+// below; every other API name yields false.
+func (c *ProviderConfig) needToProcessRequestBody(apiName ApiName) bool {
+	switch apiName {
+	case ApiNameChatCompletion, ApiNameCompletion, ApiNameEmbeddings:
+		return true
+	case ApiNameImageGeneration, ApiNameImageEdit, ApiNameImageVariation:
+		return true
+	case ApiNameAudioSpeech, ApiNameFineTuningJobs, ApiNameResponses:
+		return true
+	default:
+		return false
+	}
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
@@ -23,13 +23,22 @@ import (
 const (
 	qwenResultFormatMessage = "message"

-	qwenDefaultDomain               = "dashscope.aliyuncs.com"
-	qwenChatCompletionPath          = "/api/v1/services/aigc/text-generation/generation"
-	qwenTextEmbeddingPath           = "/api/v1/services/embeddings/text-embedding/text-embedding"
-	qwenChatCompatiblePath          = "/compatible-mode/v1/chat/completions"
-	qwenTextEmbeddingCompatiblePath = "/compatible-mode/v1/embeddings"
-	qwenBailianPath                 = "/api/v1/apps"
-	qwenMultimodalGenerationPath    = "/api/v1/services/aigc/multimodal-generation/generation"
+	qwenDefaultDomain                     = "dashscope.aliyuncs.com"
+	qwenChatCompletionPath                = "/api/v1/services/aigc/text-generation/generation"
+	qwenTextEmbeddingPath                 = "/api/v1/services/embeddings/text-embedding/text-embedding"
+	qwenCompatibleChatCompletionPath      = "/compatible-mode/v1/chat/completions"
+	qwenCompatibleCompletionsPath         = "/compatible-mode/v1/completions"
+	qwenCompatibleTextEmbeddingPath       = "/compatible-mode/v1/embeddings"
+	qwenCompatibleFilesPath               = "/compatible-mode/v1/files"
+	qwenCompatibleRetrieveFilePath        = "/compatible-mode/v1/files/{file_id}"
+	qwenCompatibleRetrieveFileContentPath = "/compatible-mode/v1/files/{file_id}/content"
+	qwenCompatibleBatchesPath             = "/compatible-mode/v1/batches"
+	qwenCompatibleRetrieveBatchPath       = "/compatible-mode/v1/batches/{batch_id}"
+	qwenBailianPath                       = "/api/v1/apps"
+	qwenMultimodalGenerationPath          = "/api/v1/services/aigc/multimodal-generation/generation"
+
+	qwenAsyncAIGCPath                     = "/api/v1/services/aigc/"
+	qwenAsyncTaskPath                     = "/api/v1/tasks/"

 	qwenTopPMin = 0.000001
 	qwenTopPMax = 0.999999
@@ -40,8 +49,7 @@ const (
 	qwenVlModelPrefixName = "qwen-vl"
 )

-type qwenProviderInitializer struct {
-}
+type qwenProviderInitializer struct{}

 func (m *qwenProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if len(config.qwenFileIds) != 0 && config.context != nil {
@@ -56,13 +64,21 @@ func (m *qwenProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 func (m *qwenProviderInitializer) DefaultCapabilities(qwenEnableCompatible bool) map[string]string {
 	if qwenEnableCompatible {
 		return map[string]string{
-			string(ApiNameChatCompletion): qwenChatCompatiblePath,
-			string(ApiNameEmbeddings):     qwenTextEmbeddingCompatiblePath,
+			string(ApiNameChatCompletion):      qwenCompatibleChatCompletionPath,
+			string(ApiNameEmbeddings):          qwenCompatibleTextEmbeddingPath,
+			string(ApiNameCompletion):          qwenCompatibleCompletionsPath,
+			string(ApiNameFiles):               qwenCompatibleFilesPath,
+			string(ApiNameRetrieveFile):        qwenCompatibleRetrieveFilePath,
+			string(ApiNameRetrieveFileContent): qwenCompatibleRetrieveFileContentPath,
+			string(ApiNameBatches):             qwenCompatibleBatchesPath,
+			string(ApiNameRetrieveBatch):       qwenCompatibleRetrieveBatchPath,
 		}
 	} else {
 		return map[string]string{
 			string(ApiNameChatCompletion): qwenChatCompletionPath,
 			string(ApiNameEmbeddings):     qwenTextEmbeddingPath,
+			string(ApiNameQwenAsyncAIGC): qwenAsyncAIGCPath,
+			string(ApiNameQwenAsyncTask): qwenAsyncTaskPath,
 		}
 	}
 }
@@ -291,7 +307,7 @@ func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwen
 		message := qwenMessageToChatMessage(qwenChoice.Message, m.config.reasoningContentMode)
 		choices = append(choices, chatCompletionChoice{
 			Message:      &message,
-			FinishReason: qwenChoice.FinishReason,
+			FinishReason: util.Ptr(qwenChoice.FinishReason),
 		})
 	}
 	return &chatCompletionResponse{
@@ -301,7 +317,7 @@ func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwen
 		SystemFingerprint: "",
 		Object:            objectChatCompletion,
 		Choices:           choices,
-		Usage: usage{
+		Usage: &usage{
 			PromptTokens:     qwenResponse.Usage.InputTokens,
 			CompletionTokens: qwenResponse.Usage.OutputTokens,
 			TotalTokens:      qwenResponse.Usage.TotalTokens,
@@ -402,11 +418,11 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont

 	if finished {
 		finishResponse := *&baseMessage
-		finishResponse.Choices = append(finishResponse.Choices, chatCompletionChoice{Delta: &chatMessage{}, FinishReason: qwenChoice.FinishReason})
+		finishResponse.Choices = append(finishResponse.Choices, chatCompletionChoice{Delta: &chatMessage{}, FinishReason: util.Ptr(qwenChoice.FinishReason)})

 		usageResponse := *&baseMessage
 		usageResponse.Choices = []chatCompletionChoice{{Delta: &chatMessage{}}}
-		usageResponse.Usage = usage{
+		usageResponse.Usage = &usage{
 			PromptTokens:     qwenResponse.Usage.InputTokens,
 			CompletionTokens: qwenResponse.Usage.OutputTokens,
 			TotalTokens:      qwenResponse.Usage.TotalTokens,
@@ -673,11 +689,15 @@ func (m *qwenProvider) GetApiName(path string) ApiName {
 	case strings.Contains(path, qwenChatCompletionPath),
 		strings.Contains(path, qwenMultimodalGenerationPath),
 		strings.Contains(path, qwenBailianPath),
-		strings.Contains(path, qwenChatCompatiblePath):
+		strings.Contains(path, qwenCompatibleChatCompletionPath):
 		return ApiNameChatCompletion
 	case strings.Contains(path, qwenTextEmbeddingPath),
-		strings.Contains(path, qwenTextEmbeddingCompatiblePath):
+		strings.Contains(path, qwenCompatibleTextEmbeddingPath):
 		return ApiNameEmbeddings
+	case strings.Contains(path, qwenAsyncAIGCPath):
+		return ApiNameQwenAsyncAIGC
+	case strings.Contains(path, qwenAsyncTaskPath):
+		return ApiNameQwenAsyncTask
 	default:
 		return ""
 	}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/request_helper.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/request_helper.go
@@ -25,6 +25,13 @@ func decodeEmbeddingsRequest(body []byte, request *embeddingsRequest) error {
 	return nil
 }

+// decodeImageGenerationRequest unmarshals the JSON body into request and
+// wraps any decoding failure in a descriptive error.
+func decodeImageGenerationRequest(body []byte, request *imageGenerationRequest) error {
+	err := json.Unmarshal(body, request)
+	if err == nil {
+		return nil
+	}
+	return fmt.Errorf("unable to unmarshal request: %v", err)
+}
+
 func replaceJsonRequestBody(request interface{}) error {
 	body, err := json.Marshal(request)
 	if err != nil {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/spark.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/spark.go
@@ -15,12 +15,10 @@ import (

 // sparkProvider is the provider for SparkLLM AI service.
 const (
-	sparkHost               = "spark-api-open.xf-yun.com"
-	sparkChatCompletionPath = "/v1/chat/completions"
+	sparkHost = "spark-api-open.xf-yun.com"
 )

-type sparkProviderInitializer struct {
-}
+type sparkProviderInitializer struct{}

 type sparkProvider struct {
 	config       ProviderConfig
@@ -58,7 +56,7 @@ func (i *sparkProviderInitializer) ValidateConfig(config *ProviderConfig) error

 func (i *sparkProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion): sparkChatCompletionPath,
+		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
 	}
 }

@@ -152,7 +150,7 @@ func (p *sparkProvider) responseSpark2OpenAI(ctx wrapper.HttpContext, response *
 		Object:  objectChatCompletion,
 		Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
 		Choices: choices,
-		Usage:   response.Usage,
+		Usage:   &response.Usage,
 	}
 }

@@ -170,7 +168,7 @@ func (p *sparkProvider) streamResponseSpark2OpenAI(ctx wrapper.HttpContext, resp
 		Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
 		Object:  objectChatCompletion,
 		Choices: choices,
-		Usage:   response.Usage,
+		Usage:   &response.Usage,
 	}
 }

--- a/plugins/wasm-go/extensions/ai-proxy/provider/stepfun.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/stepfun.go
@@ -10,12 +10,10 @@ import (
 )

 const (
-	stepfunDomain             = "api.stepfun.com"
-	stepfunChatCompletionPath = "/v1/chat/completions"
+	stepfunDomain = "api.stepfun.com"
 )

-type stepfunProviderInitializer struct {
-}
+type stepfunProviderInitializer struct{}

 func (m *stepfunProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
@@ -27,7 +25,7 @@ func (m *stepfunProviderInitializer) ValidateConfig(config *ProviderConfig) erro
 func (m *stepfunProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
 		// stepfun的chat接口path和OpenAI的chat接口一样
-		string(ApiNameChatCompletion): stepfunChatCompletionPath,
+		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
 	}
 }

--- a/plugins/wasm-go/extensions/ai-proxy/provider/together_ai.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/together_ai.go
@@ -11,8 +11,7 @@ import (
 )

 const (
-	togetherAIDomain         = "api.together.xyz"
-	togetherAICompletionPath = "/v1/chat/completions"
+	togetherAIDomain = "api.together.xyz"
 )

 type togetherAIProviderInitializer struct{}
@@ -26,7 +25,7 @@ func (m *togetherAIProviderInitializer) ValidateConfig(config *ProviderConfig) e

 func (m *togetherAIProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion): togetherAICompletionPath,
+		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
 	}
 }

@@ -67,7 +66,7 @@ func (m *togetherAIProvider) TransformRequestHeaders(ctx wrapper.HttpContext, ap
 }

 func (m *togetherAIProvider) GetApiName(path string) ApiName {
-	if strings.Contains(path, togetherAICompletionPath) {
+	if strings.Contains(path, PathOpenAIChatCompletions) {
 		return ApiNameChatCompletion
 	}
 	return ""
--- a/plugins/wasm-go/extensions/ai-proxy/provider/vertex.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/vertex.go
@@ -0,0 +1,667 @@
+package provider
+
+import (
+	"crypto"
+	"crypto/rsa"
+	"crypto/sha256"
+	"crypto/x509"
+	"encoding/base64"
+	"encoding/json"
+	"encoding/pem"
+	"errors"
+	"fmt"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/log"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+	"github.com/tidwall/gjson"
+)
+
+const (
+	// vertexAuthDomain is the domain of the cluster client that CreateProvider
+	// sets up for authentication requests.
+	vertexAuthDomain = "oauth2.googleapis.com"
+	// vertexDomain is the upstream host template; TransformRequestHeaders
+	// substitutes "{REGION}" with the configured vertexRegion.
+	vertexDomain     = "{REGION}-aiplatform.googleapis.com"
+	// vertexPathTemplate is the fmt template for the request path. getRequestPath
+	// fills it in this order: project id, region, model id, action, i.e.
+	// /v1/projects/{PROJECT_ID}/locations/{REGION}/publishers/google/models/{MODEL_ID}:{ACTION}
+	vertexPathTemplate               = "/v1/projects/%s/locations/%s/publishers/google/models/%s:%s"
+	// Action suffixes selected by getRequestPath: non-streaming chat,
+	// streaming chat (includes the "alt=sse" query string), and embeddings.
+	vertexChatCompletionAction       = "generateContent"
+	vertexChatCompletionStreamAction = "streamGenerateContent?alt=sse"
+	vertexEmbeddingAction            = "predict"
+)
+
+// vertexProviderInitializer validates the vertex provider configuration and
+// creates vertexProvider instances. It carries no state of its own.
+type vertexProviderInitializer struct{}
+
+// ValidateConfig checks that every setting the vertex provider needs is
+// present: the auth key, the region and project id, and the auth service name.
+// The first missing setting is reported as an error.
+func (v *vertexProviderInitializer) ValidateConfig(config *ProviderConfig) error {
+	switch {
+	case config.vertexAuthKey == "":
+		return errors.New("missing vertexAuthKey in vertex provider config")
+	case config.vertexRegion == "", config.vertexProjectId == "":
+		return errors.New("missing vertexRegion or vertexProjectId in vertex provider config")
+	case config.vertexAuthServiceName == "":
+		return errors.New("missing vertexAuthServiceName in vertex provider config")
+	default:
+		return nil
+	}
+}
+
+// DefaultCapabilities returns the APIs handled by default. Chat completion and
+// embeddings both map to the shared vertex path template.
+func (v *vertexProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string, 2)
+	capabilities[string(ApiNameChatCompletion)] = vertexPathTemplate
+	capabilities[string(ApiNameEmbeddings)] = vertexPathTemplate
+	return capabilities
+}
+
+// CreateProvider applies the default capabilities to config and builds a
+// vertexProvider whose HTTP client targets the auth domain through the
+// configured DNS service on port 443.
+func (v *vertexProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(v.DefaultCapabilities())
+
+	authCluster := wrapper.DnsCluster{
+		Domain:      vertexAuthDomain,
+		ServiceName: config.vertexAuthServiceName,
+		Port:        443,
+	}
+	provider := &vertexProvider{
+		config:       config,
+		client:       wrapper.NewClusterClient(authCluster),
+		contextCache: createContextCache(&config),
+	}
+	return provider, nil
+}
+
+// vertexProvider is the Provider implementation for the "vertex" provider type.
+type vertexProvider struct {
+	// client is the cluster client created for vertexAuthDomain in CreateProvider.
+	client       wrapper.HttpClient
+	// config is the provider configuration with default capabilities applied.
+	config       ProviderConfig
+	// contextCache is created from config by createContextCache.
+	contextCache *contextCache
+}
+
+// GetProviderType returns the provider type identifier, providerTypeVertex.
+func (v *vertexProvider) GetProviderType() string {
+	return providerTypeVertex
+}
+
+// GetApiName derives the API name from the action suffix of path. Chat actions
+// (streaming or not) map to chat completion, the predict action maps to
+// embeddings, and anything else yields an empty name.
+func (v *vertexProvider) GetApiName(path string) ApiName {
+	switch {
+	case strings.HasSuffix(path, vertexChatCompletionAction),
+		strings.HasSuffix(path, vertexChatCompletionStreamAction):
+		return ApiNameChatCompletion
+	case strings.HasSuffix(path, vertexEmbeddingAction):
+		return ApiNameEmbeddings
+	default:
+		return ""
+	}
+}
+
+// OnRequestHeaders delegates header handling to the shared
+// ProviderConfig.handleRequestHeaders and always returns nil.
+func (v *vertexProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName) error {
+	v.config.handleRequestHeaders(v, ctx, apiName)
+	return nil
+}
+
+// TransformRequestHeaders points the request host at the regional Vertex
+// endpoint by substituting the configured region into the domain template.
+func (v *vertexProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header) {
+	region := v.config.vertexRegion
+	host := strings.Replace(vertexDomain, "{REGION}", region, 1)
+	util.OverwriteRequestHostHeader(headers, host)
+}
+
+// getToken makes sure the outgoing request carries an access token.
+//
+// When getCachedAccessToken yields a non-empty token, it is written to the
+// Authorization header as a Bearer token and (true, nil) is returned.
+// Otherwise the configured vertexAuthKey is parsed as a service account key,
+// a JWT is created from it and handed to getAccessToken; cached is false in
+// that case and err reports any failure along the way.
+// NOTE(review): OnRequestBody pauses the request when this returns (false, nil),
+// so getAccessToken presumably finishes asynchronously and resumes it — confirm.
+func (v *vertexProvider) getToken() (cached bool, err error) {
+	cacheKeyName := v.buildTokenKey()
+	cachedAccessToken, err := v.getCachedAccessToken(cacheKeyName)
+	// A cache lookup error is not fatal: fall through and request a new token.
+	if err == nil && cachedAccessToken != "" {
+		_ = proxywasm.ReplaceHttpRequestHeader("Authorization", "Bearer "+cachedAccessToken)
+		return true, nil
+	}
+
+	var key ServiceAccountKey
+	if err := json.Unmarshal([]byte(v.config.vertexAuthKey), &key); err != nil {
+		return false, fmt.Errorf("[vertex]: unable to unmarshal auth key json: %v", err)
+	}
+
+	// All three fields are required before a JWT is built from the key.
+	if key.ClientEmail == "" || key.PrivateKey == "" || key.TokenURI == "" {
+		return false, fmt.Errorf("[vertex]: missing auth params")
+	}
+
+	jwtToken, err := createJWT(&key)
+	if err != nil {
+		log.Errorf("[vertex]: unable to create JWT token: %v", err)
+		return false, err
+	}
+
+	err = v.getAccessToken(jwtToken)
+	if err != nil {
+		log.Errorf("[vertex]: unable to get access token: %v", err)
+		return false, err
+	}
+
+	// err is nil here: the token request was issued successfully.
+	return false, err
+}
+
+// OnRequestBody converts the incoming request into a Vertex request and makes
+// sure an access token is attached.
+//
+// Unsupported APIs return errUnsupportedApiName; the original protocol is passed
+// through untouched. Otherwise the body and headers are transformed and written
+// back to the request. The request continues immediately when a cached token
+// was applied, and is paused (ActionPause) when a new token had to be
+// requested. Any error is returned together with ActionContinue.
+func (v *vertexProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) (types.Action, error) {
+	if !v.config.isSupportedAPI(apiName) {
+		return types.ActionContinue, errUnsupportedApiName
+	}
+	if v.config.IsOriginal() {
+		return types.ActionContinue, nil
+	}
+	headers := util.GetOriginalRequestHeaders()
+	body, err := v.TransformRequestBodyHeaders(ctx, apiName, body, headers)
+	// Check the transformation result first, so a failed transformation never
+	// overwrites the request with a nil body or partially rewritten headers.
+	if err != nil {
+		return types.ActionContinue, err
+	}
+	util.ReplaceRequestHeaders(headers)
+	// The path header already targets Vertex, so forwarding the untransformed
+	// body would be wrong; surface a failed replacement instead of ignoring it.
+	if err := proxywasm.ReplaceHttpRequestBody(body); err != nil {
+		return types.ActionContinue, fmt.Errorf("[vertex]: unable to replace request body: %v", err)
+	}
+
+	cached, err := v.getToken()
+	if err != nil {
+		return types.ActionContinue, err
+	}
+	if cached {
+		return types.ActionContinue, nil
+	}
+	// No cached token: wait until the token request has completed.
+	return types.ActionPause, nil
+}
+
+// TransformRequestBodyHeaders rewrites the request body and headers for Vertex.
+// Chat completion requests take the chat path; every other API name is handled
+// as an embeddings request.
+func (v *vertexProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header) ([]byte, error) {
+	if apiName != ApiNameChatCompletion {
+		return v.onEmbeddingsRequestBody(ctx, body, headers)
+	}
+	return v.onChatCompletionRequestBody(ctx, body, headers)
+}
+
+func (v *vertexProvider) onChatCompletionRequestBody(ctx wrapper.HttpContext, body []byte, headers http.Header) ([]byte, error) {
+	request := &chatCompletionRequest{}
+	err := v.config.parseRequestAndMapModel(ctx, request, body)
+	if err != nil {
+		return nil, err
+	}
+	path := v.getRequestPath(ApiNameChatCompletion, request.Model, request.Stream)
+	util.OverwriteRequestPathHeader(headers, path)
+
+	vertexRequest := v.buildVertexChatRequest(request)
+	return json.Marshal(vertexRequest)
+}
+
+func (v *vertexProvider) onEmbeddingsRequestBody(ctx wrapper.HttpContext, body []byte, headers http.Header) ([]byte, error) {
+	request := &embeddingsRequest{}
+	if err := v.config.parseRequestAndMapModel(ctx, request, body); err != nil {
+		return nil, err
+	}
+	path := v.getRequestPath(ApiNameEmbeddings, request.Model, false)
+	util.OverwriteRequestPathHeader(headers, path)
+
+	vertexRequest := v.buildEmbeddingRequest(request)
+	return json.Marshal(vertexRequest)
+}
+
+// OnStreamingResponseBody converts one chunk of a Vertex SSE stream into
+// OpenAI-style chat completion chunks.
+//
+// It returns (nil, nil) for the last or an empty chunk, and returns the chunk
+// unchanged for APIs other than chat completion. Otherwise every "data:" line
+// of the chunk is decoded as a Vertex chat response and re-emitted as a
+// "data: <json>" event. Lines that are not data fields, carry no payload or
+// cannot be decoded are skipped. An error is returned only when a converted
+// response cannot be marshalled.
+func (v *vertexProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool) ([]byte, error) {
+	// Chunk bodies are large and may contain user content: keep them at debug level.
+	log.Debugf("[vertexProvider] receive chunk body: %s", string(chunk))
+	if isLastChunk || len(chunk) == 0 {
+		return nil, nil
+	}
+	if name != ApiNameChatCompletion {
+		return chunk, nil
+	}
+
+	// SSE data field name; the space after the colon is optional.
+	const dataPrefix = "data:"
+
+	responseBuilder := &strings.Builder{}
+	for _, line := range strings.Split(string(chunk), "\n") {
+		// Tolerate CRLF line endings and surrounding whitespace.
+		line = strings.TrimSpace(line)
+		if !strings.HasPrefix(line, dataPrefix) {
+			// blank line, comment or a non-data SSE field
+			continue
+		}
+		payload := strings.TrimSpace(strings.TrimPrefix(line, dataPrefix))
+		if payload == "" {
+			continue
+		}
+		var vertexResp vertexChatResponse
+		if err := json.Unmarshal([]byte(payload), &vertexResp); err != nil {
+			log.Errorf("unable to unmarshal vertex response: %v", err)
+			continue
+		}
+		response := v.buildChatCompletionStreamResponse(ctx, &vertexResp)
+		responseBody, err := json.Marshal(response)
+		if err != nil {
+			log.Errorf("unable to marshal response: %v", err)
+			return nil, err
+		}
+		v.appendResponse(responseBuilder, string(responseBody))
+	}
+	modifiedResponseChunk := responseBuilder.String()
+	log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
+	return []byte(modifiedResponseChunk), nil
+}
+
+// TransformResponseBody converts a complete Vertex response body into the
+// OpenAI format. Chat completion responses take the chat path; every other API
+// name is handled as an embeddings response.
+func (v *vertexProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) ([]byte, error) {
+	if apiName != ApiNameChatCompletion {
+		return v.onEmbeddingsResponseBody(ctx, body)
+	}
+	return v.onChatCompletionResponseBody(ctx, body)
+}
+
+func (v *vertexProvider) onChatCompletionResponseBody(ctx wrapper.HttpContext, body []byte) ([]byte, error) {
+	vertexResponse := &vertexChatResponse{}
+	if err := json.Unmarshal(body, vertexResponse); err != nil {
+		return nil, fmt.Errorf("unable to unmarshal vertex chat response: %v", err)
+	}
+	response := v.buildChatCompletionResponse(ctx, vertexResponse)
+	return json.Marshal(response)
+}
+
+// buildChatCompletionResponse maps a Vertex chat response onto an OpenAI-style
+// chat completion response. Each candidate becomes one assistant choice whose
+// content is the text of the candidate's first part, or an empty string when
+// the candidate has no parts. Token usage is copied from the usage metadata.
+func (v *vertexProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, response *vertexChatResponse) *chatCompletionResponse {
+	choices := make([]chatCompletionChoice, 0, len(response.Candidates))
+	for _, candidate := range response.Candidates {
+		text := ""
+		if parts := candidate.Content.Parts; len(parts) > 0 {
+			text = parts[0].Text
+		}
+		choices = append(choices, chatCompletionChoice{
+			Index: candidate.Index,
+			Message: &chatMessage{
+				Role:    roleAssistant,
+				Content: text,
+			},
+			FinishReason: util.Ptr(candidate.FinishReason),
+		})
+	}
+
+	metadata := response.UsageMetadata
+	return &chatCompletionResponse{
+		Id:      response.ResponseId,
+		Object:  objectChatCompletion,
+		Created: time.Now().Unix(),
+		Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
+		Choices: choices,
+		Usage: &usage{
+			PromptTokens:     metadata.PromptTokenCount,
+			CompletionTokens: metadata.CandidatesTokenCount,
+			TotalTokens:      metadata.TotalTokenCount,
+		},
+	}
+}
+
+func (v *vertexProvider) onEmbeddingsResponseBody(ctx wrapper.HttpContext, body []byte) ([]byte, error) {
+	vertexResponse := &vertexEmbeddingResponse{}
+	if err := json.Unmarshal(body, vertexResponse); err != nil {
+		return nil, fmt.Errorf("unable to unmarshal vertex embeddings response: %v", err)
+	}
+	response := v.buildEmbeddingsResponse(ctx, vertexResponse)
+	return json.Marshal(response)
+}
+
+// buildEmbeddingsResponse maps a Vertex embeddings response onto an
+// OpenAI-style embeddings response. Every prediction becomes one embedding
+// whose Index is its position in the prediction list, so callers can match
+// results to their inputs. Total token usage is the sum of the per-prediction
+// token counts, where statistics are present.
+func (v *vertexProvider) buildEmbeddingsResponse(ctx wrapper.HttpContext, vertexResp *vertexEmbeddingResponse) *embeddingsResponse {
+	response := embeddingsResponse{
+		Object: "list",
+		Data:   make([]embedding, 0, len(vertexResp.Predictions)),
+		// GetStringContext falls back to "" instead of panicking when the
+		// model is missing from the context, matching the chat builders.
+		Model: ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
+	}
+	totalTokens := 0
+	for i, item := range vertexResp.Predictions {
+		response.Data = append(response.Data, embedding{
+			Object:    `embedding`,
+			Index:     i,
+			Embedding: item.Embeddings.Values,
+		})
+		if item.Embeddings.Statistics != nil {
+			totalTokens += item.Embeddings.Statistics.TokenCount
+		}
+	}
+	response.Usage.TotalTokens = totalTokens
+	return &response
+}
+
+func (v *vertexProvider) buildChatCompletionStreamResponse(ctx wrapper.HttpContext, vertexResp *vertexChatResponse) *chatCompletionResponse {
+	var choice chatCompletionChoice
+	if len(vertexResp.Candidates) > 0 && len(vertexResp.Candidates[0].Content.Parts) > 0 {
+		choice.Delta = &chatMessage{Content: vertexResp.Candidates[0].Content.Parts[0].Text}
+	}
+	streamResponse := chatCompletionResponse{
+		Id:      vertexResp.ResponseId,
+		Object:  objectChatCompletionChunk,
+		Created: time.Now().UnixMilli() / 1000,
+		Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
+		Choices: []chatCompletionChoice{choice},
+		Usage: &usage{
+			PromptTokens:     vertexResp.UsageMetadata.PromptTokenCount,
+			CompletionTokens: vertexResp.UsageMetadata.CandidatesTokenCount,
+			TotalTokens:      vertexResp.UsageMetadata.TotalTokenCount,
+		},
+	}
+	return &streamResponse
+}
+
+// appendResponse writes responseBody to responseBuilder as one stream event:
+// the stream data key, a space, the body, and a terminating blank line.
+func (v *vertexProvider) appendResponse(responseBuilder *strings.Builder, responseBody string) {
+	fmt.Fprintf(responseBuilder, "%s %s\n\n", streamDataItemKey, responseBody)
+}
+
+func (v *vertexProvider) getRequestPath(apiName ApiName, modelId string, stream bool) string {
+	action := ""
+	if apiName == ApiNameEmbeddings {
+		action = vertexEmbeddingAction
+	} else if stream {
+		action = vertexChatCompletionStreamAction
+	} else {
+		action = vertexChatCompletionAction
+	}
+	return fmt.Sprintf(vertexPathTemplate, v.config.vertexProjectId, v.config.vertexRegion, modelId, action)
+}
+
+// buildVertexChatRequest converts an OpenAI-style chat completion request into
+// a Vertex chat request.
+//
+// Safety settings come from the provider configuration; temperature, top-p and
+// max tokens are copied into the generation config; tools, when present, are
+// sent as a single tool holding all function declarations. Messages are mapped
+// one to one, with the assistant role renamed to "model". A system message is
+// sent with the user role and is immediately followed by a placeholder "model"
+// reply.
+func (v *vertexProvider) buildVertexChatRequest(request *chatCompletionRequest) *vertexChatRequest {
+	result := &vertexChatRequest{
+		Contents:       make([]vertexChatContent, 0),
+		SafetySettings: make([]vertexChatSafetySetting, 0),
+		GenerationConfig: vertexChatGenerationConfig{
+			Temperature:     request.Temperature,
+			TopP:            request.TopP,
+			MaxOutputTokens: request.MaxTokens,
+		},
+	}
+	for category, threshold := range v.config.geminiSafetySetting {
+		result.SafetySettings = append(result.SafetySettings, vertexChatSafetySetting{
+			Category:  category,
+			Threshold: threshold,
+		})
+	}
+
+	if request.Tools != nil {
+		declarations := make([]function, 0, len(request.Tools))
+		for _, tool := range request.Tools {
+			declarations = append(declarations, tool.Function)
+		}
+		result.Tools = []vertexTool{{FunctionDeclarations: declarations}}
+	}
+
+	for _, message := range request.Messages {
+		// there's no assistant role in vertex and API shall vomit if role is not user or model
+		role := message.Role
+		fromSystem := false
+		switch role {
+		case roleAssistant:
+			role = "model"
+		case roleSystem:
+			// converting system prompt to prompt from user for the same reason
+			role = roleUser
+			fromSystem = true
+		}
+		result.Contents = append(result.Contents, vertexChatContent{
+			Role:  role,
+			Parts: []vertexPart{{Text: message.StringContent()}},
+		})
+
+		// every converted system message is followed by a dummy model message
+		// to make vertex happy
+		if fromSystem {
+			result.Contents = append(result.Contents, vertexChatContent{
+				Role:  "model",
+				Parts: []vertexPart{{Text: "Okay"}},
+			})
+		}
+	}
+
+	return result
+}
+
+// buildEmbeddingRequest converts an OpenAI-style embeddings request into a
+// Vertex embeddings request with one instance per parsed input.
+func (v *vertexProvider) buildEmbeddingRequest(request *embeddingsRequest) *vertexEmbeddingRequest {
+	texts := request.ParseInput()
+	instances := make([]vertexEmbeddingInstance, 0, len(texts))
+	for _, text := range texts {
+		instances = append(instances, vertexEmbeddingInstance{Content: text})
+	}
+	return &vertexEmbeddingRequest{Instances: instances}
+}
+
+// vertexChatRequest is the JSON body sent for chat requests. In this file
+// buildVertexChatRequest populates Contents, SafetySettings, GenerationConfig
+// and, when tools are present, Tools; the other fields are left at zero value.
+// NOTE(review): omitempty has no effect on the struct-typed GenerationConfig,
+// so "generationConfig" is always serialized.
+type vertexChatRequest struct {
+	CachedContent     string                     `json:"cachedContent,omitempty"`
+	Contents          []vertexChatContent        `json:"contents"`
+	SystemInstruction *vertexSystemInstruction   `json:"systemInstruction,omitempty"`
+	Tools             []vertexTool               `json:"tools,omitempty"`
+	SafetySettings    []vertexChatSafetySetting  `json:"safetySettings,omitempty"`
+	GenerationConfig  vertexChatGenerationConfig `json:"generationConfig,omitempty"`
+	Labels            map[string]string          `json:"labels,omitempty"`
+}
+
+// vertexChatContent is a single message: a role plus its parts. It is used for
+// the request's Contents and for the content of each response candidate.
+type vertexChatContent struct {
+	// The producer of the content. Must be either 'user' or 'model'.
+	Role  string       `json:"role,omitempty"`
+	Parts []vertexPart `json:"parts"`
+}
+
+// vertexPart is one piece of a message: plain text, inline data, or a file
+// reference. Every field is omitted from the JSON when empty/nil.
+type vertexPart struct {
+	Text       string    `json:"text,omitempty"`
+	InlineData *blob     `json:"inlineData,omitempty"`
+	FileData   *fileData `json:"fileData,omitempty"`
+}
+
+// blob is the inline-data payload of a vertexPart: a MIME type plus the data
+// as a string.
+// NOTE(review): the data is presumably base64-encoded — confirm against the API.
+type blob struct {
+	MimeType string `json:"mimeType"`
+	Data     string `json:"data"`
+}
+
+// fileData is the file-reference payload of a vertexPart: a MIME type plus the
+// URI of the file.
+type fileData struct {
+	MimeType string `json:"mimeType"`
+	FileUri  string `json:"fileUri"`
+}
+
+// vertexSystemInstruction is the value of vertexChatRequest.SystemInstruction:
+// a role plus parts. Both fields are always serialized.
+type vertexSystemInstruction struct {
+	Role  string       `json:"role"`
+	Parts []vertexPart `json:"parts"`
+}
+
+// vertexTool wraps the function declarations offered to the model. The field
+// is typed any, so whatever value is assigned is serialized as-is.
+type vertexTool struct {
+	FunctionDeclarations any `json:"functionDeclarations"`
+}
+
+// vertexChatSafetySetting is one entry of vertexChatRequest.SafetySettings:
+// a category name paired with a threshold name.
+type vertexChatSafetySetting struct {
+	Category  string `json:"category"`
+	Threshold string `json:"threshold"`
+}
+
+// vertexChatGenerationConfig holds the sampling and length parameters of a chat
+// request.
+//
+// NOTE(review): every field carries omitempty, so a zero value (for example an
+// explicit temperature of 0) is dropped from the JSON rather than sent.
+type vertexChatGenerationConfig struct {
+	Temperature     float64 `json:"temperature,omitempty"`
+	TopP            float64 `json:"topP,omitempty"`
+	TopK            int     `json:"topK,omitempty"`
+	CandidateCount  int     `json:"candidateCount,omitempty"`
+	MaxOutputTokens int     `json:"maxOutputTokens,omitempty"`
+}
+
+// vertexEmbeddingRequest is the JSON body of a Vertex embedding request: the
+// instances to embed plus optional parameters (omitted when nil).
+type vertexEmbeddingRequest struct {
+	Instances  []vertexEmbeddingInstance `json:"instances"`
+	Parameters *vertexEmbeddingParams    `json:"parameters,omitempty"`
+}
+
+// vertexEmbeddingInstance is one input to embed.
+//
+// NOTE(review): task_type has no omitempty, so it is serialized as an empty
+// string whenever TaskType is left unset (buildEmbeddingRequest only sets
+// Content) — confirm the API accepts that.
+type vertexEmbeddingInstance struct {
+	TaskType string `json:"task_type"`
+	Title    string `json:"title,omitempty"`
+	Content  string `json:"content"`
+}
+
+// vertexEmbeddingParams holds the optional parameters of an embedding request.
+// Because of omitempty, autoTruncate is only serialized when it is true.
+type vertexEmbeddingParams struct {
+	AutoTruncate bool `json:"autoTruncate,omitempty"`
+}
+
+// vertexChatResponse maps the JSON body of a Vertex chat response: the
+// candidates, an optional response ID, prompt feedback and usage metadata.
+type vertexChatResponse struct {
+	Candidates     []vertexChatCandidate    `json:"candidates"`
+	ResponseId     string                   `json:"responseId,omitempty"`
+	PromptFeedback vertexChatPromptFeedback `json:"promptFeedback"`
+	UsageMetadata  vertexUsageMetadata      `json:"usageMetadata"`
+}
+
+// vertexChatCandidate is one entry of vertexChatResponse.Candidates: the
+// generated content, its finish reason, its index and its safety ratings.
+type vertexChatCandidate struct {
+	Content       vertexChatContent        `json:"content"`
+	FinishReason  string                   `json:"finishReason"`
+	Index         int                      `json:"index"`
+	SafetyRatings []vertexChatSafetyRating `json:"safetyRatings"`
+}
+
+// vertexChatSafetyRating pairs a safety category with a probability label.
+// It appears on candidates and in the prompt feedback.
+type vertexChatSafetyRating struct {
+	Category    string `json:"category"`
+	Probability string `json:"probability"`
+}
+
+// vertexChatPromptFeedback carries the safety ratings reported in the
+// "promptFeedback" field of a chat response.
+type vertexChatPromptFeedback struct {
+	SafetyRatings []vertexChatSafetyRating `json:"safetyRatings"`
+}
+
+// vertexUsageMetadata holds the token counts reported in the "usageMetadata"
+// field of a chat response: prompt, candidates and total.
+type vertexUsageMetadata struct {
+	PromptTokenCount     int `json:"promptTokenCount,omitempty"`
+	CandidatesTokenCount int `json:"candidatesTokenCount,omitempty"`
+	TotalTokenCount      int `json:"totalTokenCount,omitempty"`
+}
+
+// vertexEmbeddingResponse maps the JSON body of a Vertex embedding response,
+// which is a list of predictions.
+type vertexEmbeddingResponse struct {
+	Predictions []vertexPredictions `json:"predictions"`
+}
+
+// vertexPredictions is one entry of vertexEmbeddingResponse.Predictions. Its
+// "embeddings" object holds the vector values and optional statistics.
+type vertexPredictions struct {
+	Embeddings struct {
+		Values     []float64         `json:"values"`
+		Statistics *vertexStatistics `json:"statistics,omitempty"`
+	} `json:"embeddings"`
+}
+
+// vertexStatistics is the optional statistics object attached to an embedding:
+// a token count and a truncated flag.
+type vertexStatistics struct {
+	TokenCount int  `json:"token_count"`
+	Truncated  bool `json:"truncated"`
+}
+
+// ServiceAccountKey holds the fields of a service account key JSON that
+// createJWT needs: the client email (used as the JWT issuer), the private key
+// ID (JWT "kid" header), the PEM-encoded private key (used for signing) and
+// the token URI (JWT audience).
+type ServiceAccountKey struct {
+	ClientEmail  string `json:"client_email"`
+	PrivateKeyID string `json:"private_key_id"`
+	PrivateKey   string `json:"private_key"`
+	TokenURI     string `json:"token_uri"`
+}
+
+// createJWT builds the signed JWT assertion for the given service account key.
+//
+// The header carries alg=RS256, typ=JWT and the key ID. The claims carry the
+// client email as issuer, the cloud-platform scope, the token URI as audience
+// and a one-hour validity window starting now. The result is the compact form
+// "<header>.<claims>.<signature>", each part unpadded base64url.
+//
+// An error is returned when key is nil, when key.PrivateKey is not a PEM block
+// holding a PKCS#8 key, when that key is not an RSA key, or when signing fails.
+func createJWT(key *ServiceAccountKey) (string, error) {
+	if key == nil {
+		return "", fmt.Errorf("service account key is nil")
+	}
+
+	// Decode the PEM-encoded private key.
+	block, _ := pem.Decode([]byte(key.PrivateKey))
+	if block == nil {
+		return "", fmt.Errorf("invalid PEM block")
+	}
+	parsedKey, err := x509.ParsePKCS8PrivateKey(block.Bytes)
+	if err != nil {
+		return "", err
+	}
+	// ParsePKCS8PrivateKey can also return non-RSA key types; an unchecked
+	// type assertion would panic on those, so report an error instead.
+	rsaKey, ok := parsedKey.(*rsa.PrivateKey)
+	if !ok {
+		return "", fmt.Errorf("unsupported private key type %T, an RSA key is required", parsedKey)
+	}
+
+	// Build the JWT header.
+	jwtHeader := map[string]string{
+		"alg": "RS256",
+		"typ": "JWT",
+		"kid": key.PrivateKeyID,
+	}
+	headerJSON, err := json.Marshal(jwtHeader)
+	if err != nil {
+		return "", err
+	}
+	headerB64 := base64.RawURLEncoding.EncodeToString(headerJSON)
+
+	// Build the JWT claims.
+	now := time.Now().Unix()
+	claims := map[string]interface{}{
+		"iss":   key.ClientEmail,
+		"scope": "https://www.googleapis.com/auth/cloud-platform",
+		"aud":   key.TokenURI,
+		"iat":   now,
+		"exp":   now + 3600, // valid for one hour
+	}
+	claimsJSON, err := json.Marshal(claims)
+	if err != nil {
+		return "", err
+	}
+	claimsB64 := base64.RawURLEncoding.EncodeToString(claimsJSON)
+
+	// Sign the SHA-256 digest of "<header>.<claims>". Passing a plain hash as
+	// the signer options (not PSS options) selects PKCS #1 v1.5.
+	signingInput := fmt.Sprintf("%s.%s", headerB64, claimsB64)
+	hashed := sha256.Sum256([]byte(signingInput))
+	signature, err := rsaKey.Sign(nil, hashed[:], crypto.SHA256)
+	if err != nil {
+		return "", err
+	}
+	sigB64 := base64.RawURLEncoding.EncodeToString(signature)
+
+	return fmt.Sprintf("%s.%s.%s", headerB64, claimsB64, sigB64), nil
+}
+
+// getAccessToken exchanges the signed JWT assertion for an access token by
+// POSTing a jwt-bearer grant to "/token" through v.client.
+//
+// The response is handled in a callback, which always resumes request
+// processing when it returns. On a non-200 status, or on a 200 response that
+// carries no access_token, the failure is logged and reported through
+// util.ErrorHandler. Otherwise the token is written to the request's
+// Authorization header and cached under v.buildTokenKey() together with its
+// expiry time (expires_in from the response, 3600 seconds when absent).
+//
+// The returned error is the one from v.client.Post; failures inside the
+// callback are not returned to the caller.
+func (v *vertexProvider) getAccessToken(jwtToken string) error {
+	headers := [][2]string{
+		{"Content-Type", "application/x-www-form-urlencoded"},
+	}
+	reqBody := "grant_type=urn:ietf:params:oauth:grant-type:jwt-bearer&assertion=" + jwtToken
+	err := v.client.Post("/token", headers, []byte(reqBody), func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+		responseString := string(responseBody)
+		// Resume request processing on every exit path of the callback.
+		defer func() {
+			_ = proxywasm.ResumeHttpRequest()
+		}()
+		if statusCode != http.StatusOK {
+			log.Errorf("failed to create vertex access key, status: %d body: %s", statusCode, responseString)
+			_ = util.ErrorHandler("ai-proxy.vertex.load_ak_failed", fmt.Errorf("failed to load vertex ak"))
+			return
+		}
+		responseJson := gjson.Parse(responseString)
+		accessToken := responseJson.Get("access_token").String()
+		// A 200 response without a token must not be forwarded as
+		// "Bearer " nor cached; treat it like any other failure.
+		if accessToken == "" {
+			log.Errorf("failed to create vertex access key, status: %d, access_token missing in response", statusCode)
+			_ = util.ErrorHandler("ai-proxy.vertex.load_ak_failed", fmt.Errorf("failed to load vertex ak"))
+			return
+		}
+		_ = proxywasm.ReplaceHttpRequestHeader("Authorization", "Bearer "+accessToken)
+
+		// Fall back to one hour when the response does not state a lifetime.
+		expiresIn := int64(3600)
+		if expiresInVal := responseJson.Get("expires_in"); expiresInVal.Exists() {
+			expiresIn = expiresInVal.Int()
+		}
+		expireTime := time.Now().Add(time.Duration(expiresIn) * time.Second).Unix()
+		keyName := v.buildTokenKey()
+		// Caching is best-effort: the request already carries the token.
+		if cacheErr := setCachedAccessToken(keyName, accessToken, expireTime); cacheErr != nil {
+			log.Errorf("[vertex]: unable to cache access token: %v", cacheErr)
+		}
+	}, v.config.timeout)
+	return err
+}
+
+// buildTokenKey returns the key under which the access token is cached. It is
+// derived from the configured region and project ID.
+func (v *vertexProvider) buildTokenKey() string {
+	return fmt.Sprintf("vertex-%s-%s-access-token", v.config.vertexRegion, v.config.vertexProjectId)
+}
+
+// cachedAccessToken is the JSON value stored in shared data by
+// setCachedAccessToken and read back by getCachedAccessToken. ExpireAt is a
+// Unix timestamp in seconds.
+type cachedAccessToken struct {
+	Token    string `json:"token"`
+	ExpireAt int64  `json:"expireAt"`
+}
+
+// getCachedAccessToken reads the cached token stored under key.
+//
+// It returns ("", nil) when there is no entry, when the entry holds no data,
+// or when the token expires within the configured refresh-ahead window. Any
+// other shared-data error, or a JSON decoding error, is returned as-is.
+func (v *vertexProvider) getCachedAccessToken(key string) (string, error) {
+	raw, _, err := proxywasm.GetSharedData(key)
+	switch {
+	case err != nil && errors.Is(err, types.ErrorStatusNotFound):
+		// A missing entry is not an error, there is simply nothing cached.
+		return "", nil
+	case err != nil:
+		return "", err
+	case raw == nil:
+		return "", nil
+	}
+
+	var cached cachedAccessToken
+	if decodeErr := json.Unmarshal(raw, &cached); decodeErr != nil {
+		return "", decodeErr
+	}
+
+	// The token is only usable while it outlives now + refresh-ahead.
+	deadline := time.Now().Unix() + v.config.vertexTokenRefreshAhead
+	if cached.ExpireAt <= deadline {
+		return "", nil
+	}
+	return cached.Token, nil
+}
+
+// setCachedAccessToken stores accessToken and its expiry time (Unix seconds)
+// as JSON in shared data under key. The current entry is read first only to
+// obtain its CAS value for the write; a missing entry is not an error.
+func setCachedAccessToken(key string, accessToken string, expireTime int64) error {
+	_, cas, readErr := proxywasm.GetSharedData(key)
+	if readErr != nil && !errors.Is(readErr, types.ErrorStatusNotFound) {
+		return readErr
+	}
+
+	payload, encodeErr := json.Marshal(cachedAccessToken{
+		Token:    accessToken,
+		ExpireAt: expireTime,
+	})
+	if encodeErr != nil {
+		return encodeErr
+	}
+
+	return proxywasm.SetSharedData(key, payload, cas)
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/yi.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/yi.go
@@ -10,12 +10,10 @@ import (
 )

 const (
-	yiDomain             = "api.lingyiwanwu.com"
-	yiChatCompletionPath = "/v1/chat/completions"
+	yiDomain = "api.lingyiwanwu.com"
 )

-type yiProviderInitializer struct {
-}
+type yiProviderInitializer struct{}

 func (m *yiProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
@@ -26,7 +24,7 @@ func (m *yiProviderInitializer) ValidateConfig(config *ProviderConfig) error {

 func (m *yiProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion): yiChatCompletionPath,
+		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
 	}
 }

--- a/plugins/wasm-go/extensions/ai-proxy/util/http.go
+++ b/plugins/wasm-go/extensions/ai-proxy/util/http.go
@@ -2,7 +2,10 @@ package util

 import (
 	"net/http"
+	"regexp"
+	"strings"

+	"github.com/alibaba/higress/plugins/wasm-go/pkg/log"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 )

@@ -13,6 +16,21 @@ const (
 	MimeTypeApplicationJson = "application/json"
 )

+// Path patterns for /v1/ resource URLs that carry an ID segment. Each pattern
+// accepts any prefix before "/v1/", is anchored at the end of the path, and
+// exposes the ID through a named capture group (batch_id, file_id,
+// fine_tuning_job_id, fine_tuned_model_checkpoint, permission_id).
+var (
+	RegRetrieveBatchPath                        = regexp.MustCompile(`^.*/v1/batches/(?P<batch_id>[^/]+)$`)
+	RegCancelBatchPath                          = regexp.MustCompile(`^.*/v1/batches/(?P<batch_id>[^/]+)/cancel$`)
+	RegRetrieveFilePath                         = regexp.MustCompile(`^.*/v1/files/(?P<file_id>[^/]+)$`)
+	RegRetrieveFileContentPath                  = regexp.MustCompile(`^.*/v1/files/(?P<file_id>[^/]+)/content$`)
+	RegRetrieveFineTuningJobPath                = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)$`)
+	RegRetrieveFineTuningJobEventsPath          = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)/events$`)
+	RegRetrieveFineTuningJobCheckpointsPath     = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)/checkpoints$`)
+	RegCancelFineTuningJobPath                  = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)/cancel$`)
+	RegResumeFineTuningJobPath                  = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)/resume$`)
+	RegPauseFineTuningJobPath                   = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)/pause$`)
+	RegFineTuningCheckpointPermissionPath       = regexp.MustCompile(`^.*/v1/fine_tuning/checkpoints/(?P<fine_tuned_model_checkpoint>[^/]+)/permissions$`)
+	RegDeleteFineTuningCheckpointPermissionPath = regexp.MustCompile(`^.*/v1/fine_tuning/checkpoints/(?P<fine_tuned_model_checkpoint>[^/]+)/permissions/(?P<permission_id>[^/]+)$`)
+)
+
 type ErrorHandlerFunc func(statusCodeDetails string, err error) error

 var ErrorHandler ErrorHandlerFunc = func(statusCodeDetails string, err error) error {
@@ -62,10 +80,45 @@ func OverwriteRequestPathHeaderByCapability(headers http.Header, apiName string,
 	if !exist {
 		return
 	}
-	if originPath, err := proxywasm.GetHttpRequestHeader(":path"); err == nil {
+	originPath, err := proxywasm.GetHttpRequestHeader(":path")
+	if err == nil {
 		headers.Set("X-ENVOY-ORIGINAL-PATH", originPath)
 	}
+	/**
+	这里实现不太优雅，理应通过 apiName 来判断使用哪个正则替换
+	但 ApiName 定义在 provider 中， 而 provider 中又引用了 util
+	会导致循环引用
+	**/
+	if strings.Contains(mappedPath, "{") && strings.Contains(mappedPath, "}") {
+		replacements := []struct {
+			regx *regexp.Regexp
+			key  string
+		}{
+			{RegRetrieveFilePath, "file_id"},
+			{RegRetrieveFileContentPath, "file_id"},
+			{RegRetrieveBatchPath, "batch_id"},
+			{RegCancelBatchPath, "batch_id"},
+		}
+
+		for _, r := range replacements {
+			if r.regx.MatchString(originPath) {
+				subMatch := r.regx.FindStringSubmatch(originPath)
+				if subMatch == nil {
+					continue
+				}
+				index := r.regx.SubexpIndex(r.key)
+				if index < 0 || index >= len(subMatch) {
+					continue
+				}
+				id := subMatch[index]
+				mappedPath = r.regx.ReplaceAllStringFunc(mappedPath, func(s string) string {
+					return strings.Replace(s, "{"+r.key+"}", id, 1)
+				})
+			}
+		}
+	}
 	headers.Set(":path", mappedPath)
+	log.Debugf("[OverwriteRequestPath] originPath=%s, mappedPath=%s", originPath, mappedPath)
 }

 func OverwriteRequestAuthorizationHeader(headers http.Header, credential string) {
--- a/plugins/wasm-go/extensions/ai-proxy/util/ptr.go
+++ b/plugins/wasm-go/extensions/ai-proxy/util/ptr.go
@@ -0,0 +1,5 @@
+package util
+
+// Ptr returns a pointer to a copy of v. It is handy for taking the address of
+// a literal or of any other non-addressable value.
+func Ptr[T any](v T) *T {
+	ptr := &v
+	return ptr
+}
--- a/plugins/wasm-go/extensions/ai-search/main.go
+++ b/plugins/wasm-go/extensions/ai-search/main.go
@@ -289,6 +289,7 @@ func onHttpRequestHeaders(ctx wrapper.HttpContext, config Config, log wrapper.Lo
 	}
 	ctx.SetRequestBodyBufferLimit(DEFAULT_MAX_BODY_BYTES)
 	_ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
+	_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
 	return types.ActionContinue
 }

@@ -362,7 +363,8 @@ func onHttpRequestBody(ctx wrapper.HttpContext, config Config, body []byte, log
 			}, rewriteBody,
 			func(statusCode int, responseHeaders http.Header, responseBody []byte) {
 				if statusCode != http.StatusOK {
-					log.Errorf("search rewrite failed, status: %d", statusCode)
+					log.Errorf("search rewrite failed, status: %d, request url: %s, request cluster: %s, search rewrite model: %s",
+						statusCode, searchRewrite.url, searchRewrite.client.ClusterName(), searchRewrite.modelName)
 					// After a rewrite failure, no further search is performed, thus quickly identifying the failure.
 					proxywasm.ResumeHttpRequest()
 					return
--- a/plugins/wasm-go/extensions/ai-statistics/README.md
+++ b/plugins/wasm-go/extensions/ai-statistics/README.md
@@ -23,6 +23,7 @@ description: AI可观测配置参考
 | 名称             | 数据类型  | 填写要求 | 默认值 | 描述                     |
 |----------------|-------|------|-----|------------------------|
 | `attributes` | []Attribute | 非必填  | -   | 用户希望记录在log/span中的信息 |
+| `disable_openai_usage` | bool | 非必填  | false   | 非openai兼容协议时，model、token的支持非标，配置为true时可以避免报错 |

 Attribute 配置说明:

--- a/plugins/wasm-go/extensions/ai-statistics/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-statistics/README_EN.md
@@ -22,7 +22,8 @@ Users can also expand observable values through configuration:

 | Name             | Type  | Required | Default | Description |
 |----------------|-------|------|-----|------------------------|
-| `attributes` | []Attribute | required  | -   | Information that the user wants to record in log/span |
+| `attributes` | []Attribute | optional  | -   | Information that the user wants to record in log/span |
+| `disable_openai_usage` | bool | optional  | false   | When using a non-OpenAI-compatible protocol, the support for model and token is non-standard. Setting the configuration to true can prevent errors. |

 Attribute Configuration instructions:

--- a/plugins/wasm-go/extensions/ai-statistics/main.go
+++ b/plugins/wasm-go/extensions/ai-statistics/main.go
@@ -27,6 +27,7 @@ func main() {
 }

 const (
+	defaultMaxBodyBytes uint32 = 100 * 1024 * 1024
 	// Context consts
 	StatisticsRequestStartTime = "ai-statistics-request-start-time"
 	StatisticsFirstTokenTime   = "ai-statistics-first-token-time"
@@ -91,6 +92,8 @@ type AIStatisticsConfig struct {
 	attributes []Attribute
 	// If there exist attributes extracted from streaming body, chunks should be buffered
 	shouldBufferStreamingBody bool
+	// If disableOpenaiUsage is true, model/input_token/output_token logs will be skipped
+	disableOpenaiUsage bool
 }

 func generateMetricName(route, cluster, model, consumer, metricName string) string {
@@ -159,6 +162,10 @@ func parseConfig(configJson gjson.Result, config *AIStatisticsConfig, log wrappe
 	}
 	// Metric settings
 	config.counterMetrics = make(map[string]proxywasm.MetricCounter)
+
+	// Parse openai usage config setting.
+	config.disableOpenaiUsage = configJson.Get("disable_openai_usage").Bool()
+
 	return nil
 }

@@ -176,6 +183,11 @@ func onHttpRequestHeaders(ctx wrapper.HttpContext, config AIStatisticsConfig, lo
 	if consumer, _ := proxywasm.GetHttpRequestHeader(ConsumerKey); consumer != "" {
 		ctx.SetContext(ConsumerKey, consumer)
 	}
+	hasRequestBody := wrapper.HasRequestBody()
+	if hasRequestBody {
+		_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
+		ctx.SetRequestBodyBufferLimit(defaultMaxBodyBytes)
+	}

 	// Set user defined log & span attributes which type is fixed_value
 	setAttributeBySource(ctx, config, FixedValue, nil, log)
@@ -258,15 +270,17 @@ func onHttpStreamingBody(ctx wrapper.HttpContext, config AIStatisticsConfig, dat
 	}

 	// Set information about this request
-	if model, inputToken, outputToken, ok := getUsage(data); ok {
-		ctx.SetUserAttribute(Model, model)
-		ctx.SetUserAttribute(InputToken, inputToken)
-		ctx.SetUserAttribute(OutputToken, outputToken)
-		// Set span attributes for ARMS.
-		setSpanAttribute(ArmsModelName, model, log)
-		setSpanAttribute(ArmsInputToken, inputToken, log)
-		setSpanAttribute(ArmsOutputToken, outputToken, log)
-		setSpanAttribute(ArmsTotalToken, inputToken+outputToken, log)
+	if !config.disableOpenaiUsage {
+		if model, inputToken, outputToken, ok := getUsage(data); ok {
+			ctx.SetUserAttribute(Model, model)
+			ctx.SetUserAttribute(InputToken, inputToken)
+			ctx.SetUserAttribute(OutputToken, outputToken)
+			// Set span attributes for ARMS.
+			setSpanAttribute(ArmsModelName, model, log)
+			setSpanAttribute(ArmsInputToken, inputToken, log)
+			setSpanAttribute(ArmsOutputToken, outputToken, log)
+			setSpanAttribute(ArmsTotalToken, inputToken+outputToken, log)
+		}
 	}
 	// If the end of the stream is reached, record metrics/logs/spans.
 	if endOfStream {
@@ -305,15 +319,17 @@ func onHttpResponseBody(ctx wrapper.HttpContext, config AIStatisticsConfig, body
 	}

 	// Set information about this request
-	if model, inputToken, outputToken, ok := getUsage(body); ok {
-		ctx.SetUserAttribute(Model, model)
-		ctx.SetUserAttribute(InputToken, inputToken)
-		ctx.SetUserAttribute(OutputToken, outputToken)
-		// Set span attributes for ARMS.
-		setSpanAttribute(ArmsModelName, model, log)
-		setSpanAttribute(ArmsInputToken, inputToken, log)
-		setSpanAttribute(ArmsOutputToken, outputToken, log)
-		setSpanAttribute(ArmsTotalToken, inputToken+outputToken, log)
+	if !config.disableOpenaiUsage {
+		if model, inputToken, outputToken, ok := getUsage(body); ok {
+			ctx.SetUserAttribute(Model, model)
+			ctx.SetUserAttribute(InputToken, inputToken)
+			ctx.SetUserAttribute(OutputToken, outputToken)
+			// Set span attributes for ARMS.
+			setSpanAttribute(ArmsModelName, model, log)
+			setSpanAttribute(ArmsInputToken, inputToken, log)
+			setSpanAttribute(ArmsOutputToken, outputToken, log)
+			setSpanAttribute(ArmsTotalToken, inputToken+outputToken, log)
+		}
 	}

 	// Set user defined log & span attributes.
@@ -465,6 +481,11 @@ func writeMetric(ctx wrapper.HttpContext, config AIStatisticsConfig, log wrapper
 		log.Warnf("ClusterName typd assert failed, skip metric record")
 		return
 	}
+	
+	if config.disableOpenaiUsage {
+		return
+	} 
+
 	if ctx.GetUserAttribute(Model) == nil || ctx.GetUserAttribute(InputToken) == nil || ctx.GetUserAttribute(OutputToken) == nil {
 		log.Warnf("get usage information failed, skip metric record")
 		return
--- a/plugins/wasm-go/extensions/ai-token-ratelimit/config.go
+++ b/plugins/wasm-go/extensions/ai-token-ratelimit/config.go
@@ -142,7 +142,7 @@ func parseClusterKeyRateLimitConfig(json gjson.Result, config *ClusterKeyRateLim
 		config.rejectedCode = DefaultRejectedCode
 	}
 	rejectedMsg := json.Get("rejected_msg")
-	if rejectedCode.Exists() {
+	if rejectedMsg.Exists() {
 		config.rejectedMsg = rejectedMsg.String()
 	} else {
 		config.rejectedMsg = DefaultRejectedMsg
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md
@@ -6,8 +6,12 @@ description: 基于 Key 集群限流插件配置参考

 ## 功能说明

-`cluster-key-rate-limit` 插件基于 Redis 实现集群限流，适用于需要跨多个 Higress Gateway 实例实现全局一致速率限制的场景。
-限流所使用的 Key 可以来源于 URL 参数、HTTP 请求头、客户端 IP 地址、消费者名称或 Cookie 中的 Key。
+`cluster-key-rate-limit` 插件基于 Redis 实现**集群级限流**，适用于需要跨多个 Higress Gateway 实例进行**全局一致速率限制**的场景。
+
+支持两种限流模式：
+
+- **规则级全局限流**：基于相同的 `rule_name` 和 `global_threshold` 配置，对自定义规则组设置全局限流阈值
+- **Key 级动态限流**：根据请求中的动态 Key（如 URL 参数、请求头、客户端 IP、Consumer 名称或 Cookie 字段）进行分组限流

 ## 运行属性

@@ -19,12 +23,22 @@ description: 基于 Key 集群限流插件配置参考
 | 配置项                  | 类型   | 必填 | 默认值 | 说明                                                                          |
 | ----------------------- | ------ | ---- | ------ |-----------------------------------------------------------------------------|
 | rule_name               | string | 是 | - | 限流规则名称，根据限流规则名称 + 限流类型 + 限流 key 名称 + 限流 key 对应的实际值来拼装 redis key             |
-| rule_items | array of object | 是   | -                 | 限流规则项，按照 rule_items 下的排列顺序，匹配第一个 rule_item 后命中限流规则，后续规则将被忽略                 |
+| global_threshold | Object | 否，`global_threshold` 或 `rule_items` 选填一项 | - | 对整个自定义规则组进行限流 |
+| rule_items | array of object | 否，`global_threshold` 或 `rule_items` 选填一项 | -                 | 限流规则项，按照 rule_items 下的排列顺序，匹配第一个 rule_item 后命中限流规则，后续规则将被忽略                 |
 | show_limit_quota_header | bool | 否 | false | 响应头中是否显示 `X-RateLimit-Limit`（限制的总请求数）和 `X-RateLimit-Remaining`（剩余还可以发送的请求数） |
 | rejected_code           | int | 否 | 429 | 请求被限流时，返回的 HTTP 状态码                                                         |
 | rejected_msg            | string | 否 | Too many requests | 请求被限流时，返回的响应体                                                               |
 | redis                   | object          | 是                                                           | -                 | redis 相关配置                                                                  |

+`global_threshold` 的配置字段说明。
+
+| 配置项           | 类型 | 必填                                                         | 默认值 | 说明               |
+| ---------------- | ---- | ------------------------------------------------------------ | ------ | ------------------ |
+| query_per_second | int  | 否，`query_per_second`,`query_per_minute`,`query_per_hour`,`query_per_day` 中选填一项 | -      | 允许每秒请求次数   |
+| query_per_minute | int  | 否，`query_per_second`,`query_per_minute`,`query_per_hour`,`query_per_day` 中选填一项 | -      | 允许每分钟请求次数 |
+| query_per_hour   | int  | 否，`query_per_second`,`query_per_minute`,`query_per_hour`,`query_per_day` 中选填一项 | -      | 允许每小时请求次数 |
+| query_per_day    | int  | 否，`query_per_second`,`query_per_minute`,`query_per_hour`,`query_per_day` 中选填一项 | -      | 允许每天请求次数   |
+
 `rule_items` 中每一项的配置字段说明。

 | 配置项                | 类型            | 必填                   | 默认值 | 说明                                                                                                                                                       |
@@ -63,28 +77,39 @@ description: 基于 Key 集群限流插件配置参考

 ## 配置示例

+### 自定义规则组全局限流
+
+```yaml
+rule_name: routeA-global-limit-rule
+global_threshold:
+  query_per_minute: 1000 # 自定义规则组每分钟最多1000次请求
+redis:
+  service_name: redis.static
+show_limit_quota_header: true
+```
+
 ### 识别请求参数 apikey，进行区别限流

 ```yaml
-rule_name: default_rule
+rule_name: routeA-request-param-limit-rule
 rule_items:
- limit_by_param: apikey
-  limit_keys:
-  - key: 9a342114-ba8a-11ec-b1bf-00163e1250b5
-    query_per_minute: 10
-  - key: a6a6d7f2-ba8a-11ec-bec2-00163e1250b5
-    query_per_hour: 100
- limit_by_per_param: apikey
-  limit_keys:
-  # 正则表达式，匹配以 a 开头的所有字符串，每个 apikey 对应的请求 10qds
-  - key: "regexp:^a.*"
-    query_per_second: 10
-  # 正则表达式，匹配以 b 开头的所有字符串，每个 apikey 对应的请求 100qd
-  - key: "regexp:^b.*"
-    query_per_minute: 100
-  # 兜底用，匹配所有请求，每个 apikey 对应的请求 1000qdh
-  - key: "*"
-    query_per_hour: 1000
+  - limit_by_param: apikey
+    limit_keys:
+      - key: 9a342114-ba8a-11ec-b1bf-00163e1250b5
+        query_per_minute: 10
+      - key: a6a6d7f2-ba8a-11ec-bec2-00163e1250b5
+        query_per_hour: 100
+  - limit_by_per_param: apikey
+    limit_keys:
+      # 正则表达式，匹配以 a 开头的所有字符串，每个 apikey 对应的请求 10qps
+      - key: "regexp:^a.*"
+        query_per_second: 10
+      # 正则表达式，匹配以 b 开头的所有字符串，每个 apikey 对应的请求 100qpm
+      - key: "regexp:^b.*"
+        query_per_minute: 100
+      # 兜底用，匹配所有请求，每个 apikey 对应的请求 1000qph
+      - key: "*"
+        query_per_hour: 1000
 redis:
  service_name: redis.static
 show_limit_quota_header: true
@@ -93,25 +118,25 @@ show_limit_quota_header: true
 ### 识别请求头 x-ca-key，进行区别限流

 ```yaml
-rule_name: default_rule
+rule_name: routeA-request-header-limit-rule
 rule_items:
- limit_by_header: x-ca-key
-  limit_keys:
-  - key: 102234
-    query_per_minute: 10
-  - key: 308239
-    query_per_hour: 10
- limit_by_per_header: x-ca-key
-  limit_keys:
-  # 正则表达式，匹配以 a 开头的所有字符串，每个 apikey 对应的请求 10qds
-  - key: "regexp:^a.*"
-    query_per_second: 10
-  # 正则表达式，匹配以b开头的所有字符串，每个 apikey 对应的请求 100qd
-  - key: "regexp:^b.*"
-    query_per_minute: 100
-  # 兜底用，匹配所有请求，每个 apikey 对应的请求 1000qdh
-  - key: "*"
-    query_per_hour: 1000            
+  - limit_by_header: x-ca-key
+    limit_keys:
+      - key: 102234
+        query_per_minute: 10
+      - key: 308239
+        query_per_hour: 10
+  - limit_by_per_header: x-ca-key
+    limit_keys:
+      # 正则表达式，匹配以 a 开头的所有字符串，每个 x-ca-key 对应的请求 10qps
+      - key: "regexp:^a.*"
+        query_per_second: 10
+      # 正则表达式，匹配以 b 开头的所有字符串，每个 x-ca-key 对应的请求 100qpm
+      - key: "regexp:^b.*"
+        query_per_minute: 100
+      # 兜底用，匹配所有请求，每个 x-ca-key 对应的请求 1000qph
+      - key: "*"
+        query_per_hour: 1000
 redis:
  service_name: redis.static
 show_limit_quota_header: true
@@ -120,19 +145,19 @@ show_limit_quota_header: true
 ### 根据请求头 x-forwarded-for 获取对端 IP，进行区别限流

 ```yaml
-rule_name: default_rule
+rule_name: routeA-client-ip-limit-rule
 rule_items:
- limit_by_per_ip: from-header-x-forwarded-for
-  limit_keys:
-  # 精确 IP
-  - key: 1.1.1.1
-    query_per_day: 10
-  # IP 段，符合这个 IP 段的 IP，每个 IP 100qpd
-  - key: 1.1.1.0/24
-    query_per_day: 100
-  # 兜底用，即默认每个 IP 1000 qpd
-  - key: 0.0.0.0/0
-    query_per_day: 1000
+  - limit_by_per_ip: from-header-x-forwarded-for
+    limit_keys:
+      # 精确 IP
+      - key: 1.1.1.1
+        query_per_day: 10
+      # IP 段，符合这个 IP 段的 IP，每个 IP 100qpd
+      - key: 1.1.1.0/24
+        query_per_day: 100
+      # 兜底用，即默认每个 IP 1000 qpd
+      - key: 0.0.0.0/0
+        query_per_day: 1000
 redis:
  service_name: redis.static
 show_limit_quota_header: true
@@ -141,25 +166,25 @@ show_limit_quota_header: true
 ### 识别 consumer，进行区别限流

 ```yaml
-rule_name: default_rule
+rule_name: routeA-consumer-limit-rule
 rule_items:
- limit_by_consumer: ''
-  limit_keys:
-  - key: consumer1
-    query_per_second: 10
-  - key: consumer2
-    query_per_hour: 100
- limit_by_per_consumer: ''
-  limit_keys:
-  # 正则表达式，匹配以 a 开头的所有字符串，每个 consumer 对应的请求 10qds
-  - key: "regexp:^a.*"
-    query_per_second: 10
-  # 正则表达式，匹配以 b 开头的所有字符串，每个 consumer 对应的请求 100qd
-  - key: "regexp:^b.*"
-    query_per_minute: 100
-  # 兜底用，匹配所有请求，每个 consumer 对应的请求 1000qdh
-  - key: "*"
-    query_per_hour: 1000     
+  - limit_by_consumer: ''
+    limit_keys:
+      - key: consumer1
+        query_per_second: 10
+      - key: consumer2
+        query_per_hour: 100
+  - limit_by_per_consumer: ''
+    limit_keys:
+      # 正则表达式，匹配以 a 开头的所有字符串，每个 consumer 对应的请求 10qps
+      - key: "regexp:^a.*"
+        query_per_second: 10
+      # 正则表达式，匹配以 b 开头的所有字符串，每个 consumer 对应的请求 100qpm
+      - key: "regexp:^b.*"
+        query_per_minute: 100
+      # 兜底用，匹配所有请求，每个 consumer 对应的请求 1000qph
+      - key: "*"
+        query_per_hour: 1000
 redis:
  service_name: redis.static
 show_limit_quota_header: true 
@@ -168,7 +193,7 @@ show_limit_quota_header: true
 ### 识别 Cookie 中的键值对，进行区别限流

 ```yaml
-rule_name: default_rule
+rule_name: routeA-cookie-limit-rule
 rule_items:
  - limit_by_cookie: key1
    limit_keys:
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md
@@ -1,164 +1,201 @@
 ---
-title: Key-Based Cluster Rate Limiting
-keywords: [higress, rate-limit]
-description: Configuration reference for the Key-Based Cluster Rate Limiting plugin
+title: Cluster Rate Limiting Based on Key  
+keywords: [higress, rate-limit]  
+description: Configuration reference for the Key-based cluster rate limiting plugin
+
 ---
+
 ## Function Description
-The `cluster-key-rate-limit` plugin implements cluster rate limiting based on Redis, suitable for scenarios that require global consistent rate limiting across multiple Higress Gateway instances. 

-The Key used for rate limiting can originate from URL parameters, HTTP request headers, client IP addresses, consumer names, or keys in cookies. 
+The `cluster-key-rate-limit` plugin implements **cluster-level rate limiting** based on Redis, suitable for scenarios
+requiring **globally consistent rate limiting across multiple Higress Gateway instances**.

-## Execution Attributes
-Plugin Execution Phase: `default phase`  
-Plugin Execution Priority: `20` 
+It supports two rate limiting modes:

-## Configuration Description
-| Configuration Item        | Type          | Required | Default Value | Description                                                                               |
-|---------------------------|---------------|----------|---------------|-------------------------------------------------------------------------------------------|
-| rule_name                 | string        | Yes      | -             | The name of the rate limiting rule. The Redis key is constructed using rule name + rate limit type + limit key name + actual value of the limit key.         |
-| rule_items                | array of object| Yes     | -             | Rate limiting rule items. The first matching `rule_item` based on the order under `rule_items` will trigger the rate limiting, and subsequent rules will be ignored.                 |
-| show_limit_quota_header   | bool          | No       | false         | Whether to display `X-RateLimit-Limit` (total requests allowed) and `X-RateLimit-Remaining` (remaining requests that can be sent) in the response headers. |
-| rejected_code             | int           | No       | 429           | HTTP status code returned when a request is rate limited.                                                          |
-| rejected_msg              | string        | No       | Too many requests | Response body returned when a request is rate limited.                                                               |
-| redis                     | object        | Yes      | -             | Redis related configuration.                                                                  |
+- **Rule-Level Global Rate Limiting**: Applies a unified rate limit threshold to custom rule groups based on identical `rule_name` and `global_threshold` configurations.
+- **Key-Level Dynamic Rate Limiting**: Groups and limits requests by dynamic keys extracted from requests, such as URL parameters, request headers, client IPs, consumer names, or cookie fields.

-Description of configuration fields for each item in `rule_items`.
-| Configuration Item        | Type          | Required               | Default Value | Description                                                                                           |
-|---------------------------|---------------|------------------------|---------------|-------------------------------------------------------------------------------------------------------|
-| limit_by_header           | string        | No, one of `limit_by_*` | -             | The name of the HTTP request header from which to retrieve the rate limiting key value.               |
-| limit_by_param            | string        | No, one of `limit_by_*` | -             | The name of the URL parameter from which to retrieve the rate limiting key value.                     |
-| limit_by_consumer         | string        | No, one of `limit_by_*` | -             | Applies rate limiting based on consumer name without needing to add an actual value.                  |
-| limit_by_cookie           | string        | No, one of `limit_by_*` | -             | The name of the key in the Cookie from which to retrieve the rate limiting key value.                |
-| limit_by_per_header       | string        | No, one of `limit_by_*` | -             | Matches specific HTTP request headers according to the rules and calculates rate limits for each header. The `limit_keys` configuration supports regular expressions or `*`. |
-| limit_by_per_param        | string        | No, one of `limit_by_*` | -             | Matches specific URL parameters according to the rules and calculates rate limits for each parameter. The `limit_keys` configuration supports regular expressions or `*`. |
-| limit_by_per_consumer     | string        | No, one of `limit_by_*` | -             | Matches specific consumers according to the rules and calculates rate limits for each consumer. The `limit_keys` configuration supports regular expressions or `*`. |
-| limit_by_per_cookie       | string        | No, one of `limit_by_*` | -             | Matches specific cookies according to the rules and calculates rate limits for each cookie. The `limit_keys` configuration supports regular expressions or `*`. |
-| limit_by_per_ip           | string        | No, one of `limit_by_*` | -             | Matches specific IPs according to the rules and calculates rate limits for each IP. Retrieve via IP parameter name from request headers, defined as `from-header-{header name}`, e.g., `from-header-x-forwarded-for`. To get the remote socket IP directly, use `from-remote-addr`. |
-| limit_keys                | array of object | Yes                    | -             | Configures the limit counts after matching key values.                                               |
+## Operational Attributes

-Description of configuration fields for each item in `limit_keys`.
-| Configuration Item        | Type          | Required                                                         | Default Value | Description                                                        |
-|---------------------------|---------------|------------------------------------------------------------------|---------------|--------------------------------------------------------------------|
-| key                       | string        | Yes                                                              | -             | Matched key value; types `limit_by_per_header`, `limit_by_per_param`, `limit_by_per_consumer`, `limit_by_per_cookie` support regular expression configurations (starting with regexp: followed by a regular expression) or `*` (representing all), e.g., `regexp:^d.*` (all strings starting with d); `limit_by_per_ip` supports configuring IP addresses or IP segments. |
-| query_per_second          | int           | No, one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day` is optional. | -             | Allowed number of requests per second.                           |
-| query_per_minute          | int           | No, one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day` is optional. | -             | Allowed number of requests per minute.                           |
-| query_per_hour            | int           | No, one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day` is optional. | -             | Allowed number of requests per hour.                             |
-| query_per_day             | int           | No, one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day` is optional. | -             | Allowed number of requests per day.                              |
+- **Plugin execution phase**: `Default phase`
+- **Plugin execution priority**: `20`

-Description of configuration fields for each item in `redis`.
-| Configuration Item | Type   | Required | Default Value                          | Description                                                                                                     |
-|--------------------|--------|----------|----------------------------------------|-----------------------------------------------------------------------------------------------------------------|
-| service_name       | string | Required | -                                      | Full FQDN name of the Redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local. |
-| service_port       | int    | No       | 80 for static services; otherwise 6379 | Service port for the Redis service.                                                                             |
-| username           | string | No       | -                                      | Redis username.                                                                                                 |
-| password           | string | No       | -                                      | Redis password.                                                                                                 |
-| timeout            | int    | No       | 1000                                   | Redis connection timeout in milliseconds.                                                                       |
-| database           | int    | No       | 0                                      | The database ID used, for example, configured as 1, corresponds to `SELECT 1`.                                  |
+## Configuration Instructions

+| Configuration Item       | Type          | Required                                  | Default Value       | Description                                                                |  
+|--------------------------|---------------|-------------------------------------------|---------------------|----------------------------------------------------------------------------|  
+| rule_name                | string        | Yes                                       | -                   | Name of the rate limiting rule. Used to construct the Redis key in the format: `rule_name:rate_limit_type:key_name:key_value`. |  
+| global_threshold         | object        | No (choose either `global_threshold` or `rule_items`) | -                 | Apply rate limiting to the entire custom rule group. |  
+| rule_items               | array of object | No (choose either `global_threshold` or `rule_items`) | -               | Rate limiting rule items. Rules are matched in the order of the array; once the first matching rule is hit, subsequent rules are ignored. |  
+| show_limit_quota_header  | bool          | No                                        | false             | Whether to display `X-RateLimit-Limit` (total allowed requests) and `X-RateLimit-Remaining` (remaining allowed requests) in the response header. |  
+| rejected_code            | int           | No                                        | 429               | HTTP status code returned when a request is rate-limited.                  |  
+| rejected_msg             | string        | No                                        | Too many requests | Response body returned when a request is rate-limited.                      |  
+| redis                    | object        | Yes                                       | -                   | Configuration for Redis.                                                   |  
+
+### Configuration Fields for `global_threshold`
+
+| Configuration Item       | Type | Required                                 | Default Value | Description                          |  
+|--------------------------|------|------------------------------------------|---------------|--------------------------------------|  
+| query_per_second         | int  | No (choose one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day`) | -           | Allowed requests per second.         |  
+| query_per_minute         | int  | No (choose one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day`) | -           | Allowed requests per minute.         |  
+| query_per_hour           | int  | No (choose one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day`) | -           | Allowed requests per hour.           |  
+| query_per_day            | int  | No (choose one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day`) | -           | Allowed requests per day.            |  
+
+### Configuration Fields for `rule_items`
+
+| Configuration Item            | Type          | Required                          | Default Value | Description                                                                 |  
+|-------------------------------|---------------|-----------------------------------|---------------|-----------------------------------------------------------------------------|  
+| limit_by_header               | string        | No (choose one of `limit_by_*` fields) | -           | Configures the HTTP request header name to extract the rate limiting key.   |  
+| limit_by_param                | string        | No (choose one of `limit_by_*` fields) | -           | Configures the URL parameter name to extract the rate limiting key.        |  
+| limit_by_consumer             | string        | No (choose one of `limit_by_*` fields) | -           | Rate limits based on the consumer name (no need to add a specific value).   |  
+| limit_by_cookie               | string        | No (choose one of `limit_by_*` fields) | -           | Configures the Cookie key name to extract the rate limiting key.           |  
+| limit_by_per_header           | string        | No (choose one of `limit_by_*` fields) | -           | Matches specific HTTP headers by rule and calculates rate limits for each header. Supports regular expressions (starting with `regexp:`) or `*` for the `limit_keys` configuration. |  
+| limit_by_per_param            | string        | No (choose one of `limit_by_*` fields) | -           | Matches specific URL parameters by rule and calculates rate limits for each parameter. Supports regular expressions (starting with `regexp:`) or `*` for the `limit_keys` configuration. |  
+| limit_by_per_consumer         | string        | No (choose one of `limit_by_*` fields) | -           | Matches specific consumers by rule and calculates rate limits for each consumer. Supports regular expressions (starting with `regexp:`) or `*` for the `limit_keys` configuration (no need to add a specific value for the consumer name). |  
+| limit_by_per_cookie           | string        | No (choose one of `limit_by_*` fields) | -           | Matches specific Cookies by rule and calculates rate limits for each Cookie value. Supports regular expressions (starting with `regexp:`) or `*` for the `limit_keys` configuration. |  
+| limit_by_per_ip               | string        | No (choose one of `limit_by_*` fields) | -           | Matches specific IPs by rule and calculates rate limits for each IP. The IP can be extracted from a request header (formatted as `from-header-<header_name>`, e.g., `from-header-x-forwarded-for`) or directly from the peer socket IP (configured as `from-remote-addr`). |  
+| limit_keys                    | array of object | Yes                               | -           | Configures the rate limits for matched key values.                          |  
+
+### Configuration Fields for `limit_keys`
+
+| Configuration Item       | Type   | Required                                 | Default Value | Description                                                                 |  
+|--------------------------|--------|------------------------------------------|---------------|-----------------------------------------------------------------------------|  
+| key                      | string | Yes                                      | -             | The matched key value. For `limit_by_per_header`, `limit_by_per_param`, `limit_by_per_consumer`, and `limit_by_per_cookie` types, supports regular expressions (prefixed with `regexp:`) or `*` (wildcard for all). Example regular expression: `regexp:^d.*` (matches all strings starting with `d`). For `limit_by_per_ip`, supports IP addresses or CIDR blocks. |  
+| query_per_second         | int    | No (choose one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day`) | -           | Allowed requests per second.                                                |  
+| query_per_minute         | int    | No (choose one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day`) | -           | Allowed requests per minute.                                                |  
+| query_per_hour           | int    | No (choose one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day`) | -           | Allowed requests per hour.                                                  |  
+| query_per_day            | int    | No (choose one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day`) | -           | Allowed requests per day.                                                   |  
+
+### Configuration Fields for `redis`
+
+| Configuration Item   | Type   | Required | Default Value                                                     | Description                                                                 |  
+|----------------------|--------|----------|-------------------------------------------------------------------|-----------------------------------------------------------------------------|  
+| service_name         | string | Yes      | -                                                                 | The fully qualified domain name (FQDN) of the Redis service, including the service type (e.g., `my-redis.dns`, `redis.my-ns.svc.cluster.local`). |  
+| service_port         | int    | No       | 80 for static services; 6379 otherwise                            | The port of the Redis service.                                              |  
+| username             | string | No       | -                                                                 | Redis username for authentication.                                          |  
+| password             | string | No       | -                                                                 | Redis password for authentication.                                          |  
+| timeout              | int    | No       | 1000 (milliseconds)                                               | Redis connection timeout in milliseconds.                                  |  
+| database             | int    | No       | 0                                                                 | The ID of the Redis database to use (e.g., configuring `1` corresponds to `SELECT 1`). |  

 ## Configuration Examples

-### Distinguish rate limiting based on the request parameter apikey
-```yaml
-rule_name: default_rule
-rule_items:
- limit_by_param: apikey
-  limit_keys:
-  - key: 9a342114-ba8a-11ec-b1bf-00163e1250b5
-    query_per_minute: 10
-  - key: a6a6d7f2-ba8a-11ec-bec2-00163e1250b5
-    query_per_hour: 100
- limit_by_per_param: apikey
-  limit_keys:
-  # Regular expression, matches all strings starting with a, each apikey corresponds to 10qds.
-  - key: "regexp:^a.*"
-    query_per_second: 10
-  # Regular expression, matches all strings starting with b, each apikey corresponds to 100qd.
-  - key: "regexp:^b.*"
-    query_per_minute: 100
-  # As a fallback, matches all requests, each apikey corresponds to 1000qdh.
-  - key: "*"
-    query_per_hour: 1000
+### Global Rate Limiting for Custom Rule Group
+
+```yaml  
+rule_name: routeA-global-limit-rule
+global_threshold:
+  query_per_minute: 1000 # Maximum 1000 requests per minute for this rule group
 redis:
  service_name: redis.static
 show_limit_quota_header: true
 ```

-### Distinguish rate limiting based on the header x-ca-key
-```yaml
-rule_name: default_rule
+### Rate Limiting by Request Parameter `apikey`
+
+```yaml  
+rule_name: routeA-request-param-limit-rule
 rule_items:
- limit_by_header: x-ca-key
-  limit_keys:
-  - key: 102234
-    query_per_minute: 10
-  - key: 308239
-    query_per_hour: 10
- limit_by_per_header: x-ca-key
-  limit_keys:
-  # Regular expression, matches all strings starting with a, each apikey corresponds to 10qds.
-  - key: "regexp:^a.*"
-    query_per_second: 10
-  # Regular expression, matches all strings starting with b, each apikey corresponds to 100qd.
-  - key: "regexp:^b.*"
-    query_per_minute: 100
-  # As a fallback, matches all requests, each apikey corresponds to 1000qdh.
-  - key: "*"
-    query_per_hour: 1000
+  - limit_by_param: apikey
+    limit_keys:
+      - key: 9a342114-ba8a-11ec-b1bf-00163e1250b5
+        query_per_minute: 10
+      - key: a6a6d7f2-ba8a-11ec-bec2-00163e1250b5
+        query_per_hour: 100
+  - limit_by_per_param: apikey
+    limit_keys:
+      # Regular expression to match all strings starting with "a"; 10 requests per second for each apikey  
+      - key: "regexp:^a.*"
+        query_per_second: 10
+      # Regular expression to match all strings starting with "b"; 100 requests per minute for each apikey  
+      - key: "regexp:^b.*"
+        query_per_minute: 100
+      # Fallback rule to match all requests; 1000 requests per hour for each apikey  
+      - key: "*"
+        query_per_hour: 1000
 redis:
  service_name: redis.static
 show_limit_quota_header: true
 ```

-### Distinguish rate limiting based on the client IP from the request header x-forwarded-for
-```yaml
-rule_name: default_rule
+### Rate Limiting by Request Header `x-ca-key`
+
+```yaml  
+rule_name: routeA-request-header-limit-rule
 rule_items:
- limit_by_per_ip: from-header-x-forwarded-for
-  limit_keys:
-  # Exact IP
-  - key: 1.1.1.1
-    query_per_day: 10
-  # IP segment, for IPs matching this segment, each IP corresponds to 100qpd.
-  - key: 1.1.1.0/24
-    query_per_day: 100
-  # As a fallback, defaults to 1000 qpd for each IP.
-  - key: 0.0.0.0/0
-    query_per_day: 1000
+  - limit_by_header: x-ca-key
+    limit_keys:
+      - key: 102234
+        query_per_minute: 10
+      - key: 308239
+        query_per_hour: 10
+  - limit_by_per_header: x-ca-key
+    limit_keys:
+      # Regular expression to match all strings starting with "a"; 10 requests per second for each key  
+      - key: "regexp:^a.*"
+        query_per_second: 10
+      # Regular expression to match all strings starting with "b"; 100 requests per minute for each key  
+      - key: "regexp:^b.*"
+        query_per_minute: 100
+      # Fallback rule to match all requests; 1000 requests per hour for each key  
+      - key: "*"
+        query_per_hour: 1000
 redis:
  service_name: redis.static
 show_limit_quota_header: true
 ```

-### Distinguish rate limiting based on consumers
-```yaml
-rule_name: default_rule
+### Rate Limiting by Client IP Extracted from `x-forwarded-for` Header
+
+```yaml  
+rule_name: routeA-client-ip-limit-rule
 rule_items:
- limit_by_consumer: ''
-  limit_keys:
-  - key: consumer1
-    query_per_second: 10
-  - key: consumer2
-    query_per_hour: 100
- limit_by_per_consumer: ''
-  limit_keys:
-  # Regular expression, matches all strings starting with a, each consumer corresponds to 10qds.
-  - key: "regexp:^a.*"
-    query_per_second: 10
-  # Regular expression, matches all strings starting with b, each consumer corresponds to 100qd.
-  - key: "regexp:^b.*"
-    query_per_minute: 100
-  # As a fallback, matches all requests, each consumer corresponds to 1000qdh.
-  - key: "*"
-    query_per_hour: 1000
+  - limit_by_per_ip: from-header-x-forwarded-for
+    limit_keys:
+      # Exact IP match  
+      - key: 1.1.1.1
+        query_per_day: 10
+      # CIDR block match; 100 requests per day for each IP in the block  
+      - key: 1.1.1.0/24
+        query_per_day: 100
+      # Fallback rule for all IPs; 1000 requests per day for each IP  
+      - key: 0.0.0.0/0
+        query_per_day: 1000
 redis:
  service_name: redis.static
 show_limit_quota_header: true
 ```

-### Distinguish rate limiting based on key-value pairs in cookies
-```yaml
-rule_name: default_rule
+### Rate Limiting by Consumer
+
+```yaml  
+rule_name: routeA-consumer-limit-rule
+rule_items:
+  - limit_by_consumer: ''
+    limit_keys:
+      - key: consumer1
+        query_per_second: 10
+      - key: consumer2
+        query_per_hour: 100
+  - limit_by_per_consumer: ''
+    limit_keys:
+      # Regular expression to match all consumer names starting with "a"; 10 requests per second for each consumer  
+      - key: "regexp:^a.*"
+        query_per_second: 10
+      # Regular expression to match all consumer names starting with "b"; 100 requests per minute for each consumer  
+      - key: "regexp:^b.*"
+        query_per_minute: 100
+      # Fallback rule to match all consumers; 1000 requests per hour for each consumer  
+      - key: "*"
+        query_per_hour: 1000
+redis:
+  service_name: redis.static
+show_limit_quota_header: true
+```
+
+### Rate Limiting by Cookie Value
+
+```yaml  
+rule_name: routeA-cookie-limit-rule
 rule_items:
  - limit_by_cookie: key1
    limit_keys:
@@ -168,13 +205,13 @@ rule_items:
        query_per_hour: 100
  - limit_by_per_cookie: key1
    limit_keys:
-      # Regular expression, matches all strings starting with a, each cookie's value corresponds to 10qds.
+      # Regular expression to match all cookie values starting with "a"; 10 requests per second for each value  
      - key: "regexp:^a.*"
        query_per_second: 10
-      # Regular expression, matches all strings starting with b, each cookie's value corresponds to 100qd.
+      # Regular expression to match all cookie values starting with "b"; 100 requests per minute for each value  
      - key: "regexp:^b.*"
        query_per_minute: 100
-      # As a fallback, matches all requests, each cookie's value corresponds to 1000qdh.
+      # Fallback rule to match all cookie values; 1000 requests per hour for each value  
      - key: "*"
        query_per_hour: 1000
 rejected_code: 200
@@ -182,4 +219,4 @@ rejected_msg: '{"code":-1,"msg":"Too many requests"}'
 redis:
  service_name: redis.static
 show_limit_quota_header: true
-```
+```
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go
@@ -1,303 +0,0 @@
-package main
-
-import (
-	"errors"
-	"fmt"
-	"strings"
-
-	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
-	"github.com/tidwall/gjson"
-	re "github.com/wasilibs/go-re2"
-	"github.com/zmap/go-iptree/iptree"
-)
-
-// 限流规则项类型
-type limitRuleItemType string
-
-// 限流配置项key类型
-type limitConfigItemType string
-
-const (
-	limitByHeaderType      limitRuleItemType = "limit_by_header"
-	limitByParamType       limitRuleItemType = "limit_by_param"
-	limitByConsumerType    limitRuleItemType = "limit_by_consumer"
-	limitByCookieType      limitRuleItemType = "limit_by_cookie"
-	limitByPerHeaderType   limitRuleItemType = "limit_by_per_header"
-	limitByPerParamType    limitRuleItemType = "limit_by_per_param"
-	limitByPerConsumerType limitRuleItemType = "limit_by_per_consumer"
-	limitByPerCookieType   limitRuleItemType = "limit_by_per_cookie"
-	limitByPerIpType       limitRuleItemType = "limit_by_per_ip"
-
-	exactType  limitConfigItemType = "exact"  // 精确匹配
-	regexpType limitConfigItemType = "regexp" // 正则表达式
-	allType    limitConfigItemType = "*"      // 匹配所有情况
-	ipNetType  limitConfigItemType = "ipNet"  // ip段
-
-	RemoteAddrSourceType = "remote-addr"
-	HeaderSourceType     = "header"
-
-	DefaultRejectedCode uint32 = 429
-	DefaultRejectedMsg  string = "Too many requests"
-
-	Second           int64 = 1
-	SecondsPerMinute       = 60 * Second
-	SecondsPerHour         = 60 * SecondsPerMinute
-	SecondsPerDay          = 24 * SecondsPerHour
-)
-
-var timeWindows = map[string]int64{
-	"query_per_second": Second,
-	"query_per_minute": SecondsPerMinute,
-	"query_per_hour":   SecondsPerHour,
-	"query_per_day":    SecondsPerDay,
-}
-
-type ClusterKeyRateLimitConfig struct {
-	ruleName             string          // 限流规则名称
-	ruleItems            []LimitRuleItem // 限流规则项
-	showLimitQuotaHeader bool            // 响应头中是否显示X-RateLimit-Limit和X-RateLimit-Remaining
-	rejectedCode         uint32          // 当请求超过阈值被拒绝时,返回的HTTP状态码
-	rejectedMsg          string          // 当请求超过阈值被拒绝时,返回的响应体
-	redisClient          wrapper.RedisClient
-}
-
-type LimitRuleItem struct {
-	limitType    limitRuleItemType // 限流类型
-	key          string            // 根据该key值进行限流,limit_by_consumer和limit_by_per_consumer两种类型为ConsumerHeader,其他类型为对应的key值
-	limitByPerIp LimitByPerIp      // 对端ip地址或ip段
-	configItems  []LimitConfigItem // 限流配置项
-}
-
-type LimitByPerIp struct {
-	sourceType string // ip来源类型
-	headerName string // 根据该请求头获取客户端ip
-}
-
-type LimitConfigItem struct {
-	configType limitConfigItemType // 限流配置项key类型
-	key        string              // 限流key
-	ipNet      *iptree.IPTree      // 限流key转换的ip地址或者ip段,仅用于itemType为ipNetType
-	regexp     *re.Regexp          // 正则表达式,仅用于itemType为regexpType
-	count      int64               // 指定时间窗口内的总请求数量阈值
-	timeWindow int64               // 时间窗口大小
-}
-
-func initRedisClusterClient(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
-	redisConfig := json.Get("redis")
-	if !redisConfig.Exists() {
-		return errors.New("missing redis in config")
-	}
-	serviceName := redisConfig.Get("service_name").String()
-	if serviceName == "" {
-		return errors.New("redis service name must not be empty")
-	}
-	servicePort := int(redisConfig.Get("service_port").Int())
-	if servicePort == 0 {
-		if strings.HasSuffix(serviceName, ".static") {
-			// use default logic port which is 80 for static service
-			servicePort = 80
-		} else {
-			servicePort = 6379
-		}
-	}
-	username := redisConfig.Get("username").String()
-	password := redisConfig.Get("password").String()
-	timeout := int(redisConfig.Get("timeout").Int())
-	if timeout == 0 {
-		timeout = 1000
-	}
-	config.redisClient = wrapper.NewRedisClusterClient(wrapper.FQDNCluster{
-		FQDN: serviceName,
-		Port: int64(servicePort),
-	})
-	database := int(redisConfig.Get("database").Int())
-	return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database))
-}
-
-func parseClusterKeyRateLimitConfig(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
-	ruleName := json.Get("rule_name")
-	if !ruleName.Exists() {
-		return errors.New("missing rule_name in config")
-	}
-	config.ruleName = ruleName.String()
-
-	// 初始化ruleItems
-	err := initRuleItems(json, config)
-	if err != nil {
-		return err
-	}
-
-	showLimitQuotaHeader := json.Get("show_limit_quota_header")
-	if showLimitQuotaHeader.Exists() {
-		config.showLimitQuotaHeader = showLimitQuotaHeader.Bool()
-	}
-
-	rejectedCode := json.Get("rejected_code")
-	if rejectedCode.Exists() {
-		config.rejectedCode = uint32(rejectedCode.Uint())
-	} else {
-		config.rejectedCode = DefaultRejectedCode
-	}
-	rejectedMsg := json.Get("rejected_msg")
-	if rejectedCode.Exists() {
-		config.rejectedMsg = rejectedMsg.String()
-	} else {
-		config.rejectedMsg = DefaultRejectedMsg
-	}
-	return nil
-}
-
-func initRuleItems(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
-	ruleItemsResult := json.Get("rule_items")
-	if !ruleItemsResult.Exists() {
-		return errors.New("missing rule_items in config")
-	}
-	if len(ruleItemsResult.Array()) == 0 {
-		return errors.New("config rule_items cannot be empty")
-	}
-	var ruleItems []LimitRuleItem
-	for _, item := range ruleItemsResult.Array() {
-		var ruleItem LimitRuleItem
-
-		// 根据配置区分限流类型
-		var limitType limitRuleItemType
-		setLimitByKeyIfExists := func(field gjson.Result, limitTypeStr limitRuleItemType) {
-			if field.Exists() && field.String() != "" {
-				ruleItem.key = field.String()
-				limitType = limitTypeStr
-			}
-		}
-		setLimitByKeyIfExists(item.Get("limit_by_header"), limitByHeaderType)
-		setLimitByKeyIfExists(item.Get("limit_by_param"), limitByParamType)
-		setLimitByKeyIfExists(item.Get("limit_by_cookie"), limitByCookieType)
-		setLimitByKeyIfExists(item.Get("limit_by_per_header"), limitByPerHeaderType)
-		setLimitByKeyIfExists(item.Get("limit_by_per_param"), limitByPerParamType)
-		setLimitByKeyIfExists(item.Get("limit_by_per_cookie"), limitByPerCookieType)
-
-		limitByConsumer := item.Get("limit_by_consumer")
-		if limitByConsumer.Exists() {
-			ruleItem.key = ConsumerHeader
-			limitType = limitByConsumerType
-		}
-		limitByPerConsumer := item.Get("limit_by_per_consumer")
-		if limitByPerConsumer.Exists() {
-			ruleItem.key = ConsumerHeader
-			limitType = limitByPerConsumerType
-		}
-
-		limitByPerIpResult := item.Get("limit_by_per_ip")
-		if limitByPerIpResult.Exists() && limitByPerIpResult.String() != "" {
-			limitByPerIp := limitByPerIpResult.String()
-			ruleItem.key = limitByPerIp
-			if strings.HasPrefix(limitByPerIp, "from-header-") {
-				headerName := limitByPerIp[len("from-header-"):]
-				if headerName == "" {
-					return errors.New("limit_by_per_ip parse error: empty after 'from-header-'")
-				}
-				ruleItem.limitByPerIp = LimitByPerIp{
-					sourceType: HeaderSourceType,
-					headerName: headerName,
-				}
-			} else if limitByPerIp == "from-remote-addr" {
-				ruleItem.limitByPerIp = LimitByPerIp{
-					sourceType: RemoteAddrSourceType,
-					headerName: "",
-				}
-			} else {
-				return errors.New("the 'limit_by_per_ip' restriction must start with 'from-header-' or be exactly 'from-remote-addr'")
-			}
-			limitType = limitByPerIpType
-		}
-
-		if limitType == "" {
-			return errors.New("only one of 'limit_by_header' and 'limit_by_param' and 'limit_by_consumer' and 'limit_by_cookie' and 'limit_by_per_header' and 'limit_by_per_param' and 'limit_by_per_consumer' and 'limit_by_per_cookie' and 'limit_by_per_ip' can be set")
-		}
-		ruleItem.limitType = limitType
-
-		// 初始化configItems
-		err := initConfigItems(item, &ruleItem)
-		if err != nil {
-			return err
-		}
-
-		ruleItems = append(ruleItems, ruleItem)
-	}
-	config.ruleItems = ruleItems
-	return nil
-}
-
-func initConfigItems(json gjson.Result, rule *LimitRuleItem) error {
-	limitKeys := json.Get("limit_keys")
-	if !limitKeys.Exists() {
-		return errors.New("missing limit_keys in config")
-	}
-	if len(limitKeys.Array()) == 0 {
-		return errors.New("config limit_keys cannot be empty")
-	}
-	var configItems []LimitConfigItem
-	for _, item := range limitKeys.Array() {
-		key := item.Get("key")
-		if !key.Exists() || key.String() == "" {
-			return errors.New("limit_keys key is required")
-		}
-
-		var (
-			itemKey  = key.String()
-			itemType limitConfigItemType
-			ipNet    *iptree.IPTree
-			regexp   *re.Regexp
-		)
-		if rule.limitType == limitByPerIpType {
-			var err error
-			ipNet, err = parseIPNet(itemKey)
-			if err != nil {
-				return fmt.Errorf("failed to parse IPNet for key '%s': %w", itemKey, err)
-			}
-			itemType = ipNetType
-		} else if rule.limitType == limitByPerHeaderType ||
-			rule.limitType == limitByPerParamType ||
-			rule.limitType == limitByPerConsumerType ||
-			rule.limitType == limitByPerCookieType {
-			if itemKey == "*" {
-				itemType = allType
-			} else if strings.HasPrefix(itemKey, "regexp:") {
-				regexpStr := itemKey[len("regexp:"):]
-				var err error
-				regexp, err = re.Compile(regexpStr)
-				if err != nil {
-					return fmt.Errorf("failed to compile regex for key '%s': %w", itemKey, err)
-				}
-				itemType = regexpType
-			} else {
-				return fmt.Errorf("the '%s' restriction must start with 'regexp:' or be exactly '*'", rule.limitType)
-			}
-		} else {
-			itemType = exactType
-		}
-
-		if configItem, err := createConfigItemFromRate(item, itemType, itemKey, ipNet, regexp); err != nil {
-			return err
-		} else if configItem != nil {
-			configItems = append(configItems, *configItem)
-		}
-	}
-	rule.configItems = configItems
-	return nil
-}
-
-func createConfigItemFromRate(item gjson.Result, itemType limitConfigItemType, key string, ipNet *iptree.IPTree, regexp *re.Regexp) (*LimitConfigItem, error) {
-	for timeWindowKey, duration := range timeWindows {
-		q := item.Get(timeWindowKey)
-		if q.Exists() && q.Int() > 0 {
-			return &LimitConfigItem{
-				configType: itemType,
-				key:        key,
-				ipNet:      ipNet,
-				regexp:     regexp,
-				count:      q.Int(),
-				timeWindow: duration,
-			}, nil
-		}
-	}
-	return nil, errors.New("one of 'query_per_second', 'query_per_minute', 'query_per_hour', or 'query_per_day' must be set for key: " + key)
-}
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/config/config.go
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/config/config.go
@@ -0,0 +1,357 @@
+package config
+
+import (
+	"errors"
+	"fmt"
+	"strings"
+
+	"cluster-key-rate-limit/util"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
+	re "github.com/wasilibs/go-re2"
+	"github.com/zmap/go-iptree/iptree"
+)
+
+// LimitRuleItemType is the type of a rate-limit rule item.
+type LimitRuleItemType string
+
+// LimitConfigItemType is the key type of a rate-limit config item.
+type LimitConfigItemType string
+
+const (
+	// Rule item types. Each value is also the JSON field name that selects it
+	// inside a rule_items entry.
+	LimitByHeaderType      LimitRuleItemType = "limit_by_header"
+	LimitByParamType       LimitRuleItemType = "limit_by_param"
+	LimitByConsumerType    LimitRuleItemType = "limit_by_consumer"
+	LimitByCookieType      LimitRuleItemType = "limit_by_cookie"
+	LimitByPerHeaderType   LimitRuleItemType = "limit_by_per_header"
+	LimitByPerParamType    LimitRuleItemType = "limit_by_per_param"
+	LimitByPerConsumerType LimitRuleItemType = "limit_by_per_consumer"
+	LimitByPerCookieType   LimitRuleItemType = "limit_by_per_cookie"
+	LimitByPerIpType       LimitRuleItemType = "limit_by_per_ip"
+
+	ExactType  LimitConfigItemType = "exact"  // exact match
+	RegexpType LimitConfigItemType = "regexp" // regular expression
+	AllType    LimitConfigItemType = "*"      // matches every value
+	IpNetType  LimitConfigItemType = "ipNet"  // IP range
+
+	ConsumerHeader = "x-mse-consumer" // request header from which LimitByConsumer reads the consumer name
+
+	// Values stored in LimitByPerIp.SourceType.
+	RemoteAddrSourceType = "remote-addr"
+	HeaderSourceType     = "header"
+
+	// Defaults applied when rejected_code / rejected_msg are absent from the config.
+	DefaultRejectedCode uint32 = 429
+	DefaultRejectedMsg  string = "Too many requests"
+
+	// Time window lengths, expressed in seconds.
+	Second           int64 = 1
+	SecondsPerMinute       = 60 * Second
+	SecondsPerHour         = 60 * SecondsPerMinute
+	SecondsPerDay          = 24 * SecondsPerHour
+)
+
+// timeWindows maps each supported quota field name to the length of its time
+// window in seconds.
+var timeWindows = map[string]int64{
+	"query_per_second": Second,
+	"query_per_minute": SecondsPerMinute,
+	"query_per_hour":   SecondsPerHour,
+	"query_per_day":    SecondsPerDay,
+}
+
+// ClusterKeyRateLimitConfig is the parsed plugin configuration. It is filled in
+// by ParseClusterKeyRateLimitConfig; RedisClient is set by InitRedisClusterClient.
+type ClusterKeyRateLimitConfig struct {
+	RuleName             string           // rate-limit rule name
+	GlobalThreshold      *GlobalThreshold // global rate-limit configuration
+	RuleItems            []LimitRuleItem  // rate-limit rule items
+	ShowLimitQuotaHeader bool             // whether to show X-RateLimit-Limit and X-RateLimit-Remaining in the response headers
+	RejectedCode         uint32           // HTTP status code returned when a request is rejected for exceeding the threshold
+	RejectedMsg          string           // response body returned when a request is rejected for exceeding the threshold
+	RedisClient          wrapper.RedisClient
+}
+
+// GlobalThreshold is the quota parsed from the "global_threshold" config object.
+type GlobalThreshold struct {
+	Count      int64 // number of requests within the time window
+	TimeWindow int64 // time window size (seconds)
+}
+
+// LimitRuleItem is one parsed entry of the "rule_items" config array.
+type LimitRuleItem struct {
+	LimitType    LimitRuleItemType // rate-limit type
+	Key          string            // key to rate-limit on: ConsumerHeader for limit_by_consumer and limit_by_per_consumer, otherwise the configured key value
+	LimitByPerIp LimitByPerIp      // peer IP address or IP range
+	ConfigItems  []LimitConfigItem // rate-limit config items
+}
+
+// LimitByPerIp records where the client IP comes from for a limit_by_per_ip rule.
+type LimitByPerIp struct {
+	SourceType string // IP source type
+	HeaderName string // request header from which the client IP is obtained
+}
+
+// LimitConfigItem is one parsed entry of a rule item's "limit_keys" array.
+type LimitConfigItem struct {
+	ConfigType LimitConfigItemType // key type of the rate-limit config item
+	Key        string              // rate-limit key
+	IpNet      *iptree.IPTree      // IP address or IP range parsed from the key; only used when ConfigType is IpNetType
+	Regexp     *re.Regexp          // regular expression; only used when ConfigType is RegexpType
+	Count      int64               // total request count threshold within the time window
+	TimeWindow int64               // time window size
+}
+
+// InitRedisClusterClient builds the Redis cluster client described by the
+// "redis" object of the plugin configuration, stores it in config.RedisClient
+// and initializes it.
+//
+// "service_name" is mandatory. When "service_port" is absent or zero the port
+// falls back to 80 for service names ending in ".static" and to 6379
+// otherwise. When "timeout" is absent or zero it falls back to 1000.
+// The returned error is either a validation error or the result of the
+// client's Init call.
+func InitRedisClusterClient(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
+	redisSection := json.Get("redis")
+	if !redisSection.Exists() {
+		return errors.New("missing redis in config")
+	}
+
+	fqdn := redisSection.Get("service_name").String()
+	if fqdn == "" {
+		return errors.New("redis service name must not be empty")
+	}
+
+	port := int(redisSection.Get("service_port").Int())
+	switch {
+	case port != 0:
+		// explicitly configured, keep as is
+	case strings.HasSuffix(fqdn, ".static"):
+		// use default logic port which is 80 for static service
+		port = 80
+	default:
+		port = 6379
+	}
+
+	timeoutValue := int(redisSection.Get("timeout").Int())
+	if timeoutValue == 0 {
+		timeoutValue = 1000
+	}
+
+	user := redisSection.Get("username").String()
+	pass := redisSection.Get("password").String()
+	db := int(redisSection.Get("database").Int())
+
+	cluster := wrapper.FQDNCluster{
+		FQDN: fqdn,
+		Port: int64(port),
+	}
+	config.RedisClient = wrapper.NewRedisClusterClient(cluster)
+	return config.RedisClient.Init(user, pass, int64(timeoutValue), wrapper.WithDataBase(db))
+}
+
+// ParseClusterKeyRateLimitConfig fills config from the plugin's JSON
+// configuration.
+//
+// "rule_name" must be present. The limit rule itself (global threshold or rule
+// items) is delegated to initLimitRule. "show_limit_quota_header" is copied
+// only when present; "rejected_code" and "rejected_msg" fall back to
+// DefaultRejectedCode and DefaultRejectedMsg when absent.
+func ParseClusterKeyRateLimitConfig(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
+	nameField := json.Get("rule_name")
+	if !nameField.Exists() {
+		return errors.New("missing rule_name in config")
+	}
+	config.RuleName = nameField.String()
+
+	// Initialize the rate-limit rule.
+	if err := initLimitRule(json, config); err != nil {
+		return err
+	}
+
+	if quotaHeader := json.Get("show_limit_quota_header"); quotaHeader.Exists() {
+		config.ShowLimitQuotaHeader = quotaHeader.Bool()
+	}
+
+	config.RejectedCode = DefaultRejectedCode
+	if code := json.Get("rejected_code"); code.Exists() {
+		config.RejectedCode = uint32(code.Uint())
+	}
+
+	config.RejectedMsg = DefaultRejectedMsg
+	if msg := json.Get("rejected_msg"); msg.Exists() {
+		config.RejectedMsg = msg.String()
+	}
+	return nil
+}
+
+// initLimitRule reads either "global_threshold" or "rule_items" from the
+// configuration and stores the parsed result in config.
+//
+// Exactly one of the two fields must be present: having neither or both is an
+// error. With "global_threshold" only config.GlobalThreshold is set; with
+// "rule_items" (which must be a non-empty array) only config.RuleItems is set.
+func initLimitRule(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
+	globalNode := json.Get("global_threshold")
+	rulesNode := json.Get("rule_items")
+
+	switch globalSet, rulesSet := globalNode.Exists(), rulesNode.Exists(); {
+	case !globalSet && !rulesSet:
+		return errors.New("at least one of 'global_threshold' or 'rule_items' must be set")
+	case globalSet && rulesSet:
+		return errors.New("'global_threshold' and 'rule_items' cannot be set at the same time")
+	case globalSet:
+		// Global rate-limit configuration.
+		parsedThreshold, err := parseGlobalThreshold(globalNode)
+		if err != nil {
+			return fmt.Errorf("failed to parse global_threshold: %w", err)
+		}
+		config.GlobalThreshold = parsedThreshold
+		return nil
+	}
+
+	// Conditional rate-limit rules.
+	entries := rulesNode.Array()
+	if len(entries) == 0 {
+		return errors.New("config rule_items cannot be empty")
+	}
+
+	var parsedRules []LimitRuleItem
+	for _, entry := range entries {
+		rule, err := parseLimitRuleItem(entry)
+		if err != nil {
+			return fmt.Errorf("failed to parse rule_item in rule_items: %w", err)
+		}
+		parsedRules = append(parsedRules, *rule)
+	}
+	config.RuleItems = parsedRules
+	return nil
+}
+
+// parseGlobalThreshold extracts the global quota from the "global_threshold"
+// config object.
+//
+// The quota fields are probed in a fixed order (second, minute, hour, day) and
+// the first one that exists with a value greater than zero is used. An error
+// is returned when none qualifies.
+func parseGlobalThreshold(item gjson.Result) (*GlobalThreshold, error) {
+	// Go does not define map iteration order, so ranging over timeWindows
+	// directly would pick an arbitrary field when several are configured.
+	// Walk the keys in a fixed precedence and use the map only for lookup.
+	orderedKeys := []string{"query_per_second", "query_per_minute", "query_per_hour", "query_per_day"}
+	for _, timeWindowKey := range orderedKeys {
+		q := item.Get(timeWindowKey)
+		if q.Exists() && q.Int() > 0 {
+			return &GlobalThreshold{
+				Count:      q.Int(),
+				TimeWindow: timeWindows[timeWindowKey],
+			}, nil
+		}
+	}
+	return nil, errors.New("one of 'query_per_second', 'query_per_minute', 'query_per_hour', or 'query_per_day' must be set for global_threshold")
+}
+
+// parseLimitRuleItem parses one entry of the "rule_items" array into a
+// LimitRuleItem, including its "limit_keys" (via initConfigItems).
+//
+// The selector fields are checked one after another and each match overwrites
+// the previous one, so when several are present the last check that matches
+// wins (limit_by_per_ip is checked last). An error is returned when no
+// selector matches, when limit_by_per_ip has an unsupported value, or when
+// initConfigItems fails.
+//
+// NOTE(review): the "only one of ... can be set" error is only raised when no
+// selector is set; setting more than one is not rejected here.
+func parseLimitRuleItem(item gjson.Result) (*LimitRuleItem, error) {
+	var ruleItem LimitRuleItem
+	// Determine the rate-limit type from the configuration.
+	var limitType LimitRuleItemType
+
+	// Selectors whose value is the key name: only a non-empty string counts.
+	trySetLimitType := func(field gjson.Result, limitTypeStr LimitRuleItemType) {
+		if field.Exists() && field.String() != "" {
+			ruleItem.Key = field.String()
+			limitType = limitTypeStr
+		}
+	}
+	trySetLimitType(item.Get("limit_by_header"), LimitByHeaderType)
+	trySetLimitType(item.Get("limit_by_param"), LimitByParamType)
+	trySetLimitType(item.Get("limit_by_cookie"), LimitByCookieType)
+	trySetLimitType(item.Get("limit_by_per_header"), LimitByPerHeaderType)
+	trySetLimitType(item.Get("limit_by_per_param"), LimitByPerParamType)
+	trySetLimitType(item.Get("limit_by_per_cookie"), LimitByPerCookieType)
+
+	// Consumer selectors only need to exist; their value is ignored and the
+	// key is always ConsumerHeader.
+	limitByConsumer := item.Get("limit_by_consumer")
+	if limitByConsumer.Exists() {
+		ruleItem.Key = ConsumerHeader
+		limitType = LimitByConsumerType
+	}
+	limitByPerConsumer := item.Get("limit_by_per_consumer")
+	if limitByPerConsumer.Exists() {
+		ruleItem.Key = ConsumerHeader
+		limitType = LimitByPerConsumerType
+	}
+
+	// limit_by_per_ip must be "from-header-<name>" or exactly "from-remote-addr".
+	limitByPerIpResult := item.Get("limit_by_per_ip")
+	if limitByPerIpResult.Exists() && limitByPerIpResult.String() != "" {
+		limitByPerIp := limitByPerIpResult.String()
+		ruleItem.Key = limitByPerIp
+		if strings.HasPrefix(limitByPerIp, "from-header-") {
+			headerName := limitByPerIp[len("from-header-"):]
+			if headerName == "" {
+				return nil, errors.New("limit_by_per_ip parse error: empty after 'from-header-'")
+			}
+			ruleItem.LimitByPerIp = LimitByPerIp{
+				SourceType: HeaderSourceType,
+				HeaderName: headerName,
+			}
+		} else if limitByPerIp == "from-remote-addr" {
+			ruleItem.LimitByPerIp = LimitByPerIp{
+				SourceType: RemoteAddrSourceType,
+				HeaderName: "",
+			}
+		} else {
+			return nil, errors.New("the 'limit_by_per_ip' restriction must start with 'from-header-' or be exactly 'from-remote-addr'")
+		}
+		limitType = LimitByPerIpType
+	}
+
+	// An empty limitType means none of the selectors above matched.
+	if limitType == "" {
+		return nil, errors.New("only one of 'limit_by_header' and 'limit_by_param' and 'limit_by_consumer' and 'limit_by_cookie' and 'limit_by_per_header' and 'limit_by_per_param' and 'limit_by_per_consumer' and 'limit_by_per_cookie' and 'limit_by_per_ip' can be set")
+	}
+	ruleItem.LimitType = limitType
+
+	// Initialize configItems.
+	err := initConfigItems(item, &ruleItem)
+	if err != nil {
+		return nil, fmt.Errorf("failed to init config items: %w", err)
+	}
+
+	return &ruleItem, nil
+}
+
+// initConfigItems parses the "limit_keys" array of a rule item and stores the
+// resulting config items in rule.ConfigItems.
+//
+// How each "key" is interpreted depends on rule.LimitType:
+//   - limit_by_per_ip: the key is parsed with util.ParseIPNet;
+//   - limit_by_per_header/param/consumer/cookie: the key must be "*" or
+//     "regexp:<pattern>", the pattern being compiled;
+//   - any other type: the key is matched exactly.
+//
+// An error is returned when "limit_keys" is missing or empty, when a key is
+// missing or empty, when a key cannot be parsed, or when
+// createConfigItemFromRate rejects the entry.
+func initConfigItems(json gjson.Result, rule *LimitRuleItem) error {
+	keysNode := json.Get("limit_keys")
+	if !keysNode.Exists() {
+		return errors.New("missing limit_keys in config")
+	}
+	entries := keysNode.Array()
+	if len(entries) == 0 {
+		return errors.New("config limit_keys cannot be empty")
+	}
+
+	var parsedItems []LimitConfigItem
+	for _, entry := range entries {
+		keyField := entry.Get("key")
+		if !keyField.Exists() || keyField.String() == "" {
+			return errors.New("limit_keys key is required")
+		}
+		rawKey := keyField.String()
+
+		var (
+			kind    LimitConfigItemType
+			ipTree  *iptree.IPTree
+			pattern *re.Regexp
+		)
+		switch rule.LimitType {
+		case LimitByPerIpType:
+			tree, err := util.ParseIPNet(rawKey)
+			if err != nil {
+				return fmt.Errorf("failed to parse IPNet for key '%s': %w", rawKey, err)
+			}
+			ipTree, kind = tree, IpNetType
+		case LimitByPerHeaderType, LimitByPerParamType, LimitByPerConsumerType, LimitByPerCookieType:
+			switch {
+			case rawKey == "*":
+				kind = AllType
+			case strings.HasPrefix(rawKey, "regexp:"):
+				compiled, err := re.Compile(strings.TrimPrefix(rawKey, "regexp:"))
+				if err != nil {
+					return fmt.Errorf("failed to compile regex for key '%s': %w", rawKey, err)
+				}
+				pattern, kind = compiled, RegexpType
+			default:
+				return fmt.Errorf("the '%s' restriction must start with 'regexp:' or be exactly '*'", rule.LimitType)
+			}
+		default:
+			kind = ExactType
+		}
+
+		configItem, err := createConfigItemFromRate(entry, kind, rawKey, ipTree, pattern)
+		if err != nil {
+			return err
+		}
+		if configItem != nil {
+			parsedItems = append(parsedItems, *configItem)
+		}
+	}
+	rule.ConfigItems = parsedItems
+	return nil
+}
+
+// createConfigItemFromRate builds a LimitConfigItem for one "limit_keys" entry
+// from the quota field configured on it.
+//
+// The quota fields are probed in a fixed order (second, minute, hour, day) and
+// the first one that exists with a value greater than zero supplies Count and
+// TimeWindow. itemType, key, ipNet and regexp are copied into the result
+// unchanged. An error is returned when no quota field qualifies.
+func createConfigItemFromRate(item gjson.Result, itemType LimitConfigItemType, key string, ipNet *iptree.IPTree, regexp *re.Regexp) (*LimitConfigItem, error) {
+	// Go does not define map iteration order, so ranging over timeWindows
+	// directly would pick an arbitrary field when several are configured.
+	// Walk the keys in a fixed precedence and use the map only for lookup.
+	orderedKeys := []string{"query_per_second", "query_per_minute", "query_per_hour", "query_per_day"}
+	for _, timeWindowKey := range orderedKeys {
+		q := item.Get(timeWindowKey)
+		if q.Exists() && q.Int() > 0 {
+			return &LimitConfigItem{
+				ConfigType: itemType,
+				Key:        key,
+				IpNet:      ipNet,
+				Regexp:     regexp,
+				Count:      q.Int(),
+				TimeWindow: timeWindows[timeWindowKey],
+			}, nil
+		}
+	}
+	return nil, errors.New("one of 'query_per_second', 'query_per_minute', 'query_per_hour', or 'query_per_day' must be set for key: " + key)
+}
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/config/config_test.go
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/config/config_test.go
@@ -0,0 +1,211 @@
+package config
+
+import (
+	"errors"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/tidwall/gjson"
+)
+
+// TestParseClusterKeyRateLimitConfig is a table-driven test for
+// ParseClusterKeyRateLimitConfig. Each case parses a JSON document into a
+// zero-valued ClusterKeyRateLimitConfig and checks either the exact error
+// message or the complete resulting struct.
+func TestParseClusterKeyRateLimitConfig(t *testing.T) {
+	tests := []struct {
+		name        string
+		json        string
+		expected    ClusterKeyRateLimitConfig
+		expectedErr error
+	}{
+		// rule_name is mandatory.
+		{
+			name:        "MissingRuleName",
+			json:        `{}`,
+			expectedErr: errors.New("missing rule_name in config"),
+		},
+		// Global threshold expressed per second.
+		{
+			name: "GlobalThreshold_QueryPerSecond",
+			json: `{
+				"rule_name": "global-route-limit",
+				"global_threshold": {
+					"query_per_second": 100
+				}
+			}`,
+			expected: ClusterKeyRateLimitConfig{
+				RuleName: "global-route-limit",
+				GlobalThreshold: &GlobalThreshold{
+					Count:      100,
+					TimeWindow: Second,
+				},
+				RejectedCode: DefaultRejectedCode,
+				RejectedMsg:  DefaultRejectedMsg,
+			},
+		},
+		// Global threshold expressed per minute.
+		{
+			name: "GlobalThreshold_QueryPerMinute",
+			json: `{
+				"rule_name": "global-route-limit",
+				"global_threshold": {
+					"query_per_minute": 1000
+				}
+			}`,
+			expected: ClusterKeyRateLimitConfig{
+				RuleName: "global-route-limit",
+				GlobalThreshold: &GlobalThreshold{
+					Count:      1000,
+					TimeWindow: SecondsPerMinute,
+				},
+				RejectedCode: DefaultRejectedCode,
+				RejectedMsg:  DefaultRejectedMsg,
+			},
+		},
+		// A single header-based rule with one exact-match key.
+		{
+			name: "RuleItems_SingleRule",
+			json: `{
+				"rule_name": "rule-based-limit",
+				"rule_items": [
+					{
+						"limit_by_header": "x-test",
+						"limit_keys": [
+							{"key": "key1", "query_per_second": 10}
+						]
+					}
+				]
+			}`,
+			expected: ClusterKeyRateLimitConfig{
+				RuleName: "rule-based-limit",
+				RuleItems: []LimitRuleItem{
+					{
+						LimitType: LimitByHeaderType,
+						Key:       "x-test",
+						ConfigItems: []LimitConfigItem{
+							{
+								ConfigType: ExactType,
+								Key:        "key1",
+								Count:      10,
+								TimeWindow: Second,
+							},
+						},
+					},
+				},
+				RejectedCode: DefaultRejectedCode,
+				RejectedMsg:  DefaultRejectedMsg,
+			},
+		},
+		// Two rules: an exact-match param rule and a wildcard per-cookie rule.
+		{
+			name: "RuleItems_MultipleRules",
+			json: `{
+				"rule_name": "multi-rule-limit",
+				"rule_items": [
+					{
+						"limit_by_param": "user_id",
+						"limit_keys": [
+							{"key": "123", "query_per_hour": 50}
+						]
+					},
+					{
+						"limit_by_per_cookie": "session_id",
+						"limit_keys": [
+							{"key": "*", "query_per_day": 100}
+						]
+					}
+				]
+			}`,
+			expected: ClusterKeyRateLimitConfig{
+				RuleName: "multi-rule-limit",
+				RuleItems: []LimitRuleItem{
+					{
+						LimitType: LimitByParamType,
+						Key:       "user_id",
+						ConfigItems: []LimitConfigItem{
+							{
+								ConfigType: ExactType,
+								Key:        "123",
+								Count:      50,
+								TimeWindow: SecondsPerHour,
+							},
+						},
+					},
+					{
+						LimitType: LimitByPerCookieType,
+						Key:       "session_id",
+						ConfigItems: []LimitConfigItem{
+							{
+								ConfigType: AllType,
+								Key:        "*",
+								Count:      100,
+								TimeWindow: SecondsPerDay,
+							},
+						},
+					},
+				},
+				RejectedCode: DefaultRejectedCode,
+				RejectedMsg:  DefaultRejectedMsg,
+			},
+		},
+		// global_threshold and rule_items are mutually exclusive. The conflict
+		// is reported before rule_items is parsed, so the incomplete rule item
+		// (no limit_keys) is never inspected.
+		{
+			name: "Conflict_GlobalThresholdAndRuleItems",
+			json: `{
+				"rule_name": "test-conflict",
+				"global_threshold": {"query_per_second": 100},
+				"rule_items": [{"limit_by_header": "x-test"}]
+			}`,
+			expectedErr: errors.New("'global_threshold' and 'rule_items' cannot be set at the same time"),
+		},
+		// Neither global_threshold nor rule_items is present.
+		{
+			name: "Missing_GlobalThresholdAndRuleItems",
+			json: `{
+				"rule_name": "test-missing"
+			}`,
+			expectedErr: errors.New("at least one of 'global_threshold' or 'rule_items' must be set"),
+		},
+		// rejected_code and rejected_msg override the defaults.
+		{
+			name: "Custom_RejectedCodeAndMessage",
+			json: `{
+				"rule_name": "custom-reject",
+				"rejected_code": 403,
+				"rejected_msg": "Forbidden",
+				"global_threshold": {"query_per_second": 100}
+			}`,
+			expected: ClusterKeyRateLimitConfig{
+				RuleName: "custom-reject",
+				GlobalThreshold: &GlobalThreshold{
+					Count:      100,
+					TimeWindow: Second,
+				},
+				RejectedCode: 403,
+				RejectedMsg:  "Forbidden",
+			},
+		},
+		// show_limit_quota_header is copied into the config when present.
+		{
+			name: "ShowLimitQuotaHeader_Enabled",
+			json: `{
+				"rule_name": "show-header",
+				"show_limit_quota_header": true,
+				"global_threshold": {"query_per_second": 100}
+			}`,
+			expected: ClusterKeyRateLimitConfig{
+				RuleName: "show-header",
+				GlobalThreshold: &GlobalThreshold{
+					Count:      100,
+					TimeWindow: Second,
+				},
+				ShowLimitQuotaHeader: true,
+				RejectedCode:         DefaultRejectedCode,
+				RejectedMsg:          DefaultRejectedMsg,
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var config ClusterKeyRateLimitConfig
+			result := gjson.Parse(tt.json)
+			err := ParseClusterKeyRateLimitConfig(result, &config)
+
+			if tt.expectedErr != nil {
+				// Error cases are compared by message only.
+				assert.EqualError(t, err, tt.expectedErr.Error())
+			} else {
+				assert.NoError(t, err)
+				assert.Equal(t, tt.expected, config)
+			}
+		})
+	}
+}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
johnlanni	272d693df3	fix higress-console version in helm chart	2025-06-18 09:15:46 +08:00
澄潭	69bc800198	fix: The mcp to rest capability of the mcp server supports returning status without returning a body from the backend, and instead responds via sse (#2445 )	2025-06-17 21:26:38 +08:00
澄潭	1daaa4b880	release 2.1.5-rc.1 (#2446 )	2025-06-17 21:23:42 +08:00
澄潭	6e31a7b67c	update envoy and istio (#2440 )	2025-06-17 17:22:46 +08:00
澄潭	91f070906a	feat: add mcp-router plugin (#2409 )	2025-06-17 15:40:13 +08:00
澄潭	e3aeddcc24	add release-notes of 2.1.4 (#2433 )	2025-06-17 14:41:14 +08:00
woody	926913f0e7	feat(ai-proxy): add support for OpenAI Fine-Tuning API (#2424 )	2025-06-17 13:44:00 +08:00
mirror	c471bb2003	feat: add default route support for wanx image&video synthesis (#2431 )	2025-06-17 13:43:26 +08:00
澄潭	0b9256617e	fix: When configuring an MCP server for SSE forwarding, the controller may crash (#2423 )	2025-06-16 16:08:39 +08:00
hourmoneys	2670ecbf8e	feat: Add AI-based bidding information tool MCP service (#2343 )	2025-06-16 10:14:46 +08:00
mirror	7040e4bd34	feat: support for wanxiang image/video generation in ai-proxy & ai-statistics (#2378 )	2025-06-16 09:39:37 +08:00
xuruidong	de8a4d0b03	docs: fix broken link in mcp-servers README_zh.md (#2418 )	2025-06-15 22:14:10 +08:00
Xijun Dai	b33a3a4d2e	fix(ai-proxy): fix gemini provider missing finishReason (#2408 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com> Co-authored-by: Se7en <chengzw258@163.com>	2025-06-13 21:51:44 +08:00
澄潭	087cb48fc5	opt: unify the `end-of-line` markers in the MCP session filter. (#2403 )	2025-06-12 18:58:56 +08:00
hourmoneys	95f32002d2	add mcp-server doc (#2327 )	2025-06-12 17:14:39 +08:00
Xijun Dai	fb8dd819e9	feat(ai-proxy): Adjust the streaming response structure to keep it consistent with the openai (#2391 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com>	2025-06-12 16:25:35 +08:00
EricaLiu	86934b3203	fix: fix const McpStreamableProtocol spell mistake (#2405 )	2025-06-12 15:35:39 +08:00
HaoJie Liu	38068ee43d	fix(ai-proxy): fix bedrock Sigv4 mismatch (#2402 )	2025-06-12 10:46:02 +08:00
EricaLiu	d81573e0d2	fix: change auto generate se namespace to mcp (#2398 )	2025-06-11 20:30:48 +08:00
tangchang	312b80f91d	feat: Plugin server supports k8s deployment and configures the default download URL of the plugin(#2232 , #2280,#2312) (#2389 ) Co-authored-by: xujingfeng <jingfeng.xjf@alibaba-inc.com> Co-authored-by: 澄潭 <zty98751@alibaba-inc.com>	2025-06-11 12:20:09 +08:00
zty98751	e42e6eeee6	split translae-readme from helm-docs action	2025-06-11 09:52:41 +08:00
澄潭	9f5067d22f	Update release-hgctl.yaml	2025-06-10 22:21:42 +08:00
澄潭	6af9587372	Update release-crd.yaml	2025-06-10 22:21:00 +08:00
johnlanni	5812c1e734	release 2.1.4	2025-06-10 20:58:22 +08:00
github-actions[bot]	bafbe7972d	Update CRD file in the helm folder (#2392 ) Co-authored-by: CH3CHO <2909796+CH3CHO@users.noreply.github.com>	2025-06-10 20:29:23 +08:00
Kent Dong	f3fbf7d6c8	fix: Support mixing line breaks in a single SSE response (#2344 )	2025-06-10 20:21:04 +08:00
EricaLiu	1666dfb01c	fix : fix credential process logic for nacos mcp util and add ut for it (#2394 )	2025-06-10 20:03:45 +08:00
EricaLiu	d2f09fe8c5	fix: refactored mcp server auto discovery logic and fix some issue (#2382 ) Co-authored-by: johnlanni <zty98751@alibaba-inc.com>	2025-06-10 17:11:34 +08:00
Xijun Dai	69d877c116	feat(ai-proxy): 添加 Claude 图片理解与 Tools 调用能力 \|\| feat(ai-proxy): Add Claude image understanding and Tools calling capabilities (#2385 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com>	2025-06-10 15:11:18 +08:00
澄潭	5bc0058779	add upstream override wasm abi (#2387 )	2025-06-10 14:20:02 +08:00
HaoJie Liu	d4e114b152	feat(ai-proxy): support Google Cloud Vertex (#2119 ) Co-authored-by: Kent Dong <ch3cho@qq.com>	2025-06-09 18:11:30 +08:00
Xijun Dai	e674c780c6	feat(ai-proxy): add models & image generation support for gemini (#2380 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com> Co-authored-by: Kent Dong <ch3cho@qq.com>	2025-06-08 15:25:22 +08:00
mamba	26cd6837d5	feat(frontend-gray): Add uniqueGrayTag configuration detection (#2371 ) Co-authored-by: rinfx <yucheng.lxr@alibaba-inc.com>	2025-06-07 15:35:28 +08:00
Xijun Dai	5674d91a10	feat(ai-proxy): 修复 openai 配置 openaiCustomUrl 之后, 对不支持 Api 透传路径错误的问题 \|\| feat(ai-proxy): Fixed the issue that the API pass-through path error does not support openaiCustomUrl after openai is configured. (#2364 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com>	2025-06-06 17:02:56 +08:00
澄潭	c78b4aaba3	Update README.md	2025-06-05 13:36:33 +08:00
澄潭	0e4e8da9c1	Update README.md	2025-06-05 13:35:58 +08:00
澄潭	c9ec8a12bb	Update README.md	2025-06-05 12:00:59 +08:00
澄潭	7484bcea62	Update README.md	2025-06-05 12:00:08 +08:00
Xijun Dai	896780b60e	feat(ai-proxy): add modelMapping regexp support (#2358 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com>	2025-06-03 22:29:17 +08:00
澄潭	7b1ae49cd4	fix content-length header not remove in ai-search plugin (#2363 )	2025-06-03 20:40:14 +08:00
VinciWu557	ee26baf054	feat: support dify ai-proxy e2e test \|\| feat: support diify ai-proxy e2e test (#2319 )	2025-06-03 19:31:58 +08:00
Xijun Dai	33fc47cefb	feat(ai-proxy): add batches & files support (#2355 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com>	2025-06-03 09:42:36 +08:00
澄潭	19946d46ca	Update README.md	2025-05-30 17:24:28 +08:00
mirror	52d0212698	fix: set "EnableSemanticCachefalse" to false when no vector configured in ai-cache (#2351 )	2025-05-30 13:38:06 +08:00
Xijun Dai	a73c33f1da	feat(ai-proxy): support OpenAI-compatible image and audio model Mapping (#2341 )	2025-05-30 12:16:52 +08:00
韩贤涛	69b755a10d	feat: cluster-key-rate-limit support setting global rate limit thresholds for routes (#2262 )	2025-05-29 09:57:10 +08:00
johnlanni	52464c0e06	fix empty authority rewrite in mcp-server plugin	2025-05-28 19:56:16 +08:00
澄潭	d7d5d1c571	Update README.md	2025-05-28 15:31:12 +08:00
johnlanni	ea948ee818	add more info log in mcp-server	2025-05-28 10:30:35 +08:00
Xijun Dai	767f51adce	feat(ai-proxy): add doubao Image Generation support (#2331 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com>	2025-05-27 18:59:07 +08:00
HaoJie Liu	168cb04c61	fix(ai-proxy): URL encode model name in Bedrock requests (#2321 )	2025-05-27 16:06:52 +08:00
johnlanni	323aabf72b	rm .tgitconfig	2025-05-27 07:14:13 +08:00
澄潭	b8d75598ed	Update mcp-server.yaml	2025-05-26 16:51:03 +08:00
johnlanni	b37649a62f	update README of shebao-tools mcp server	2025-05-26 16:31:14 +08:00
澄潭	76f76a70ab	add info log of ai-search plugin (#2323 )	2025-05-26 16:23:59 +08:00
澄潭	647c961f51	Update README.md	2025-05-26 16:12:52 +08:00
澄潭	5a5a72a9f8	Update README.md	2025-05-26 16:09:30 +08:00
Kent Dong	ffcf5df28a	feat: Refactor mcpServer.matchList config generation logic (#2207 )	2025-05-26 15:26:44 +08:00
Se7en	ec83623614	feat: allow skipping higress dev image build during wasmplugin e2e tests (#2264 )	2025-05-26 10:20:05 +08:00
Kent Dong	bf5be07d74	feat: Add a github action to copy CRD definitions from api folder to helm folder (#2268 )	2025-05-26 10:10:56 +08:00
hourmoneys	f6bb5d7729	add mcp service shebao tools (#2303 )	2025-05-23 17:27:15 +08:00
Whitea	031ae21caa	feat(mcp-server): add HackMD mcp server (#2260 )	2025-05-22 16:53:01 +08:00
Forgottener	fa3c5ea0fc	feat: Supports recording request header, request body, response header and response body information in the access log (#2265 )	2025-05-21 16:15:05 +08:00
澄潭	93436db13c	fix proxy-wasm-cpp-sdk (#2281 )	2025-05-21 13:59:27 +08:00
xujingfeng	be2c6f8a4a	fix: modify log level WARN -> DEBUG in key-auth plugin (#2275 )	2025-05-20 13:52:17 +08:00
EricaLiu	c768973e47	Fix : add fail strategy for wasmplugin generated by mcp server (#2237 )	2025-05-15 16:28:37 +08:00
澄潭	8ec65ed377	mcp server support API auth through OAS3 `security schemes` \|\| mcp server support API auth through OAS3 `security schemes` (#2241 )	2025-05-15 15:48:27 +08:00
Rishi Mondal	675a8ce4a9	Add test translation workflow (#2228 ) Signed-off-by: Rishi Mondal <mavrickrishi@gmail.com>	2025-05-14 17:35:50 +08:00
澄潭	06c5ddd80b	Update README.md	2025-05-14 16:25:00 +08:00
EricaLiu	8ccc170500	fix : fix issue #2222 (#2231 )	2025-05-14 15:40:19 +08:00
韩贤涛	ff308d5292	fix: Remove the Authorization request header when using AI-proxy to proxy Gemini (#2220 )	2025-05-13 09:36:04 +08:00
littlejian	af8502b0b0	feat: update translate-readme action (#2208 )	2025-05-12 14:34:04 +08:00
Kent Dong	c683936b1c	fix: Fix the incorrect rewrite config generated for Nacos 3 MCP Servers (#2211 )	2025-05-12 14:30:37 +08:00
Xijun Dai	8b3f1aab1a	feat(ai-proxy): support Amazon Bedrock Image Generation (#2212 ) Signed-off-by: Xijun Dai <daixijun1990@gmail.com> Co-authored-by: Kent Dong <ch3cho@qq.com>	2025-05-10 09:54:31 +08:00
johnlanni	b5eadcdbee	release v2.1.3	2025-05-09 15:30:22 +08:00
EricaLiu	8ca8fd27ab	fix param type error (#2204 )	2025-05-09 14:55:10 +08:00
Kent Dong	ab014cf912	feat: Add SSE direct proxy support to mcp-session filter (#2157 )	2025-05-09 14:28:42 +08:00
EricaLiu	3f67b05fab	fix : fix vs rewrite when mcp protocol is http (#2203 )	2025-05-09 14:03:31 +08:00
HaoJie Liu	cd271c1f87	fix(ai-statistics): adjust requestBodyBufferLimit (#2192 ) Co-authored-by: Kent Dong <ch3cho@qq.com>	2025-05-08 16:18:50 +08:00
johnlanni	755de5ae67	add original path info in mcp-server	2025-05-07 21:17:18 +08:00
johnlanni	40402e7dbd	refactor route call in mcp-server	2025-05-07 20:36:41 +08:00
johnlanni	0a2fb35ae2	fix gemini provider in ai-proxy	2025-05-07 16:54:40 +08:00
澄潭	b16954d8c1	Update README.md	2025-05-07 15:27:28 +08:00
Kent Dong	29370b18d7	feat: Support /v1/models API in ai-proxy (#2164 )	2025-05-06 15:53:13 +08:00
EricaLiu	c9733d405c	fix : Add nacos username and password login option (#2170 )	2025-05-06 15:18:45 +08:00
johnlanni	ec6004dd27	update golang filter dependency	2025-04-30 23:33:04 +08:00
Jingze	ea9a6de8c3	fix: update golang filter README (#2147 )	2025-04-29 22:08:10 +08:00
github-actions[bot]	5e40a700ae	Update helm translated README.zh.md (#2152 )	2025-04-29 21:04:23 +08:00
@@ -1 +1 @@
 .4.0
 .0.0