rustup target add wasm32-wasip1

fix prebuild
fix clippy
2026-02-26 13:40:49 +08:00 · 2025-02-27 13:28:20 +08:00 · 2025-02-27 11:42:34 +08:00 · 2025-02-27 11:12:51 +08:00 · 2025-02-27 10:30:34 +08:00 · 2025-02-26 21:15:53 +08:00
134 changed files with 13370 additions and 2311 deletions
--- a/.github/workflows/build-and-push-wasm-plugin-image.yaml
+++ b/.github/workflows/build-and-push-wasm-plugin-image.yaml
@@ -133,8 +133,13 @@ jobs:
          command="
          set -e
          cd /workspace/plugins/wasm-rust/extensions/${PLUGIN_NAME}
-          cargo build --target wasm32-wasi --release
-          cp target/wasm32-wasi/release/*.wasm plugin.wasm
+          if [ -f ./.prebuild ]; then
+            echo 'Found .prebuild file, sourcing it...'
+            . ./.prebuild
+          fi
+          rustup target add wasm32-wasip1
+          cargo build --target wasm32-wasip1 --release
+          cp target/wasm32-wasip1/release/*.wasm plugin.wasm
          tar czvf plugin.tar.gz plugin.wasm
          echo ${{ secrets.REGISTRY_PASSWORD }} | oras login -u ${{ secrets.REGISTRY_USERNAME }} --password-stdin ${{ env.IMAGE_REGISTRY_SERVICE }}
          oras push ${target_image} ${push_command}
--- a/.github/workflows/build-image-and-push.yaml
+++ b/.github/workflows/build-image-and-push.yaml
@@ -1,229 +1,258 @@
-name: Build Docker Images and Push to Image Registry
-
-on:
-  push:
-    tags:
-    - "v*.*.*"
-  workflow_dispatch: ~
-
-jobs:
-  build-controller-image:
-    runs-on: ubuntu-latest
-    environment:
-      name: image-registry-controller
-    env:
-      CONTROLLER_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
-      CONTROLLER_IMAGE_NAME: ${{ vars.CONTROLLER_IMAGE_NAME || 'higress/higress' }}
-    steps:
-      - name: "Checkout ${{ github.ref }}"
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 1
-
-      - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
-        uses: jlumbroso/free-disk-space@main
-        with:
-          tool-cache: false
-          android: true
-          dotnet: true
-          haskell: true
-          large-packages: true
-          swap-storage: true
-
-      - name: "Setup Go"
-        uses: actions/setup-go@v5
-        with:
-          go-version: 1.21.5
-
-      - name: Setup Golang Caches
-        uses: actions/cache@v4
-        with:
-          path: |-
-            ~/.cache/go-build
-            ~/go/pkg/mod
-          key: ${{ runner.os }}-go-${{ github.run_id }}
-          restore-keys: ${{ runner.os }}-go
-
-      - name: Calculate Docker metadata
-        id: docker-meta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            ${{ env.CONTROLLER_IMAGE_REGISTRY }}/${{ env.CONTROLLER_IMAGE_NAME }}
-          tags: |
-            type=sha
-            type=ref,event=tag
-            type=semver,pattern={{version}}
-            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
-
-      - name: Login to Docker Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.CONTROLLER_IMAGE_REGISTRY }}
-          username: ${{ secrets.REGISTRY_USERNAME }}
-          password: ${{ secrets.REGISTRY_PASSWORD }}
-
-      - name: Build Docker Image and Push
-        run: |
-          GOPROXY="https://proxy.golang.org,direct" make docker-buildx-push
-          BUILT_IMAGE="higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/higress"
-          readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
-          for image in ${IMAGES[@]}; do
-            echo "Image: $image"
-            docker buildx imagetools create $BUILT_IMAGE:$GITHUB_SHA --tag $image
-          done
-
-  build-pilot-image:
-    runs-on: ubuntu-latest
-    environment:
-      name: image-registry-pilot
-    env:
-      PILOT_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
-      PILOT_IMAGE_NAME: ${{ vars.PILOT_IMAGE_NAME || 'higress/pilot' }}
-    steps:
-      - name: "Checkout ${{ github.ref }}"
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 1
-
-      - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
-        uses: jlumbroso/free-disk-space@main
-        with:
-          tool-cache: false
-          android: true
-          dotnet: true
-          haskell: true
-          large-packages: true
-          swap-storage: true
-
-      - name: "Setup Go"
-        uses: actions/setup-go@v5
-        with:
-          go-version: 1.21.5
-
-      - name: Setup Golang Caches
-        uses: actions/cache@v4
-        with:
-          path: |-
-            ~/.cache/go-build
-            ~/go/pkg/mod
-          key: ${{ runner.os }}-go-${{ github.run_id }}
-          restore-keys: ${{ runner.os }}-go
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v1
-
-      - name: Cache Docker layers
-        uses: actions/cache@v2
-        with:
-          path: /tmp/.buildx-cache
-          key: ${{ runner.os }}-buildx-${{ github.sha }}
-          restore-keys: |
-            ${{ runner.os }}-buildx-
-
-      - name: Calculate Docker metadata
-        id: docker-meta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            ${{ env.PILOT_IMAGE_REGISTRY }}/${{ env.PILOT_IMAGE_NAME }}
-          tags: |
-            type=sha
-            type=ref,event=tag
-            type=semver,pattern={{version}}
-            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
-
-      - name: Login to Docker Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.PILOT_IMAGE_REGISTRY }}
-          username: ${{ secrets.REGISTRY_USERNAME }}
-          password: ${{ secrets.REGISTRY_PASSWORD }}
-
-      - name: Build Pilot-Discovery Image and Push
-        run: |
-          GOPROXY="https://proxy.golang.org,direct" make build-istio
-          BUILT_IMAGE="higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/pilot"
-          readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
-          for image in ${IMAGES[@]}; do
-            echo "Image: $image"
-            docker buildx imagetools create $BUILT_IMAGE:$GITHUB_SHA --tag $image
-          done
-
-
-  build-gateway-image:
-    runs-on: ubuntu-latest
-    environment:
-      name: image-registry-pilot
-    env:
-      GATEWAY_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
-      GATEWAY_IMAGE_NAME: ${{ vars.GATEWAY_IMAGE_NAME || 'higress/gateway' }}
-    steps:
-      - name: "Checkout ${{ github.ref }}"
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 1
-
-      - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
-        uses: jlumbroso/free-disk-space@main
-        with:
-          tool-cache: false
-          android: true
-          dotnet: true
-          haskell: true
-          large-packages: true
-          swap-storage: true
-
-      - name: "Setup Go"
-        uses: actions/setup-go@v5
-        with:
-          go-version: 1.21.5
-
-      - name: Setup Golang Caches
-        uses: actions/cache@v4
-        with:
-          path: |-
-            ~/.cache/go-build
-            ~/go/pkg/mod
-          key: ${{ runner.os }}-go-${{ github.run_id }}
-          restore-keys: ${{ runner.os }}-go
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v1
-
-      - name: Cache Docker layers
-        uses: actions/cache@v2
-        with:
-          path: /tmp/.buildx-cache
-          key: ${{ runner.os }}-buildx-${{ github.sha }}
-          restore-keys: |
-            ${{ runner.os }}-buildx-
-
-      - name: Calculate Docker metadata
-        id: docker-meta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            ${{ env.GATEWAY_IMAGE_REGISTRY }}/${{ env.GATEWAY_IMAGE_NAME }}
-          tags: |
-            type=sha
-            type=ref,event=tag
-            type=semver,pattern={{version}}
-            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
-
-      - name: Login to Docker Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.GATEWAY_IMAGE_REGISTRY }}
-          username: ${{ secrets.REGISTRY_USERNAME }}
-          password: ${{ secrets.REGISTRY_PASSWORD }}            
-          
-      - name: Build Gateway Image and Push
-        run: |
-          GOPROXY="https://proxy.golang.org,direct" make build-gateway
-          BUILT_IMAGE="higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/proxyv2"
-          readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
-          for image in ${IMAGES[@]}; do
-            echo "Image: $image"
-            docker buildx imagetools create $BUILT_IMAGE:$GITHUB_SHA --tag $image
-          done
+name: Build Docker Images and Push to Image Registry
+
+on:
+  push:
+    tags:
+    - "v*.*.*"
+  workflow_dispatch: ~
+
+jobs:
+  build-controller-image:
+    runs-on: ubuntu-latest
+    environment:
+      name: image-registry-controller
+    env:
+      CONTROLLER_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
+      CONTROLLER_IMAGE_NAME: ${{ vars.CONTROLLER_IMAGE_NAME || 'higress/higress' }}
+    steps:
+      - name: "Checkout ${{ github.ref }}"
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
+        uses: jlumbroso/free-disk-space@main
+        with:
+          tool-cache: false
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
+      - name: "Setup Go"
+        uses: actions/setup-go@v5
+        with:
+          go-version: 1.21.5
+
+      - name: Setup Golang Caches
+        uses: actions/cache@v4
+        with:
+          path: |-
+            ~/.cache/go-build
+            ~/go/pkg/mod
+          key: ${{ runner.os }}-go-${{ github.run_id }}
+          restore-keys: ${{ runner.os }}-go
+
+      - name: Calculate Docker metadata
+        id: docker-meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ${{ env.CONTROLLER_IMAGE_REGISTRY }}/${{ env.CONTROLLER_IMAGE_NAME }}
+          tags: |
+            type=sha
+            type=ref,event=tag
+            type=semver,pattern={{version}}
+            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
+
+      - name: Login to Docker Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.CONTROLLER_IMAGE_REGISTRY }}
+          username: ${{ secrets.REGISTRY_USERNAME }}
+          password: ${{ secrets.REGISTRY_PASSWORD }}
+
+      - name: Build Docker Image and Push
+        run: |
+          pushed_ref=""  # first tag handed to make as IMG_URL; later tags are aliased onto it
+          mapfile -t tag_list <<< "${{ steps.docker-meta.outputs.tags }}"
+          for ref in ${tag_list[@]}; do
+            echo "Image: $ref"
+            if [ -n "$pushed_ref" ]; then
+              docker buildx imagetools create $pushed_ref --tag $ref
+            else
+              GOPROXY="https://proxy.golang.org,direct" IMG_URL="$ref" make docker-buildx-push
+              pushed_ref="$ref"
+            fi
+          done
+
+  build-pilot-image:
+    runs-on: ubuntu-latest
+    environment:
+      name: image-registry-pilot
+    env:
+      PILOT_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
+      PILOT_IMAGE_NAME: ${{ vars.PILOT_IMAGE_NAME || 'higress/pilot' }}
+    steps:
+      - name: "Checkout ${{ github.ref }}"
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
+        uses: jlumbroso/free-disk-space@main
+        with:
+          tool-cache: false
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
+      - name: "Setup Go"
+        uses: actions/setup-go@v5
+        with:
+          go-version: 1.21.5
+
+      - name: Setup Golang Caches
+        uses: actions/cache@v4
+        with:
+          path: |-
+            ~/.cache/go-build
+            ~/go/pkg/mod
+          key: ${{ runner.os }}-go-${{ github.run_id }}
+          restore-keys: ${{ runner.os }}-go
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+        with:
+          image: tonistiigi/binfmt:qemu-v7.0.0
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Cache Docker layers
+        uses: actions/cache@v4  # v1/v2 of this action were retired by GitHub; v4 matches the Go cache step above
+        with:
+          path: /tmp/.buildx-cache
+          key: ${{ runner.os }}-buildx-${{ github.sha }}
+          restore-keys: |
+            ${{ runner.os }}-buildx-
+
+      - name: Calculate Docker metadata
+        id: docker-meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ${{ env.PILOT_IMAGE_REGISTRY }}/${{ env.PILOT_IMAGE_NAME }}
+          tags: |
+            type=sha
+            type=ref,event=tag
+            type=semver,pattern={{version}}
+            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
+
+      - name: Login to Docker Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.PILOT_IMAGE_REGISTRY }}
+          username: ${{ secrets.REGISTRY_USERNAME }}
+          password: ${{ secrets.REGISTRY_PASSWORD }}
+
+      - name: Build Pilot-Discovery Image and Push
+        run: |
+          BUILT_IMAGE=""  # reference handed to `make build-istio` as IMG_URL; empty until the first tag is seen
+          readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
+          for image in ${IMAGES[@]}; do
+            echo "Image: $image"
+            if [ "$BUILT_IMAGE" == "" ]; then
+              TAG=${image##*:}  # text after the LAST colon, so a registry host:port is not mistaken for the tag
+              HUB=${image%:*}   # strip the trailing ":tag"
+              HUB=${HUB%/*}     # strip the image name, leaving registry/namespace
+              BUILT_IMAGE="$HUB/pilot:$TAG"
+              GOPROXY="https://proxy.golang.org,direct" IMG_URL="$BUILT_IMAGE" make build-istio
+            fi
+            if [ "$BUILT_IMAGE" != "$image" ]; then  # every other requested tag becomes an alias of the built one
+              docker buildx imagetools create $BUILT_IMAGE --tag $image
+            fi
+          done
+
+  build-gateway-image:
+    runs-on: ubuntu-latest
+    environment:
+      name: image-registry-gateway
+    env:
+      GATEWAY_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
+      GATEWAY_IMAGE_NAME: ${{ vars.GATEWAY_IMAGE_NAME || 'higress/gateway' }}
+    steps:
+      - name: "Checkout ${{ github.ref }}"
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
+        uses: jlumbroso/free-disk-space@main
+        with:
+          tool-cache: false
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
+      - name: "Setup Go"
+        uses: actions/setup-go@v5
+        with:
+          go-version: 1.21.5
+
+      - name: Setup Golang Caches
+        uses: actions/cache@v4
+        with:
+          path: |-
+            ~/.cache/go-build
+            ~/go/pkg/mod
+          key: ${{ runner.os }}-go-${{ github.run_id }}
+          restore-keys: ${{ runner.os }}-go
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+        with:
+          image: tonistiigi/binfmt:qemu-v7.0.0
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Cache Docker layers
+        uses: actions/cache@v4  # v1/v2 of this action were retired by GitHub; v4 matches the Go cache step above
+        with:
+          path: /tmp/.buildx-cache
+          key: ${{ runner.os }}-buildx-${{ github.sha }}
+          restore-keys: |
+            ${{ runner.os }}-buildx-
+
+      - name: Calculate Docker metadata
+        id: docker-meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ${{ env.GATEWAY_IMAGE_REGISTRY }}/${{ env.GATEWAY_IMAGE_NAME }}
+          tags: |
+            type=sha
+            type=ref,event=tag
+            type=semver,pattern={{version}}
+            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
+
+      - name: Login to Docker Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.GATEWAY_IMAGE_REGISTRY }}
+          username: ${{ secrets.REGISTRY_USERNAME }}
+          password: ${{ secrets.REGISTRY_PASSWORD }}
+
+      - name: Build Gateway Image and Push
+        run: |
+          BUILT_IMAGE=""  # reference handed to `make build-gateway` as IMG_URL; empty until the first tag is seen
+          readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
+          for image in ${IMAGES[@]}; do
+            echo "Image: $image"
+            if [ "$BUILT_IMAGE" == "" ]; then
+              TAG=${image##*:}  # text after the LAST colon, so a registry host:port is not mistaken for the tag
+              HUB=${image%:*}   # strip the trailing ":tag"
+              HUB=${HUB%/*}     # strip the image name, leaving registry/namespace
+              BUILT_IMAGE="$HUB/proxyv2:$TAG"
+              GOPROXY="https://proxy.golang.org,direct" IMG_URL="$BUILT_IMAGE" make build-gateway
+            fi
+            if [ "$BUILT_IMAGE" != "$image" ]; then  # every other requested tag becomes an alias of the built one
+              docker buildx imagetools create $BUILT_IMAGE --tag $image
+            fi
+          done
--- a/.github/workflows/deploy-to-oss.yaml
+++ b/.github/workflows/deploy-to-oss.yaml
@@ -19,7 +19,7 @@ jobs:
      - name: Download Helm Charts Index
        uses: doggycool/ossutil-github-action@master
        with:
-          ossArgs: 'cp -r -u oss://higress-website-cn-hongkong/helm-charts/index.yaml ./artifact/'
+          ossArgs: 'cp oss://higress-website-cn-hongkong/helm-charts/index.yaml ./artifact/'
          accessKey: ${{ secrets.ACCESS_KEYID }}
          accessSecret: ${{ secrets.ACCESS_KEYSECRET }}
          endpoint: oss-cn-hongkong.aliyuncs.com
--- a/.github/workflows/helm-docs.yaml
+++ b/.github/workflows/helm-docs.yaml
@@ -4,11 +4,15 @@ on:
  pull_request:
    branches:
      - "*"
-
+    paths:
+      - 'helm/**'
+  workflow_dispatch: ~      
  push:
+    branches: [ main ]
+    paths:
+      - 'helm/**'    

 jobs:
-
  helm:
    name: Helm Docs
    runs-on: ubuntu-latest
@@ -32,4 +36,79 @@ jobs:
            echo "Please use helm-docs in your clone, of your fork, of the project, and commit a updated README.md for the chart."
          fi
          git diff --exit-code
-          rm -f ./helm-docs
+          rm -f ./helm-docs
+
+  translate-readme:
+    needs: helm
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y jq
+
+      - name: Translate README.md to Chinese
+        env:
+          API_URL: ${{ secrets.HIGRESS_OPENAI_API_URL }}
+          API_KEY: ${{ secrets.HIGRESS_OPENAI_API_KEY }}
+          API_MODEL: ${{ secrets.HIGRESS_OPENAI_API_MODEL }}
+        run: |
+          cd ./helm/higress
+          FILE_CONTENT=$(cat README.md)
+          # Build the request body with jq so the README text is JSON-escaped.
+          PAYLOAD=$(jq -n \
+            --arg model "$API_MODEL" \
+            --arg content "$FILE_CONTENT" \
+            '{
+              model: $model,
+              messages: [
+                {"role": "system", "content": "You are a translation assistant that translates English Markdown text to Chinese."},
+                {"role": "user", "content": $content}
+              ],
+              temperature: 1.1,
+              stream: false
+            }')
+
+          RESPONSE=$(curl -s -X POST "$API_URL" \
+            -H "Content-Type: application/json" \
+            -H "Authorization: Bearer $API_KEY" \
+            -d "$PAYLOAD")
+
+          echo "response: $RESPONSE"
+          # `// empty` yields no output when the path is missing or null; plain `jq -r` would print "null".
+          TRANSLATED_CONTENT=$(echo "$RESPONSE" | jq -r '.choices[0].message.content // empty')
+
+          if [ -z "$TRANSLATED_CONTENT" ]; then  # reached for error payloads now that null maps to empty
+            echo "Translation failed! Response: $RESPONSE"
+            exit 1
+          fi
+
+          echo "$TRANSLATED_CONTENT" > README.zh.new.md
+          echo "Translation completed and saved to README.zh.new.md."
+
+      - name: Compare README.zh.md
+        id: compare
+        run: |
+          cd ./helm/higress
+          candidate="README.zh.new.md"
+          committed="README.zh.md"
+
+          # Three outcomes: first-time add, no-op when identical, refresh otherwise.
+          # Each one records `updated=<bool>` in GITHUB_ENV.
+          if [ ! -f "$committed" ]; then
+            echo "Add README.zh.md."
+            mv "$candidate" "$committed"
+            echo "updated=true" >> $GITHUB_ENV
+          elif diff -q "$candidate" "$committed"; then
+            echo "Files are identical. No update needed."
+            echo "updated=false" >> $GITHUB_ENV
+          else
+            echo "Files are different. Updating README.zh.md."
+            mv "$candidate" "$committed"
+            echo "updated=true" >> $GITHUB_ENV
+          fi
+          # Any non-zero diff status takes the refresh branch.
--- a/.gitignore
+++ b/.gitignore
@@ -17,4 +17,3 @@ target/
 tools/hack/cluster.conf
 envoy/1.20
 istio/1.12
-Cargo.lock
--- a/Makefile.core.mk
+++ b/Makefile.core.mk
@@ -144,7 +144,7 @@ docker-buildx-push: clean-env docker.higress-buildx
 export PARENT_GIT_TAG:=$(shell cat VERSION)
 export PARENT_GIT_REVISION:=$(TAG)

-export ENVOY_PACKAGE_URL_PATTERN?=https://github.com/higress-group/proxy/releases/download/v2.1.0/envoy-symbol-ARCH.tar.gz
+export ENVOY_PACKAGE_URL_PATTERN?=https://github.com/higress-group/proxy/releases/download/v2.1.1/envoy-symbol-ARCH.tar.gz

 build-envoy: prebuild
 	./tools/hack/build-envoy.sh
@@ -162,13 +162,13 @@ buildx-prepare:
 build-gateway: prebuild buildx-prepare
 	USE_REAL_USER=1 TARGET_ARCH=amd64 DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh init
 	USE_REAL_USER=1 TARGET_ARCH=arm64 DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh init
-	DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh docker.buildx
+	DOCKER_TARGETS="docker.proxyv2" IMG_URL="${IMG_URL}" ./tools/hack/build-istio-image.sh docker.buildx

 build-gateway-local: prebuild
 	TARGET_ARCH=${TARGET_ARCH} DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh docker

 build-istio: prebuild buildx-prepare
-	DOCKER_TARGETS="docker.pilot" ./tools/hack/build-istio-image.sh docker.buildx
+	DOCKER_TARGETS="docker.pilot" IMG_URL="${IMG_URL}" ./tools/hack/build-istio-image.sh docker.buildx

 build-istio-local: prebuild
 	TARGET_ARCH=${TARGET_ARCH} DOCKER_TARGETS="docker.pilot" ./tools/hack/build-istio-image.sh docker
@@ -188,7 +188,7 @@ install: pre-install
 	helm install higress helm/higress -n higress-system --create-namespace --set 'global.local=true'

 ENVOY_LATEST_IMAGE_TAG ?= 958467a353d411ae3f06e03b096bfd342cddb2c6
-ISTIO_LATEST_IMAGE_TAG ?= f5cd4d940185204f375a0dd863246037c183cb76
+ISTIO_LATEST_IMAGE_TAG ?= d9c728d3b01f64855e012b08d136e306f1160397

 install-dev: pre-install
 	helm install higress helm/core -n higress-system --create-namespace --set 'controller.tag=$(TAG)' --set 'gateway.replicas=1' --set 'pilot.tag=$(ISTIO_LATEST_IMAGE_TAG)' --set 'gateway.tag=$(ENVOY_LATEST_IMAGE_TAG)' --set 'global.local=true'
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ Higress 在阿里内部为解决 Tengine reload 对长连接业务有损，以

 阿里云基于 Higress 构建了云原生 API 网关产品，为大量企业客户提供 99.99% 的网关高可用保障服务能力。

-Higress 基于 AI 网关能力，支撑了通义千问 APP、百炼大模型 API、机器学习 PAI 平台等 AI 业务。同时服务国内头部的 AIGC 企业（如零一万物），以及 AI 产品（如 FastGPT）
+Higress 的 AI 网关能力支持国内外所有[主流模型供应商](https://github.com/alibaba/higress/tree/main/plugins/wasm-go/extensions/ai-proxy/provider)和基于 vllm/ollama 等自建的 DeepSeek 模型；在阿里云内部支撑了通义千问 APP、百炼大模型 API、机器学习 PAI 平台等 AI 业务。同时服务国内头部的 AIGC 企业（如零一万物），以及 AI 产品（如 FastGPT）

 ![](https://img.alicdn.com/imgextra/i2/O1CN011AbR8023V8R5N0HcA_!!6000000007260-2-tps-1080-606.png)

--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-v2.0.6-rc.3
+v2.0.7
--- a/docker/docker.mk
+++ b/docker/docker.mk
@@ -35,6 +35,8 @@ DOCKER_ALL_VARIANTS ?= debug distroless
 INCLUDE_UNTAGGED_DEFAULT ?= false
 DEFAULT_DISTRIBUTION=debug

-HIGRESS_DOCKER_BUILDX_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker buildx create --name higress --node higress0 --platform linux/amd64,linux/arm64 --use && docker buildx build --no-cache --platform linux/amd64,linux/arm64 $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(HUB)/higress:$(TAG)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . --push  ); )
-HIGRESS_DOCKER_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker build $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(HUB)/higress:$(TAG)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . ); )
+IMG ?= higress
+IMG_URL ?= $(HUB)/$(IMG):$(TAG)

+HIGRESS_DOCKER_BUILDX_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker buildx create --name higress --node higress0 --platform linux/amd64,linux/arm64 --use && docker buildx build --no-cache --platform linux/amd64,linux/arm64 $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(IMG_URL)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . --push  ); )
+HIGRESS_DOCKER_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker build $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(IMG_URL)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . ); )
--- a/envoy/envoy
+++ b/envoy/envoy
--- a/helm/core/Chart.yaml
+++ b/helm/core/Chart.yaml
@@ -1,5 +1,5 @@
 apiVersion: v2
-appVersion: 2.0.6-rc.3
+appVersion: 2.0.7
 description: Helm chart for deploying higress gateways
 icon: https://higress.io/img/higress_logo_small.png
 home: http://higress.io/
@@ -10,4 +10,4 @@ name: higress-core
 sources:
 - http://github.com/alibaba/higress
 type: application
-version: 2.0.6-rc.3
+version: 2.0.7
--- a/helm/core/templates/_pod.tpl
+++ b/helm/core/templates/_pod.tpl
@@ -128,7 +128,7 @@ template:
        - name: ISTIO_META_REQUESTED_NETWORK_VIEW
          value: "{{.}}"
        {{- end }}
-        {{- range $key, $val := .Values.env }}
+        {{- range $key, $val := .Values.gateway.env }}
        - name: {{ $key }}
          value: {{ $val | quote }}
        {{- end }}
--- a/helm/core/values.yaml
+++ b/helm/core/values.yaml
@@ -3,7 +3,7 @@ global:
  enableH3: false
  enableIPv6: false
  enableProxyProtocol: false
-  enableLDSCache: true
+  enableLDSCache: false
  enablePushAllMCPClusters: true
  liteMetrics: false
  xdsMaxRecvMsgSize: "104857600"
@@ -488,6 +488,7 @@ gateway:
    externalTrafficPolicy: ""

  rollingMaxSurge: 100%
+  # -- If global.local is true, the default value is 100%, otherwise it is 25%
  rollingMaxUnavailable: 25%

  resources:
--- a/helm/higress/Chart.lock
+++ b/helm/higress/Chart.lock
@@ -1,9 +1,9 @@
 dependencies:
 - name: higress-core
  repository: file://../core
-  version: 2.0.6-rc.3
+  version: 2.0.7
 - name: higress-console
  repository: https://higress.io/helm-charts/
-  version: 2.0.1
-digest: sha256:6821ee9079a795f3e1de2c5126c36d3285f44863938a88f021ee4fbce82c0f15
-generated: "2025-01-15T20:46:00.498051+08:00"
+  version: 2.0.4
+digest: sha256:ca9cc8bdac0488d79c20e7a4e3d7b3a436a59b697a37728daa462601b4d1ea65
+generated: "2025-02-19T16:23:39.424987+08:00"
--- a/helm/higress/Chart.yaml
+++ b/helm/higress/Chart.yaml
@@ -1,5 +1,5 @@
 apiVersion: v2
-appVersion: 2.0.6-rc.3
+appVersion: 2.0.7
 description: Helm chart for deploying Higress gateways
 icon: https://higress.io/img/higress_logo_small.png
 home: http://higress.io/
@@ -12,9 +12,9 @@ sources:
 dependencies:
 - name: higress-core
  repository: "file://../core"
-  version: 2.0.6-rc.3
+  version: 2.0.7
 - name: higress-console
  repository: "https://higress.io/helm-charts/"
-  version: 2.0.1
+  version: 2.0.4
 type: application
-version: 2.0.6-rc.3
+version: 2.0.7
--- a/helm/higress/README.md
+++ b/helm/higress/README.md
@@ -128,7 +128,7 @@ The command removes all the Kubernetes components associated with the chart and
 | gateway.resources.requests.memory | string | `"2048Mi"` |  |
 | gateway.revision | string | `""` | revision declares which revision this gateway is a part of |
 | gateway.rollingMaxSurge | string | `"100%"` |  |
-| gateway.rollingMaxUnavailable | string | `"25%"` |  |
+| gateway.rollingMaxUnavailable | string | `"25%"` | If global.local is true, the default value is 100%, otherwise it is 25% |
 | gateway.securityContext | string | `nil` | Define the security context for the pod. If unset, this will be automatically set to the minimum privileges required to bind to port 80 and 443. On Kubernetes 1.22+, this only requires the `net.ipv4.ip_unprivileged_port_start` sysctl. |
 | gateway.service.annotations | object | `{}` |  |
 | gateway.service.externalTrafficPolicy | string | `""` |  |
@@ -162,7 +162,7 @@ The command removes all the Kubernetes components associated with the chart and
 | global.enableH3 | bool | `false` |  |
 | global.enableIPv6 | bool | `false` |  |
 | global.enableIstioAPI | bool | `true` | If true, Higress Controller will monitor istio resources as well |
-| global.enableLDSCache | bool | `true` |  |
+| global.enableLDSCache | bool | `false` |  |
 | global.enableProxyProtocol | bool | `false` |  |
 | global.enablePushAllMCPClusters | bool | `true` |  |
 | global.enableSRDS | bool | `true` |  |
--- a/helm/higress/README.zh.md
+++ b/helm/higress/README.zh.md
@@ -0,0 +1,188 @@
+## Higress for Kubernetes
+
+Higress 是基于阿里巴巴内部网关实践构建的云原生 API 网关。
+
+依托 Istio 和 Envoy，Higress 实现了流量网关、微服务网关和安全网关三重架构的融合，从而大幅降低了部署、运维成本。
+
+## 设置仓库信息
+
+```console
+helm repo add higress.io https://higress.io/helm-charts
+helm repo update
+```
+
+## 安装
+
+以 `higress` 为发布名称安装 chart：
+
+```console
+helm install higress -n higress-system higress.io/higress --create-namespace --render-subchart-notes
+```
+
+## 卸载
+
+要卸载/删除 higress 部署：
+
+```console
+helm delete higress -n higress-system
+```
+
+该命令会移除与 chart 相关的所有 Kubernetes 组件，并删除发布。
+
+## 参数
+
+## 值
+
+| 键 | 类型 | 默认值 | 描述 |
+|-----|------|---------|-------------|
+| clusterName | 字符串 | `""` |  |
+| controller.affinity | 对象 | `{}` |  |
+| controller.automaticHttps.email | 字符串 | `""` |  |
+| controller.automaticHttps.enabled | 布尔值 | `true` |  |
+| controller.autoscaling.enabled | 布尔值 | `false` |  |
+| controller.autoscaling.maxReplicas | 整数 | `5` |  |
+| controller.autoscaling.minReplicas | 整数 | `1` |  |
+| controller.autoscaling.targetCPUUtilizationPercentage | 整数 | `80` |  |
+| controller.env | 对象 | `{}` |  |
+| controller.hub | 字符串 | `"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress"` |  |
+| controller.image | 字符串 | `"higress"` |  |
+| controller.imagePullSecrets | 列表 | `[]` |  |
+| controller.labels | 对象 | `{}` |  |
+| controller.name | 字符串 | `"higress-controller"` |  |
+| controller.nodeSelector | 对象 | `{}` |  |
+| controller.podAnnotations | 对象 | `{}` |  |
+| controller.podSecurityContext | 对象 | `{}` |  |
+| controller.ports[0].name | 字符串 | `"http"` |  |
+| controller.ports[0].port | 整数 | `8888` |  |
+| controller.ports[0].protocol | 字符串 | `"TCP"` |  |
+| controller.ports[0].targetPort | 整数 | `8888` |  |
+| controller.ports[1].name | 字符串 | `"http-solver"` |  |
+| controller.ports[1].port | 整数 | `8889` |  |
+| controller.ports[1].protocol | 字符串 | `"TCP"` |  |
+| controller.ports[1].targetPort | 整数 | `8889` |  |
+| controller.ports[2].name | 字符串 | `"grpc"` |  |
+| controller.ports[2].port | 整数 | `15051` |  |
+| controller.ports[2].protocol | 字符串 | `"TCP"` |  |
+| controller.ports[2].targetPort | 整数 | `15051` |  |
+| controller.probe.httpGet.path | 字符串 | `"/ready"` |  |
+| controller.probe.httpGet.port | 整数 | `8888` |  |
+| controller.probe.initialDelaySeconds | 整数 | `1` |  |
+| controller.probe.periodSeconds | 整数 | `3` |  |
+| controller.probe.timeoutSeconds | 整数 | `5` |  |
+| controller.rbac.create | 布尔值 | `true` |  |
+| controller.replicas | 整数 | `1` | Higress Controller 的 Pod 数量 |
+| controller.resources.limits.cpu | 字符串 | `"1000m"` |  |
+| controller.resources.limits.memory | 字符串 | `"2048Mi"` |  |
+| controller.resources.requests.cpu | 字符串 | `"500m"` |  |
+| controller.resources.requests.memory | 字符串 | `"2048Mi"` |  |
+| controller.securityContext | 对象 | `{}` |  |
+| controller.service.type | 字符串 | `"ClusterIP"` |  |
+| controller.serviceAccount.annotations | 对象 | `{}` | 添加到服务账户的注解 |
+| controller.serviceAccount.create | 布尔值 | `true` | 指定是否创建服务账户 |
+| controller.serviceAccount.name | 字符串 | `""` | 如果未设置且 create 为 true，则使用 fullname 模板生成名称 |
+| controller.tag | 字符串 | `""` |  |
+| controller.tolerations | 列表 | `[]` |  |
+| downstream | 对象 | `{"connectionBufferLimits":32768,"http2":{"initialConnectionWindowSize":1048576,"initialStreamWindowSize":65535,"maxConcurrentStreams":100},"idleTimeout":180,"maxRequestHeadersKb":60,"routeTimeout":0}` | 下游配置设置 |
+| gateway.affinity | 对象 | `{}` |  |
+| gateway.annotations | 对象 | `{}` | 应用到所有资源的注解 |
+| gateway.autoscaling.enabled | 布尔值 | `false` |  |
+| gateway.autoscaling.maxReplicas | 整数 | `5` |  |
+| gateway.autoscaling.minReplicas | 整数 | `1` |  |
+| gateway.autoscaling.targetCPUUtilizationPercentage | 整数 | `80` |  |
+| gateway.containerSecurityContext | 字符串 | `nil` |  |
+| gateway.env | 对象 | `{}` | Pod 环境变量 |
+| gateway.hostNetwork | 布尔值 | `false` |  |
+| gateway.httpPort | 整数 | `80` |  |
+| gateway.httpsPort | 整数 | `443` |  |
+| gateway.hub | 字符串 | `"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress"` |  |
+| gateway.image | 字符串 | `"gateway"` |  |
+| gateway.kind | 字符串 | `"Deployment"` | 使用 `DaemonSet` 或 `Deployment` |
+| gateway.labels | 对象 | `{}` | 应用到所有资源的标签 |
+| gateway.metrics.enabled | 布尔值 | `false` | 如果为 true，则为网关创建 PodMonitor 或 VMPodScrape |
+| gateway.metrics.honorLabels | 布尔值 | `false` |  |
+| gateway.metrics.interval | 字符串 | `""` |  |
+| gateway.metrics.metricRelabelConfigs | 列表 | `[]` | 用于 operator.victoriametrics.com/v1beta1.VMPodScrape |
+| gateway.metrics.metricRelabelings | 列表 | `[]` | 用于 monitoring.coreos.com/v1.PodMonitor |
+| gateway.metrics.provider | 字符串 | `"monitoring.coreos.com"` | CustomResourceDefinition 的提供者组名，可以是 monitoring.coreos.com 或 operator.victoriametrics.com |
+| gateway.metrics.rawSpec | 对象 | `{}` | 更多原始的 podMetricsEndpoints 规范 |
+| gateway.metrics.relabelConfigs | 列表 | `[]` |  |
+| gateway.metrics.relabelings | 列表 | `[]` |  |
+| gateway.metrics.scrapeTimeout | 字符串 | `""` |  |
+| gateway.name | 字符串 | `"higress-gateway"` |  |
+| gateway.networkGateway | 字符串 | `""` | 如果指定，网关将作为给定网络的网络网关。 |
+| gateway.nodeSelector | 对象 | `{}` |  |
+| gateway.podAnnotations."prometheus.io/path" | 字符串 | `"/stats/prometheus"` |  |
+| gateway.podAnnotations."prometheus.io/port" | 字符串 | `"15020"` |  |
+| gateway.podAnnotations."prometheus.io/scrape" | 字符串 | `"true"` |  |
+| gateway.podAnnotations."sidecar.istio.io/inject" | 字符串 | `"false"` |  |
+| gateway.rbac.enabled | 布尔值 | `true` | 如果启用，将创建角色以启用从网关访问证书。当使用 http://gateway-api.org/ 时不需要。 |
+| gateway.readinessFailureThreshold | 整数 | `30` | 判定就绪探测失败前允许的连续探测失败次数。 |
+| gateway.readinessInitialDelaySeconds | 整数 | `1` | 就绪探测的初始延迟秒数。 |
+| gateway.readinessPeriodSeconds | 整数 | `2` | 就绪探测之间的间隔。 |
+| gateway.readinessSuccessThreshold | 整数 | `1` | 判定就绪探测成功前所需的连续探测成功次数。 |
+| gateway.readinessTimeoutSeconds | 整数 | `3` | 就绪探测的超时秒数 |
+| gateway.replicas | 整数 | `2` | Higress Gateway 的 Pod 数量 |
+| gateway.resources.limits.cpu | 字符串 | `"2000m"` |  |
+| gateway.resources.limits.memory | 字符串 | `"2048Mi"` |  |
+| gateway.resources.requests.cpu | 字符串 | `"2000m"` |  |
+| gateway.resources.requests.memory | 字符串 | `"2048Mi"` |  |
+| gateway.revision | 字符串 | `""` | 声明此网关所属的修订版本（revision） |
+| gateway.rollingMaxSurge | 字符串 | `"100%"` |  |
+| gateway.rollingMaxUnavailable | 字符串 | `"25%"` | 如果 global.local 为 true，则默认值为 100%，否则为 25% |
+| gateway.securityContext | 字符串 | `nil` | 定义 Pod 的安全上下文。如果未设置，将自动设置为绑定到端口 80 和 443 所需的最小权限。在 Kubernetes 1.22+ 上，这只需要 `net.ipv4.ip_unprivileged_port_start` sysctl 参数。 |
+| gateway.service.annotations | 对象 | `{}` |  |
+| gateway.service.externalTrafficPolicy | 字符串 | `""` |  |
+| gateway.service.loadBalancerClass | 字符串 | `""` |  |
+| gateway.service.loadBalancerIP | 字符串 | `""` |  |
+| gateway.service.loadBalancerSourceRanges | 列表 | `[]` |  |
+| gateway.service.ports[0].name | 字符串 | `"http2"` |  |
+| gateway.service.ports[0].port | 整数 | `80` |  |
+| gateway.service.ports[0].protocol | 字符串 | `"TCP"` |  |
+| gateway.service.ports[0].targetPort | 整数 | `80` |  |
+| gateway.service.ports[1].name | 字符串 | `"https"` |  |
+| gateway.service.ports[1].port | 整数 | `443` |  |
+| gateway.service.ports[1].protocol | 字符串 | `"TCP"` |  |
+| gateway.service.ports[1].targetPort | 整数 | `443` |  |
+| gateway.service.type | 字符串 | `"LoadBalancer"` | 服务类型。设置为 "None" 以完全禁用服务 |
+| gateway.serviceAccount.annotations | 对象 | `{}` | 添加到服务账户的注解 |
+| gateway.serviceAccount.create | 布尔值 | `true` | 如果设置，将创建服务账户。否则，使用默认值 |
+| gateway.serviceAccount.name | 字符串 | `""` | 要使用的服务账户名称。如果未设置，则使用发布名称 |
+| gateway.tag | 字符串 | `""` |  |
+| gateway.tolerations | 列表 | `[]` |  |
+| gateway.unprivilegedPortSupported | 字符串 | `nil` |  |
+| global.autoscalingv2API | 布尔值 | `true` | 是否使用 autoscaling/v2 模板进行 HPA 设置，仅供内部使用，用户不应配置。 |
+| global.caAddress | 字符串 | `""` | 自定义的 CA 地址，用于为集群中的 Pod 检索证书。CSR 客户端（如 Istio Agent 和 ingress gateways）可以使用此地址指定 CA 端点。如果未明确设置，则默认为 Istio 发现地址。 |
+| global.caName | 字符串 | `""` | 工作负载证书的 CA 名称。例如，当 caName=GkeWorkloadCertificate 时，GKE 工作负载证书将用作工作负载的证书。默认值为 ""，当 caName="" 时，CA 将通过其他机制（如环境变量 CA_PROVIDER）配置。 |
+| global.configCluster | 布尔值 | `false` | 将远程集群配置为外部 istiod 的配置集群。 |
+| global.defaultPodDisruptionBudget | 对象 | `{"enabled":false}` | 为控制平面启用 Pod 中断预算，用于确保 Istio 控制平面组件逐步升级或恢复。 |
+| global.defaultResources | 对象 | `{"requests":{"cpu":"10m"}}` | 应用于所有部署的最小请求资源集，以便 Horizontal Pod Autoscaler 能够正常工作（如果设置）。每个组件可以通过在相关部分添加自己的资源块并设置所需的资源值来覆盖这些默认值。 |
+| global.defaultUpstreamConcurrencyThreshold | 整数 | `10000` |  |
+| global.disableAlpnH2 | 布尔值 | `false` | 是否在 ALPN 中禁用 HTTP/2 |
+| global.enableGatewayAPI | 布尔值 | `false` | 如果为 true，Higress Controller 还将监控 Gateway API 资源 |
+| global.enableH3 | 布尔值 | `false` |  |
+| global.enableIPv6 | 布尔值 | `false` |  |
+| global.enableIstioAPI | 布尔值 | `true` | 如果为 true，Higress Controller 还将监控 istio 资源 |
+| global.enableLDSCache | 布尔值 | `false` |  |
+| global.enableProxyProtocol | 布尔值 | `false` |  |
+| global.enablePushAllMCPClusters | 布尔值 | `true` |  |
+| global.enableSRDS | 布尔值 | `true` |  |
+| global.enableStatus | 布尔值 | `true` | 如果为 true，Higress Controller 将更新 Ingress 资源的状态字段。从 Nginx Ingress 迁移时，为了避免 Ingress 对象的状态字段被覆盖，需要将此参数设置为 false，以便 Higress 不会将入口 IP 写入相应 Ingress 对象的状态字段。 |
+| global.externalIstiod | 布尔值 | `false` | 配置由外部 istiod 控制的远程集群数据平面。当设置为 true 时，本地不部署 istiod，仅启用其他发现 chart 的子集。 |
+| global.hostRDSMergeSubset | 布尔值 | `false` |  |
+| global.hub | 字符串 | `"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress"` | Istio 镜像的默认仓库。发布版本发布到 docker hub 的 'istio' 项目下。来自 prow 的开发构建位于 gcr.io |
+| global.imagePullPolicy | 字符串 | `""` | 如果不需要默认行为，则指定镜像拉取策略。默认行为：最新镜像将始终拉取，否则 IfNotPresent。 |
+| global.imagePullSecrets | 列表 | `[]` | 所有 ServiceAccount 的 ImagePullSecrets，用于引用此 ServiceAccount 的 Pod 拉取任何镜像的同一命名空间中的秘密列表。对于不使用 ServiceAccount 的组件（即 grafana、servicegraph、tracing），ImagePullSecrets 将添加到相应的 Deployment(StatefulSet) 对象中。对于配置了私有 docker 注册表的任何集群，必须设置。 |
+| global.ingressClass | 字符串 | `"higress"` | IngressClass 过滤 higress controller 监听的 ingress 资源。默认的 ingress class 是 higress。有一些特殊情况用于特殊的 ingress class。1. 当 ingress class 设置为 nginx 时，higress controller 将监听带有 nginx ingress class 或没有任何 ingress class 的 ingress 资源。2. 当 ingress class 设置为空时，higress controller 将监听 k8s 集群中的所有 ingress 资源。 |
+| global.istioNamespace | 字符串 | `"istio-system"` | 用于定位 istiod。 |
+| global.istiod | 对象 | `{"enableAnalysis":false}` | 默认在主分支中启用以最大化测试。 |
+| global.jwtPolicy | 字符串 | `"third-party-jwt"` | 配置验证 JWT 的策略。目前支持两个选项："third-party-jwt" 和 "first-party-jwt"。 |
+| global.kind | 布尔值 | `false` |  |
+| global.liteMetrics | 布尔值 | `false` |  |
+| global.local | 布尔值 | `false` | 当部署到本地集群（如：kind 集群）时，将此设置为 true。 |
+| global.logAsJson | 布尔值 | `false` |  |
+| global.logging | 对象 | `{"level":"default:info"}` | 以逗号分隔的每个范围的最小日志级别，格式为 <scope>:<level>,<scope>:<level> 控制平面根据组件不同有不同的范围，但可以配置所有组件的默认日志级别 如果为空，将使用代码中配置的默认范围和级别 |
+| global.meshID | 字符串 | `""` | 如果网格管理员未指定值，Istio 将使用网格的信任域的值。最佳实践是选择一个合适的信任域值。 |
+| global.meshNetworks | 对象 | `{}` |  |
+| global.mountMtlsCerts | 布尔值 | `false` | 使用用户指定的、挂载的密钥和证书用于 Pilot 和工作负载。 |
+| global.multiCluster.clusterName | 字符串 | `""` | 应设置为此安装运行的集群的名称。这是为了正确标记代理的 sidecar 注入所必需的 |
+| global.multiCluster.enabled | 布尔值 | `true` | 设置为 true 以通过各自的 ingressgateway 服务连接两个 kubernetes 集群，当每个集群中的 Pod 无法直接相互通信时。
--- a/istio/istio
+++ b/istio/istio
--- a/istio/proxy
+++ b/istio/proxy
--- a/pkg/ingress/kube/annotations/auth.go
+++ b/pkg/ingress/kube/annotations/auth.go
@@ -15,12 +15,6 @@
 package annotations

 import (
-	"errors"
-	"sort"
-	"strings"
-
-	corev1 "k8s.io/api/core/v1"
-
 	"github.com/alibaba/higress/pkg/ingress/kube/util"
 	. "github.com/alibaba/higress/pkg/ingress/log"
 )
@@ -57,101 +51,10 @@ func (a auth) Parse(annotations Annotations, config *Ingress, globalContext *Glo
 	if !needAuthConfig(annotations) {
 		return nil
 	}
-
-	authConfig := &AuthConfig{
-		AuthType: defaultAuthType,
-	}
-
-	// Check auth type
-	authType, err := annotations.ParseStringASAP(authType)
-	if err != nil {
-		IngressLog.Errorf("Parse auth type error %v within ingress %/%s", err, config.Namespace, config.Name)
-		return nil
-	}
-	if authType != defaultAuthType {
-		IngressLog.Errorf("Auth type %s within ingress %/%s is not supported yet.", authType, config.Namespace, config.Name)
-		return nil
-	}
-
-	secretName, _ := annotations.ParseStringASAP(authSecretAnn)
-	namespaced := util.SplitNamespacedName(secretName)
-	if namespaced.Name == "" {
-		IngressLog.Errorf("Auth secret name within ingress %s/%s is invalid", config.Namespace, config.Name)
-		return nil
-	}
-	if namespaced.Namespace == "" {
-		namespaced.Namespace = config.Namespace
-	}
-
-	configKey := util.ClusterNamespacedName{
-		NamespacedName: namespaced,
-		ClusterId:      config.ClusterId,
-	}
-	authConfig.AuthSecret = configKey
-
-	// Subscribe secret
-	globalContext.WatchedSecrets.Insert(configKey.String())
-
-	secretType := authFileAuthSecretType
-	if rawSecretType, err := annotations.ParseStringASAP(authSecretTypeAnn); err == nil {
-		resultAuthSecretType := authSecretType(rawSecretType)
-		if resultAuthSecretType == authFileAuthSecretType || resultAuthSecretType == authMapAuthSecretType {
-			secretType = resultAuthSecretType
-		}
-	}
-
-	authConfig.AuthRealm, _ = annotations.ParseStringASAP(authRealm)
-
-	// Process credentials.
-	secretLister, exist := globalContext.ClusterSecretLister[config.ClusterId]
-	if !exist {
-		IngressLog.Errorf("secret lister of cluster %s doesn't exist", config.ClusterId)
-		return nil
-	}
-	authSecret, err := secretLister.Secrets(namespaced.Namespace).Get(namespaced.Name)
-	if err != nil {
-		IngressLog.Errorf("Secret %s within ingress %s/%s is not found",
-			namespaced.String(), config.Namespace, config.Name)
-		return nil
-	}
-	credentials, err := convertCredentials(secretType, authSecret)
-	if err != nil {
-		IngressLog.Errorf("Parse auth secret fail, err %v", err)
-		return nil
-	}
-	authConfig.Credentials = credentials
-
-	config.Auth = authConfig
+	IngressLog.Error("The annotation nginx.ingress.kubernetes.io/auth-type is no longer supported after version 2.0.0, please use the higress wasm plugin (e.g., basic-auth) as an alternative.")
 	return nil
 }

-func convertCredentials(secretType authSecretType, secret *corev1.Secret) ([]string, error) {
-	var result []string
-	switch secretType {
-	case authFileAuthSecretType:
-		users, exist := secret.Data[authFileKey]
-		if !exist {
-			return nil, errors.New("the auth file type must has auth key in secret data")
-		}
-		userList := strings.Split(string(users), "\n")
-		for _, item := range userList {
-			if !strings.Contains(item, ":") {
-				continue
-			}
-			result = append(result, item)
-		}
-	case authMapAuthSecretType:
-		for name, password := range secret.Data {
-			result = append(result, name+":"+string(password))
-		}
-	}
-	sort.SliceStable(result, func(i, j int) bool {
-		return result[i] < result[j]
-	})
-
-	return result, nil
-}
-
 func needAuthConfig(annotations Annotations) bool {
 	return annotations.HasASAP(authType) &&
 		annotations.HasASAP(authSecretAnn)
--- a/pkg/ingress/kube/annotations/auth_test.go
+++ b/pkg/ingress/kube/annotations/auth_test.go
@@ -1,197 +0,0 @@
-// Copyright (c) 2022 Alibaba Group Holding Ltd.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package annotations
-
-import (
-	"context"
-	"reflect"
-	"testing"
-	"time"
-
-	"istio.io/istio/pkg/cluster"
-	"istio.io/istio/pkg/util/sets"
-	v1 "k8s.io/api/core/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/types"
-	"k8s.io/client-go/informers"
-	"k8s.io/client-go/kubernetes/fake"
-	listerv1 "k8s.io/client-go/listers/core/v1"
-	"k8s.io/client-go/tools/cache"
-
-	"github.com/alibaba/higress/pkg/ingress/kube/util"
-)
-
-func TestAuthParse(t *testing.T) {
-	auth := auth{}
-	inputCases := []struct {
-		input         map[string]string
-		secret        *v1.Secret
-		expect        *AuthConfig
-		watchedSecret string
-	}{
-		{
-			secret: &v1.Secret{
-				ObjectMeta: metav1.ObjectMeta{
-					Name:      "bar",
-					Namespace: "foo",
-				},
-				Data: map[string][]byte{
-					"auth": []byte("A:a\nB:b"),
-				},
-			},
-		},
-		{
-			input: map[string]string{
-				buildNginxAnnotationKey(authType): "digest",
-			},
-			expect: nil,
-			secret: &v1.Secret{
-				ObjectMeta: metav1.ObjectMeta{
-					Name:      "bar",
-					Namespace: "foo",
-				},
-				Data: map[string][]byte{
-					"auth": []byte("A:a\nB:b"),
-				},
-			},
-		},
-		{
-			input: map[string]string{
-				buildNginxAnnotationKey(authType):        defaultAuthType,
-				buildHigressAnnotationKey(authSecretAnn): "foo/bar",
-			},
-			secret: &v1.Secret{
-				ObjectMeta: metav1.ObjectMeta{
-					Name:      "bar",
-					Namespace: "foo",
-				},
-				Data: map[string][]byte{
-					"auth": []byte("A:a\nB:b"),
-				},
-			},
-			expect: &AuthConfig{
-				AuthType: defaultAuthType,
-				AuthSecret: util.ClusterNamespacedName{
-					NamespacedName: types.NamespacedName{
-						Namespace: "foo",
-						Name:      "bar",
-					},
-					ClusterId: "cluster",
-				},
-				Credentials: []string{"A:a", "B:b"},
-			},
-			watchedSecret: "cluster/foo/bar",
-		},
-		{
-			input: map[string]string{
-				buildNginxAnnotationKey(authType):          defaultAuthType,
-				buildHigressAnnotationKey(authSecretAnn):   "foo/bar",
-				buildNginxAnnotationKey(authSecretTypeAnn): string(authMapAuthSecretType),
-			},
-			secret: &v1.Secret{
-				ObjectMeta: metav1.ObjectMeta{
-					Name:      "bar",
-					Namespace: "foo",
-				},
-				Data: map[string][]byte{
-					"A": []byte("a"),
-					"B": []byte("b"),
-				},
-			},
-			expect: &AuthConfig{
-				AuthType: defaultAuthType,
-				AuthSecret: util.ClusterNamespacedName{
-					NamespacedName: types.NamespacedName{
-						Namespace: "foo",
-						Name:      "bar",
-					},
-					ClusterId: "cluster",
-				},
-				Credentials: []string{"A:a", "B:b"},
-			},
-			watchedSecret: "cluster/foo/bar",
-		},
-		{
-			input: map[string]string{
-				buildNginxAnnotationKey(authType):          defaultAuthType,
-				buildHigressAnnotationKey(authSecretAnn):   "bar",
-				buildNginxAnnotationKey(authSecretTypeAnn): string(authFileAuthSecretType),
-			},
-			secret: &v1.Secret{
-				ObjectMeta: metav1.ObjectMeta{
-					Name:      "bar",
-					Namespace: "default",
-				},
-				Data: map[string][]byte{
-					"auth": []byte("A:a\nB:b"),
-				},
-			},
-			expect: &AuthConfig{
-				AuthType: defaultAuthType,
-				AuthSecret: util.ClusterNamespacedName{
-					NamespacedName: types.NamespacedName{
-						Namespace: "default",
-						Name:      "bar",
-					},
-					ClusterId: "cluster",
-				},
-				Credentials: []string{"A:a", "B:b"},
-			},
-			watchedSecret: "cluster/default/bar",
-		},
-	}
-
-	for _, inputCase := range inputCases {
-		t.Run("", func(t *testing.T) {
-			config := &Ingress{
-				Meta: Meta{
-					Namespace: "default",
-					ClusterId: "cluster",
-				},
-			}
-
-			globalContext, cancel := initGlobalContext(inputCase.secret)
-			defer cancel()
-
-			_ = auth.Parse(inputCase.input, config, globalContext)
-			if !reflect.DeepEqual(inputCase.expect, config.Auth) {
-				t.Fatal("Should be equal")
-			}
-
-			if inputCase.watchedSecret != "" {
-				if !globalContext.WatchedSecrets.Contains(inputCase.watchedSecret) {
-					t.Fatalf("Should watch secret %s", inputCase.watchedSecret)
-				}
-			}
-		})
-	}
-}
-
-func initGlobalContext(secret *v1.Secret) (*GlobalContext, context.CancelFunc) {
-	ctx, cancel := context.WithCancel(context.Background())
-
-	client := fake.NewSimpleClientset(secret)
-	informerFactory := informers.NewSharedInformerFactory(client, time.Hour)
-	secretInformer := informerFactory.Core().V1().Secrets()
-	go secretInformer.Informer().Run(ctx.Done())
-	cache.WaitForCacheSync(ctx.Done(), secretInformer.Informer().HasSynced)
-
-	return &GlobalContext{
-		WatchedSecrets: sets.New[string](),
-		ClusterSecretLister: map[cluster.ID]listerv1.SecretLister{
-			"cluster": secretInformer.Lister(),
-		},
-	}, cancel
-}
--- a/plugins/wasm-go/extensions/ai-cache/README.md
+++ b/plugins/wasm-go/extensions/ai-cache/README.md
@@ -30,7 +30,7 @@ LLM 结果缓存插件，默认配置方式可以直接用于 openai 协议的

 ## 配置说明

-本插件同时支持基于向量数据库的语义化缓存和基于字符串匹配的缓存方法，如果同时配置了向量数据库和缓存数据库，优先使用向量数据库。
+本插件同时支持基于向量数据库的语义化缓存和基于字符串匹配的缓存方法，如果同时配置了向量数据库和缓存数据库，优先使用缓存数据库，未命中场景下使用向量数据库能力。

 *Note*: 向量数据库(vector) 和 缓存数据库(cache) 不能同时为空，否则本插件无法提供缓存服务。

@@ -86,7 +86,8 @@ LLM 结果缓存插件，默认配置方式可以直接用于 openai 协议的
 | cache.password | string | optional | "" | 缓存服务密码 |
 | cache.timeout | uint32 | optional | 10000 | 缓存服务的超时时间，单位为毫秒。默认值是10000，即10秒 |
 | cache.cacheTTL | int | optional | 0 | 缓存过期时间，单位为秒。默认值是 0，即 永不过期|
-| cacheKeyPrefix | string | optional | "higress-ai-cache:" | 缓存 Key 的前缀，默认值为 "higress-ai-cache:" |
+| cache.cacheKeyPrefix | string | optional | "higress-ai-cache:" | 缓存 Key 的前缀，默认值为 "higress-ai-cache:" |
+| cache.database | int | optional | 0 | 使用的数据库id，仅限redis，例如配置为1，对应`SELECT 1` |


 ## 其他配置
@@ -168,6 +169,7 @@ redis:
  serviceName: my_redis.dns
  servicePort: 6379
  timeout: 100
+  database: 1
 ```

 ## 进阶用法
--- a/plugins/wasm-go/extensions/ai-cache/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-cache/README_EN.md
@@ -15,26 +15,29 @@ Plugin Execution Phase: `Authentication Phase`
 Plugin Execution Priority: `10`

 ## Configuration Description
-| Name                              | Type     | Requirement | Default                                                                                                                                                                                                                                                 | Description                                                                                                |
-| --------                          | -------- | --------    | --------                                                                                                                                                                                                                                                | --------                                                                                                   |
-| cacheKeyFrom.requestBody          | string   | optional    | "messages.@reverse.0.content"                                                                                                                                                                                                                           | Extracts a string from the request Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax     |
-| cacheValueFrom.responseBody       | string   | optional    | "choices.0.message.content"                                                                                                                                                                                                                             | Extracts a string from the response Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax     |
+| Name                              | Type     | Requirement | Default                                                                                                                                                                                                                                                 | Description                                                                                                                             |
+| --------                          | -------- | --------    | --------                                                                                                                                                                                                                                                | --------                                                                                                                                |
+| cacheKeyFrom.requestBody          | string   | optional    | "messages.@reverse.0.content"                                                                                                                                                                                                                           | Extracts a string from the request Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax            |
+| cacheValueFrom.responseBody       | string   | optional    | "choices.0.message.content"                                                                                                                                                                                                                             | Extracts a string from the response Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax           |
 | cacheStreamValueFrom.responseBody | string   | optional    | "choices.0.delta.content"                                                                                                                                                                                                                               | Extracts a string from the streaming response Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax |
-| cacheKeyPrefix                    | string   | optional    | "higress-ai-cache:"                                                                                                                                                                                                                                     | Prefix for the Redis cache key                                                                                         |
-| cacheTTL                          | integer  | optional    | 0                                                                                                                                                                                                                                                       | Cache expiration time in seconds, default value is 0, which means never expire                                                            |
-| redis.serviceName                 | string   | required    | -                                                                                                                                                                                                                                                       | The complete FQDN name of the Redis service, including the service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local               |
-| redis.servicePort                 | integer  | optional    | 6379                                                                                                                                                                                                                                                    | Redis service port                                                                                             |
-| redis.timeout                     | integer  | optional    | 1000                                                                                                                                                                                                                                                    | Timeout for requests to Redis, in milliseconds                                                                          |
-| redis.username                    | string   | optional    | -                                                                                                                                                                                                                                                       | Username for logging into Redis                                                                                        |
-| redis.password                    | string   | optional    | -                                                                                                                                                                                                                                                       | Password for logging into Redis                                                                                          |
-| returnResponseTemplate            | string   | optional    | `{"id":"from-cache","choices":[%s],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}`                                                                                                     | Template for returning HTTP response, with %s marking the part to be replaced by cache value                                              |
-| returnStreamResponseTemplate      | string   | optional    | `data:{"id":"from-cache","choices":[{"index":0,"delta":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}\n\ndata:[DONE]\n\n` | Template for returning streaming HTTP response, with %s marking the part to be replaced by cache value                                          |
+| cacheKeyPrefix                    | string   | optional    | "higress-ai-cache:"                                                                                                                                                                                                                                     | Prefix for the Redis cache key                                                                                                          |
+| cacheTTL                          | integer  | optional    | 0                                                                                                                                                                                                                                                       | Cache expiration time in seconds, default value is 0, which means never expire                                                          |
+| redis.serviceName                 | string   | required    | -                                                                                                                                                                                                                                                       | The complete FQDN name of the Redis service, including the service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local              |
+| redis.servicePort                 | integer  | optional    | 6379                                                                                                                                                                                                                                                    | Redis service port                                                                                                                      |
+| redis.timeout                     | integer  | optional    | 1000                                                                                                                                                                                                                                                    | Timeout for requests to Redis, in milliseconds                                                                                          |
+| redis.username                    | string   | optional    | -                                                                                                                                                                                                                                                       | Username for logging into Redis                                                                                                         |
+| redis.database                    | int      | optional    | 0                                                                                                                                                                                                                                                       | The database ID used, limited to Redis, for example, configured as 1, corresponds to `SELECT 1`.                                        |
+| redis.password                    | string   | optional    | -                                                                                                                                                                                                                                                       | Password for logging into Redis                                                                                                         |
+| returnResponseTemplate            | string   | optional    | `{"id":"from-cache","choices":[%s],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}`                                                                                                     | Template for returning HTTP response, with %s marking the part to be replaced by cache value                                            |
+| returnStreamResponseTemplate      | string   | optional    | `data:{"id":"from-cache","choices":[{"index":0,"delta":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}\n\ndata:[DONE]\n\n` | Template for returning streaming HTTP response, with %s marking the part to be replaced by cache value                                  |

 ## Configuration Example
 ```yaml  
 redis:  
  serviceName: my-redis.dns  
  timeout: 2000  
+  servicePort: 6379
+  database: 1
 ```  

 ## Advanced Usage
--- a/plugins/wasm-go/extensions/ai-cache/cache/provider.go
+++ b/plugins/wasm-go/extensions/ai-cache/cache/provider.go
@@ -52,6 +52,9 @@ type ProviderConfig struct {
 	// @Title 缓存 Key 前缀
 	// @Description 缓存 Key 的前缀，默认值为 "higressAiCache:"
 	cacheKeyPrefix string
+	// @Title redis database
+	// @Description 指定 redis 的 database，默认使用0
+	database int
 }

 func (c *ProviderConfig) GetProviderType() string {
@@ -79,6 +82,7 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 	if !json.Get("password").Exists() {
 		c.password = ""
 	}
+	c.database = int(json.Get("database").Int())
 	c.timeout = uint32(json.Get("timeout").Int())
 	if !json.Get("timeout").Exists() {
 		c.timeout = 10000
--- a/plugins/wasm-go/extensions/ai-cache/cache/redis.go
+++ b/plugins/wasm-go/extensions/ai-cache/cache/redis.go
@@ -38,7 +38,7 @@ func (rp *redisProvider) GetProviderType() string {
 }

 func (rp *redisProvider) Init(username string, password string, timeout uint32) error {
-	return rp.client.Init(rp.config.username, rp.config.password, int64(rp.config.timeout))
+	return rp.client.Init(rp.config.username, rp.config.password, int64(rp.config.timeout), wrapper.WithDataBase(rp.config.database))
 }

 func (rp *redisProvider) Get(key string, cb wrapper.RedisResponseCallback) error {
--- a/plugins/wasm-go/extensions/ai-cache/config/config.go
+++ b/plugins/wasm-go/extensions/ai-cache/config/config.go
@@ -28,9 +28,9 @@ type PluginConfig struct {
 	embeddingProvider embedding.Provider
 	vectorProvider    vector.Provider

-	embeddingProviderConfig embedding.ProviderConfig
-	vectorProviderConfig    vector.ProviderConfig
-	cacheProviderConfig     cache.ProviderConfig
+	embeddingProviderConfig *embedding.ProviderConfig
+	vectorProviderConfig    *vector.ProviderConfig
+	cacheProviderConfig     *cache.ProviderConfig

 	CacheKeyFrom         string
 	CacheValueFrom       string
@@ -47,7 +47,9 @@ type PluginConfig struct {
 }

 func (c *PluginConfig) FromJson(json gjson.Result, log wrapper.Log) {
-
+	c.embeddingProviderConfig = &embedding.ProviderConfig{}
+	c.vectorProviderConfig = &vector.ProviderConfig{}
+	c.cacheProviderConfig = &cache.ProviderConfig{}
 	c.vectorProviderConfig.FromJson(json.Get("vector"))
 	c.embeddingProviderConfig.FromJson(json.Get("embedding"))
 	c.cacheProviderConfig.FromJson(json.Get("cache"))
@@ -142,7 +144,7 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
 	var err error
 	if c.embeddingProviderConfig.GetProviderType() != "" {
 		log.Debugf("embedding provider is set to %s", c.embeddingProviderConfig.GetProviderType())
-		c.embeddingProvider, err = embedding.CreateProvider(c.embeddingProviderConfig)
+		c.embeddingProvider, err = embedding.CreateProvider(*c.embeddingProviderConfig)
 		if err != nil {
 			return err
 		}
@@ -152,7 +154,7 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
 	}
 	if c.cacheProviderConfig.GetProviderType() != "" {
 		log.Debugf("cache provider is set to %s", c.cacheProviderConfig.GetProviderType())
-		c.cacheProvider, err = cache.CreateProvider(c.cacheProviderConfig)
+		c.cacheProvider, err = cache.CreateProvider(*c.cacheProviderConfig)
 		if err != nil {
 			return err
 		}
@@ -162,7 +164,7 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
 	}
 	if c.vectorProviderConfig.GetProviderType() != "" {
 		log.Debugf("vector provider is set to %s", c.vectorProviderConfig.GetProviderType())
-		c.vectorProvider, err = vector.CreateProvider(c.vectorProviderConfig)
+		c.vectorProvider, err = vector.CreateProvider(*c.vectorProviderConfig)
 		if err != nil {
 			return err
 		}
@@ -182,7 +184,7 @@ func (c *PluginConfig) GetVectorProvider() vector.Provider {
 }

 func (c *PluginConfig) GetVectorProviderConfig() vector.ProviderConfig {
-	return c.vectorProviderConfig
+	return *c.vectorProviderConfig
 }

 func (c *PluginConfig) GetCacheProvider() cache.Provider {
--- a/plugins/wasm-go/extensions/ai-cache/embedding/ollama.go
+++ b/plugins/wasm-go/extensions/ai-cache/embedding/ollama.go
@@ -0,0 +1,151 @@
+package embedding
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
+	"net/http"
+	"strconv"
+)
+
+const (
+	OLLAMA_DOMAIN             = "localhost"  // Host value used when serviceHost is empty
+	OLLAMA_PORT               = 11434        // port used when servicePort is 0
+	OLLAMA_DEFAULT_MODEL_NAME = "llama3.2"   // model used when none is configured
+	OLLAMA_ENDPOINT           = "/api/embed" // request path returned by constructParameters
+)
+
+// ollamaProviderInitializer is the stateless initializer for the Ollama embedding provider.
+type ollamaProviderInitializer struct{}
+
+// InitConfig does nothing: no Ollama-specific settings are read from the JSON config.
+func (t *ollamaProviderInitializer) InitConfig(json gjson.Result) {}
+
+// ValidateConfig always returns nil; this provider performs no validation.
+func (t *ollamaProviderInitializer) ValidateConfig() error { return nil }
+
+type ollamaProvider struct {
+	config ProviderConfig                              // settings after CreateProvider has filled in defaults
+	client *wrapper.ClusterClient[wrapper.FQDNCluster] // client used by GetEmbedding to POST embed requests
+}
+
+// CreateProvider fills an unset model, host and port with the Ollama defaults and returns the provider.
+func (t *ollamaProviderInitializer) CreateProvider(c ProviderConfig) (Provider, error) {
+	if c.model == "" {
+		c.model = OLLAMA_DEFAULT_MODEL_NAME
+	}
+	if c.serviceHost == "" {
+		c.serviceHost = OLLAMA_DOMAIN
+	}
+	if c.servicePort == 0 {
+		c.servicePort = OLLAMA_PORT
+	}
+
+	// c is received by value, so the defaulted copy is what the provider stores.
+	cluster := wrapper.FQDNCluster{
+		FQDN: c.serviceName,
+		Host: c.serviceHost,
+		Port: c.servicePort,
+	}
+	return &ollamaProvider{config: c, client: wrapper.NewClusterClient(cluster)}, nil
+}
+
+// GetProviderType reports the provider type string for Ollama,
+// i.e. the PROVIDER_TYPE_OLLAMA constant.
+func (t *ollamaProvider) GetProviderType() string { return PROVIDER_TYPE_OLLAMA }
+
+type ollamaResponse struct { // JSON shape decoded by parseTextEmbedding
+	Model           string      `json:"model"`             // decoded from "model"; not read in this file
+	Embeddings      [][]float64 `json:"embeddings"`        // GetEmbedding hands element 0 to its callback
+	TotalDuration   int64       `json:"total_duration"`    // NOTE(review): presumably timing metadata; units not shown here — confirm
+	LoadDuration    int64       `json:"load_duration"`     // NOTE(review): presumably timing metadata — confirm
+	PromptEvalCount int64       `json:"prompt_eval_count"` // NOTE(review): presumably a prompt token count — confirm
+}
+
+type ollamaEmbeddingRequest struct { // JSON body marshalled by constructParameters
+	Input string `json:"input"` // text to embed
+	Model string `json:"model"` // taken from the provider's configured model
+}
+
+// constructParameters builds the request path, headers and JSON body used to embed text.
+// It fails when text is empty or when the request body cannot be marshalled.
+func (t *ollamaProvider) constructParameters(text string, log wrapper.Log) (string, [][2]string, []byte, error) {
+	if text == "" {
+		return "", nil, nil, errors.New("queryString text cannot be empty")
+	}
+
+	payload, marshalErr := json.Marshal(ollamaEmbeddingRequest{
+		Input: text,
+		Model: t.config.model,
+	})
+	if marshalErr != nil {
+		log.Errorf("failed to marshal request data: %v", marshalErr)
+		return "", nil, nil, marshalErr
+	}
+	log.Debugf("constructParameters: %s", string(payload))
+
+	jsonHeaders := [][2]string{
+		{"Content-Type", "application/json"},
+	}
+
+	// Marshalling succeeded on this path, so the error result is nil.
+	return OLLAMA_ENDPOINT, jsonHeaders, payload, nil
+}
+
+func (t *ollamaProvider) parseTextEmbedding(responseBody []byte) (*ollamaResponse, error) {
+	var resp ollamaResponse // decode target for the embed response body
+	if err := json.Unmarshal(responseBody, &resp); err != nil {
+		return nil, fmt.Errorf("failed to parse response: %w", err) // %w keeps the decode error unwrappable
+	}
+	return &resp, nil // decoded response; the embeddings list may still be empty
+}
+
+func (t *ollamaProvider) GetEmbedding(
+	queryString string,
+	ctx wrapper.HttpContext,
+	log wrapper.Log,
+	callback func(emb []float64, err error)) error {
+	embUrl, embHeaders, embRequestBody, err := t.constructParameters(queryString, log)
+	if err != nil {
+		log.Errorf("failed to construct parameters: %v", err)
+		return err
+	}
+
+	var resp *ollamaResponse
+
+	defer func() {
+		if err != nil {
+			callback(nil, err)
+		}
+	}()
+	err = t.client.Post(embUrl, embHeaders, embRequestBody,
+		func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+
+			if statusCode != http.StatusOK {
+				err = errors.New("failed to get embedding due to status code: " + strconv.Itoa(statusCode))
+				callback(nil, err)
+				return
+			}
+
+			resp, err = t.parseTextEmbedding(responseBody)
+			if err != nil {
+				err = fmt.Errorf("failed to parse response: %v", err)
+				callback(nil, err)
+				return
+			}
+
+			log.Debugf("get embedding response: %d, %s", statusCode, responseBody)
+
+			if len(resp.Embeddings) == 0 {
+				err = errors.New("no embedding found in response")
+				callback(nil, err)
+				return
+			}
+
+			callback(resp.Embeddings[0], nil)
+
+		}, t.config.timeout)
+	return err
+}
--- a/plugins/wasm-go/extensions/ai-cache/embedding/provider.go
+++ b/plugins/wasm-go/extensions/ai-cache/embedding/provider.go
@@ -12,6 +12,7 @@ const (
 	PROVIDER_TYPE_TEXTIN    = "textin"
 	PROVIDER_TYPE_COHERE    = "cohere"
 	PROVIDER_TYPE_OPENAI    = "openai"
+	PROVIDER_TYPE_OLLAMA    = "ollama"
 )

 type providerInitializer interface {
@@ -26,6 +27,7 @@ var (
 		PROVIDER_TYPE_TEXTIN:    &textInProviderInitializer{},
 		PROVIDER_TYPE_COHERE:    &cohereProviderInitializer{},
 		PROVIDER_TYPE_OPENAI:    &openAIProviderInitializer{},
+		PROVIDER_TYPE_OLLAMA:    &ollamaProviderInitializer{},
 	}
 )

--- a/plugins/wasm-go/extensions/ai-cache/main.go
+++ b/plugins/wasm-go/extensions/ai-cache/main.go
@@ -23,7 +23,7 @@ const (
 	SKIP_CACHE_HEADER           = "x-higress-skip-ai-cache"
 	ERROR_PARTIAL_MESSAGE_KEY   = "errorPartialMessage"

-	DEFAULT_MAX_BODY_BYTES uint32 = 10 * 1024 * 1024
+	DEFAULT_MAX_BODY_BYTES uint32 = 100 * 1024 * 1024
 )

 func main() {
@@ -104,11 +104,11 @@ func onHttpRequestBody(ctx wrapper.HttpContext, c config.PluginConfig, body []by
 		key = strings.Join(userMessages, "\n")
 	} else if c.CacheKeyStrategy == config.CACHE_KEY_STRATEGY_DISABLED {
 		log.Info("[onHttpRequestBody] cache key strategy is disabled")
-		ctx.DontReadRequestBody()
+		ctx.DontReadResponseBody()
 		return types.ActionContinue
 	} else {
 		log.Warnf("[onHttpRequestBody] unknown cache key strategy: %s", c.CacheKeyStrategy)
-		ctx.DontReadRequestBody()
+		ctx.DontReadResponseBody()
 		return types.ActionContinue
 	}

@@ -147,11 +147,6 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, c config.PluginConfig, log w
 		ctx.SetResponseBodyBufferLimit(DEFAULT_MAX_BODY_BYTES)
 	}

-	if ctx.GetContext(ERROR_PARTIAL_MESSAGE_KEY) != nil {
-		ctx.DontReadResponseBody()
-		return types.ActionContinue
-	}
-
 	return types.ActionContinue
 }

@@ -159,7 +154,7 @@ func onHttpResponseBody(ctx wrapper.HttpContext, c config.PluginConfig, chunk []
 	log.Debugf("[onHttpResponseBody] is last chunk: %v", isLastChunk)
 	log.Debugf("[onHttpResponseBody] chunk: %s", string(chunk))

-	if ctx.GetContext(TOOL_CALLS_CONTEXT_KEY) != nil {
+	if ctx.GetContext(TOOL_CALLS_CONTEXT_KEY) != nil || ctx.GetContext(ERROR_PARTIAL_MESSAGE_KEY) != nil {
 		return chunk
 	}

--- a/plugins/wasm-go/extensions/ai-history/README.md
+++ b/plugins/wasm-go/extensions/ai-history/README.md
@@ -20,17 +20,18 @@ description: AI 历史对话插件配置参考

 ## 配置字段

-| 名称                | 数据类型    | 填写要求     | 默认值                   | Description                                                               |
-|-------------------|---------|----------|-----------------------|---------------------------------------------------------------------------|
-| identityHeader    | string  | optional | "Authorization"       | 身份解析对应的请求头,可用 Authorization,X-Mse-Consumer等                               |
-| fillHistoryCnt    | integer | optional | 3                     | 默认填充历史对话轮次                                                                |
-| cacheKeyPrefix    | string  | optional | "higress-ai-history:" | Redis缓存Key的前缀                                                             |
-| cacheTTL          | integer | optional | 0                     | 缓存的过期时间，单位是秒，默认值为0，即永不过期                                                  |
-| redis.serviceName | string  | required | -                     | redis 服务名称，带服务类型的完整 FQDN 名称，例如 my-redis.dns、redis.my-ns.svc.cluster.local |
-| redis.servicePort | integer | optional | 6379                  | redis 服务端口                                                                |
-| redis.timeout     | integer | optional | 1000                  | 请求 redis 的超时时间，单位为毫秒                                                      |
-| redis.username    | string  | optional | -                     | 登陆 redis 的用户名                                                             |
-| redis.password    | string  | optional | -                     | 登陆 redis 的密码                                                              |
+| 名称              | 数据类型 | 填写要求 | 默认值                | Description                                                                                  |
+|-------------------|----------|----------|-----------------------|----------------------------------------------------------------------------------------------|
+| identityHeader    | string   | optional | "Authorization"       | 身份解析对应的请求头,可用 Authorization,X-Mse-Consumer等                                     |
+| fillHistoryCnt    | integer  | optional | 3                     | 默认填充历史对话轮次                                                                         |
+| cacheKeyPrefix    | string   | optional | "higress-ai-history:" | Redis缓存Key的前缀                                                                           |
+| cacheTTL          | integer  | optional | 0                     | 缓存的过期时间，单位是秒，默认值为0，即永不过期                                              |
+| redis.serviceName | string   | required | -                     | redis 服务名称，带服务类型的完整 FQDN 名称，例如 my-redis.dns、redis.my-ns.svc.cluster.local |
+| redis.servicePort | integer  | optional | 6379                  | redis 服务端口                                                                               |
+| redis.timeout     | integer  | optional | 1000                  | 请求 redis 的超时时间，单位为毫秒                                                            |
+| redis.username    | string   | optional | -                     | 登陆 redis 的用户名                                                                          |
+| redis.password    | string   | optional | -                     | 登陆 redis 的密码                                                                            |
+| redis.database    | integer  | optional | 0                     | 使用的数据库id，例如配置为1，对应`SELECT 1`                                                  |

 ## 用法示例

--- a/plugins/wasm-go/extensions/ai-history/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-history/README_EN.md
@@ -15,17 +15,19 @@ Plugin Execution Phase: `Default Phase`
 Plugin Execution Priority: `650`

 ## Configuration Fields
-| Name                | Data Type  | Required   | Default Value                   | Description                                                               |
-|-------------------|---------|----------|-----------------------|---------------------------------------------------------------------------|
-| identityHeader    | string  | optional | "Authorization"       | The request header for identity resolution, can be Authorization, X-Mse-Consumer, etc.                               |
-| fillHistoryCnt    | integer | optional | 3                     | Default number of historical dialogues to be filled.                                                                |
-| cacheKeyPrefix    | string  | optional | "higress-ai-history:" | Prefix for Redis cache key.                                                             |
-| cacheTTL          | integer | optional | 0                     | Cache expiration time in seconds, default value is 0, meaning it never expires.                                                  |
-| redis.serviceName | string  | required | -                     | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
-| redis.servicePort | integer | optional | 6379                  | Redis service port.                                                                |
-| redis.timeout     | integer | optional | 1000                  | Timeout for requests to Redis, in milliseconds.                                                      |
-| redis.username    | string  | optional | -                     | Username for logging into Redis.                                                             |
-| redis.password    | string  | optional | -                     | Password for logging into Redis.                                                              |
+| Name              | Data Type | Required | Default Value         | Description                                                                                             |
+|-------------------|-----------|----------|-----------------------|---------------------------------------------------------------------------------------------------------|
+| identityHeader    | string    | optional | "Authorization"       | The request header for identity resolution, can be Authorization, X-Mse-Consumer, etc.                  |
+| fillHistoryCnt    | integer   | optional | 3                     | Default number of historical dialogues to be filled.                                                    |
+| cacheKeyPrefix    | string    | optional | "higress-ai-history:" | Prefix for Redis cache key.                                                                             |
+| cacheTTL          | integer   | optional | 0                     | Cache expiration time in seconds, default value is 0, meaning it never expires.                         |
+| redis.serviceName | string    | required | -                     | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
+| redis.servicePort | integer   | optional | 6379                  | Redis service port.                                                                                     |
+| redis.timeout     | integer   | optional | 1000                  | Timeout for requests to Redis, in milliseconds.                                                         |
+| redis.username    | string    | optional | -                     | Username for logging into Redis.                                                                        |
+| redis.password    | string    | optional | -                     | Password for logging into Redis.                                                                        |
+| redis.database    | integer   | optional | 0                     | The database ID used, for example, configured as 1, corresponds to `SELECT 1`.                          |
+

 ## Usage Example
 ### Configuration Information
--- a/plugins/wasm-go/extensions/ai-history/main.go
+++ b/plugins/wasm-go/extensions/ai-history/main.go
@@ -76,6 +76,9 @@ type RedisInfo struct {
 	// @Title zh-CN 请求超时
 	// @Description zh-CN 请求 redis 的超时时间，单位为毫秒。默认值是1000，即1秒
 	Timeout int `required:"false" yaml:"timeout" json:"timeout"`
+	// @Title zh-CN Database
+	// @Description zh-CN redis database
+	Database int `required:"false" yaml:"database" json:"database"`
 }

 type KVExtractor struct {
@@ -138,6 +141,7 @@ func parseConfig(json gjson.Result, c *PluginConfig, log wrapper.Log) error {
 	if c.RedisInfo.Timeout == 0 {
 		c.RedisInfo.Timeout = 1000
 	}
+	c.RedisInfo.Database = int(json.Get("redis.database").Int())
 	c.QuestionFrom.RequestBody = "messages.@reverse.0.content"
 	c.AnswerValueFrom.ResponseBody = "choices.0.message.content"
 	c.AnswerStreamValueFrom.ResponseBody = "choices.0.delta.content"
@@ -159,7 +163,7 @@ func parseConfig(json gjson.Result, c *PluginConfig, log wrapper.Log) error {
 		FQDN: c.RedisInfo.ServiceName,
 		Port: int64(c.RedisInfo.ServicePort),
 	})
-	return c.redisClient.Init(c.RedisInfo.Username, c.RedisInfo.Password, int64(c.RedisInfo.Timeout))
+	return c.redisClient.Init(c.RedisInfo.Username, c.RedisInfo.Password, int64(c.RedisInfo.Timeout), wrapper.WithDataBase(c.RedisInfo.Database))
 }

 func onHttpRequestHeaders(ctx wrapper.HttpContext, config PluginConfig, log wrapper.Log) types.Action {
--- a/plugins/wasm-go/extensions/ai-proxy/README.md
+++ b/plugins/wasm-go/extensions/ai-proxy/README.md
@@ -31,17 +31,19 @@ description: AI 代理插件配置参考

 `provider`的配置字段说明如下：

-| 名称               | 数据类型        | 填写要求 | 默认值 | 描述                                                                                                                                                        |
-|------------------| --------------- | -------- | ------ |-----------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `type`           | string          | 必填     | -      | AI 服务提供商名称                                                                                                                                                |
-| `apiTokens`      | array of string | 非必填   | -      | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token，插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。                                                                                     |
-| `timeout`        | number          | 非必填   | -      | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000，即 2 分钟                                                                                                                    |
-| `modelMapping`   | map of string   | 非必填   | -      | AI 模型映射表，用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>1. 支持前缀匹配。例如用 "gpt-3-*" 匹配所有名称以“gpt-3-”开头的模型；<br/>2. 支持使用 "*" 为键来配置通用兜底映射关系；<br/>3. 如果映射的目标名称为空字符串 ""，则表示保留原模型名称。 |
-| `protocol`       | string          | 非必填   | -      | 插件对外提供的 API 接口契约。目前支持以下取值：openai（默认值，使用 OpenAI 的接口契约）、original（使用目标服务提供商的原始接口契约）                                                                          |
-| `context`        | object          | 非必填   | -      | 配置 AI 对话上下文信息                                                                                                                                             |
-| `customSettings` | array of customSetting | 非必填   | -      | 为AI请求指定覆盖或者填充参数                                                                                                                                           |
-| `failover`       | object | 非必填   | -      | 配置 apiToken 的 failover 策略，当 apiToken 不可用时，将其移出 apiToken 列表，待健康检测通过后重新添加回 apiToken 列表                                                                      |
-| `retryOnFailure` | object | 非必填   | -      | 当请求失败时立即进行重试                                                                                                                                              |  
+| 名称               | 数据类型        | 填写要求 | 默认值 | 描述                                                                                                                                                                                                                                        |
+|------------------| --------------- | -------- | ------ |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `type`           | string          | 必填     | -      | AI 服务提供商名称                                                                                                                                                                                                                                |
+| `apiTokens`      | array of string | 非必填   | -      | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token，插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。                                                                                                                                                                     |
+| `timeout`        | number          | 非必填   | -      | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000，即 2 分钟                                                                                                                                                                                                    |
+| `modelMapping`   | map of string   | 非必填   | -      | AI 模型映射表，用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>1. 支持前缀匹配。例如用 "gpt-3-*" 匹配所有名称以“gpt-3-”开头的模型；<br/>2. 支持使用 "*" 为键来配置通用兜底映射关系；<br/>3. 如果映射的目标名称为空字符串 ""，则表示保留原模型名称。                                                                                 |
+| `protocol`       | string          | 非必填   | -      | 插件对外提供的 API 接口契约。目前支持以下取值：openai（默认值，使用 OpenAI 的接口契约）、original（使用目标服务提供商的原始接口契约）                                                                                                                                                          |
+| `context`        | object          | 非必填   | -      | 配置 AI 对话上下文信息                                                                                                                                                                                                                             |
+| `customSettings` | array of customSetting | 非必填   | -      | 为AI请求指定覆盖或者填充参数                                                                                                                                                                                                                           |
+| `failover`       | object | 非必填   | -      | 配置 apiToken 的 failover 策略，当 apiToken 不可用时，将其移出 apiToken 列表，待健康检测通过后重新添加回 apiToken 列表                                                                                                                                                      |
+| `retryOnFailure` | object | 非必填   | -      | 当请求失败时立即进行重试                                                                                                                                                                                                                              |  
+| `reasoningContentMode`       | string          | 非必填   | -      | 如何处理大模型服务返回的推理内容。目前支持以下取值：passthrough（正常输出推理内容）、ignore（不输出推理内容）、concat（将推理内容拼接在常规输出内容之前）。默认为 passthrough。仅支持通义千问服务。                                                                                                                            |
+| `capabilities`       | map of string | 非必填   | -      | 部分 provider 的部分 AI 能力原生兼容 openai/v1 格式，不需要重写，可以直接转发。通过此配置项开启转发：key 表示采用的厂商协议能力，value 表示该厂商对应能力的真实 API path。厂商协议能力当前支持：openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank |

 `context`的配置字段说明如下：

@@ -130,10 +132,11 @@ Azure OpenAI 所对应的 `type` 为 `azure`。它特有的配置字段如下：

 通义千问所对应的 `type` 为 `qwen`。它特有的配置字段如下：

-| 名称                 | 数据类型            | 填写要求 | 默认值 | 描述                                                               |
-|--------------------|-----------------|------|-----|------------------------------------------------------------------|
-| `qwenEnableSearch` | boolean         | 非必填  | -   | 是否启用通义千问内置的互联网搜索功能。                          |
-| `qwenFileIds`      | array of string | 非必填  | -   | 通过文件接口上传至Dashscope的文件 ID，其内容将被用做 AI 对话的上下文。不可与 `context` 字段同时配置。 |
+| 名称                   | 数据类型        | 填写要求 | 默认值 | 描述                                                         |
+| ---------------------- | --------------- | -------- | ------ | ------------------------------------------------------------ |
+| `qwenEnableSearch`     | boolean         | 非必填   | -      | 是否启用通义千问内置的互联网搜索功能。                       |
+| `qwenFileIds`          | array of string | 非必填   | -      | 通过文件接口上传至Dashscope的文件 ID，其内容将被用做 AI 对话的上下文。不可与 `context` 字段同时配置。 |
+| `qwenEnableCompatible` | boolean         | 非必填   | false  | 开启通义千问兼容模式。启用通义千问兼容模式后，将调用千问的兼容模式接口，同时对请求/响应不做修改。 |

 #### 百川智能 (Baichuan AI)

@@ -157,15 +160,7 @@ Groq 所对应的 `type` 为 `groq`。它并无特有的配置字段。

 #### 文心一言（Baidu）

-文心一言所对应的 `type` 为 `baidu`。它特有的配置字段如下：
-
-| 名称                 | 数据类型            | 填写要求 | 默认值 | 描述                                                        |
-|--------------------|-----------------|------|-----|-----------------------------------------------------------|
-| `baiduAccessKeyAndSecret`      | array of string | 必填   | -   | Baidu 的 Access Key 和 Secret Key，中间用 `:` 分隔，用于申请 apiToken。 |
-| `baiduApiTokenServiceName`      | string          | 必填   | -   | 请求刷新百度 apiToken 服务名称。                                     |
-| `baiduApiTokenServiceHost`      | string          | 非必填  | -   | 请求刷新百度 apiToken 服务域名，默认是 iam.bj.baidubce.com。             |
-| `baiduApiTokenServicePort`      | int64           | 非必填  | -   | 请求刷新百度 apiToken 服务端口，默认是 443。                             |
-
+文心一言所对应的 `type` 为 `baidu`。它并无特有的配置字段。

 #### 360智脑

@@ -255,6 +250,17 @@ Cohere 所对应的 `type` 为 `cohere`。它并无特有的配置字段。
 #### Together-AI
 Together-AI 所对应的 `type` 为 `together-ai`。它并无特有的配置字段。

+#### Dify
+Dify 所对应的 `type` 为 `dify`。它特有的配置字段如下：
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述                         |
+| -- | -------- |------| ------ | ---------------------------- |
+| `difyApiUrl` | string   | 非必填  | -      | dify私有化部署的url |
+| `botType` | string   | 非必填  | -      | dify的应用类型，Chat/Completion/Agent/Workflow |
+| `inputVariable` | string   | 非必填  | -      | dify 应用类型为 workflow 时需要设置的输入变量，仅在 botType 为 workflow 时使用 |
+| `outputVariable` | string   | 非必填  | -      | dify 应用类型为 workflow 时需要设置的输出变量，仅在 botType 为 workflow 时使用 |
+
+
 ## 用法示例

 ### 使用 OpenAI 协议代理 Azure OpenAI 服务
@@ -429,25 +435,25 @@ URL: http://your-domain/v1/chat/completions

 ```json
 {
-    "model": "gpt-4o",
-    "messages": [
+  "model": "gpt-4o",
+  "messages": [
+    {
+      "role": "user",
+      "content": [
        {
-            "role": "user",
-            "content": [
-                {
-                    "type": "image_url",
-                    "image_url": {
-                        "url": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"
-                    }
-                },
-                {
-                    "type": "text",
-                    "text": "这个图片是哪里？"
-                }
-            ]
+          "type": "image_url",
+          "image_url": {
+            "url": "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg"
+          }
+        },
+        {
+          "type": "text",
+          "text": "这个图片是哪里？"
        }
-    ],
-    "temperature": 0.3
+      ]
+    }
+  ],
+  "temperature": 0.3
 }
 ```

@@ -455,28 +461,28 @@ URL: http://your-domain/v1/chat/completions

 ```json
 {
-    "id": "17c5955d-af9c-9f28-bbde-293a9c9a3515",
-    "choices": [
-        {
-            "index": 0,
-            "message": {
-                "role": "assistant",
-                "content": [
-                    {
-                        "text": "这张照片显示的是一位女士和一只狗在海滩上。由于我无法获取具体的地理位置信息，所以不能确定这是哪个地方的海滩。但是从视觉内容来看，它可能是一个位于沿海地区的沙滩海岸线，并且有海浪拍打着岸边。这样的场景在全球许多美丽的海滨地区都可以找到。如果您需要更精确的信息，请提供更多的背景或细节描述。"
-                    }
-                ]
-            },
-            "finish_reason": "stop"
-        }
-    ],
-    "created": 1723949230,
-    "model": "qwen-vl-plus",
-    "object": "chat.completion",
-    "usage": {
-        "prompt_tokens": 1279,
-        "completion_tokens": 78
+  "id": "17c5955d-af9c-9f28-bbde-293a9c9a3515",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": [
+          {
+            "text": "这张照片显示的是一位女士和一只狗在海滩上。由于我无法获取具体的地理位置信息，所以不能确定这是哪个地方的海滩。但是从视觉内容来看，它可能是一个位于沿海地区的沙滩海岸线，并且有海浪拍打着岸边。这样的场景在全球许多美丽的海滨地区都可以找到。如果您需要更精确的信息，请提供更多的背景或细节描述。"
+          }
+        ]
+      },
+      "finish_reason": "stop"
    }
+  ],
+  "created": 1723949230,
+  "model": "qwen-vl-plus",
+  "object": "chat.completion",
+  "usage": {
+    "prompt_tokens": 1279,
+    "completion_tokens": 78
+  }
 }
 ```

@@ -599,8 +605,8 @@ provider:
  modelMapping:
    "*": "qwen-long" # 通义千问的文件上下文只能在 qwen-long 模型下使用
  qwenFileIds:
-  - "file-fe-xxx"
-  - "file-fe-yyy"
+    - "file-fe-xxx"
+    - "file-fe-yyy"
 ```

 **请求示例**
@@ -658,7 +664,7 @@ provider:
 ```json
 {
  "input": {
-      "prompt": "介绍一下Dubbo"
+    "prompt": "介绍一下Dubbo"
  },
  "parameters":  {},
  "debug": {}
@@ -669,21 +675,21 @@ provider:

 ```json
 {
-    "output": {
-        "finish_reason": "stop",
-        "session_id": "677e7e8fbb874e1b84792b65042e1599",
-        "text": "Apache Dubbo 是一个..."
-    },
-    "usage": {
-        "models": [
-            {
-                "output_tokens": 449,
-                "model_id": "qwen-max",
-                "input_tokens": 282
-            }
-        ]
-    },
-    "request_id": "b59e45e3-5af4-91df-b7c6-9d746fd3297c"
+  "output": {
+    "finish_reason": "stop",
+    "session_id": "677e7e8fbb874e1b84792b65042e1599",
+    "text": "Apache Dubbo 是一个..."
+  },
+  "usage": {
+    "models": [
+      {
+        "output_tokens": 449,
+        "model_id": "qwen-max",
+        "input_tokens": 282
+      }
+    ]
+  },
+  "request_id": "b59e45e3-5af4-91df-b7c6-9d746fd3297c"
 }
 ```

@@ -926,25 +932,25 @@ curl --location 'http://<your higress domain>/v1/chat/completions' \

 ```json
 {
-    "id": "fd140c3e-0b69-4b19-849b-d354d32a6162",
-    "choices": [
-        {
-            "index": 0,
-            "delta": {
-                "role": "assistant",
-                "content": "你好！我是一名专业的开发人员。"
-            },
-            "finish_reason": "stop"
-        }
-    ],
-    "created": 1717493117,
-    "model": "hunyuan-lite",
-    "object": "chat.completion",
-    "usage": {
-        "prompt_tokens": 15,
-        "completion_tokens": 9,
-        "total_tokens": 24
+  "id": "fd140c3e-0b69-4b19-849b-d354d32a6162",
+  "choices": [
+    {
+      "index": 0,
+      "delta": {
+        "role": "assistant",
+        "content": "你好！我是一名专业的开发人员。"
+      },
+      "finish_reason": "stop"
    }
+  ],
+  "created": 1717493117,
+  "model": "hunyuan-lite",
+  "object": "chat.completion",
+  "usage": {
+    "prompt_tokens": 15,
+    "completion_tokens": 9,
+    "total_tokens": 24
+  }
 }
 ```

@@ -966,14 +972,14 @@ provider:

 ```json
 {
-    "model": "gpt-4-turbo",
-    "messages": [
-        {
-            "role": "user",
-            "content": "你好，你是谁？"
-        }
-    ],
-    "stream": false
+  "model": "gpt-4-turbo",
+  "messages": [
+    {
+      "role": "user",
+      "content": "你好，你是谁？"
+    }
+  ],
+  "stream": false
 }
 ```

@@ -981,25 +987,25 @@ provider:

 ```json
 {
-    "id": "as-e90yfg1pk1",
-    "choices": [
-        {
-            "index": 0,
-            "message": {
-                "role": "assistant",
-                "content": "你好，我是文心一言，英文名是ERNIE Bot。我能够与人对话互动，回答问题，协助创作，高效便捷地帮助人们获取信息、知识和灵感。"
-            },
-            "finish_reason": "stop"
-        }
-    ],
-    "created": 1717251488,
-    "model": "ERNIE-4.0",
-    "object": "chat.completion",
-    "usage": {
-        "prompt_tokens": 4,
-        "completion_tokens": 33,
-        "total_tokens": 37
+  "id": "as-e90yfg1pk1",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "你好，我是文心一言，英文名是ERNIE Bot。我能够与人对话互动，回答问题，协助创作，高效便捷地帮助人们获取信息、知识和灵感。"
+      },
+      "finish_reason": "stop"
    }
+  ],
+  "created": 1717251488,
+  "model": "ERNIE-4.0",
+  "object": "chat.completion",
+  "usage": {
+    "prompt_tokens": 4,
+    "completion_tokens": 33,
+    "total_tokens": 37
+  }
 }
 ```

@@ -1022,14 +1028,14 @@ provider:

 ```json
 {
-    "model": "gpt-3",
-    "messages": [
-        {
-            "role": "user",
-            "content": "你好，你是谁？"
-        }
-    ],
-    "stream": false
+  "model": "gpt-3",
+  "messages": [
+    {
+      "role": "user",
+      "content": "你好，你是谁？"
+    }
+  ],
+  "stream": false
 }
 ```

@@ -1037,37 +1043,37 @@ provider:

 ```json
 {
-    "id": "03ac4fcfe1c6cc9c6a60f9d12046e2b4",
-    "choices": [
-        {
-            "finish_reason": "stop",
-            "index": 0,
-            "message": {
-                "content": "你好，我是一个由MiniMax公司研发的大型语言模型，名为MM智能助理。我可以帮助回答问题、提供信息、进行对话和执行多种语言处理任务。如果你有任何问题或需要帮助，请随时告诉我！",
-                "role": "assistant",
-                "name": "MM智能助理",
-                "audio_content": ""
-            }
-        }
-    ],
-    "created": 1734155471,
-    "model": "abab6.5s-chat",
-    "object": "chat.completion",
-    "usage": {
-        "total_tokens": 116,
-        "total_characters": 0,
-        "prompt_tokens": 70,
-        "completion_tokens": 46
-    },
-    "input_sensitive": false,
-    "output_sensitive": false,
-    "input_sensitive_type": 0,
-    "output_sensitive_type": 0,
-    "output_sensitive_int": 0,
-    "base_resp": {
-        "status_code": 0,
-        "status_msg": ""
+  "id": "03ac4fcfe1c6cc9c6a60f9d12046e2b4",
+  "choices": [
+    {
+      "finish_reason": "stop",
+      "index": 0,
+      "message": {
+        "content": "你好，我是一个由MiniMax公司研发的大型语言模型，名为MM智能助理。我可以帮助回答问题、提供信息、进行对话和执行多种语言处理任务。如果你有任何问题或需要帮助，请随时告诉我！",
+        "role": "assistant",
+        "name": "MM智能助理",
+        "audio_content": ""
+      }
    }
+  ],
+  "created": 1734155471,
+  "model": "abab6.5s-chat",
+  "object": "chat.completion",
+  "usage": {
+    "total_tokens": 116,
+    "total_characters": 0,
+    "prompt_tokens": 70,
+    "completion_tokens": 46
+  },
+  "input_sensitive": false,
+  "output_sensitive": false,
+  "input_sensitive_type": 0,
+  "output_sensitive_type": 0,
+  "output_sensitive_int": 0,
+  "base_resp": {
+    "status_code": 0,
+    "status_msg": ""
+  }
 }
 ```

@@ -1356,18 +1362,18 @@ provider:

 ```json
 {
-    "model": "gpt-4o",
-    "messages": [
-        {
-            "role": "system",
-            "content": "你是一名专业的开发人员！"
-        },
-        {
-            "role": "user",
-            "content": "你好，你是谁？"
-        }
-    ],
-    "stream": false
+  "model": "gpt-4o",
+  "messages": [
+    {
+      "role": "system",
+      "content": "你是一名专业的开发人员！"
+    },
+    {
+      "role": "user",
+      "content": "你好，你是谁？"
+    }
+  ],
+  "stream": false
 }
 ```

@@ -1375,24 +1381,24 @@ provider:

 ```json
 {
-    "id": "cha000c23c6@dx190ef0b4b96b8f2532",
-    "choices": [
-        {
-            "index": 0,
-            "message": {
-                "role": "assistant",
-                "content": "你好！我是一名专业的开发人员，擅长编程和解决技术问题。有什么我可以帮助你的吗？"
-            }
-        }
-    ],
-    "created": 1721997415,
-    "model": "generalv3.5",
-    "object": "chat.completion",
-    "usage": {
-        "prompt_tokens": 10,
-        "completion_tokens": 19,
-        "total_tokens": 29
+  "id": "cha000c23c6@dx190ef0b4b96b8f2532",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "你好！我是一名专业的开发人员，擅长编程和解决技术问题。有什么我可以帮助你的吗？"
+      }
    }
+  ],
+  "created": 1721997415,
+  "model": "generalv3.5",
+  "object": "chat.completion",
+  "usage": {
+    "prompt_tokens": 10,
+    "completion_tokens": 19,
+    "total_tokens": 29
+  }
 }
 ```

@@ -1418,14 +1424,14 @@ provider:

 ```json
 {
-    "model": "gpt-3.5",
-    "messages": [
-        {
-            "role": "user",
-            "content": "Who are you?"
-        }
-    ],
-    "stream": false
+  "model": "gpt-3.5",
+  "messages": [
+    {
+      "role": "user",
+      "content": "Who are you?"
+    }
+  ],
+  "stream": false
 }
 ```

@@ -1433,25 +1439,25 @@ provider:

 ```json
 {
-    "id": "chatcmpl-b010867c-0d3f-40ba-95fd-4e8030551aeb",
-    "choices": [
-        {
-            "index": 0,
-            "message": {
-                "role": "assistant",
-                "content": "I am a large multi-modal model, trained by Google. I am designed to provide information and answer questions to the best of my abilities."
-            },
-            "finish_reason": "stop"
-        }
-    ],
-    "created": 1722756984,
-    "model": "gemini-pro",
-    "object": "chat.completion",
-    "usage": {
-        "prompt_tokens": 5,
-        "completion_tokens": 29,
-        "total_tokens": 34
+  "id": "chatcmpl-b010867c-0d3f-40ba-95fd-4e8030551aeb",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "I am a large multi-modal model, trained by Google. I am designed to provide information and answer questions to the best of my abilities."
+      },
+      "finish_reason": "stop"
    }
+  ],
+  "created": 1722756984,
+  "model": "gemini-pro",
+  "object": "chat.completion",
+  "usage": {
+    "prompt_tokens": 5,
+    "completion_tokens": 29,
+    "total_tokens": 34
+  }
 }
 ```

@@ -1523,13 +1529,13 @@ provider:
 **请求示例**
 ```json
 {
-    "model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
-    "messages": [
-        {
-            "role": "user",
-            "content": "Who are you?"
-        }
-    ]
+  "model": "Qwen/Qwen2.5-72B-Instruct-Turbo",
+  "messages": [
+    {
+      "role": "user",
+      "content": "Who are you?"
+    }
+  ]
 }
 ```

@@ -1562,6 +1568,57 @@ provider:
 }
 ```

+### 使用 OpenAI 协议代理 Dify 服务
+
+**配置信息**
+```yaml
+provider:
+  type: dify
+  apiTokens:
+    - "YOUR_DIFY_API_TOKEN"
+  modelMapping:
+    "*": "dify"
+```
+
+**请求示例**
+```json
+{
+  "model": "gpt-4-turbo",
+  "messages": [
+    {
+      "role": "user",
+      "content": "你好，你是谁？"
+    }
+  ],
+  "stream": false
+}
+```
+
+**响应示例**
+```json
+{
+  "id": "e33fc636-f9e8-4fae-8d5e-fbd0acb09401",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "你好！我是ChatGPT，由OpenAI开发的人工智能语言模型。我可以帮助回答问题、提供建议或进行各种对话。如果你有任何需要，随时告诉我哦！"
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "created": 1736657752,
+  "model": "dify",
+  "object": "chat.completion",
+  "usage": {
+    "prompt_tokens": 16,
+    "completion_tokens": 243,
+    "total_tokens": 259
+  }
+}
+```
+

 ## 完整配置示例

@@ -1577,13 +1634,13 @@ metadata:
  namespace: higress-system
 spec:
  matchRules:
-  - config:
-      provider:
-        type: groq
-        apiTokens: 
-          - "YOUR_API_TOKEN"
-    ingress:
-    - groq
+    - config:
+        provider:
+          type: groq
+          apiTokens:
+            - "YOUR_API_TOKEN"
+      ingress:
+        - groq
  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
 ---
 apiVersion: networking.k8s.io/v1
@@ -1601,16 +1658,16 @@ metadata:
 spec:
  ingressClassName: higress
  rules:
-  - host: <YOUR-DOMAIN> 
-    http:
-      paths:
-      - backend:
-          resource:
-            apiGroup: networking.higress.io
-            kind: McpBridge
-            name: default
-        path: /
-        pathType: Prefix
+    - host: <YOUR-DOMAIN>
+      http:
+        paths:
+          - backend:
+              resource:
+                apiGroup: networking.higress.io
+                kind: McpBridge
+                name: default
+            path: /
+            pathType: Prefix
 ---
 apiVersion: networking.higress.io/v1
 kind: McpBridge
@@ -1619,10 +1676,10 @@ metadata:
  namespace: higress-system
 spec:
  registries:
-  - domain: api.groq.com
-    name: groq
-    port: 443
-    type: dns
+    - domain: api.groq.com
+      name: groq
+      port: 443
+      type: dns
 ```

 访问示例：
--- a/plugins/wasm-go/extensions/ai-proxy/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-proxy/README_EN.md
@@ -106,6 +106,7 @@ For Qwen (Tongyi Qwen), the corresponding `type` is `qwen`. Its unique configura
 |--------------------|-----------------|----------------------|---------------|------------------------------------------------------------------------------------------------------------------------|
 | `qwenEnableSearch`  | boolean          | Optional             | -             | Whether to enable the built-in Internet search function provided by Qwen.                                             |
 | `qwenFileIds`       | array of string   | Optional             | -             | The file IDs uploaded via the Dashscope file interface, whose content will be used as context for AI conversations. Cannot be configured with the `context` field. |
+| `qwenEnableCompatible` | boolean          | Optional | false         | Whether to enable Qwen compatible mode. When enabled, the compatible-mode interface of Qwen is called, and the request/response is not modified. |

 #### Baichuan AI

--- a/plugins/wasm-go/extensions/ai-proxy/VERSION
+++ b/plugins/wasm-go/extensions/ai-proxy/VERSION
@@ -0,0 +1 @@
+1.0.0-alpha
--- a/plugins/wasm-go/extensions/ai-proxy/config/config.go
+++ b/plugins/wasm-go/extensions/ai-proxy/config/config.go
@@ -80,18 +80,16 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
 		c.activeProvider = nil
 		return nil
 	}
+
 	var err error
+
 	c.activeProvider, err = provider.CreateProvider(*c.activeProviderConfig)
-
-	providerConfig := c.GetProviderConfig()
-	err = providerConfig.SetApiTokensFailover(log, c.activeProvider)
-
-	if handler, ok := c.activeProvider.(provider.TickFuncHandler); ok {
-		tickPeriod, tickFunc := handler.GetTickFunc(log)
-		wrapper.RegisteTickFunc(tickPeriod, tickFunc)
+	if err != nil {
+		return err
 	}

-	return err
+	providerConfig := c.GetProviderConfig()
+	return providerConfig.SetApiTokensFailover(log, c.activeProvider)
 }

 func (c *PluginConfig) GetProvider() provider.Provider {
--- a/plugins/wasm-go/extensions/ai-proxy/main.go
+++ b/plugins/wasm-go/extensions/ai-proxy/main.go
@@ -15,12 +15,13 @@ import (
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
 	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
 )

 const (
 	pluginName = "ai-proxy"

-	defaultMaxBodyBytes uint32 = 10 * 1024 * 1024
+	defaultMaxBodyBytes uint32 = 100 * 1024 * 1024
 )

 func main() {
@@ -40,9 +41,11 @@ func parseGlobalConfig(json gjson.Result, pluginConfig *config.PluginConfig, log

 	pluginConfig.FromJson(json)
 	if err := pluginConfig.Validate(); err != nil {
+		log.Errorf("global rule config is invalid: %v", err)
 		return err
 	}
 	if err := pluginConfig.Complete(log); err != nil {
+		log.Errorf("failed to apply global rule config: %v", err)
 		return err
 	}

@@ -56,9 +59,11 @@ func parseOverrideRuleConfig(json gjson.Result, global config.PluginConfig, plug

 	pluginConfig.FromJson(json)
 	if err := pluginConfig.Validate(); err != nil {
+		log.Errorf("overriden rule config is invalid: %v", err)
 		return err
 	}
 	if err := pluginConfig.Complete(log); err != nil {
+		log.Errorf("failed to apply overriden rule config: %v", err)
 		return err
 	}

@@ -78,7 +83,7 @@ func onHttpRequestHeader(ctx wrapper.HttpContext, pluginConfig config.PluginConf

 	rawPath := ctx.Path()
 	path, _ := url.Parse(rawPath)
-	apiName := getOpenAiApiName(path.Path)
+	apiName := getApiName(path.Path)
 	providerConfig := pluginConfig.GetProviderConfig()
 	if providerConfig.IsOriginal() {
 		if handler, ok := activeProvider.(provider.ApiNameHandler); ok {
@@ -87,37 +92,39 @@ func onHttpRequestHeader(ctx wrapper.HttpContext, pluginConfig config.PluginConf
 	}

 	if apiName == "" {
-		log.Warnf("[onHttpRequestHeader] unsupported path: %s", path.Path)
-		return types.ActionContinue
+		ctx.DontReadRequestBody()
+		ctx.DontReadResponseBody()
+		log.Warnf("[onHttpRequestHeader] unsupported path: %s, will not process http path and body", path.Path)
 	}

 	ctx.SetContext(provider.CtxKeyApiName, apiName)
 	// Disable the route re-calculation since the plugin may modify some headers related to the chosen route.
 	ctx.DisableReroute()

-	_, needHandleStreamingBody := activeProvider.(provider.StreamingResponseBodyHandler)
-	if needHandleStreamingBody {
-		proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
-	}
+	// Always remove the Accept-Encoding header to prevent the LLM from sending compressed responses,
+	// allowing plugins to inspect or modify the response correctly
+	_ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding")

 	if handler, ok := activeProvider.(provider.RequestHeadersHandler); ok {
 		// Set the apiToken for the current request.
 		providerConfig.SetApiTokenInUse(ctx, log)
+		// Store the apiTokens available to the current request in the context; they are used by retryOnFailure.
+		providerConfig.SetAvailableApiTokens(ctx, log)

-		hasRequestBody := wrapper.HasRequestBody()
 		err := handler.OnRequestHeaders(ctx, apiName, log)
-		if err == nil {
-			if hasRequestBody {
-				proxywasm.RemoveHttpRequestHeader("Content-Length")
-				ctx.SetRequestBodyBufferLimit(defaultMaxBodyBytes)
-				// Delay the header processing to allow changing in OnRequestBody
-				return types.HeaderStopIteration
-			}
-			ctx.DontReadRequestBody()
+		if err != nil {
+			_ = util.ErrorHandler("ai-proxy.proc_req_headers_failed", fmt.Errorf("failed to process request headers: %v", err))
 			return types.ActionContinue
 		}

-		util.ErrorHandler("ai-proxy.proc_req_headers_failed", fmt.Errorf("failed to process request headers: %v", err))
+		hasRequestBody := wrapper.HasRequestBody()
+		if hasRequestBody {
+			_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
+			ctx.SetRequestBodyBufferLimit(defaultMaxBodyBytes)
+			// Delay the header processing to allow changing in OnRequestBody
+			return types.HeaderStopIteration
+		}
+		ctx.DontReadRequestBody()
 		return types.ActionContinue
 	}

@@ -136,23 +143,21 @@ func onHttpRequestBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig

 	if handler, ok := activeProvider.(provider.RequestBodyHandler); ok {
 		apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
-
-		newBody, settingErr := pluginConfig.GetProviderConfig().ReplaceByCustomSettings(body)
+		providerConfig := pluginConfig.GetProviderConfig()
+		newBody, settingErr := providerConfig.ReplaceByCustomSettings(body)
 		if settingErr != nil {
-			util.ErrorHandler(
-				"ai-proxy.proc_req_body_failed",
-				fmt.Errorf("failed to replace request body by custom settings: %v", settingErr),
-			)
-			return types.ActionContinue
+			log.Errorf("failed to replace request body by custom settings: %v", settingErr)
+		}
+		if providerConfig.IsOpenAIProtocol() {
+			newBody = normalizeOpenAiRequestBody(newBody, log)
 		}
-
 		log.Debugf("[onHttpRequestBody] newBody=%s", newBody)
 		body = newBody
 		action, err := handler.OnRequestBody(ctx, apiName, body, log)
 		if err == nil {
 			return action
 		}
-		util.ErrorHandler("ai-proxy.proc_req_body_failed", fmt.Errorf("failed to process request body: %v", err))
+		_ = util.ErrorHandler("ai-proxy.proc_req_body_failed", fmt.Errorf("failed to process request body: %v", err))
 	}
 	return types.ActionContinue
 }
@@ -176,6 +181,7 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo

 	providerConfig := pluginConfig.GetProviderConfig()
 	apiTokenInUse := providerConfig.GetApiTokenInUse(ctx)
+	apiTokens := providerConfig.GetAvailableApiToken(ctx)

 	status, err := proxywasm.GetHttpResponseHeader(":status")
 	if err != nil || status != "200" {
@@ -183,7 +189,7 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo
 			log.Errorf("unable to load :status header from response: %v", err)
 		}
 		ctx.DontReadResponseBody()
-		return providerConfig.OnRequestFailed(activeProvider, ctx, apiTokenInUse, log)
+		return providerConfig.OnRequestFailed(activeProvider, ctx, apiTokenInUse, apiTokens, log)
 	}

 	// Reset ctxApiTokenRequestFailureCount if the request is successful,
@@ -200,8 +206,15 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo
 	util.ReplaceResponseHeaders(headers)

 	checkStream(ctx, log)
-	_, needHandleStreamingBody := activeProvider.(provider.StreamingResponseBodyHandler)
+	_, needHandleBody := activeProvider.(provider.TransformResponseBodyHandler)
+	var needHandleStreamingBody bool
+	_, needHandleStreamingBody = activeProvider.(provider.StreamingResponseBodyHandler)
 	if !needHandleStreamingBody {
+		_, needHandleStreamingBody = activeProvider.(provider.StreamingEventHandler)
+	}
+	if !needHandleBody && !needHandleStreamingBody {
+		ctx.DontReadResponseBody()
+	} else if !needHandleStreamingBody {
 		ctx.BufferResponseBody()
 	}

@@ -217,7 +230,7 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
 	}

 	log.Debugf("[onStreamingResponseBody] provider=%s", activeProvider.GetProviderType())
-	log.Debugf("isLastChunk=%v chunk: %s", isLastChunk, string(chunk))
+	log.Debugf("[onStreamingResponseBody] isLastChunk=%v chunk: %s", isLastChunk, string(chunk))

 	if handler, ok := activeProvider.(provider.StreamingResponseBodyHandler); ok {
 		apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
@@ -227,6 +240,38 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
 		}
 		return chunk
 	}
+	if handler, ok := activeProvider.(provider.StreamingEventHandler); ok {
+		apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
+		events := provider.ExtractStreamingEvents(ctx, chunk, log)
+		log.Debugf("[onStreamingResponseBody] %d events received", len(events))
+		if len(events) == 0 {
+			// No events are extracted, return the original chunk
+			return chunk
+		}
+		var responseBuilder strings.Builder
+		for _, event := range events {
+			log.Debugf("processing event: %v", event)
+
+			if event.IsEndData() {
+				responseBuilder.WriteString(event.ToHttpString())
+				continue
+			}
+
+			outputEvents, err := handler.OnStreamingEvent(ctx, apiName, event, log)
+			if err != nil {
+				log.Errorf("[onStreamingResponseBody] failed to process streaming event: %v\n%s", err, chunk)
+				return chunk
+			}
+			if outputEvents == nil || len(outputEvents) == 0 {
+				responseBuilder.WriteString(event.ToHttpString())
+			} else {
+				for _, outputEvent := range outputEvents {
+					responseBuilder.WriteString(outputEvent.ToHttpString())
+				}
+			}
+		}
+		return []byte(responseBuilder.String())
+	}
 	return chunk
 }

@@ -244,16 +289,28 @@ func onHttpResponseBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfi
 		apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
 		body, err := handler.TransformResponseBody(ctx, apiName, body, log)
 		if err != nil {
-			util.ErrorHandler("ai-proxy.proc_resp_body_failed", fmt.Errorf("failed to process response body: %v", err))
+			_ = util.ErrorHandler("ai-proxy.proc_resp_body_failed", fmt.Errorf("failed to process response body: %v", err))
 			return types.ActionContinue
 		}
 		if err = provider.ReplaceResponseBody(body, log); err != nil {
-			util.ErrorHandler("ai-proxy.replace_resp_body_failed", fmt.Errorf("failed to replace response body: %v", err))
+			_ = util.ErrorHandler("ai-proxy.replace_resp_body_failed", fmt.Errorf("failed to replace response body: %v", err))
 		}
 	}
 	return types.ActionContinue
 }

+func normalizeOpenAiRequestBody(body []byte, log wrapper.Log) []byte {
+	var err error
+	// Default setting include_usage.
+	if gjson.GetBytes(body, "stream").Bool() {
+		body, err = sjson.SetBytes(body, "stream_options.include_usage", true)
+		if err != nil {
+			log.Errorf("set include_usage failed, err:%s", err)
+		}
+	}
+	return body
+}
+
 func checkStream(ctx wrapper.HttpContext, log wrapper.Log) {
 	contentType, err := proxywasm.GetHttpResponseHeader("Content-Type")
 	if err != nil || !strings.HasPrefix(contentType, "text/event-stream") {
@@ -265,12 +322,26 @@ func checkStream(ctx wrapper.HttpContext, log wrapper.Log) {
 	}
 }

-func getOpenAiApiName(path string) provider.ApiName {
+func getApiName(path string) provider.ApiName {
+	// openai style
+	if strings.HasSuffix(path, "/v1/completions") {
+		return provider.ApiNameCompletion
+	}
 	if strings.HasSuffix(path, "/v1/chat/completions") {
 		return provider.ApiNameChatCompletion
 	}
 	if strings.HasSuffix(path, "/v1/embeddings") {
 		return provider.ApiNameEmbeddings
 	}
+	if strings.HasSuffix(path, "/v1/audio/speech") {
+		return provider.ApiNameAudioSpeech
+	}
+	if strings.HasSuffix(path, "/v1/images/generations") {
+		return provider.ApiNameImageGeneration
+	}
+	// cohere style
+	if strings.HasSuffix(path, "/v1/rerank") {
+		return provider.ApiNameCohereV1Rerank
+	}
 	return ""
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/ai360.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/ai360.go
@@ -22,7 +22,14 @@ type ai360Provider struct {
 	contextCache *contextCache
 }

-func (m *ai360ProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *ai360ProviderInitializer) DefaultCapabilities() map[string]string {
+	return map[string]string{
+		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
+		string(ApiNameEmbeddings):     PathOpenAIEmbeddings,
+	}
+}
+
+func (m *ai360ProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
 	}
@@ -30,6 +37,7 @@ func (m *ai360ProviderInitializer) ValidateConfig(config ProviderConfig) error {
 }

 func (m *ai360ProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &ai360Provider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -41,16 +49,13 @@ func (m *ai360Provider) GetProviderType() string {
 }

 func (m *ai360Provider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion && apiName != ApiNameEmbeddings {
-		return errUnsupportedApiName
-	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	// Delay the header processing to allow changing streaming mode in OnRequestBody
 	return nil
 }

 func (m *ai360Provider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion && apiName != ApiNameEmbeddings {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
@@ -58,5 +63,6 @@ func (m *ai360Provider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName,

 func (m *ai360Provider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
 	util.OverwriteRequestHostHeader(headers, ai360Domain)
+	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 	util.OverwriteRequestAuthorizationHeader(headers, m.config.GetApiTokenInUse(ctx))
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/azure.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/azure.go
@@ -15,7 +15,15 @@ import (
 type azureProviderInitializer struct {
 }

-func (m *azureProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *azureProviderInitializer) DefaultCapabilities() map[string]string {
+	return map[string]string{
+		// TODO: azure's pattern is the same as openai, just need to handle the prefix, can be done in TransformRequestHeaders to support general capabilities
+		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
+		string(ApiNameEmbeddings):     PathOpenAIEmbeddings,
+	}
+}
+
+func (m *azureProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.azureServiceUrl == "" {
 		return errors.New("missing azureServiceUrl in provider config")
 	}
@@ -35,6 +43,7 @@ func (m *azureProviderInitializer) CreateProvider(config ProviderConfig) (Provid
 	} else {
 		serviceUrl = u
 	}
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &azureProvider{
 		config:       config,
 		serviceUrl:   serviceUrl,
@@ -54,36 +63,35 @@ func (m *azureProvider) GetProviderType() string {
 }

 func (m *azureProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	return nil
 }

 func (m *azureProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
 }

 func (m *azureProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	u, e := url.Parse(ctx.Path())
-	if e == nil {
-		customApiVersion := u.Query().Get("api-version")
-		if customApiVersion == "" {
-			util.OverwriteRequestPathHeader(headers, m.serviceUrl.RequestURI())
+	if apiName != "" {
+		u, e := url.Parse(ctx.Path())
+		if e == nil {
+			customApiVersion := u.Query().Get("api-version")
+			if customApiVersion == "" {
+				util.OverwriteRequestPathHeader(headers, m.serviceUrl.RequestURI())
+			} else {
+				q := m.serviceUrl.Query()
+				q.Set("api-version", customApiVersion)
+				newUrl := *m.serviceUrl
+				newUrl.RawQuery = q.Encode()
+				util.OverwriteRequestPathHeader(headers, newUrl.RequestURI())
+			}
 		} else {
-			q := m.serviceUrl.Query()
-			q.Set("api-version", customApiVersion)
-			newUrl := *m.serviceUrl
-			newUrl.RawQuery = q.Encode()
-			util.OverwriteRequestPathHeader(headers, newUrl.RequestURI())
+			log.Errorf("failed to parse request path: %v", e)
+			util.OverwriteRequestPathHeader(headers, m.serviceUrl.RequestURI())
 		}
-	} else {
-		log.Errorf("failed to parse request path: %v", e)
-		util.OverwriteRequestPathHeader(headers, m.serviceUrl.RequestURI())
 	}
 	util.OverwriteRequestHostHeader(headers, m.serviceUrl.Host)
 	headers.Set("api-key", m.config.GetApiTokenInUse(ctx))
--- a/plugins/wasm-go/extensions/ai-proxy/provider/baichuan.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/baichuan.go
@@ -12,21 +12,28 @@ import (
 // baichuanProvider is the provider for baichuan Ai service.

 const (
-	baichuanDomain             = "api.baichuan-ai.com"
-	baichuanChatCompletionPath = "/v1/chat/completions"
+	baichuanDomain = "api.baichuan-ai.com"
 )

 type baichuanProviderInitializer struct {
 }

-func (m *baichuanProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *baichuanProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
 	}
 	return nil
 }

+func (m *baichuanProviderInitializer) DefaultCapabilities() map[string]string {
+	return map[string]string{
+		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
+		string(ApiNameEmbeddings):     PathOpenAIEmbeddings,
+	}
+}
+
 func (m *baichuanProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &baichuanProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -43,22 +50,19 @@ func (m *baichuanProvider) GetProviderType() string {
 }

 func (m *baichuanProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	return nil
 }

 func (m *baichuanProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
 }

 func (m *baichuanProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestPathHeader(headers, baichuanChatCompletionPath)
+	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 	util.OverwriteRequestHostHeader(headers, baichuanDomain)
 	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+m.config.GetApiTokenInUse(ctx))
 	headers.Del("Content-Length")
--- a/plugins/wasm-go/extensions/ai-proxy/provider/baidu.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/baidu.go
@@ -1,16 +1,9 @@
 package provider

 import (
-	"crypto/hmac"
-	"crypto/sha256"
-	"encoding/hex"
-	"encoding/json"
 	"errors"
-	"fmt"
 	"net/http"
-	"sort"
 	"strings"
-	"time"

 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
@@ -21,33 +14,27 @@ import (
 const (
 	baiduDomain             = "qianfan.baidubce.com"
 	baiduChatCompletionPath = "/v2/chat/completions"
-	baiduApiTokenDomain     = "iam.bj.baidubce.com"
-	baiduApiTokenPort       = 443
-	baiduApiTokenPath       = "/v1/BCE-BEARER/token"
-	// refresh apiToken every 1 hour
-	baiduApiTokenRefreshInterval = 3600
-	// authorizationString expires in 30 minutes, authorizationString is used to generate apiToken
-	// the default expiration time of apiToken is 24 hours
-	baiduAuthorizationStringExpirationSeconds = 1800
-	bce_prefix                                = "x-bce-"
+	baiduEmbeddings         = "/v2/embeddings"
 )

 type baiduProviderInitializer struct{}

-func (g *baiduProviderInitializer) ValidateConfig(config ProviderConfig) error {
-	if config.baiduAccessKeyAndSecret == nil || len(config.baiduAccessKeyAndSecret) == 0 {
-		return errors.New("no baiduAccessKeyAndSecret found in provider config")
-	}
-	if config.baiduApiTokenServiceName == "" {
-		return errors.New("no baiduApiTokenServiceName found in provider config")
-	}
-	if !config.failover.enabled {
-		config.useGlobalApiToken = true
+func (g *baiduProviderInitializer) ValidateConfig(config *ProviderConfig) error {
+	if config.apiTokens == nil || len(config.apiTokens) == 0 {
+		return errors.New("no apiToken found in provider config")
 	}
 	return nil
 }

+// DefaultCapabilities returns the API-name-to-request-path table that
+// CreateProvider passes to setDefaultCapabilities for Baidu.
+func (g *baiduProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string, 2)
+	capabilities[string(ApiNameChatCompletion)] = baiduChatCompletionPath
+	capabilities[string(ApiNameEmbeddings)] = baiduEmbeddings
+	return capabilities
+}
+
 func (g *baiduProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(g.DefaultCapabilities())
 	return &baiduProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -64,22 +51,19 @@ func (g *baiduProvider) GetProviderType() string {
 }

 func (g *baiduProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	g.config.handleRequestHeaders(g, ctx, apiName, log)
 	return nil
 }

 func (g *baiduProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !g.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return g.config.handleRequestBody(g, g.contextCache, ctx, apiName, body, log)
 }

 func (g *baiduProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestPathHeader(headers, baiduChatCompletionPath)
+	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), g.config.capabilities)
 	util.OverwriteRequestHostHeader(headers, baiduDomain)
 	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+g.config.GetApiTokenInUse(ctx))
 	headers.Del("Content-Length")
@@ -91,203 +75,3 @@ func (g *baiduProvider) GetApiName(path string) ApiName {
 	}
 	return ""
 }
-
-func generateAuthorizationString(accessKeyAndSecret string, expirationInSeconds int) string {
-	c := strings.Split(accessKeyAndSecret, ":")
-	credentials := BceCredentials{
-		AccessKeyId:     c[0],
-		SecretAccessKey: c[1],
-	}
-	httpMethod := "GET"
-	path := baiduApiTokenPath
-	headers := map[string]string{"host": baiduApiTokenDomain}
-	timestamp := time.Now().Unix()
-
-	headersToSign := make([]string, 0, len(headers))
-	for k := range headers {
-		headersToSign = append(headersToSign, k)
-	}
-
-	return sign(credentials, httpMethod, path, headers, timestamp, expirationInSeconds, headersToSign)
-}
-
-// BceCredentials holds the access key and secret key
-type BceCredentials struct {
-	AccessKeyId     string
-	SecretAccessKey string
-}
-
-// normalizeString performs URI encoding according to RFC 3986
-func normalizeString(inStr string, encodingSlash bool) string {
-	if inStr == "" {
-		return ""
-	}
-
-	var result strings.Builder
-	for _, ch := range []byte(inStr) {
-		if (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') ||
-			(ch >= '0' && ch <= '9') || ch == '.' || ch == '-' ||
-			ch == '_' || ch == '~' || (!encodingSlash && ch == '/') {
-			result.WriteByte(ch)
-		} else {
-			result.WriteString(fmt.Sprintf("%%%02X", ch))
-		}
-	}
-	return result.String()
-}
-
-// getCanonicalTime generates a timestamp in UTC format
-func getCanonicalTime(timestamp int64) string {
-	if timestamp == 0 {
-		timestamp = time.Now().Unix()
-	}
-	t := time.Unix(timestamp, 0).UTC()
-	return t.Format("2006-01-02T15:04:05Z")
-}
-
-// getCanonicalUri generates a canonical URI
-func getCanonicalUri(path string) string {
-	return normalizeString(path, false)
-}
-
-// getCanonicalHeaders generates canonical headers
-func getCanonicalHeaders(headers map[string]string, headersToSign []string) string {
-	if len(headers) == 0 {
-		return ""
-	}
-
-	// If headersToSign is not specified, use default headers
-	if len(headersToSign) == 0 {
-		headersToSign = []string{"host", "content-md5", "content-length", "content-type"}
-	}
-
-	// Convert headersToSign to a map for easier lookup
-	headerMap := make(map[string]bool)
-	for _, header := range headersToSign {
-		headerMap[strings.ToLower(strings.TrimSpace(header))] = true
-	}
-
-	// Create a slice to hold the canonical headers
-	var canonicalHeaders []string
-	for k, v := range headers {
-		k = strings.ToLower(strings.TrimSpace(k))
-		v = strings.TrimSpace(v)
-
-		// Add headers that start with x-bce- or are in headersToSign
-		if strings.HasPrefix(k, bce_prefix) || headerMap[k] {
-			canonicalHeaders = append(canonicalHeaders,
-				fmt.Sprintf("%s:%s", normalizeString(k, true), normalizeString(v, true)))
-		}
-	}
-
-	// Sort the canonical headers
-	sort.Strings(canonicalHeaders)
-
-	return strings.Join(canonicalHeaders, "\n")
-}
-
-// sign generates the authorization string
-func sign(credentials BceCredentials, httpMethod, path string, headers map[string]string,
-	timestamp int64, expirationInSeconds int,
-	headersToSign []string) string {
-
-	// Generate sign key
-	signKeyInfo := fmt.Sprintf("bce-auth-v1/%s/%s/%d",
-		credentials.AccessKeyId,
-		getCanonicalTime(timestamp),
-		expirationInSeconds)
-
-	// Generate sign key using HMAC-SHA256
-	h := hmac.New(sha256.New, []byte(credentials.SecretAccessKey))
-	h.Write([]byte(signKeyInfo))
-	signKey := hex.EncodeToString(h.Sum(nil))
-
-	// Generate canonical URI
-	canonicalUri := getCanonicalUri(path)
-
-	// Generate canonical headers
-	canonicalHeaders := getCanonicalHeaders(headers, headersToSign)
-
-	// Generate string to sign
-	stringToSign := strings.Join([]string{
-		httpMethod,
-		canonicalUri,
-		"",
-		canonicalHeaders,
-	}, "\n")
-
-	// Calculate final signature
-	h = hmac.New(sha256.New, []byte(signKey))
-	h.Write([]byte(stringToSign))
-	signature := hex.EncodeToString(h.Sum(nil))
-
-	// Generate final authorization string
-	if len(headersToSign) > 0 {
-		return fmt.Sprintf("%s/%s/%s", signKeyInfo, strings.Join(headersToSign, ";"), signature)
-	}
-	return fmt.Sprintf("%s//%s", signKeyInfo, signature)
-}
-
-// GetTickFunc Refresh apiToken (apiToken) periodically, the maximum apiToken expiration time is 24 hours
-func (g *baiduProvider) GetTickFunc(log wrapper.Log) (tickPeriod int64, tickFunc func()) {
-	vmID := generateVMID()
-
-	return baiduApiTokenRefreshInterval * 1000, func() {
-		// Only the Wasm VM that successfully acquires the lease will refresh the apiToken
-		if g.config.tryAcquireOrRenewLease(vmID, log) {
-			log.Debugf("Successfully acquired or renewed lease for baidu apiToken refresh task, vmID: %v", vmID)
-			// Get the apiToken that is about to expire, will be removed after the new apiToken is obtained
-			oldApiTokens, _, err := getApiTokens(g.config.failover.ctxApiTokens)
-			if err != nil {
-				log.Errorf("Get old apiToken failed: %v", err)
-				return
-			}
-			log.Debugf("Old apiTokens: %v", oldApiTokens)
-
-			for _, accessKeyAndSecret := range g.config.baiduAccessKeyAndSecret {
-				authorizationString := generateAuthorizationString(accessKeyAndSecret, baiduAuthorizationStringExpirationSeconds)
-				log.Debugf("Generate authorizationString: %v", authorizationString)
-				g.generateNewApiToken(authorizationString, log)
-			}
-
-			// remove old old apiToken
-			for _, token := range oldApiTokens {
-				log.Debugf("Remove old apiToken: %v", token)
-				removeApiToken(g.config.failover.ctxApiTokens, token, log)
-			}
-		}
-	}
-}
-
-func (g *baiduProvider) generateNewApiToken(authorizationString string, log wrapper.Log) {
-	client := wrapper.NewClusterClient(wrapper.FQDNCluster{
-		FQDN: g.config.baiduApiTokenServiceName,
-		Host: g.config.baiduApiTokenServiceHost,
-		Port: g.config.baiduApiTokenServicePort,
-	})
-
-	headers := [][2]string{
-		{"content-type", "application/json"},
-		{"Authorization", authorizationString},
-	}
-
-	var apiToken string
-	err := client.Get(baiduApiTokenPath, headers, func(statusCode int, responseHeaders http.Header, responseBody []byte) {
-		if statusCode == 201 {
-			var response map[string]interface{}
-			err := json.Unmarshal(responseBody, &response)
-			if err != nil {
-				log.Errorf("Unmarshal response failed: %v", err)
-			} else {
-				apiToken = response["token"].(string)
-				addApiToken(g.config.failover.ctxApiTokens, apiToken, log)
-			}
-		} else {
-			log.Errorf("Get apiToken failed, status code: %d, response body: %s", statusCode, string(responseBody))
-		}
-	}, 30000)
-
-	if err != nil {
-		log.Errorf("Get apiToken failed: %v", err)
-	}
-}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/claude.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/claude.go
@@ -17,6 +17,7 @@ import (
 const (
 	claudeDomain             = "api.anthropic.com"
 	claudeChatCompletionPath = "/v1/messages"
+	claudeCompletionPath     = "/v1/complete"
 	defaultVersion           = "2023-06-01"
 	defaultMaxTokens         = 4096
 )
@@ -78,14 +79,24 @@ type claudeTextGenDelta struct {
 	StopSequence *string `json:"stop_sequence"`
 }

-func (c *claudeProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (c *claudeProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
 	}
 	return nil
 }

+// DefaultCapabilities returns the API-name-to-request-path table that
+// CreateProvider passes to setDefaultCapabilities for Claude.
+func (c *claudeProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string, 3)
+	capabilities[string(ApiNameChatCompletion)] = claudeChatCompletionPath
+	capabilities[string(ApiNameCompletion)] = claudeCompletionPath
+	// docs: https://docs.anthropic.com/en/docs/build-with-claude/embeddings#voyage-http-api
+	capabilities[string(ApiNameEmbeddings)] = PathOpenAIEmbeddings
+	return capabilities
+}
+
 func (c *claudeProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(c.DefaultCapabilities())
 	return &claudeProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -102,15 +113,12 @@ func (c *claudeProvider) GetProviderType() string {
 }

 func (c *claudeProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	c.config.handleRequestHeaders(c, ctx, apiName, log)
 	return nil
 }

 func (c *claudeProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestPathHeader(headers, claudeChatCompletionPath)
+	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), c.config.capabilities)
 	util.OverwriteRequestHostHeader(headers, claudeDomain)

 	headers.Set("x-api-key", c.config.GetApiTokenInUse(ctx))
@@ -123,13 +131,16 @@ func (c *claudeProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiNam
 }

 func (c *claudeProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !c.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return c.config.handleRequestBody(c, c.contextCache, ctx, apiName, body, log)
 }

 func (c *claudeProvider) TransformRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
+	if apiName != ApiNameChatCompletion {
+		return c.config.defaultTransformRequestBody(ctx, apiName, body, log)
+	}
 	request := &chatCompletionRequest{}
 	if err := c.config.parseRequestAndMapModel(ctx, request, body, log); err != nil {
 		return nil, err
@@ -139,6 +150,9 @@ func (c *claudeProvider) TransformRequestBody(ctx wrapper.HttpContext, apiName A
 }

 func (c *claudeProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
+	if apiName != ApiNameChatCompletion {
+		return body, nil
+	}
 	claudeResponse := &claudeTextGenResponse{}
 	if err := json.Unmarshal(body, claudeResponse); err != nil {
 		return nil, fmt.Errorf("unable to unmarshal claude response: %v", err)
@@ -154,6 +168,10 @@ func (c *claudeProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name A
 	if isLastChunk || len(chunk) == 0 {
 		return nil, nil
 	}
+	// only process the response from chat completion, skip other responses
+	if name != ApiNameChatCompletion {
+		return chunk, nil
+	}

 	responseBuilder := &strings.Builder{}
 	lines := strings.Split(string(chunk), "\n")
--- a/plugins/wasm-go/extensions/ai-proxy/provider/cloudflare.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/cloudflare.go
@@ -19,14 +19,20 @@ const (
 type cloudflareProviderInitializer struct {
 }

-func (c *cloudflareProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (c *cloudflareProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
 	}
 	return nil
 }
+// DefaultCapabilities returns the API-name-to-request-path table that
+// CreateProvider passes to setDefaultCapabilities for Cloudflare; only chat
+// completion is mapped.
+func (c *cloudflareProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string, 1)
+	capabilities[string(ApiNameChatCompletion)] = cloudflareChatCompletionPath
+	return capabilities
+}

 func (c *cloudflareProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(c.DefaultCapabilities())
 	return &cloudflareProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -43,15 +49,12 @@ func (c *cloudflareProvider) GetProviderType() string {
 }

 func (c *cloudflareProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	c.config.handleRequestHeaders(c, ctx, apiName, log)
 	return nil
 }

 func (c *cloudflareProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !c.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return c.config.handleRequestBody(c, c.contextCache, ctx, apiName, body, log)
--- a/plugins/wasm-go/extensions/ai-proxy/provider/cohere.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/cohere.go
@@ -12,20 +12,30 @@ import (
 )

 const (
-	cohereDomain             = "api.cohere.com"
+	cohereDomain = "api.cohere.com"
+	// TODO: support more capabilities, upgrade to v2, docs: https://docs.cohere.com/v2/reference/chat
 	cohereChatCompletionPath = "/v1/chat"
+	cohereRerankPath         = "/v1/rerank"
 )

 type cohereProviderInitializer struct{}

-func (m *cohereProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *cohereProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
 	}
 	return nil
 }

+// DefaultCapabilities returns the API-name-to-request-path table that
+// CreateProvider passes to setDefaultCapabilities for Cohere: chat and rerank.
+func (m *cohereProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string, 2)
+	capabilities[string(ApiNameChatCompletion)] = cohereChatCompletionPath
+	capabilities[string(ApiNameCohereV1Rerank)] = cohereRerankPath
+	return capabilities
+}
+
 func (m *cohereProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &cohereProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -56,15 +66,12 @@ func (m *cohereProvider) GetProviderType() string {
 }

 func (m *cohereProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	return nil
 }

 func (m *cohereProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
@@ -90,13 +97,16 @@ func (m *cohereProvider) buildCohereRequest(origin *chatCompletionRequest) *cohe
 }

 func (m *cohereProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestPathHeader(headers, cohereChatCompletionPath)
+	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 	util.OverwriteRequestHostHeader(headers, cohereDomain)
 	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+m.config.GetApiTokenInUse(ctx))
 	headers.Del("Content-Length")
 }

 func (m *cohereProvider) TransformRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
+	if apiName != ApiNameChatCompletion {
+		return m.config.defaultTransformRequestBody(ctx, apiName, body, log)
+	}
 	request := &chatCompletionRequest{}
 	if err := m.config.parseRequestAndMapModel(ctx, request, body, log); err != nil {
 		return nil, err
--- a/plugins/wasm-go/extensions/ai-proxy/provider/coze.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/coze.go
@@ -14,14 +14,19 @@ const (

 type cozeProviderInitializer struct{}

-func (m *cozeProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *cozeProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
 	}
 	return nil
 }

+// DefaultCapabilities returns an empty, non-nil table: Coze declares no
+// default API-name-to-path mappings.
+func (m *cozeProviderInitializer) DefaultCapabilities() map[string]string {
+	return make(map[string]string)
+}
+
 func (m *cozeProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &cozeProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
--- a/plugins/wasm-go/extensions/ai-proxy/provider/deepl.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/deepl.go
@@ -57,14 +57,21 @@ type deeplResponseTranslation struct {
 	Text                   string `json:"text"`
 }

-func (d *deeplProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (d *deeplProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.targetLang == "" {
 		return errors.New("missing targetLang in deepl provider config")
 	}
 	return nil
 }

+// DefaultCapabilities returns the API-name-to-request-path table that
+// CreateProvider passes to setDefaultCapabilities for DeepL; only chat
+// completion is mapped.
+func (d *deeplProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string, 1)
+	capabilities[string(ApiNameChatCompletion)] = deeplChatCompletionPath
+	return capabilities
+}
+
 func (d *deeplProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(d.DefaultCapabilities())
 	return &deeplProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -76,20 +83,21 @@ func (d *deeplProvider) GetProviderType() string {
 }

 func (d *deeplProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	d.config.handleRequestHeaders(d, ctx, apiName, log)
 	return nil
 }

 func (d *deeplProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestPathHeader(headers, deeplChatCompletionPath)
+	if apiName != "" {
+		util.OverwriteRequestPathHeader(headers, deeplChatCompletionPath)
+	}
+	// TODO: Support default host through configuration
+	util.OverwriteRequestHostHeader(headers, deeplHostFree)
 	util.OverwriteRequestAuthorizationHeader(headers, "DeepL-Auth-Key "+d.config.GetApiTokenInUse(ctx))
 }

 func (d *deeplProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !d.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return d.config.handleRequestBody(d, d.contextCache, ctx, apiName, body, log)
@@ -112,6 +120,9 @@ func (d *deeplProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, api
 }

 func (d *deeplProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
+	if apiName != ApiNameChatCompletion {
+		return body, nil
+	}
 	deeplResponse := &deeplResponse{}
 	if err := json.Unmarshal(body, deeplResponse); err != nil {
 		return nil, fmt.Errorf("unable to unmarshal deepl response: %v", err)
--- a/plugins/wasm-go/extensions/ai-proxy/provider/deepseek.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/deepseek.go
@@ -12,21 +12,30 @@ import (
 // deepseekProvider is the provider for deepseek Ai service.

 const (
-	deepseekDomain             = "api.deepseek.com"
+	deepseekDomain = "api.deepseek.com"
+	// TODO: docs: https://api-docs.deepseek.com/api/create-chat-completion
+	// according to the docs, the path should be /chat/completions; this needs to be verified
 	deepseekChatCompletionPath = "/v1/chat/completions"
 )

 type deepseekProviderInitializer struct {
 }

-func (m *deepseekProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *deepseekProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
 	}
 	return nil
 }

+// DefaultCapabilities returns the API-name-to-request-path table that
+// CreateProvider passes to setDefaultCapabilities for DeepSeek; only chat
+// completion is mapped.
+func (m *deepseekProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string, 1)
+	capabilities[string(ApiNameChatCompletion)] = deepseekChatCompletionPath
+	return capabilities
+}
+
 func (m *deepseekProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &deepseekProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -43,22 +52,19 @@ func (m *deepseekProvider) GetProviderType() string {
 }

 func (m *deepseekProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	return nil
 }

 func (m *deepseekProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
 }

 func (m *deepseekProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestPathHeader(headers, deepseekChatCompletionPath)
+	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 	util.OverwriteRequestHostHeader(headers, deepseekDomain)
 	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+m.config.GetApiTokenInUse(ctx))
 	headers.Del("Content-Length")
--- a/plugins/wasm-go/extensions/ai-proxy/provider/dify.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/dify.go
@@ -0,0 +1,323 @@
+package provider
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+)
+
+// Dify endpoint defaults and the bot types that select between them.
+const (
+	// difyDomain is the Host header value used when the provider config
+	// sets no difyApiUrl.
+	difyDomain         = "api.dify.ai"
+	// Request paths; TransformRequestHeaders picks one based on the
+	// configured botType.
+	difyChatPath       = "/v1/chat-messages"
+	difyCompletionPath = "/v1/completion-messages"
+	difyWorkflowPath   = "/v1/workflows/run"
+	// Values compared against the provider config's botType. Chat and Agent
+	// share the chat path and the same response mapping.
+	BotTypeChat        = "Chat"
+	BotTypeCompletion  = "Completion"
+	BotTypeWorkflow    = "Workflow"
+	BotTypeAgent       = "Agent"
+)
+
+// difyProviderInitializer validates Dify provider configuration and builds
+// difyProvider instances. It carries no state.
+type difyProviderInitializer struct{}
+
+// ValidateConfig rejects a Dify provider config that carries no API token.
+func (d *difyProviderInitializer) ValidateConfig(config *ProviderConfig) error {
+	// len of a nil slice or map is 0, so one check covers both nil and empty.
+	if len(config.apiTokens) > 0 {
+		return nil
+	}
+	return errors.New("no apiToken found in provider config")
+}
+
+// CreateProvider builds a difyProvider around the given config. The context
+// cache is created from the same config value that is stored on the provider.
+//
+// NOTE(review): the other initializers changed in this diff call
+// config.setDefaultCapabilities(...) here; Dify does not. Confirm whether that
+// omission is intentional.
+func (d *difyProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	provider := &difyProvider{
+		config:       config,
+		contextCache: createContextCache(&config),
+	}
+	return provider, nil
+}
+
+// difyProvider adapts OpenAI-style chat completion traffic to the Dify API.
+type difyProvider struct {
+	// config is the provider configuration; its botType, difyApiUrl and
+	// outputVariable fields drive request routing and response mapping.
+	config       ProviderConfig
+	// contextCache is handed to config.handleRequestBody on every request body.
+	contextCache *contextCache
+}
+
+// GetProviderType returns the provider type identifier for Dify.
+func (d *difyProvider) GetProviderType() string {
+	return providerTypeDify
+}
+
+// OnRequestHeaders delegates header processing to the shared config handler
+// and always returns nil. No API-name check happens here; unsupported APIs
+// are rejected later in OnRequestBody.
+func (d *difyProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
+	d.config.handleRequestHeaders(d, ctx, apiName, log)
+	return nil
+}
+
+// TransformRequestHeaders rewrites the outgoing request for Dify:
+//   - Host: the configured difyApiUrl when set, otherwise difyDomain;
+//   - Path: chosen by the configured botType (left untouched for an
+//     unrecognised bot type);
+//   - Authorization: a Bearer header built from the API token in use.
+func (d *difyProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
+	if customHost := d.config.difyApiUrl; customHost != "" {
+		log.Debugf("use local host: %s", customHost)
+		util.OverwriteRequestHostHeader(headers, customHost)
+	} else {
+		util.OverwriteRequestHostHeader(headers, difyDomain)
+	}
+
+	// Resolve the target path first; an empty result means "leave the path alone".
+	targetPath := ""
+	switch d.config.botType {
+	case BotTypeChat, BotTypeAgent:
+		targetPath = difyChatPath
+	case BotTypeCompletion:
+		targetPath = difyCompletionPath
+	case BotTypeWorkflow:
+		targetPath = difyWorkflowPath
+	}
+	if targetPath != "" {
+		util.OverwriteRequestPathHeader(headers, targetPath)
+	}
+
+	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+d.config.GetApiTokenInUse(ctx))
+}
+
+// OnRequestBody forwards chat completion bodies to the shared config handler.
+// Any other API name yields ActionContinue together with errUnsupportedApiName.
+func (d *difyProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
+	if apiName == ApiNameChatCompletion {
+		return d.config.handleRequestBody(d, d.contextCache, ctx, apiName, body, log)
+	}
+	return types.ActionContinue, errUnsupportedApiName
+}
+
+// TransformRequestBodyHeaders converts an OpenAI-style chat completion request
+// body into the JSON form of a Dify request. Bodies for any other API go
+// through the config's default transformation. The headers argument is not
+// modified here.
+func (d *difyProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header, log wrapper.Log) ([]byte, error) {
+	if apiName == ApiNameChatCompletion {
+		openAIRequest := &chatCompletionRequest{}
+		if err := d.config.parseRequestAndMapModel(ctx, openAIRequest, body, log); err != nil {
+			return nil, err
+		}
+		return json.Marshal(d.difyChatGenRequest(openAIRequest))
+	}
+	return d.config.defaultTransformRequestBody(ctx, apiName, body, log)
+}
+
+func (d *difyProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
+	if apiName != ApiNameChatCompletion {
+		return body, nil
+	}
+	difyResponse := &DifyChatResponse{}
+	if err := json.Unmarshal(body, difyResponse); err != nil {
+		return nil, fmt.Errorf("unable to unmarshal dify response: %v", err)
+	}
+	response := d.responseDify2OpenAI(ctx, difyResponse)
+	return json.Marshal(response)
+}
+
+// responseDify2OpenAI maps a complete (non-streaming) Dify response onto an
+// OpenAI-style chat completion response with a single choice.
+//
+// The response id and message content depend on the configured botType:
+//   - Chat / Agent: content is the answer, id is the conversation id, and the
+//     conversation id is also written to the "ConversationId" response header;
+//   - Completion:   content is the answer, id is the message id;
+//   - Workflow:     content is the configured output variable, id is the
+//     workflow id.
+//
+// For any other botType the choice and id stay at their zero values.
+func (d *difyProvider) responseDify2OpenAI(ctx wrapper.HttpContext, response *DifyChatResponse) *chatCompletionResponse {
+	var (
+		choice chatCompletionChoice
+		id     string
+	)
+	switch botType := d.config.botType; botType {
+	case BotTypeChat, BotTypeAgent, BotTypeCompletion:
+		// All answer-based bot types share the same choice shape.
+		choice = chatCompletionChoice{
+			Index:        0,
+			Message:      &chatMessage{Role: roleAssistant, Content: response.Answer},
+			FinishReason: finishReasonStop,
+		}
+		if botType == BotTypeCompletion {
+			id = response.MessageId
+		} else {
+			// Add the conversation id to the response headers.
+			_ = proxywasm.ReplaceHttpResponseHeader("ConversationId", response.ConversationId)
+			id = response.ConversationId
+		}
+	case BotTypeWorkflow:
+		choice = chatCompletionChoice{
+			Index:        0,
+			Message:      &chatMessage{Role: roleAssistant, Content: response.Data.Outputs[d.config.outputVariable]},
+			FinishReason: finishReasonStop,
+		}
+		id = response.Data.WorkflowId
+	}
+
+	result := &chatCompletionResponse{
+		Id:                id,
+		Created:           time.Now().Unix(),
+		Model:             ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
+		SystemFingerprint: "",
+		Object:            objectChatCompletion,
+		Choices:           []chatCompletionChoice{choice},
+		Usage:             response.MetaData.Usage,
+	}
+	return result
+}
+
+// OnStreamingResponseBody rewrites one chunk of a Dify server-sent-event
+// stream into OpenAI-style chat completion chunks.
+//
+// It returns nil for the last or an empty chunk, and passes chunks for any API
+// other than chat completion through unchanged. Otherwise every "data:" line in
+// the chunk is decoded as a Dify event, converted, and re-emitted as its own
+// event. Lines whose payload is not valid JSON are logged and skipped; a
+// failure to marshal a converted response aborts the chunk with that error.
+func (d *difyProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
+	if isLastChunk || len(chunk) == 0 {
+		return nil, nil
+	}
+	if name != ApiNameChatCompletion {
+		return chunk, nil
+	}
+	// sample event response:
+	// data: {"event": "agent_thought", "id": "8dcf3648-fbad-407a-85dd-73a6f43aeb9f", "task_id": "9cf1ddd7-f94b-459b-b942-b77b26c59e9b", "message_id": "1fb10045-55fd-4040-99e6-d048d07cbad3", "position": 1, "thought": "", "observation": "", "tool": "", "tool_input": "", "created_at": 1705639511, "message_files": [], "conversation_id": "c216c595-2d89-438c-b33c-aae5ddddd142"}
+
+	// sample end event response:
+	// data: {"event": "message_end", "id": "5e52ce04-874b-4d27-9045-b3bc80def685", "conversation_id": "45701982-8118-4bc5-8e9b-64562b4555f2", "metadata": {"usage": {"prompt_tokens": 1033, "prompt_unit_price": "0.001", "prompt_price_unit": "0.001", "prompt_price": "0.0010330", "completion_tokens": 135, "completion_unit_price": "0.002", "completion_price_unit": "0.001", "completion_price": "0.0002700", "total_tokens": 1168, "total_price": "0.0013030", "currency": "USD", "latency": 1.381760165997548}, "retriever_resources": [{"position": 1, "dataset_id": "101b4c97-fc2e-463c-90b1-5261a4cdcafb", "dataset_name": "iPhone", "document_id": "8dd1ad74-0b5f-4175-b735-7d98bbbb4e00", "document_name": "iPhone List", "segment_id": "ed599c7f-2766-4294-9d1d-e5235a61270a", "score": 0.98457545, "content": "\"Model\",\"Release Date\",\"Display Size\",\"Resolution\",\"Processor\",\"RAM\",\"Storage\",\"Camera\",\"Battery\",\"Operating System\"\n\"iPhone 13 Pro Max\",\"September 24, 2021\",\"6.7 inch\",\"1284 x 2778\",\"Hexa-core (2x3.23 GHz Avalanche + 4x1.82 GHz Blizzard)\",\"6 GB\",\"128, 256, 512 GB, 1TB\",\"12 MP\",\"4352 mAh\",\"iOS 15\""}]}}
+	const sseDataPrefix = "data:"
+	responseBuilder := &strings.Builder{}
+	for _, line := range strings.Split(string(chunk), "\n") {
+		// Only SSE data fields carry a JSON payload. Blank separators and other
+		// fields (e.g. "event: ping") are skipped explicitly instead of being
+		// sliced at a fixed offset and reported as unmarshal errors.
+		if !strings.HasPrefix(line, sseDataPrefix) {
+			continue
+		}
+		// The space after the colon is optional in SSE; trimming also drops a
+		// trailing "\r" left over from CRLF line endings.
+		payload := strings.TrimSpace(line[len(sseDataPrefix):])
+		if payload == "" {
+			continue
+		}
+		var difyResponse DifyChunkChatResponse
+		if err := json.Unmarshal([]byte(payload), &difyResponse); err != nil {
+			log.Errorf("unable to unmarshal dify response: %v", err)
+			continue
+		}
+		response := d.streamResponseDify2OpenAI(ctx, &difyResponse)
+		responseBody, err := json.Marshal(response)
+		if err != nil {
+			log.Errorf("unable to marshal response: %v", err)
+			return nil, err
+		}
+		d.appendResponse(responseBuilder, string(responseBody))
+	}
+	modifiedResponseChunk := responseBuilder.String()
+	log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
+	return []byte(modifiedResponseChunk), nil
+}
+
+// streamResponseDify2OpenAI maps one streamed Dify event onto an OpenAI-style
+// chat completion chunk with a single delta choice.
+//
+// The chunk id and delta content depend on the configured botType, in the same
+// way as for non-streaming responses; Chat / Agent events additionally write
+// the conversation id to the "ConversationId" response header. For any other
+// botType the choice and id stay at their zero values.
+//
+// "message_end" and "workflow_finished" events mark the choice as finished;
+// only "message_end" copies token usage from the event metadata.
+func (d *difyProvider) streamResponseDify2OpenAI(ctx wrapper.HttpContext, response *DifyChunkChatResponse) *chatCompletionResponse {
+	var (
+		choice        chatCompletionChoice
+		id            string
+		responseUsage usage
+	)
+	switch botType := d.config.botType; botType {
+	case BotTypeChat, BotTypeAgent, BotTypeCompletion:
+		// All answer-based bot types share the same delta shape.
+		choice = chatCompletionChoice{
+			Index: 0,
+			Delta: &chatMessage{Role: roleAssistant, Content: response.Answer},
+		}
+		if botType == BotTypeCompletion {
+			id = response.MessageId
+		} else {
+			id = response.ConversationId
+			_ = proxywasm.ReplaceHttpResponseHeader("ConversationId", response.ConversationId)
+		}
+	case BotTypeWorkflow:
+		choice = chatCompletionChoice{
+			Index: 0,
+			Delta: &chatMessage{Role: roleAssistant, Content: response.Data.Outputs[d.config.outputVariable]},
+		}
+		id = response.Data.WorkflowId
+	}
+
+	switch response.Event {
+	case "message_end":
+		choice.FinishReason = finishReasonStop
+		responseUsage = usage{
+			PromptTokens:     response.MetaData.Usage.PromptTokens,
+			CompletionTokens: response.MetaData.Usage.CompletionTokens,
+			TotalTokens:      response.MetaData.Usage.TotalTokens,
+		}
+	case "workflow_finished":
+		choice.FinishReason = finishReasonStop
+	}
+
+	result := &chatCompletionResponse{
+		Id:                id,
+		Created:           time.Now().Unix(),
+		Model:             ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
+		SystemFingerprint: "",
+		Object:            objectChatCompletionChunk,
+		Choices:           []chatCompletionChoice{choice},
+		Usage:             responseUsage,
+	}
+	return result
+}
+
+// appendResponse writes responseBody to responseBuilder as one stream item:
+// the streamDataItemKey prefix, a space, the body, and a blank-line terminator.
+func (d *difyProvider) appendResponse(responseBuilder *strings.Builder, responseBody string) {
+	// Format straight into the builder instead of building a temporary string.
+	_, _ = fmt.Fprintf(responseBuilder, "%s %s\n\n", streamDataItemKey, responseBody)
+}
+
+func (d *difyProvider) difyChatGenRequest(request *chatCompletionRequest) *DifyChatRequest {
+	content := ""
+	for _, message := range request.Messages {
+		if message.Role == "system" {
+			content += "SYSTEM: \n" + message.StringContent() + "\n"
+		} else if message.Role == "assistant" {
+			content += "ASSISTANT: \n" + message.StringContent() + "\n"
+		} else {
+			content += "USER: \n" + message.StringContent() + "\n"
+		}
+	}
+	mode := "blocking"
+	if request.Stream {
+		mode = "streaming"
+	}
+	user := request.User
+	if user == "" {
+		user = "api-user"
+	}
+	switch d.config.botType {
+	case BotTypeChat, BotTypeAgent:
+		conversationId, _ := proxywasm.GetHttpRequestHeader("ConversationId")
+		return &DifyChatRequest{
+			Inputs:           make(map[string]interface{}),
+			Query:            content,
+			ResponseMode:     mode,
+			User:             user,
+			AutoGenerateName: false,
+			ConversationId:   conversationId,
+		}
+	case BotTypeCompletion:
+		return &DifyChatRequest{
+			Inputs: map[string]interface{}{
+				"query": content,
+			},
+			ResponseMode: mode,
+			User:         user,
+		}
+	case BotTypeWorkflow:
+		return &DifyChatRequest{
+			Inputs: map[string]interface{}{
+				d.config.inputVariable: content,
+			},
+			ResponseMode: mode,
+			User:         user,
+		}
+	default:
+		return &DifyChatRequest{}
+	}
+}
+
+// DifyChatRequest is the JSON request body sent to Dify. It is produced by
+// difyChatGenRequest, which fills a different subset of fields per bot type.
+type DifyChatRequest struct {
+	// Inputs carries named input variables; completion bots receive the prompt
+	// under "query", workflow bots under the configured input variable.
+	Inputs           map[string]interface{} `json:"inputs"`
+	// Query is the flattened message transcript (set for chat / agent bots).
+	Query            string                 `json:"query"`
+	// ResponseMode is "blocking" or "streaming".
+	ResponseMode     string                 `json:"response_mode"`
+	// User identifies the caller; difyChatGenRequest defaults it to "api-user".
+	User             string                 `json:"user"`
+	// AutoGenerateName is always sent as false by difyChatGenRequest.
+	AutoGenerateName bool                   `json:"auto_generate_name"`
+	// ConversationId is taken from the "ConversationId" request header (chat / agent bots).
+	ConversationId   string                 `json:"conversation_id"`
+}
+
+// DifyMetaData maps the "metadata" object of a Dify response. Only the token
+// usage is decoded.
+type DifyMetaData struct {
+	// Usage holds the token counters copied into the OpenAI-style usage block.
+	Usage usage `json:"usage"`
+}
+
+// DifyData maps the "data" object of a Dify response; it is read for workflow bots.
+type DifyData struct {
+	// WorkflowId is used as the OpenAI chunk id for workflow bots.
+	WorkflowId string                 `json:"workflow_id"`
+	// Id is decoded from "id"; NOTE(review): presumably the run/event id — confirm against the Dify API.
+	Id         string                 `json:"id"`
+	// Outputs holds the workflow output variables; the configured output
+	// variable is looked up here to obtain the answer content.
+	Outputs    map[string]interface{} `json:"outputs"`
+}
+
+// DifyChatResponse is the JSON body of a non-streaming Dify response.
+type DifyChatResponse struct {
+	// ConversationId identifies the conversation.
+	ConversationId string       `json:"conversation_id"`
+	// MessageId identifies the generated message.
+	MessageId      string       `json:"message_id"`
+	// Answer is the generated text.
+	Answer         string       `json:"answer"`
+	// CreateAt is the creation timestamp. Dify names this field "created_at";
+	// the previous tag "create_at" never matched, leaving the field at zero.
+	CreateAt       int64        `json:"created_at"`
+	// Data carries the workflow result (workflow id and outputs).
+	Data           DifyData     `json:"data"`
+	// MetaData carries the token usage.
+	MetaData       DifyMetaData `json:"metadata"`
+}
+
+// DifyChunkChatResponse is a single decoded event of a Dify streaming
+// response. It is converted to an OpenAI chunk by streamResponseDify2OpenAI.
+type DifyChunkChatResponse struct {
+	// Event is the event name; "message_end" and "workflow_finished" mark the end of the answer.
+	Event          string       `json:"event"`
+	// ConversationId is used as the chunk id for chat / agent bots.
+	ConversationId string       `json:"conversation_id"`
+	// MessageId is used as the chunk id for completion bots.
+	MessageId      string       `json:"message_id"`
+	// Answer is the text delta for chat / agent / completion bots.
+	Answer         string       `json:"answer"`
+	// Data carries the workflow id and outputs for workflow bots.
+	Data           DifyData     `json:"data"`
+	// MetaData carries token usage; it is read only on "message_end".
+	MetaData       DifyMetaData `json:"metadata"`
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/doubao.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/doubao.go
@@ -13,18 +13,27 @@ import (
 const (
 	doubaoDomain             = "ark.cn-beijing.volces.com"
 	doubaoChatCompletionPath = "/api/v3/chat/completions"
+	doubaoEmbeddingsPath     = "/api/v3/embeddings"
 )

 type doubaoProviderInitializer struct{}

-func (m *doubaoProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *doubaoProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
 	}
 	return nil
 }

+// DefaultCapabilities returns the Doubao default request path for each
+// supported API name; CreateProvider applies it via setDefaultCapabilities.
+func (m *doubaoProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string, 2)
+	capabilities[string(ApiNameChatCompletion)] = doubaoChatCompletionPath
+	capabilities[string(ApiNameEmbeddings)] = doubaoEmbeddingsPath
+	return capabilities
+}
+
 func (m *doubaoProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &doubaoProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -41,22 +50,19 @@ func (m *doubaoProvider) GetProviderType() string {
 }

 func (m *doubaoProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	return nil
 }

 func (m *doubaoProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
 }

 func (m *doubaoProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestPathHeader(headers, doubaoChatCompletionPath)
+	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 	util.OverwriteRequestHostHeader(headers, doubaoDomain)
 	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+m.config.GetApiTokenInUse(ctx))
 	headers.Del("Content-Length")
@@ -66,5 +72,8 @@ func (m *doubaoProvider) GetApiName(path string) ApiName {
 	if strings.Contains(path, doubaoChatCompletionPath) {
 		return ApiNameChatCompletion
 	}
+	if strings.Contains(path, doubaoEmbeddingsPath) {
+		return ApiNameEmbeddings
+	}
 	return ""
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/failover.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/failover.go
@@ -8,7 +8,7 @@ import (
 	"net/http"
 	"strings"
 	"time"
-	
+
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/google/uuid"
@@ -32,6 +32,8 @@ type failover struct {
 	healthCheckModel string `required:"false" yaml:"healthCheckModel" json:"healthCheckModel"`
 	// @Title zh-CN 本次请求使用的 apiToken
 	ctxApiTokenInUse string
+	// @Title zh-CN 记录本次请求时所有可用的 apiToken
+	ctxAvailableApiTokensInRequest string
 	// @Title zh-CN 记录 apiToken 请求失败的次数，key 为 apiToken，value 为失败次数
 	ctxApiTokenRequestFailureCount string
 	// @Title zh-CN 记录 apiToken 健康检测成功的次数，key 为 apiToken，value 为成功次数
@@ -527,6 +529,22 @@ func (c *ProviderConfig) GetGlobalRandomToken(log wrapper.Log) string {
 	}
 }

+// GetAvailableApiToken returns the apiTokens stored for the current request by
+// SetAvailableApiTokens, or nil when nothing (or a value of another type) is stored.
+func (c *ProviderConfig) GetAvailableApiToken(ctx wrapper.HttpContext) []string {
+	stored := ctx.GetContext(c.failover.ctxAvailableApiTokensInRequest)
+	if tokens, ok := stored.([]string); ok {
+		return tokens
+	}
+	return nil
+}
+
+// SetAvailableApiTokens records the apiTokens usable by the current request in
+// the HTTP context so that retry-on-failure can pick from them later. With
+// failover enabled the list comes from getApiTokens (errors are ignored);
+// otherwise the statically configured apiTokens are used.
+func (c *ProviderConfig) SetAvailableApiTokens(ctx wrapper.HttpContext, log wrapper.Log) {
+	if !c.isFailoverEnabled() {
+		var configured []string = c.apiTokens
+		ctx.SetContext(c.failover.ctxAvailableApiTokensInRequest, configured)
+		return
+	}
+	var shared []string
+	shared, _, _ = getApiTokens(c.failover.ctxApiTokens)
+	ctx.SetContext(c.failover.ctxAvailableApiTokensInRequest, shared)
+}
+
 func (c *ProviderConfig) isFailoverEnabled() bool {
 	return c.failover.enabled
 }
@@ -539,12 +557,12 @@ func (c *ProviderConfig) resetSharedData() {
 	_ = proxywasm.SetSharedData(c.failover.ctxApiTokenRequestFailureCount, nil, 0)
 }

-func (c *ProviderConfig) OnRequestFailed(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, log wrapper.Log) types.Action {
+func (c *ProviderConfig) OnRequestFailed(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, apiTokens []string, log wrapper.Log) types.Action {
 	if c.isFailoverEnabled() {
 		c.handleUnavailableApiToken(ctx, apiTokenInUse, log)
 	}
 	if c.isRetryOnFailureEnabled() && ctx.GetContext(ctxKeyIsStreaming) != nil && !ctx.GetContext(ctxKeyIsStreaming).(bool) {
-		c.retryFailedRequest(activeProvider, ctx, log)
+		c.retryFailedRequest(activeProvider, ctx, apiTokenInUse, apiTokens, log)
 		return types.HeaderStopAllIterationAndWatermark
 	}
 	return types.ActionContinue
@@ -557,8 +575,8 @@ func (c *ProviderConfig) GetApiTokenInUse(ctx wrapper.HttpContext) string {

 func (c *ProviderConfig) SetApiTokenInUse(ctx wrapper.HttpContext, log wrapper.Log) {
 	var apiToken string
-	if c.isFailoverEnabled() || c.useGlobalApiToken {
-		// if enable apiToken failover, only use available apiToken
+	// if enable apiToken failover, only use available apiToken from global apiTokens list
+	if c.isFailoverEnabled() {
 		apiToken = c.GetGlobalRandomToken(log)
 	} else {
 		apiToken = c.GetRandomToken()
--- a/plugins/wasm-go/extensions/ai-proxy/provider/gemini.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/gemini.go
@@ -28,14 +28,19 @@ const (
 type geminiProviderInitializer struct {
 }

-func (g *geminiProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (g *geminiProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
 	}
 	return nil
 }

+// DefaultCapabilities returns an empty, non-nil map: Gemini declares no
+// default API-name-to-path entries here.
+func (g *geminiProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string)
+	return capabilities
+}
+
 func (g *geminiProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(g.DefaultCapabilities())
 	return &geminiProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -52,9 +57,6 @@ func (g *geminiProvider) GetProviderType() string {
 }

 func (g *geminiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion && apiName != ApiNameEmbeddings {
-		return errUnsupportedApiName
-	}
 	g.config.handleRequestHeaders(g, ctx, apiName, log)
 	// Delay the header processing to allow changing streaming mode in OnRequestBody
 	return nil
@@ -66,7 +68,7 @@ func (g *geminiProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiNam
 }

 func (g *geminiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion && apiName != ApiNameEmbeddings {
+	if !g.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return g.config.handleRequestBody(g, g.contextCache, ctx, apiName, body, log)
@@ -110,6 +112,9 @@ func (g *geminiProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name A
 	if isLastChunk || len(chunk) == 0 {
 		return nil, nil
 	}
+	if name != ApiNameChatCompletion {
+		return chunk, nil
+	}
 	// sample end event response:
 	// data: {"candidates": [{"content": {"parts": [{"text": "我是 Gemini，一个大型多模态模型，由 Google 训练。我的职责是尽我所能帮助您，并尽力提供全面且信息丰富的答复。"}],"role": "model"},"finishReason": "STOP","index": 0,"safetyRatings": [{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT","probability": "NEGLIGIBLE"},{"category": "HARM_CATEGORY_HATE_SPEECH","probability": "NEGLIGIBLE"},{"category": "HARM_CATEGORY_HARASSMENT","probability": "NEGLIGIBLE"},{"category": "HARM_CATEGORY_DANGEROUS_CONTENT","probability": "NEGLIGIBLE"}]}],"usageMetadata": {"promptTokenCount": 2,"candidatesTokenCount": 35,"totalTokenCount": 37}}
 	responseBuilder := &strings.Builder{}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/github.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/github.go
@@ -25,14 +25,22 @@ type githubProvider struct {
 	contextCache *contextCache
 }

-func (m *githubProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *githubProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
 	}
 	return nil
 }

+// DefaultCapabilities returns the GitHub Models default request path for each
+// supported API name; CreateProvider applies it via setDefaultCapabilities.
+func (m *githubProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string, 2)
+	capabilities[string(ApiNameChatCompletion)] = githubCompletionPath
+	capabilities[string(ApiNameEmbeddings)] = githubEmbeddingPath
+	return capabilities
+}
+
 func (m *githubProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &githubProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -44,16 +52,13 @@ func (m *githubProvider) GetProviderType() string {
 }

 func (m *githubProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion && apiName != ApiNameEmbeddings {
-		return errUnsupportedApiName
-	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	// Delay the header processing to allow changing streaming mode in OnRequestBody
 	return nil
 }

 func (m *githubProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion && apiName != ApiNameEmbeddings {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
@@ -61,12 +66,7 @@ func (m *githubProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName,

 func (m *githubProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
 	util.OverwriteRequestHostHeader(headers, githubDomain)
-	if apiName == ApiNameChatCompletion {
-		util.OverwriteRequestPathHeader(headers, githubCompletionPath)
-	}
-	if apiName == ApiNameEmbeddings {
-		util.OverwriteRequestPathHeader(headers, githubEmbeddingPath)
-	}
+	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 	util.OverwriteRequestAuthorizationHeader(headers, m.config.GetApiTokenInUse(ctx))
 }

--- a/plugins/wasm-go/extensions/ai-proxy/provider/groq.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/groq.go
@@ -18,14 +18,21 @@ const (

 type groqProviderInitializer struct{}

-func (g *groqProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (g *groqProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
 	}
 	return nil
 }

+// DefaultCapabilities returns the Groq default request path for the chat
+// completion API; CreateProvider applies it via setDefaultCapabilities.
+func (g *groqProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string, 1)
+	capabilities[string(ApiNameChatCompletion)] = groqChatCompletionPath
+	return capabilities
+}
+
 func (g *groqProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(g.DefaultCapabilities())
 	return &groqProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -42,22 +49,19 @@ func (g *groqProvider) GetProviderType() string {
 }

 func (g *groqProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	g.config.handleRequestHeaders(g, ctx, apiName, log)
 	return nil
 }

 func (g *groqProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !g.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return g.config.handleRequestBody(g, g.contextCache, ctx, apiName, body, log)
 }

 func (g *groqProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestPathHeader(headers, groqChatCompletionPath)
+	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), g.config.capabilities)
 	util.OverwriteRequestHostHeader(headers, groqDomain)
 	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+g.config.GetApiTokenInUse(ctx))
 	headers.Del("Content-Length")
--- a/plugins/wasm-go/extensions/ai-proxy/provider/hunyuan.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/hunyuan.go
@@ -39,6 +39,11 @@ const (

 	hunyuanAuthKeyLen = 32
 	hunyuanAuthIdLen  = 36
+
+	// docs: https://cloud.tencent.com/document/product/1729/111007
+	hunyuanOpenAiDomain      = "api.hunyuan.cloud.tencent.com"
+	hunyuanOpenAiRequestPath = "/v1/chat/completions"
+	hunyuanOpenAiEmbeddings  = "/v1/embeddings"
 )

 type hunyuanProviderInitializer struct {
@@ -85,7 +90,11 @@ type hunyuanChatMessage struct {
 	Content string `json:"Content,omitempty"`
 }

-func (m *hunyuanProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *hunyuanProviderInitializer) ValidateConfig(config *ProviderConfig) error {
+	// 允许 hunyuanauthid 和 hunyuanauthkey 为空, 当他们都为空的时候，认为是使用openai的 兼容接口
+	if len(config.hunyuanAuthId) == 0 && len(config.hunyuanAuthKey) == 0 {
+		return nil
+	}
 	// 校验hunyuan id 和 key的合法性
 	if len(config.hunyuanAuthId) != hunyuanAuthIdLen || len(config.hunyuanAuthKey) != hunyuanAuthKeyLen {
 		return errors.New("hunyuanAuthId / hunyuanAuthKey is illegal in config file")
@@ -93,7 +102,15 @@ func (m *hunyuanProviderInitializer) ValidateConfig(config ProviderConfig) error
 	return nil
 }

+// DefaultCapabilities returns the default request paths of Hunyuan's
+// OpenAI-compatible endpoint for each supported API name.
+func (m *hunyuanProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string, 2)
+	capabilities[string(ApiNameChatCompletion)] = hunyuanOpenAiRequestPath
+	capabilities[string(ApiNameEmbeddings)] = hunyuanOpenAiEmbeddings
+	return capabilities
+}
+
 func (m *hunyuanProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &hunyuanProvider{
 		config: config,
 		client: wrapper.NewClusterClient(wrapper.RouteCluster{
@@ -114,29 +131,38 @@ func (m *hunyuanProvider) GetProviderType() string {
 	return providerTypeHunyuan
 }

+// useOpenAICompatibleAPI reports whether the provider should talk to Hunyuan's
+// OpenAI-compatible endpoint, which is the case when neither hunyuanAuthId nor
+// hunyuanAuthKey is configured.
+func (m *hunyuanProvider) useOpenAICompatibleAPI() bool {
+	hasAuthId := len(m.config.hunyuanAuthId) > 0
+	hasAuthKey := len(m.config.hunyuanAuthKey) > 0
+	return !(hasAuthId || hasAuthKey)
+}
+
 func (m *hunyuanProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	// Delay the header processing to allow changing streaming mode in OnRequestBody
 	return nil
 }

 func (m *hunyuanProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestHostHeader(headers, hunyuanDomain)
-	util.OverwriteRequestPathHeader(headers, hunyuanRequestPath)
-
-	// 添加 hunyuan 需要的自定义字段
-	headers.Set(actionKey, hunyuanChatCompletionTCAction)
-	headers.Set(versionKey, versionValue)
+	if m.useOpenAICompatibleAPI() {
+		util.OverwriteRequestHostHeader(headers, hunyuanOpenAiDomain)
+		util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
+		util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+m.config.GetApiTokenInUse(ctx))
+	} else {
+		util.OverwriteRequestHostHeader(headers, hunyuanDomain)
+		util.OverwriteRequestPathHeader(headers, hunyuanRequestPath)
+		// 添加 hunyuan 需要的自定义字段
+		headers.Set(actionKey, hunyuanChatCompletionTCAction)
+		headers.Set(versionKey, versionValue)
+	}
 }

 // hunyuan 的 OnRequestBody 逻辑中包含了对 headers 签名的逻辑，并且插入 context 以后还要重新计算签名，因此无法复用 handleRequestBody 方法
 func (m *hunyuanProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
+	if m.useOpenAICompatibleAPI() {
+		return types.ActionContinue, nil
+	}

 	// 为header添加时间戳字段 （因为需要根据body进行签名时依赖时间戳，故于body处理部分创建时间戳）
 	var timestamp int64 = time.Now().Unix()
@@ -264,6 +290,9 @@ func (m *hunyuanProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName

 // hunyuan 的 TransformRequestBodyHeaders 方法只在 failover 健康检查的时候会调用
 func (m *hunyuanProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header, log wrapper.Log) ([]byte, error) {
+	if m.useOpenAICompatibleAPI() {
+		return m.config.defaultTransformRequestBody(ctx, apiName, body, log)
+	}
 	request := &chatCompletionRequest{}
 	err := m.config.parseRequestAndMapModel(ctx, request, body, log)
 	if err != nil {
@@ -289,7 +318,7 @@ func (m *hunyuanProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, a
 }

 func (m *hunyuanProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
-	if m.config.protocol == protocolOriginal {
+	if m.config.IsOriginal() || m.useOpenAICompatibleAPI() || name != ApiNameChatCompletion {
 		return chunk, nil
 	}

@@ -405,6 +434,12 @@ func (m *hunyuanProvider) convertChunkFromHunyuanToOpenAI(ctx wrapper.HttpContex
 }

 func (m *hunyuanProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
+	if m.config.IsOriginal() || m.useOpenAICompatibleAPI() {
+		return body, nil
+	}
+	if apiName != ApiNameChatCompletion {
+		return body, nil
+	}
 	log.Debugf("#debug nash5# onRespBody's resp is: %s", string(body))
 	hunyuanResponse := &hunyuanTextGenResponseNonStreaming{}
 	if err := json.Unmarshal(body, hunyuanResponse); err != nil {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/minimax.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/minimax.go
@@ -38,10 +38,10 @@ const (
 type minimaxProviderInitializer struct {
 }

-func (m *minimaxProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *minimaxProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	// If using the chat completion Pro API, a group ID must be set.
 	if minimaxApiTypePro == config.minimaxApiType && config.minimaxGroupId == "" {
-		return errors.New(fmt.Sprintf("missing minimaxGroupId in provider config when minimaxApiType is %s", minimaxApiTypePro))
+		return fmt.Errorf("missing minimaxGroupId in provider config when minimaxApiType is %s", minimaxApiTypePro)
 	}
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
@@ -49,7 +49,15 @@ func (m *minimaxProviderInitializer) ValidateConfig(config ProviderConfig) error
 	return nil
 }

+// DefaultCapabilities returns the Minimax default request path for the chat
+// completion API.
+func (m *minimaxProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string, 1)
+	// Minimax rewrites the path based on the model mapping, so this entry has
+	// no practical effect; it exists only to stay consistent with the other providers.
+	capabilities[string(ApiNameChatCompletion)] = minimaxChatCompletionV2Path
+	return capabilities
+}
+
 func (m *minimaxProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &minimaxProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -66,9 +74,6 @@ func (m *minimaxProvider) GetProviderType() string {
 }

 func (m *minimaxProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	// Delay the header processing to allow changing streaming mode in OnRequestBody
 	return nil
@@ -81,7 +86,7 @@ func (m *minimaxProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiNa
 }

 func (m *minimaxProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	if minimaxApiTypePro == m.config.minimaxApiType {
@@ -159,6 +164,9 @@ func (m *minimaxProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name
 	if isLastChunk || len(chunk) == 0 {
 		return nil, nil
 	}
+	if name != ApiNameChatCompletion {
+		return chunk, nil
+	}
 	// Sample event response:
 	// data: {"created":1689747645,"model":"abab6.5s-chat","reply":"","choices":[{"messages":[{"sender_type":"BOT","sender_name":"MM智能助理","text":"am from China."}]}],"output_sensitive":false}

@@ -192,6 +200,9 @@ func (m *minimaxProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name

 // TransformResponseBody handles the final response body from the Minimax service only for requests using the OpenAI protocol and corresponding to the chat completion Pro API.
 func (m *minimaxProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
+	if apiName != ApiNameChatCompletion {
+		return body, nil
+	}
 	minimaxResp := &minimaxChatCompletionProResp{}
 	if err := json.Unmarshal(body, minimaxResp); err != nil {
 		return nil, fmt.Errorf("unable to unmarshal minimax response: %v", err)
@@ -268,18 +279,6 @@ type minimaxUsage struct {
 	CompletionTokens int64 `json:"completion_tokens"`
 }

-func (m *minimaxProvider) parseModel(body []byte) (string, error) {
-	var tempMap map[string]interface{}
-	if err := json.Unmarshal(body, &tempMap); err != nil {
-		return "", err
-	}
-	model, ok := tempMap["model"].(string)
-	if !ok {
-		return "", errors.New("missing model in chat completion request")
-	}
-	return model, nil
-}
-
 func (m *minimaxProvider) setBotSettings(request *minimaxChatCompletionProRequest, botSettingContent string) {
 	if len(request.BotSettings) == 0 {
 		request.BotSettings = []minimaxBotSetting{
--- a/plugins/wasm-go/extensions/ai-proxy/provider/mistral.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/mistral.go
@@ -15,14 +15,23 @@ const (

 type mistralProviderInitializer struct{}

-func (m *mistralProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *mistralProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
 	}
 	return nil
 }

+// DefaultCapabilities returns the Mistral default request path for each
+// supported API name; CreateProvider applies it via setDefaultCapabilities.
+func (m *mistralProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string, 2)
+	// The chat interface of mistral is the same as that of OpenAI. docs: https://docs.mistral.ai/api/
+	capabilities[string(ApiNameChatCompletion)] = PathOpenAIChatCompletions
+	capabilities[string(ApiNameEmbeddings)] = PathOpenAIEmbeddings
+	return capabilities
+}
+
 func (m *mistralProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &mistralProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -39,15 +48,12 @@ func (m *mistralProvider) GetProviderType() string {
 }

 func (m *mistralProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	return nil
 }

 func (m *mistralProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
--- a/plugins/wasm-go/extensions/ai-proxy/provider/model.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/model.go
@@ -1,6 +1,9 @@
 package provider

-import "strings"
+import (
+	"fmt"
+	"strings"
+)

 const (
 	streamEventIdItemKey        = "id:"
@@ -19,22 +22,55 @@ const (
 )

 type chatCompletionRequest struct {
+	Messages            []chatMessage          `json:"messages"`
+	Model               string                 `json:"model"`
+	Store               bool                   `json:"store,omitempty"`
+	ReasoningEffort     string                 `json:"reasoning_effort,omitempty"`
+	Metadata            map[string]string      `json:"metadata,omitempty"`
+	FrequencyPenalty    float64                `json:"frequency_penalty,omitempty"`
+	LogitBias           map[string]int         `json:"logit_bias,omitempty"`
+	Logprobs            bool                   `json:"logprobs,omitempty"`
+	TopLogprobs         int                    `json:"top_logprobs,omitempty"`
+	MaxTokens           int                    `json:"max_tokens,omitempty"`
+	MaxCompletionTokens int                    `json:"max_completion_tokens,omitempty"`
+	N                   int                    `json:"n,omitempty"`
+	Modalities          []string               `json:"modalities,omitempty"`
+	Prediction          map[string]interface{} `json:"prediction,omitempty"`
+	Audio               map[string]interface{} `json:"audio,omitempty"`
+	PresencePenalty     float64                `json:"presence_penalty,omitempty"`
+	ResponseFormat      map[string]interface{} `json:"response_format,omitempty"`
+	Seed                int                    `json:"seed,omitempty"`
+	ServiceTier         string                 `json:"service_tier,omitempty"`
+	Stop                []string               `json:"stop,omitempty"`
+	Stream              bool                   `json:"stream,omitempty"`
+	StreamOptions       *streamOptions         `json:"stream_options,omitempty"`
+	Temperature         float64                `json:"temperature,omitempty"`
+	TopP                float64                `json:"top_p,omitempty"`
+	Tools               []tool                 `json:"tools,omitempty"`
+	ToolChoice          *toolChoice            `json:"tool_choice,omitempty"`
+	ParallelToolCalls   bool                   `json:"parallel_tool_calls,omitempty"`
+	User                string                 `json:"user,omitempty"`
+}
+
+type CompletionRequest struct {
 	Model            string         `json:"model"`
-	Messages         []chatMessage  `json:"messages"`
-	MaxTokens        int            `json:"max_tokens,omitempty"`
+	Prompt           string         `json:"prompt"`
+	BestOf           int            `json:"best_of,omitempty"`
+	Echo             bool           `json:"echo,omitempty"`
 	FrequencyPenalty float64        `json:"frequency_penalty,omitempty"`
+	LogitBias        map[string]int `json:"logit_bias,omitempty"`
+	Logprobs         int            `json:"logprobs,omitempty"`
+	MaxTokens        int            `json:"max_tokens,omitempty"`
 	N                int            `json:"n,omitempty"`
 	PresencePenalty  float64        `json:"presence_penalty,omitempty"`
 	Seed             int            `json:"seed,omitempty"`
+	Stop             []string       `json:"stop,omitempty"`
 	Stream           bool           `json:"stream,omitempty"`
 	StreamOptions    *streamOptions `json:"stream_options,omitempty"`
+	Suffix           string         `json:"suffix,omitempty"`
 	Temperature      float64        `json:"temperature,omitempty"`
 	TopP             float64        `json:"top_p,omitempty"`
-	Tools            []tool         `json:"tools,omitempty"`
-	ToolChoice       *toolChoice    `json:"tool_choice,omitempty"`
 	User             string         `json:"user,omitempty"`
-	Stop             []string       `json:"stop,omitempty"`
-	ResponseFormat   map[string]interface{} `json:"response_format,omitempty"`
 }

 type streamOptions struct {
@@ -62,29 +98,60 @@ type chatCompletionResponse struct {
 	Choices           []chatCompletionChoice `json:"choices"`
 	Created           int64                  `json:"created,omitempty"`
 	Model             string                 `json:"model,omitempty"`
+	ServiceTier       string                 `json:"service_tier,omitempty"`
 	SystemFingerprint string                 `json:"system_fingerprint,omitempty"`
 	Object            string                 `json:"object,omitempty"`
 	Usage             usage                  `json:"usage,omitempty"`
 }

 type chatCompletionChoice struct {
-	Index        int          `json:"index"`
-	Message      *chatMessage `json:"message,omitempty"`
-	Delta        *chatMessage `json:"delta,omitempty"`
-	FinishReason string       `json:"finish_reason,omitempty"`
+	Index        int                    `json:"index"`
+	Message      *chatMessage           `json:"message,omitempty"`
+	Delta        *chatMessage           `json:"delta,omitempty"`
+	FinishReason string                 `json:"finish_reason,omitempty"`
+	Logprobs     map[string]interface{} `json:"logprobs,omitempty"`
 }

 type usage struct {
-	PromptTokens     int `json:"prompt_tokens,omitempty"`
-	CompletionTokens int `json:"completion_tokens,omitempty"`
-	TotalTokens      int `json:"total_tokens,omitempty"`
+	PromptTokens            int                      `json:"prompt_tokens,omitempty"`
+	CompletionTokens        int                      `json:"completion_tokens,omitempty"`
+	TotalTokens             int                      `json:"total_tokens,omitempty"`
+	CompletionTokensDetails *completionTokensDetails `json:"completion_tokens_details,omitempty"`
+}
+
+type completionTokensDetails struct {
+	ReasoningTokens          int `json:"reasoning_tokens,omitempty"`
+	AcceptedPredictionTokens int `json:"accepted_prediction_tokens,omitempty"`
+	RejectedPredictionTokens int `json:"rejected_prediction_tokens,omitempty"`
 }

 type chatMessage struct {
-	Name      string     `json:"name,omitempty"`
-	Role      string     `json:"role,omitempty"`
-	Content   any        `json:"content,omitempty"`
-	ToolCalls []toolCall `json:"tool_calls,omitempty"`
+	Id               string                 `json:"id,omitempty"`
+	Audio            map[string]interface{} `json:"audio,omitempty"`
+	Name             string                 `json:"name,omitempty"`
+	Role             string                 `json:"role,omitempty"`
+	Content          any                    `json:"content,omitempty"`
+	ReasoningContent string                 `json:"reasoning_content,omitempty"`
+	ToolCalls        []toolCall             `json:"tool_calls,omitempty"`
+	Refusal          string                 `json:"refusal,omitempty"`
+}
+
+func (m *chatMessage) handleReasoningContent(reasoningContentMode string) {
+	if m.ReasoningContent == "" {
+		return
+	}
+	switch reasoningContentMode {
+	case reasoningBehaviorIgnore:
+		m.ReasoningContent = ""
+		break
+	case reasoningBehaviorConcat:
+		m.Content = fmt.Sprintf("%v\n%v", m.ReasoningContent, m.Content)
+		m.ReasoningContent = ""
+		break
+	case reasoningBehaviorPassThrough:
+	default:
+		break
+	}
 }

 type messageContent struct {
@@ -99,6 +166,9 @@ type imageUrl struct {
 }

 func (m *chatMessage) IsEmpty() bool {
+	if m.ReasoningContent != "" {
+		return false
+	}
 	if m.IsStringContent() && m.Content != "" {
 		return false
 	}
@@ -208,14 +278,18 @@ func (m *functionCall) IsEmpty() bool {
 	return m.Name == "" && m.Arguments == ""
 }

-type streamEvent struct {
+type StreamEvent struct {
 	Id         string `json:"id"`
 	Event      string `json:"event"`
 	Data       string `json:"data"`
 	HttpStatus string `json:"http_status"`
 }

-func (e *streamEvent) setValue(key, value string) {
+func (e *StreamEvent) IsEndData() bool {
+	return e.Data == streamEndDataValue
+}
+
+func (e *StreamEvent) SetValue(key, value string) {
 	switch key {
 	case streamEventIdItemKey:
 		e.Id = value
@@ -230,6 +304,25 @@ func (e *streamEvent) setValue(key, value string) {
 	}
 }

+func (e *StreamEvent) ToHttpString() string {
+	return fmt.Sprintf("%s %s\n\n", streamDataItemKey, e.Data)
+}
+
+// https://platform.openai.com/docs/guides/images
+type imageGenerationRequest struct {
+	Model  string `json:"model"`
+	Prompt string `json:"prompt"`
+	N      int    `json:"n,omitempty"`
+	Size   string `json:"size,omitempty"`
+}
+
+// https://platform.openai.com/docs/guides/text-to-speech
+type audioSpeechRequest struct {
+	Model string `json:"model"`
+	Input string `json:"input"`
+	Voice string `json:"voice"`
+}
+
 type embeddingsRequest struct {
 	Input          interface{} `json:"input"`
 	Model          string      `json:"model"`
--- a/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go
@@ -24,7 +24,7 @@ const (
 type moonshotProviderInitializer struct {
 }

-func (m *moonshotProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *moonshotProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.moonshotFileId != "" && config.context != nil {
 		return errors.New("moonshotFileId and context cannot be configured at the same time")
 	}
@@ -34,7 +34,14 @@ func (m *moonshotProviderInitializer) ValidateConfig(config ProviderConfig) erro
 	return nil
 }

+func (m *moonshotProviderInitializer) DefaultCapabilities() map[string]string {
+	return map[string]string{
+		string(ApiNameChatCompletion): moonshotChatCompletionPath,
+	}
+}
+
 func (m *moonshotProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &moonshotProvider{
 		config: config,
 		client: wrapper.NewClusterClient(wrapper.RouteCluster{
@@ -57,15 +64,12 @@ func (m *moonshotProvider) GetProviderType() string {
 }

 func (m *moonshotProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	return nil
 }

 func (m *moonshotProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestPathHeader(headers, moonshotChatCompletionPath)
+	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 	util.OverwriteRequestHostHeader(headers, moonshotDomain)
 	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+m.config.GetApiTokenInUse(ctx))
 	headers.Del("Content-Length")
@@ -74,9 +78,13 @@ func (m *moonshotProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiN
 // moonshot 有自己获取 context 的配置（moonshotFileId），因此无法复用 handleRequestBody 方法
 // moonshot 的 body 没有修改，无须实现TransformRequestBody，使用默认的 defaultTransformRequestBody 方法
 func (m *moonshotProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
+	// 非chat类型的请求，不做处理
+	if apiName != ApiNameChatCompletion {
+		return types.ActionContinue, nil
+	}

 	request := &chatCompletionRequest{}
 	if err := m.config.parseRequestAndMapModel(ctx, request, body, log); err != nil {
@@ -94,12 +102,12 @@ func (m *moonshotProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiNam
 		}()
 		if err != nil {
 			log.Errorf("failed to load context file: %v", err)
-			util.ErrorHandler("ai-proxy.moonshot.load_ctx_failed", fmt.Errorf("failed to load context file: %v", err))
+			_ = util.ErrorHandler("ai-proxy.moonshot.load_ctx_failed", fmt.Errorf("failed to load context file: %v", err))
 			return
 		}
 		err = m.performChatCompletion(ctx, content, request, log)
 		if err != nil {
-			util.ErrorHandler("ai-proxy.moonshot.insert_ctx_failed", fmt.Errorf("failed to perform chat completion: %v", err))
+			_ = util.ErrorHandler("ai-proxy.moonshot.insert_ctx_failed", fmt.Errorf("failed to perform chat completion: %v", err))
 		}
 	}, log)
 	if err == nil {
@@ -153,76 +161,9 @@ func (m *moonshotProvider) sendRequest(method, path, body, apiKey string, callba
 	}
 }

-func (m *moonshotProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
-	receivedBody := chunk
-	if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
-		receivedBody = append(bufferedStreamingBody, chunk...)
-	}
-
-	eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1
-
-	defer func() {
-		if eventStartIndex >= 0 && eventStartIndex < len(receivedBody) {
-			// Just in case the received chunk is not a complete event.
-			ctx.SetContext(ctxKeyStreamingBody, receivedBody[eventStartIndex:])
-		} else {
-			ctx.SetContext(ctxKeyStreamingBody, nil)
-		}
-	}()
-
-	var responseBuilder strings.Builder
-	currentKey := ""
-	currentEvent := &streamEvent{}
-	i, length := 0, len(receivedBody)
-	for i = 0; i < length; i++ {
-		ch := receivedBody[i]
-		if ch != '\n' {
-			if lineStartIndex == -1 {
-				if eventStartIndex == -1 {
-					eventStartIndex = i
-				}
-				lineStartIndex = i
-				valueStartIndex = -1
-			}
-			if valueStartIndex == -1 {
-				if ch == ':' {
-					valueStartIndex = i + 1
-					currentKey = string(receivedBody[lineStartIndex:valueStartIndex])
-				}
-			} else if valueStartIndex == i && ch == ' ' {
-				// Skip leading spaces in data.
-				valueStartIndex = i + 1
-			}
-			continue
-		}
-
-		if lineStartIndex != -1 {
-			value := string(receivedBody[valueStartIndex:i])
-			currentEvent.setValue(currentKey, value)
-		} else {
-			// Extra new line. The current event is complete.
-			log.Debugf("processing event: %v", currentEvent)
-			m.convertStreamEvent(&responseBuilder, currentEvent, log)
-			// Reset event parsing state.
-			eventStartIndex = -1
-			currentEvent = &streamEvent{}
-		}
-
-		// Reset line parsing state.
-		lineStartIndex = -1
-		valueStartIndex = -1
-		currentKey = ""
-	}
-
-	modifiedResponseChunk := responseBuilder.String()
-	log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
-	return []byte(modifiedResponseChunk), nil
-}
-
-func (m *moonshotProvider) convertStreamEvent(responseBuilder *strings.Builder, event *streamEvent, log wrapper.Log) error {
-	if event.Data == streamEndDataValue {
-		m.appendStreamEvent(responseBuilder, event)
-		return nil
+func (m *moonshotProvider) OnStreamingEvent(ctx wrapper.HttpContext, name ApiName, event StreamEvent, log wrapper.Log) ([]StreamEvent, error) {
+	if name != ApiNameChatCompletion {
+		return nil, nil
 	}

 	if gjson.Get(event.Data, "choices.0.usage").Exists() {
@@ -230,20 +171,19 @@ func (m *moonshotProvider) convertStreamEvent(responseBuilder *strings.Builder,
 		newData, err := sjson.Delete(event.Data, "choices.0.usage")
 		if err != nil {
 			log.Errorf("convert usage event error: %v", err)
-			return err
+			return nil, err
 		}
 		newData, err = sjson.SetRaw(newData, "usage", usageStr)
 		if err != nil {
 			log.Errorf("convert usage event error: %v", err)
-			return err
+			return nil, err
 		}
 		event.Data = newData
 	}
-	m.appendStreamEvent(responseBuilder, event)
-	return nil
+	return []StreamEvent{event}, nil
 }

-func (m *moonshotProvider) appendStreamEvent(responseBuilder *strings.Builder, event *streamEvent) {
+func (m *moonshotProvider) appendStreamEvent(responseBuilder *strings.Builder, event *StreamEvent) {
 	responseBuilder.WriteString(streamDataItemKey)
 	responseBuilder.WriteString(event.Data)
 	responseBuilder.WriteString("\n\n")
--- a/plugins/wasm-go/extensions/ai-proxy/provider/ollama.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/ollama.go
@@ -12,14 +12,10 @@ import (

 // ollamaProvider is the provider for Ollama service.

-const (
-	ollamaChatCompletionPath = "/v1/chat/completions"
-)
-
 type ollamaProviderInitializer struct {
 }

-func (m *ollamaProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *ollamaProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.ollamaServerHost == "" {
 		return errors.New("missing ollamaServerHost in provider config")
 	}
@@ -29,9 +25,17 @@ func (m *ollamaProviderInitializer) ValidateConfig(config ProviderConfig) error
 	return nil
 }

+func (m *ollamaProviderInitializer) DefaultCapabilities() map[string]string {
+	return map[string]string{
+		// ollama的chat接口path和OpenAI的chat接口一样
+		string(ApiNameChatCompletion): PathOpenAIChatCompletions,
+	}
+}
+
 func (m *ollamaProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
 	serverPortStr := fmt.Sprintf("%d", config.ollamaServerPort)
 	serviceDomain := config.ollamaServerHost + ":" + serverPortStr
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &ollamaProvider{
 		config:        config,
 		serviceDomain: serviceDomain,
@@ -50,22 +54,19 @@ func (m *ollamaProvider) GetProviderType() string {
 }

 func (m *ollamaProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	return nil
 }

 func (m *ollamaProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
 }

 func (m *ollamaProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestPathHeader(headers, ollamaChatCompletionPath)
+	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 	util.OverwriteRequestHostHeader(headers, m.serviceDomain)
 	headers.Del("Content-Length")
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/openai.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/openai.go
@@ -2,12 +2,13 @@ package provider

 import (
 	"encoding/json"
-	"fmt"
 	"net/http"
+	"path"
 	"strings"

 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
 )

@@ -16,18 +17,39 @@ import (
 const (
 	defaultOpenaiDomain             = "api.openai.com"
 	defaultOpenaiChatCompletionPath = "/v1/chat/completions"
+	defaultOpenaiCompletionPath     = "/v1/completions"
 	defaultOpenaiEmbeddingsPath     = "/v1/chat/embeddings"
+	defaultOpenaiAudioSpeech        = "/v1/audio/speech"
+	defaultOpenaiImageGeneration    = "/v1/images/generations"
 )

 type openaiProviderInitializer struct {
 }

-func (m *openaiProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *openaiProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	return nil
 }

+func (m *openaiProviderInitializer) DefaultCapabilities() map[string]string {
+	return map[string]string{
+		string(ApiNameCompletion):      defaultOpenaiCompletionPath,
+		string(ApiNameChatCompletion):  defaultOpenaiChatCompletionPath,
+		string(ApiNameEmbeddings):      defaultOpenaiEmbeddingsPath,
+		string(ApiNameImageGeneration): defaultOpenaiImageGeneration,
+		string(ApiNameAudioSpeech):     defaultOpenaiAudioSpeech,
+	}
+}
+
+func isDirectPath(path string) bool {
+	return strings.HasSuffix(path, "/completions") ||
+		strings.HasSuffix(path, "/chat/embeddings") ||
+		strings.HasSuffix(path, "/audio/speech") ||
+		strings.HasSuffix(path, "/images/generations")
+}
+
 func (m *openaiProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
 	if config.openaiCustomUrl == "" {
+		config.setDefaultCapabilities(m.DefaultCapabilities())
 		return &openaiProvider{
 			config:       config,
 			contextCache: createContextCache(&config),
@@ -35,22 +57,35 @@ func (m *openaiProviderInitializer) CreateProvider(config ProviderConfig) (Provi
 	}
 	customUrl := strings.TrimPrefix(strings.TrimPrefix(config.openaiCustomUrl, "http://"), "https://")
 	pairs := strings.SplitN(customUrl, "/", 2)
-	if len(pairs) != 2 {
-		return nil, fmt.Errorf("invalid openaiCustomUrl:%s", config.openaiCustomUrl)
+	customPath := "/"
+	if len(pairs) == 2 {
+		customPath += pairs[1]
 	}
+	isDirectCustomPath := isDirectPath(customPath)
+	capabilities := m.DefaultCapabilities()
+	if !isDirectCustomPath {
+		for key, mapPath := range capabilities {
+			capabilities[key] = path.Join(customPath, strings.TrimPrefix(mapPath, "/v1"))
+		}
+	}
+	config.setDefaultCapabilities(capabilities)
+	proxywasm.LogDebugf("ai-proxy: openai provider customDomain:%s, customPath:%s, isDirectCustomPath:%v, capabilities:%v",
+		pairs[0], customPath, isDirectCustomPath, capabilities)
 	return &openaiProvider{
-		config:       config,
-		customDomain: pairs[0],
-		customPath:   "/" + pairs[1],
-		contextCache: createContextCache(&config),
+		config:             config,
+		customDomain:       pairs[0],
+		customPath:         customPath,
+		isDirectCustomPath: isDirectCustomPath,
+		contextCache:       createContextCache(&config),
 	}, nil
 }

 type openaiProvider struct {
-	config       ProviderConfig
-	customDomain string
-	customPath   string
-	contextCache *contextCache
+	config             ProviderConfig
+	customDomain       string
+	customPath         string
+	isDirectCustomPath bool
+	contextCache       *contextCache
 }

 func (m *openaiProvider) GetProviderType() string {
@@ -63,21 +98,19 @@ func (m *openaiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiNa
 }

 func (m *openaiProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	if m.customPath == "" {
-		switch apiName {
-		case ApiNameChatCompletion:
-			util.OverwriteRequestPathHeader(headers, defaultOpenaiChatCompletionPath)
-		case ApiNameEmbeddings:
-			ctx.DontReadRequestBody()
-			util.OverwriteRequestPathHeader(headers, defaultOpenaiEmbeddingsPath)
+	if m.customPath != "" {
+		if m.isDirectCustomPath || apiName == "" {
+			util.OverwriteRequestPathHeader(headers, m.customPath)
+		} else {
+			util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 		}
 	} else {
-		util.OverwriteRequestPathHeader(headers, m.customPath)
+		util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 	}
-	if m.customDomain == "" {
-		util.OverwriteRequestHostHeader(headers, defaultOpenaiDomain)
-	} else {
+	if m.customDomain != "" {
 		util.OverwriteRequestHostHeader(headers, m.customDomain)
+	} else {
+		util.OverwriteRequestHostHeader(headers, defaultOpenaiDomain)
 	}
 	if len(m.config.apiTokens) > 0 {
 		util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+m.config.GetApiTokenInUse(ctx))
@@ -94,21 +127,14 @@ func (m *openaiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName,
 }

 func (m *openaiProvider) TransformRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
-	request := &chatCompletionRequest{}
-	if err := decodeChatCompletionRequest(body, request); err != nil {
-		return nil, err
-	}
 	if m.config.responseJsonSchema != nil {
+		request := &chatCompletionRequest{}
+		if err := decodeChatCompletionRequest(body, request); err != nil {
+			return nil, err
+		}
 		log.Debugf("[ai-proxy] set response format to %s", m.config.responseJsonSchema)
 		request.ResponseFormat = m.config.responseJsonSchema
+		body, _ = json.Marshal(request)
 	}
-	if request.Stream {
-		// For stream requests, we need to include usage in the response.
-		if request.StreamOptions == nil {
-			request.StreamOptions = &streamOptions{IncludeUsage: true}
-		} else if !request.StreamOptions.IncludeUsage {
-			request.StreamOptions.IncludeUsage = true
-		}
-	}
-	return json.Marshal(request)
+	return m.config.defaultTransformRequestBody(ctx, apiName, body, log)
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
@@ -1,7 +1,6 @@
 package provider

 import (
-	"encoding/json"
 	"errors"
 	"math/rand"
 	"net/http"
@@ -12,14 +11,29 @@ import (
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
 	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
 )

 type ApiName string
 type Pointcut string

 const (
-	ApiNameChatCompletion ApiName = "chatCompletion"
-	ApiNameEmbeddings     ApiName = "embeddings"
+
+	// ApiName 格式 {vendor}/{version}/{apitype}
+	// 表示遵循 厂商/版本/接口类型 的格式
+	// 目前openai是事实意义上的标准，但是也有其他厂商存在其他任务的一些可能的标准，比如cohere的rerank
+	ApiNameCompletion      ApiName = "openai/v1/completions"
+	ApiNameChatCompletion  ApiName = "openai/v1/chatcompletions"
+	ApiNameEmbeddings      ApiName = "openai/v1/embeddings"
+	ApiNameImageGeneration ApiName = "openai/v1/imagegeneration"
+	ApiNameAudioSpeech     ApiName = "openai/v1/audiospeech"
+
+	PathOpenAICompletions     = "/v1/completions"
+	PathOpenAIChatCompletions = "/v1/chat/completions"
+	PathOpenAIEmbeddings      = "/v1/embeddings"
+
+	// TODO: 以下是一些非标准的API名称，需要进一步确认是否支持
+	ApiNameCohereV1Rerank ApiName = "cohere/v1/rerank"

 	providerTypeMoonshot   = "moonshot"
 	providerTypeAzure      = "azure"
@@ -47,6 +61,7 @@ const (
 	providerTypeDoubao     = "doubao"
 	providerTypeCoze       = "coze"
 	providerTypeTogetherAI = "together-ai"
+	providerTypeDify       = "dify"

 	protocolOpenAI   = "openai"
 	protocolOriginal = "original"
@@ -70,13 +85,17 @@ const (
 	objectChatCompletion      = "chat.completion"
 	objectChatCompletionChunk = "chat.completion.chunk"

+	reasoningBehaviorPassThrough = "passthrough"
+	reasoningBehaviorIgnore      = "ignore"
+	reasoningBehaviorConcat      = "concat"
+
 	wildcard = "*"

 	defaultTimeout = 2 * 60 * 1000 // ms
 )

 type providerInitializer interface {
-	ValidateConfig(ProviderConfig) error
+	ValidateConfig(*ProviderConfig) error
 	CreateProvider(ProviderConfig) (Provider, error)
 }

@@ -110,6 +129,7 @@ var (
 		providerTypeDoubao:     &doubaoProviderInitializer{},
 		providerTypeCoze:       &cozeProviderInitializer{},
 		providerTypeTogetherAI: &togetherAIProviderInitializer{},
+		providerTypeDify:       &difyProviderInitializer{},
 	}
 )

@@ -129,6 +149,10 @@ type StreamingResponseBodyHandler interface {
 	OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error)
 }

+type StreamingEventHandler interface {
+	OnStreamingEvent(ctx wrapper.HttpContext, name ApiName, event StreamEvent, log wrapper.Log) ([]StreamEvent, error)
+}
+
 type ApiNameHandler interface {
 	GetApiName(path string) ApiName
 }
@@ -155,12 +179,6 @@ type TransformResponseBodyHandler interface {
 	TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error)
 }

-// TickFuncHandler allows the provider to execute a function periodically
-// Use case: the maximum expiration time of baidu apiToken is 24 hours, need to refresh periodically
-type TickFuncHandler interface {
-	GetTickFunc(log wrapper.Log) (tickPeriod int64, tickFunc func())
-}
-
 type ProviderConfig struct {
 	// @Title zh-CN ID
 	// @Description zh-CN AI服务提供商标识
@@ -180,6 +198,9 @@ type ProviderConfig struct {
 	// @Title zh-CN 失败请求重试
 	// @Description zh-CN 对失败的请求立即进行重试
 	retryOnFailure *retryOnFailure `required:"false" yaml:"retryOnFailure" json:"retryOnFailure"`
+	// @Title zh-CN 推理内容处理方式
+	// @Description zh-CN 如何处理大模型服务返回的推理内容。目前支持以下取值：passthrough（正常输出推理内容）、ignore（不输出推理内容）、concat（将推理内容拼接在常规输出内容之前）。默认为 passthrough。仅支持通义千问服务。
+	reasoningContentMode string `required:"false" yaml:"reasoningContentMode" json:"reasoningContentMode"`
 	// @Title zh-CN 基于OpenAI协议的自定义后端URL
 	// @Description zh-CN 仅适用于支持 openai 协议的服务。
 	openaiCustomUrl string `required:"false" yaml:"openaiCustomUrl" json:"openaiCustomUrl"`
@@ -246,17 +267,17 @@ type ProviderConfig struct {
 	// @Title zh-CN 自定义大模型参数配置
 	// @Description zh-CN 用于填充或者覆盖大模型调用时的参数
 	customSettings []CustomSetting
-	// @Title zh-CN Baidu 的 Access Key 和 Secret Key，中间用 : 分隔，用于申请 apiToken
-	baiduAccessKeyAndSecret []string `required:"false" yaml:"baiduAccessKeyAndSecret" json:"baiduAccessKeyAndSecret"`
-	// @Title zh-CN 请求刷新百度 apiToken 服务名称
-	baiduApiTokenServiceName string `required:"false" yaml:"baiduApiTokenServiceName" json:"baiduApiTokenServiceName"`
-	// @Title zh-CN 请求刷新百度 apiToken 服务域名
-	baiduApiTokenServiceHost string `required:"false" yaml:"baiduApiTokenServiceHost" json:"baiduApiTokenServiceHost"`
-	// @Title zh-CN 请求刷新百度 apiToken 服务端口
-	baiduApiTokenServicePort int64 `required:"false" yaml:"baiduApiTokenServicePort" json:"baiduApiTokenServicePort"`
-	// @Title zh-CN 是否使用全局的 apiToken
-	// @Description zh-CN 如果没有启用 apiToken failover，但是 apiToken 的状态又需要在多个 Wasm VM 中同步时需要将该参数设置为 true，例如 Baidu 的 apiToken 需要定时刷新
-	useGlobalApiToken bool `required:"false" yaml:"useGlobalApiToken" json:"useGlobalApiToken"`
+	// @Title zh-CN dify私有化部署的url
+	difyApiUrl string `required:"false" yaml:"difyApiUrl" json:"difyApiUrl"`
+	// @Title zh-CN dify的应用类型，Chat/Completion/Agent/Workflow
+	botType string `required:"false" yaml:"botType" json:"botType"`
+	// @Title zh-CN dify中应用类型为workflow时需要设置输入变量，当botType为workflow时一起使用
+	inputVariable string `required:"false" yaml:"inputVariable" json:"inputVariable"`
+	// @Title zh-CN dify中应用类型为workflow时需要设置输出变量，当botType为workflow时一起使用
+	outputVariable string `required:"false" yaml:"outputVariable" json:"outputVariable"`
+	// @Title zh-CN 额外支持的ai能力
+	// @Description zh-CN 开放的ai能力和urlpath映射，例如： {"openai/v1/chatcompletions": "/v1/chat/completions"}
+	capabilities map[string]string
 }

 func (c *ProviderConfig) GetId() string {
@@ -271,6 +292,10 @@ func (c *ProviderConfig) GetProtocol() string {
 	return c.protocol
 }

+func (c *ProviderConfig) IsOpenAIProtocol() bool {
+	return c.protocol == protocolOpenAI
+}
+
 func (c *ProviderConfig) FromJson(json gjson.Result) {
 	c.id = json.Get("id").String()
 	c.typ = json.Get("type").String()
@@ -349,6 +374,20 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 		}
 	}

+	c.reasoningContentMode = json.Get("reasoningContentMode").String()
+	if c.reasoningContentMode == "" {
+		c.reasoningContentMode = reasoningBehaviorPassThrough
+	} else {
+		c.reasoningContentMode = strings.ToLower(c.reasoningContentMode)
+		switch c.reasoningContentMode {
+		case reasoningBehaviorPassThrough, reasoningBehaviorIgnore, reasoningBehaviorConcat:
+			break
+		default:
+			c.reasoningContentMode = reasoningBehaviorPassThrough
+			break
+		}
+	}
+
 	failoverJson := json.Get("failover")
 	c.failover = &failover{
 		enabled: false,
@@ -364,25 +403,26 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 	if retryOnFailureJson.Exists() {
 		c.retryOnFailure.FromJson(retryOnFailureJson)
 	}
+	c.difyApiUrl = json.Get("difyApiUrl").String()
+	c.botType = json.Get("botType").String()
+	c.inputVariable = json.Get("inputVariable").String()
+	c.outputVariable = json.Get("outputVariable").String()

-	for _, accessKeyAndSecret := range json.Get("baiduAccessKeyAndSecret").Array() {
-		c.baiduAccessKeyAndSecret = append(c.baiduAccessKeyAndSecret, accessKeyAndSecret.String())
-	}
-	c.baiduApiTokenServiceName = json.Get("baiduApiTokenServiceName").String()
-	c.baiduApiTokenServiceHost = json.Get("baiduApiTokenServiceHost").String()
-	if c.baiduApiTokenServiceHost == "" {
-		c.baiduApiTokenServiceHost = baiduApiTokenDomain
-	}
-	c.baiduApiTokenServicePort = json.Get("baiduApiTokenServicePort").Int()
-	if c.baiduApiTokenServicePort == 0 {
-		c.baiduApiTokenServicePort = baiduApiTokenPort
+	c.capabilities = make(map[string]string)
+	for capability, pathJson := range json.Get("capabilities").Map() {
+		// 过滤掉不受支持的能力
+		switch capability {
+		case string(ApiNameChatCompletion),
+			string(ApiNameEmbeddings),
+			string(ApiNameImageGeneration),
+			string(ApiNameAudioSpeech),
+			string(ApiNameCohereV1Rerank):
+			c.capabilities[capability] = pathJson.String()
+		}
 	}
 }

 func (c *ProviderConfig) Validate() error {
-	if c.timeout < 0 {
-		return errors.New("invalid timeout in config")
-	}
 	if c.protocol != protocolOpenAI && c.protocol != protocolOriginal {
 		return errors.New("invalid protocol in config")
 	}
@@ -405,7 +445,7 @@ func (c *ProviderConfig) Validate() error {
 	if !has {
 		return errors.New("unknown provider type: " + c.typ)
 	}
-	if err := initializer.ValidateConfig(*c); err != nil {
+	if err := initializer.ValidateConfig(c); err != nil {
 		return err
 	}
 	return nil
@@ -515,7 +555,7 @@ func getMappedModel(model string, modelMapping map[string]string, log wrapper.Lo
 }

 func doGetMappedModel(model string, modelMapping map[string]string, log wrapper.Log) string {
-	if modelMapping == nil || len(modelMapping) == 0 {
+	if len(modelMapping) == 0 {
 		return ""
 	}

@@ -543,11 +583,97 @@ func doGetMappedModel(model string, modelMapping map[string]string, log wrapper.
 	return ""
 }

+func ExtractStreamingEvents(ctx wrapper.HttpContext, chunk []byte, log wrapper.Log) []StreamEvent {
+	body := chunk
+	if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
+		body = append(bufferedStreamingBody, chunk...)
+	}
+
+	eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1
+
+	defer func() {
+		if eventStartIndex >= 0 && eventStartIndex < len(body) {
+			// Just in case the received chunk is not a complete event.
+			ctx.SetContext(ctxKeyStreamingBody, body[eventStartIndex:])
+		} else {
+			ctx.SetContext(ctxKeyStreamingBody, nil)
+		}
+	}()
+
+	// Sample Qwen event response:
+	//
+	// event:result
+	// :HTTP_STATUS/200
+	// data:{"output":{"choices":[{"message":{"content":"你好！","role":"assistant"},"finish_reason":"null"}]},"usage":{"total_tokens":116,"input_tokens":114,"output_tokens":2},"request_id":"71689cfc-1f42-9949-86e8-9563b7f832b1"}
+	//
+	// event:error
+	// :HTTP_STATUS/400
+	// data:{"code":"InvalidParameter","message":"Preprocessor error","request_id":"0cbe6006-faec-9854-bf8b-c906d75c3bd8"}
+	//
+
+	var events []StreamEvent
+
+	currentKey := ""
+	currentEvent := &StreamEvent{}
+	i, length := 0, len(body)
+	for i = 0; i < length; i++ {
+		ch := body[i]
+		if ch != '\n' {
+			if lineStartIndex == -1 {
+				if eventStartIndex == -1 {
+					eventStartIndex = i
+				}
+				lineStartIndex = i
+				valueStartIndex = -1
+			}
+			if valueStartIndex == -1 {
+				if ch == ':' {
+					valueStartIndex = i + 1
+					currentKey = string(body[lineStartIndex:valueStartIndex])
+				}
+			} else if valueStartIndex == i && ch == ' ' {
+				// Skip leading spaces in data.
+				valueStartIndex = i + 1
+			}
+			continue
+		}
+
+		if lineStartIndex != -1 {
+			value := string(body[valueStartIndex:i])
+			currentEvent.SetValue(currentKey, value)
+		} else {
+			// Extra new line. The current event is complete.
+			events = append(events, *currentEvent)
+			// Reset event parsing state.
+			eventStartIndex = -1
+			currentEvent = &StreamEvent{}
+		}
+
+		// Reset line parsing state.
+		lineStartIndex = -1
+		valueStartIndex = -1
+		currentKey = ""
+	}
+
+	return events
+}
+
+func (c *ProviderConfig) isSupportedAPI(apiName ApiName) bool {
+	_, exist := c.capabilities[string(apiName)]
+	return exist
+}
+
+func (c *ProviderConfig) setDefaultCapabilities(capabilities map[string]string) {
+	for capability, path := range capabilities {
+		c.capabilities[capability] = path
+	}
+}
+
 func (c *ProviderConfig) handleRequestBody(
 	provider Provider, contextCache *contextCache, ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log,
 ) (types.Action, error) {
 	// use original protocol
-	if c.protocol == protocolOriginal {
+	if c.IsOriginal() {
 		return types.ActionContinue, nil
 	}

@@ -594,17 +720,21 @@ func (c *ProviderConfig) handleRequestHeaders(provider Provider, ctx wrapper.Htt
 	}
 }

+// defaultTransformRequestBody is the default request-body transformation. It only
+// applies the model mapping, rewriting the "model" field with sjson instead of
+// unmarshalling and re-marshalling the whole body, to improve performance.
+// For chat-completion requests it also reads the "stream" field, records the
+// result under ctxKeyIsStreaming and, when streaming, sets the Accept request
+// header to text/event-stream.
 func (c *ProviderConfig) defaultTransformRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
-	var request interface{}
-	if apiName == ApiNameChatCompletion {
-		request = &chatCompletionRequest{}
-	} else {
-		request = &embeddingsRequest{}
+	switch apiName {
+	case ApiNameChatCompletion:
+		stream := gjson.GetBytes(body, "stream").Bool()
+		if stream {
+			// The error from replacing the header is deliberately discarded.
+			_ = proxywasm.ReplaceHttpRequestHeader("Accept", "text/event-stream")
+			ctx.SetContext(ctxKeyIsStreaming, true)
+		} else {
+			ctx.SetContext(ctxKeyIsStreaming, false)
+		}
 	}
-	if err := c.parseRequestAndMapModel(ctx, request, body, log); err != nil {
-		return nil, err
-	}
-	return json.Marshal(request)
+	// Keep the client-supplied model name in the context before it is replaced
+	// by the mapped one in the outgoing body.
+	model := gjson.GetBytes(body, "model").String()
+	ctx.SetContext(ctxKeyOriginalRequestModel, model)
+	return sjson.SetBytes(body, "model", getMappedModel(model, c.modelMapping, log))
 }

 func (c *ProviderConfig) DefaultTransformResponseHeaders(ctx wrapper.HttpContext, headers http.Header) {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
@@ -42,7 +42,7 @@ const (
 type qwenProviderInitializer struct {
 }

-func (m *qwenProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *qwenProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if len(config.qwenFileIds) != 0 && config.context != nil {
 		return errors.New("qwenFileIds and context cannot be configured at the same time")
 	}
@@ -52,7 +52,15 @@ func (m *qwenProviderInitializer) ValidateConfig(config ProviderConfig) error {
 	return nil
 }

+// DefaultCapabilities returns the built-in table for qwen that maps each
+// supported API name to its request path.
+func (m *qwenProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string, 2)
+	capabilities[string(ApiNameChatCompletion)] = qwenChatCompletionPath
+	capabilities[string(ApiNameEmbeddings)] = qwenTextEmbeddingPath
+	return capabilities
+}
+
 func (m *qwenProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &qwenProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -75,18 +83,19 @@ func (m *qwenProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName
 	if m.config.IsOriginal() {
 	} else if m.config.qwenEnableCompatible {
 		util.OverwriteRequestPathHeader(headers, qwenCompatiblePath)
-	} else if apiName == ApiNameChatCompletion {
-		util.OverwriteRequestPathHeader(headers, qwenChatCompletionPath)
-	} else if apiName == ApiNameEmbeddings {
-		util.OverwriteRequestPathHeader(headers, qwenTextEmbeddingPath)
+	} else if apiName == ApiNameChatCompletion || apiName == ApiNameEmbeddings {
+		util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 	}
 }

 func (m *qwenProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header, log wrapper.Log) ([]byte, error) {
-	if apiName == ApiNameChatCompletion {
+	switch apiName {
+	case ApiNameChatCompletion:
 		return m.onChatCompletionRequestBody(ctx, body, headers, log)
-	} else {
+	case ApiNameEmbeddings:
 		return m.onEmbeddingsRequestBody(ctx, body, log)
+	default:
+		return m.config.defaultTransformRequestBody(ctx, apiName, body, log)
 	}
 }

@@ -95,10 +104,6 @@ func (m *qwenProvider) GetProviderType() string {
 }

 func (m *qwenProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion && apiName != ApiNameEmbeddings {
-		return errUnsupportedApiName
-	}
-
 	m.config.handleRequestHeaders(m, ctx, apiName, log)

 	if m.config.protocol == protocolOriginal {
@@ -140,7 +145,7 @@ func (m *qwenProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, b
 		return types.ActionContinue, nil
 	}

-	if apiName != ApiNameChatCompletion && apiName != ApiNameEmbeddings {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
@@ -183,89 +188,32 @@ func (m *qwenProvider) onEmbeddingsRequestBody(ctx wrapper.HttpContext, body []b
 	return json.Marshal(qwenRequest)
 }

-func (m *qwenProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
+func (m *qwenProvider) OnStreamingEvent(ctx wrapper.HttpContext, name ApiName, event StreamEvent, log wrapper.Log) ([]StreamEvent, error) {
 	if m.config.qwenEnableCompatible || name != ApiNameChatCompletion {
-		return chunk, nil
-	}
-
-	receivedBody := chunk
-	if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
-		receivedBody = append(bufferedStreamingBody, chunk...)
+		return nil, nil
 	}

 	incrementalStreaming := ctx.GetBoolContext(ctxKeyIncrementalStreaming, false)

-	eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1
-
-	defer func() {
-		if eventStartIndex >= 0 && eventStartIndex < len(receivedBody) {
-			// Just in case the received chunk is not a complete event.
-			ctx.SetContext(ctxKeyStreamingBody, receivedBody[eventStartIndex:])
-		} else {
-			ctx.SetContext(ctxKeyStreamingBody, nil)
-		}
-	}()
-
-	// Sample Qwen event response:
-	//
-	// event:result
-	// :HTTP_STATUS/200
-	// data:{"output":{"choices":[{"message":{"content":"你好！","role":"assistant"},"finish_reason":"null"}]},"usage":{"total_tokens":116,"input_tokens":114,"output_tokens":2},"request_id":"71689cfc-1f42-9949-86e8-9563b7f832b1"}
-	//
-	// event:error
-	// :HTTP_STATUS/400
-	// data:{"code":"InvalidParameter","message":"Preprocessor error","request_id":"0cbe6006-faec-9854-bf8b-c906d75c3bd8"}
-	//
-
-	var responseBuilder strings.Builder
-	currentKey := ""
-	currentEvent := &streamEvent{}
-	i, length := 0, len(receivedBody)
-	for i = 0; i < length; i++ {
-		ch := receivedBody[i]
-		if ch != '\n' {
-			if lineStartIndex == -1 {
-				if eventStartIndex == -1 {
-					eventStartIndex = i
-				}
-				lineStartIndex = i
-				valueStartIndex = -1
-			}
-			if valueStartIndex == -1 {
-				if ch == ':' {
-					valueStartIndex = i + 1
-					currentKey = string(receivedBody[lineStartIndex:valueStartIndex])
-				}
-			} else if valueStartIndex == i && ch == ' ' {
-				// Skip leading spaces in data.
-				valueStartIndex = i + 1
-			}
-			continue
-		}
-
-		if lineStartIndex != -1 {
-			value := string(receivedBody[valueStartIndex:i])
-			currentEvent.setValue(currentKey, value)
-		} else {
-			// Extra new line. The current event is complete.
-			log.Debugf("processing event: %v", currentEvent)
-			if err := m.convertStreamEvent(ctx, &responseBuilder, currentEvent, incrementalStreaming, log); err != nil {
-				return nil, err
-			}
-			// Reset event parsing state.
-			eventStartIndex = -1
-			currentEvent = &streamEvent{}
-		}
-
-		// Reset line parsing state.
-		lineStartIndex = -1
-		valueStartIndex = -1
-		currentKey = ""
+	qwenResponse := &qwenTextGenResponse{}
+	if err := json.Unmarshal([]byte(event.Data), qwenResponse); err != nil {
+		log.Errorf("unable to unmarshal Qwen response: %v", err)
+		return nil, fmt.Errorf("unable to unmarshal Qwen response: %v", err)
 	}

-	modifiedResponseChunk := responseBuilder.String()
-	log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
-	return []byte(modifiedResponseChunk), nil
+	var outputEvents []StreamEvent
+	responses := m.buildChatCompletionStreamingResponse(ctx, qwenResponse, incrementalStreaming, log)
+	for _, response := range responses {
+		responseBody, err := json.Marshal(response)
+		if err != nil {
+			log.Errorf("unable to marshal response: %v", err)
+			return nil, fmt.Errorf("unable to marshal response: %v", err)
+		}
+		modifiedEvent := event
+		modifiedEvent.Data = string(responseBody)
+		outputEvents = append(outputEvents, modifiedEvent)
+	}
+	return outputEvents, nil
 }

 func (m *qwenProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
@@ -278,6 +226,9 @@ func (m *qwenProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName Ap
 	if apiName == ApiNameEmbeddings {
 		return m.onEmbeddingsResponseBody(ctx, body, log)
 	}
+	if m.config.isSupportedAPI(apiName) {
+		return body, nil
+	}
 	return nil, errUnsupportedApiName
 }

@@ -349,7 +300,7 @@ func (m *qwenProvider) buildQwenTextGenerationRequest(ctx wrapper.HttpContext, o
 func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwenResponse *qwenTextGenResponse) *chatCompletionResponse {
 	choices := make([]chatCompletionChoice, 0, len(qwenResponse.Output.Choices))
 	for _, qwenChoice := range qwenResponse.Output.Choices {
-		message := qwenMessageToChatMessage(qwenChoice.Message)
+		message := qwenMessageToChatMessage(qwenChoice.Message, m.config.reasoningContentMode)
 		choices = append(choices, chatCompletionChoice{
 			Message:      &message,
 			FinishReason: qwenChoice.FinishReason,
@@ -387,7 +338,8 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
 	finished := qwenChoice.FinishReason != "" && qwenChoice.FinishReason != "null"
 	message := qwenChoice.Message

-	deltaContentMessage := &chatMessage{Role: message.Role, Content: message.Content}
+	deltaContentMessage := &chatMessage{Role: message.Role, Content: message.Content, ReasoningContent: message.ReasoningContent}
+	deltaContentMessage.handleReasoningContent(m.config.reasoningContentMode)
 	deltaToolCallsMessage := &chatMessage{Role: message.Role, ToolCalls: append([]toolCall{}, message.ToolCalls...)}
 	if !incrementalStreaming {
 		for _, tc := range message.ToolCalls {
@@ -422,6 +374,11 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
 					}
 				}
 			}
+			if message.ReasoningContent == "" {
+				message.ReasoningContent = pushedMessage.ReasoningContent
+			} else {
+				deltaContentMessage.ReasoningContent = util.StripPrefix(deltaContentMessage.ReasoningContent, pushedMessage.ReasoningContent)
+			}
 			if len(deltaToolCallsMessage.ToolCalls) > 0 && pushedMessage.ToolCalls != nil {
 				for i, tc := range deltaToolCallsMessage.ToolCalls {
 					if i >= len(pushedMessage.ToolCalls) {
@@ -467,39 +424,6 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
 	return responses
 }

-func (m *qwenProvider) convertStreamEvent(ctx wrapper.HttpContext, responseBuilder *strings.Builder, event *streamEvent, incrementalStreaming bool, log wrapper.Log) error {
-	if event.Data == streamEndDataValue {
-		m.appendStreamEvent(responseBuilder, event)
-		return nil
-	}
-
-	if event.Event != eventResult || event.HttpStatus != httpStatus200 {
-		// Something goes wrong. Just pass through the event.
-		m.appendStreamEvent(responseBuilder, event)
-		return nil
-	}
-
-	qwenResponse := &qwenTextGenResponse{}
-	if err := json.Unmarshal([]byte(event.Data), qwenResponse); err != nil {
-		log.Errorf("unable to unmarshal Qwen response: %v", err)
-		return fmt.Errorf("unable to unmarshal Qwen response: %v", err)
-	}
-
-	responses := m.buildChatCompletionStreamingResponse(ctx, qwenResponse, incrementalStreaming, log)
-	for _, response := range responses {
-		responseBody, err := json.Marshal(response)
-		if err != nil {
-			log.Errorf("unable to marshal response: %v", err)
-			return fmt.Errorf("unable to marshal response: %v", err)
-		}
-		modifiedEvent := &*event
-		modifiedEvent.Data = string(responseBody)
-		m.appendStreamEvent(responseBuilder, modifiedEvent)
-	}
-
-	return nil
-}
-
 func (m *qwenProvider) insertHttpContextMessage(body []byte, content string, onlyOneSystemBeforeFile bool) ([]byte, error) {
 	request := &qwenTextGenRequest{}
 	if err := json.Unmarshal(body, request); err != nil {
@@ -544,7 +468,7 @@ func (m *qwenProvider) insertHttpContextMessage(body []byte, content string, onl
 	return json.Marshal(request)
 }

-func (m *qwenProvider) appendStreamEvent(responseBuilder *strings.Builder, event *streamEvent) {
+func (m *qwenProvider) appendStreamEvent(responseBuilder *strings.Builder, event *StreamEvent) {
 	responseBuilder.WriteString(streamDataItemKey)
 	responseBuilder.WriteString(event.Data)
 	responseBuilder.WriteString("\n\n")
@@ -640,10 +564,11 @@ type qwenUsage struct {
 }

 type qwenMessage struct {
-	Name      string     `json:"name,omitempty"`
-	Role      string     `json:"role"`
-	Content   any        `json:"content"`
-	ToolCalls []toolCall `json:"tool_calls,omitempty"`
+	Name             string     `json:"name,omitempty"`
+	Role             string     `json:"role"`
+	Content          any        `json:"content"`
+	ReasoningContent string     `json:"reasoning_content,omitempty"`
+	ToolCalls        []toolCall `json:"tool_calls,omitempty"`
 }

 type qwenVlMessageContent struct {
@@ -681,13 +606,16 @@ type qwenTextEmbeddings struct {
 	Embedding []float64 `json:"embedding"`
 }

-func qwenMessageToChatMessage(qwenMessage qwenMessage) chatMessage {
-	return chatMessage{
-		Name:      qwenMessage.Name,
-		Role:      qwenMessage.Role,
-		Content:   qwenMessage.Content,
-		ToolCalls: qwenMessage.ToolCalls,
+func qwenMessageToChatMessage(qwenMessage qwenMessage, reasoningContentMode string) chatMessage {
+	msg := chatMessage{
+		Name:             qwenMessage.Name,
+		Role:             qwenMessage.Role,
+		Content:          qwenMessage.Content,
+		ReasoningContent: qwenMessage.ReasoningContent,
+		ToolCalls:        qwenMessage.ToolCalls,
 	}
+	msg.handleReasoningContent(reasoningContentMode)
+	return msg
 }

 func (m *qwenMessage) IsStringContent() bool {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/retry.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/retry.go
@@ -1,11 +1,13 @@
 package provider

 import (
+	"math/rand"
+	"net/http"
+
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/tidwall/gjson"
-	"net/http"
 )

 const (
@@ -38,12 +40,12 @@ func (c *ProviderConfig) isRetryOnFailureEnabled() bool {
 	return c.retryOnFailure.enabled
 }

-func (c *ProviderConfig) retryFailedRequest(activeProvider Provider, ctx wrapper.HttpContext, log wrapper.Log) {
+func (c *ProviderConfig) retryFailedRequest(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, apiTokens []string, log wrapper.Log) {
 	log.Debugf("Retry failed request: provider=%s", activeProvider.GetProviderType())
 	retryClient := createRetryClient(ctx)
 	apiName, _ := ctx.GetContext(CtxKeyApiName).(ApiName)
 	ctx.SetContext(ctxRetryCount, 1)
-	c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, log)
+	c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, apiTokenInUse, apiTokens, log)
 }

 func (c *ProviderConfig) transformResponseHeadersAndBody(ctx wrapper.HttpContext, activeProvider Provider, apiName ApiName, headers http.Header, body []byte, log wrapper.Log) ([][2]string, []byte) {
@@ -67,7 +69,8 @@ func (c *ProviderConfig) transformResponseHeadersAndBody(ctx wrapper.HttpContext
 func (c *ProviderConfig) retryCall(
 	ctx wrapper.HttpContext, log wrapper.Log, activeProvider Provider,
 	apiName ApiName, statusCode int, responseHeaders http.Header, responseBody []byte,
-	retryClient *wrapper.ClusterClient[wrapper.RouteCluster]) {
+	retryClient *wrapper.ClusterClient[wrapper.RouteCluster],
+	apiTokenInUse string, apiTokens []string) {

 	retryCount := ctx.GetContext(ctxRetryCount).(int)
 	log.Debugf("Sent retry request: %d/%d", retryCount, c.retryOnFailure.maxRetries)
@@ -76,6 +79,7 @@ func (c *ProviderConfig) retryCall(
 		log.Debugf("Retry request succeeded")
 		headers, body := c.transformResponseHeadersAndBody(ctx, activeProvider, apiName, responseHeaders, responseBody, log)
 		proxywasm.SendHttpResponse(200, headers, body, -1)
+		return
 	} else {
 		log.Debugf("The retry request still failed, status: %d, responseHeaders: %v, responseBody: %s", statusCode, responseHeaders, string(responseBody))
 	}
@@ -83,26 +87,41 @@ func (c *ProviderConfig) retryCall(
 	retryCount++
 	if retryCount <= int(c.retryOnFailure.maxRetries) {
 		ctx.SetContext(ctxRetryCount, retryCount)
-		c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, log)
+		c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, apiTokenInUse, apiTokens, log)
 	} else {
 		log.Debugf("Reached the maximum retry count: %d", c.retryOnFailure.maxRetries)
 		proxywasm.ResumeHttpResponse()
+		return
 	}
 }

 func (c *ProviderConfig) sendRetryRequest(
 	ctx wrapper.HttpContext, apiName ApiName, activeProvider Provider,
-	retryClient *wrapper.ClusterClient[wrapper.RouteCluster], log wrapper.Log) {
+	retryClient *wrapper.ClusterClient[wrapper.RouteCluster],
+	apiTokenInUse string, apiTokens []string, log wrapper.Log) {
+
+	// Remove last failed token from retry apiTokens list
+	apiTokens = removeApiTokenFromRetryList(apiTokens, apiTokenInUse, log)
+	if len(apiTokens) == 0 {
+		log.Debugf("No more apiTokens to retry")
+		proxywasm.ResumeHttpResponse()
+		return
+	}
+	// Set apiTokenInUse for the retry request
+	apiTokenInUse = GetRandomToken(apiTokens)
+	log.Debugf("Retry request with apiToken: %s", apiTokenInUse)
+	ctx.SetContext(c.failover.ctxApiTokenInUse, apiTokenInUse)

 	requestHeaders, requestBody := c.getRetryRequestHeadersAndBody(ctx, activeProvider, apiName, log)
 	path := getRetryPath(ctx)

 	err := retryClient.Post(path, util.HeaderToSlice(requestHeaders), requestBody, func(statusCode int, responseHeaders http.Header, responseBody []byte) {
-		c.retryCall(ctx, log, activeProvider, apiName, statusCode, responseHeaders, responseBody, retryClient)
+		c.retryCall(ctx, log, activeProvider, apiName, statusCode, responseHeaders, responseBody, retryClient, apiTokenInUse, apiTokens)
 	}, uint32(c.retryOnFailure.retryTimeout))
 	if err != nil {
 		log.Errorf("Failed to send retry request: %v", err)
 		proxywasm.ResumeHttpResponse()
+		return
 	}
 }

@@ -126,9 +145,7 @@ func getRetryPath(ctx wrapper.HttpContext) string {
 }

 func (c *ProviderConfig) getRetryRequestHeadersAndBody(ctx wrapper.HttpContext, activeProvider Provider, apiName ApiName, log wrapper.Log) (http.Header, []byte) {
-	// The retry request may be sent with different apiToken, so the header needs to be regenerated
-	c.SetApiTokenInUse(ctx, log)
-
+	// The retry request is sent with different apiToken, so the header needs to be regenerated
 	requestHeaders := http.Header{
 		"Content-Type": []string{"application/json"},
 	}
@@ -139,3 +156,27 @@ func (c *ProviderConfig) getRetryRequestHeadersAndBody(ctx wrapper.HttpContext,

 	return requestHeaders, requestBody
 }
+
+// removeApiTokenFromRetryList returns the entries of apiTokens that differ from
+// removedApiToken, preserving their order. The input slice is not modified, and
+// the result is nil when nothing remains.
+//
+// Only token counts are written to the log: the token values are credentials
+// and must not end up in log output.
+func removeApiTokenFromRetryList(apiTokens []string, removedApiToken string, log wrapper.Log) []string {
+	var availableApiTokens []string
+	for _, token := range apiTokens {
+		if token != removedApiToken {
+			availableApiTokens = append(availableApiTokens, token)
+		}
+	}
+	log.Debugf("Removed failed apiToken from retry apiTokens list: %d of %d apiTokens remain available",
+		len(availableApiTokens), len(apiTokens))
+	return availableApiTokens
+}
+
+// GetRandomToken picks one entry of apiTokens at random. It returns the empty
+// string for an empty list, and the sole entry of a single-element list without
+// consulting the random source.
+func GetRandomToken(apiTokens []string) string {
+	if len(apiTokens) == 0 {
+		return ""
+	}
+	if len(apiTokens) == 1 {
+		return apiTokens[0]
+	}
+	return apiTokens[rand.Intn(len(apiTokens))]
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/spark.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/spark.go
@@ -51,11 +51,18 @@ type sparkStreamResponse struct {
 	Created int64  `json:"created"`
 }

-func (i *sparkProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (i *sparkProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	return nil
 }

+// DefaultCapabilities returns the built-in table for spark that maps each
+// supported API name to its request path; only chat completion is listed.
+func (i *sparkProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := map[string]string{}
+	capabilities[string(ApiNameChatCompletion)] = sparkChatCompletionPath
+	return capabilities
+}
+
 func (i *sparkProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(i.DefaultCapabilities())
 	return &sparkProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -67,21 +74,21 @@ func (p *sparkProvider) GetProviderType() string {
 }

 func (p *sparkProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	p.config.handleRequestHeaders(p, ctx, apiName, log)
 	return nil
 }

 func (p *sparkProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !p.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return p.config.handleRequestBody(p, p.contextCache, ctx, apiName, body, log)
 }

 func (p *sparkProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
+	if apiName != ApiNameChatCompletion {
+		return body, nil
+	}
 	sparkResponse := &sparkResponse{}
 	if err := json.Unmarshal(body, sparkResponse); err != nil {
 		return nil, fmt.Errorf("unable to unmarshal spark response: %v", err)
@@ -97,6 +104,9 @@ func (p *sparkProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name Ap
 	if isLastChunk || len(chunk) == 0 {
 		return nil, nil
 	}
+	if name != ApiNameChatCompletion {
+		return chunk, nil
+	}
 	responseBuilder := &strings.Builder{}
 	lines := strings.Split(string(chunk), "\n")
 	for _, data := range lines {
@@ -168,7 +178,7 @@ func (p *sparkProvider) appendResponse(responseBuilder *strings.Builder, respons
 }

 func (p *sparkProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestPathHeader(headers, sparkChatCompletionPath)
+	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), p.config.capabilities)
 	util.OverwriteRequestHostHeader(headers, sparkHost)
 	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+p.config.GetApiTokenInUse(ctx))
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/stepfun.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/stepfun.go
@@ -17,14 +17,22 @@ const (
 type stepfunProviderInitializer struct {
 }

-func (m *stepfunProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *stepfunProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
 	}
 	return nil
 }

+// DefaultCapabilities returns the built-in table for stepfun that maps each
+// supported API name to its request path; only chat completion is listed.
+func (m *stepfunProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := map[string]string{}
+	// stepfun's chat endpoint path is the same as OpenAI's chat endpoint path.
+	capabilities[string(ApiNameChatCompletion)] = stepfunChatCompletionPath
+	return capabilities
+}
+
 func (m *stepfunProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &stepfunProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -41,22 +49,19 @@ func (m *stepfunProvider) GetProviderType() string {
 }

 func (m *stepfunProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	return nil
 }

 func (m *stepfunProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
 }

 func (m *stepfunProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestPathHeader(headers, stepfunChatCompletionPath)
+	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 	util.OverwriteRequestHostHeader(headers, stepfunDomain)
 	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+m.config.GetApiTokenInUse(ctx))
 	headers.Del("Content-Length")
--- a/plugins/wasm-go/extensions/ai-proxy/provider/together_ai.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/together_ai.go
@@ -2,11 +2,12 @@ package provider

 import (
 	"errors"
+	"net/http"
+	"strings"
+
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
-	"net/http"
-	"strings"
 )

 const (
@@ -16,14 +17,21 @@ const (

 type togetherAIProviderInitializer struct{}

-func (m *togetherAIProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *togetherAIProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
 	}
 	return nil
 }

+// DefaultCapabilities returns the built-in table for Together AI that maps each
+// supported API name to its request path; only chat completion is listed.
+func (m *togetherAIProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := map[string]string{}
+	capabilities[string(ApiNameChatCompletion)] = togetherAICompletionPath
+	return capabilities
+}
+
 func (m *togetherAIProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &togetherAIProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -40,22 +48,19 @@ func (m *togetherAIProvider) GetProviderType() string {
 }

 func (m *togetherAIProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	return nil
 }

 func (m *togetherAIProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
 }

 func (m *togetherAIProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestPathHeader(headers, togetherAICompletionPath)
+	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 	util.OverwriteRequestHostHeader(headers, togetherAIDomain)
 	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+m.config.GetApiTokenInUse(ctx))
 	headers.Del("Content-Length")
--- a/plugins/wasm-go/extensions/ai-proxy/provider/yi.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/yi.go
@@ -17,14 +17,21 @@ const (
 type yiProviderInitializer struct {
 }

-func (m *yiProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *yiProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
 	}
 	return nil
 }

+// DefaultCapabilities returns the built-in table for yi that maps each
+// supported API name to its request path; only chat completion is listed.
+func (m *yiProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := map[string]string{}
+	capabilities[string(ApiNameChatCompletion)] = yiChatCompletionPath
+	return capabilities
+}
+
 func (m *yiProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &yiProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -41,22 +48,19 @@ func (m *yiProvider) GetProviderType() string {
 }

 func (m *yiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	return nil
 }

 func (m *yiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
 }

 func (m *yiProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestPathHeader(headers, yiChatCompletionPath)
+	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 	util.OverwriteRequestHostHeader(headers, yiDomain)
 	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+m.config.GetApiTokenInUse(ctx))
 	headers.Del("Content-Length")
--- a/plugins/wasm-go/extensions/ai-proxy/provider/zhipuai.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/zhipuai.go
@@ -13,18 +13,27 @@ import (
 const (
 	zhipuAiDomain             = "open.bigmodel.cn"
 	zhipuAiChatCompletionPath = "/api/paas/v4/chat/completions"
+	zhipuAiEmbeddingsPath     = "/api/paas/v4/embeddings"
 )

 type zhipuAiProviderInitializer struct{}

-func (m *zhipuAiProviderInitializer) ValidateConfig(config ProviderConfig) error {
+func (m *zhipuAiProviderInitializer) ValidateConfig(config *ProviderConfig) error {
 	if config.apiTokens == nil || len(config.apiTokens) == 0 {
 		return errors.New("no apiToken found in provider config")
 	}
 	return nil
 }

+// DefaultCapabilities returns the built-in table for zhipuAi that maps each
+// supported API name (chat completion and embeddings) to its request path.
+func (m *zhipuAiProviderInitializer) DefaultCapabilities() map[string]string {
+	capabilities := make(map[string]string, 2)
+	capabilities[string(ApiNameChatCompletion)] = zhipuAiChatCompletionPath
+	capabilities[string(ApiNameEmbeddings)] = zhipuAiEmbeddingsPath
+	return capabilities
+}
+
 func (m *zhipuAiProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	config.setDefaultCapabilities(m.DefaultCapabilities())
 	return &zhipuAiProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
@@ -41,22 +50,19 @@ func (m *zhipuAiProvider) GetProviderType() string {
 }

 func (m *zhipuAiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) error {
-	if apiName != ApiNameChatCompletion {
-		return errUnsupportedApiName
-	}
 	m.config.handleRequestHeaders(m, ctx, apiName, log)
 	return nil
 }

 func (m *zhipuAiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
-	if apiName != ApiNameChatCompletion {
+	if !m.config.isSupportedAPI(apiName) {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body, log)
 }

 func (m *zhipuAiProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
-	util.OverwriteRequestPathHeader(headers, zhipuAiChatCompletionPath)
+	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
 	util.OverwriteRequestHostHeader(headers, zhipuAiDomain)
 	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+m.config.GetApiTokenInUse(ctx))
 	headers.Del("Content-Length")
@@ -66,5 +72,8 @@ func (m *zhipuAiProvider) GetApiName(path string) ApiName {
 	if strings.Contains(path, zhipuAiChatCompletionPath) {
 		return ApiNameChatCompletion
 	}
+	if strings.Contains(path, zhipuAiEmbeddingsPath) {
+		return ApiNameEmbeddings
+	}
 	return ""
 }
--- a/plugins/wasm-go/extensions/ai-proxy/util/http.go
+++ b/plugins/wasm-go/extensions/ai-proxy/util/http.go
@@ -57,6 +57,17 @@ func OverwriteRequestPathHeader(headers http.Header, path string) {
 	headers.Set(":path", path)
 }

+// OverwriteRequestPathHeaderByCapability rewrites the ":path" header to the
+// path that mapping associates with apiName. When apiName has no entry in
+// mapping, headers is left untouched. Before the rewrite, the current request
+// path (read through proxywasm, not from headers) is stored in
+// "X-ENVOY-ORIGINAL-PATH"; if that read fails the header is simply not set.
+func OverwriteRequestPathHeaderByCapability(headers http.Header, apiName string, mapping map[string]string) {
+	if targetPath, supported := mapping[apiName]; supported {
+		currentPath, err := proxywasm.GetHttpRequestHeader(":path")
+		if err == nil {
+			headers.Set("X-ENVOY-ORIGINAL-PATH", currentPath)
+		}
+		headers.Set(":path", targetPath)
+	}
+}
+
 func OverwriteRequestAuthorizationHeader(headers http.Header, credential string) {
 	if exist := headers.Get("X-HI-ORIGINAL-AUTH"); exist == "" {
 		if originAuth := headers.Get("Authorization"); originAuth != "" {
--- a/plugins/wasm-go/extensions/ai-quota/README.md
+++ b/plugins/wasm-go/extensions/ai-quota/README.md
@@ -26,14 +26,14 @@ description: AI 配额管理插件配置参考

 `redis`中每一项的配置字段说明

-| 配置项       | 类型   | 必填 | 默认值                                                     | 说明                        |
-| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------- |
-| service_name | string | 必填 | -                                                          | redis 服务名称，带服务类型的完整 FQDN 名称，例如 my-redis.dns、redis.my-ns.svc.cluster.local     |
-| service_port | int    | 否   | 服务类型为固定地址（static service）默认值为80，其他为6379 | 输入redis服务的服务端口     |
-| username     | string | 否   | -                                                          | redis用户名                 |
-| password     | string | 否   | -                                                          | redis密码                   |
-| timeout      | int    | 否   | 1000                                                       | redis连接超时时间，单位毫秒 |
-
+| 配置项       | 类型   | 必填 | 默认值                                                     | 说明                                                                                         |
+| ------------ | ------ | ---- | ---------------------------------------------------------- | ---------------------------                                                                  |
+| service_name | string | 必填 | -                                                          | redis 服务名称，带服务类型的完整 FQDN 名称，例如 my-redis.dns、redis.my-ns.svc.cluster.local |
+| service_port | int    | 否   | 服务类型为固定地址（static service）默认值为80，其他为6379 | 输入redis服务的服务端口                                                                      |
+| username     | string | 否   | -                                                          | redis用户名                                                                                  |
+| password     | string | 否   | -                                                          | redis密码                                                                                    |
+| timeout      | int    | 否   | 1000                                                       | redis连接超时时间，单位毫秒                                                                  |
+| database     | int    | 否   | 0                                                          | 使用的数据库id，例如配置为1，对应`SELECT 1`                                                  |


 ## 配置示例
--- a/plugins/wasm-go/extensions/ai-quota/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-quota/README_EN.md
@@ -18,13 +18,14 @@ Plugin execution priority: `750`
 | `admin_path`        | string           | Optional                                   |   /quota      | Prefix for the path to manage quota requests      |
 | `redis`             | object           | Yes                                        |               | Redis related configuration                        |
 Explanation of each configuration field in `redis`
-| Configuration Item  | Type             | Required | Default Value                                            | Explanation                                   |
-|---------------------|------------------|----------|---------------------------------------------------------|-----------------------------------------------|
-| service_name        | string           | Required | -                                                       | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
-| service_port        | int              | No       | Default value for static service is 80; others are 6379 | Service port for the redis service            |
-| username            | string           | No       | -                                                       | Redis username                                |
-| password            | string           | No       | -                                                       | Redis password                                |
-| timeout             | int              | No       | 1000                                                    | Redis connection timeout in milliseconds      |
+| Configuration Item | Type   | Required | Default Value                                           | Explanation                                                                                             |
+|--------------------|--------|----------|---------------------------------------------------------|---------------------------------------------------------------------------------------------------------|
+| service_name       | string | Required | -                                                       | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
+| service_port       | int    | No       | Default value for static service is 80; others are 6379 | Service port for the redis service                                                                      |
+| username           | string | No       | -                                                       | Redis username                                                                                          |
+| password           | string | No       | -                                                       | Redis password                                                                                          |
+| timeout            | int    | No       | 1000                                                    | Redis connection timeout in milliseconds                                                                |
+| database           | int    | No       | 0                                                       | The database ID used, for example, configured as 1, corresponds to `SELECT 1`.                          |

 ## Configuration Example
 ### Identify request parameter apikey and apply rate limiting accordingly
--- a/plugins/wasm-go/extensions/ai-quota/go.mod
+++ b/plugins/wasm-go/extensions/ai-quota/go.mod
@@ -2,11 +2,11 @@ module github.com/alibaba/higress/plugins/wasm-go/extensions/ai-quota

 go 1.19

-//replace github.com/alibaba/higress/plugins/wasm-go => ../..
+replace github.com/alibaba/higress/plugins/wasm-go => ../..

 require (
 	github.com/alibaba/higress/plugins/wasm-go v1.4.3-0.20240808022948-34f5722d93de
-	github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f
+	github.com/higress-group/proxy-wasm-go-sdk v1.0.0
 	github.com/tidwall/gjson v1.17.3
 	github.com/tidwall/resp v0.1.1
 )
--- a/plugins/wasm-go/extensions/ai-quota/go.sum
+++ b/plugins/wasm-go/extensions/ai-quota/go.sum
@@ -1,12 +1,10 @@
-github.com/alibaba/higress/plugins/wasm-go v1.4.3-0.20240808022948-34f5722d93de h1:lDLqj7Hw41ox8VdsP7oCTPhjPa3+QJUCKApcLh2a45Y=
-github.com/alibaba/higress/plugins/wasm-go v1.4.3-0.20240808022948-34f5722d93de/go.mod h1:359don/ahMxpfeLMzr29Cjwcu8IywTTDUzWlBPRNLHw=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
 github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
 github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
-github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f h1:ZIiIBRvIw62gA5MJhuwp1+2wWbqL9IGElQ499rUsYYg=
-github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f/go.mod h1:hNFjhrLUIq+kJ9bOcs8QtiplSQ61GZXtd2xHKx4BYRo=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKEak5MWjBDhWjuHijU=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0=
 github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
 github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
--- a/plugins/wasm-go/extensions/ai-quota/main.go
+++ b/plugins/wasm-go/extensions/ai-quota/main.go
@@ -69,6 +69,7 @@ type RedisInfo struct {
 	Username    string `required:"false" yaml:"username" json:"username"`
 	Password    string `required:"false" yaml:"password" json:"password"`
 	Timeout     int    `required:"false" yaml:"timeout" json:"timeout"`
+	Database    int    `required:"false" yaml:"database" json:"database"`
 }

 func parseConfig(json gjson.Result, config *QuotaConfig, log wrapper.Log) error {
@@ -110,17 +111,19 @@ func parseConfig(json gjson.Result, config *QuotaConfig, log wrapper.Log) error
 	if timeout == 0 {
 		timeout = 1000
 	}
+	database := int(redisConfig.Get("database").Int())
 	config.redisInfo.ServiceName = serviceName
 	config.redisInfo.ServicePort = servicePort
 	config.redisInfo.Username = username
 	config.redisInfo.Password = password
 	config.redisInfo.Timeout = timeout
+	config.redisInfo.Database = database
 	config.redisClient = wrapper.NewRedisClusterClient(wrapper.FQDNCluster{
 		FQDN: serviceName,
 		Port: int64(servicePort),
 	})

-	return config.redisClient.Init(username, password, int64(timeout))
+	return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database))
 }

 func onHttpRequestHeaders(context wrapper.HttpContext, config QuotaConfig, log wrapper.Log) types.Action {
--- a/plugins/wasm-go/extensions/ai-search/README.md
+++ b/plugins/wasm-go/extensions/ai-search/README.md
@@ -0,0 +1,244 @@
+---
+title: AI 搜索增强
+keywords: [higress,ai search]
+description: higress 支持通过集成搜索引擎（Google/Bing/Arxiv/Elasticsearch等）的实时结果，增强DeepSeek-R1等模型的回答准确性和时效性
+---
+
+## 功能说明
+
+`ai-search`插件通过集成搜索引擎（Google/Bing/Arxiv/Elasticsearch等）的实时结果，增强AI模型的回答准确性和时效性。插件会自动将搜索结果注入到提示模板中，并根据配置决定是否在最终回答中添加引用来源。
+
+## 运行属性
+
+插件执行阶段：`默认阶段`
+插件执行优先级：`440`
+
+## 配置字段
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| needReference | bool | 选填 | false | 是否在回答中添加引用来源 |
+| referenceFormat | string | 选填 | `"**References:**\n%s"` | 引用内容格式，必须包含%s占位符 |
+| defaultLang | string | 选填 | - | 默认搜索语言代码（如zh-CN/en-US） |
+| promptTemplate | string | 选填 | 内置模板 | 提示模板，必须包含`{search_results}`和`{question}`占位符 |
+| searchFrom | array of object | 必填 | - | 参考下面搜索引擎配置，至少配置一个引擎 |
+| searchRewrite | object | 选填 | - | 搜索重写配置，用于使用LLM服务优化搜索查询 |
+
+## 搜索重写说明
+
+搜索重写功能使用LLM服务对用户的原始查询进行分析和优化，可以：
+1. 将用户的自然语言查询转换为更适合搜索引擎的关键词组合
+2. 对于Arxiv论文搜索，自动识别相关的论文类别并添加类别限定
+3. 对于私有知识库搜索，将长查询拆分成多个精准的关键词组合
+
+强烈建议在使用Arxiv或Elasticsearch引擎时启用此功能。对于Arxiv搜索，它能准确识别论文所属领域并优化英文关键词；对于私有知识库搜索，它能提供更精准的关键词匹配，显著提升搜索效果。
+
+## 搜索重写配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| llmServiceName | string | 必填 | - | LLM服务名称 |
+| llmServicePort | number | 必填 | - | LLM服务端口 |
+| llmApiKey | string | 必填 | - | LLM服务API密钥 |
+| llmUrl | string | 必填 | - | LLM服务API地址 |
+| llmModelName | string | 必填 | - | LLM模型名称 |
+| timeoutMillisecond | number | 选填 | 30000 | API调用超时时间（毫秒） |
+
+## 搜索引擎通用配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| type | string | 必填 | - | 引擎类型（google/bing/arxiv/elasticsearch/quark） |
+| serviceName | string | 必填 | - | 后端服务名称 |
+| servicePort | number | 必填 | - | 后端服务端口 |
+| apiKey | string | 必填 | - | 搜索引擎API密钥/Aliyun AccessKey |
+| count | number | 选填 | 10 | 单次搜索返回结果数量 |
+| start | number | 选填 | 0 | 搜索结果偏移量（从第start+1条结果开始返回） |
+| timeoutMillisecond | number | 选填 | 5000 | API调用超时时间（毫秒） |
+| optionArgs | map | 选填 | - | 搜索引擎特定参数（key-value格式） |
+
+## Google 特定配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| cx | string | 必填 | - | Google自定义搜索引擎ID，用于指定搜索范围 |
+
+## Arxiv 特定配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| arxivCategory | string | 选填 | - | 搜索的论文[类别](https://arxiv.org/category_taxonomy)（如cs.AI, cs.CL等） |
+
+## Elasticsearch 特定配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| index | string | 必填 | - | 要搜索的Elasticsearch索引名称 |
+| contentField | string | 必填 | - | 要查询的内容字段名称 |
+| linkField | string | 必填 | - | 结果链接字段名称 |
+| titleField | string | 必填 | - | 结果标题字段名称 |
+
+## Quark 特定配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| secretKey | string | 必填 | - | Aliyun SecretKey |
+| endpoint | string | 选填 | iqs.cn-zhangjiakou.aliyuncs.com | 请求搜索引擎服务时的接入点 |
+
+## 配置示例
+
+### 基础配置（单搜索引擎）
+
+```yaml
+needReference: true
+searchFrom:
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+  count: 5
+  optionArgs:
+    fileType: "pdf"
+```
+
+### Arxiv搜索配置
+
+```yaml
+searchFrom:
+- type: arxiv
+  serviceName: "arxiv-svc.dns" 
+  servicePort: 443
+  arxivCategory: "cs.AI"
+  count: 10
+```
+
+
+### 夸克搜索配置
+
+```yaml
+searchFrom:
+- type: quark
+  serviceName: "quark-svc.dns" 
+  servicePort: 443
+  apiKey: "aliyun accessKey"
+  count: 10 # 搜索网页数，最多10条
+  secretKey: "aliyun secretKey"
+  endpoint: "iqs.cn-zhangjiakou.aliyuncs.com"
+```
+
+### 多搜索引擎配置
+
+```yaml
+defaultLang: "en-US"
+promptTemplate: |
+  # Search Results:
+  {search_results}
+  
+  # Please answer this question: 
+  {question}
+searchFrom:
+- type: google
+  apiKey: "google-key"
+  cx: "github-search-id"  # 专门搜索GitHub内容的搜索引擎ID
+  serviceName: "google-svc.dns"
+  servicePort: 443
+- type: google
+  apiKey: "google-key"
+  cx: "news-search-id"    # 专门搜索Google News内容的搜索引擎ID 
+  serviceName: "google-svc.dns"
+  servicePort: 443
+- type: bing
+  apiKey: "bing-key"
+  serviceName: "bing-svc.dns"
+  servicePort: 443
+  optionArgs:
+    answerCount: "5"
+```
+
+### 并发查询配置
+
+由于搜索引擎对单次查询返回结果数量有限制（如Google限制单次最多返回100条结果），可以通过以下方式获取更多结果：
+1. 设置较小的count值（如10）
+2. 通过start参数指定结果偏移量
+3. 并发发起多个查询请求，每个请求的start值按count递增
+
+例如，要获取30条结果，可以配置count=10并并发发起3个查询，每个查询的start值分别为0,10,20：
+
+```yaml
+searchFrom:
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+  start: 0
+  count: 10
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+  start: 10
+  count: 10
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+  start: 20
+  count: 10 
+```
+
+注意，过高的并发可能会导致限流，需要根据实际情况调整。
+
+### Elasticsearch 配置（用于对接私有知识库）
+
+```yaml
+searchFrom:
+- type: elasticsearch
+  serviceName: "es-svc.static"
+  # 固定地址服务的端口默认是80
+  servicePort: 80
+  index: "knowledge_base"
+  contentField: "content"
+  linkField: "url" 
+  titleField: "title"
+```
+
+### 自定义引用格式
+
+```yaml
+needReference: true
+referenceFormat: "### 数据来源\n%s"
+searchFrom:
+- type: bing
+  apiKey: "your-bing-key"
+  serviceName: "search-service.dns"
+  servicePort: 8080
+```
+
+### 搜索重写配置
+
+```yaml
+searchFrom:
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+searchRewrite:
+  llmServiceName: "llm-svc.dns"
+  llmServicePort: 443
+  llmApiKey: "your-llm-api-key"
+  llmUrl: "https://api.example.com/v1/chat/completions"
+  llmModelName: "gpt-3.5-turbo"
+  timeoutMillisecond: 15000
+```
+
+## 注意事项
+
+1. 提示词模板必须包含`{search_results}`和`{question}`占位符，可选使用`{cur_date}`插入当前日期（格式：2006年1月2日）
+2. 默认模板包含搜索结果处理指引和回答规范，如无特殊需要可以直接用默认模板，否则请根据实际情况修改
+3. 多个搜索引擎是并行查询，总超时时间 = 所有搜索引擎配置中最大timeoutMillisecond值 + 处理时间
+4. Arxiv搜索不需要API密钥，但可以指定论文类别（arxivCategory）来缩小搜索范围
--- a/plugins/wasm-go/extensions/ai-search/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-search/README_EN.md
@@ -0,0 +1,243 @@
+---
+title: AI Search Enhancement
+keywords: [higress, ai search]
+description: Higress supports enhancing the accuracy and timeliness of responses from models like DeepSeek-R1 by integrating real-time results from search engines (Google/Bing/Arxiv/Elasticsearch etc.)
+---
+
+## Feature Description
+
+The `ai-search` plugin enhances the accuracy and timeliness of AI model responses by integrating real-time results from search engines (Google/Bing/Arxiv/Elasticsearch etc.). The plugin automatically injects search results into the prompt template and determines whether to add reference sources in the final response based on configuration.
+
+## Runtime Properties
+
+Plugin execution stage: `Default stage`
+Plugin execution priority: `440`
+
+## Configuration Fields
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| needReference | bool | Optional | false | Whether to add reference sources in the response |
+| referenceFormat | string | Optional | `"**References:**\n%s"` | Reference content format, must include %s placeholder |
+| defaultLang | string | Optional | - | Default search language code (e.g. zh-CN/en-US) |
+| promptTemplate | string | Optional | Built-in template | Prompt template, must include `{search_results}` and `{question}` placeholders |
+| searchFrom | array of object | Required | - | Refer to search engine configuration below, at least one engine must be configured |
+| searchRewrite | object | Optional | - | Search rewrite configuration, used to optimize search queries using an LLM service |
+
+## Search Rewrite Description
+
+The search rewrite feature uses an LLM service to analyze and optimize the user's original query, which can:
+1. Convert natural language queries into keyword combinations better suited for search engines
+2. For Arxiv paper searches, automatically identify relevant paper categories and add category constraints
+3. For private knowledge base searches, break down long queries into multiple precise keyword combinations
+
+It is strongly recommended to enable this feature when using Arxiv or Elasticsearch engines. For Arxiv searches, it can accurately identify paper domains and optimize English keywords; for private knowledge base searches, it can provide more precise keyword matching, significantly improving search effectiveness.
+
+## Search Rewrite Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| llmServiceName | string | Required | - | LLM service name |
+| llmServicePort | number | Required | - | LLM service port |
+| llmApiKey | string | Required | - | LLM service API key |
+| llmUrl | string | Required | - | LLM service API URL |
+| llmModelName | string | Required | - | LLM model name |
+| timeoutMillisecond | number | Optional | 30000 | API call timeout (milliseconds) |
+
+## Search Engine Common Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| type | string | Required | - | Engine type (google/bing/arxiv/elasticsearch/quark) |
+| apiKey | string | Required | - | Search engine API key/Aliyun AccessKey |
+| serviceName | string | Required | - | Backend service name |
+| servicePort | number | Required | - | Backend service port |
+| count | number | Optional | 10 | Number of results returned per search |
+| start | number | Optional | 0 | Search result offset (results are returned starting from result number start+1) |
+| timeoutMillisecond | number | Optional | 5000 | API call timeout (milliseconds) |
+| optionArgs | map | Optional | - | Search engine specific parameters (key-value format) |
+
+## Google Specific Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| cx | string | Required | - | Google Custom Search Engine ID, used to specify search scope |
+
+## Arxiv Specific Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| arxivCategory | string | Optional | - | Search paper [category](https://arxiv.org/category_taxonomy) (e.g. cs.AI, cs.CL etc.) |
+
+## Elasticsearch Specific Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| index | string | Required | - | Elasticsearch index name to search |
+| contentField | string | Required | - | Content field name to query |
+| linkField | string | Required | - | Result link field name |
+| titleField | string | Required | - | Result title field name |
+
+## Quark Specific Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|----------|----------|--------|------|
+| secretKey | string | Required | - | Aliyun SecretKey |
+| endpoint | string | Optional | iqs.cn-zhangjiakou.aliyuncs.com | Endpoint for accessing quark |
+
+## Configuration Examples
+
+### Basic Configuration (Single Search Engine)
+
+```yaml
+needReference: true
+searchFrom:
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+  count: 5
+  optionArgs:
+    fileType: "pdf"
+```
+
+### Arxiv Search Configuration
+
+```yaml
+searchFrom:
+- type: arxiv
+  serviceName: "arxiv-svc.dns" 
+  servicePort: 443
+  arxivCategory: "cs.AI"
+  count: 10
+```
+
+### Quark Search Configuration
+
+```yaml
+searchFrom:
+- type: quark
+  serviceName: "quark-svc.dns" 
+  servicePort: 443
+  apiKey: "aliyun accessKey"
+  count: 10
+  secretKey: "aliyun secretKey"
+  endpoint: "iqs.cn-zhangjiakou.aliyuncs.com"
+```
+
+### Multiple Search Engines Configuration
+
+```yaml
+defaultLang: "en-US"
+promptTemplate: |
+  # Search Results:
+  {search_results}
+  
+  # Please answer this question: 
+  {question}
+searchFrom:
+- type: google
+  apiKey: "google-key"
+  cx: "github-search-id"  # Search engine ID specifically for GitHub content
+  serviceName: "google-svc.dns"
+  servicePort: 443
+- type: google
+  apiKey: "google-key"
+  cx: "news-search-id"    # Search engine ID specifically for Google News content 
+  serviceName: "google-svc.dns"
+  servicePort: 443
+- type: bing
+  apiKey: "bing-key"
+  serviceName: "bing-svc.dns"
+  servicePort: 443
+  optionArgs:
+    answerCount: "5"
+```
+
+### Concurrent Query Configuration
+
+Since search engines limit the number of results per query (e.g. Google limits to 100 results per query), you can get more results by:
+1. Setting a smaller count value (e.g. 10)
+2. Specifying result offset with start parameter
+3. Concurrently initiating multiple query requests, with each request's start value incrementing by count
+
+For example, to get 30 results, configure count=10 and concurrently initiate 3 queries with start values 0,10,20 respectively:
+
+```yaml
+searchFrom:
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+  start: 0
+  count: 10
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+  start: 10
+  count: 10
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+  start: 20
+  count: 10 
+```
+
+Note that excessive concurrency may lead to rate limiting; adjust it according to your actual situation.
+
+### Elasticsearch Configuration (For Private Knowledge Base Integration)
+
+```yaml
+searchFrom:
+- type: elasticsearch
+  serviceName: "es-svc.static"
+  # Static-address services use port 80 by default
+  servicePort: 80
+  index: "knowledge_base"
+  contentField: "content"
+  linkField: "url" 
+  titleField: "title"
+```
+
+### Custom Reference Format
+
+```yaml
+needReference: true
+referenceFormat: "### Data Sources\n%s"
+searchFrom: 
+- type: bing
+  apiKey: "your-bing-key"
+  serviceName: "search-service.dns"
+  servicePort: 8080
+```
+
+### Search Rewrite Configuration
+
+```yaml
+searchFrom:
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+searchRewrite:
+  llmServiceName: "llm-svc.dns"
+  llmServicePort: 443
+  llmApiKey: "your-llm-api-key"
+  llmUrl: "https://api.example.com/v1/chat/completions"
+  llmModelName: "gpt-3.5-turbo"
+  timeoutMillisecond: 15000
+```
+
+## Notes
+
+1. The prompt template must include `{search_results}` and `{question}` placeholders, optionally use `{cur_date}` to insert current date (format: January 2, 2006)
+2. The default template includes search results processing instructions and response specifications, you can use the default template unless there are special needs
+3. Multiple search engines query in parallel, total timeout = maximum timeoutMillisecond value among all search engine configurations + processing time
+4. Arxiv search doesn't require API key, but you can specify paper category (arxivCategory) to narrow search scope
--- a/plugins/wasm-go/extensions/ai-search/engine/arxiv/arxiv.go
+++ b/plugins/wasm-go/extensions/ai-search/engine/arxiv/arxiv.go
@@ -0,0 +1,134 @@
+package arxiv
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/antchfx/xmlquery"
+	"github.com/tidwall/gjson"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+)
+
+// ArxivSearch holds the settings of one configured arXiv search engine entry.
+type ArxivSearch struct {
+	optionArgs         map[string]string  // extra query parameters appended to the request URL
+	start              int                // sent as the "start" query parameter
+	count              int                // sent as "max_results"; the constructor defaults it to 10
+	timeoutMillisecond uint32             // request timeout in ms; the constructor defaults it to 5000
+	client             wrapper.HttpClient // cluster client built from serviceName/servicePort
+	arxivCategory      string             // fallback category used when the search context carries none
+}
+
+// NewArxivSearch builds an ArxivSearch from one search engine config entry.
+//
+// serviceName and servicePort are mandatory; an error is returned when either
+// is missing (or the port is 0). count falls back to 10 and
+// timeoutMillisecond to 5000 when unset or zero. optionArgs entries whose
+// value renders as an empty string are dropped.
+func NewArxivSearch(config *gjson.Result) (*ArxivSearch, error) {
+	serviceName := config.Get("serviceName").String()
+	if serviceName == "" {
+		return nil, errors.New("serviceName not found")
+	}
+	servicePort := config.Get("servicePort").Int()
+	if servicePort == 0 {
+		return nil, errors.New("servicePort not found")
+	}
+
+	resultCount := int(config.Get("count").Uint())
+	if resultCount == 0 {
+		resultCount = 10
+	}
+	timeout := uint32(config.Get("timeoutMillisecond").Uint())
+	if timeout == 0 {
+		timeout = 5000
+	}
+
+	// Keep only options that carry a non-empty value.
+	options := map[string]string{}
+	for name, raw := range config.Get("optionArgs").Map() {
+		if text := raw.String(); text != "" {
+			options[name] = text
+		}
+	}
+
+	return &ArxivSearch{
+		optionArgs:         options,
+		start:              int(config.Get("start").Uint()),
+		count:              resultCount,
+		timeoutMillisecond: timeout,
+		client: wrapper.NewClusterClient(wrapper.FQDNCluster{
+			FQDN: serviceName,
+			Port: servicePort,
+		}),
+		arxivCategory: config.Get("arxivCategory").String(),
+	}, nil
+}
+
+// NeedExectue reports whether this engine should run for the given search
+// context, which is the case only when the context's EngineType is "arxiv".
+func (a ArxivSearch) NeedExectue(ctx engine.SearchContext) bool {
+	const wantedType = "arxiv"
+	return ctx.EngineType == wantedType
+}
+
+// Client returns the HTTP client that was created for this engine's
+// configured serviceName/servicePort.
+func (a ArxivSearch) Client() wrapper.HttpClient {
+	return a.client
+}
+
+// CallArgs builds the GET request sent to the arXiv export API.
+//
+// Each query in ctx.Querys becomes an "all:<escaped query>" term. When a
+// category is available (ctx.ArxivCategory first, the configured
+// arxivCategory as fallback) a "cat:<category>" term is added. All terms are
+// collected first and joined with "+AND+" in one pass, so the expression stays
+// well-formed (no dangling leading "+AND+") even when ctx.Querys is empty.
+// Configured optionArgs are appended as extra parameters with escaped values.
+func (a ArxivSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
+	terms := make([]string, 0, len(ctx.Querys)+1)
+	for _, q := range ctx.Querys {
+		terms = append(terms, fmt.Sprintf("all:%s", url.QueryEscape(q)))
+	}
+	// The per-request category takes precedence over the configured one.
+	category := ctx.ArxivCategory
+	if category == "" {
+		category = a.arxivCategory
+	}
+	if category != "" {
+		terms = append(terms, fmt.Sprintf("cat:%s", category))
+	}
+	queryUrl := fmt.Sprintf("https://export.arxiv.org/api/query?search_query=%s&max_results=%d&start=%d",
+		strings.Join(terms, "+AND+"), a.count, a.start)
+	var extraArgs []string
+	for key, value := range a.optionArgs {
+		extraArgs = append(extraArgs, fmt.Sprintf("%s=%s", key, url.QueryEscape(value)))
+	}
+	if len(extraArgs) > 0 {
+		queryUrl = fmt.Sprintf("%s&%s", queryUrl, strings.Join(extraArgs, "&"))
+	}
+	return engine.CallArgs{
+		Method:             http.MethodGet,
+		Url:                queryUrl,
+		Headers:            [][2]string{{"Accept", "application/atom+xml"}},
+		TimeoutMillisecond: a.timeoutMillisecond,
+	}
+}
+
+// ParseResult converts an arXiv Atom feed into search results.
+//
+// Every <entry> yields one result: the title, the "alternate" text/html link,
+// and a content string made of the summary, the author names and the
+// publication time. Entries that fail SearchResult.Valid are skipped. An
+// unparsable response yields a nil slice.
+//
+// Missing child elements are read as empty strings instead of being
+// dereferenced, so an incomplete entry cannot panic the plugin.
+func (a ArxivSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
+	var results []engine.SearchResult
+	doc, err := xmlquery.Parse(bytes.NewReader(response))
+	if err != nil {
+		return results
+	}
+
+	// childText returns the text of the named child element, or "" when the
+	// element is absent (SelectElement returns nil in that case).
+	childText := func(parent *xmlquery.Node, name string) string {
+		if child := parent.SelectElement(name); child != nil {
+			return child.InnerText()
+		}
+		return ""
+	}
+
+	for _, entry := range xmlquery.Find(doc, "//entry") {
+		link := ""
+		for _, l := range entry.SelectElements("link") {
+			if l.SelectAttr("rel") == "alternate" && l.SelectAttr("type") == "text/html" {
+				link = l.SelectAttr("href")
+				break
+			}
+		}
+		var authorNames []string
+		for _, author := range entry.SelectElements("author") {
+			// Skip authors without a usable name rather than emitting blanks.
+			if name := childText(author, "name"); name != "" {
+				authorNames = append(authorNames, name)
+			}
+		}
+		content := fmt.Sprintf("%s\nAuthors: %s\nPublication time: %s",
+			childText(entry, "summary"), strings.Join(authorNames, ", "), childText(entry, "published"))
+		result := engine.SearchResult{
+			Title:   childText(entry, "title"),
+			Link:    link,
+			Content: content,
+		}
+		if result.Valid() {
+			results = append(results, result)
+		}
+	}
+	return results
+}
--- a/plugins/wasm-go/extensions/ai-search/engine/bing/bing.go
+++ b/plugins/wasm-go/extensions/ai-search/engine/bing/bing.go
@@ -0,0 +1,128 @@
+package bing
+
+import (
+	"errors"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+)
+
+// BingSearch holds the settings of one configured Bing search engine entry.
+type BingSearch struct {
+	optionArgs         map[string]string  // extra query parameters appended to the request URL
+	apiKey             string             // sent in the "Ocp-Apim-Subscription-Key" header; required
+	start              int                // sent as the "offset" query parameter
+	count              int                // sent as the "count" query parameter; the constructor defaults it to 10
+	timeoutMillisecond uint32             // request timeout in ms; the constructor defaults it to 5000
+	client             wrapper.HttpClient // cluster client built from serviceName/servicePort
+}
+
+// NewBingSearch builds a BingSearch from one search engine config entry.
+//
+// apiKey, serviceName and servicePort are mandatory; an error is returned when
+// any of them is missing (or the port is 0). count falls back to 10 and
+// timeoutMillisecond to 5000 when unset or zero. optionArgs entries whose
+// value renders as an empty string are dropped.
+func NewBingSearch(config *gjson.Result) (*BingSearch, error) {
+	apiKey := config.Get("apiKey").String()
+	if apiKey == "" {
+		return nil, errors.New("apiKey not found")
+	}
+	serviceName := config.Get("serviceName").String()
+	if serviceName == "" {
+		return nil, errors.New("serviceName not found")
+	}
+	servicePort := config.Get("servicePort").Int()
+	if servicePort == 0 {
+		return nil, errors.New("servicePort not found")
+	}
+
+	resultCount := int(config.Get("count").Uint())
+	if resultCount == 0 {
+		resultCount = 10
+	}
+	timeout := uint32(config.Get("timeoutMillisecond").Uint())
+	if timeout == 0 {
+		timeout = 5000
+	}
+
+	// Keep only options that carry a non-empty value.
+	options := map[string]string{}
+	for name, raw := range config.Get("optionArgs").Map() {
+		if text := raw.String(); text != "" {
+			options[name] = text
+		}
+	}
+
+	return &BingSearch{
+		optionArgs:         options,
+		apiKey:             apiKey,
+		start:              int(config.Get("start").Uint()),
+		count:              resultCount,
+		timeoutMillisecond: timeout,
+		client: wrapper.NewClusterClient(wrapper.FQDNCluster{
+			FQDN: serviceName,
+			Port: servicePort,
+		}),
+	}, nil
+}
+
+// NeedExectue reports whether this engine should run for the given search
+// context: when the context's EngineType is empty or "internet".
+func (b BingSearch) NeedExectue(ctx engine.SearchContext) bool {
+	switch ctx.EngineType {
+	case "", "internet":
+		return true
+	default:
+		return false
+	}
+}
+
+// Client returns the HTTP client that was created for this engine's
+// configured serviceName/servicePort.
+func (b BingSearch) Client() wrapper.HttpClient {
+	return b.client
+}
+
+// CallArgs builds the GET request sent to the Bing Web Search v7 API.
+//
+// All queries in ctx.Querys are joined with a space into the "q" parameter;
+// count and start are sent as "count" and "offset". Configured optionArgs are
+// appended with escaped values, and a non-empty ctx.Language is sent as
+// "mkt". The language is URL-escaped like every other dynamic value so that
+// an unexpected character in it cannot inject or break query parameters.
+// The API key travels in the "Ocp-Apim-Subscription-Key" header.
+func (b BingSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
+	queryUrl := fmt.Sprintf("https://api.bing.microsoft.com/v7.0/search?q=%s&count=%d&offset=%d",
+		url.QueryEscape(strings.Join(ctx.Querys, " ")), b.count, b.start)
+	var extraArgs []string
+	for key, value := range b.optionArgs {
+		extraArgs = append(extraArgs, fmt.Sprintf("%s=%s", key, url.QueryEscape(value)))
+	}
+	if ctx.Language != "" {
+		extraArgs = append(extraArgs, fmt.Sprintf("mkt=%s", url.QueryEscape(ctx.Language)))
+	}
+	if len(extraArgs) > 0 {
+		queryUrl = fmt.Sprintf("%s&%s", queryUrl, strings.Join(extraArgs, "&"))
+	}
+	return engine.CallArgs{
+		Method:             http.MethodGet,
+		Url:                queryUrl,
+		Headers:            [][2]string{{"Ocp-Apim-Subscription-Key", b.apiKey}},
+		TimeoutMillisecond: b.timeoutMillisecond,
+	}
+}
+
+// ParseResult extracts search results from a Bing Web Search JSON response.
+//
+// Results are emitted in this order: each item of "webPages.value" followed
+// by its "deepLinks", then every item of "news.value". Web pages and deep
+// links take their content from "snippet", news articles from "description".
+// Items that fail SearchResult.Valid are skipped.
+func (b BingSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
+	var results []engine.SearchResult
+	// collect turns one JSON item into a result and keeps it when valid.
+	collect := func(item gjson.Result, contentKey string) {
+		candidate := engine.SearchResult{
+			Title:   item.Get("name").String(),
+			Link:    item.Get("url").String(),
+			Content: item.Get(contentKey).String(),
+		}
+		if candidate.Valid() {
+			results = append(results, candidate)
+		}
+	}
+
+	parsed := gjson.ParseBytes(response)
+	for _, page := range parsed.Get("webPages.value").Array() {
+		collect(page, "snippet")
+		for _, deepLink := range page.Get("deepLinks").Array() {
+			collect(deepLink, "snippet")
+		}
+	}
+	for _, article := range parsed.Get("news.value").Array() {
+		collect(article, "description")
+	}
+	return results
+}
--- a/plugins/wasm-go/extensions/ai-search/engine/elasticsearch/elasticsearch.go
+++ b/plugins/wasm-go/extensions/ai-search/engine/elasticsearch/elasticsearch.go
@@ -0,0 +1,114 @@
+package elasticsearch
+
+import (
+	"errors"
+	"fmt"
+	"net/http"
+	"strings"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+)
+
+// ElasticsearchSearch is a search engine that queries a single Elasticsearch
+// index through the `_search` endpoint.
+type ElasticsearchSearch struct {
+	// client sends requests to the cluster built from serviceName/servicePort.
+	client             wrapper.HttpClient
+	// index is the index name placed in the request path.
+	index              string
+	// contentField is the document field that is matched against the query and
+	// returned as the result content.
+	contentField       string
+	// linkField is the document field returned as the result link.
+	linkField          string
+	// titleField is the document field returned as the result title.
+	titleField         string
+	// start is sent as the `from` query parameter.
+	start              int
+	// count is sent as the `size` query parameter (defaults to 10).
+	count              int
+	// timeoutMillisecond is the request timeout (defaults to 5000).
+	timeoutMillisecond uint32
+}
+
+// NewElasticsearchSearch builds an ElasticsearchSearch from its JSON config.
+//
+// serviceName, servicePort, index, contentField, linkField and titleField are
+// required; the first one found missing is reported as "<key> not found".
+// timeoutMillisecond defaults to 5000 and count defaults to 10.
+func NewElasticsearchSearch(config *gjson.Result) (*ElasticsearchSearch, error) {
+	serviceName := config.Get("serviceName").String()
+	if serviceName == "" {
+		return nil, errors.New("serviceName not found")
+	}
+	servicePort := config.Get("servicePort").Int()
+	if servicePort == 0 {
+		return nil, errors.New("servicePort not found")
+	}
+
+	es := &ElasticsearchSearch{
+		client: wrapper.NewClusterClient(wrapper.FQDNCluster{
+			FQDN: serviceName,
+			Port: servicePort,
+		}),
+		timeoutMillisecond: uint32(config.Get("timeoutMillisecond").Uint()),
+		start:              int(config.Get("start").Uint()),
+		count:              int(config.Get("count").Uint()),
+	}
+
+	// Mandatory string settings, validated in declaration order.
+	mandatory := []struct {
+		key    string
+		target *string
+	}{
+		{"index", &es.index},
+		{"contentField", &es.contentField},
+		{"linkField", &es.linkField},
+		{"titleField", &es.titleField},
+	}
+	for _, field := range mandatory {
+		*field.target = config.Get(field.key).String()
+		if *field.target == "" {
+			return nil, errors.New(field.key + " not found")
+		}
+	}
+
+	if es.timeoutMillisecond == 0 {
+		es.timeoutMillisecond = 5000
+	}
+	if es.count == 0 {
+		es.count = 10
+	}
+	return es, nil
+}
+
+func (e ElasticsearchSearch) NeedExectue(ctx engine.SearchContext) bool {
+	return ctx.EngineType == "private"
+}
+
+// Client returns the HTTP client used to reach the Elasticsearch service.
+func (es ElasticsearchSearch) Client() wrapper.HttpClient {
+	return es.client
+}
+
+// CallArgs builds the `_search` request for the queries in ctx.
+//
+// All queries are joined with a single space and sent as one `match` query
+// against contentField with the AND operator. Paging uses the configured
+// start (`from`) and count (`size`).
+//
+// The field name and the query text are encoded as JSON string literals before
+// being placed in the body, so quotes, backslashes or control characters in a
+// user query cannot break the document or inject additional query clauses.
+func (e ElasticsearchSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
+	// AppendJSONString returns the value already wrapped in double quotes.
+	fieldLiteral := string(gjson.AppendJSONString(nil, e.contentField))
+	queryLiteral := string(gjson.AppendJSONString(nil, strings.Join(ctx.Querys, " ")))
+	searchBody := fmt.Sprintf(`{
+		"query": {
+			"match": {
+				%s: {
+					"query": %s,
+					"operator": "AND"
+				}
+			}
+		}
+	}`, fieldLiteral, queryLiteral)
+
+	return engine.CallArgs{
+		Method: http.MethodPost,
+		Url:    fmt.Sprintf("/%s/_search?from=%d&size=%d", e.index, e.start, e.count),
+		Headers: [][2]string{
+			{"Content-Type", "application/json"},
+		},
+		Body:               []byte(searchBody),
+		TimeoutMillisecond: e.timeoutMillisecond,
+	}
+}
+
+// ParseResult converts an Elasticsearch `_search` response into search results.
+//
+// Each entry of "hits.hits" is mapped from its "_source" document using the
+// configured title, link and content fields; entries with any empty value are
+// dropped.
+func (e ElasticsearchSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
+	var found []engine.SearchResult
+	hits := gjson.ParseBytes(response).Get("hits.hits").Array()
+	for i := range hits {
+		doc := hits[i].Get("_source")
+		entry := engine.SearchResult{
+			Title:   doc.Get(e.titleField).String(),
+			Link:    doc.Get(e.linkField).String(),
+			Content: doc.Get(e.contentField).String(),
+		}
+		if !entry.Valid() {
+			continue
+		}
+		found = append(found, entry)
+	}
+	return found
+}
--- a/plugins/wasm-go/extensions/ai-search/engine/google/google.go
+++ b/plugins/wasm-go/extensions/ai-search/engine/google/google.go
@@ -0,0 +1,120 @@
+package google
+
+import (
+	"errors"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+)
+
+// GoogleSearch is a search engine backed by the Google Custom Search JSON API.
+type GoogleSearch struct {
+	// optionArgs are extra query parameters appended to every request.
+	optionArgs         map[string]string
+	// apiKey is sent as the `key` query parameter.
+	apiKey             string
+	// cx is sent as the `cx` query parameter.
+	cx                 string
+	// start is the zero-based offset; it is sent as `start` plus one.
+	start              int
+	// count is sent as the `num` query parameter (defaults to 10).
+	count              int
+	// timeoutMillisecond is the request timeout (defaults to 5000).
+	timeoutMillisecond uint32
+	// client sends requests to the cluster built from serviceName/servicePort.
+	client             wrapper.HttpClient
+}
+
+// NewGoogleSearch builds a GoogleSearch from its JSON config.
+//
+// apiKey, cx, serviceName and servicePort are required. count defaults to 10
+// and must not exceed 10; start + count must not exceed 100.
+// timeoutMillisecond defaults to 5000. Entries of optionArgs whose value is
+// empty are ignored.
+func NewGoogleSearch(config *gjson.Result) (*GoogleSearch, error) {
+	search := &GoogleSearch{}
+	search.apiKey = config.Get("apiKey").String()
+	if search.apiKey == "" {
+		return nil, errors.New("apiKey not found")
+	}
+	search.cx = config.Get("cx").String()
+	if search.cx == "" {
+		return nil, errors.New("cx not found")
+	}
+	serviceName := config.Get("serviceName").String()
+	if serviceName == "" {
+		return nil, errors.New("serviceName not found")
+	}
+	servicePort := config.Get("servicePort").Int()
+	if servicePort == 0 {
+		return nil, errors.New("servicePort not found")
+	}
+	search.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
+		FQDN: serviceName,
+		Port: servicePort,
+	})
+	search.start = int(config.Get("start").Uint())
+	search.count = int(config.Get("count").Uint())
+	if search.count == 0 {
+		search.count = 10
+	}
+	// A count of exactly 10 is accepted (it is also the default), so the
+	// message states the bound as inclusive.
+	if search.count > 10 || search.start+search.count > 100 {
+		return nil, errors.New("count must be less than or equal to 10, and start + count must be less than or equal to 100.")
+	}
+	search.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint())
+	if search.timeoutMillisecond == 0 {
+		search.timeoutMillisecond = 5000
+	}
+	search.optionArgs = map[string]string{}
+	for key, value := range config.Get("optionArgs").Map() {
+		if valStr := value.String(); valStr != "" {
+			search.optionArgs[key] = valStr
+		}
+	}
+	return search, nil
+}
+
+// NeedExectue reports whether this engine should run for the given context:
+// when the engine type is unset or is "internet".
+func (g GoogleSearch) NeedExectue(ctx engine.SearchContext) bool {
+	switch ctx.EngineType {
+	case "", "internet":
+		return true
+	default:
+		return false
+	}
+}
+
+// Client returns the HTTP client used to reach the Google search service.
+func (gs GoogleSearch) Client() wrapper.HttpClient {
+	return gs.client
+}
+
+// CallArgs builds the Custom Search GET request for the queries in ctx.
+//
+// All queries are joined with a single space into `q`. The configured start is
+// zero-based and is sent as `start` plus one. Configured optionArgs are
+// appended, and a non-empty ctx.Language is sent as `lr=lang_<language>`.
+//
+// Every dynamic component (cx, key, option names and values, language) is
+// query-escaped so that reserved characters such as ':' , '&' or '#' in a
+// configured or model-supplied value cannot corrupt the URL.
+func (g GoogleSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
+	queryUrl := fmt.Sprintf("https://customsearch.googleapis.com/customsearch/v1?cx=%s&q=%s&num=%d&key=%s&start=%d",
+		url.QueryEscape(g.cx), url.QueryEscape(strings.Join(ctx.Querys, " ")), g.count, url.QueryEscape(g.apiKey), g.start+1)
+	var extraArgs []string
+	for key, value := range g.optionArgs {
+		extraArgs = append(extraArgs, fmt.Sprintf("%s=%s", url.QueryEscape(key), url.QueryEscape(value)))
+	}
+	if ctx.Language != "" {
+		extraArgs = append(extraArgs, fmt.Sprintf("lr=lang_%s", url.QueryEscape(ctx.Language)))
+	}
+	if len(extraArgs) > 0 {
+		queryUrl = fmt.Sprintf("%s&%s", queryUrl, strings.Join(extraArgs, "&"))
+	}
+	return engine.CallArgs{
+		Method: http.MethodGet,
+		Url:    queryUrl,
+		Headers: [][2]string{
+			{"Accept", "application/json"},
+		},
+		TimeoutMillisecond: g.timeoutMillisecond,
+	}
+}
+
+// ParseResult converts a Custom Search JSON response into search results.
+//
+// The content of each item is its "snippet"; when the first metatags entry
+// has an "og:description" it is appended after a "\n...\n" separator. Items
+// with an empty title, link or content are dropped.
+func (g GoogleSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
+	var found []engine.SearchResult
+	items := gjson.ParseBytes(response).Get("items").Array()
+	for _, entry := range items {
+		text := entry.Get("snippet").String()
+		if ogDescription := entry.Get("pagemap.metatags.0.og:description").String(); ogDescription != "" {
+			text = fmt.Sprintf("%s\n...\n%s", text, ogDescription)
+		}
+		candidate := engine.SearchResult{
+			Title:   entry.Get("title").String(),
+			Link:    entry.Get("link").String(),
+			Content: text,
+		}
+		if !candidate.Valid() {
+			continue
+		}
+		found = append(found, candidate)
+	}
+	return found
+}
--- a/plugins/wasm-go/extensions/ai-search/engine/quark/quark.go
+++ b/plugins/wasm-go/extensions/ai-search/engine/quark/quark.go
@@ -0,0 +1,194 @@
+package quark
+
+import (
+	"crypto/hmac"
+	"crypto/rand"
+	"crypto/sha256"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"net/http"
+	"net/url"
+	"sort"
+	"strings"
+	"time"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+)
+
+// QuarkSearch is a search engine that calls the GenericSearch action with
+// ACS3-HMAC-SHA256 signed requests.
+type QuarkSearch struct {
+	// apiKey is placed in the Authorization header as the Credential.
+	apiKey             string
+	// secretKey is the HMAC key used to sign each request.
+	secretKey          string
+	// timeoutMillisecond is the request timeout (defaults to 5000).
+	timeoutMillisecond uint32
+	// client sends requests to the cluster built from serviceName/servicePort.
+	client             wrapper.HttpClient
+	// count caps how many response items are considered (defaults to 10).
+	count              uint32
+	// endpoint is the API host used in the URL and in the signed "host" header
+	// (defaults to iqs.cn-zhangjiakou.aliyuncs.com).
+	endpoint           string
+}
+
+// Fixed request attributes used when building and signing Quark search calls.
+const (
+	// Path is the request path; it is also the canonical URI that gets signed.
+	Path               = "/linked-retrieval/linked-retrieval-entry/v2/linkedRetrieval/commands/genericSearch"
+	// ContentSha256 is the hex SHA-256 digest of an empty request body.
+	ContentSha256      = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" // for empty body
+	// Action is sent in the x-acs-action header.
+	Action             = "GenericSearch"
+	// Version is sent in the x-acs-version header.
+	Version            = "2024-11-11"
+	// SignatureAlgorithm prefixes both the string to sign and the Authorization header.
+	SignatureAlgorithm = "ACS3-HMAC-SHA256"
+	// SignedHeaders lists the header names covered by the signature.
+	SignedHeaders      = "host;x-acs-action;x-acs-content-sha256;x-acs-date;x-acs-signature-nonce;x-acs-version"
+)
+
+// urlEncoding percent-encodes rawStr for use in the canonical query string.
+//
+// It starts from url.PathEscape (so a space becomes %20) and additionally
+// encodes the characters '+', ':', '=', '&', '$' and '@', which PathEscape
+// leaves untouched.
+func urlEncoding(rawStr string) string {
+	extraEscapes := strings.NewReplacer(
+		"+", "%2B",
+		":", "%3A",
+		"=", "%3D",
+		"&", "%26",
+		"$", "%24",
+		"@", "%40",
+	)
+	return extraEscapes.Replace(url.PathEscape(rawStr))
+}
+
+// getSignature returns the lowercase hex HMAC-SHA256 of stringToSign keyed
+// with secret.
+func getSignature(stringToSign, secret string) string {
+	mac := hmac.New(sha256.New, []byte(secret))
+	mac.Write([]byte(stringToSign))
+	return hex.EncodeToString(mac.Sum(nil))
+}
+
+// getCanonicalHeaders renders params as canonical header lines.
+//
+// Each entry becomes "name:value\n" and the lines are ordered by header name.
+// Ordering is done on the names themselves rather than on the joined
+// "name:value" text, so a name that is a prefix of another (for example "a"
+// and "a-b") always sorts first regardless of how ':' compares with the next
+// character. An empty map yields a single "\n", as before.
+func getCanonicalHeaders(params map[string]string) string {
+	names := make([]string, 0, len(params))
+	for name := range params {
+		names = append(names, name)
+	}
+	// sort.Strings uses a strict ordering, unlike a "<=" comparator passed to
+	// sort.Slice, which does not satisfy the documented less contract.
+	sort.Strings(names)
+
+	lines := make([]string, 0, len(names))
+	for _, name := range names {
+		lines = append(lines, name+":"+params[name])
+	}
+	return strings.Join(lines, "\n") + "\n"
+}
+
+// getHasedString returns the lowercase hex SHA-256 digest of input.
+func getHasedString(input string) string {
+	digest := sha256.Sum256([]byte(input))
+	return hex.EncodeToString(digest[:])
+}
+
+// generateHexID returns a random hex string built from length/2 bytes read
+// from crypto/rand, so the result has `length` characters when length is even
+// and length-1 characters when it is odd. The read error, if any, is returned
+// together with an empty string.
+func generateHexID(length int) (string, error) {
+	raw := make([]byte, length/2)
+	_, err := rand.Read(raw)
+	if err != nil {
+		return "", err
+	}
+	return hex.EncodeToString(raw), nil
+}
+
+// NewQuarkSearch builds a QuarkSearch from its JSON config.
+//
+// apiKey, secretKey, serviceName and servicePort are required. endpoint
+// defaults to "iqs.cn-zhangjiakou.aliyuncs.com", count to 10 and
+// timeoutMillisecond to 5000.
+func NewQuarkSearch(config *gjson.Result) (*QuarkSearch, error) {
+	apiKey := config.Get("apiKey").String()
+	if apiKey == "" {
+		return nil, errors.New("apiKey not found")
+	}
+	secretKey := config.Get("secretKey").String()
+	if secretKey == "" {
+		return nil, errors.New("secretKey not found")
+	}
+	serviceName := config.Get("serviceName").String()
+	if serviceName == "" {
+		return nil, errors.New("serviceName not found")
+	}
+	servicePort := config.Get("servicePort").Int()
+	if servicePort == 0 {
+		return nil, errors.New("servicePort not found")
+	}
+
+	quark := &QuarkSearch{
+		apiKey:    apiKey,
+		secretKey: secretKey,
+		endpoint:  config.Get("endpoint").String(),
+		count:     uint32(config.Get("count").Int()),
+	}
+	if quark.endpoint == "" {
+		quark.endpoint = "iqs.cn-zhangjiakou.aliyuncs.com"
+	}
+	if quark.count == 0 {
+		quark.count = 10
+	}
+	quark.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
+		FQDN: serviceName,
+		Port: servicePort,
+	})
+	quark.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint())
+	if quark.timeoutMillisecond == 0 {
+		quark.timeoutMillisecond = 5000
+	}
+	return quark, nil
+}
+
+// NeedExectue reports whether this engine should run for the given context:
+// when the engine type is unset or is "internet".
+func (g QuarkSearch) NeedExectue(ctx engine.SearchContext) bool {
+	switch ctx.EngineType {
+	case "", "internet":
+		return true
+	default:
+		return false
+	}
+}
+
+// Client returns the HTTP client used to reach the Quark search service.
+func (qs QuarkSearch) Client() wrapper.HttpClient {
+	return qs.client
+}
+
+// CallArgs builds the signed GenericSearch GET request for the queries in ctx.
+//
+// All queries are joined with a single space and sent as `query` together
+// with `timeRange=NoLimit`. The request is signed with ACS3-HMAC-SHA256 over
+// the canonical request (method, path, canonical query string, canonical
+// headers, signed header list and the empty-body digest); the resulting
+// Authorization header and the x-acs-* headers are returned with the call.
+//
+// The canonical query string is assembled from parameter names in sorted
+// order. Go map iteration order is unspecified, so building it directly from
+// the map could sign "timeRange=...&query=..." on some calls and
+// "query=...&timeRange=..." on others, giving an unstable signature.
+func (g QuarkSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
+	query := strings.Join(ctx.Querys, " ")
+	canonicalURI := Path
+	queryParams := map[string]string{
+		"query":     query,
+		"timeRange": "NoLimit",
+	}
+	paramNames := make([]string, 0, len(queryParams))
+	for k := range queryParams {
+		paramNames = append(paramNames, k)
+	}
+	sort.Strings(paramNames)
+	queryParamsStr := make([]string, 0, len(paramNames))
+	for _, k := range paramNames {
+		queryParamsStr = append(queryParamsStr, urlEncoding(k)+"="+urlEncoding(queryParams[k]))
+	}
+	canonicalQueryString := strings.Join(queryParamsStr, "&")
+	timeStamp := time.Now().UTC().Format("2006-01-02T15:04:05Z")
+	// NOTE(review): a failure of the random source is ignored here because this
+	// method cannot return an error; the nonce would then be empty.
+	randomID, _ := generateHexID(32)
+	params := map[string]string{
+		"host":                  g.endpoint,
+		"x-acs-action":          Action,
+		"x-acs-content-sha256":  ContentSha256,
+		"x-acs-date":            timeStamp,
+		"x-acs-signature-nonce": randomID,
+		"x-acs-version":         Version,
+	}
+	canonicalHeaders := getCanonicalHeaders(params)
+	canonicalRequest := http.MethodGet + "\n" + canonicalURI + "\n" + canonicalQueryString + "\n" + canonicalHeaders + "\n" + SignedHeaders + "\n" + ContentSha256
+	stringToSign := SignatureAlgorithm + "\n" + getHasedString(canonicalRequest)
+
+	authHeaderFmt := "%s Credential=%s,SignedHeaders=%s,Signature=%s"
+	authHeader := fmt.Sprintf(authHeaderFmt, SignatureAlgorithm, g.apiKey, SignedHeaders, getSignature(stringToSign, g.secretKey))
+
+	// url.Values.Encode also emits parameters sorted by key.
+	reqParams := url.Values{}
+	for k, v := range queryParams {
+		reqParams.Add(k, v)
+	}
+	requestURL := fmt.Sprintf("https://%s%s?%s", g.endpoint, Path, reqParams.Encode())
+
+	return engine.CallArgs{
+		Method: http.MethodGet,
+		Url:    requestURL,
+		Headers: [][2]string{
+			{"x-acs-date", timeStamp},
+			{"x-acs-signature-nonce", randomID},
+			{"x-acs-content-sha256", ContentSha256},
+			{"x-acs-version", Version},
+			{"x-acs-action", Action},
+			{"Authorization", authHeader},
+		},
+		Body:               nil,
+		TimeoutMillisecond: g.timeoutMillisecond,
+	}
+}
+
+// ParseResult converts a GenericSearch JSON response into search results.
+//
+// Only the first `count` entries of "pageItems" are considered (by position in
+// the response), and of those only entries with a non-empty title, link and
+// main text are returned.
+func (g QuarkSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
+	var found []engine.SearchResult
+	limit := int(g.count)
+	for position, entry := range gjson.ParseBytes(response).Get("pageItems").Array() {
+		// Everything at or beyond the limit is discarded, so stop scanning.
+		if position >= limit {
+			break
+		}
+		candidate := engine.SearchResult{
+			Title:   entry.Get("title").String(),
+			Link:    entry.Get("link").String(),
+			Content: entry.Get("mainText").String(),
+		}
+		if candidate.Valid() {
+			found = append(found, candidate)
+		}
+	}
+	return found
+}
--- a/plugins/wasm-go/extensions/ai-search/engine/types.go
+++ b/plugins/wasm-go/extensions/ai-search/engine/types.go
@@ -0,0 +1,37 @@
+package engine
+
+import (
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+)
+
+// SearchResult is one hit returned by a search engine.
+type SearchResult struct {
+	Title   string
+	Link    string
+	Content string
+}
+
+// Valid reports whether the result is complete, i.e. none of Title, Link and
+// Content is empty.
+func (result SearchResult) Valid() bool {
+	anyMissing := result.Title == "" || result.Link == "" || result.Content == ""
+	return !anyMissing
+}
+
+// SearchContext describes one search to be executed by the engines.
+type SearchContext struct {
+	// EngineType selects which engines run; engines in this package compare it
+	// against "", "internet" and "private".
+	EngineType    string
+	// Querys holds the query terms; engines join them with a single space.
+	Querys        []string
+	// Language, when non-empty, is forwarded by engines as a language/market
+	// parameter.
+	Language      string
+	// ArxivCategory is presumably consumed by the arxiv engine — TODO confirm,
+	// its use is not visible here.
+	ArxivCategory string
+}
+
+// CallArgs describes the HTTP request an engine wants to send.
+type CallArgs struct {
+	// Method is the HTTP method, e.g. http.MethodGet.
+	Method             string
+	// Url is the request URL or path.
+	Url                string
+	// Headers is the list of request headers as {name, value} pairs.
+	Headers            [][2]string
+	// Body is the request body; nil when there is none.
+	Body               []byte
+	// TimeoutMillisecond is the request timeout in milliseconds.
+	TimeoutMillisecond uint32
+}
+
+// SearchEngine is the contract every search backend implements.
+type SearchEngine interface {
+	// NeedExectue reports whether the engine should run for ctx.
+	NeedExectue(ctx SearchContext) bool
+	// Client returns the HTTP client used to reach the engine's service.
+	Client() wrapper.HttpClient
+	// CallArgs builds the HTTP request for ctx.
+	CallArgs(ctx SearchContext) CallArgs
+	// ParseResult converts the raw response body into search results.
+	ParseResult(ctx SearchContext, response []byte) []SearchResult
+}
--- a/plugins/wasm-go/extensions/ai-search/go.mod
+++ b/plugins/wasm-go/extensions/ai-search/go.mod
@@ -0,0 +1,26 @@
+module github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search
+
+go 1.18
+
+replace github.com/alibaba/higress/plugins/wasm-go => ../..
+
+require (
+	github.com/alibaba/higress/plugins/wasm-go v0.0.0
+	github.com/antchfx/xmlquery v1.4.4
+	github.com/higress-group/proxy-wasm-go-sdk v1.0.0
+	github.com/tidwall/gjson v1.18.0
+	github.com/tidwall/sjson v1.2.5
+)
+
+require (
+	github.com/antchfx/xpath v1.3.3 // indirect
+	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
+	github.com/google/uuid v1.3.0 // indirect
+	github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 // indirect
+	github.com/magefile/mage v1.14.0 // indirect
+	github.com/tidwall/match v1.1.1 // indirect
+	github.com/tidwall/pretty v1.2.0 // indirect
+	github.com/tidwall/resp v0.1.1 // indirect
+	golang.org/x/net v0.33.0 // indirect
+	golang.org/x/text v0.21.0 // indirect
+)
--- a/plugins/wasm-go/extensions/ai-search/go.sum
+++ b/plugins/wasm-go/extensions/ai-search/go.sum
@@ -0,0 +1,96 @@
+github.com/antchfx/xmlquery v1.4.4 h1:mxMEkdYP3pjKSftxss4nUHfjBhnMk4imGoR96FRY2dg=
+github.com/antchfx/xmlquery v1.4.4/go.mod h1:AEPEEPYE9GnA2mj5Ur2L5Q5/2PycJ0N9Fusrx9b12fc=
+github.com/antchfx/xpath v1.3.3 h1:tmuPQa1Uye0Ym1Zn65vxPgfltWb/Lxu2jeqIGteJSRs=
+github.com/antchfx/xpath v1.3.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
+github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
+github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
+github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKEak5MWjBDhWjuHijU=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0=
+github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
+github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
+github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
+github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
+github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
+github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
+github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE=
+github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0=
+github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
+github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
+golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
+golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
+golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
+golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
+golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
+golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
+golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
+golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
+golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
+golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
+golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
+golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
+golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
+golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
+golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
+golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
+golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
--- a/plugins/wasm-go/extensions/ai-search/main.go
+++ b/plugins/wasm-go/extensions/ai-search/main.go
@@ -0,0 +1,568 @@
+// Copyright (c) 2022 Alibaba Group Holding Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	_ "embed"
+	"errors"
+	"fmt"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/arxiv"
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/bing"
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/elasticsearch"
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/google"
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/quark"
+)
+
+// SearchRewrite holds the settings of the optional LLM-based query rewrite
+// step, populated from the "searchRewrite" config section.
+type SearchRewrite struct {
+	// client reaches the LLM service built from llmServiceName/llmServicePort.
+	client             wrapper.HttpClient
+	// url is the configured llmUrl.
+	url                string
+	// apiKey is the configured llmApiKey.
+	apiKey             string
+	// modelName is the configured llmModelName.
+	modelName          string
+	// timeoutMillisecond is the LLM call timeout (defaults to 30000).
+	timeoutMillisecond uint32
+	// prompt is the embedded rewrite prompt chosen from the configured engine kinds.
+	prompt             string
+}
+
+// Config is the parsed plugin configuration.
+type Config struct {
+	// engine lists the configured search engines, in "searchFrom" order.
+	engine          []engine.SearchEngine
+	// promptTemplate must contain the {search_results} and {question} placeholders.
+	promptTemplate  string
+	// referenceFormat is set only when needReference is true and contains "%s".
+	referenceFormat string
+	// defaultLanguage is read from the "defaultLang" setting.
+	defaultLanguage string
+	// needReference is read from the "needReference" setting.
+	needReference   bool
+	// searchRewrite is nil when no "searchRewrite" section is configured.
+	searchRewrite   *SearchRewrite
+}
+
+const (
+	// DEFAULT_MAX_BODY_BYTES is the request body buffer limit (100 MiB) applied
+	// to JSON requests.
+	DEFAULT_MAX_BODY_BYTES uint32 = 100 * 1024 * 1024
+)
+
+// fullSearchPrompts is the rewrite prompt used when both arxiv and private
+// engines are configured.
+//go:embed prompts/full.md
+var fullSearchPrompts string
+
+// arxivSearchPrompts is the rewrite prompt used when an arxiv engine is
+// configured without a private one.
+//go:embed prompts/arxiv.md
+var arxivSearchPrompts string
+
+// internetSearchPrompts is the rewrite prompt used when only internet engines
+// are configured.
+//go:embed prompts/internet.md
+var internetSearchPrompts string
+
+// privateSearchPrompts is the rewrite prompt used when a private engine is
+// configured without an arxiv one.
+//go:embed prompts/private.md
+var privateSearchPrompts string
+
+// main registers the "ai-search" plugin with the wrapper: the config parser
+// plus the request header/body and response header/body (streaming and
+// buffered) handlers.
+func main() {
+	wrapper.SetCtx(
+		"ai-search",
+		wrapper.ParseConfigBy(parseConfig),
+		wrapper.ProcessRequestHeadersBy(onHttpRequestHeaders),
+		wrapper.ProcessRequestBodyBy(onHttpRequestBody),
+		wrapper.ProcessResponseHeadersBy(onHttpResponseHeaders),
+		wrapper.ProcessStreamingResponseBodyBy(onStreamingResponseBody),
+		wrapper.ProcessResponseBodyBy(onHttpResponseBody),
+	)
+}
+
+// parseConfig fills config from the plugin's JSON configuration.
+//
+// It reads the reference settings (needReference / referenceFormat), the
+// default language, and the prompt template (falling back to a built-in
+// template that depends on needReference), then instantiates every engine
+// listed under "searchFrom" and, when present, the "searchRewrite" section.
+//
+// An error is returned when referenceFormat lacks "%s", when the prompt
+// template lacks {search_results} or {question}, when an engine type is
+// unknown or fails to initialise, when a required searchRewrite setting is
+// missing, or when no engine is configured at all.
+func parseConfig(json gjson.Result, config *Config, log wrapper.Log) error {
+	config.needReference = json.Get("needReference").Bool()
+	if config.needReference {
+		config.referenceFormat = json.Get("referenceFormat").String()
+		if config.referenceFormat == "" {
+			config.referenceFormat = "**References:**\n%s"
+		} else if !strings.Contains(config.referenceFormat, "%s") {
+			return fmt.Errorf("invalid referenceFormat:%s", config.referenceFormat)
+		}
+	}
+	config.defaultLanguage = json.Get("defaultLang").String()
+	config.promptTemplate = json.Get("promptTemplate").String()
+	if config.promptTemplate == "" {
+		// Built-in templates: the first asks the model to cite sources by
+		// index, the second asks it not to cite at all.
+		if config.needReference {
+			config.promptTemplate = `# 以下内容是基于用户发送的消息的搜索结果:
+{search_results}
+在我给你的搜索结果中，每个结果都是[webpage X begin]...[webpage X end]格式的，X代表每篇文章的数字索引。请在适当的情况下在句子末尾引用上下文。请按照引用编号[X]的格式在答案中对应部分引用上下文。如果一句话源自多个上下文，请列出所有相关的引用编号，例如[3][5]，切记不要将引用集中在最后返回引用编号，而是在答案对应部分列出。
+在回答时，请注意以下几点：
+- 今天是北京时间：{cur_date}。
+- 并非搜索结果的所有内容都与用户的问题密切相关，你需要结合问题，对搜索结果进行甄别、筛选。
+- 对于列举类的问题（如列举所有航班信息），尽量将答案控制在10个要点以内，并告诉用户可以查看搜索来源、获得完整信息。优先提供信息完整、最相关的列举项；如非必要，不要主动告诉用户搜索结果未提供的内容。
+- 对于创作类的问题（如写论文），请务必在正文的段落中引用对应的参考编号，例如[3][5]，不能只在文章末尾引用。你需要解读并概括用户的题目要求，选择合适的格式，充分利用搜索结果并抽取重要信息，生成符合用户要求、极具思想深度、富有创造力与专业性的答案。你的创作篇幅需要尽可能延长，对于每一个要点的论述要推测用户的意图，给出尽可能多角度的回答要点，且务必信息量大、论述详尽。
+- 如果回答很长，请尽量结构化、分段落总结。如果需要分点作答，尽量控制在5个点以内，并合并相关的内容。
+- 对于客观类的问答，如果问题的答案非常简短，可以适当补充一到两句相关信息，以丰富内容。
+- 你需要根据用户要求和回答内容选择合适、美观的回答格式，确保可读性强。
+- 你的回答应该综合多个相关网页来回答，不能重复引用一个网页。
+- 除非用户要求，否则你回答的语言需要和用户提问的语言保持一致。
+
+# 用户消息为：
+{question}`
+		} else {
+			config.promptTemplate = `# 以下内容是基于用户发送的消息的搜索结果:
+{search_results}
+在我给你的搜索结果中，每个结果都是[webpage begin]...[webpage end]格式的。
+在回答时，请注意以下几点：
+- 今天是北京时间：{cur_date}。
+- 并非搜索结果的所有内容都与用户的问题密切相关，你需要结合问题，对搜索结果进行甄别、筛选。
+- 对于列举类的问题（如列举所有航班信息），尽量将答案控制在10个要点以内。如非必要，不要主动告诉用户搜索结果未提供的内容。
+- 对于创作类的问题（如写论文），你需要解读并概括用户的题目要求，选择合适的格式，充分利用搜索结果并抽取重要信息，生成符合用户要求、极具思想深度、富有创造力与专业性的答案。你的创作篇幅需要尽可能延长，对于每一个要点的论述要推测用户的意图，给出尽可能多角度的回答要点，且务必信息量大、论述详尽。
+- 如果回答很长，请尽量结构化、分段落总结。如果需要分点作答，尽量控制在5个点以内，并合并相关的内容。
+- 对于客观类的问答，如果问题的答案非常简短，可以适当补充一到两句相关信息，以丰富内容。
+- 你需要根据用户要求和回答内容选择合适、美观的回答格式，确保可读性强。
+- 你的回答应该综合多个相关网页来回答，但回答中不要给出网页的引用来源。
+- 除非用户要求，否则你回答的语言需要和用户提问的语言保持一致。
+
+# 用户消息为：
+{question}`
+		}
+	}
+	if !strings.Contains(config.promptTemplate, "{search_results}") ||
+		!strings.Contains(config.promptTemplate, "{question}") {
+		return fmt.Errorf("invalid promptTemplate, must contains {search_results} and {question}:%s", config.promptTemplate)
+	}
+	// Track which kinds of source are configured; this decides the rewrite prompt below.
+	var internetExists, privateExists, arxivExists bool
+	for _, e := range json.Get("searchFrom").Array() {
+		switch e.Get("type").String() {
+		case "bing":
+			searchEngine, err := bing.NewBingSearch(&e)
+			if err != nil {
+				return fmt.Errorf("bing search engine init failed:%s", err)
+			}
+			config.engine = append(config.engine, searchEngine)
+			internetExists = true
+		case "google":
+			searchEngine, err := google.NewGoogleSearch(&e)
+			if err != nil {
+				return fmt.Errorf("google search engine init failed:%s", err)
+			}
+			config.engine = append(config.engine, searchEngine)
+			internetExists = true
+		case "arxiv":
+			searchEngine, err := arxiv.NewArxivSearch(&e)
+			if err != nil {
+				return fmt.Errorf("arxiv search engine init failed:%s", err)
+			}
+			config.engine = append(config.engine, searchEngine)
+			arxivExists = true
+		case "elasticsearch":
+			searchEngine, err := elasticsearch.NewElasticsearchSearch(&e)
+			if err != nil {
+				return fmt.Errorf("elasticsearch search engine init failed:%s", err)
+			}
+			config.engine = append(config.engine, searchEngine)
+			privateExists = true
+		case "quark":
+			searchEngine, err := quark.NewQuarkSearch(&e)
+			if err != nil {
+				// Name the engine that actually failed.
+				return fmt.Errorf("quark search engine init failed:%s", err)
+			}
+			config.engine = append(config.engine, searchEngine)
+			internetExists = true
+		default:
+			return fmt.Errorf("unkown search engine:%s", e.Get("type").String())
+		}
+	}
+	searchRewriteJson := json.Get("searchRewrite")
+	if searchRewriteJson.Exists() {
+		searchRewrite := &SearchRewrite{}
+		llmServiceName := searchRewriteJson.Get("llmServiceName").String()
+		if llmServiceName == "" {
+			// Report the key exactly as it is spelled in the configuration.
+			return errors.New("llmServiceName not found")
+		}
+		llmServicePort := searchRewriteJson.Get("llmServicePort").Int()
+		if llmServicePort == 0 {
+			return errors.New("llmServicePort not found")
+		}
+		searchRewrite.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
+			FQDN: llmServiceName,
+			Port: llmServicePort,
+		})
+		llmApiKey := searchRewriteJson.Get("llmApiKey").String()
+		if llmApiKey == "" {
+			return errors.New("llmApiKey not found")
+		}
+		searchRewrite.apiKey = llmApiKey
+		llmUrl := searchRewriteJson.Get("llmUrl").String()
+		if llmUrl == "" {
+			return errors.New("llmUrl not found")
+		}
+		searchRewrite.url = llmUrl
+		llmModelName := searchRewriteJson.Get("llmModelName").String()
+		if llmModelName == "" {
+			return errors.New("llmModelName not found")
+		}
+		searchRewrite.modelName = llmModelName
+		llmTimeout := searchRewriteJson.Get("timeoutMillisecond").Uint()
+		if llmTimeout == 0 {
+			llmTimeout = 30000
+		}
+		searchRewrite.timeoutMillisecond = uint32(llmTimeout)
+		// The consideration here is that internet searches are generally available, but arxiv and private sources may not be.
+		if arxivExists {
+			if privateExists {
+				// private + internet + arxiv
+				searchRewrite.prompt = fullSearchPrompts
+			} else {
+				// internet + arxiv
+				searchRewrite.prompt = arxivSearchPrompts
+			}
+		} else if privateExists {
+			// private + internet
+			searchRewrite.prompt = privateSearchPrompts
+		} else if internetExists {
+			// only internet
+			searchRewrite.prompt = internetSearchPrompts
+		}
+		config.searchRewrite = searchRewrite
+	}
+	if len(config.engine) == 0 {
+		return fmt.Errorf("no avaliable search engine found")
+	}
+	log.Debugf("ai search enabled, config: %#v", config)
+	return nil
+}
+
+// onHttpRequestHeaders decides, from the request content-type, whether the
+// request body should be buffered for search augmentation.
+//
+//   - No content-type: the request is treated as having no body and is passed through.
+//   - Non-JSON content-type: a warning is logged and body reading is disabled.
+//   - JSON content-type: the body buffer limit is raised to DEFAULT_MAX_BODY_BYTES
+//     and the Accept-Encoding request header is removed.
+//
+// It always returns types.ActionContinue.
+func onHttpRequestHeaders(ctx wrapper.HttpContext, config Config, log wrapper.Log) types.Action {
+	mediaType, _ := proxywasm.GetHttpRequestHeader("content-type")
+	switch {
+	case mediaType == "":
+		// The request does not have a body.
+	case strings.Contains(mediaType, "application/json"):
+		ctx.SetRequestBodyBufferLimit(DEFAULT_MAX_BODY_BYTES)
+		// NOTE(review): presumably removed so the upstream answers uncompressed
+		// and the response body can be rewritten later — confirm.
+		_ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
+	default:
+		log.Warnf("content is not json, can't process: %s", mediaType)
+		ctx.DontReadRequestBody()
+	}
+	return types.ActionContinue
+}
+
+// onHttpRequestBody extracts the most recent "user" message from the chat
+// request and triggers the search flow for it.
+//
+// When config.searchRewrite is set, the question is first sent to the rewrite
+// LLM; its answer is parsed into per-engine search contexts that are then handed
+// to executeSearch from the asynchronous callback. Otherwise the raw question is
+// searched directly.
+//
+// Returns types.ActionPause while an asynchronous call is outstanding (the
+// request is resumed from the callback) and types.ActionContinue when nothing
+// was dispatched.
+func onHttpRequestBody(ctx wrapper.HttpContext, config Config, body []byte, log wrapper.Log) types.Action {
+	var queryIndex int
+	var query string
+	messages := gjson.GetBytes(body, "messages").Array()
+	// Walk backwards so the latest user turn wins.
+	for i := len(messages) - 1; i >= 0; i-- {
+		if messages[i].Get("role").String() == "user" {
+			queryIndex = i
+			query = messages[i].Get("content").String()
+			break
+		}
+	}
+	if query == "" {
+		log.Errorf("not found user query in body:%s", body)
+		return types.ActionContinue
+	}
+
+	searchRewrite := config.searchRewrite
+	if searchRewrite == nil {
+		// Execute search without rewrite
+		return executeSearch(ctx, config, queryIndex, body, []engine.SearchContext{{
+			Querys:   []string{query},
+			Language: config.defaultLanguage,
+		}}, log)
+	}
+
+	startTime := time.Now()
+	rewritePrompt := strings.Replace(searchRewrite.prompt, "{question}", query, 1)
+	rewriteBody, err := sjson.SetBytes([]byte(fmt.Sprintf(
+		`{"stream":false,"max_tokens":100,"model":"%s","messages":[{"role":"user","content":""}]}`,
+		searchRewrite.modelName)), "messages.0.content", rewritePrompt)
+	if err != nil {
+		// Do not send a malformed request to the LLM; fall through without search.
+		log.Errorf("build search rewrite request failed:%s", err)
+		return types.ActionContinue
+	}
+	err = searchRewrite.client.Post(searchRewrite.url,
+		[][2]string{
+			{"Content-Type", "application/json"},
+			{"Authorization", fmt.Sprintf("Bearer %s", searchRewrite.apiKey)},
+		}, rewriteBody,
+		func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+			if statusCode != http.StatusOK {
+				log.Errorf("search rewrite failed, status: %d", statusCode)
+				// After a rewrite failure, no further search is performed, thus quickly identifying the failure.
+				proxywasm.ResumeHttpRequest()
+				return
+			}
+
+			content := gjson.GetBytes(responseBody, "choices.0.message.content").String()
+			log.Infof("LLM rewritten query response: %s (took %v), original search query:%s",
+				strings.ReplaceAll(content, "\n", `\n`), time.Since(startTime), query)
+			// The rewrite prompts instruct the LLM to answer "none" when no search is needed.
+			if strings.Contains(content, "none") {
+				log.Debugf("no search required")
+				proxywasm.ResumeHttpRequest()
+				return
+			}
+
+			searchContexts := parseRewriteSearchContexts(config, content)
+			if len(searchContexts) == 0 {
+				log.Errorf("no valid search contexts found")
+				proxywasm.ResumeHttpRequest()
+				return
+			}
+			if types.ActionContinue == executeSearch(ctx, config, queryIndex, body, searchContexts, log) {
+				proxywasm.ResumeHttpRequest()
+			}
+		}, searchRewrite.timeoutMillisecond)
+	if err != nil {
+		log.Errorf("search rewrite call llm service failed:%s", err)
+		// After a rewrite failure, no further search is performed, thus quickly identifying the failure.
+		return types.ActionContinue
+	}
+	return types.ActionPause
+}
+
+// parseRewriteSearchContexts turns the rewrite LLM answer into search contexts.
+// Each non-empty line is expected to look like "<target>: <query>", where target
+// is "internet", "private", or anything else (treated as an Arxiv category).
+// Lines without a colon, and lines whose query part is empty, are ignored.
+func parseRewriteSearchContexts(config Config, content string) []engine.SearchContext {
+	var searchContexts []engine.SearchContext
+	for _, line := range strings.Split(content, "\n") {
+		line = strings.TrimSpace(line)
+		if line == "" {
+			continue
+		}
+
+		parts := strings.SplitN(line, ":", 2)
+		if len(parts) != 2 {
+			continue
+		}
+		engineType := strings.TrimSpace(parts[0])
+		queryStr := strings.TrimSpace(parts[1])
+
+		searchCtx := engine.SearchContext{Language: config.defaultLanguage}
+		switch engineType {
+		case "internet":
+			// Internet queries are a single sentence and are not split on commas.
+			searchCtx.EngineType = engineType
+			if queryStr != "" {
+				searchCtx.Querys = []string{queryStr}
+			}
+		case "private":
+			searchCtx.EngineType = engineType
+			searchCtx.Querys = splitQueryKeywords(queryStr)
+		default:
+			// Arxiv category
+			searchCtx.EngineType = "arxiv"
+			searchCtx.ArxivCategory = engineType
+			searchCtx.Querys = splitQueryKeywords(queryStr)
+		}
+
+		if len(searchCtx.Querys) == 0 {
+			continue
+		}
+		searchContexts = append(searchContexts, searchCtx)
+		if searchCtx.ArxivCategory != "" {
+			// Also conduct the inquiry across all categories to increase recall.
+			backupCtx := searchCtx
+			backupCtx.ArxivCategory = ""
+			searchContexts = append(searchContexts, backupCtx)
+		}
+	}
+	return searchContexts
+}
+
+// splitQueryKeywords splits a comma-separated keyword list, trimming whitespace
+// and dropping empty entries. It returns nil when no keyword remains.
+func splitQueryKeywords(queryStr string) []string {
+	var keywords []string
+	for _, keyword := range strings.Split(queryStr, ",") {
+		if keyword = strings.TrimSpace(keyword); keyword != "" {
+			keywords = append(keywords, keyword)
+		}
+	}
+	return keywords
+}
+
+// executeSearch dispatches every search context to each configured engine that
+// accepts it. Once all dispatched calls have completed, the results are merged
+// (deduplicated by link), rendered into config.promptTemplate, and written back
+// as the content of the message at queryIndex; the paused request is then resumed.
+//
+// Returns types.ActionPause when at least one call was dispatched, otherwise
+// types.ActionContinue (the caller decides how to proceed).
+func executeSearch(ctx wrapper.HttpContext, config Config, queryIndex int, body []byte, searchContexts []engine.SearchContext, log wrapper.Log) types.Action {
+	searchResultGroups := make([][]engine.SearchResult, len(config.engine))
+	var finished int
+	var searching int
+
+	// {question} in the prompt template stands for the user's message, so read it
+	// back from the request body rather than using a rewritten search query.
+	// Fall back to the first search query if the body lookup yields nothing.
+	question := gjson.GetBytes(body, fmt.Sprintf("messages.%d.content", queryIndex)).String()
+	if question == "" && len(searchContexts) > 0 && len(searchContexts[0].Querys) > 0 {
+		question = searchContexts[0].Querys[0]
+	}
+
+	// finalize is invoked from the last completing callback.
+	finalize := func() {
+		// Merge search results from all engines with deduplication
+		var mergedResults []engine.SearchResult
+		seenLinks := make(map[string]bool)
+		for _, results := range searchResultGroups {
+			for _, result := range results {
+				if !seenLinks[result.Link] {
+					seenLinks[result.Link] = true
+					mergedResults = append(mergedResults, result)
+				}
+			}
+		}
+		// Format search results for prompt template
+		var formattedResults []string
+		var formattedReferences []string
+		for j, result := range mergedResults {
+			if config.needReference {
+				formattedResults = append(formattedResults,
+					fmt.Sprintf("[webpage %d begin]\n%s\n[webpage %d end]", j+1, result.Content, j+1))
+				formattedReferences = append(formattedReferences,
+					fmt.Sprintf("[%d] [%s](%s)", j+1, result.Title, result.Link))
+			} else {
+				formattedResults = append(formattedResults,
+					fmt.Sprintf("[webpage begin]\n%s\n[webpage end]", result.Content))
+			}
+		}
+		// Prepare template variables; the date is rendered in UTC+8.
+		curDate := time.Now().In(time.FixedZone("CST", 8*3600)).Format("2006年1月2日")
+		searchResults := strings.Join(formattedResults, "\n")
+		log.Debugf("searchResults: %s", searchResults)
+		// Fill prompt template
+		prompt := strings.Replace(config.promptTemplate, "{search_results}", searchResults, 1)
+		prompt = strings.Replace(prompt, "{question}", question, 1)
+		prompt = strings.Replace(prompt, "{cur_date}", curDate, 1)
+		// Update request body with processed prompt
+		modifiedBody, err := sjson.SetBytes(body, fmt.Sprintf("messages.%d.content", queryIndex), prompt)
+		if err != nil {
+			log.Errorf("modify request message content failed, err:%v, body:%s", err, body)
+		} else {
+			log.Debugf("modifeid body:%s", modifiedBody)
+			proxywasm.ReplaceHttpRequestBody(modifiedBody)
+			if config.needReference {
+				ctx.SetContext("References", strings.Join(formattedReferences, "\n"))
+			}
+		}
+		proxywasm.ResumeHttpRequest()
+	}
+
+	for i := 0; i < len(config.engine); i++ {
+		configEngine := config.engine[i]
+		index := i
+
+		// Process all search contexts this engine accepts
+		for _, searchCtx := range searchContexts {
+			if !configEngine.NeedExectue(searchCtx) {
+				continue
+			}
+			// Copy the range variable: the callback below runs later, and before
+			// Go 1.22 every iteration shares a single loop variable.
+			searchCtx := searchCtx
+			args := configEngine.CallArgs(searchCtx)
+			err := configEngine.Client().Call(args.Method, args.Url, args.Headers, args.Body,
+				func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+					// Count this call as finished on every path, including failures,
+					// so the request is always resumed by the last callback.
+					defer func() {
+						finished++
+						if finished == searching {
+							finalize()
+						}
+					}()
+					if statusCode != http.StatusOK {
+						log.Errorf("search call failed, status: %d, engine: %#v", statusCode, configEngine)
+						return
+					}
+					// Append results to existing slice for this engine
+					searchResultGroups[index] = append(searchResultGroups[index], configEngine.ParseResult(searchCtx, responseBody)...)
+				}, args.TimeoutMillisecond)
+			if err != nil {
+				log.Errorf("search call failed, engine: %#v, err: %v", configEngine, err)
+				continue
+			}
+			searching++
+		}
+	}
+	if searching > 0 {
+		return types.ActionPause
+	}
+	return types.ActionContinue
+}
+
+// onHttpResponseHeaders prepares the response for reference injection.
+//
+// When references are disabled the response body is not read at all. Otherwise
+// the content-length header is dropped, and unless the response is a
+// "text/event-stream" the body is buffered up to DEFAULT_MAX_BODY_BYTES. A
+// missing or unreadable Content-Type is logged and treated as non-streaming.
+//
+// It always returns types.ActionContinue.
+func onHttpResponseHeaders(ctx wrapper.HttpContext, config Config, log wrapper.Log) types.Action {
+	if config.needReference {
+		proxywasm.RemoveHttpResponseHeader("content-length")
+		respType, headerErr := proxywasm.GetHttpResponseHeader("Content-Type")
+		if headerErr != nil {
+			log.Errorf("unable to load content-type header from response: %v", headerErr)
+		}
+		streaming := headerErr == nil && strings.HasPrefix(respType, "text/event-stream")
+		if !streaming {
+			ctx.BufferResponseBody()
+			ctx.SetResponseBodyBufferLimit(DEFAULT_MAX_BODY_BYTES)
+		}
+		return types.ActionContinue
+	}
+	ctx.DontReadResponseBody()
+	return types.ActionContinue
+}
+
+// onHttpResponseBody prepends the formatted reference list (stored under the
+// "References" context key) to choices.0.message.content of a buffered
+// response body.
+//
+// The body is left untouched when no references were recorded, when the
+// response has no choices.0.message.content field (for example an upstream error
+// payload), or when the JSON rewrite fails. It always returns
+// types.ActionContinue.
+func onHttpResponseBody(ctx wrapper.HttpContext, config Config, body []byte, log wrapper.Log) types.Action {
+	references := ctx.GetStringContext("References", "")
+	if references == "" {
+		return types.ActionContinue
+	}
+	content := gjson.GetBytes(body, "choices.0.message.content")
+	if !content.Exists() {
+		// Avoid fabricating a choices array inside a response that is not a chat completion.
+		log.Warnf("response has no message content, skip appending references")
+		return types.ActionContinue
+	}
+	modifiedContent := fmt.Sprintf("%s\n\n%s", fmt.Sprintf(config.referenceFormat, references), content.String())
+	// Use a separate variable so the original body is still available for the error log.
+	modifiedBody, err := sjson.SetBytes(body, "choices.0.message.content", modifiedContent)
+	if err != nil {
+		log.Errorf("modify response message content failed, err:%v, body:%s", err, body)
+		return types.ActionContinue
+	}
+	proxywasm.ReplaceHttpResponseBody(modifiedBody)
+	return types.ActionContinue
+}
+
+// onStreamingResponseBody injects the reference list into the first complete
+// message of a streamed response.
+//
+// Chunks are withheld (an empty chunk is returned) until
+// setReferencesToFirstMessage reports that the first message is complete; after
+// that every chunk is passed through unchanged. If the stream ends before a
+// complete message was ever seen, the buffered bytes are flushed as-is so that
+// no response data is lost.
+func onStreamingResponseBody(ctx wrapper.HttpContext, config Config, chunk []byte, isLastChunk bool, log wrapper.Log) []byte {
+	if ctx.GetBoolContext("ReferenceAppended", false) {
+		return chunk
+	}
+	references := ctx.GetStringContext("References", "")
+	if references == "" {
+		return chunk
+	}
+	modifiedChunk, responseReady := setReferencesToFirstMessage(ctx, chunk, fmt.Sprintf(config.referenceFormat, references), log)
+	if responseReady {
+		ctx.SetContext("ReferenceAppended", true)
+		return modifiedChunk
+	}
+	if isLastChunk {
+		// No message boundary was found in the whole stream. The buffer kept under
+		// PARTIAL_MESSAGE_CONTEXT_KEY already contains every non-empty chunk seen so
+		// far (including this one), so hand it to the client instead of dropping it.
+		if buffered, ok := ctx.GetContext(PARTIAL_MESSAGE_CONTEXT_KEY).([]byte); ok {
+			log.Warnf("stream ended before a complete message was received, references not appended")
+			return buffered
+		}
+		return chunk
+	}
+	return []byte("")
+}
+
+// PARTIAL_MESSAGE_CONTEXT_KEY is the HTTP-context key under which
+// setReferencesToFirstMessage buffers streamed response bytes until the first
+// complete message (terminated by a blank line) has arrived.
+const PARTIAL_MESSAGE_CONTEXT_KEY = "partialMessage"
+
+// setReferencesToFirstMessage accumulates streamed chunks until at least one
+// complete message (delimited by "\n\n") is available, then prepends references
+// to choices.0.delta.content of that first message.
+//
+// Returns (data, true) once the first message has been processed; data is the
+// whole accumulated buffer with the first message rewritten, or the unmodified
+// buffer if the rewrite failed. Returns (nil, false) while more data is needed,
+// in which case the accumulated bytes are kept under PARTIAL_MESSAGE_CONTEXT_KEY.
+func setReferencesToFirstMessage(ctx wrapper.HttpContext, chunk []byte, references string, log wrapper.Log) ([]byte, bool) {
+	if len(chunk) == 0 {
+		log.Debugf("chunk is empty")
+		return nil, false
+	}
+
+	// Prepend whatever was buffered by earlier calls.
+	var partialMessage []byte
+	partialMessageI := ctx.GetContext(PARTIAL_MESSAGE_CONTEXT_KEY)
+	if partialMessageI != nil {
+		if pMsg, ok := partialMessageI.([]byte); ok {
+			partialMessage = append(pMsg, chunk...)
+		} else {
+			log.Warnf("invalid partial message type: %T", partialMessageI)
+			partialMessage = chunk
+		}
+	} else {
+		partialMessage = chunk
+	}
+
+	if len(partialMessage) == 0 {
+		log.Debugf("partial message is empty")
+		return nil, false
+	}
+	messages := strings.Split(string(partialMessage), "\n\n")
+	if len(messages) <= 1 {
+		// No message boundary yet: keep buffering.
+		ctx.SetContext(PARTIAL_MESSAGE_CONTEXT_KEY, partialMessage)
+		return nil, false
+	}
+
+	firstMessage := messages[0]
+	log.Debugf("first message: %s", firstMessage)
+	// Strip the "data:" field prefix to obtain the JSON payload.
+	firstMessage = strings.TrimPrefix(firstMessage, "data:")
+	firstMessage = strings.TrimPrefix(firstMessage, " ")
+	firstMessage = strings.TrimSuffix(firstMessage, "\n")
+	deltaContent := gjson.Get(firstMessage, "choices.0.delta.content")
+	modifiedMessage, err := sjson.Set(firstMessage, "choices.0.delta.content", fmt.Sprintf("%s\n\n%s", references, deltaContent))
+	if err != nil {
+		log.Errorf("modify response delta content failed, err:%v", err)
+		// Give up on injection but still release the buffered data.
+		return partialMessage, true
+	}
+	modifiedMessage = fmt.Sprintf("data: %s", modifiedMessage)
+	log.Debugf("modified message: %s", modifiedMessage)
+	messages[0] = modifiedMessage
+	return []byte(strings.Join(messages, "\n\n")), true
+}
--- a/plugins/wasm-go/extensions/ai-search/prompts/arxiv.md
+++ b/plugins/wasm-go/extensions/ai-search/prompts/arxiv.md
@@ -0,0 +1,214 @@
+# 目标
+你需要分析**用户发送的消息**，是否需要查询搜索引擎(Google/Bing)/论文资料库(Arxiv)，并按照如下情况回复相应内容:
+
+## 情况一：不需要查询搜索引擎/论文资料
+### 情况举例：
+1. **用户发送的消息**不是在提问或寻求帮助
+2. **用户发送的消息**是要求翻译文字
+
+### 思考过程
+根据上面的**情况举例**，如果符合，则按照下面**回复内容示例**进行回复，注意不要输出思考过程
+
+### 回复内容示例：
+none
+
+## 情况二：需要查询搜索引擎/论文资料
+### 情况举例：
+1. 答复**用户发送的消息**，需依赖互联网上最新的资料
+2. 答复**用户发送的消息**，需依赖论文等专业资料
+3. 通过查询资料，可以更好地答复**用户发送的消息**
+
+### 思考过程
+根据上面的**情况举例**，以及其他需要查询资料的情况，如果符合，按照以下步骤思考，并按照下面**回复内容示例**进行回复，注意不要输出思考过程:
+1. What: 分析要答复**用户发送的消息**，需要了解什么知识和资料
+2. Where: 判断了解这个知识和资料要向Google等搜索引擎提问，还是向Arxiv论文资料库进行查询，或者需要同时查询多个地方
+3. How: 分析对于要查询的知识和资料，应该提出什么样的问题
+4. Adjust: 明确要向什么地方查询什么问题后，按下面方式对问题进行调整
+  4.1. 向搜索引擎提问：用一句话概括问题，并且针对搜索引擎做问题优化
+  4.2. 向Arxiv论文资料库提问：
+    4.2.1. 明确问题所属领域，然后确定Arxiv的Category值，Category可选的枚举如下:
+      - cs.AI: Artificial Intelligence
+      - cs.AR: Hardware Architecture
+      - cs.CC: Computational Complexity
+      - cs.CE: Computational Engineering, Finance, and Science
+      - cs.CG: Computational Geometry
+      - cs.CL: Computation and Language
+      - cs.CR: Cryptography and Security
+      - cs.CV: Computer Vision and Pattern Recognition
+      - cs.CY: Computers and Society
+      - cs.DB: Databases
+      - cs.DC: Distributed, Parallel, and Cluster Computing
+      - cs.DL: Digital Libraries
+      - cs.DM: Discrete Mathematics
+      - cs.DS: Data Structures and Algorithms
+      - cs.ET: Emerging Technologies
+      - cs.FL: Formal Languages and Automata Theory
+      - cs.GL: General Literature
+      - cs.GR: Graphics
+      - cs.GT: Computer Science and Game Theory
+      - cs.HC: Human-Computer Interaction
+      - cs.IR: Information Retrieval
+      - cs.IT: Information Theory
+      - cs.LG: Machine Learning
+      - cs.LO: Logic in Computer Science
+      - cs.MA: Multiagent Systems
+      - cs.MM: Multimedia
+      - cs.MS: Mathematical Software
+      - cs.NA: Numerical Analysis
+      - cs.NE: Neural and Evolutionary Computing
+      - cs.NI: Networking and Internet Architecture
+      - cs.OH: Other Computer Science
+      - cs.OS: Operating Systems
+      - cs.PF: Performance
+      - cs.PL: Programming Languages
+      - cs.RO: Robotics
+      - cs.SC: Symbolic Computation
+      - cs.SD: Sound
+      - cs.SE: Software Engineering
+      - cs.SI: Social and Information Networks
+      - cs.SY: Systems and Control
+      - econ.EM: Econometrics
+      - econ.GN: General Economics
+      - econ.TH: Theoretical Economics
+      - eess.AS: Audio and Speech Processing
+      - eess.IV: Image and Video Processing
+      - eess.SP: Signal Processing
+      - eess.SY: Systems and Control
+      - math.AC: Commutative Algebra
+      - math.AG: Algebraic Geometry
+      - math.AP: Analysis of PDEs
+      - math.AT: Algebraic Topology
+      - math.CA: Classical Analysis and ODEs
+      - math.CO: Combinatorics
+      - math.CT: Category Theory
+      - math.CV: Complex Variables
+      - math.DG: Differential Geometry
+      - math.DS: Dynamical Systems
+      - math.FA: Functional Analysis
+      - math.GM: General Mathematics
+      - math.GN: General Topology
+      - math.GR: Group Theory
+      - math.GT: Geometric Topology
+      - math.HO: History and Overview
+      - math.IT: Information Theory
+      - math.KT: K-Theory and Homology
+      - math.LO: Logic
+      - math.MG: Metric Geometry
+      - math.MP: Mathematical Physics
+      - math.NA: Numerical Analysis
+      - math.NT: Number Theory
+      - math.OA: Operator Algebras
+      - math.OC: Optimization and Control
+      - math.PR: Probability
+      - math.QA: Quantum Algebra
+      - math.RA: Rings and Algebras
+      - math.RT: Representation Theory
+      - math.SG: Symplectic Geometry
+      - math.SP: Spectral Theory
+      - math.ST: Statistics Theory
+      - astro-ph.CO: Cosmology and Nongalactic Astrophysics
+      - astro-ph.EP: Earth and Planetary Astrophysics
+      - astro-ph.GA: Astrophysics of Galaxies
+      - astro-ph.HE: High Energy Astrophysical Phenomena
+      - astro-ph.IM: Instrumentation and Methods for Astrophysics
+      - astro-ph.SR: Solar and Stellar Astrophysics
+      - cond-mat.dis-nn: Disordered Systems and Neural Networks
+      - cond-mat.mes-hall: Mesoscale and Nanoscale Physics
+      - cond-mat.mtrl-sci: Materials Science
+      - cond-mat.other: Other Condensed Matter
+      - cond-mat.quant-gas: Quantum Gases
+      - cond-mat.soft: Soft Condensed Matter
+      - cond-mat.stat-mech: Statistical Mechanics
+      - cond-mat.str-el: Strongly Correlated Electrons
+      - cond-mat.supr-con: Superconductivity
+      - gr-qc: General Relativity and Quantum Cosmology
+      - hep-ex: High Energy Physics - Experiment
+      - hep-lat: High Energy Physics - Lattice
+      - hep-ph: High Energy Physics - Phenomenology
+      - hep-th: High Energy Physics - Theory
+      - math-ph: Mathematical Physics
+      - nlin.AO: Adaptation and Self-Organizing Systems
+      - nlin.CD: Chaotic Dynamics
+      - nlin.CG: Cellular Automata and Lattice Gases
+      - nlin.PS: Pattern Formation and Solitons
+      - nlin.SI: Exactly Solvable and Integrable Systems
+      - nucl-ex: Nuclear Experiment
+      - nucl-th: Nuclear Theory
+      - physics.acc-ph: Accelerator Physics
+      - physics.ao-ph: Atmospheric and Oceanic Physics
+      - physics.app-ph: Applied Physics
+      - physics.atm-clus: Atomic and Molecular Clusters
+      - physics.atom-ph: Atomic Physics
+      - physics.bio-ph: Biological Physics
+      - physics.chem-ph: Chemical Physics
+      - physics.class-ph: Classical Physics
+      - physics.comp-ph: Computational Physics
+      - physics.data-an: Data Analysis, Statistics and Probability
+      - physics.ed-ph: Physics Education
+      - physics.flu-dyn: Fluid Dynamics
+      - physics.gen-ph: General Physics
+      - physics.geo-ph: Geophysics
+      - physics.hist-ph: History and Philosophy of Physics
+      - physics.ins-det: Instrumentation and Detectors
+      - physics.med-ph: Medical Physics
+      - physics.optics: Optics
+      - physics.plasm-ph: Plasma Physics
+      - physics.pop-ph: Popular Physics
+      - physics.soc-ph: Physics and Society
+      - physics.space-ph: Space Physics
+      - quant-ph: Quantum Physics
+      - q-bio.BM: Biomolecules
+      - q-bio.CB: Cell Behavior
+      - q-bio.GN: Genomics
+      - q-bio.MN: Molecular Networks
+      - q-bio.NC: Neurons and Cognition
+      - q-bio.OT: Other Quantitative Biology
+      - q-bio.PE: Populations and Evolution
+      - q-bio.QM: Quantitative Methods
+      - q-bio.SC: Subcellular Processes
+      - q-bio.TO: Tissues and Organs
+      - q-fin.CP: Computational Finance
+      - q-fin.EC: Economics
+      - q-fin.GN: General Finance
+      - q-fin.MF: Mathematical Finance
+      - q-fin.PM: Portfolio Management
+      - q-fin.PR: Pricing of Securities
+      - q-fin.RM: Risk Management
+      - q-fin.ST: Statistical Finance
+      - q-fin.TR: Trading and Market Microstructure
+      - stat.AP: Applications
+      - stat.CO: Computation
+      - stat.ME: Methodology
+      - stat.ML: Machine Learning
+      - stat.OT: Other Statistics
+      - stat.TH: Statistics Theory
+    4.2.2. 根据问题所属领域，将问题拆分成多组关键词的组合，同时组合中的关键词个数尽量不要超过3个
+5. Final: 按照下面**回复内容示例**进行回复，注意:
+  - 不要输出思考过程
+  - 可以向多个查询目标分别查询多次，多个查询用换行分隔，总查询次数控制在5次以内
+  - 查询搜索引擎时，需要以"internet:"开头
+  - 查询Arxiv论文时，需要以Arxiv的Category值开头，例如"cs.AI:"
+  - 查询Arxiv论文时，优先用英文表述关键词进行搜索
+  - 当用多个关键词查询时，关键词之间用","分隔
+  - 尽量满足**用户发送的消息**中的搜索要求，例如用户要求用英文搜索，则需用英文表述问题和关键词
+  - 用户如果没有要求搜索语言，则用和**用户发送的消息**一致的语言表述问题和关键词
+  - 如果**用户发送的消息**使用中文，至少要有一条向搜索引擎查询的中文问题
+
+### 回复内容示例：
+
+#### 用不同语言查询多次搜索引擎
+internet: 黄金价格走势
+internet: The trend of gold prices
+
+#### 向Arxiv的多个类目查询多次
+cs.AI: attention mechanism
+cs.AI: neuron
+q-bio.NC: brain,attention mechanism
+
+#### 向多个查询目标查询多次
+internet: 中国未来房价趋势
+internet: 最新中国经济政策
+econ.TH: policy, real estate
+
+# 用户发送的消息为：
+{question}
--- a/plugins/wasm-go/extensions/ai-search/prompts/full.md
+++ b/plugins/wasm-go/extensions/ai-search/prompts/full.md
@@ -0,0 +1,221 @@
+# 目标
+你需要分析**用户发送的消息**，是否需要查询搜索引擎(Google/Bing)/论文资料库(Arxiv)/私有知识库，并按照如下情况回复相应内容:
+
+## 情况一：不需要查询搜索引擎/论文资料/私有知识库
+### 情况举例：
+1. **用户发送的消息**不是在提问或寻求帮助
+2. **用户发送的消息**是要求翻译文字
+
+### 思考过程
+根据上面的**情况举例**，如果符合，则按照下面**回复内容示例**进行回复，注意不要输出思考过程
+
+### 回复内容示例：
+none
+
+## 情况二：需要查询搜索引擎/论文资料/私有知识库
+### 情况举例：
+1. 答复**用户发送的消息**，需依赖互联网上最新的资料
+2. 答复**用户发送的消息**，需依赖论文等专业资料
+3. 通过查询资料，可以更好地答复**用户发送的消息**
+
+### 思考过程
+根据上面的**情况举例**，以及其他需要查询资料的情况，如果符合，按照以下步骤思考，并按照下面**回复内容示例**进行回复，注意不要输出思考过程:
+1. What: 分析要答复**用户发送的消息**，需要了解什么知识和资料
+2. Where: 判断了解这个知识和资料要向Google等搜索引擎提问，还是向Arxiv论文资料库进行查询，还是向私有知识库进行查询，或者需要同时查询多个地方
+3. How: 分析对于要查询的知识和资料，应该提出什么样的问题
+4. Adjust: 明确要向什么地方查询什么问题后，按下面方式对问题进行调整
+  4.1. 向搜索引擎提问：用一句话概括问题，并且针对搜索引擎做问题优化
+  4.2. 向私有知识库提问：将问题拆分成多组关键词的组合，同时组合中的关键词个数尽量不要超过3个
+  4.3. 向Arxiv论文资料库提问：
+    4.3.1. 明确问题所属领域，然后确定Arxiv的Category值，Category可选的枚举如下:
+      - cs.AI: Artificial Intelligence
+      - cs.AR: Hardware Architecture
+      - cs.CC: Computational Complexity
+      - cs.CE: Computational Engineering, Finance, and Science
+      - cs.CG: Computational Geometry
+      - cs.CL: Computation and Language
+      - cs.CR: Cryptography and Security
+      - cs.CV: Computer Vision and Pattern Recognition
+      - cs.CY: Computers and Society
+      - cs.DB: Databases
+      - cs.DC: Distributed, Parallel, and Cluster Computing
+      - cs.DL: Digital Libraries
+      - cs.DM: Discrete Mathematics
+      - cs.DS: Data Structures and Algorithms
+      - cs.ET: Emerging Technologies
+      - cs.FL: Formal Languages and Automata Theory
+      - cs.GL: General Literature
+      - cs.GR: Graphics
+      - cs.GT: Computer Science and Game Theory
+      - cs.HC: Human-Computer Interaction
+      - cs.IR: Information Retrieval
+      - cs.IT: Information Theory
+      - cs.LG: Machine Learning
+      - cs.LO: Logic in Computer Science
+      - cs.MA: Multiagent Systems
+      - cs.MM: Multimedia
+      - cs.MS: Mathematical Software
+      - cs.NA: Numerical Analysis
+      - cs.NE: Neural and Evolutionary Computing
+      - cs.NI: Networking and Internet Architecture
+      - cs.OH: Other Computer Science
+      - cs.OS: Operating Systems
+      - cs.PF: Performance
+      - cs.PL: Programming Languages
+      - cs.RO: Robotics
+      - cs.SC: Symbolic Computation
+      - cs.SD: Sound
+      - cs.SE: Software Engineering
+      - cs.SI: Social and Information Networks
+      - cs.SY: Systems and Control
+      - econ.EM: Econometrics
+      - econ.GN: General Economics
+      - econ.TH: Theoretical Economics
+      - eess.AS: Audio and Speech Processing
+      - eess.IV: Image and Video Processing
+      - eess.SP: Signal Processing
+      - eess.SY: Systems and Control
+      - math.AC: Commutative Algebra
+      - math.AG: Algebraic Geometry
+      - math.AP: Analysis of PDEs
+      - math.AT: Algebraic Topology
+      - math.CA: Classical Analysis and ODEs
+      - math.CO: Combinatorics
+      - math.CT: Category Theory
+      - math.CV: Complex Variables
+      - math.DG: Differential Geometry
+      - math.DS: Dynamical Systems
+      - math.FA: Functional Analysis
+      - math.GM: General Mathematics
+      - math.GN: General Topology
+      - math.GR: Group Theory
+      - math.GT: Geometric Topology
+      - math.HO: History and Overview
+      - math.IT: Information Theory
+      - math.KT: K-Theory and Homology
+      - math.LO: Logic
+      - math.MG: Metric Geometry
+      - math.MP: Mathematical Physics
+      - math.NA: Numerical Analysis
+      - math.NT: Number Theory
+      - math.OA: Operator Algebras
+      - math.OC: Optimization and Control
+      - math.PR: Probability
+      - math.QA: Quantum Algebra
+      - math.RA: Rings and Algebras
+      - math.RT: Representation Theory
+      - math.SG: Symplectic Geometry
+      - math.SP: Spectral Theory
+      - math.ST: Statistics Theory
+      - astro-ph.CO: Cosmology and Nongalactic Astrophysics
+      - astro-ph.EP: Earth and Planetary Astrophysics
+      - astro-ph.GA: Astrophysics of Galaxies
+      - astro-ph.HE: High Energy Astrophysical Phenomena
+      - astro-ph.IM: Instrumentation and Methods for Astrophysics
+      - astro-ph.SR: Solar and Stellar Astrophysics
+      - cond-mat.dis-nn: Disordered Systems and Neural Networks
+      - cond-mat.mes-hall: Mesoscale and Nanoscale Physics
+      - cond-mat.mtrl-sci: Materials Science
+      - cond-mat.other: Other Condensed Matter
+      - cond-mat.quant-gas: Quantum Gases
+      - cond-mat.soft: Soft Condensed Matter
+      - cond-mat.stat-mech: Statistical Mechanics
+      - cond-mat.str-el: Strongly Correlated Electrons
+      - cond-mat.supr-con: Superconductivity
+      - gr-qc: General Relativity and Quantum Cosmology
+      - hep-ex: High Energy Physics - Experiment
+      - hep-lat: High Energy Physics - Lattice
+      - hep-ph: High Energy Physics - Phenomenology
+      - hep-th: High Energy Physics - Theory
+      - math-ph: Mathematical Physics
+      - nlin.AO: Adaptation and Self-Organizing Systems
+      - nlin.CD: Chaotic Dynamics
+      - nlin.CG: Cellular Automata and Lattice Gases
+      - nlin.PS: Pattern Formation and Solitons
+      - nlin.SI: Exactly Solvable and Integrable Systems
+      - nucl-ex: Nuclear Experiment
+      - nucl-th: Nuclear Theory
+      - physics.acc-ph: Accelerator Physics
+      - physics.ao-ph: Atmospheric and Oceanic Physics
+      - physics.app-ph: Applied Physics
+      - physics.atm-clus: Atomic and Molecular Clusters
+      - physics.atom-ph: Atomic Physics
+      - physics.bio-ph: Biological Physics
+      - physics.chem-ph: Chemical Physics
+      - physics.class-ph: Classical Physics
+      - physics.comp-ph: Computational Physics
+      - physics.data-an: Data Analysis, Statistics and Probability
+      - physics.ed-ph: Physics Education
+      - physics.flu-dyn: Fluid Dynamics
+      - physics.gen-ph: General Physics
+      - physics.geo-ph: Geophysics
+      - physics.hist-ph: History and Philosophy of Physics
+      - physics.ins-det: Instrumentation and Detectors
+      - physics.med-ph: Medical Physics
+      - physics.optics: Optics
+      - physics.plasm-ph: Plasma Physics
+      - physics.pop-ph: Popular Physics
+      - physics.soc-ph: Physics and Society
+      - physics.space-ph: Space Physics
+      - quant-ph: Quantum Physics
+      - q-bio.BM: Biomolecules
+      - q-bio.CB: Cell Behavior
+      - q-bio.GN: Genomics
+      - q-bio.MN: Molecular Networks
+      - q-bio.NC: Neurons and Cognition
+      - q-bio.OT: Other Quantitative Biology
+      - q-bio.PE: Populations and Evolution
+      - q-bio.QM: Quantitative Methods
+      - q-bio.SC: Subcellular Processes
+      - q-bio.TO: Tissues and Organs
+      - q-fin.CP: Computational Finance
+      - q-fin.EC: Economics
+      - q-fin.GN: General Finance
+      - q-fin.MF: Mathematical Finance
+      - q-fin.PM: Portfolio Management
+      - q-fin.PR: Pricing of Securities
+      - q-fin.RM: Risk Management
+      - q-fin.ST: Statistical Finance
+      - q-fin.TR: Trading and Market Microstructure
+      - stat.AP: Applications
+      - stat.CO: Computation
+      - stat.ME: Methodology
+      - stat.ML: Machine Learning
+      - stat.OT: Other Statistics
+      - stat.TH: Statistics Theory
+    4.3.2. 根据问题所属领域，将问题拆分成多组关键词的组合，同时组合中的关键词个数尽量不要超过3个
+5. Final: 按照下面**回复内容示例**进行回复，注意:
+  - 不要输出思考过程
+  - 可以向多个查询目标分别查询多次，多个查询用换行分隔，总查询次数控制在5次以内
+  - 查询搜索引擎时，需要以"internet:"开头
+  - 查询私有知识库时，需要以"private:"开头
+  - 查询Arxiv论文时，需要以Arxiv的Category值开头，例如"cs.AI:"
+  - 查询Arxiv论文时，优先用英文表述关键词进行搜索
+  - 当用多个关键词查询时，关键词之间用","分隔
+  - 尽量满足**用户发送的消息**中的搜索要求，例如用户要求用英文搜索，则需用英文表述问题和关键词
+  - 用户如果没有要求搜索语言，则用和**用户发送的消息**一致的语言表述问题和关键词
+  - 如果**用户发送的消息**使用中文，至少要有一条向搜索引擎查询的中文问题
+
+### 回复内容示例：
+
+#### 用不同语言查询多次搜索引擎
+internet: 黄金价格走势
+internet: The trend of gold prices
+
+#### 向Arxiv的多个类目查询多次
+cs.AI: attention mechanism
+cs.AI: neuron
+q-bio.NC: brain,attention mechanism
+
+#### 向私有知识库查询多次
+private: 电子钱包,密码
+private: 张三,身份证号
+
+#### 向多个查询目标查询多次
+internet: 中国未来房价趋势
+internet: 最新中国经济政策
+econ.TH: policy, real estate
+private: 财务状况
+
+# 用户发送的消息为：
+{question}
--- a/plugins/wasm-go/extensions/ai-search/prompts/internet.md
+++ b/plugins/wasm-go/extensions/ai-search/prompts/internet.md
@@ -0,0 +1,41 @@
+# 目标
+你需要分析**用户发送的消息**，是否需要查询搜索引擎(Google/Bing)，并按照如下情况回复相应内容:
+
+## 情况一：不需要查询搜索引擎
+### 情况举例：
+1. **用户发送的消息**不是在提问或寻求帮助
+2. **用户发送的消息**是要求翻译文字
+
+### 思考过程
+根据上面的**情况举例**，如果符合，则按照下面**回复内容示例**进行回复，注意不要输出思考过程
+
+### 回复内容示例：
+none
+
+## 情况二：需要查询搜索引擎
+### 情况举例：
+1. 答复**用户发送的消息**，需依赖互联网上最新的资料
+2. 答复**用户发送的消息**，需依赖论文等专业资料
+3. 通过查询资料，可以更好地答复**用户发送的消息**
+
+### 思考过程
+根据上面的**情况举例**，以及其他需要查询资料的情况，如果符合，按照以下步骤思考，并按照下面**回复内容示例**进行回复，注意不要输出思考过程:
+1. What: 分析要答复**用户发送的消息**，需要了解什么知识和资料
+2. How: 分析对于要查询的知识和资料，应该提出什么样的问题
+3. Adjust: 明确查询什么问题后，用一句话概括问题，并且针对搜索引擎做问题优化
+4. Final: 按照下面**回复内容示例**进行回复，注意:
+  - 不要输出思考过程
+  - 可以查询多次，多个查询用换行分隔，总查询次数控制在5次以内
+  - 需要以"internet:"开头
+  - 尽量满足**用户发送的消息**中的搜索要求，例如用户要求用英文搜索，则需用英文表述问题和关键词
+  - 用户如果没有要求搜索语言，则用和**用户发送的消息**一致的语言表述问题和关键词
+  - 如果**用户发送的消息**使用中文，至少要有一条向搜索引擎查询的中文问题
+
+### 回复内容示例：
+
+#### 用不同语言查询多次搜索引擎
+internet: 黄金价格走势
+internet: The trend of gold prices
+
+# 用户发送的消息为：
+{question}
--- a/plugins/wasm-go/extensions/ai-search/prompts/private.md
+++ b/plugins/wasm-go/extensions/ai-search/prompts/private.md
@@ -0,0 +1,55 @@
+# 目标
+你需要分析**用户发送的消息**，是否需要查询搜索引擎(Google/Bing)/私有知识库，并按照如下情况回复相应内容:
+
+## 情况一：不需要查询搜索引擎/私有知识库
+### 情况举例：
+1. **用户发送的消息**不是在提问或寻求帮助
+2. **用户发送的消息**是要求翻译文字
+
+### 思考过程
+根据上面的**情况举例**，如果符合，则按照下面**回复内容示例**进行回复，注意不要输出思考过程
+
+### 回复内容示例：
+none
+
+## 情况二：需要查询搜索引擎/私有知识库
+### 情况举例：
+1. 答复**用户发送的消息**，需依赖互联网上最新的资料
+2. 答复**用户发送的消息**，需依赖论文等专业资料
+3. 通过查询资料，可以更好地答复**用户发送的消息**
+
+### 思考过程
+根据上面的**情况举例**，以及其他需要查询资料的情况，如果符合，按照以下步骤思考，并按照下面**回复内容示例**进行回复，注意不要输出思考过程:
+1. What: 分析要答复**用户发送的消息**，需要了解什么知识和资料
+2. Where: 判断了解这个知识和资料要向Google等搜索引擎提问，还是向私有知识库进行查询，或者需要同时查询多个地方
+3. How: 分析对于要查询的知识和资料，应该提出什么样的问题
+4. Adjust: 明确要向什么地方查询什么问题后，按下面方式对问题进行调整
+  4.1. 向搜索引擎提问：用一句话概括问题，并且针对搜索引擎做问题优化
+  4.2. 向私有知识库提问：将问题拆分成多组关键词的组合，同时组合中的关键词个数尽量不要超过3个
+5. Final: 按照下面**回复内容示例**进行回复，注意:
+  - 不要输出思考过程
+  - 可以向多个查询目标分别查询多次，多个查询用换行分隔，总查询次数控制在5次以内
+  - 查询搜索引擎时，需要以"internet:"开头
+  - 查询私有知识库时，需要以"private:"开头
+  - 当用多个关键词查询时，关键词之间用","分隔
+  - 尽量满足**用户发送的消息**中的搜索要求，例如用户要求用英文搜索，则需用英文表述问题和关键词
+  - 用户如果没有要求搜索语言，则用和**用户发送的消息**一致的语言表述问题和关键词
+  - 如果**用户发送的消息**使用中文，至少要有一条向搜索引擎查询的中文问题
+
+### 回复内容示例：
+
+#### 用不同语言查询多次搜索引擎
+internet: 黄金价格走势
+internet: The trend of gold prices
+
+#### 向私有知识库查询多次
+private: 电子钱包,密码
+private: 张三,身份证号
+
+#### 向多个查询目标查询多次
+internet: 中国未来房价趋势
+internet: 最新中国经济政策
+private: 财务状况
+
+# 用户发送的消息为：
+{question}
--- a/plugins/wasm-go/extensions/ai-search/prompts/test_ai_search.py
+++ b/plugins/wasm-go/extensions/ai-search/prompts/test_ai_search.py
@@ -0,0 +1,56 @@
+import argparse
+import requests
+import time
+import json
+
+def main():
+    # Parse command-line arguments: the question to send and the path of the prompt template file.
+    parser = argparse.ArgumentParser(description='AI Search Test Script')
+    parser.add_argument('--question', required=True, help='The question to analyze')
+    parser.add_argument('--prompt', required=True, help='The prompt file to analyze')    
+    args = parser.parse_args()
+
+    # Read the whole prompt template from the file given by --prompt (decoded as UTF-8).
+    # The path is used as given, so a relative path is resolved against the current working directory.
+    with open(args.prompt, 'r', encoding='utf-8') as f:
+        prompt_template = f.read()
+    
+    # Substitute every {question} placeholder in the template with the question from the command line.
+    prompt = prompt_template.replace('{question}', args.question)
+
+    # Build the request headers and the chat-completion body (a single user message holding the prompt).
+    headers = {
+        'Content-Type': 'application/json',
+    }
+    data = {
+        "model": "deepseek-v3",
+        "max_tokens": 100,
+        "messages": [
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ]
+    }
+
+    # Send the request and time it; the timer stops only after a successful (non-error) status.
+    start_time = time.time()
+    try:
+        response = requests.post(
+            'http://localhost:8080/v1/chat/completions', 
+            headers=headers,
+            data=json.dumps(data)
+        )
+        response.raise_for_status()
+        end_time = time.time()
+
+        # Print the content of the first choice's message, followed by the elapsed time.
+        result = response.json()
+        print("Response:")
+        print(result['choices'][0]['message']['content'])
+        print(f"\nRequest took {end_time - start_time:.2f} seconds")
+    except requests.exceptions.RequestException as e:
+        print(f"Request failed: {e}")
+
+if __name__ == '__main__':
+    main()
--- a/plugins/wasm-go/extensions/ai-token-ratelimit/README.md
+++ b/plugins/wasm-go/extensions/ai-token-ratelimit/README.md
@@ -51,14 +51,14 @@ description: AI Token限流插件配置参考

 `redis`中每一项的配置字段说明

-| 配置项       | 类型   | 必填 | 默认值                                                     | 说明                        |
-| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------- |
-| service_name | string | 必填 | -                                                          | redis 服务名称，带服务类型的完整 FQDN 名称，例如 my-redis.dns、redis.my-ns.svc.cluster.local     |
-| service_port | int    | 否   | 服务类型为固定地址（static service）默认值为80，其他为6379 | 输入redis服务的服务端口     |
-| username     | string | 否   | -                                                          | redis用户名                 |
-| password     | string | 否   | -                                                          | redis密码                   |
-| timeout      | int    | 否   | 1000                                                       | redis连接超时时间，单位毫秒 |
-
+| 配置项       | 类型   | 必填 | 默认值                                                     | 说明                                                                                         |
+| ------------ | ------ | ---- | ---------------------------------------------------------- | ---------------------------                                                                  |
+| service_name | string | 必填 | -                                                          | redis 服务名称，带服务类型的完整 FQDN 名称，例如 my-redis.dns、redis.my-ns.svc.cluster.local |
+| service_port | int    | 否   | 服务类型为固定地址（static service）默认值为80，其他为6379 | 输入redis服务的服务端口                                                                      |
+| username     | string | 否   | -                                                          | redis用户名                                                                                  |
+| password     | string | 否   | -                                                          | redis密码                                                                                    |
+| timeout      | int    | 否   | 1000                                                       | redis连接超时时间，单位毫秒                                                                  |
+| database     | int    | 否   | 0                                                          | 使用的数据库id，例如配置为1，对应`SELECT 1`                                       |


 ## 配置示例
@@ -194,3 +194,202 @@ rejected_msg: '{"code":-1,"msg":"Too many requests"}'
 redis:
  service_name: redis.static
 ```
+
+## 完整示例
+
+AI Token 限流插件依赖 Redis 记录剩余可用的 token 数，因此首先需要部署 Redis 服务。
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: redis
+  labels:
+    app: redis
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: redis
+  template:
+    metadata:
+      labels:
+        app: redis
+    spec:
+      containers:
+      - name: redis
+        image: redis
+        ports:
+        - containerPort: 6379
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: redis
+  labels:
+    app: redis
+spec:
+  ports:
+  - port: 6379
+    targetPort: 6379
+  selector:
+    app: redis
+---
+```
+
+在本例中，使用通义千问作为 AI 服务提供商。另外还需要设置 AI 统计插件，因为 AI Token 限流插件依赖 AI 统计插件计算每次请求消耗的 token 数，以下配置限制每分钟的 input 和 output token 总数为 200 个。
+
+```yaml
+apiVersion: extensions.higress.io/v1alpha1
+kind: WasmPlugin
+metadata:
+  name: ai-proxy
+  namespace: higress-system
+spec:
+  matchRules:
+  - config:
+      provider:
+        type: qwen
+        apiTokens:
+        - "<YOUR_API_TOKEN>"
+        modelMapping:
+          'gpt-3': "qwen-turbo"
+          'gpt-35-turbo': "qwen-plus"
+          'gpt-4-turbo': "qwen-max"
+          '*': "qwen-turbo"
+    ingress:
+    - qwen
+  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
+  phase: UNSPECIFIED_PHASE
+  priority: 100
+---
+apiVersion: extensions.higress.io/v1alpha1
+kind: WasmPlugin
+metadata:
+  name: ai-token-ratelimit
+  namespace: higress-system
+spec:
+  defaultConfig:
+    rule_name: default_limit_by_param_apikey
+    rule_items:
+    - limit_by_param: apikey
+      limit_keys:
+      - key: 123456
+        token_per_minute: 200
+    redis:
+      # 默认情况下，为了减轻数据面的压力，Higress 的 global.onlyPushRouteCluster 配置参数被设置为 true，意味着不会自动发现 Kubernetes Service
+      # 如果需要使用 Kubernetes Service 作为服务发现，可以将 global.onlyPushRouteCluster 参数设置为 false，
+      # 这样就可以直接将 service_name 设置为 Kubernetes Service, 而无须为 Redis 创建 McpBridge 以及 Ingress 路由
+      # service_name: redis.default.svc.cluster.local
+      service_name: redis.dns
+      service_port: 6379
+  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:1.0.0
+  phase: UNSPECIFIED_PHASE
+  priority: 600
+```
+注意，AI Token 限流插件中的 Redis 配置项 `service_name` 来自 McpBridge 中配置的服务来源，另外我们还需要在 McpBridge 中配置通义千问服务的访问地址。
+
+```yaml
+apiVersion: networking.higress.io/v1
+kind: McpBridge
+metadata:
+  name: default
+  namespace: higress-system
+spec:
+  registries:
+  - domain: dashscope.aliyuncs.com
+    name: qwen
+    port: 443
+    type: dns
+  - domain: redis.default.svc.cluster.local # Kubernetes Service
+    name: redis
+    type: dns
+    port: 6379
+```
+
+分别创建两条路由规则。
+
+```yaml
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  annotations:
+    higress.io/backend-protocol: HTTPS
+    higress.io/destination: qwen.dns
+    higress.io/proxy-ssl-name: dashscope.aliyuncs.com
+    higress.io/proxy-ssl-server-name: "on"
+  labels:
+    higress.io/resource-definer: higress
+  name: qwen
+  namespace: higress-system
+spec:
+  ingressClassName: higress
+  rules:
+  - host: qwen-test.com
+    http:
+      paths:
+      - backend:
+          resource:
+            apiGroup: networking.higress.io
+            kind: McpBridge
+            name: default
+        path: /
+        pathType: Prefix
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  annotations:
+    higress.io/destination: redis.dns
+    higress.io/ignore-path-case: "false"
+  labels:
+    higress.io/resource-definer: higress
+  name: redis
+spec:
+  ingressClassName: higress
+  rules:
+  - http:
+      paths:
+      - backend:
+          resource:
+            apiGroup: networking.higress.io
+            kind: McpBridge
+            name: default
+        path: /
+        pathType: Prefix
+```
+
+转发 higress-gateway 的流量到本地，方便进行测试。
+
+```bash
+kubectl port-forward svc/higress-gateway -n higress-system 18000:80
+```
+
+触发限流效果如下：
+
+```bash
+curl "http://localhost:18000/v1/chat/completions?apikey=123456" \
+-H "Host: qwen-test.com" \
+-H "Content-Type: application/json"  \
+-d '{
+  "model": "gpt-3",
+  "messages": [
+    {
+      "role": "user",
+      "content": "你好，你是谁？"
+    }
+  ],
+  "stream": false
+}'
+{"id":"88cfa80f-545d-93b4-8ff3-3f5245ca33ba","choices":[{"index":0,"message":{"role":"assistant","content":"我是通义千问，由阿里云开发的AI助手。我可以回答各种问题、提供信息和与用户进行对话。有什么我可以帮助你的吗？"},"finish_reason":"stop"}],"created":1719909825,"model":"qwen-turbo","object":"chat.completion","usage":{"prompt_tokens":13,"completion_tokens":33,"total_tokens":46}}
+curl "http://localhost:18000/v1/chat/completions?apikey=123456" -H "Host: qwen-test.com" -H "Content-Type: application/json"  -d '{
+  "model": "gpt-3",
+  "messages": [
+    {
+      "role": "user",
+      "content": "你好，你是谁？"
+    }
+  ],
+  "stream": false
+}'
+Too many requests  # 限流成功
+```
--- a/plugins/wasm-go/extensions/ai-token-ratelimit/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-token-ratelimit/README_EN.md
@@ -43,13 +43,14 @@ Field descriptions for each item in `limit_keys`
 | token_per_day          | int               | No, optionally select one in `token_per_second`, `token_per_minute`, `token_per_hour`, `token_per_day` | -             | Allowed number of token requests per day        |

 Field descriptions for each item in `redis`
-| Configuration Item      | Type              | Required | Default Value                                                     | Description                                     |
-| ----------------------- | ----------------- | -------- | --------------------------------------------------------------- | ----------------------------------------------- |
-| service_name            | string            | Required | -                                                               | Full FQDN name of the redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
-| service_port            | int               | No       | Default value for static addresses (static service) is 80; otherwise, it is 6379 | Input the service port of the redis service     |
-| username                | string            | No       | -                                                               | Redis username                                  |
-| password                | string            | No       | -                                                               | Redis password                                  |
-| timeout                 | int               | No       | 1000                                                            | Redis connection timeout in milliseconds       |
+| Configuration Item      | Type              | Required | Default Value                                                                    | Description                                                                                                    |
+| ----------------------- | ----------------- | -------- | ---------------------------------------------------------------                  | -----------------------------------------------                                                                |
+| service_name            | string            | Required | -                                                                                | Full FQDN name of the redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
+| service_port            | int               | No       | Default value for static addresses (static service) is 80; otherwise, it is 6379 | Input the service port of the redis service                                                                    |
+| username                | string            | No       | -                                                                                | Redis username                                                                                                 |
+| password                | string            | No       | -                                                                                | Redis password                                                                                                 |
+| timeout                 | int               | No       | 1000                                                                             | Redis connection timeout in milliseconds                                                                       |
+| database                | int               | No       | 0                                                                                | Redis database ID to use; for example, setting it to 1 corresponds to `SELECT 1`.                              |

 ## Configuration Examples
 ### Identify request parameter apikey for differentiated rate limiting
@@ -168,3 +169,204 @@ rejected_msg: '{"code":-1,"msg":"Too many requests"}'
 redis:
  service_name: redis.static
 ```
+
+## Example
+
+The AI Token Rate Limiting Plugin relies on Redis to track the remaining available tokens, so the Redis service must be deployed first.
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: redis
+  labels:
+    app: redis
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: redis
+  template:
+    metadata:
+      labels:
+        app: redis
+    spec:
+      containers:
+      - name: redis
+        image: redis
+        ports:
+        - containerPort: 6379
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: redis
+  labels:
+    app: redis
+spec:
+  ports:
+  - port: 6379
+    targetPort: 6379
+  selector:
+    app: redis
+---
+```
+
+In this example, qwen is used as the AI service provider. Additionally, the AI Statistics Plugin must be configured, as the AI Token Rate Limiting Plugin depends on it to calculate the number of tokens consumed per request. The following configuration limits the total number of input and output tokens to 200 per minute.
+
+```yaml
+apiVersion: extensions.higress.io/v1alpha1
+kind: WasmPlugin
+metadata:
+  name: ai-proxy
+  namespace: higress-system
+spec:
+  matchRules:
+  - config:
+      provider:
+        type: qwen
+        apiTokens:
+        - "<YOUR_API_TOKEN>"
+        modelMapping:
+          'gpt-3': "qwen-turbo"
+          'gpt-35-turbo': "qwen-plus"
+          'gpt-4-turbo': "qwen-max"
+          '*': "qwen-turbo"
+    ingress:
+    - qwen
+  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
+  phase: UNSPECIFIED_PHASE
+  priority: 100
+---
+apiVersion: extensions.higress.io/v1alpha1
+kind: WasmPlugin
+metadata:
+  name: ai-token-ratelimit
+  namespace: higress-system
+spec:
+  defaultConfig:
+    rule_name: default_limit_by_param_apikey
+    rule_items:
+    - limit_by_param: apikey
+      limit_keys:
+      - key: 123456
+        token_per_minute: 200
+    redis:
+      # By default, to reduce data plane pressure, the `global.onlyPushRouteCluster` parameter in Higress is set to true, meaning that Kubernetes Services are not automatically discovered.
+      # If you need to use Kubernetes Service for service discovery, set `global.onlyPushRouteCluster` to false,
+      # allowing you to directly set `service_name` to the Kubernetes Service without needing to create an McpBridge and an Ingress route for Redis.
+      # service_name: redis.default.svc.cluster.local
+      service_name: redis.dns
+      service_port: 6379
+  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:1.0.0
+  phase: UNSPECIFIED_PHASE
+  priority: 600
+```
+
+Note that the `service_name` in the Redis configuration of the AI Token Rate Limiting Plugin is derived from the service source configured in McpBridge. Additionally, we need to configure the access address of the qwen service in McpBridge.
+
+```yaml
+apiVersion: networking.higress.io/v1
+kind: McpBridge
+metadata:
+  name: default
+  namespace: higress-system
+spec:
+  registries:
+  - domain: dashscope.aliyuncs.com
+    name: qwen
+    port: 443
+    type: dns
+  - domain: redis.default.svc.cluster.local # Kubernetes Service
+    name: redis
+    type: dns
+    port: 6379
+```
+
+Create two routing rules separately.
+
+```yaml
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  annotations:
+    higress.io/backend-protocol: HTTPS
+    higress.io/destination: qwen.dns
+    higress.io/proxy-ssl-name: dashscope.aliyuncs.com
+    higress.io/proxy-ssl-server-name: "on"
+  labels:
+    higress.io/resource-definer: higress
+  name: qwen
+  namespace: higress-system
+spec:
+  ingressClassName: higress
+  rules:
+  - host: qwen-test.com
+    http:
+      paths:
+      - backend:
+          resource:
+            apiGroup: networking.higress.io
+            kind: McpBridge
+            name: default
+        path: /
+        pathType: Prefix
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  annotations:
+    higress.io/destination: redis.dns
+    higress.io/ignore-path-case: "false"
+  labels:
+    higress.io/resource-definer: higress
+  name: redis
+spec:
+  ingressClassName: higress
+  rules:
+  - http:
+      paths:
+      - backend:
+          resource:
+            apiGroup: networking.higress.io
+            kind: McpBridge
+            name: default
+        path: /
+        pathType: Prefix
+```
+
+Forward the higress-gateway traffic to your local machine to make testing easier.
+
+```bash
+kubectl port-forward svc/higress-gateway -n higress-system 18000:80
+```
+
+The rate limiting effect is triggered as follows:
+
+```bash
+curl "http://localhost:18000/v1/chat/completions?apikey=123456" \
+-H "Host: qwen-test.com" \
+-H "Content-Type: application/json" \
+-d '{
+  "model": "gpt-3",
+  "messages": [
+    {
+      "role": "user",
+      "content": "Hello, who are you?"
+    }
+  ],
+  "stream": false
+}'
+{"id":"88cfa80f-545d-93b4-8ff3-3f5245ca33ba","choices":[{"index":0,"message":{"role":"assistant","content":"I am Tongyi Qianwen, an AI assistant developed by Alibaba Cloud. I can answer various questions, provide information, and have conversations with users. How can I assist you?"},"finish_reason":"stop"}],"created":1719909825,"model":"qwen-turbo","object":"chat.completion","usage":{"prompt_tokens":13,"completion_tokens":33,"total_tokens":46}}
+curl "http://localhost:18000/v1/chat/completions?apikey=123456" -H "Host: qwen-test.com" -H "Content-Type: application/json"  -d '{
+  "model": "gpt-3",
+  "messages": [
+    {
+      "role": "user",
+      "content": "Hello, who are you?"
+    }
+  ],
+  "stream": false
+}'
+Too many requests  # Rate limiting successful
+```
--- a/plugins/wasm-go/extensions/ai-token-ratelimit/config.go
+++ b/plugins/wasm-go/extensions/ai-token-ratelimit/config.go
@@ -110,7 +110,8 @@ func initRedisClusterClient(json gjson.Result, config *ClusterKeyRateLimitConfig
 		FQDN: serviceName,
 		Port: int64(servicePort),
 	})
-	return config.redisClient.Init(username, password, int64(timeout))
+	database := int(redisConfig.Get("database").Int())
+	return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database))
 }

 func parseClusterKeyRateLimitConfig(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md
@@ -52,13 +52,14 @@ description: 基于 Key 集群限流插件配置参考

 `redis` 中每一项的配置字段说明。

-| 配置项       | 类型   | 必填 | 默认值                                                     | 说明                                                                        |
-| ------------ | ------ | ---- | ---------------------------------------------------------- |---------------------------------------------------------------------------|
+| 配置项       | 类型   | 必填 | 默认值                                                     | 说明                                                                                         |
+| ------------ | ------ | ---- | ---------------------------------------------------------- | ---------------------------------------------------------------------------                  |
 | service_name | string | 必填 | -                                                          | redis 服务名称，带服务类型的完整 FQDN 名称，例如 my-redis.dns、redis.my-ns.svc.cluster.local |
-| service_port | int    | 否   | 服务类型为固定地址（static service）默认值为80，其他为6379 | 输入redis服务的服务端口                                                            |
-| username     | string | 否   | -                                                          | redis 用户名                                                                 |
-| password     | string | 否   | -                                                          | redis 密码                                                                  |
-| timeout      | int    | 否   | 1000                                                       | redis 连接超时时间，单位毫秒                                                         |
+| service_port | int    | 否   | 服务类型为固定地址（static service）默认值为80，其他为6379 | 输入redis服务的服务端口                                                                      |
+| username     | string | 否   | -                                                          | redis 用户名                                                                                 |
+| password     | string | 否   | -                                                          | redis 密码                                                                                   |
+| timeout      | int    | 否   | 1000                                                       | redis 连接超时时间，单位毫秒                                                                 |
+| database     | int    | 否   | 0                                                          | 使用的数据库id，例如配置为1，对应`SELECT 1`                                                  |

 ## 配置示例

--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md
@@ -46,13 +46,15 @@ Description of configuration fields for each item in `limit_keys`.
 | query_per_day             | int           | No, one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day` is optional. | -             | Allowed number of requests per day.                              |

 Description of configuration fields for each item in `redis`.
-| Configuration Item        | Type          | Required | Default Value                                               | Description                                                               |
-|---------------------------|---------------|----------|------------------------------------------------------------|---------------------------------------------------------------------------|
-| service_name              | string        | Required | -                                                          | Full FQDN name of the Redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local. |
-| service_port              | int           | No       | 80 for static services; otherwise 6379                     | Service port for the Redis service.                                      |
-| username                  | string        | No       | -                                                          | Redis username.                                                          |
-| password                  | string        | No       | -                                                          | Redis password.                                                          |
-| timeout                   | int           | No       | 1000                                                       | Redis connection timeout in milliseconds.                               |
+| Configuration Item | Type   | Required | Default Value                          | Description                                                                                                     |
+|--------------------|--------|----------|----------------------------------------|-----------------------------------------------------------------------------------------------------------------|
+| service_name       | string | Required | -                                      | Full FQDN name of the Redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local. |
+| service_port       | int    | No       | 80 for static services; otherwise 6379 | Service port for the Redis service.                                                                             |
+| username           | string | No       | -                                      | Redis username.                                                                                                 |
+| password           | string | No       | -                                      | Redis password.                                                                                                 |
+| timeout            | int    | No       | 1000                                   | Redis connection timeout in milliseconds.                                                                       |
+| database           | int    | No       | 0                                      | Redis database ID to use; for example, setting it to 1 corresponds to `SELECT 1`.                               |
+

 ## Configuration Examples

--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go
@@ -110,7 +110,8 @@ func initRedisClusterClient(json gjson.Result, config *ClusterKeyRateLimitConfig
 		FQDN: serviceName,
 		Port: int64(servicePort),
 	})
-	return config.redisClient.Init(username, password, int64(timeout))
+	database := int(redisConfig.Get("database").Int())
+	return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database))
 }

 func parseClusterKeyRateLimitConfig(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
--- a/plugins/wasm-go/extensions/ext-auth/README.md
+++ b/plugins/wasm-go/extensions/ext-auth/README.md
@@ -16,70 +16,129 @@ description: Ext 认证插件实现了调用外部授权服务进行认证鉴权

 ## 配置字段

-| 名称                            | 数据类型 | 必填 | 默认值 | 描述                                                                                                                                                         |
-| ------------------------------- | -------- | ---- | ------ |------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `http_service`                  | object   | 是   | -      | 外部授权服务配置                                                                                                                                                   |
-| `failure_mode_allow`            | bool     | 否   | false  | 当设置为 true 时，即使与授权服务的通信失败，或者授权服务返回了 HTTP 5xx 错误，仍会接受客户端请求                                                                                                   |
-| `failure_mode_allow_header_add` | bool     | 否   | false  | 当 `failure_mode_allow` 和 `failure_mode_allow_header_add` 都设置为 true 时，若与授权服务的通信失败，或授权服务返回了 HTTP 5xx 错误，那么请求头中将会添加 `x-envoy-auth-failure-mode-allowed: true` |
-| `status_on_error`               | int      | 否   | 403    | 当授权服务无法访问或状态码为 5xx 时，设置返回给客户端的 HTTP 状态码。默认状态码是 `403`                                        |
+| 名称                            | 数据类型           | 必填 | 默认值 | 描述                                                         |
+| ------------------------------- | ------------------ | ---- | ------ | ------------------------------------------------------------ |
+| `http_service`                  | object             | 是   | -      | 外部授权服务配置                                             |
+| `match_type`                    | string             | 否   |        | 可选 `whitelist` 或 `blacklist`                              |
+| `match_list`                    | array of MatchRule | 否   |        | 一个包含 (`match_rule_domain`, `match_rule_method`, `match_rule_path`, `match_rule_type`) 的列表 |
+| `failure_mode_allow`            | bool               | 否   | false  | 当设置为 true 时，即使与授权服务的通信失败，或者授权服务返回了 HTTP 5xx 错误，仍会接受客户端请求 |
+| `failure_mode_allow_header_add` | bool               | 否   | false  | 当 `failure_mode_allow` 和 `failure_mode_allow_header_add` 都设置为 true 时，若与授权服务的通信失败，或授权服务返回了 HTTP 5xx 错误，那么请求头中将会添加 `x-envoy-auth-failure-mode-allowed: true` |
+| `status_on_error`               | int                | 否   | 403    | 当授权服务无法访问或状态码为 5xx 时，设置返回给客户端的 HTTP 状态码。默认状态码是 `403` |

-`http_service`中每一项的配置字段说明
+`http_service` 中每一项的配置字段说明

 | 名称                     | 数据类型 | 必填 | 默认值 | 描述                                  |
-| ------------------------ | -------- | ---- | ------ | ------------------------------------- |
-| `endpoint_mode`          | string   | 否   | envoy  | `envoy` , `forward_auth` 中选填一项   |
+|--------------------------|----------|------|--------|---------------------------------------|
+| `endpoint_mode`          | string   | 否   | envoy  | 可选 `envoy` 或 `forward_auth`        |
 | `endpoint`               | object   | 是   | -      | 发送鉴权请求的 HTTP 服务信息          |
 | `timeout`                | int      | 否   | 1000   | `ext-auth` 服务连接超时时间，单位毫秒 |
 | `authorization_request`  | object   | 否   | -      | 发送鉴权请求配置                      |
-| `authorization_response` | object   | 否   | -      | 处理鉴权响应配置 |
+| `authorization_response` | object   | 否   | -      | 处理鉴权响应配置                      |

-`endpoint`中每一项的配置字段说明
+`endpoint` 中每一项的配置字段说明

-| 名称             | 数据类型 | 必填                                   | 默认值 | 描述                                                                                                   |
-| --------         | -------- | --                                     | ------ | -----------------------------------------------------------------------------------------              |
-| `service_name`   | string   | 必填                                   | -      | 输入授权服务名称，带服务类型的完整 FQDN 名称，例如 `ext-auth.dns` 、`ext-auth.my-ns.svc.cluster.local` |
-| `service_port`   | int      | 否                                     | 80     | 输入授权服务的服务端口                                                                                 |
-| `service_host`   | string   | 否                                     | -      | 请求授权服务时设置的Host头，不填时和FQDN保持一致                                                       |
-| `path_prefix`    | string   | `endpoint_mode` 为`envoy`时必填        |        | `endpoint_mode` 为`envoy` 时，客户端向授权服务发送请求的请求路径前缀                                   |
-| `request_method` | string   | 否                                     | GET    | `endpoint_mode` 为`forward_auth` 时，客户端向授权服务发送请求的HTTP Method                             |
-| `path`           | string   | `endpoint_mode` 为`forward_auth`时必填 | -      | `endpoint_mode` 为`forward_auth` 时，客户端向授权服务发送请求的请求路径                                |
+| 名称             | 数据类型 | 必填                                   | 默认值 | 描述                                                         |
+|------------------|----------|----------------------------------------|--------|--------------------------------------------------------------|
+| `service_name`   | string   | 是                                     | -      | 输入授权服务名称，带服务类型的完整 FQDN 名称，例如 `ext-auth.dns` 、`ext-auth.my-ns.svc.cluster.local` |
+| `service_port`   | int      | 否                                     | 80     | 输入授权服务的服务端口                                       |
+| `service_host`   | string   | 否                                     | -      | 请求授权服务时设置的 Host 头，不填时和 FQDN 保持一致         |
+| `path_prefix`    | string   | `endpoint_mode` 为 `envoy` 时必填      | -      | `endpoint_mode` 为 `envoy` 时，客户端向授权服务发送请求的请求路径前缀 |
+| `request_method` | string   | 否                                     | GET    | `endpoint_mode` 为 `forward_auth` 时，客户端向授权服务发送请求的 HTTP Method |
+| `path`           | string   | `endpoint_mode` 为 `forward_auth` 时必填 | -      | `endpoint_mode` 为 `forward_auth` 时，客户端向授权服务发送请求的请求路径 |

-`authorization_request`中每一项的配置字段说明
+`authorization_request` 中每一项的配置字段说明

-| 名称                     | 数据类型               | 必填 | 默认值 | 描述                                                                                                                                                                                                      |
-| ------------------------ | ---------------------- | ---- | ------ |---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `allowed_headers`        | array of StringMatcher | 否   | -      | 当设置后，具有相应匹配项的客户端请求头将添加到授权服务请求中的请求头中。除了用户自定义的头部匹配规则外，授权服务请求中会自动包含 `Authorization` 这个HTTP头（ `endpoint_mode` 为 `forward_auth` 时，会把原始请求的请求路径设置到 `X-Original-Uri` ，原始请求的HTTP Method设置到 `X-Original-Method` ） |
-| `headers_to_add`         | `map[string]string`    | 否   | -      | 设置将包含在授权服务请求中的请求头列表。请注意，同名的客户端请求头将被覆盖                                                                                                                                                                   |
-| `with_request_body`      | bool                   | 否   | false  | 缓冲客户端请求体，并将其发送至鉴权请求中（HTTP Method为GET、OPTIONS、HEAD请求时不生效）                                                                                                                                                |
-| `max_request_body_bytes` | int                    | 否   | 10MB   | 设置在内存中保存客户端请求体的最大尺寸。当客户端请求体达到在此字段中设置的数值时，将会返回HTTP 413状态码，并且不会启动授权过程。注意，这个设置会优先于 `failure_mode_allow` 的配置                                                                                                |
+| 名称                     | 数据类型               | 必填 | 默认值 | 描述                                                         |
+|--------------------------|------------------------|------|--------|--------------------------------------------------------------|
+| `allowed_headers`        | array of StringMatcher | 否   | -      | 设置后，匹配项的客户端请求头将添加到授权服务请求中的请求头中。除了用户自定义的头部匹配规则外，授权服务请求中会自动包含 `Authorization` 这个HTTP头（`endpoint_mode` 为 `forward_auth` 时，会添加 `X-Forwarded-*` 的请求头） |
+| `headers_to_add`         | map[string]string      | 否   | -      | 设置将包含在授权服务请求中的请求头列表。请注意，同名的客户端请求头将被覆盖 |
+| `with_request_body`      | bool                   | 否   | false  | 缓冲客户端请求体，并将其发送至鉴权请求中（HTTP Method为GET、OPTIONS、HEAD请求时不生效） |
+| `max_request_body_bytes` | int                    | 否   | 10MB   | 设置在内存中保存客户端请求体的最大尺寸。当客户端请求体达到在此字段中设置的数值时，将会返回HTTP 413状态码，并且不会启动授权过程。注意，这个设置会优先于 `failure_mode_allow` 的配置 |

-`authorization_response`中每一项的配置字段说明
+`authorization_response` 中每一项的配置字段说明

-| 名称                       | 数据类型               | 必填 | 默认值 | 描述                                                                              |
-| -------------------------- | ---------------------- | ---- | ------ |---------------------------------------------------------------------------------|
-| `allowed_upstream_headers` | array of StringMatcher | 否   | -      | 当设置后，具有相应匹配项的鉴权请求的响应头将添加到原始的客户端请求头中。请注意，同名的请求头将被覆盖                              |
-| `allowed_client_headers`   | array of StringMatcher | 否   | -      | 如果不设置，在请求被拒绝时，所有的鉴权请求的响应头将添加到客户端的响应头中。当设置后，在请求被拒绝时，具有相应匹配项的鉴权请求的响应头将添加到客户端的响应头中 |
+| 名称                       | 数据类型               | 必填 | 默认值 | 描述                                                         |
+|----------------------------|------------------------|------|--------|--------------------------------------------------------------|
+| `allowed_upstream_headers` | array of StringMatcher | 否   | -      | 匹配项的鉴权请求的响应头将添加到原始的客户端请求头中。请注意，同名的请求头将被覆盖 |
+| `allowed_client_headers`   | array of StringMatcher | 否   | -      | 如果不设置，在请求被拒绝时，所有的鉴权请求的响应头将添加到客户端的响应头中。当设置后，在请求被拒绝时，匹配项的鉴权请求的响应头将添加到客户端的响应头中 |

-`StringMatcher`类型每一项的配置字段说明，在使用`array of StringMatcher`时会按照数组中定义的StringMatcher顺序依次进行配置
+`StringMatcher` 类型每一项的配置字段说明，在使用 `array of StringMatcher` 时会按照数组中定义的 StringMatcher 顺序依次进行配置

 | 名称       | 数据类型 | 必填                                                         | 默认值 | 描述     |
-| ---------- | -------- | ------------------------------------------------------------ | ------ | -------- |
+|------------|----------|-------------------------------------------------------------|--------|----------|
 | `exact`    | string   | 否，`exact` , `prefix` , `suffix`, `contains`, `regex` 中选填一项 | -      | 精确匹配 |
 | `prefix`   | string   | 否，`exact` , `prefix` , `suffix`, `contains`, `regex` 中选填一项 | -      | 前缀匹配 |
 | `suffix`   | string   | 否，`exact` , `prefix` , `suffix`, `contains`, `regex` 中选填一项 | -      | 后缀匹配 |
 | `contains` | string   | 否，`exact` , `prefix` , `suffix`, `contains`, `regex` 中选填一项 | -      | 是否包含 |
 | `regex`    | string   | 否，`exact` , `prefix` , `suffix`, `contains`, `regex` 中选填一项 | -      | 正则匹配 |

+`MatchRule` 类型每一项的配置字段说明，在使用 `array of MatchRule` 时会按照数组中定义的 MatchRule 顺序依次进行配置

+| 名称                | 数据类型 | 必填 | 默认值 | 描述                                                         |
+| ------------------- | -------- | ---- | ------ | ------------------------------------------------------------ |
+| `match_rule_domain` | string   | 否   | -      | 匹配规则域名，支持通配符模式，例如 `*.bar.com`               |
+| `match_rule_method` | []string | 否   | -      | 匹配请求方法                                                 |
+| `match_rule_path`   | string   | 否   | -      | 匹配请求路径的规则                                           |
+| `match_rule_type`   | string   | 否   | -      | 匹配请求路径的规则类型，可选 `exact` , `prefix` , `suffix`, `contains`, `regex` |
+
+### 两种 `endpoint_mode` 的区别
+
+`endpoint_mode` 为 `envoy` 时，鉴权请求会使用原始请求的 HTTP Method，和配置的 `path_prefix` 作为请求路径前缀拼接上原始的请求路径
+
+`endpoint_mode` 为 `forward_auth` 时，鉴权请求会使用配置的 `request_method` 作为 HTTP Method，和配置的 `path` 作为请求路径，并且 Higress 会自动生成并发送以下 header 至鉴权服务：
+
+| Header               | 说明                                                   |
+| -------------------- | ------------------------------------------------------ |
+| `x-forwarded-proto`  | 原始请求的scheme，比如 http/https                      |
+| `x-forwarded-method` | 原始请求的方法，比如 get/post/delete/patch             |
+| `x-forwarded-host`   | 原始请求的host                                         |
+| `x-forwarded-uri`    | 原始请求的path，包含路径参数，比如 `/v1/app?test=true` |
+
+### 黑白名单模式
+
+支持黑白名单模式配置，默认为白名单模式，白名单为空，即所有请求都需要经过验证，匹配域名支持泛域名例如 `*.bar.com` ，匹配规则支持 `exact` , `prefix` , `suffix`, `contains`, `regex`
+
+**白名单模式**
+
+```yaml
+# 白名单模式配置，符合白名单规则的请求无需验证
+match_type: 'whitelist'
+match_list:
+  # 所有以 api.example.com 为域名，且路径前缀为 /public 的请求无需验证
+  - match_rule_domain: 'api.example.com'
+    match_rule_path: '/public'
+    match_rule_type: 'prefix'
+  # 针对图片资源服务器 images.example.com，所有 GET 请求无需验证
+  - match_rule_domain: 'images.example.com'
+    match_rule_method: ["GET"]
+  # 所有域名下，路径精确匹配 /health-check 的 HEAD 请求无需验证
+  - match_rule_method: ["HEAD"]
+    match_rule_path: '/health-check'
+    match_rule_type: 'exact'
+```
+
+**黑名单模式**
+
+```yaml
+# 黑名单模式配置，符合黑名单规则的请求需要验证
+match_type: 'blacklist'
+match_list:
+  # 所有以 admin.example.com 为域名，且路径前缀为 /sensitive 的请求需要验证
+  - match_rule_domain: 'admin.example.com'
+    match_rule_path: '/sensitive'
+    match_rule_type: 'prefix'
+  # 所有域名下，路径精确匹配 /user 的 DELETE 请求需要验证
+  - match_rule_method: ["DELETE"]
+    match_rule_path: '/user'
+    match_rule_type: 'exact'
+  # 所有以 legacy.example.com 为域名的 POST 请求需要验证
+  - match_rule_domain: 'legacy.example.com'
+    match_rule_method: ["POST"]
+```

 ## 配置示例

-下面假设 `ext-auth` 服务在Kubernetes中serviceName为 `ext-auth`，端口 `8090`，路径为 `/auth`，命名空间为 `backend`
-
-支持两种 `endpoint_mode`：
-
- `endpoint_mode` 为 `envoy` 时，鉴权请求会使用原始请求的HTTP Method，和配置的 `path_prefix` 作为请求路径前缀拼接上原始的请求路径
- `endpoint_mode` 为 `forward_auth` 时，鉴权请求会使用配置的 `request_method` 作为HTTP Method，和配置的 `path` 作为请求路径
+下面假设 `ext-auth` 服务在 Kubernetes 中 serviceName 为 `ext-auth`，端口 `8090`，路径为 `/auth`，命名空间为 `backend`

 ### endpoint_mode为envoy时

@@ -141,13 +200,13 @@ content-length: 0
 http_service:
  authorization_request:
    allowed_headers:
-    - exact: x-auth-version
+      - exact: x-auth-version
    headers_to_add:
      x-envoy-header: true
  authorization_response:
    allowed_upstream_headers:
-    - exact: x-user-id
-    - exact: x-auth-version
+      - exact: x-user-id
+      - exact: x-auth-version
  endpoint_mode: envoy
  endpoint:
    service_name: ext-auth.backend.svc.cluster.local
@@ -198,7 +257,7 @@ http_service:
 使用如下请求网关，当开启 `ext-auth` 插件后：

 ```shell
-curl -i http://localhost:8082/users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5 -X GET -H "foo: bar" -H "Authorization: xxx"
+curl -i http://localhost:8082/users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5 -X GET -H "foo: bar" -H "Authorization: xxx" -H "Host: foo.bar.com"
 ```

 **请求 `ext-auth` 服务成功：**
@@ -209,8 +268,10 @@ curl -i http://localhost:8082/users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5
 POST /auth HTTP/1.1
 Host: ext-auth.backend.svc.cluster.local
 Authorization: xxx
-X-Original-Uri: /users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5
-X-Original-Method: GET
+X-Forwarded-Proto: HTTP
+X-Forwarded-Host: foo.bar.com
+X-Forwarded-Uri: /users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5
+X-Forwarded-Method: GET
 Content-Length: 0
 ```

@@ -241,13 +302,13 @@ content-length: 0
 http_service:
  authorization_request:
    allowed_headers:
-    - exact: x-auth-version
+      - exact: x-auth-version
    headers_to_add:
      x-envoy-header: true
  authorization_response:
    allowed_upstream_headers:
-    - exact: x-user-id
-    - exact: x-auth-version
+      - exact: x-user-id
+      - exact: x-auth-version
  endpoint_mode: forward_auth
  endpoint:
    service_name: ext-auth.backend.svc.cluster.local
@@ -261,7 +322,7 @@ http_service:
 使用如下请求网关，当开启 `ext-auth` 插件后：

 ```shell
-curl -i http://localhost:8082/users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5 -X GET -H "foo: bar" -H "Authorization: xxx" -H "X-Auth-Version: 1.0"
+curl -i http://localhost:8082/users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5 -X GET -H "foo: bar" -H "Authorization: xxx" -H "X-Auth-Version: 1.0" -H "Host: foo.bar.com"
 ```

 `ext-auth` 服务将接收到如下的鉴权请求：
@@ -270,22 +331,13 @@ curl -i http://localhost:8082/users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5
 POST /auth HTTP/1.1
 Host: my-domain.local
 Authorization: xxx
-X-Original-Uri: /users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5
-X-Original-Method: GET
+X-Forwarded-Proto: HTTP
+X-Forwarded-Host: foo.bar.com
+X-Forwarded-Uri: /users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5
+X-Forwarded-Method: GET
 X-Auth-Version: 1.0
 x-envoy-header: true
 Content-Length: 0
 ```

-`ext-auth` 服务返回响应头中如果包含 `x-user-id` 和 `x-auth-version`，网关调用upstream时的请求中会带上这两个请求头
-
-#### x-forwarded-* header
-在endpoint_mode为forward_auth时，higress会自动生成并发送以下header至鉴权服务。
-
-| Header             | 说明                                  |
-|--------------------|-------------------------------------|
-| x-forwarded-proto  | 原始请求的scheme，比如http/https            |
-| x-forwarded-method | 原始请求的方法，比如get/post/delete/patch     |
-| x-forwarded-host   | 原始请求的host                           |
-| x-forwarded-uri    | 原始请求的path，包含路径参数，比如/v1/app?test=true |
-| x-forwarded-for    | 原始请求的客户端IP地址                        |
+`ext-auth` 服务返回响应头中如果包含 `x-user-id` 和 `x-auth-version`，网关调用upstream时的请求中会带上这两个请求头
--- a/plugins/wasm-go/extensions/ext-auth/README_EN.md
+++ b/plugins/wasm-go/extensions/ext-auth/README_EN.md
@@ -3,73 +3,150 @@ title: External Authentication
 keywords: [higress, auth]
 description: The Ext Authentication plugin implements the capability to call external authorization services for authentication and authorization.
 ---
-## Function Description
-The `ext-auth` plugin implements sending authentication requests to an external authorization service to check whether the client request is authorized. This plugin is implemented with reference to Envoy's native [ext_authz filter](https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/ext_authz_filter), which covers some capabilities for connecting to HTTP services.

-## Execution Properties
-Plugin Execution Phase: `Authentication Phase`  
+## Feature Description
+
+The `ext-auth` plugin sends an authorization request to an external authorization service to check whether the client request is authorized. The plugin is modeled on Envoy's native [ext_authz filter](https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/ext_authz_filter) and implements the part of that filter's capabilities that connects to an HTTP service.
+
+## Operating Attributes
+
+Plugin Execution Phase: `Authentication Phase`  
 Plugin Execution Priority: `360`

+
 ## Configuration Fields
-| Name                            | Data Type | Required | Default Value | Description                                                                                                                                                            |
-| ------------------------------- | --------- | -------- | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `http_service`                  | object    | Yes      | -             | Configuration for the external authorization service                                                                                                                   |
-| `failure_mode_allow`            | bool      | No       | false         | When set to true, client requests will still be accepted even if communication with the authorization service fails or the authorization service returns an HTTP 5xx error |
-| `failure_mode_allow_header_add` | bool      | No       | false         | When both `failure_mode_allow` and `failure_mode_allow_header_add` are set to true, if communication with the authorization service fails or returns an HTTP 5xx error, the request header will include `x-envoy-auth-failure-mode-allowed: true` |
-| `status_on_error`               | int       | No       | 403           | Sets the HTTP status code returned to the client when the authorization service is unreachable or returns a 5xx status code. The default status code is `403`          |

-### Configuration Fields for Each Item in `http_service`
-| Name                     | Data Type | Required | Default Value | Description                                  |
-| ------------------------ | --------- | -------- | ------------- | -------------------------------------------- |
-| `endpoint_mode`          | string    | No       | envoy         | Select either `envoy` or `forward_auth` as an optional choice |
-| `endpoint`               | object    | Yes      | -             | Information about the HTTP service for sending authentication requests |
-| `timeout`                | int       | No       | 1000          | Connection timeout for `ext-auth` service, in milliseconds |
-| `authorization_request`  | object    | No       | -             | Configuration for sending authentication requests |
-| `authorization_response` | object    | No       | -             | Configuration for processing authentication responses |
+| Name | Data Type | Required | Default Value | Description |
+| --- | --- | --- | --- | --- |
+| `http_service` | object | Yes | - | Configuration for the external authorization service |
+| `match_type` | string | No |  | Can be `whitelist` or `blacklist` |
+| `match_list` | array of MatchRule | No |  | A list containing (`match_rule_domain`, `match_rule_method`, `match_rule_path`, `match_rule_type`) |
+| `failure_mode_allow` | bool | No | false | When set to true, client requests will be accepted even if the communication with the authorization service fails or the authorization service returns an HTTP 5xx error |
+| `failure_mode_allow_header_add` | bool | No | false | When both `failure_mode_allow` and `failure_mode_allow_header_add` are set to true, if the communication with the authorization service fails or the authorization service returns an HTTP 5xx error, the `x-envoy-auth-failure-mode-allowed: true` header will be added to the request header |
+| `status_on_error` | int | No | 403 | Sets the HTTP status code returned to the client when the authorization service is unreachable or returns a 5xx status code. The default status code is `403` |

-### Configuration Fields for Each Item in `endpoint`
-| Name             | Data Type | Required               | Default Value | Description                                                                                                   |
-| ---------------- | --------- | ---------------------- | ------------- | ------------------------------------------------------------------------------------------------------------- |
-| `service_name`   | string    | Required               | -             | Input the name of the authorization service, in complete FQDN format, e.g., `ext-auth.dns` or `ext-auth.my-ns.svc.cluster.local` |
-| `service_port`   | int       | No                     | 80            | Input the port of the authorization service                                                                      |
-| `service_host`   | string    | No                     | -             | The Host header set when requesting the authorization service; remains the same as FQDN if not filled          |
-| `path_prefix`    | string    | Required when `endpoint_mode` is `envoy` |             | Request path prefix for the client when sending requests to the authorization service                          |
-| `request_method` | string    | No                     | GET           | HTTP Method for client requests to the authorization service when `endpoint_mode` is `forward_auth`            |
-| `path`           | string    | Required when `endpoint_mode` is `forward_auth` | -             | Request path for the client when sending requests to the authorization service                                   |
+Configuration fields for each item in `http_service`

-### Configuration Fields for Each Item in `authorization_request`
-| Name                     | Data Type               | Required | Default Value | Description                                                                                                                                                            |
-| ------------------------ | ---------------------- | -------- | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `allowed_headers`        | array of StringMatcher | No       | -             | When set, client request headers with matching criteria will be added to the headers of the request to the authorization service. The `Authorization` HTTP header will be automatically included in the authorization service request, and if `endpoint_mode` is `forward_auth`, the original request path will be set to `X-Original-Uri` and the original request HTTP method will be set to `X-Original-Method`. |
-| `headers_to_add`         | `map[string]string`    | No       | -             | Sets the list of request headers to include in the authorization service request. Note that headers with the same name from the client will be overwritten.              |
-| `with_request_body`      | bool                   | No       | false         | Buffer the client request body and send it in the authentication request (does not take effect for HTTP Methods GET, OPTIONS, and HEAD)                               |
-| `max_request_body_bytes` | int                    | No       | 10MB          | Sets the maximum size of the client request body to keep in memory. When the client request body reaches the value set in this field, an HTTP 413 status code will be returned, and the authorization process will not start. Note that this setting takes precedence over the `failure_mode_allow` configuration. |
+| Name | Data Type | Required | Default Value | Description |
+| --- | --- | --- | --- | --- |
+| `endpoint_mode` | string | No | envoy | Can be `envoy` or `forward_auth` |
+| `endpoint` | object | Yes | - | Information about the HTTP service to which the authentication request is sent |
+| `timeout` | int | No | 1000 | The connection timeout for the `ext-auth` service in milliseconds |
+| `authorization_request` | object | No | - | Configuration for sending the authentication request |
+| `authorization_response` | object | No | - | Configuration for handling the authentication response |

-### Configuration Fields for Each Item in `authorization_response`
-| Name                       | Data Type               | Required | Default Value | Description                                                                                     |
-| -------------------------- | ---------------------- | -------- | ------------- | ----------------------------------------------------------------------------------------------- |
-| `allowed_upstream_headers` | array of StringMatcher | No       | -             | When set, the response headers of the authorization request with matching criteria will be added to the original client request headers. Note that headers with the same name will be overwritten. |
-| `allowed_client_headers`   | array of StringMatcher | No       | -             | If not set, all response headers from authorization requests will be added to the client’s response when a request is denied. When set, response headers from authorization requests with matching criteria will be added to the client's response when a request is denied. |
+Configuration fields for each item in `endpoint`

-### Field Descriptions for `StringMatcher` Type
-When using `array of StringMatcher`, the fields are configured according to the order defined in the array.
-| Name       | Data Type | Required                                            | Default Value | Description |
-| ---------- | --------- | --------------------------------------------------- | ------------- | ----------- |
-| `exact`    | string    | No, must select one from `exact`, `prefix`, `suffix`, `contains`, `regex` | -             | Exact match |
-| `prefix`   | string    | No, must select one from `exact`, `prefix`, `suffix`, `contains`, `regex` | -             | Prefix match |
-| `suffix`   | string    | No, must select one from `exact`, `prefix`, `suffix`, `contains`, `regex` | -             | Suffix match |
-| `contains` | string    | No, must select one from `exact`, `prefix`, `suffix`, `contains`, `regex` | -             | Contains match |
-| `regex`    | string    | No, must select one from `exact`, `prefix`, `suffix`, `contains`, `regex` | -             | Regex match |
+| Name | Data Type | Required | Default Value | Description |
+| --- | --- | --- | --- | --- |
+| `service_name` | string | Yes | - | Enter the name of the authorization service, the full FQDN name with service type, e.g., `ext-auth.dns`, `ext-auth.my-ns.svc.cluster.local` |
+| `service_port` | int | No | 80 | Enter the service port of the authorization service |
+| `service_host` | string | No | - | The Host header set when requesting the authorization service. If not filled, it will be the same as the FQDN |
+| `path_prefix` | string | Required when `endpoint_mode` is `envoy` | - | When `endpoint_mode` is `envoy`, the request path prefix for the client to send a request to the authorization service |
+| `request_method` | string | No | GET | When `endpoint_mode` is `forward_auth`, the HTTP Method for the client to send a request to the authorization service |
+| `path` | string | Required when `endpoint_mode` is `forward_auth` | - | When `endpoint_mode` is `forward_auth`, the request path for the client to send a request to the authorization service |

-## Configuration Example
-Assuming the `ext-auth` service has a serviceName of `ext-auth`, port `8090`, path `/auth`, and namespace `backend` in Kubernetes.
+Configuration fields for each item in `authorization_request`

-Two types of `endpoint_mode` are supported:
- When `endpoint_mode` is `envoy`, the authentication request will use the original request HTTP Method, and the configured `path_prefix` will be concatenated with the original request path.
- When `endpoint_mode` is `forward_auth`, the authentication request will use the configured `request_method` as the HTTP Method and the configured `path` as the request path.
+| Name | Data Type | Required | Default Value | Description |
+| --- | --- | --- | --- | --- |
+| `allowed_headers` | array of StringMatcher | No | - | After setting, the client request headers that match the items will be added to the request headers in the authorization service request. In addition to the user-defined header matching rules, the `Authorization` HTTP header will be automatically included in the authorization service request (when `endpoint_mode` is `forward_auth`, the `X-Forwarded-*` request headers will be added) |
+| `headers_to_add` | map[string]string | No | - | Sets the list of request headers to be included in the authorization service request. Please note that the client request headers with the same name will be overwritten |
+| `with_request_body` | bool | No | false | Buffer the client request body and include it in the authentication request (has no effect for requests whose HTTP Method is GET, OPTIONS, or HEAD) |
+| `max_request_body_bytes` | int | No | 10MB | Sets the maximum size of the client request body to be saved in memory. When the client request body reaches the value set in this field, an HTTP 413 status code will be returned and the authorization process will not be started. Note that this setting takes precedence over the `failure_mode_allow` configuration |
+
+Configuration fields for each item in `authorization_response`
+
+| Name | Data Type | Required | Default Value | Description |
+| --- | --- | --- | --- | --- |
+| `allowed_upstream_headers` | array of StringMatcher | No | - | The response headers of the authentication request that match the items will be added to the original client request headers. Please note that the request headers with the same name will be overwritten |
+| `allowed_client_headers` | array of StringMatcher | No | - | Applies only when the request is rejected. If not set, all the response headers of the authentication request will be added to the client's response headers. If set, only the response headers of the authentication request that match the items will be added to the client's response headers |
+
+Configuration fields for each item of `StringMatcher` type. When using `array of StringMatcher`, the StringMatchers defined in the array will be configured in order.
+
+| Name | Data Type | Required | Default Value | Description |
+| --- | --- | --- | --- | --- |
+| `exact` | string | No, one of `exact`, `prefix`, `suffix`, `contains`, `regex` must be selected | - | Exact match |
+| `prefix` | string | No, one of `exact`, `prefix`, `suffix`, `contains`, `regex` must be selected | - | Prefix match |
+| `suffix` | string | No, one of `exact`, `prefix`, `suffix`, `contains`, `regex` must be selected | - | Suffix match |
+| `contains` | string | No, one of `exact`, `prefix`, `suffix`, `contains`, `regex` must be selected | - | Contains |
+| `regex` | string | No, one of `exact`, `prefix`, `suffix`, `contains`, `regex` must be selected | - | Regular expression match |
+
+Configuration fields for each item of `MatchRule` type. When using `array of MatchRule`, the MatchRules defined in the array will be configured in order.
+
+| Name | Data Type | Required | Default Value | Description |
+| --- | --- | --- | --- | --- |
+| `match_rule_domain` | string | No | - | The domain of the matching rule, supports wildcard patterns, e.g., `*.bar.com` |
+| `match_rule_method` | []string | No | - | Matching rule for the request method |
+| `match_rule_path` | string | No | - | The rule for matching the request path |
+| `match_rule_type` | string | No | - | The type of the rule for matching the request path, can be `exact`, `prefix`, `suffix`, `contains`, `regex` |
+
+### Differences between the two `endpoint_mode`
+
+When `endpoint_mode` is `envoy`, the authentication request will use the original request's HTTP Method and the configured `path_prefix` as the request path prefix, concatenated with the original request path.
+
+When `endpoint_mode` is `forward_auth`, the authentication request will use the configured `request_method` as the HTTP Method and the configured `path` as the request path. Higress will automatically generate and send the following headers to the authorization service:
+
+| Header | Description |
+| --- | --- |
+| `x-forwarded-proto` | The scheme of the original request, such as http/https |
+| `x-forwarded-method` | The method of the original request, such as get/post/delete/patch |
+| `x-forwarded-host` | The host of the original request |
+| `x-forwarded-uri` | The path of the original request, including query parameters, e.g., `/v1/app?test=true` |
+
+### Blacklist and Whitelist Modes
+
+The plugin supports both blacklist and whitelist modes. Whitelist mode is the default; if the whitelist is empty, all requests need to be verified. Domain matching supports wildcard domains such as `*.bar.com`, and path matching supports the rule types `exact`, `prefix`, `suffix`, `contains`, and `regex`.
+
+**Whitelist Mode**
+
+```yaml
+# Configuration for the whitelist mode. Requests that match the whitelist rules do not need verification.
+match_type: 'whitelist'
+match_list:
+  # Requests with the domain name api.example.com and a path prefixed with /public do not need verification.
+  - match_rule_domain: 'api.example.com'
+    match_rule_path: '/public'
+    match_rule_type: 'prefix'
+  # For the image resource server images.example.com, all GET requests do not need verification.
+  - match_rule_domain: 'images.example.com'
+    match_rule_method: ["GET"]
+  # For all domains, HEAD requests with an exact path match of /health-check do not need verification.
+  - match_rule_method: ["HEAD"]
+    match_rule_path: '/health-check'
+    match_rule_type: 'exact'
+```
+
+**Blacklist Mode**
+
+```yaml
+# Configuration for the blacklist mode. Requests that match the blacklist rules need verification.
+match_type: 'blacklist'
+match_list:
+  # Requests with the domain name admin.example.com and a path prefixed with /sensitive need verification.
+  - match_rule_domain: 'admin.example.com'
+    match_rule_path: '/sensitive'
+    match_rule_type: 'prefix'
+  # For all domains, DELETE requests with an exact path match of /user need verification.
+  - match_rule_method: ["DELETE"]
+    match_rule_path: '/user'
+    match_rule_type: 'exact'
+  # For the domain legacy.example.com, all POST requests need verification.
+  - match_rule_domain: 'legacy.example.com'
+    match_rule_method: ["POST"]
+```
+
+
+## Configuration Examples
+
+Assume that in Kubernetes, the `ext-auth` service has a `serviceName` of `ext-auth`, a port of `8090`, a path of `/auth`, and is in the `backend` namespace.
+
+### When endpoint_mode is envoy
+
+#### Example 1
+
+Configuration of the `ext-auth` plugin:

-### Example 1: `endpoint_mode` is `envoy`
-#### Configuration of `ext-auth` Plugin:
 ```yaml
 http_service:
  endpoint_mode: envoy
@@ -80,13 +157,16 @@ http_service:
  timeout: 1000
 ```

-Using the following request to the gateway, after enabling the `ext-auth` plugin:
+After enabling the `ext-auth` plugin, send the following request to the gateway:
+
 ```shell
 curl -X POST http://localhost:8082/users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5 -X GET -H "foo: bar" -H "Authorization: xxx"
 ```

-**Successful request to the `ext-auth` service:**
-The `ext-auth` service will receive the following authentication request:
+**When the request to the `ext-auth` service is successful**:
+
+The `ext-auth` service will receive the following authorization request:
+
 ```
 POST /auth/users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5 HTTP/1.1
 Host: ext-auth.backend.svc.cluster.local
@@ -94,10 +174,12 @@ Authorization: xxx
 Content-Length: 0
 ```

-**Failed request to the `ext-auth` service:**
-When the `ext-auth` service responds with a 5xx error, the client will receive an HTTP response code of 403 along with all response headers returned by the `ext-auth` service.
+**When the request to the `ext-auth` service fails**:
+
+When the response from the `ext-auth` service is 5xx, the client will receive an HTTP response code of 403 and all the response headers returned by the `ext-auth` service.
+
+If the `ext-auth` service returns response headers of `x-auth-version: 1.0` and `x-auth-failed: true`, they will be passed to the client.

-If the `ext-auth` service returns `x-auth-version: 1.0` and `x-auth-failed: true` headers, these will be conveyed to the client:
 ```
 HTTP/1.1 403 Forbidden
 x-auth-version: 1.0
@@ -107,20 +189,25 @@ server: istio-envoy
 content-length: 0
 ```

-When the `ext-auth` service is inaccessible or returns a status code of 5xx, the client request will be denied with the status code configured in `status_on_error`. When the `ext-auth` service returns other HTTP status codes, the client request will be denied with the returned status code. If `allowed_client_headers` is configured, the matching response headers will be added to the client's response.
+When the `ext-auth` service is inaccessible or returns a 5xx status code, the client request will be rejected with the status code configured in `status_on_error`.
+
+When the `ext-auth` service returns any other HTTP status code, the client request will be rejected with the returned status code. If `allowed_client_headers` is configured, the response headers that match the configured items will be added to the client's response.
+
+#### Example 2
+
+Configuration of the `ext-auth` plugin:

-#### Example 2: `ext-auth` Plugin Configuration:
 ```yaml
 http_service:
  authorization_request:
    allowed_headers:
-    - exact: x-auth-version
+      - exact: x-auth-version
    headers_to_add:
      x-envoy-header: true
  authorization_response:
    allowed_upstream_headers:
-    - exact: x-user-id
-    - exact: x-auth-version
+      - exact: x-user-id
+      - exact: x-auth-version
  endpoint_mode: envoy
  endpoint:
    service_name: ext-auth.backend.svc.cluster.local
@@ -130,12 +217,14 @@ http_service:
  timeout: 1000
 ```

-Using the following request to the gateway after enabling the `ext-auth` plugin:
+After enabling the `ext-auth` plugin, send the following request to the gateway:
+
 ```shell
 curl -X POST http://localhost:8082/users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5 -X GET -H "foo: bar" -H "Authorization: xxx"
 ```

-The `ext-auth` service will receive the following authentication request:
+The `ext-auth` service will receive the following authorization request:
+
 ```
 POST /auth/users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5 HTTP/1.1
 Host: my-domain.local
@@ -145,10 +234,14 @@ x-envoy-header: true
 Content-Length: 0
 ```

-If the `ext-auth` service returns headers containing `x-user-id` and `x-auth-version`, these two request headers will be included in requests to the upstream when the gateway calls it.
+If the response headers returned by the `ext-auth` service contain `x-user-id` and `x-auth-version`, these two headers will be included in the request when the gateway calls the upstream.
+
+### When endpoint_mode is forward_auth
+
+#### Example 1
+
+Configuration of the `ext-auth` plugin:

-### Example 1: `endpoint_mode` is `forward_auth`
-`ext-auth` Plugin Configuration:
 ```yaml
 http_service:
  endpoint_mode: forward_auth
@@ -160,26 +253,33 @@ http_service:
  timeout: 1000
 ```

-Using the following request to the gateway after enabling the `ext-auth` plugin:
+After enabling the `ext-auth` plugin, send the following request to the gateway:
+
 ```shell
-curl -i http://localhost:8082/users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5 -X GET -H "foo: bar" -H "Authorization: xxx"
+curl -i http://localhost:8082/users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5 -X GET -H "foo: bar" -H "Authorization: xxx" -H "Host: foo.bar.com"
 ```

-**Successful request to the `ext-auth` service:**
-The `ext-auth` service will receive the following authentication request:
+**When the request to the `ext-auth` service is successful**:
+
+The `ext-auth` service will receive the following authorization request:
+
 ```
 POST /auth HTTP/1.1
 Host: ext-auth.backend.svc.cluster.local
 Authorization: xxx
-X-Original-Uri: /users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5
-X-Original-Method: GET
+X-Forwarded-Proto: HTTP
+X-Forwarded-Host: foo.bar.com
+X-Forwarded-Uri: /users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5
+X-Forwarded-Method: GET
 Content-Length: 0
 ```

-**Failed request to the `ext-auth` service:**
-When the `ext-auth` service responds with a 5xx error, the client will receive an HTTP response code of 403 along with all response headers returned by the `ext-auth` service.
+**When the request to the `ext-auth` service fails**:
+
+When the response from the `ext-auth` service is 5xx, the client will receive an HTTP response code of 403 and all the response headers returned by the `ext-auth` service.
+
+If the `ext-auth` service returns response headers of `x-auth-version: 1.0` and `x-auth-failed: true`, they will be passed to the client.

-If the `ext-auth` service returns `x-auth-version: 1.0` and `x-auth-failed: true` headers, these will be conveyed to the client:
 ```
 HTTP/1.1 403 Forbidden
 x-auth-version: 1.0
@@ -189,20 +289,25 @@ server: istio-envoy
 content-length: 0
 ```

-When the `ext-auth` service is inaccessible or returns a status code of 5xx, the client request will be denied with the status code configured in `status_on_error`. When the `ext-auth` service returns other HTTP status codes, the client request will be denied with the returned status code. If `allowed_client_headers` is configured, the matching response headers will be added to the client's response.
+When the `ext-auth` service is inaccessible or returns a 5xx status code, the client request will be rejected with the status code configured in `status_on_error`.
+
+When the `ext-auth` service returns any other HTTP status code, the client request will be rejected with the returned status code. If `allowed_client_headers` is configured, the response headers that match the configured items will be added to the client's response.
+
+#### Example 2
+
+Configuration of the `ext-auth` plugin:

-#### Example 2: `ext-auth` Plugin Configuration:
 ```yaml
 http_service:
  authorization_request:
    allowed_headers:
-    - exact: x-auth-version
+      - exact: x-auth-version
    headers_to_add:
      x-envoy-header: true
  authorization_response:
    allowed_upstream_headers:
-    - exact: x-user-id
-    - exact: x-auth-version
+      - exact: x-user-id
+      - exact: x-auth-version
  endpoint_mode: forward_auth
  endpoint:
    service_name: ext-auth.backend.svc.cluster.local
@@ -213,31 +318,25 @@ http_service:
  timeout: 1000
 ```

-Using the following request to the gateway after enabling the `ext-auth` plugin:
+After enabling the `ext-auth` plugin, send the following request to the gateway:
+
 ```shell
-curl -i http://localhost:8082/users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5 -X GET -H "foo: bar" -H "Authorization: xxx" -H "X-Auth-Version: 1.0"
+curl -i http://localhost:8082/users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5 -X GET -H "foo: bar" -H "Authorization: xxx" -H "X-Auth-Version: 1.0" -H "Host: foo.bar.com"
 ```

-The `ext-auth` service will receive the following authentication request:
+The `ext-auth` service will receive the following authorization request:
+
 ```
 POST /auth HTTP/1.1
 Host: my-domain.local
 Authorization: xxx
-X-Original-Uri: /users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5
-X-Original-Method: GET
+X-Forwarded-Proto: HTTP
+X-Forwarded-Host: foo.bar.com
+X-Forwarded-Uri: /users?apikey=9a342114-ba8a-11ec-b1bf-00163e1250b5
+X-Forwarded-Method: GET
 X-Auth-Version: 1.0
 x-envoy-header: true
 Content-Length: 0
 ```

-If the `ext-auth` service returns headers containing `x-user-id` and `x-auth-version`, these two request headers will be included in requests to the upstream when the gateway calls it.
-
-#### x-forwarded-* Header
-When `endpoint_mode` is `forward_auth`, Higress will automatically generate and send the following headers to the authorization service.
-| Header             | Description                                   |
-|--------------------|-----------------------------------------------|
-| x-forwarded-proto  | The scheme of the original request, e.g., http/https |
-| x-forwarded-method | The method of the original request, e.g., get/post/delete/patch |
-| x-forwarded-host   | The host of the original request               |
-| x-forwarded-uri    | The path of the original request, including path parameters, e.g., /v1/app?test=true |
-| x-forwarded-for    | The client IP address of the original request   |
+If the response headers returned by the `ext-auth` service contain `x-user-id` and `x-auth-version`, these two headers will be included in the request when the gateway calls the upstream.
--- a/plugins/wasm-go/extensions/ext-auth/config/config.go
+++ b/plugins/wasm-go/extensions/ext-auth/config/config.go
@@ -1,4 +1,4 @@
-package main
+package config

 import (
 	"errors"
@@ -12,80 +12,78 @@ import (
 )

 const (
-	DefaultStatusOnError uint32 = http.StatusForbidden
+	DefaultStatusOnError = http.StatusForbidden

-	DefaultHttpServiceTimeout uint32 = 1000
+	DefaultHttpServiceTimeout = 1000

-	DefaultMaxRequestBodyBytes uint32 = 10 * 1024 * 1024
-
-	EndpointModeEnvoy = "envoy"
+	DefaultMaxRequestBodyBytes = 10 * 1024 * 1024

+	EndpointModeEnvoy       = "envoy"
 	EndpointModeForwardAuth = "forward_auth"
 )

 type ExtAuthConfig struct {
-	httpService               HttpService
-	failureModeAllow          bool
-	failureModeAllowHeaderAdd bool
-	statusOnError             uint32
+	HttpService               HttpService
+	MatchRules                expr.MatchRules
+	FailureModeAllow          bool
+	FailureModeAllowHeaderAdd bool
+	StatusOnError             uint32
 }

 type HttpService struct {
-	endpointMode string
-	client       wrapper.HttpClient
-	// pathPrefix is only used when endpoint_mode is envoy
-	pathPrefix string
-	// requestMethod is only used when endpoint_mode is forward_auth
-	requestMethod string
-	// path is only used when endpoint_mode is forward_auth
-	path                  string
-	timeout               uint32
-	authorizationRequest  AuthorizationRequest
-	authorizationResponse AuthorizationResponse
+	EndpointMode string
+	Client       wrapper.HttpClient
+	// PathPrefix is only used when endpoint_mode is envoy
+	PathPrefix string
+	// RequestMethod is only used when endpoint_mode is forward_auth
+	RequestMethod string
+	// Path is only used when endpoint_mode is forward_auth
+	Path                  string
+	Timeout               uint32
+	AuthorizationRequest  AuthorizationRequest
+	AuthorizationResponse AuthorizationResponse
 }

 type AuthorizationRequest struct {
-	// allowedHeaders In addition to the user’s supplied matchers,
-	// Authorization are automatically included to the list.
-	// When the endpoint_mode is set to forward_auth,
-	// the original request's path is set in the X-Original-Uri header,
-	// and the original request's HTTP method is set in the X-Original-Method header.
-	allowedHeaders      expr.Matcher
-	headersToAdd        map[string]string
-	withRequestBody     bool
-	maxRequestBodyBytes uint32
+	AllowedHeaders      expr.Matcher
+	HeadersToAdd        map[string]string
+	WithRequestBody     bool
+	MaxRequestBodyBytes uint32
 }

 type AuthorizationResponse struct {
-	allowedUpstreamHeaders expr.Matcher
-	allowedClientHeaders   expr.Matcher
+	AllowedUpstreamHeaders expr.Matcher
+	AllowedClientHeaders   expr.Matcher
 }

-func parseConfig(json gjson.Result, config *ExtAuthConfig, log wrapper.Log) error {
+func ParseConfig(json gjson.Result, config *ExtAuthConfig, log wrapper.Log) error {
 	httpServiceConfig := json.Get("http_service")
 	if !httpServiceConfig.Exists() {
 		return errors.New("missing http_service in config")
 	}
-	err := parseHttpServiceConfig(httpServiceConfig, config, log)
-	if err != nil {
+	if err := parseHttpServiceConfig(httpServiceConfig, config, log); err != nil {
+		return err
+	}
+
+	if err := parseMatchRules(json, config); err != nil {
 		return err
 	}

 	failureModeAllow := json.Get("failure_mode_allow")
 	if failureModeAllow.Exists() {
-		config.failureModeAllow = failureModeAllow.Bool()
+		config.FailureModeAllow = failureModeAllow.Bool()
 	}

 	failureModeAllowHeaderAdd := json.Get("failure_mode_allow_header_add")
 	if failureModeAllowHeaderAdd.Exists() {
-		config.failureModeAllowHeaderAdd = failureModeAllowHeaderAdd.Bool()
+		config.FailureModeAllowHeaderAdd = failureModeAllowHeaderAdd.Bool()
 	}

 	statusOnError := uint32(json.Get("status_on_error").Uint())
 	if statusOnError == 0 {
 		statusOnError = DefaultStatusOnError
 	}
-	config.statusOnError = statusOnError
+	config.StatusOnError = statusOnError

 	return nil
 }
@@ -101,7 +99,7 @@ func parseHttpServiceConfig(json gjson.Result, config *ExtAuthConfig, log wrappe
 	if timeout == 0 {
 		timeout = DefaultHttpServiceTimeout
 	}
-	httpService.timeout = timeout
+	httpService.Timeout = timeout

 	if err := parseAuthorizationRequestConfig(json, &httpService); err != nil {
 		return err
@@ -111,7 +109,7 @@ func parseHttpServiceConfig(json gjson.Result, config *ExtAuthConfig, log wrappe
 		return err
 	}

-	config.httpService = httpService
+	config.HttpService = httpService

 	return nil
 }
@@ -123,7 +121,7 @@ func parseEndpointConfig(json gjson.Result, httpService *HttpService, log wrappe
 	} else if endpointMode != EndpointModeEnvoy && endpointMode != EndpointModeForwardAuth {
 		return errors.New(fmt.Sprintf("endpoint_mode %s is not supported", endpointMode))
 	}
-	httpService.endpointMode = endpointMode
+	httpService.EndpointMode = endpointMode

 	endpointConfig := json.Get("endpoint")
 	if !endpointConfig.Exists() {
@@ -140,7 +138,7 @@ func parseEndpointConfig(json gjson.Result, httpService *HttpService, log wrappe
 	}
 	serviceHost := endpointConfig.Get("service_host").String()

-	httpService.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
+	httpService.Client = wrapper.NewClusterClient(wrapper.FQDNCluster{
 		FQDN: serviceName,
 		Port: servicePort,
 		Host: serviceHost,
@@ -152,7 +150,7 @@ func parseEndpointConfig(json gjson.Result, httpService *HttpService, log wrappe
 		if !pathPrefixConfig.Exists() {
 			return errors.New("when endpoint_mode is envoy, endpoint path_prefix must not be empty")
 		}
-		httpService.pathPrefix = pathPrefixConfig.String()
+		httpService.PathPrefix = pathPrefixConfig.String()

 		if endpointConfig.Get("request_method").Exists() || endpointConfig.Get("path").Exists() {
 			log.Warn("when endpoint_mode is envoy, endpoint request_method and path will be ignored")
@@ -160,16 +158,16 @@ func parseEndpointConfig(json gjson.Result, httpService *HttpService, log wrappe
 	case EndpointModeForwardAuth:
 		requestMethodConfig := endpointConfig.Get("request_method")
 		if !requestMethodConfig.Exists() {
-			httpService.requestMethod = http.MethodGet
+			httpService.RequestMethod = http.MethodGet
 		} else {
-			httpService.requestMethod = strings.ToUpper(requestMethodConfig.String())
+			httpService.RequestMethod = strings.ToUpper(requestMethodConfig.String())
 		}

 		pathConfig := endpointConfig.Get("path")
 		if !pathConfig.Exists() {
 			return errors.New("when endpoint_mode is forward_auth, endpoint path must not be empty")
 		}
-		httpService.path = pathConfig.String()
+		httpService.Path = pathConfig.String()

 		if endpointConfig.Get("path_prefix").Exists() {
 			log.Warn("when endpoint_mode is forward_auth, endpoint path_prefix will be ignored")
@@ -189,35 +187,28 @@ func parseAuthorizationRequestConfig(json gjson.Result, httpService *HttpService
 			if err != nil {
 				return err
 			}
-			authorizationRequest.allowedHeaders = result
+			authorizationRequest.AllowedHeaders = result
 		}

-		headersToAdd := map[string]string{}
-		headersToAddConfig := authorizationRequestConfig.Get("headers_to_add")
-		if headersToAddConfig.Exists() {
-			for key, value := range headersToAddConfig.Map() {
-				headersToAdd[key] = value.Str
-			}
-		}
-		authorizationRequest.headersToAdd = headersToAdd
+		authorizationRequest.HeadersToAdd = convertToStringMap(authorizationRequestConfig.Get("headers_to_add"))

 		withRequestBody := authorizationRequestConfig.Get("with_request_body")
 		if withRequestBody.Exists() {
 			// withRequestBody is true and the request method is GET, OPTIONS or HEAD
 			if withRequestBody.Bool() &&
-				(httpService.requestMethod == http.MethodGet || httpService.requestMethod == http.MethodOptions || httpService.requestMethod == http.MethodHead) {
-				return errors.New(fmt.Sprintf("requestMethod %s does not support with_request_body set to true", httpService.requestMethod))
+				(httpService.RequestMethod == http.MethodGet || httpService.RequestMethod == http.MethodOptions || httpService.RequestMethod == http.MethodHead) {
+				return errors.New(fmt.Sprintf("requestMethod %s does not support with_request_body set to true", httpService.RequestMethod))
 			}
-			authorizationRequest.withRequestBody = withRequestBody.Bool()
+			authorizationRequest.WithRequestBody = withRequestBody.Bool()
 		}

 		maxRequestBodyBytes := uint32(authorizationRequestConfig.Get("max_request_body_bytes").Uint())
 		if maxRequestBodyBytes == 0 {
 			maxRequestBodyBytes = DefaultMaxRequestBodyBytes
 		}
-		authorizationRequest.maxRequestBodyBytes = maxRequestBodyBytes
+		authorizationRequest.MaxRequestBodyBytes = maxRequestBodyBytes

-		httpService.authorizationRequest = authorizationRequest
+		httpService.AuthorizationRequest = authorizationRequest
 	}
 	return nil
 }
@@ -233,7 +224,7 @@ func parseAuthorizationResponseConfig(json gjson.Result, httpService *HttpServic
 			if err != nil {
 				return err
 			}
-			authorizationResponse.allowedUpstreamHeaders = result
+			authorizationResponse.AllowedUpstreamHeaders = result
 		}

 		allowedClientHeaders := authorizationResponseConfig.Get("allowed_client_headers")
@@ -242,10 +233,84 @@ func parseAuthorizationResponseConfig(json gjson.Result, httpService *HttpServic
 			if err != nil {
 				return err
 			}
-			authorizationResponse.allowedClientHeaders = result
+			authorizationResponse.AllowedClientHeaders = result
 		}

-		httpService.authorizationResponse = authorizationResponse
+		httpService.AuthorizationResponse = authorizationResponse
 	}
 	return nil
 }
+
+func parseMatchRules(json gjson.Result, config *ExtAuthConfig) error {
+	matchListConfig := json.Get("match_list")
+	if !matchListConfig.Exists() {
+		config.MatchRules = expr.MatchRulesDefaults()
+		return nil
+	}
+
+	matchType := json.Get("match_type")
+	if !matchType.Exists() {
+		return errors.New("missing match_type in config")
+	}
+	if matchType.Str != expr.ModeWhitelist && matchType.Str != expr.ModeBlacklist {
+		return errors.New("invalid match_type in config, must be 'whitelist' or 'blacklist'")
+	}
+
+	ruleList := make([]expr.Rule, 0)
+	var err error
+
+	matchListConfig.ForEach(func(key, value gjson.Result) bool {
+		domain := value.Get("match_rule_domain").Str
+		methodArray := value.Get("match_rule_method").Array()
+		matchRuleType := value.Get("match_rule_type").Str
+		matchRulePath := value.Get("match_rule_path").Str
+
+		var pathMatcher expr.Matcher
+		var buildErr error
+
+		if matchRuleType == "" && matchRulePath == "" {
+			pathMatcher = nil
+		} else {
+			pathMatcher, buildErr = expr.BuildStringMatcher(matchRuleType, matchRulePath, false)
+			if buildErr != nil {
+				err = fmt.Errorf("failed to build string matcher for rule with domain %q, method %v, path %q, type %q: %w",
+					domain, methodArray, matchRulePath, matchRuleType, buildErr)
+				return false // stop iterating
+			}
+		}
+
+		ruleList = append(ruleList, expr.Rule{
+			Domain: domain,
+			Method: convertToStringList(methodArray),
+			Path:   pathMatcher,
+		})
+		return true // keep iterating
+	})
+
+	if err != nil {
+		return err
+	}
+
+	config.MatchRules = expr.MatchRules{
+		Mode:     matchType.Str,
+		RuleList: ruleList,
+	}
+	return nil
+}
+
+func convertToStringMap(result gjson.Result) map[string]string {
+	m := make(map[string]string)
+	result.ForEach(func(key, value gjson.Result) bool {
+		m[key.String()] = value.String()
+		return true // keep iterating
+	})
+	return m
+}
+
+func convertToStringList(results []gjson.Result) []string {
+	interfaces := make([]string, len(results))
+	for i, result := range results {
+		interfaces[i] = result.String()
+	}
+	return interfaces
+}
--- a/plugins/wasm-go/extensions/ext-auth/config/config_test.go
+++ b/plugins/wasm-go/extensions/ext-auth/config/config_test.go
@@ -0,0 +1,416 @@
+package config
+
+import (
+	"testing"
+
+	"ext-auth/expr"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/stretchr/testify/assert"
+	"github.com/tidwall/gjson"
+)
+
+func TestParseConfig(t *testing.T) {
+	tests := []struct {
+		name        string
+		json        string
+		expected    ExtAuthConfig
+		expectedErr string
+	}{
+		{
+			name: "Valid Config with Default Values",
+			json: `{
+				"http_service": {
+					"endpoint_mode": "envoy",
+					"endpoint": {
+						"service_name": "example.com",
+						"service_port": 80,
+						"path_prefix": "/auth"
+					}
+				}
+			}`,
+			expected: ExtAuthConfig{
+				HttpService: HttpService{
+					EndpointMode: "envoy",
+					Client: wrapper.NewClusterClient(wrapper.FQDNCluster{
+						FQDN: "example.com",
+						Port: 80,
+						Host: "",
+					}),
+					PathPrefix: "/auth",
+					Timeout:    1000,
+				},
+				MatchRules:                expr.MatchRulesDefaults(),
+				FailureModeAllow:          false,
+				FailureModeAllowHeaderAdd: false,
+				StatusOnError:             403,
+			},
+		},
+		{
+			name: "Valid Config with Custom Values",
+			json: `{
+				"http_service": {
+					"endpoint_mode": "forward_auth",
+					"endpoint": {
+						"service_name": "auth.example.com",
+						"service_port": 8080,
+						"service_host": "auth.example.com",
+						"request_method": "POST",
+						"path": "/auth"
+					},
+					"timeout": 2000,
+					"authorization_request": {
+						"headers_to_add": {
+							"X-Auth-Source": "wasm"
+						},
+						"with_request_body": true,
+						"max_request_body_bytes": 1048576
+					}
+				},
+				"skipped_path_prefixes": ["/health", "/metrics"],
+				"failure_mode_allow": true,
+				"failure_mode_allow_header_add": true,
+				"status_on_error": 500
+			}`,
+			expected: ExtAuthConfig{
+				HttpService: HttpService{
+					EndpointMode: "forward_auth",
+					Client: wrapper.NewClusterClient(wrapper.FQDNCluster{
+						FQDN: "auth.example.com",
+						Port: 8080,
+						Host: "auth.example.com",
+					}),
+					RequestMethod: "POST",
+					Path:          "/auth",
+					Timeout:       2000,
+					AuthorizationRequest: AuthorizationRequest{
+						HeadersToAdd: map[string]string{
+							"X-Auth-Source": "wasm",
+						},
+						WithRequestBody:     true,
+						MaxRequestBodyBytes: 1048576,
+					},
+				},
+				MatchRules:                expr.MatchRulesDefaults(),
+				FailureModeAllow:          true,
+				FailureModeAllowHeaderAdd: true,
+				StatusOnError:             500,
+			},
+		},
+		{
+			name:        "Missing HttpService Configuration",
+			json:        `{}`,
+			expectedErr: "missing http_service in config",
+		},
+		{
+			name: "Invalid Endpoint Mode",
+			json: `{
+				"http_service": {
+					"endpoint_mode": "invalid_mode",
+					"endpoint": {
+						"service_name": "example.com",
+						"service_port": 80
+					}
+				}
+			}`,
+			expectedErr: "endpoint_mode invalid_mode is not supported",
+		},
+		{
+			name: "Missing Endpoint Configuration",
+			json: `{
+				"http_service": {
+					"endpoint_mode": "envoy"
+				}
+			}`,
+			expectedErr: "missing endpoint in config",
+		},
+		{
+			name: "Empty Service Name",
+			json: `{
+				"http_service": {
+					"endpoint_mode": "envoy",
+					"endpoint": {
+						"service_name": "",
+						"service_port": 80
+					}
+				}
+			}`,
+			expectedErr: "endpoint service name must not be empty",
+		},
+		{
+			name: "Invalid Request Method with Request Body",
+			json: `{
+				"http_service": {
+					"endpoint_mode": "forward_auth",
+					"endpoint": {
+						"service_name": "auth.example.com",
+						"service_port": 8080,
+						"request_method": "GET",
+						"path": "/auth"
+					},
+					"authorization_request": {
+						"with_request_body": true
+					}
+				}
+			}`,
+			expectedErr: "requestMethod GET does not support with_request_body set to true",
+		},
+		{
+			name: "Missing Path for Forward Auth",
+			json: `{
+				"http_service": {
+					"endpoint_mode": "forward_auth",
+					"endpoint": {
+						"service_name": "auth.example.com",
+						"service_port": 8080,
+						"service_host": "auth.example.com",
+						"request_method": "POST"
+					}
+				}
+			}`,
+			expectedErr: "when endpoint_mode is forward_auth, endpoint path must not be empty",
+		},
+		{
+			name: "Missing Path Prefix for Envoy",
+			json: `{
+				"http_service": {
+					"endpoint_mode": "envoy",
+					"endpoint": {
+						"service_name": "example.com",
+						"service_port": 80
+					}
+				}
+			}`,
+			expectedErr: "when endpoint_mode is envoy, endpoint path_prefix must not be empty",
+		},
+		{
+			name: "Valid Match Rules with Blacklist",
+			json: `{
+				"http_service": {
+					"endpoint_mode": "envoy",
+					"endpoint": {
+						"service_name": "example.com",
+						"service_port": 80,
+						"path_prefix": "/auth"
+					}
+				},
+				"match_type": "blacklist",
+				"match_list": [
+					{
+						"match_rule_domain": "*.bar.com",
+						"match_rule_path": "/headers",
+						"match_rule_type": "prefix"
+					}
+				]
+			}`,
+			expected: ExtAuthConfig{
+				HttpService: HttpService{
+					EndpointMode: "envoy",
+					Client: wrapper.NewClusterClient(wrapper.FQDNCluster{
+						FQDN: "example.com",
+						Port: 80,
+						Host: "",
+					}),
+					PathPrefix: "/auth",
+					Timeout:    1000,
+				},
+				MatchRules: expr.MatchRules{
+					Mode: "blacklist",
+					RuleList: []expr.Rule{
+						{
+							Domain: "*.bar.com",
+							Method: []string{},
+							Path: func() expr.Matcher {
+								pathMatcher, err := expr.BuildStringMatcher(expr.MatchPatternPrefix, "/headers", false)
+								if err != nil {
+									t.Fatalf("Failed to create Matcher: %v", err)
+								}
+								return pathMatcher
+							}(),
+						},
+					},
+				},
+				FailureModeAllow:          false,
+				FailureModeAllowHeaderAdd: false,
+				StatusOnError:             403,
+			},
+		},
+		{
+			name: "Valid Match Rules with Whitelist",
+			json: `{
+				"http_service": {
+					"endpoint_mode": "envoy",
+					"endpoint": {
+						"service_name": "example.com",
+						"service_port": 80,
+						"path_prefix": "/auth"
+					}
+				},
+				"match_type": "whitelist",
+				"match_list": [
+					{
+						"match_rule_domain": "*.foo.com",
+						"match_rule_method": ["GET"],
+						"match_rule_path": "/api",
+						"match_rule_type": "exact"
+					}
+				]
+			}`,
+			expected: ExtAuthConfig{
+				HttpService: HttpService{
+					EndpointMode: "envoy",
+					Client: wrapper.NewClusterClient(wrapper.FQDNCluster{
+						FQDN: "example.com",
+						Port: 80,
+						Host: "",
+					}),
+					PathPrefix: "/auth",
+					Timeout:    1000,
+				},
+				MatchRules: expr.MatchRules{
+					Mode: "whitelist",
+					RuleList: []expr.Rule{
+						{
+							Domain: "*.foo.com",
+							Method: []string{"GET"},
+							Path: func() expr.Matcher {
+								pathMatcher, err := expr.BuildStringMatcher(expr.MatchPatternExact, "/api", false)
+								if err != nil {
+									t.Fatalf("Failed to create Matcher: %v", err)
+								}
+								return pathMatcher
+							}(),
+						},
+					},
+				},
+				FailureModeAllow:          false,
+				FailureModeAllowHeaderAdd: false,
+				StatusOnError:             403,
+			},
+		},
+		{
+			name: "Valid Match Rules with Whitelist - Only Method",
+			json: `{
+				"http_service": {
+					"endpoint_mode": "envoy",
+					"endpoint": {
+						"service_name": "example.com",
+						"service_port": 80,
+						"path_prefix": "/auth"
+					}
+				},
+				"match_type": "whitelist",
+				"match_list": [
+					{
+						"match_rule_method": ["GET"]
+					}
+				]
+			}`,
+			expected: ExtAuthConfig{
+				HttpService: HttpService{
+					EndpointMode: "envoy",
+					Client: wrapper.NewClusterClient(wrapper.FQDNCluster{
+						FQDN: "example.com",
+						Port: 80,
+						Host: "",
+					}),
+					PathPrefix: "/auth",
+					Timeout:    1000,
+				},
+				MatchRules: expr.MatchRules{
+					Mode: "whitelist",
+					RuleList: []expr.Rule{
+						{
+							Domain: "",
+							Method: []string{"GET"},
+							Path:   nil,
+						},
+					},
+				},
+				FailureModeAllow:          false,
+				FailureModeAllowHeaderAdd: false,
+				StatusOnError:             403,
+			},
+		},
+		{
+			name: "Missing Match Type",
+			json: `{
+				"http_service": {
+					"endpoint_mode": "envoy",
+					"endpoint": {
+						"service_name": "example.com",
+						"service_port": 80,
+						"path_prefix": "/auth"
+					}
+				},
+				"match_list": [
+					{
+						"match_rule_domain": "*.bar.com",
+						"match_rule_path": "/headers",
+						"match_rule_type": "prefix"
+					}
+				]
+			}`,
+			expectedErr: "missing match_type in config",
+		},
+		{
+			name: "Invalid Match Type",
+			json: `{
+				"http_service": {
+					"endpoint_mode": "envoy",
+					"endpoint": {
+						"service_name": "example.com",
+						"service_port": 80,
+						"path_prefix": "/auth"
+					}
+				},
+				"match_type": "invalid_type",
+				"match_list": [
+					{
+						"match_rule_domain": "*.bar.com",
+						"match_rule_path": "/headers",
+						"match_rule_type": "prefix"
+					}
+				]
+			}`,
+			expectedErr: "invalid match_type in config, must be 'whitelist' or 'blacklist'",
+		},
+		{
+			name: "Invalid Match Rule Type",
+			json: `{
+				"http_service": {
+					"endpoint_mode": "envoy",
+					"endpoint": {
+						"service_name": "example.com",
+						"service_port": 80,
+						"path_prefix": "/auth"
+					}
+				},
+				"match_type": "blacklist",
+				"match_list": [
+					{
+						"match_rule_domain": "*.bar.com",
+						"match_rule_method": ["POST","PUT","DELETE"],
+						"match_rule_path": "/headers",
+						"match_rule_type": "invalid_type"
+					}
+				]
+			}`,
+			expectedErr: `failed to build string matcher for rule with domain "*.bar.com", method [POST PUT DELETE], path "/headers", type "invalid_type": unknown string matcher type`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var config ExtAuthConfig
+			result := gjson.Parse(tt.json)
+			err := ParseConfig(result, &config, &wrapper.DefaultLog{})
+
+			if tt.expectedErr != "" {
+				assert.EqualError(t, err, tt.expectedErr)
+			} else {
+				assert.NoError(t, err)
+				assert.Equal(t, tt.expected, config)
+			}
+		})
+	}
+}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
007gzs	102596360a	rustup target add wasm32-wasip1	2025-02-27 13:28:20 +08:00
007gzs	7354d80263	fix prebuild	2025-02-27 11:42:34 +08:00
007gzs	3646502248	fix clippy	2025-02-27 11:12:51 +08:00
007gzs	8be591d052	fix rust_wasm_build	2025-02-27 10:30:34 +08:00
Kent Dong	45fdd95a9c	feat: Support pushing multi-arch images to a custom image registry (#1815 )	2025-02-26 21:15:53 +08:00
Se7en	d3afe345ad	fix: remove last failed apiToken from retry apiToken list (#1802 )	2025-02-26 21:11:51 +08:00
韩贤涛	90ca903d2e	feat: ext-auth plugin: Blacklist and whitelist modes support HTTP request method matching (#1798 )	2025-02-26 20:54:52 +08:00
007gzs	2d8a8f26da	Ai data masking msg window (#1775 )	2025-02-26 20:48:37 +08:00
Se7en	9ea2410388	feat: update ai-token-ratelimit documentation by removing ai-statistics plugin (#1767 )	2025-02-26 20:47:37 +08:00
littlejian	9e1792c245	add notes to gateway.rollingMaxUnavailable (#1819 )	2025-02-26 20:46:53 +08:00
rinfx	3eda7def89	ai-search support quark (#1811 )	2025-02-26 18:42:22 +08:00
澄潭	1787553294	set include_usage by default for all model providers (#1818 )	2025-02-26 16:49:16 +08:00
澄潭	f6c48415d1	Add database configuration for plugins that use Redis. (#1814 )	2025-02-26 10:52:54 +08:00
MARATRIX Li	e27d3d0971	fix(typo): use the correct bing name for ai-search. (#1807 ) Signed-off-by: maratrixx <maratrix@163.com>	2025-02-25 13:37:32 +08:00
Kent Dong	49617c7a98	feat: Unify the SSE processing logic (#1800 )	2025-02-25 11:00:18 +08:00
澄潭	53a015d8fe	Update arxiv.md	2025-02-24 11:27:55 +08:00
澄潭	e711e9f997	Update full.md	2025-02-24 11:27:33 +08:00
澄潭	8530742472	Update README_EN.md	2025-02-24 11:16:09 +08:00
澄潭	c0c1f5113a	Update README.md	2025-02-24 11:15:55 +08:00
澄潭	2e6ddd7e35	Add ai search plugin (#1804 )	2025-02-24 11:14:47 +08:00
Kent Dong	2328e19c9d	fix: Fix a bug in openaiCustomUrl support (#1790 )	2025-02-22 12:12:49 +08:00
Kent Dong	fabc22f218	feat: Support transforming reasoning_content returned by Qwen to OpenAI contract (#1791 )	2025-02-21 17:32:02 +08:00
Yiiong	2986e1911d	feat: add ollama embedding to ai-cache (#1794 )	2025-02-21 15:21:49 +08:00
澄潭	a566f7257d	update helm docs (#1782 )	2025-02-19 17:48:20 +08:00
澄潭	3dbd1b2731	release 2.0.7 (#1781 )	2025-02-19 17:44:08 +08:00
澄潭	7f23980bf5	remove basic-auth useless annotation (#1779 )	2025-02-19 15:58:03 +08:00
澄潭	6fb0684c39	fix openai compatiable (#1778 )	2025-02-19 15:23:15 +08:00
澄潭	dfac9fa5e6	Update README.md	2025-02-18 14:17:21 +08:00
澄潭	bfd9e3026d	Update helm-docs.yaml	2025-02-18 10:00:05 +08:00
澄潭	49aad4152c	Supports completions API & support config openai baseUrl through `openaiCustomUrl` (#1765 )	2025-02-18 09:57:48 +08:00
澄潭	94aacf5153	Update helm-docs.yaml Remove the part that causes the action to fail	2025-02-17 18:59:54 +08:00
littlejian	efcfdbf36e	Add translate-readme action to translate English into Chinese (#1711 )	2025-02-17 17:34:30 +08:00
澄潭	2dbde1833f	ai proxy support passthrough path when api name is unknown (#1754 )	2025-02-13 21:22:43 +08:00
mirror	7272eff8b6	update ai-cache extension (#1746 )	2025-02-13 19:49:52 +08:00
pepesi	a84a382f1d	feature: allow ai-proxy to forward standard AI capabilities that are … (#1704 )	2025-02-12 15:23:44 +08:00
韩贤涛	477e44b9f1	e2e: Enhance the e2e testing of the ai-proxy plugin based on the LLM mock server (#1742 )	2025-02-11 20:16:03 +08:00
澄潭	512385d225	fix host rewrite in frontend-gray (#1747 )	2025-02-08 17:42:29 +08:00
007gzs	b997e6fd26	wasm32-wasi to wasm32-wasip1 (#1716 )	2025-02-05 15:35:48 +08:00
韩贤涛	fab3ebb35a	ut: add ext-auth unit tests (#1710 )	2025-02-05 13:39:10 +08:00
韩贤涛	1431ff9cfe	e2e: Enhance the e2e testing of the ai-proxy plugin based on the LLM mock server (#1713 )	2025-02-05 10:14:25 +08:00
kai2321	fac2c3e7a3	feat:完善对接dify时返回usage相关信息 (#1715 )	2025-02-03 08:35:00 +08:00
韩贤涛	574d1aa36a	fix: Path concatenation issue for authentication requests in Envoy authentication mode (#1709 )	2025-01-23 15:47:07 +08:00
澄潭	7840167c4a	optimize body bufferlimit set in ext-auth plugin (#1707 )	2025-01-23 11:52:30 +08:00
韩贤涛	9d8e78dae3	fix: ext-auth crash bugfix (#1705 )	2025-01-23 11:29:49 +08:00
Se7en	133a30b8d5	fix: stream response buffer issue (#1703 )	2025-01-22 11:28:37 +08:00
kai2321	ce94c6e62d	feat:接入dify (#1664 )	2025-01-21 16:04:15 +08:00
Xijun Dai	05f251e627	fix gateway env (#1689 )	2025-01-21 15:05:14 +08:00
韩贤涛	0259eaddbb	feat: Add ext-auth plugin support for authentication blacklists/whitelists (#1694 )	2025-01-21 14:28:49 +08:00
Se7en	cfa3baddf8	sync ai-token-ratelimit docs (#1688 )	2025-01-19 13:05:25 +08:00
Se7en	b1f625a652	feat: support baidu api key (#1687 )	2025-01-19 11:46:29 +08:00
澄潭	fd1eb54f25	Release 2.0.6 (#1686 )	2025-01-17 15:22:43 +08:00
澄潭	c7550e2d49	Update deploy-to-oss.yaml	2025-01-17 15:10:40 +08:00
Se7en	ba74f4bbb9	fix: baidu api issue (#1685 )	2025-01-16 21:42:43 +08:00