Compare commits

...

14 Commits

Author SHA1 Message Date
lvshui
13261bdc3d release: v2.1.9-rc.1 (#2984) 2025-10-09 17:32:35 +08:00
rinfx
ac2f7dedaa [key-auth] record consumer name once the consumer name is determined (#2978) 2025-10-09 11:22:09 +08:00
EricaLiu
742b9498e4 fix ToolSecurity field (#2952) 2025-10-06 15:10:08 +08:00
Kent Dong
b351dc45e3 doc: Update the description of azureServiceUrl in ai-proxy README files (#2965) 2025-10-06 15:09:53 +08:00
Kent Dong
096b97e433 fix: Eliminate compatibility risk of matching all domains for an MCP server (#2973) 2025-10-06 15:09:21 +08:00
Jun
aebe354055 add vectordb mapping (#2968) 2025-10-06 15:08:13 +08:00
johnlanni
45a11734bd remove rebuild logic in ai-proxy&ai-statistics 2025-09-26 16:26:06 +08:00
johnlanni
063bfbfcfe fix(ai-proxy): fix streaming process 2025-09-23 19:44:30 +08:00
rinfx
9a3ccff4c8 opt(ai-load-balancer): update global least request lua script for ai-load-balancer (#2945) 2025-09-23 19:24:33 +08:00
澄潭
623c8da8d8 fix(ai-proxy): Fix Azure OpenAI Response API handling and service URL type detection (#2948) 2025-09-23 18:49:55 +08:00
Jun
e2d00da861 fix: llm can be empty and optimize document and prompt (#2942) 2025-09-23 14:03:00 +08:00
GuoChenxu
bfca4667bb release note supports system prompt (#2943)
Signed-off-by: guochenxu <guochenxu11@outlook.com>
2025-09-23 14:00:40 +08:00
rinfx
732aacdbc5 fix(ai-security-guard): compatible with old configs (#2941) 2025-09-23 10:23:25 +08:00
github-actions[bot]
a694865f72 Add release notes (#2940)
Co-authored-by: johnlanni <6763318+johnlanni@users.noreply.github.com>
2025-09-21 16:18:23 +08:00
37 changed files with 2483 additions and 803 deletions

View File

@@ -66,6 +66,40 @@ jobs:
"https://github.com/${GITHUB_REPO_OWNER}/${GITHUB_REPO_NAME}/releases/tag/${RELEASE_VERSION}" \
-o release_page.html
# Extract system prompt content from HTML
echo "Extracting system prompt content..."
pip install beautifulsoup4 markdownify
SYSTEM_PROMPT=$(python3 -c "
import sys
from bs4 import BeautifulSoup
from markdownify import markdownify
with open('release_page.html', 'r') as f:
soup = BeautifulSoup(f, 'html.parser')
system_prompt_header = soup.find('h2', string='system prompt')
if system_prompt_header:
content = []
for sibling in system_prompt_header.next_siblings:
if sibling.name == 'h2':
break
content.append(str(sibling))
html_content = ''.join(content).strip()
# Convert HTML to Markdown
if html_content:
markdown_content = markdownify(html_content)
print(markdown_content.strip())
else:
print('')
else:
print('')
")
if [ -z "${SYSTEM_PROMPT}" ]; then
echo "No system prompt found in release notes."
else
echo "System prompt content: ${SYSTEM_PROMPT}"
fi
echo "Extracting PR numbers from ${GITHUB_REPO_OWNER}/${GITHUB_REPO_NAME} release notes..."
PR_NUMS=$(cat release_page.html | grep -o "/${GITHUB_REPO_OWNER}/${GITHUB_REPO_NAME}/pull/[0-9]*" | grep -o "[0-9]*$" | sort -n | uniq | tr '\n' ',')
PR_NUMS=${PR_NUMS%,}
@@ -88,11 +122,24 @@ jobs:
cd higress-report-agent
pip install uv
uv sync
# Build command
CMD_ARGS="--mode 2 --choice 2 --pr_nums ${PR_NUMS}"
if [ -n "${IMPORTANT_PR_NUMS}" ]; then
uv run report_main.py --mode 2 --choice 2 --pr_nums ${PR_NUMS} --important_prs ${IMPORTANT_PR_NUMS}
else
uv run report_main.py --mode 2 --choice 2 --pr_nums ${PR_NUMS}
CMD_ARGS="${CMD_ARGS} --important_prs ${IMPORTANT_PR_NUMS}"
fi
if [ -n "${SYSTEM_PROMPT}" ]; then
echo "${SYSTEM_PROMPT}" > temp_system_prompt.txt
CMD_ARGS="${CMD_ARGS} --sys_prompt_file temp_system_prompt.txt"
fi
uv run report_main.py ${CMD_ARGS}
# Clean up temporary file
if [ -f "temp_system_prompt.txt" ]; then
rm temp_system_prompt.txt
fi
cp report.md ../
cp report.EN.md ../
cd ..

View File

@@ -1 +1 @@
v2.1.8
v2.1.9-rc.1

View File

@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 2.1.8
appVersion: 2.1.9-rc.1
description: Helm chart for deploying higress gateways
icon: https://higress.io/img/higress_logo_small.png
home: http://higress.io/
@@ -15,4 +15,4 @@ dependencies:
repository: "file://../redis"
version: 0.0.1
type: application
version: 2.1.8
version: 2.1.9-rc.1

View File

@@ -1,9 +1,9 @@
dependencies:
- name: higress-core
repository: file://../core
version: 2.1.8
version: 2.1.9-rc.1
- name: higress-console
repository: https://higress.io/helm-charts/
version: 2.1.8
digest: sha256:20abb8842774217237293b49a46e885dd3e5edd3da118c5fe449e50d19e9c9e3
generated: "2025-09-21T14:55:25.084965+08:00"
digest: sha256:0899e57f8744790bef3061413d6ce43ca4a54ac21fbe44fc0af7db973da28a79
generated: "2025-10-09T17:25:21.377573+08:00"

View File

@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 2.1.8
appVersion: 2.1.9-rc.1
description: Helm chart for deploying Higress gateways
icon: https://higress.io/img/higress_logo_small.png
home: http://higress.io/
@@ -12,9 +12,9 @@ sources:
dependencies:
- name: higress-core
repository: "file://../core"
version: 2.1.8
version: 2.1.9-rc.1
- name: higress-console
repository: "https://higress.io/helm-charts/"
version: 2.1.8
type: application
version: 2.1.8
version: 2.1.9-rc.1

View File

@@ -53,7 +53,8 @@ func (a mcpServer) Parse(annotations Annotations, config *Ingress, globalContext
var matchRuleDomains []string
rawMatchRuleDomains, _ := annotations.ParseStringASAP(mcpServerMatchRuleDomains)
if rawMatchRuleDomains == "" || rawMatchRuleDomains == "*" {
// Match all domains. Leave an empty slice.
// Use wildcard to match all domains so we don't rely on the default behavior of empty domain list
matchRuleDomains = []string{"*"}
} else if strings.Contains(rawMatchRuleDomains, ",") {
matchRuleDomains = strings.Split(rawMatchRuleDomains, ",")
} else {

View File

@@ -132,7 +132,7 @@ func TestMCPServer_Parse(t *testing.T) {
},
expect: &mcpserver.McpServer{
Name: "default/route",
Domains: nil,
Domains: []string{"*"},
PathMatchType: "prefix",
PathMatchValue: "/mcp",
UpstreamType: "rest",
@@ -153,7 +153,7 @@ func TestMCPServer_Parse(t *testing.T) {
},
expect: &mcpserver.McpServer{
Name: "default/route",
Domains: nil,
Domains: []string{"*"},
PathMatchType: "prefix",
PathMatchValue: "/mcp",
UpstreamType: "rest",

View File

@@ -55,16 +55,21 @@ require (
github.com/milvus-io/milvus-sdk-go/v2 v2.4.2 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/openai/openai-go/v2 v2.7.0 // indirect
github.com/orcaman/concurrent-map v0.0.0-20210501183033-44dafcb38ecc // indirect
github.com/pkoukk/tiktoken-go v0.1.8 // indirect
github.com/prometheus/client_golang v1.14.0 // indirect
github.com/prometheus/client_model v0.4.0 // indirect
github.com/prometheus/common v0.37.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
github.com/tidwall/gjson v1.18.0 // indirect
github.com/tidwall/match v1.2.0 // indirect
github.com/tidwall/pretty v1.2.1 // indirect
github.com/tidwall/sjson v1.2.5 // indirect
github.com/tjfoc/gmsm v1.4.1 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.27.0 // indirect
golang.org/x/net v0.33.0 // indirect
golang.org/x/net v0.34.0 // indirect
golang.org/x/time v0.3.0 // indirect
google.golang.org/grpc v1.59.0 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
@@ -99,9 +104,9 @@ require (
github.com/shopspring/decimal v1.4.0 // indirect
go.opentelemetry.io/otel v1.26.0 // indirect
go.opentelemetry.io/otel/trace v1.26.0 // indirect
golang.org/x/crypto v0.31.0 // indirect
golang.org/x/crypto v0.32.0 // indirect
golang.org/x/sync v0.10.0 // indirect
golang.org/x/sys v0.28.0 // indirect
golang.org/x/sys v0.29.0 // indirect
golang.org/x/text v0.21.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 // indirect

View File

@@ -311,6 +311,8 @@ github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU=
github.com/onsi/gomega v1.24.2 h1:J/tulyYK6JwBldPViHJReihxxZ+22FHs0piGjQAvoUE=
github.com/onsi/gomega v1.24.2/go.mod h1:gs3J10IS7Z7r7eXRoNJIrNqU4ToQukCJhFtKrWgHWnk=
github.com/openai/openai-go/v2 v2.7.0 h1:/8MSFCXcasin7AyuWQ2au6FraXL71gzAs+VfbMv+J3k=
github.com/openai/openai-go/v2 v2.7.0/go.mod h1:jrJs23apqJKKbT+pqtFgNKpRju/KP9zpUTZhz3GElQE=
github.com/orcaman/concurrent-map v0.0.0-20210501183033-44dafcb38ecc h1:Ak86L+yDSOzKFa7WM5bf5itSOo1e3Xh8bm5YCMUXIjQ=
github.com/orcaman/concurrent-map v0.0.0-20210501183033-44dafcb38ecc/go.mod h1:Lu3tH6HLW3feq74c2GC+jIMS/K2CFcDWnWD9XkenwhI=
github.com/paulmach/orb v0.11.1 h1:3koVegMC4X/WeiXYz9iswopaTwMem53NzTJuTF20JzU=
@@ -377,7 +379,18 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/match v1.2.0 h1:0pt8FlkOwjN2fPt4bIl4BoNxb98gGHN2ObFEDkrfZnM=
github.com/tidwall/match v1.2.0/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
github.com/tjfoc/gmsm v1.3.2/go.mod h1:HaUcFuY0auTiaHB9MHFGCPx5IaLhTUd2atbCFBQXn9w=
github.com/tjfoc/gmsm v1.4.1 h1:aMe1GlZb+0bLjn+cKTPEvvn9oUEBlJitaZiiBwsbgho=
github.com/tjfoc/gmsm v1.4.1/go.mod h1:j4INPkHWMrhJb38G+J6W4Tw0AbuN8Thu3PbdVYhVcTE=
@@ -426,6 +439,8 @@ golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDf
golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc=
golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@@ -506,6 +521,8 @@ golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0=
golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -581,6 +598,8 @@ golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=

View File

@@ -1,173 +0,0 @@
# Higress RAG MCP Server
这是一个 Model Context Protocol (MCP) 服务器,提供知识管理和检索功能。
该 MCP 服务器提供以下工具:
## MCP Tools
### 知识管理
- `create-chunks-from-text` - 从 Text 创建知识 (p1)
### 块管理
- `list-chunks` - 列出知识块
- `delete-chunk` - 删除知识块
### 搜索
- `search` - 搜索
### 聊天功能
- `chat` - 发送聊天消息
## 配置说明
### 配置结构
```yaml
rag:
# RAG系统基础配置
splitter:
type: "recursive" # 递归分块器 recursive 和 nosplitter
chunk_size: 500
chunk_overlap: 50
top_k: 5 # 搜索返回的知识块数量
threshold: 0.5 # 搜索阈值
llm:
provider: "openai" # openai
api_key: "your-llm-api-key"
base_url: "https://api.openai.com/v1" # 可选
model: "gpt-3.5-turbo" # LLM模型
max_tokens: 2048 # 最大令牌数
temperature: 0.5 # 温度参数
embedding:
provider: "openai" # openai, dashscope
api_key: "your-embedding-api-key"
base_url: "https://api.openai.com/v1" # 可选
model: "text-embedding-ada-002" # 嵌入模型
vectordb:
provider: "milvus" # milvus
host: "localhost"
port: 19530
database: "default"
collection: "test_collection"
username: "" # 可选
password: "" # 可选
```
### higress-config 配置样例
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: higress-config
namespace: higress-system
data:
higress: |
mcpServer:
enable: true
sse_path_suffix: "/sse"
redis:
address: "<Redis IP>:6379"
username: ""
password: ""
db: 0
match_list:
- path_rewrite_prefix: ""
upstream_type: ""
enable_path_rewrite: false
match_rule_domain: ""
match_rule_path: "/mcp-servers/rag"
match_rule_type: "prefix"
servers:
- path: "/mcp-servers/rag"
name: "rag"
type: "rag"
config:
rag:
splitter:
provider: recursive
chunk_size: 500
chunk_overlap: 50
top_k: 10
threshold: 0.5
llm:
provider: openai
api_key: sk-XXX
base_url: https://openrouter.ai/api/v1
model: openai/gpt-4o
temperature: 0.5
max_tokens: 2048
embedding:
provider: dashscope
api_key: sk-xxx
model: text-embedding-v4
vectordb:
provider: milvus
host: <milvus IP>
port: 19530
database: default
collection: test_collection
```
### 支持的提供商
#### Embedding
- **OpenAI**
- **DashScope**
#### Vector Database
- **Milvus**
#### LLM
- **OpenAI**
## Milvus 安装
### Docker 配置
配置 Docker Desktop 镜像加速器
编辑 daemon.json 配置,加上镜像加速器,例如:
```
{
"registry-mirrors": [
"https://docker.m.daocloud.io",
"https://mirror.ccs.tencentyun.com",
"https://hub-mirror.c.163.com"
],
"dns": ["8.8.8.8", "1.1.1.1"]
}
```
### 安装 milvus
```
v2.6.0
Download the configuration file
wget https://github.com/milvus-io/milvus/releases/download/v2.6.0/milvus-standalone-docker-compose.yml -O docker-compose.yml
v2.4
$ wget https://github.com/milvus-io/milvus/releases/download/v2.4.23/milvus-standalone-docker-compose.yml -O docker-compose.yml
# Start Milvus
$ sudo docker compose up -d
Creating milvus-etcd ... done
Creating milvus-minio ... done
Creating milvus-standalone ... done
```
### 安装 attu
Attu 是 Milvus 的可视化管理工具,用于查看和管理 Milvus 中的数据。
```
docker run -p 8000:3000 -e MILVUS_URL=http://<本机 IP>:19530 zilliz/attu:v2.6
Open your browser and navigate to http://localhost:8000
```

View File

@@ -0,0 +1,327 @@
# Higress RAG MCP Server
这是一个 Model Context Protocol (MCP) 服务器,提供知识管理和检索功能。
## MCP 工具说明
Higress RAG MCP Server 提供以下工具,根据配置不同,可用工具也会有所差异:
| 工具名称 | 功能描述 | 依赖配置 | 必选/可选 |
|---------|---------|---------|----------|
| `create-chunks-from-text` | 将文本内容分块并存储到向量数据库,用于知识库构建 | embedding, vectordb | **必选** |
| `list-chunks` | 列出已存储的知识块,用于知识库管理 | vectordb | **必选** |
| `delete-chunk` | 删除指定的知识块,用于知识库维护 | vectordb | **必选** |
| `search` | 基于语义相似度搜索知识库中的内容 | embedding, vectordb | **必选** |
| `chat` | 基于检索增强生成(RAG)回答用户问题,结合知识库内容生成回答 | embedding, vectordb, llm | **可选** |
### 工具与配置的关系
- **基础功能**(知识管理、搜索):只需配置 `embedding` 和 `vectordb`
- **高级功能**(聊天问答):需额外配置 `llm`
具体关系如下:
- 未配置 `llm` 时,`chat` 工具将不可用
- 所有工具都依赖 `embedding` 和 `vectordb` 配置
- `rag` 配置用于调整分块和检索参数,影响所有工具的行为
## 典型使用场景
### 最小工具集场景(无 LLM 配置)
适用于仅需要知识库管理和检索的场景,不需要生成式回答。
**可用工具**:`create-chunks-from-text`、`list-chunks`、`delete-chunk`、`search`
**典型用例**
1. 构建企业文档库,仅需检索相关文档片段
2. 数据索引系统,通过语义搜索快速定位信息
3. 内容管理系统,管理和检索结构化/非结构化内容
**示例流程**
```
1. 使用 create-chunks-from-text 导入文档
2. 使用 search 检索相关内容
3. 使用 list-chunks 和 delete-chunk 管理知识库
```
### 完整工具集场景(含 LLM 配置)
适用于需要智能问答和内容生成的高级场景。
**可用工具**:`create-chunks-from-text`、`list-chunks`、`delete-chunk`、`search`、`chat`
**典型用例**
1. 智能客服系统,基于企业知识库回答用户问题
2. 文档助手,帮助用户理解和分析复杂文档
3. 专业领域问答系统,如法律、金融、技术支持等
**示例流程**
```
1. 使用 create-chunks-from-text 导入专业领域文档
2. 用户通过 chat 工具提问
3. 系统使用 search 检索相关知识
4. LLM 结合检索结果生成回答
5. 管理员使用 list-chunks 和 delete-chunk 维护知识库
```
## 配置说明
### 配置结构
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|----------------------------|----------|-----------|---------|--------|
| **rag** | object | 必填 | - | RAG系统基础配置 |
| rag.splitter.provider | string | 必填 | recursive | 分块器类型recursive或nosplitter |
| rag.splitter.chunk_size | integer | 可选 | 500 | 块大小 |
| rag.splitter.chunk_overlap | integer | 可选 | 50 | 块重叠大小 |
| rag.top_k | integer | 可选 | 10 | 搜索返回的知识块数量 |
| rag.threshold | float | 可选 | 0.5 | 搜索阈值 |
| **llm** | object | 可选 | - | LLM配置不配置则无chat功能 |
| llm.provider | string | 可选 | openai | LLM提供商 |
| llm.api_key | string | 可选 | - | LLM API密钥 |
| llm.base_url | string | 可选 | | LLM API基础URL |
| llm.model | string | 可选 | gpt-4o | LLM模型名称 |
| llm.max_tokens | integer | 可选 | 2048 | 最大令牌数 |
| llm.temperature | float | 可选 | 0.5 | 温度参数 |
| **embedding** | object | 必填 | - | 嵌入配置(所有工具必需) |
| embedding.provider | string | 必填 | openai | 嵌入提供商支持openai协议的任意供应商 |
| embedding.api_key | string | 必填 | - | 嵌入API密钥 |
| embedding.base_url | string | 可选 | | 嵌入API基础URL |
| embedding.model | string | 必填 | text-embedding-ada-002 | 嵌入模型名称 |
| embedding.dimensions | integer | 可选 | 1536 | 嵌入维度 |
| **vectordb** | object | 必填 | - | 向量数据库配置(所有工具必需) |
| vectordb.provider | string | 必填 | milvus | 向量数据库提供商 |
| vectordb.host | string | 必填 | localhost | 数据库主机地址 |
| vectordb.port | integer | 必填 | 19530 | 数据库端口 |
| vectordb.database | string | 必填 | default | 数据库名称 |
| vectordb.collection | string | 必填 | test_collection | 集合名称 |
| vectordb.username | string | 可选 | - | 数据库用户名 |
| vectordb.password | string | 可选 | - | 数据库密码 |
| **vectordb.mapping** | object | 可选 | - | 字段映射配置 |
| vectordb.mapping.fields | array | 可选 | - | 字段映射列表 |
| vectordb.mapping.fields[].standard_name | string | 必填 | - | 标准字段名称(如 id, content, vector 等) |
| vectordb.mapping.fields[].raw_name | string | 必填 | - | 原始字段名称(数据库中的实际字段名) |
| vectordb.mapping.fields[].properties | object | 可选 | - | 字段属性(如 auto_id, max_length 等) |
| vectordb.mapping.index | object | 可选 | - | 索引配置 |
| vectordb.mapping.index.index_type | string | 必填 | - | 索引类型(如 FLAT, IVF_FLAT, HNSW 等) |
| vectordb.mapping.index.params | object | 可选 | - | 索引参数(根据索引类型不同而异) |
| vectordb.mapping.search | object | 可选 | - | 搜索配置 |
| vectordb.mapping.search.metric_type | string | 可选 | L2 | 度量类型(如 L2, IP, COSINE 等) |
| vectordb.mapping.search.params | object | 可选 | - | 搜索参数(如 nprobe, ef_search 等)
### higress-config 配置样例
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: higress-config
namespace: higress-system
data:
higress: |
mcpServer:
enable: true
sse_path_suffix: "/sse"
redis:
address: "<Redis IP>:6379"
username: ""
password: ""
db: 0
match_list:
- path_rewrite_prefix: ""
upstream_type: ""
enable_path_rewrite: false
match_rule_domain: ""
match_rule_path: "/mcp-servers/rag"
match_rule_type: "prefix"
servers:
- path: "/mcp-servers/rag"
name: "rag"
type: "rag"
config:
rag:
splitter:
provider: recursive
chunk_size: 500
chunk_overlap: 50
top_k: 10
threshold: 0.5
llm:
provider: openai
api_key: sk-XXX
base_url: https://openrouter.ai/api/v1
model: openai/gpt-4o
temperature: 0.5
max_tokens: 2048
embedding:
provider: openai
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: sk-xxx
model: text-embedding-v4
dimensions: 1536
vectordb:
provider: milvus
host: localhost
port: 19530
database: default
collection: test_rag
mapping:
fields:
- standard_name: id
raw_name: id
properties:
auto_id: false
max_length: 256
- standard_name: content
raw_name: content
properties:
max_length: 8192
- standard_name: vector
raw_name: vector
- standard_name: metadata
raw_name: metadata
- standard_name: created_at
raw_name: created_at
index:
index_type: HNSW
params:
M: 4
efConstruction: 32
search:
metric_type: IP
params:
ef: 32
```
### 支持的提供商
#### Embedding
- **OpenAI 兼容**
#### Vector Database
- **Milvus**
#### LLM
- **OpenAI 兼容**
## 如何测试数据集的效果
测试数据集的效果分两步:第一步,导入数据集语料;第二步,测试 Chat 效果。
### 导入数据集语料
使用 `RAGClient.CreateChunkFromText` 工具导入数据集语料,比如数据集语料格式为 JSON每个 JSON 对象包含 `body``title``url` 等字段。样例代码如下:
```golang
func TestRAGClient_LoadChunks(t *testing.T) {
t.Logf("TestRAGClient_LoadChunks")
ragClient, err := getRAGClient()
if err != nil {
t.Errorf("getRAGClient() error = %v", err)
return
}
// load json output/corpus.json and then call ragclient CreateChunkFromText to insert chunks
file, err := os.Open("/dataset/corpus.json")
if err != nil {
t.Errorf("LoadData() error = %v", err)
return
}
defer file.Close()
decoder := json.NewDecoder(file)
var data []struct {
Body string `json:"body"`
Title string `json:"title"`
Url string `json:"url"`
}
if err := decoder.Decode(&data); err != nil {
t.Errorf("LoadData() error = %v", err)
return
}
for _, item := range data {
t.Logf("LoadData() url = %s", item.Url)
t.Logf("LoadData() title = %s", item.Title)
t.Logf("LoadData() len body = %d", len(item.Body))
chunks, err := ragClient.CreateChunkFromText(item.Body, item.Title)
if err != nil {
t.Errorf("LoadData() error = %v", err)
continue
} else {
t.Logf("LoadData() chunks len = %d", len(chunks))
}
}
t.Logf("TestRAGClient_LoadChunks done")
}
```
### 测试Chat效果
使用 `RAGClient.Chat` 工具测试 Chat 效果。样例代码如下:
```golang
func TestRAGClient_Chat(t *testing.T) {
ragClient, err := getRAGClient()
if err != nil {
t.Errorf("getRAGClient() error = %v", err)
return
}
query := "Which online betting platform provides a welcome bonus of up to $1000 in bonus bets for new customers' first losses, runs NBA betting promotions, and is anticipated to extend the same sign-up offer to new users in Vermont, as reported by both CBSSports.com and Sporting News?"
resp, err := ragClient.Chat(query)
if err != nil {
t.Errorf("Chat() error = %v", err)
return
}
if resp == "" {
t.Errorf("Chat() resp = %s, want not empty", resp)
return
}
t.Logf("Chat() resp = %s", resp)
}
```
## Milvus 安装
### Docker 配置
配置 Docker Desktop 镜像加速器
编辑 daemon.json 配置,加上镜像加速器,例如:
```
{
"registry-mirrors": [
"https://docker.m.daocloud.io",
"https://mirror.ccs.tencentyun.com",
"https://hub-mirror.c.163.com"
],
"dns": ["8.8.8.8", "1.1.1.1"]
}
```
### 安装 milvus
```
v2.6.0
Download the configuration file
wget https://github.com/milvus-io/milvus/releases/download/v2.6.0/milvus-standalone-docker-compose.yml -O docker-compose.yml
v2.4
$ wget https://github.com/milvus-io/milvus/releases/download/v2.4.23/milvus-standalone-docker-compose.yml -O docker-compose.yml
# Start Milvus
$ sudo docker compose up -d
Creating milvus-etcd ... done
Creating milvus-minio ... done
Creating milvus-standalone ... done
```
### 安装 attu
Attu 是 Milvus 的可视化管理工具,用于查看和管理 Milvus 中的数据。
```
docker run -p 8000:3000 -e MILVUS_URL=http://<本机 IP>:19530 zilliz/attu:v2.6
Open your browser and navigate to http://localhost:8000
```

View File

@@ -1,5 +1,7 @@
package config
import "fmt"
// Config represents the main configuration structure for the MCP server
type Config struct {
RAG RAGConfig `json:"rag" yaml:"rag"`
@@ -34,20 +36,148 @@ type LLMConfig struct {
// EmbeddingConfig defines configuration for embedding models.
// (The rendered span duplicated the pre- and post-change field lists; collapsed
// to the post-change struct.)
type EmbeddingConfig struct {
	Provider string `json:"provider" yaml:"provider"` // Available options: openai, dashscope
	APIKey   string `json:"api_key,omitempty" yaml:"api_key,omitempty"`
	BaseURL  string `json:"base_url,omitempty" yaml:"base_url,omitempty"`
	Model    string `json:"model,omitempty" yaml:"model,omitempty"`
	// Dimensions is the embedding vector dimension. The YAML tag is fixed to
	// "dimensions" (plural) to match the JSON tag and the documented config
	// key used in the README and the higress-config sample.
	Dimensions int `json:"dimensions,omitempty" yaml:"dimensions,omitempty"`
}
// VectorDBConfig defines configuration for vector databases
type VectorDBConfig struct {
Provider string `json:"provider" yaml:"provider"` // Available options: milvus, qdrant, chroma
Host string `json:"host,omitempty" yaml:"host,omitempty"`
Port int `json:"port,omitempty" yaml:"port,omitempty"`
Database string `json:"database,omitempty" yaml:"database,omitempty"`
Collection string `json:"collection,omitempty" yaml:"collection,omitempty"`
Username string `json:"username,omitempty" yaml:"username,omitempty"`
Password string `json:"password,omitempty" yaml:"password,omitempty"`
Provider string `json:"provider" yaml:"provider"` // Available options: milvus, qdrant, chroma
Host string `json:"host,omitempty" yaml:"host,omitempty"`
Port int `json:"port,omitempty" yaml:"port,omitempty"`
Database string `json:"database,omitempty" yaml:"database,omitempty"`
Collection string `json:"collection,omitempty" yaml:"collection,omitempty"`
Username string `json:"username,omitempty" yaml:"username,omitempty"`
Password string `json:"password,omitempty" yaml:"password,omitempty"`
Mapping MappingConfig `json:"mapping,omitempty" yaml:"mapping,omitempty"`
}
// MappingConfig defines field mapping configuration for vector databases.
// It groups three optional customizations: per-field name mapping (Fields),
// index construction parameters (Index), and search-time parameters (Search).
type MappingConfig struct {
Fields []FieldMapping `json:"fields,omitempty" yaml:"fields,omitempty"`
Index IndexConfig `json:"index,omitempty" yaml:"index,omitempty"`
Search SearchConfig `json:"search,omitempty" yaml:"search,omitempty"`
}
// // CollectionMapping defines field mapping for collection
// type CollectionMapping struct {
// Fields []FieldMapping `json:"fields,omitempty" yaml:"fields,omitempty"`
// }
// FieldMapping defines mapping for a single field: a standard field name
// (e.g. "id", "content", "vector") mapped to the raw field name used in the
// vector database, plus optional free-form properties.
type FieldMapping struct {
	StandardName string                 `json:"standard_name" yaml:"standard_name"`
	RawName      string                 `json:"raw_name" yaml:"raw_name"`
	Properties   map[string]interface{} `json:"properties,omitempty" yaml:"properties,omitempty"`
}

// IsPrimaryKey reports whether this field maps to the standard "id" field.
func (f FieldMapping) IsPrimaryKey() bool {
	return f.StandardName == "id"
}

// IsAutoID reports whether the "auto_id" property is present and true.
func (f FieldMapping) IsAutoID() bool {
	if f.Properties == nil {
		return false
	}
	autoID, ok := f.Properties["auto_id"].(bool)
	return ok && autoID
}

// IsVectorField reports whether this field maps to the standard "vector" field.
func (f FieldMapping) IsVectorField() bool {
	return f.StandardName == "vector"
}

// MaxLength returns the "max_length" property, defaulting to 256 when it is
// absent. Previously a nil Properties map returned 0 while a missing key
// returned 256; both "unset" cases now use the same default. Numbers decoded
// from JSON arrive as float64 and numbers decoded from YAML may arrive as
// int or int64, so all three representations are accepted.
func (f FieldMapping) MaxLength() int {
	const defaultMaxLength = 256
	if f.Properties == nil {
		return defaultMaxLength
	}
	switch v := f.Properties["max_length"].(type) {
	case int:
		return v
	case int64:
		return int(v)
	case float64:
		return int(v)
	default:
		return defaultMaxLength
	}
}
// IndexConfig defines configuration for index parameters.
type IndexConfig struct {
	// Index type, e.g., IVF_FLAT, IVF_SQ8, HNSW, etc.
	IndexType string `json:"index_type" yaml:"index_type"`
	// Index parameter configuration (free-form key/value pairs).
	Params map[string]interface{} `json:"params" yaml:"params"`
}

// ParamsString returns the string parameter stored under key, or an error
// when the key is absent or not a string.
func (i IndexConfig) ParamsString(key string) (string, error) {
	if v, ok := i.Params[key].(string); ok {
		return v, nil
	}
	return "", fmt.Errorf("params %s not found", key)
}

// ParamsInt64 returns the integer parameter stored under key. Numbers decoded
// from JSON arrive as float64 and numbers decoded from YAML may arrive as int
// or int64, so all three representations are accepted.
func (i IndexConfig) ParamsInt64(key string) (int64, error) {
	switch v := i.Params[key].(type) {
	case int64:
		return v, nil
	case int:
		return int64(v), nil
	case float64:
		return int64(v), nil
	}
	return 0, fmt.Errorf("params %s not found", key)
}

// ParamsFloat64 returns the floating-point parameter stored under key,
// accepting float64, float32, and integer representations.
func (i IndexConfig) ParamsFloat64(key string) (float64, error) {
	switch v := i.Params[key].(type) {
	case float64:
		return v, nil
	case float32:
		return float64(v), nil
	case int:
		return float64(v), nil
	case int64:
		return float64(v), nil
	}
	return 0, fmt.Errorf("params %s not found", key)
}

// ParamsBool returns the boolean parameter stored under key, or an error when
// the key is absent or not a bool.
func (i IndexConfig) ParamsBool(key string) (bool, error) {
	if v, ok := i.Params[key].(bool); ok {
		return v, nil
	}
	return false, fmt.Errorf("params %s not found", key)
}
// SearchConfig defines configuration for search parameters.
type SearchConfig struct {
	// Metric type, e.g., L2, IP, etc.
	MetricType string `json:"metric_type,omitempty" yaml:"metric_type,omitempty"`
	// Search parameter configuration (free-form key/value pairs).
	Params map[string]interface{} `json:"params" yaml:"params"`
}

// ParamsString returns the string parameter stored under key, or an error
// when the key is absent or not a string.
func (s SearchConfig) ParamsString(key string) (string, error) {
	if v, ok := s.Params[key].(string); ok {
		return v, nil
	}
	return "", fmt.Errorf("params %s not found", key)
}

// ParamsInt64 returns the integer parameter stored under key. It now accepts
// int and float64 (JSON-decoded numbers) in addition to int64, matching the
// sibling IndexConfig.ParamsInt64 behavior.
func (s SearchConfig) ParamsInt64(key string) (int64, error) {
	switch v := s.Params[key].(type) {
	case int64:
		return v, nil
	case int:
		return int64(v), nil
	case float64:
		return int64(v), nil
	}
	return 0, fmt.Errorf("params %s not found", key)
}

// ParamsFloat64 returns the floating-point parameter stored under key,
// accepting float64, float32, and integer representations.
func (s SearchConfig) ParamsFloat64(key string) (float64, error) {
	switch v := s.Params[key].(type) {
	case float64:
		return v, nil
	case float32:
		return float64(v), nil
	case int:
		return float64(v), nil
	case int64:
		return float64(v), nil
	}
	return 0, fmt.Errorf("params %s not found", key)
}

// ParamsBool returns the boolean parameter stored under key, or an error when
// the key is absent or not a bool.
func (s SearchConfig) ParamsBool(key string) (bool, error) {
	if v, ok := s.Params[key].(bool); ok {
		return v, nil
	}
	return false, fmt.Errorf("params %s not found", key)
}

View File

@@ -1,169 +0,0 @@
package embedding
import (
"context"
"encoding/json"
"errors"
"fmt"
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/common"
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/config"
)
const (
DASHSCOPE_DOMAIN = "dashscope.aliyuncs.com"
DASHSCOPE_PORT = 443
DASHSCOPE_DEFAULT_MODEL_NAME = "text-embedding-v4"
DASHSCOPE_ENDPOINT = "/api/v1/services/embeddings/text-embedding/text-embedding"
)
var dashScopeConfig dashScopeProviderConfig
type dashScopeProviderInitializer struct {
}
type dashScopeProviderConfig struct {
apiKey string
model string
}
func (c *dashScopeProviderInitializer) InitConfig(config config.EmbeddingConfig) {
dashScopeConfig.apiKey = config.APIKey
dashScopeConfig.model = config.Model
}
func (c *dashScopeProviderInitializer) ValidateConfig() error {
if dashScopeConfig.apiKey == "" {
return errors.New("[DashScope] apiKey is required")
}
return nil
}
func (c *dashScopeProviderInitializer) CreateProvider(config config.EmbeddingConfig) (Provider, error) {
c.InitConfig(config)
err := c.ValidateConfig()
if err != nil {
return nil, err
}
headers := map[string]string{
"Authorization": "Bearer " + config.APIKey,
"Content-Type": "application/json",
}
httpClient := common.NewHTTPClient(fmt.Sprintf("https://%s", DASHSCOPE_DOMAIN), headers)
return &DashScopeProvider{
config: dashScopeConfig,
client: httpClient,
}, nil
}
// GetProviderType returns the DashScope provider type identifier.
func (d *DashScopeProvider) GetProviderType() string {
return PROVIDER_TYPE_DASHSCOPE
}
type Embedding struct {
Embedding []float32 `json:"embedding"`
TextIndex int `json:"text_index"`
}
type Input struct {
Texts []string `json:"texts"`
}
type Params struct {
TextType string `json:"text_type"`
}
type Response struct {
RequestID string `json:"request_id"`
Output Output `json:"output"`
Usage Usage `json:"usage"`
}
type Output struct {
Embeddings []Embedding `json:"embeddings"`
}
type Usage struct {
TotalTokens int `json:"total_tokens"`
}
type EmbeddingRequest struct {
Model string `json:"model"`
Input Input `json:"input"`
Parameters Params `json:"parameters"`
}
type Document struct {
Vector []float64 `json:"vector"`
Fields map[string]string `json:"fields"`
}
type DashScopeProvider struct {
config dashScopeProviderConfig
client *common.HTTPClient
}
// constructRequestData builds the DashScope text-embedding request body for
// the given texts. It falls back to DASHSCOPE_DEFAULT_MODEL_NAME when no
// model is configured and fails fast when the API key is missing.
func (d *DashScopeProvider) constructRequestData(texts []string) (EmbeddingRequest, error) {
model := d.config.model
if model == "" {
model = DASHSCOPE_DEFAULT_MODEL_NAME
}
// NOTE(review): reads the package-level dashScopeConfig rather than
// d.config — presumably equivalent because CreateProvider copies the same
// value into both, but verify before relying on it.
if dashScopeConfig.apiKey == "" {
return EmbeddingRequest{}, errors.New("dashScopeKey is empty")
}
data := EmbeddingRequest{
Model: model,
Input: Input{
Texts: texts,
},
// TextType "query" is hard-coded; all embeddings are requested as
// query-side vectors.
Parameters: Params{
TextType: "query",
},
}
return data, nil
}
type Result struct {
ID string `json:"id"`
Vector []float32 `json:"vector,omitempty"`
Fields map[string]interface{} `json:"fields"`
Score float64 `json:"score"`
}
// parseTextEmbedding decodes a raw DashScope response body into a Response.
func (d *DashScopeProvider) parseTextEmbedding(responseBody []byte) (*Response, error) {
	resp := &Response{}
	if err := json.Unmarshal(responseBody, resp); err != nil {
		return nil, err
	}
	return resp, nil
}
// GetEmbedding requests an embedding for queryString from DashScope and
// returns the vector of the first (and only) input text.
//
// Fix: wrap underlying errors with %w instead of %v so callers can inspect
// them with errors.Is/errors.As; the rendered messages are unchanged.
// NOTE(review): ctx is accepted but not propagated to the HTTP client —
// confirm whether common.HTTPClient supports context-aware requests.
func (d *DashScopeProvider) GetEmbedding(
	ctx context.Context,
	queryString string) ([]float32, error) {
	requestData, err := d.constructRequestData([]string{queryString})
	if err != nil {
		return nil, fmt.Errorf("failed to construct request data: %w", err)
	}
	responseBody, err := d.client.Post(DASHSCOPE_ENDPOINT, requestData)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	embeddingResp, err := d.parseTextEmbedding(responseBody)
	if err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}
	if len(embeddingResp.Output.Embeddings) == 0 {
		return nil, errors.New("no embedding found in response")
	}
	return embeddingResp.Output.Embeddings[0].Embedding, nil
}

View File

@@ -2,160 +2,93 @@ package embedding
import (
"context"
"encoding/json"
"errors"
"fmt"
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/common"
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/config"
"github.com/openai/openai-go/v2"
"github.com/openai/openai-go/v2/option"
)
const (
OPENAI_DOMAIN = "api.openai.com"
OPENAI_PORT = 443
OPENAI_DEFAULT_MODEL_NAME = "text-embedding-3-small"
OPENAI_ENDPOINT = "/v1/embeddings"
OPENAI_DEFAULT_MODEL_NAME = "text-embedding-ada-002"
)
type openAIProviderInitializer struct {
}
var openAIConfig openAIProviderConfig
type openAIProviderConfig struct {
baseUrl string
apiKey string
model string
}
func (c *openAIProviderInitializer) InitConfig(config config.EmbeddingConfig) {
openAIConfig.apiKey = config.APIKey
openAIConfig.model = config.Model
openAIConfig.baseUrl = config.BaseURL
}
func (c *openAIProviderInitializer) ValidateConfig() error {
if openAIConfig.apiKey == "" {
return errors.New("[openAI] apiKey is required")
func (c *openAIProviderInitializer) validateConfig(config *config.EmbeddingConfig) error {
if config.APIKey == "" {
return errors.New("[openai embbeding] apiKey is required")
}
if config.Model == "" {
config.Model = OPENAI_DEFAULT_MODEL_NAME
}
if config.Dimensions <= 0 {
config.Dimensions = 1536
}
return nil
}
func (c *openAIProviderInitializer) CreateProvider(config config.EmbeddingConfig) (Provider, error) {
c.InitConfig(config)
err := c.ValidateConfig()
if err != nil {
if err := c.validateConfig(&config); err != nil {
return nil, err
}
// 创建 OpenAI 客户端
var clientOptions []option.RequestOption
clientOptions = append(clientOptions, option.WithAPIKey(config.APIKey))
if openAIConfig.model == "" {
openAIConfig.model = OPENAI_DEFAULT_MODEL_NAME
// 如果设置了自定义 baseURL则使用它
if config.BaseURL != "" {
clientOptions = append(clientOptions, option.WithBaseURL(config.BaseURL))
}
if openAIConfig.baseUrl == "" {
openAIConfig.baseUrl = fmt.Sprintf("https://%s", OPENAI_DOMAIN)
}
headers := map[string]string{
"Authorization": "Bearer " + config.APIKey,
"Content-Type": "application/json",
}
httpClient := common.NewHTTPClient(openAIConfig.baseUrl, headers)
// 创建 OpenAI 客户端
client := openai.NewClient(clientOptions...)
return &OpenAIProvider{
config: openAIConfig,
client: httpClient,
client: &client,
model: config.Model,
dimensions: config.Dimensions,
}, nil
}
func (o *OpenAIProvider) GetProviderType() string {
// EmbeddingClient handles vector embedding generation using OpenAI-compatible APIs
type OpenAIProvider struct {
client *openai.Client
model string
dimensions int
}
func (e *OpenAIProvider) GetProviderType() string {
return PROVIDER_TYPE_OPENAI
}
type OpenAIResponse struct {
Object string `json:"object"`
Data []OpenAIResult `json:"data"`
Model string `json:"model"`
Error *OpenAIError `json:"error"`
}
type OpenAIResult struct {
Object string `json:"object"`
Embedding []float32 `json:"embedding"`
Index int `json:"index"`
}
type OpenAIError struct {
Message string `json:"prompt_tokens"`
Type string `json:"type"`
Code string `json:"code"`
Param string `json:"param"`
}
type OpenAIEmbeddingRequest struct {
Input string `json:"input"`
Model string `json:"model"`
}
type OpenAIProvider struct {
config openAIProviderConfig
client *common.HTTPClient
}
func (o *OpenAIProvider) constructRequestData(text string) (OpenAIEmbeddingRequest, error) {
if text == "" {
return OpenAIEmbeddingRequest{}, errors.New("queryString text cannot be empty")
// GetEmbedding generates vector embedding for the given text
func (e *OpenAIProvider) GetEmbedding(ctx context.Context, text string) ([]float32, error) {
params := openai.EmbeddingNewParams{
Model: e.model,
Input: openai.EmbeddingNewParamsInputUnion{
OfString: openai.String(text),
},
Dimensions: openai.Int(int64(e.dimensions)),
EncodingFormat: openai.EmbeddingNewParamsEncodingFormatFloat,
}
if openAIConfig.apiKey == "" {
return OpenAIEmbeddingRequest{}, errors.New("openAI apiKey is empty")
}
model := o.config.model
if model == "" {
model = OPENAI_DEFAULT_MODEL_NAME
}
data := OpenAIEmbeddingRequest{
Input: text,
Model: model,
}
return data, nil
}
func (o *OpenAIProvider) parseTextEmbedding(responseBody []byte) (*OpenAIResponse, error) {
var resp OpenAIResponse
err := json.Unmarshal(responseBody, &resp)
embeddingResp, err := e.client.Embeddings.New(ctx, params)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to generate embedding: %w", err)
}
return &resp, nil
}
func (o *OpenAIProvider) GetEmbedding(ctx context.Context, queryString string) ([]float32, error) {
requestData, err := o.constructRequestData(queryString)
if err != nil {
return nil, fmt.Errorf("failed to construct request data: %v", err)
}
responseBody, err := o.client.Post(OPENAI_ENDPOINT, requestData)
if err != nil {
return nil, fmt.Errorf("failed to send request: %v", err)
}
resp, err := o.parseTextEmbedding(responseBody)
if err != nil {
return nil, fmt.Errorf("failed to parse response: %v", err)
}
if resp.Error != nil {
return nil, fmt.Errorf("OpenAI API error: %s - %s", resp.Error.Type, resp.Error.Message)
}
if len(resp.Data) == 0 {
return nil, errors.New("no embedding found in response")
}
return resp.Data[0].Embedding, nil
if len(embeddingResp.Data) == 0 {
return nil, fmt.Errorf("empty embedding response")
}
// Convert []float64 to []float32
embedding := make([]float32, len(embeddingResp.Data[0].Embedding))
for i, v := range embeddingResp.Data[0].Embedding {
embedding[i] = float32(v)
}
return embedding, nil
}

View File

@@ -10,21 +10,21 @@ import (
// Provider type constants for different embedding services
const (
// DashScope embedding service
PROVIDER_TYPE_DASHSCOPE = "dashscope"
PROVIDER_TYPE_DASHSCOPE = "dashscope"
// TextIn embedding service
PROVIDER_TYPE_TEXTIN = "textin"
PROVIDER_TYPE_TEXTIN = "textin"
// Cohere embedding service
PROVIDER_TYPE_COHERE = "cohere"
PROVIDER_TYPE_COHERE = "cohere"
// OpenAI embedding service
PROVIDER_TYPE_OPENAI = "openai"
PROVIDER_TYPE_OPENAI = "openai"
// Ollama embedding service
PROVIDER_TYPE_OLLAMA = "ollama"
PROVIDER_TYPE_OLLAMA = "ollama"
// HuggingFace embedding service
PROVIDER_TYPE_HUGGINGFACE = "huggingface"
// XFYun embedding service
PROVIDER_TYPE_XFYUN = "xfyun"
PROVIDER_TYPE_XFYUN = "xfyun"
// Azure embedding service
PROVIDER_TYPE_AZURE = "azure"
PROVIDER_TYPE_AZURE = "azure"
)
// Factory interface for creating Provider instances
@@ -36,8 +36,7 @@ type providerInitializer interface {
// Maps provider types to their initializers
var (
providerInitializers = map[string]providerInitializer{
PROVIDER_TYPE_DASHSCOPE: &dashScopeProviderInitializer{},
PROVIDER_TYPE_OPENAI: &openAIProviderInitializer{},
PROVIDER_TYPE_OPENAI: &openAIProviderInitializer{},
}
)

View File

@@ -2,133 +2,105 @@ package llm
import (
"context"
"encoding/json"
"errors"
"fmt"
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/common"
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/config"
"github.com/openai/openai-go/v2"
"github.com/openai/openai-go/v2/option"
"github.com/openai/openai-go/v2/packages/param"
)
const (
OPENAI_CHAT_ENDPOINT = "/chat/completions"
OPENAI_DEFAULT_MODEL = "gpt-3.5-turbo"
OPENAI_DEFAULT_MODEL = "gpt-4o"
)
// openAI specific configuration captured after initialization.
type openAIProviderConfig struct {
apiKey string
baseURL string
type OpenAIProvider struct {
client *openai.Client
model string
maxTokens int
temperature float64
maxTokens int
}
type openAIProviderInitializer struct{}
var openAIConfig openAIProviderConfig
func (i *openAIProviderInitializer) initConfig(c config.LLMConfig) {
openAIConfig.apiKey = c.APIKey
openAIConfig.baseURL = c.BaseURL
openAIConfig.model = c.Model
if openAIConfig.model == "" {
openAIConfig.model = OPENAI_DEFAULT_MODEL
}
if openAIConfig.baseURL == "" {
openAIConfig.baseURL = "https://api.openai.com/v1" // default public endpoint
}
openAIConfig.maxTokens = c.MaxTokens
openAIConfig.temperature = c.Temperature
}
func (i *openAIProviderInitializer) validateConfig() error {
if openAIConfig.apiKey == "" {
func (i *openAIProviderInitializer) validateConfig(cfg *config.LLMConfig) error {
if cfg.APIKey == "" {
return errors.New("[openai llm] apiKey is required")
}
if cfg.Model == "" {
cfg.Model = OPENAI_DEFAULT_MODEL
}
if cfg.Temperature <= 0 || cfg.Temperature > 2 {
cfg.Temperature = 0.5
}
if cfg.MaxTokens <= 0 {
cfg.MaxTokens = 2048
}
return nil
}
func (i *openAIProviderInitializer) CreateProvider(cfg config.LLMConfig) (Provider, error) {
i.initConfig(cfg)
if err := i.validateConfig(); err != nil {
if err := i.validateConfig(&cfg); err != nil {
return nil, err
}
headers := map[string]string{
"Authorization": "Bearer " + openAIConfig.apiKey,
"Content-Type": "application/json",
// Create OpenAI client
var clientOptions []option.RequestOption
clientOptions = append(clientOptions, option.WithAPIKey(cfg.APIKey))
// If a custom baseURL is set, use it
if cfg.BaseURL != "" {
clientOptions = append(clientOptions, option.WithBaseURL(cfg.BaseURL))
}
client := common.NewHTTPClient(openAIConfig.baseURL, headers)
return &OpenAIProvider{client: client, cfg: openAIConfig}, nil
}
type OpenAIProvider struct {
client *common.HTTPClient
cfg openAIProviderConfig
}
// Create OpenAI client
client := openai.NewClient(clientOptions...)
type openAIChatCompletionRequest struct {
Model string `json:"model"`
Messages []openAIChatMessage `json:"messages"`
Temperature float64 `json:"temperature,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
}
type openAIChatMessage struct {
Role string `json:"role"`
Content string `json:"content"`
}
type openAIChatCompletionResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Choices []openAIChatCompletionResponseChoice `json:"choices"`
Error *openAIError `json:"error,omitempty"`
}
type openAIChatCompletionResponseChoice struct {
Index int `json:"index"`
Message openAIChatMessage `json:"message"`
FinishReason string `json:"finish_reason"`
}
type openAIError struct {
Message string `json:"message"`
Type string `json:"type"`
Code string `json:"code"`
Param string `json:"param"`
return &OpenAIProvider{
client: &client,
model: cfg.Model,
temperature: cfg.Temperature,
maxTokens: cfg.MaxTokens,
}, nil
}
// GenerateCompletion implements Provider interface.
func (o *OpenAIProvider) GenerateCompletion(ctx context.Context, prompt string) (string, error) {
req := openAIChatCompletionRequest{
Model: o.cfg.model,
Messages: []openAIChatMessage{
{Role: "user", Content: prompt},
// Create chat request
params := openai.ChatCompletionNewParams{
Model: o.model,
Messages: []openai.ChatCompletionMessageParamUnion{
openai.UserMessage(prompt),
},
Temperature: o.cfg.temperature,
MaxTokens: o.cfg.maxTokens,
}
body, err := o.client.Post(OPENAI_CHAT_ENDPOINT, req)
// Set optional parameters
if o.temperature > 0 {
temperature := float64(o.temperature)
params.Temperature = param.Opt[float64]{Value: temperature}
}
if o.maxTokens > 0 {
maxTokens := int64(o.maxTokens)
params.MaxTokens = param.Opt[int64]{Value: maxTokens}
}
// Send request
response, err := o.client.Chat.Completions.New(ctx, params)
if err != nil {
return "", fmt.Errorf("openai llm post error: %w", err)
// Handle error
return "", fmt.Errorf("openai llm error: %w", err)
}
var resp openAIChatCompletionResponse
if err := json.Unmarshal(body, &resp); err != nil {
return "", fmt.Errorf("openai llm unmarshal error: %w", err)
}
if resp.Error != nil {
return "", fmt.Errorf("openai llm api error: %s - %s", resp.Error.Type, resp.Error.Message)
}
if len(resp.Choices) == 0 {
// Check response
if len(response.Choices) == 0 {
return "", errors.New("openai llm: empty choices")
}
return resp.Choices[0].Message.Content, nil
// Return generated content
return response.Choices[0].Message.Content, nil
}
func (o *OpenAIProvider) GetProviderType() string {

View File

@@ -4,7 +4,7 @@ import (
"strings"
)
const RAGPromptTemplate = `You are a professional knowledge Q&A assistant. Your task is to provide accurate, complete, and strictly relevant answers based on the user's question and retrieved context.
const RAGPromptTemplate = `You are a professional knowledge Q&A assistant. Your task is to provide direct and concise answers based on the user's question and retrieved context.
Retrieved relevant context (may be empty, multiple segments separated by line breaks):
{contexts}
@@ -13,9 +13,11 @@ User question:
{query}
Requirements:
1. If the context provides sufficient information, answer directly based on the context. You may use domain knowledge to supplement, but do not fabricate facts beyond the context.
2. If the context is insufficient or unrelated to the question, respond with: "I am unable to answer this question."
3. Your response must correctly answer the user's question and must not contain any irrelevant or unrelated content.`
1. Provide ONLY the direct answer without any explanation, reasoning, or additional context.
2. If the context provides sufficient information, output the answer in the most concise form possible.
3. If the context is insufficient or unrelated to the question, respond with: "I am unable to answer this question."
4. Do not include any phrases like "The answer is", "Based on the context", etc. Just output the answer directly.
`
func BuildPrompt(query string, contexts []string, join string) string {
rendered := strings.ReplaceAll(RAGPromptTemplate, "{query}", query)

View File

@@ -46,24 +46,22 @@ func NewRAGClient(config *config.Config) (*RAGClient, error) {
}
ragclient.embeddingProvider = embeddingProvider
llmProvider, err := llm.NewLLMProvider(ragclient.config.LLM)
if err != nil {
return nil, fmt.Errorf("create llm provider failed, err: %w", err)
if ragclient.config.LLM.Provider == "" {
ragclient.llmProvider = nil
} else {
llmProvider, err := llm.NewLLMProvider(ragclient.config.LLM)
if err != nil {
return nil, fmt.Errorf("create llm provider failed, err: %w", err)
}
ragclient.llmProvider = llmProvider
}
ragclient.llmProvider = llmProvider
demoVector, err := embeddingProvider.GetEmbedding(context.Background(), "initialization")
if err != nil {
return nil, fmt.Errorf("create init embedding failed, err: %w", err)
}
dim := len(demoVector)
dim := ragclient.config.Embedding.Dimensions
provider, err := vectordb.NewVectorDBProvider(&ragclient.config.VectorDB, dim)
if err != nil {
return nil, fmt.Errorf("create vector store provider failed, err: %w", err)
}
ragclient.vectordbProvider = provider
return ragclient, nil
}

View File

@@ -1,6 +1,8 @@
package rag
import (
"encoding/json"
"os"
"testing"
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/config"
@@ -20,15 +22,17 @@ func getRAGClient() (*RAGClient, error) {
LLM: config.LLMConfig{
Provider: "openai",
APIKey: "sk-xxxx",
APIKey: "sk-xxx",
BaseURL: "https://openrouter.ai/api/v1",
Model: "openai/gpt-4o",
},
Embedding: config.EmbeddingConfig{
Provider: "dashscope",
APIKey: "sk-xxxx",
Model: "text-embedding-v4",
Provider: "openai",
BaseURL: "https://dashscope.aliyuncs.com/compatible-mode/v1",
APIKey: "sk-xxxx",
Model: "text-embedding-v4",
Dimensions: 1536,
},
VectorDB: config.VectorDBConfig{
@@ -36,7 +40,49 @@ func getRAGClient() (*RAGClient, error) {
Host: "localhost",
Port: 19530,
Database: "default",
Collection: "test_collection",
Collection: "test_collection3",
Mapping: config.MappingConfig{
Fields: []config.FieldMapping{
{
StandardName: "id",
RawName: "pk",
Properties: map[string]interface{}{
"max_length": 256,
"auto_id": false,
},
},
{
StandardName: "content",
RawName: "page_content",
Properties: map[string]interface{}{
"max_length": 8192,
},
},
{
StandardName: "vector",
RawName: "page_vector",
Properties: make(map[string]interface{}),
},
{
StandardName: "metadata",
RawName: "metadata",
Properties: make(map[string]interface{}),
},
{
StandardName: "created_at",
RawName: "created_at",
Properties: make(map[string]interface{}),
},
},
Index: config.IndexConfig{
IndexType: "IVF_FLAT",
Params: map[string]interface{}{"nlist": 64},
},
Search: config.SearchConfig{
MetricType: "COSINE",
Params: map[string]interface{}{"nprobe": 32},
},
},
},
}
@@ -46,7 +92,6 @@ func getRAGClient() (*RAGClient, error) {
}
return ragClient, nil
}
func TestNewRAGClient(t *testing.T) {
@@ -102,7 +147,7 @@ func TestRAGClient_DeleteChunk(t *testing.T) {
return
}
chunk_id := "63ee25d7-41b9-4455-8066-075ca5c803b2"
chunk_id := "2a06679c-a8ea-46dc-bf1c-7e7b164a73c8"
err = ragClient.DeleteChunk(chunk_id)
if err != nil {
t.Errorf("DeleteChunk() error = %v", err)
@@ -137,7 +182,11 @@ func TestRAGClient_Chat(t *testing.T) {
t.Errorf("getRAGClient() error = %v", err)
return
}
query := "what is the competition about?"
// query := "Who is the individual associated with the cryptocurrency industry facing a criminal trial on fraud and conspiracy charges, as reported by both The Verge and TechCrunch, and is accused by prosecutors of committing fraud for personal gain?"
// query := "Which individual is implicated in both inflating the value of a Manhattan apartment to a figure not yet achieved in New York City's real estate history, according to 'Fortune', and is also accused of adjusting this apartment's valuation to compensate for a loss in another asset's worth, as reported by 'The Age'?"
// query := "Who is the figure associated with generative AI technology whose departure from OpenAI was considered shocking according to Fortune, and is also the subject of a prevailing theory suggesting a lack of full truthfulness with the board as reported by TechCrunch?"
// query := "Do the TechCrunch article on software companies and the Hacker News article on The Epoch Times both report an increase in revenue related to payment and subscription models, respectively?"
query := "Which online betting platform provides a welcome bonus of up to $1000 in bonus bets for new customers' first losses, runs NBA betting promotions, and is anticipated to extend the same sign-up offer to new users in Vermont, as reported by both CBSSports.com and Sporting News?"
resp, err := ragClient.Chat(query)
if err != nil {
t.Errorf("Chat() error = %v", err)
@@ -147,4 +196,45 @@ func TestRAGClient_Chat(t *testing.T) {
t.Errorf("Chat() resp = %s, want not empty", resp)
return
}
t.Logf("Chat() resp = %s", resp)
}
func TestRAGClient_LoadChunks(t *testing.T) {
t.Logf("TestRAGClient_LoadChunks")
ragClient, err := getRAGClient()
if err != nil {
t.Errorf("getRAGClient() error = %v", err)
return
}
// load json output/corpus.json and then call ragclient CreateChunkFromText to insert chunks
file, err := os.Open("/dataset/corpus.json")
if err != nil {
t.Errorf("LoadData() error = %v", err)
return
}
defer file.Close()
decoder := json.NewDecoder(file)
var data []struct {
Body string `json:"body"`
Title string `json:"title"`
Url string `json:"url"`
}
if err := decoder.Decode(&data); err != nil {
t.Errorf("LoadData() error = %v", err)
return
}
for _, item := range data {
t.Logf("LoadData() url = %s", item.Url)
t.Logf("LoadData() title = %s", item.Title)
t.Logf("LoadData() len body = %d", len(item.Body))
chunks, err := ragClient.CreateChunkFromText(item.Body, item.Title)
if err != nil {
t.Errorf("LoadData() error = %v", err)
continue
} else {
t.Logf("LoadData() chunks len = %d", len(chunks))
}
}
t.Logf("TestRAGClient_LoadChunks done")
}

View File

@@ -28,7 +28,7 @@ func init() {
TopK: 10,
},
LLM: config.LLMConfig{
Provider: "openai",
Provider: "",
APIKey: "",
BaseURL: "",
Model: "gpt-4o",
@@ -36,11 +36,11 @@ func init() {
MaxTokens: 2048,
},
Embedding: config.EmbeddingConfig{
Provider: "dashscope",
APIKey: "",
BaseURL: "",
Model: "text-embedding-v4",
Dimension: 1024,
Provider: "openai",
APIKey: "",
BaseURL: "",
Model: "text-embedding-ada-002",
Dimensions: 1536,
},
VectorDB: config.VectorDBConfig{
Provider: "milvus",
@@ -50,14 +50,56 @@ func init() {
Collection: "rag",
Username: "",
Password: "",
Mapping: config.MappingConfig{
Fields: []config.FieldMapping{
{
StandardName: "id",
RawName: "id",
Properties: map[string]interface{}{
"max_length": 256,
"auto_id": false,
},
},
{
StandardName: "content",
RawName: "content",
Properties: map[string]interface{}{
"max_length": 8192,
},
},
{
StandardName: "vector",
RawName: "vector",
Properties: make(map[string]interface{}),
},
{
StandardName: "metadata",
RawName: "metadata",
Properties: make(map[string]interface{}),
},
{
StandardName: "created_at",
RawName: "created_at",
Properties: make(map[string]interface{}),
},
},
Index: config.IndexConfig{
IndexType: "HNSW",
Params: map[string]interface{}{"M": 8, "efConstruction": 64},
},
Search: config.SearchConfig{
MetricType: "IP",
Params: make(map[string]interface{}),
},
},
},
},
})
}
func (c *RAGConfig) ParseConfig(config map[string]any) error {
func (c *RAGConfig) ParseConfig(cfg map[string]any) error {
// Parse RAG configuration
if ragConfig, ok := config["rag"].(map[string]any); ok {
if ragConfig, ok := cfg["rag"].(map[string]any); ok {
if splitter, exists := ragConfig["splitter"].(map[string]any); exists {
if splitterType, exists := splitter["provider"].(string); exists {
c.config.RAG.Splitter.Provider = splitterType
@@ -78,7 +120,7 @@ func (c *RAGConfig) ParseConfig(config map[string]any) error {
}
// Parse Embedding configuration
if embeddingConfig, ok := config["embedding"].(map[string]any); ok {
if embeddingConfig, ok := cfg["embedding"].(map[string]any); ok {
if provider, exists := embeddingConfig["provider"].(string); exists {
c.config.Embedding.Provider = provider
} else {
@@ -94,17 +136,15 @@ func (c *RAGConfig) ParseConfig(config map[string]any) error {
if model, exists := embeddingConfig["model"].(string); exists {
c.config.Embedding.Model = model
}
if dimension, exists := embeddingConfig["dimension"].(float64); exists {
c.config.Embedding.Dimension = int(dimension)
if dimensions, exists := embeddingConfig["dimensions"].(float64); exists {
c.config.Embedding.Dimensions = int(dimensions)
}
}
// Parse llm configuration
if llmConfig, ok := config["llm"].(map[string]any); ok {
if llmConfig, ok := cfg["llm"].(map[string]any); ok {
if provider, exists := llmConfig["provider"].(string); exists {
c.config.LLM.Provider = provider
} else {
return errors.New("missing llm provider")
}
if apiKey, exists := llmConfig["api_key"].(string); exists {
c.config.LLM.APIKey = apiKey
@@ -124,7 +164,7 @@ func (c *RAGConfig) ParseConfig(config map[string]any) error {
}
// Parse VectorDB configuration
if vectordbConfig, ok := config["vectordb"].(map[string]any); ok {
if vectordbConfig, ok := cfg["vectordb"].(map[string]any); ok {
if provider, exists := vectordbConfig["provider"].(string); exists {
c.config.VectorDB.Provider = provider
} else {
@@ -148,8 +188,59 @@ func (c *RAGConfig) ParseConfig(config map[string]any) error {
if password, exists := vectordbConfig["password"].(string); exists {
c.config.VectorDB.Password = password
}
}
// Parse mapping here
if mapping, exists := vectordbConfig["mapping"].(map[string]any); exists {
// Parse field mappings
if fields, ok := mapping["fields"].([]any); ok {
c.config.VectorDB.Mapping.Fields = []config.FieldMapping{}
for _, field := range fields {
if fieldMap, ok := field.(map[string]any); ok {
fieldMapping := config.FieldMapping{
Properties: make(map[string]interface{}),
}
if standardName, ok := fieldMap["standard_name"].(string); ok {
fieldMapping.StandardName = standardName
}
if rawName, ok := fieldMap["raw_name"].(string); ok {
fieldMapping.RawName = rawName
}
// Parse properties
if properties, ok := fieldMap["properties"].(map[string]any); ok {
for key, value := range properties {
fieldMapping.Properties[key] = value
}
}
c.config.VectorDB.Mapping.Fields = append(c.config.VectorDB.Mapping.Fields, fieldMapping)
}
}
}
// Parse index configuration
if index, ok := mapping["index"].(map[string]any); ok {
if indexType, ok := index["index_type"].(string); ok {
c.config.VectorDB.Mapping.Index.IndexType = indexType
}
// Parse index parameters
if params, ok := index["params"].(map[string]any); ok {
c.config.VectorDB.Mapping.Index.Params = params
}
}
// Parse search configuration
if search, ok := mapping["search"].(map[string]any); ok {
if metricType, ok := search["metric_type"].(string); ok {
c.config.VectorDB.Mapping.Search.MetricType = metricType
}
// Parse search parameters
if params, ok := search["params"].(map[string]any); ok {
c.config.VectorDB.Mapping.Search.Params = params
}
}
}
}
return nil
}
@@ -190,7 +281,7 @@ func (c *RAGConfig) NewServer(serverName string) (*common.MCPServer, error) {
// Intelligent Q&A Tool
mcpServer.AddTool(
mcp.NewToolWithRawSchema("chat", "Generate contextually relevant responses using RAG system with LLM integration", GetChatSchema()),
mcp.NewToolWithRawSchema("chat", "Answer user questions by retrieving relevant knowledge from the database and generating responses using RAG-enhanced LLM", GetChatSchema()),
HandleChat(ragClient),
)

View File

@@ -28,11 +28,11 @@ func TestRAGConfig_ParseConfig(t *testing.T) {
MaxTokens: 2048,
},
Embedding: config.EmbeddingConfig{
Provider: "dashscope",
APIKey: "sk-XXX",
BaseURL: "",
Model: "text-embedding-v4",
Dimension: 1024,
Provider: "dashscope",
APIKey: "sk-XXX",
BaseURL: "",
Model: "text-embedding-v4",
Dimensions: 1024,
},
VectorDB: config.VectorDBConfig{
Provider: "milvus",
@@ -42,6 +42,48 @@ func TestRAGConfig_ParseConfig(t *testing.T) {
Collection: "test_rag",
Username: "",
Password: "",
Mapping: config.MappingConfig{
Fields: []config.FieldMapping{
{
StandardName: "id",
RawName: "id",
Properties: map[string]interface{}{
"max_length": 256,
"auto_id": false,
},
},
{
StandardName: "content",
RawName: "content",
Properties: map[string]interface{}{
"max_length": 8192,
},
},
{
StandardName: "vector",
RawName: "vector",
Properties: make(map[string]interface{}),
},
{
StandardName: "metadata",
RawName: "metadata",
Properties: make(map[string]interface{}),
},
{
StandardName: "created_at",
RawName: "created_at",
Properties: make(map[string]interface{}),
},
},
Index: config.IndexConfig{
IndexType: "HNSW",
Params: map[string]interface{}{"M": 4, "efConstruction": 32},
},
Search: config.SearchConfig{
MetricType: "IP",
Params: map[string]interface{}{"ef": 32},
},
},
},
}
// 把 config 输出 yaml 格式

View File

@@ -169,6 +169,10 @@ func HandleChat(ragClient *RAGClient) common.ToolHandlerFunc {
if !ok {
return nil, fmt.Errorf("invalid query argument")
}
// check llm provider
if ragClient.llmProvider == nil {
return nil, fmt.Errorf("llm provider is empty, please check the llm configuration")
}
// Generate response using RAGClient's LLM
reply, err := ragClient.Chat(query)
if err != nil {

View File

@@ -0,0 +1,182 @@
package vectordb
import (
"errors"
"fmt"
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/config"
)
// Sentinel errors shared by vector database mapper implementations; callers
// can match them with errors.Is (e.g. lookups wrap ErrFieldNotFound).
var (
	ErrFieldNotFound        = errors.New("field not found")
	ErrInvalidFieldType     = errors.New("invalid field type")
	ErrInvalidIndexType     = errors.New("invalid index type")
	ErrInvalidMetricType    = errors.New("invalid metric type")
	ErrInvalidSearchParams  = errors.New("invalid search parameters")
	ErrCollectionNotFound   = errors.New("collection not found")
	ErrUnsupportedOperation = errors.New("unsupported operation")
)
// VectorDBMapper abstracts the mapping between the pipeline's standard field
// model (id/content/vector/metadata/...) and the raw field names of a
// concrete vector database collection.
type VectorDBMapper interface {
	// ParseMapping parses the mapping configuration for the given provider.
	ParseMapping(provider string, cfg config.MappingConfig) error
	// GetIndexConfig returns the index configuration.
	GetIndexConfig() (config.IndexConfig, error)
	// GetSearchConfig returns the search configuration.
	GetSearchConfig() (config.SearchConfig, error)
	// GetRawAllFieldNames returns every raw (provider-side) field name.
	GetRawAllFieldNames() ([]string, error)
	// GetIDField returns the ID field mapping.
	GetIDField() (*config.FieldMapping, error)
	// GetVectorField returns the vector field mapping.
	GetVectorField() (*config.FieldMapping, error)
	// GetRawField returns the field mapping for a standard field name.
	GetRawField(standardFieldName string) (*config.FieldMapping, error)
	// GetField returns the field mapping for a raw field name.
	GetField(rawFieldName string) (*config.FieldMapping, error)
	// GetFieldMappings returns all configured field mappings.
	GetFieldMappings() ([]config.FieldMapping, error)
}
// DefaultVectorDBMapper is the default implementation of the VectorDBMapper
// interface. Besides the parsed mapping configuration it keeps two lookup
// tables so a field can be resolved by either name in O(1).
type DefaultVectorDBMapper struct {
	// Mapping configuration as stored by ParseMapping.
	mappingConfig config.MappingConfig
	// Map from standard field name to field mapping.
	standardFieldMap map[string]*config.FieldMapping
	// Map from raw field name to field mapping.
	rawFieldMap map[string]*config.FieldMapping
}
// NewDefaultVectorDBMapper constructs a DefaultVectorDBMapper for the given
// provider and mapping configuration. It fails when the configuration does
// not parse (e.g. a mandatory field is missing).
func NewDefaultVectorDBMapper(provider string, mappingConfig config.MappingConfig) (*DefaultVectorDBMapper, error) {
	m := &DefaultVectorDBMapper{
		standardFieldMap: map[string]*config.FieldMapping{},
		rawFieldMap:      map[string]*config.FieldMapping{},
	}
	if err := m.ParseMapping(provider, mappingConfig); err != nil {
		return nil, err
	}
	return m, nil
}
// ParseMapping parses the mapping configuration, applies the default field
// mappings when none are configured, and rebuilds the standard-name and
// raw-name lookup tables.
//
// It returns an error when the mandatory fields (id, content, vector) are
// missing from the resulting mapping. The provider argument is currently
// unused; it is kept for provider-specific parsing in the future.
//
// Fix: the configuration is now stored AFTER defaults are filled in. The
// previous code assigned m.mappingConfig = cfg first, so whenever the default
// fields were applied, GetFieldMappings() reported an empty field list even
// though the lookup maps were populated. The error message also no longer
// claims a "varchar type" check that is never performed.
func (m *DefaultVectorDBMapper) ParseMapping(provider string, cfg config.MappingConfig) error {
	// Clear existing mappings so ParseMapping can be called repeatedly.
	m.standardFieldMap = make(map[string]*config.FieldMapping)
	m.rawFieldMap = make(map[string]*config.FieldMapping)

	// Fill in the default field mappings when the user configured none.
	if len(cfg.Fields) == 0 {
		cfg.Fields = []config.FieldMapping{
			{
				StandardName: "id",
				RawName:      "id",
				Properties: map[string]interface{}{
					"max_length": 256,
					"auto_id":    false,
				},
			},
			{
				StandardName: "content",
				RawName:      "content",
				Properties: map[string]interface{}{
					"max_length": 8192,
				},
			},
			{
				StandardName: "vector",
				RawName:      "vector",
			},
			{
				StandardName: "metadata",
				RawName:      "metadata",
			},
			{
				StandardName: "created_at",
				RawName:      "created_at",
			},
		}
	}

	// Store the (possibly defaulted) configuration so GetFieldMappings,
	// GetIndexConfig and GetSearchConfig reflect what is actually in use.
	m.mappingConfig = cfg

	// Build the lookup tables. Pointers into cfg.Fields share the same
	// backing array as m.mappingConfig.Fields (slice assignment copies the
	// header, not the elements), so both views stay consistent.
	for i := range cfg.Fields {
		fieldPtr := &cfg.Fields[i]
		m.standardFieldMap[fieldPtr.StandardName] = fieldPtr
		m.rawFieldMap[fieldPtr.RawName] = fieldPtr
	}

	// The mapping must include id, content, and vector fields.
	requiredFields := []string{"id", "content", "vector"}
	for _, fieldName := range requiredFields {
		if _, err := m.GetRawField(fieldName); err != nil {
			return fmt.Errorf("[vector db mapper] required field %s not found", fieldName)
		}
	}
	return nil
}
// GetIndexConfig returns the configured index settings (index type and
// parameters) from the stored mapping configuration. It never fails; the
// error return exists to satisfy the VectorDBMapper interface.
func (m *DefaultVectorDBMapper) GetIndexConfig() (config.IndexConfig, error) {
	return m.mappingConfig.Index, nil
}
// GetSearchConfig returns the configured search settings (metric type and
// parameters) from the stored mapping configuration. It never fails; the
// error return exists to satisfy the VectorDBMapper interface.
func (m *DefaultVectorDBMapper) GetSearchConfig() (config.SearchConfig, error) {
	return m.mappingConfig.Search, nil
}
// GetRawAllFieldNames returns the raw (provider-side) name of every mapped
// field. The order is unspecified because it comes from map iteration.
func (m *DefaultVectorDBMapper) GetRawAllFieldNames() ([]string, error) {
	names := make([]string, 0, len(m.rawFieldMap))
	for rawName := range m.rawFieldMap {
		names = append(names, rawName)
	}
	return names, nil
}
// GetIDField returns the mapping for the standard "id" field.
func (m *DefaultVectorDBMapper) GetIDField() (*config.FieldMapping, error) {
	return m.GetRawField("id")
}
// GetVectorField returns the mapping for the standard "vector" field.
func (m *DefaultVectorDBMapper) GetVectorField() (*config.FieldMapping, error) {
	return m.GetRawField("vector")
}
// GetRawField resolves a field mapping by its standard (canonical) name.
// The returned error wraps ErrFieldNotFound when the name is unknown.
func (m *DefaultVectorDBMapper) GetRawField(standardFieldName string) (*config.FieldMapping, error) {
	if field, ok := m.standardFieldMap[standardFieldName]; ok {
		return field, nil
	}
	return nil, fmt.Errorf("%w: standard field %s not found", ErrFieldNotFound, standardFieldName)
}
// GetField looks up a field mapping by its raw (backend) name.
// It returns an error wrapping ErrFieldNotFound when no mapping exists.
func (m *DefaultVectorDBMapper) GetField(rawFieldName string) (*config.FieldMapping, error) {
	if field, ok := m.rawFieldMap[rawFieldName]; ok {
		return field, nil
	}
	return nil, fmt.Errorf("%w: raw field %s not found", ErrFieldNotFound, rawFieldName)
}
// GetFieldMappings returns every configured field mapping.
func (m *DefaultVectorDBMapper) GetFieldMappings() ([]config.FieldMapping, error) {
	mappings := m.mappingConfig.Fields
	return mappings, nil
}

View File

@@ -80,16 +80,17 @@ func (m *milvusProviderInitializer) CreateProvider(cfg *config.VectorDBConfig, d
type MilvusProvider struct {
client client.Client
config *config.VectorDBConfig
Collection string
collection string
mapper VectorDBMapper
dimensions int
}
// NewMilvusProvider creates a new instance of MilvusProvider
func NewMilvusProvider(cfg *config.VectorDBConfig, dim int) (VectorStoreProvider, error) {
func NewMilvusProvider(cfg *config.VectorDBConfig, dimensions int) (VectorStoreProvider, error) {
// Create Milvus client
connectParam := client.Config{
Address: fmt.Sprintf("%s:%d", cfg.Host, cfg.Port),
}
connectParam.DBName = cfg.Database
// Add authentication if credentials are provided
if cfg.Username != "" && cfg.Password != "" {
@@ -102,92 +103,301 @@ func NewMilvusProvider(cfg *config.VectorDBConfig, dim int) (VectorStoreProvider
return nil, fmt.Errorf("failed to create milvus client: %w", err)
}
mapper, err := NewDefaultVectorDBMapper(MILVUS_PROVIDER_TYPE, cfg.Mapping)
if err != nil {
return nil, fmt.Errorf("failed to create default vector db mapper: %w", err)
}
provider := &MilvusProvider{
client: milvusClient,
config: cfg,
Collection: cfg.Collection,
collection: cfg.Collection,
mapper: mapper,
dimensions: dimensions,
}
ctx := context.Background()
if err := provider.CreateCollection(ctx, dim); err != nil {
if err := provider.CreateCollection(ctx, dimensions); err != nil {
return nil, err
}
return provider, nil
}
// buildSchema assembles the Milvus collection schema from the configured
// field mappings. Only the five standard field names (id, content, vector,
// metadata, created_at) are materialized; mappings with any other
// StandardName are silently skipped.
// NOTE(review): errors from GetIDField/GetFieldMappings are ignored here —
// presumably the mapper validated required fields earlier; confirm.
func (m *MilvusProvider) buildSchema() (*entity.Schema, error) {
// Create Milvus collection Schema
idField, _ := m.mapper.GetIDField()
isIDAuto := idField.IsAutoID()
schema := entity.NewSchema().
WithName(m.collection).
WithDescription("Knowledge document collection").
WithAutoID(isIDAuto).
WithDynamicFieldEnabled(false)
// Add fields
var fieldEntity *entity.Field
fieldMappings, _ := m.mapper.GetFieldMappings()
for _, field := range fieldMappings {
fieldEntity = nil
maxLength := field.MaxLength()
switch field.StandardName {
case "id":
// Primary key: varchar, optionally auto-generated by Milvus.
isIDAuto := field.IsAutoID()
fieldEntity = entity.NewField().
WithName(field.RawName).
WithDataType(entity.FieldTypeVarChar).
WithMaxLength(int64(maxLength)).
WithIsPrimaryKey(true)
if isIDAuto {
fieldEntity.WithIsAutoID(true)
}
schema.WithField(fieldEntity)
case "content":
// Document text, bounded by the configured max_length.
fieldEntity = entity.NewField().
WithName(field.RawName).
WithDataType(entity.FieldTypeVarChar).
WithMaxLength(int64(maxLength))
schema.WithField(fieldEntity)
case "vector":
// Embedding vector; dimension fixed at provider construction time.
fieldEntity = entity.NewField().
WithName(field.RawName).
WithDataType(entity.FieldTypeFloatVector).
WithDim(int64(m.dimensions))
schema.WithField(fieldEntity)
case "metadata":
// Arbitrary per-document metadata stored as JSON.
fieldEntity = entity.NewField().
WithName(field.RawName).
WithDataType(entity.FieldTypeJSON)
schema.WithField(fieldEntity)
case "created_at":
// Creation timestamp stored as an int64 (Unix milliseconds at insert time).
fieldEntity = entity.NewField().
WithName(field.RawName).
WithDataType(entity.FieldTypeInt64)
schema.WithField(fieldEntity)
}
}
return schema, nil
}
// GetMetricType translates a case-insensitive metric name into the
// corresponding Milvus metric constant. Unknown names fall back to
// inner product (IP).
func (m *MilvusProvider) GetMetricType(metricType string) entity.MetricType {
	result := entity.IP // default metric
	switch strings.ToUpper(metricType) {
	case "L2":
		result = entity.L2
	case "IP":
		result = entity.IP
	case "COSINE":
		result = entity.COSINE
	case "HAMMING":
		result = entity.HAMMING
	case "JACCARD":
		result = entity.JACCARD
	case "TANIMOTO":
		result = entity.TANIMOTO
	case "SUBSTRUCTURE":
		result = entity.SUBSTRUCTURE
	case "SUPERSTRUCTURE":
		result = entity.SUPERSTRUCTURE
	}
	return result
}
// buildVectorIndex constructs the Milvus vector index described by the
// mapper's index configuration, using the metric type from the search
// configuration. The index type defaults to HNSW when unset; an unknown
// type is an error. Missing numeric parameters fall back to per-index
// defaults matching the Milvus SDK conventions.
func (m *MilvusProvider) buildVectorIndex() (entity.Index, error) {
	indexConfig, _ := m.mapper.GetIndexConfig()
	searchConfig, _ := m.mapper.GetSearchConfig()

	milvusIndexType := strings.ToUpper(indexConfig.IndexType)
	if milvusIndexType == "" {
		milvusIndexType = "HNSW" // default index type
	}
	metricType := m.GetMetricType(searchConfig.MetricType)

	// intParam reads an integer index parameter, falling back to def when
	// the key is absent or unparsable.
	intParam := func(key string, def int) int {
		if v, err := indexConfig.ParamsInt64(key); err == nil {
			return int(v)
		}
		return def
	}
	// wrap attaches the index type name to a constructor error.
	wrap := func(idx entity.Index, err error) (entity.Index, error) {
		if err != nil {
			return nil, fmt.Errorf("failed to create %s index: %w", milvusIndexType, err)
		}
		return idx, nil
	}

	switch milvusIndexType {
	case "FLAT":
		// FLAT needs no additional parameters.
		return wrap(entity.NewIndexFlat(metricType))
	case "BIN_FLAT":
		return wrap(entity.NewIndexBinFlat(metricType, intParam("nlist", 128)))
	case "IVF_FLAT":
		return wrap(entity.NewIndexIvfFlat(metricType, intParam("nlist", 128)))
	case "BIN_IVF_FLAT":
		return wrap(entity.NewIndexBinIvfFlat(metricType, intParam("nlist", 128)))
	case "IVF_SQ8":
		return wrap(entity.NewIndexIvfSQ8(metricType, intParam("nlist", 128)))
	case "IVF_PQ":
		// NOTE: "m" is read via ParamsFloat64 for backward compatibility
		// with existing configs; do not change to ParamsInt64.
		pqM := 4
		if v, err := indexConfig.ParamsFloat64("m"); err == nil {
			pqM = int(v)
		}
		return wrap(entity.NewIndexIvfPQ(metricType, intParam("nlist", 128), pqM, intParam("nbits", 8)))
	case "HNSW":
		return wrap(entity.NewIndexHNSW(metricType, intParam("M", 8), intParam("efConstruction", 64)))
	case "IVF_HNSW":
		return wrap(entity.NewIndexIvfHNSW(metricType, intParam("nlist", 128), intParam("M", 8), intParam("efConstruction", 64)))
	case "DISKANN":
		// DISKANN takes no extra construction parameters.
		return wrap(entity.NewIndexDISKANN(metricType))
	case "SCANN":
		withRawData := false
		if v, err := indexConfig.ParamsBool("with_raw_data"); err == nil {
			withRawData = v
		}
		return wrap(entity.NewIndexSCANN(metricType, intParam("nlist", 128), withRawData))
	case "AUTOINDEX":
		return wrap(entity.NewIndexAUTOINDEX(metricType))
	default:
		return nil, fmt.Errorf("unsupported index type: %s", milvusIndexType)
	}
}
// CreateCollection creates a new collection with the specified dimension
func (m *MilvusProvider) CreateCollection(ctx context.Context, dim int) error {
// Check if collection exists
document_exists, err := m.client.HasCollection(ctx, m.Collection)
document_exists, err := m.client.HasCollection(ctx, m.collection)
if err != nil {
return fmt.Errorf("failed to check %s collection existence: %w", m.Collection, err)
return fmt.Errorf("failed to check %s collection existence: %w", m.collection, err)
}
if !document_exists {
fmt.Printf("create collection %s\n", m.Collection)
fmt.Printf("create collection %s\n", m.collection)
// Create schema
schema := entity.NewSchema().
WithName(m.Collection).
WithDescription("Knowledge document collection").
WithAutoID(false).
WithDynamicFieldEnabled(false)
// Add fields based on schema.Document structure
// Primary key field - ID
pkField := entity.NewField().
WithName("id").
WithDataType(entity.FieldTypeVarChar).
WithMaxLength(256).
WithIsPrimaryKey(true).
WithIsAutoID(false)
schema.WithField(pkField)
// Content field
contentField := entity.NewField().
WithName("content").
WithDataType(entity.FieldTypeVarChar).
WithMaxLength(8192)
schema.WithField(contentField)
// Vector field
vectorField := entity.NewField().
WithName("vector").
WithDataType(entity.FieldTypeFloatVector).
WithDim(int64(dim))
schema.WithField(vectorField)
// Metadata field
metadataField := entity.NewField().
WithName("metadata").
WithDataType(entity.FieldTypeJSON)
schema.WithField(metadataField)
// CreatedAt field (stored as Unix timestamp)
createdAtField := entity.NewField().
WithName("created_at").
WithDataType(entity.FieldTypeInt64)
schema.WithField(createdAtField)
schema, err := m.buildSchema()
if err != nil {
return fmt.Errorf("failed to build schema: %w", err)
}
// Create collection
err = m.client.CreateCollection(ctx, schema, entity.DefaultShardNumber)
if err != nil {
return fmt.Errorf("failed to create collection: %w", err)
}
// Create vector index
vectorIndex, err := entity.NewIndexHNSW(entity.IP, 8, 64)
vectorIndex, err := m.buildVectorIndex()
vectorField, _ := m.mapper.GetVectorField()
if err != nil {
return fmt.Errorf("failed to create vector index: %w", err)
}
err = m.client.CreateIndex(ctx, m.Collection, "vector", vectorIndex, false, client.WithIndexName("vector_index"))
err = m.client.CreateIndex(ctx, m.collection, vectorField.RawName, vectorIndex, false, client.WithIndexName("vector_index"))
if err != nil {
return fmt.Errorf("failed to create vector index: %w", err)
}
}
// Load collection
err = m.client.LoadCollection(ctx, m.Collection, false)
err = m.client.LoadCollection(ctx, m.collection, false)
if err != nil {
return fmt.Errorf("failed to load document collection: %w", err)
}
@@ -197,15 +407,15 @@ func (m *MilvusProvider) CreateCollection(ctx context.Context, dim int) error {
// DropCollection removes the collection from the database
func (m *MilvusProvider) DropCollection(ctx context.Context) error {
// Check if collection exists
exists, err := m.client.HasCollection(ctx, m.Collection)
exists, err := m.client.HasCollection(ctx, m.collection)
if err != nil {
return fmt.Errorf("failed to check %s collection existence: %w", m.Collection, err)
return fmt.Errorf("failed to check %s collection existence: %w", m.collection, err)
}
if !exists {
return fmt.Errorf("collection %s does not exist", m.Collection)
return fmt.Errorf("collection %s does not exist", m.collection)
}
// Drop collection
err = m.client.DropCollection(ctx, m.Collection)
err = m.client.DropCollection(ctx, m.collection)
if err != nil {
return fmt.Errorf("failed to drop collection: %w", err)
}
@@ -217,51 +427,71 @@ func (m *MilvusProvider) AddDoc(ctx context.Context, docs []schema.Document) err
if len(docs) == 0 {
return nil
}
// Prepare data
ids := make([]string, len(docs))
contents := make([]string, len(docs))
vectors := make([][]float32, len(docs))
metadatas := make([][]byte, len(docs))
createdAts := make([]int64, len(docs))
for i, doc := range docs {
ids[i] = doc.ID
contents[i] = doc.Content
// Convert vector type
vectorFloat32 := make([]float32, len(doc.Vector))
for j, v := range doc.Vector {
vectorFloat32[j] = float32(v)
}
vectors[i] = vectorFloat32
// Serialize metadata
metadataBytes, err := json.Marshal(doc.Metadata)
if err != nil {
return fmt.Errorf("failed to marshal metadata for doc %s: %w", doc.ID, err)
}
metadatas[i] = metadataBytes
createdAts[i] = doc.CreatedAt.UnixMilli()
// Get field mappings
fieldMappings, err := m.mapper.GetFieldMappings()
if err != nil {
return fmt.Errorf("failed to get field mappings: %w", err)
}
// Prepare data and columns
columns := make([]entity.Column, 0, len(fieldMappings))
// Create corresponding column data for each field
for _, field := range fieldMappings {
// Skip ID field if configured as auto ID
if field.IsPrimaryKey() && field.IsAutoID() {
continue
}
switch field.StandardName {
case "id":
// Handle string type fields
values := make([]string, len(docs))
for i, doc := range docs {
values[i] = doc.ID
}
columns = append(columns, entity.NewColumnVarChar(field.RawName, values))
case "content":
values := make([]string, len(docs))
for i, doc := range docs {
values[i] = doc.Content
}
columns = append(columns, entity.NewColumnVarChar(field.RawName, values))
// Build insert data
columns := []entity.Column{
entity.NewColumnVarChar("id", ids),
entity.NewColumnVarChar("content", contents),
entity.NewColumnFloatVector("vector", len(vectors[0]), vectors),
entity.NewColumnJSONBytes("metadata", metadatas),
entity.NewColumnInt64("created_at", createdAts),
case "vector":
// Handle vector fields
vectors := make([][]float32, len(docs))
for i, doc := range docs {
vectors[i] = doc.Vector
}
columns = append(columns, entity.NewColumnFloatVector(field.RawName, len(vectors[0]), vectors))
case "metadata":
// Handle JSON type fields (like metadata)
values := make([][]byte, len(docs))
for i, doc := range docs {
// Serialize metadata
metadataBytes, err := json.Marshal(doc.Metadata)
if err != nil {
return fmt.Errorf("failed to marshal metadata for doc %s: %w", doc.ID, err)
}
values[i] = metadataBytes
}
columns = append(columns, entity.NewColumnJSONBytes(field.RawName, values))
case "created_at":
// Handle integer type fields
values := make([]int64, len(docs))
for i, doc := range docs {
values[i] = doc.CreatedAt.UnixMilli()
}
columns = append(columns, entity.NewColumnInt64(field.RawName, values))
}
}
// Insert data
_, err := m.client.Insert(ctx, m.Collection, "", columns...)
_, err = m.client.Insert(ctx, m.collection, "", columns...)
if err != nil {
return fmt.Errorf("failed to insert documents: %w", err)
}
// Flush data
err = m.client.Flush(ctx, m.Collection, false)
err = m.client.Flush(ctx, m.collection, false)
if err != nil {
return fmt.Errorf("failed to flush collection: %w", err)
}
@@ -271,16 +501,19 @@ func (m *MilvusProvider) AddDoc(ctx context.Context, docs []schema.Document) err
// DeleteDoc deletes a document by its ID
func (m *MilvusProvider) DeleteDoc(ctx context.Context, id string) error {
// Build delete expression
expr := fmt.Sprintf(`id == "%s"`, id)
// Get ID field
idField, _ := m.mapper.GetIDField()
// Build delete expression using the RawName of ID field
expr := fmt.Sprintf(`%s == "%s"`, idField.RawName, id)
// Delete data
err := m.client.Delete(ctx, m.Collection, "", expr)
err := m.client.Delete(ctx, m.collection, "", expr)
if err != nil {
return fmt.Errorf("failed to delete documents for id %s: %w", id, err)
}
// Flush data
err = m.client.Flush(ctx, m.Collection, false)
err = m.client.Flush(ctx, m.collection, false)
if err != nil {
return fmt.Errorf("failed to flush collection after delete: %w", err)
}
@@ -306,24 +539,127 @@ func (m *MilvusProvider) UpdateDoc(ctx context.Context, docs []schema.Document)
return nil
}
// buildSearchParam builds the Milvus search parameters appropriate for the
// configured index type. The index type defaults to HNSW when unset, and an
// unknown type falls back to HNSW defaults rather than failing, so a search
// can still proceed. Missing numeric parameters use per-index defaults.
func (m *MilvusProvider) buildSearchParam() (entity.SearchParam, error) {
	// Get index configuration
	indexConfig, err := m.mapper.GetIndexConfig()
	if err != nil {
		return nil, fmt.Errorf("failed to get index config: %w", err)
	}
	// Get search configuration
	searchConfig, err := m.mapper.GetSearchConfig()
	if err != nil {
		return nil, fmt.Errorf("failed to get search config: %w", err)
	}

	milvusIndexType := strings.ToUpper(indexConfig.IndexType)
	if milvusIndexType == "" {
		milvusIndexType = "HNSW" // default to HNSW index
	}

	// floatParam/intParam read a search parameter, falling back to def when
	// the key is absent or unparsable. Some params are historically read as
	// floats for config compatibility; keep the accessor choice per key.
	floatParam := func(key string, def int) int {
		if v, err := searchConfig.ParamsFloat64(key); err == nil {
			return int(v)
		}
		return def
	}
	intParam := func(key string, def int) int {
		if v, err := searchConfig.ParamsInt64(key); err == nil {
			return int(v)
		}
		return def
	}

	switch milvusIndexType {
	case "FLAT":
		// FLAT needs no additional search parameters.
		return entity.NewIndexFlatSearchParam()
	case "BIN_FLAT", "IVF_FLAT", "BIN_IVF_FLAT", "IVF_SQ8":
		// IVF-family indices are tuned by the number of probed clusters.
		return entity.NewIndexIvfFlatSearchParam(floatParam("nprobe", 16))
	case "IVF_PQ":
		return entity.NewIndexIvfPQSearchParam(floatParam("nprobe", 16))
	case "HNSW":
		return entity.NewIndexHNSWSearchParam(floatParam("ef", 16))
	case "IVF_HNSW":
		return entity.NewIndexIvfHNSWSearchParam(floatParam("nprobe", 16), floatParam("ef", 64))
	case "SCANN":
		return entity.NewIndexSCANNSearchParam(floatParam("nprobe", 16), intParam("reorder_k", 64))
	case "DISKANN":
		return entity.NewIndexDISKANNSearchParam(intParam("search_list", 100))
	case "AUTOINDEX":
		return entity.NewIndexAUTOINDEXSearchParam(intParam("level", 8))
	default:
		// Unknown index type: fall back to HNSW search defaults.
		return entity.NewIndexHNSWSearchParam(16)
	}
}
// SearchDocs performs similarity search for documents
func (m *MilvusProvider) SearchDocs(ctx context.Context, vector []float32, options *schema.SearchOptions) ([]schema.SearchResult, error) {
if options == nil {
options = &schema.SearchOptions{TopK: 10}
}
// Build search parameters
sp, _ := entity.NewIndexHNSWSearchParam(16)
sp, err := m.buildSearchParam()
if err != nil {
return nil, fmt.Errorf("failed to build search param: %w", err)
}
outputFields, _ := m.mapper.GetRawAllFieldNames()
vectorField, _ := m.mapper.GetVectorField()
searchConfig, _ := m.mapper.GetSearchConfig()
metricType := m.GetMetricType(searchConfig.MetricType)
// Build filter expression
expr := ""
searchResults, err := m.client.Search(
ctx,
m.Collection,
[]string{}, // partition names
expr, // filter expression
[]string{"id", "content", "metadata", "created_at"}, // output fields
m.collection,
[]string{}, // partition names
expr, // filter expression
outputFields, // output fields
[]entity.Vector{entity.FloatVector(vector)},
"vector", // anns_field
entity.IP, // metric_type
vectorField.RawName, // anns_field
metricType, // metric_type
options.TopK,
sp,
)
@@ -341,9 +677,13 @@ func (m *MilvusProvider) SearchDocs(ctx context.Context, vector []float32, optio
// Get field data
var content string
var metadata map[string]interface{}
for _, field := range result.Fields {
switch field.Name() {
fieldMapping, err := m.mapper.GetField(field.Name())
if err != nil {
continue
}
fieldName := strings.ToLower(fieldMapping.StandardName)
switch fieldName {
case "content":
if contentCol, ok := field.(*entity.ColumnVarChar); ok {
if contentVal, err := contentCol.Get(i); err == nil {
@@ -364,7 +704,6 @@ func (m *MilvusProvider) SearchDocs(ctx context.Context, vector []float32, optio
}
}
}
searchResult := schema.SearchResult{
Document: schema.Document{
ID: fmt.Sprintf("%s", id),
@@ -392,15 +731,17 @@ func (m *MilvusProvider) DeleteDocs(ctx context.Context, ids []string) error {
for i, id := range ids {
quotedIDs[i] = fmt.Sprintf("\"%s\"", id)
}
expr := fmt.Sprintf("id in [%s]", strings.Join(quotedIDs, ","))
idField, _ := m.mapper.GetIDField()
expr := fmt.Sprintf("%s in [%s]", idField.RawName, strings.Join(quotedIDs, ","))
// Delete data
err := m.client.Delete(ctx, m.Collection, "", expr)
err := m.client.Delete(ctx, m.collection, "", expr)
if err != nil {
return fmt.Errorf("failed to delete documents: %w", err)
}
// Flush data
err = m.client.Flush(ctx, m.Collection, false)
err = m.client.Flush(ctx, m.collection, false)
if err != nil {
return fmt.Errorf("failed to flush collection after delete: %w", err)
}
@@ -413,12 +754,13 @@ func (m *MilvusProvider) ListDocs(ctx context.Context, limit int) ([]schema.Docu
// Build query expression
expr := ""
// Query all relevant documents
outputFields, _ := m.mapper.GetRawAllFieldNames()
queryResult, err := m.client.Query(
ctx,
m.Collection,
m.collection,
[]string{}, // partitions
expr, // filter condition
[]string{"id", "content", "metadata", "created_at"},
outputFields,
client.WithOffset(0), client.WithLimit(int64(limit)),
)
@@ -443,7 +785,12 @@ func (m *MilvusProvider) ListDocs(ctx context.Context, limit int) ([]schema.Docu
)
for _, col := range queryResult {
switch col.Name() {
fieldMapping, err := m.mapper.GetField(col.Name())
if err != nil {
continue
}
fieldName := strings.ToLower(fieldMapping.StandardName)
switch fieldName {
case "id":
if v, err := col.(*entity.ColumnVarChar).Get(i); err == nil {
id = v.(string)
@@ -488,8 +835,3 @@ func (m *MilvusProvider) Close() error {
}
return nil
}
// joinStrings concatenates elems, inserting sep between consecutive elements.
// An empty or nil slice yields the empty string.
func joinStrings(elems []string, sep string) string {
	var b strings.Builder
	for i, e := range elems {
		if i > 0 {
			b.WriteString(sep)
		}
		b.WriteString(e)
	}
	return b.String()
}

View File

@@ -28,31 +28,19 @@ local function randomBool()
return math.random() >= 0.5
end
-- is_healthy reports whether addr appears among the entries of the Redis
-- script's KEYS table starting at index 4.
-- NOTE(review): this assumes healthy endpoint addresses are passed as
-- KEYS[4..] and KEYS[1..3] carry other arguments — confirm the caller's
-- EVAL contract before relying on this.
local function is_healthy(addr)
for i = 4, #KEYS do
if addr == KEYS[i] then
return true
end
end
return false
end
if redis.call('HEXISTS', hset_key, current_target) == 1 then
current_count = redis.call('HGET', hset_key, current_target)
local hash = redis.call('HGETALL', hset_key)
for i = 1, #hash, 2 do
local addr = hash[i]
local count = hash[i+1]
if is_healthy(addr) then
if tonumber(count) < tonumber(current_count) then
current_target = addr
current_count = count
elseif count == current_count and randomBool() then
current_target = addr
current_count = count
end
end
end
for i = 4, #KEYS do
if redis.call('HEXISTS', hset_key, KEYS[i]) == 1 then
local count = redis.call('HGET', hset_key, KEYS[i])
if tonumber(count) < tonumber(current_count) then
current_target = KEYS[i]
current_count = count
elseif count == current_count and randomBool() then
current_target = KEYS[i]
end
end
end
end
redis.call("HINCRBY", hset_key, current_target, 1)

View File

@@ -121,17 +121,14 @@ if target == "" then
target = default_target
if redis.call('HEXISTS', hset_key, target) == 1 then
current_count = redis.call('HGET', hset_key, target)
local hash = redis.call('HGETALL', hset_key)
for i = 1, #hash, 2 do
local addr = hash[i]
local count = hash[i+1]
if is_healthy(addr) then
for i = 4, #KEYS do
if redis.call('HEXISTS', hset_key, KEYS[i]) == 1 then
local count = redis.call('HGET', hset_key, KEYS[i])
if tonumber(count) < tonumber(current_count) then
target = addr
target = KEYS[i]
current_count = count
elseif count == current_count and randomBool() then
target = addr
current_count = count
target = KEYS[i]
end
end
end

View File

@@ -129,7 +129,15 @@ Azure OpenAI 所对应的 `type` 为 `azure`。它特有的配置字段如下:
| ----------------- | -------- | -------- | ------ | -------------------------------------------------------- |
| `azureServiceUrl` | string | 必填 | - | Azure OpenAI 服务的 URL须包含 `api-version` 查询参数。 |
**注意:** Azure OpenAI 只支持配置一个 API Token。
**注意:**
1. Azure OpenAI 只支持配置一个 API Token。
2. `azureServiceUrl` 支持以下三种配置格式:
1. 完整路径格式,例如:`https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME/chat/completions?api-version=2024-02-15-preview`
- 插件会直接将请求转发至该 URL不会参考实际的请求路径。
2. 部署名称格式,例如:`https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME?api-version=2024-02-15-preview`
- 插件会根据实际的请求路径拼接后续路径。路径中的部署名称会保留不变,不会按照模型映射规则进行修改。同时支持 URL 中不包含部署名称的接口。
3. 资源名称格式,例如:`https://YOUR_RESOURCE_NAME.openai.azure.com?api-version=2024-02-15-preview`
- 插件会根据实际的请求路径拼接后续路径。路径中的部署名称会根据请求中的模型名称结合模型映射规则进行填入。同时支持 URL 中不包含部署名称的接口。
#### 月之暗面Moonshot

View File

@@ -100,7 +100,15 @@ For Azure OpenAI, the corresponding `type` is `azure`. Its unique configuration
|---------------------|-------------|----------------------|---------------|---------------------------------------------------------------------------------------------------------------|
| `azureServiceUrl` | string | Required | - | The URL of the Azure OpenAI service, must include the `api-version` query parameter. |
**Note:** Azure OpenAI only supports configuring one API Token.
**Note:**
1. Azure OpenAI only supports configuring one API Token.
2. `azureServiceUrl` accepts three formats
1. Full URL. e.g. `https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME/chat/completions?api-version=2024-02-15-preview`
- Request will be forwarded to the given URL, no matter what original path the request uses.
2. Resource name + deployment name, e.g. `https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME?api-version=2024-02-15-preview`
- The path will be updated based on the actual request path, leaving the deployment name unchanged. APIs with no deployment name in the path are also supported.
3. Resource name only, e.g. `https://YOUR_RESOURCE_NAME.openai.azure.com?api-version=2024-02-15-preview`
- The path will be updated based on the actual request path. The deployment name will be filled in based on the model name in the request and the configured model mapping rules. APIs with no deployment name in the path are also supported.
#### Moonshot

View File

@@ -97,7 +97,6 @@ func init() {
wrapper.ProcessResponseHeaders(onHttpResponseHeaders),
wrapper.ProcessStreamingResponseBody(onStreamingResponseBody),
wrapper.ProcessResponseBody(onHttpResponseBody),
wrapper.WithRebuildAfterRequests[config.PluginConfig](1000),
)
}
@@ -433,6 +432,10 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
return claudeChunk
}
if !needsClaudeResponseConversion(ctx) {
return chunk
}
// If provider doesn't implement any streaming handlers but we need Claude conversion
// First extract complete events from the chunk
events := provider.ExtractStreamingEvents(ctx, chunk)

View File

@@ -38,6 +38,7 @@ var (
ApiNameFiles: true,
ApiNameRetrieveFile: true,
ApiNameRetrieveFileContent: true,
ApiNameResponses: true,
}
regexAzureModelWithPath = regexp.MustCompile("/openai/deployments/(.+?)(?:/(.*)|$)")
)
@@ -100,8 +101,15 @@ func (m *azureProviderInitializer) CreateProvider(config ProviderConfig) (Provid
}
log.Debugf("azureProvider: found default model from serviceUrl: %s", defaultModel)
} else {
serviceUrlType = azureServiceUrlTypeDomainOnly
log.Debugf("azureProvider: no default model found in serviceUrl")
// If path doesn't match the /openai/deployments pattern,
// check if it's a custom full path or domain only
if serviceUrl.Path != "" && serviceUrl.Path != "/" {
serviceUrlType = azureServiceUrlTypeFull
log.Debugf("azureProvider: using custom full path: %s", serviceUrl.Path)
} else {
serviceUrlType = azureServiceUrlTypeDomainOnly
log.Debugf("azureProvider: no default model found in serviceUrl")
}
}
log.Debugf("azureProvider: serviceUrlType=%d", serviceUrlType)

View File

@@ -146,6 +146,20 @@ var azureInvalidConfigMissingToken = func() json.RawMessage {
return data
}()
// 测试配置Azure OpenAI Response API配置
var azureResponseAPIConfig = func() json.RawMessage {
data, _ := json.Marshal(map[string]interface{}{
"provider": map[string]interface{}{
"type": "azure",
"apiTokens": []string{
"sk-azure-multi",
},
"azureServiceUrl": "https://multi-resource.openai.azure.com/openai/responses?api-version=2025-04-01-preview",
},
})
return data
}()
func RunAzureParseConfigTests(t *testing.T) {
test.RunGoTest(t, func(t *testing.T) {
// 测试基本Azure OpenAI配置解析
@@ -203,6 +217,17 @@ func RunAzureParseConfigTests(t *testing.T) {
require.NotNil(t, config)
})
// 测试Azure Response API 配置解析
t.Run("azure response api config", func(t *testing.T) {
host, status := test.NewTestHost(azureResponseAPIConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
config, err := host.GetMatchConfig()
require.NoError(t, err)
require.NotNil(t, config)
})
// 测试Azure OpenAI无效配置缺少azureServiceUrl
t.Run("azure invalid config missing url", func(t *testing.T) {
host, status := test.NewTestHost(azureInvalidConfigMissingUrl)
@@ -411,6 +436,61 @@ func RunAzureOnHttpRequestBodyTests(t *testing.T) {
require.Equal(t, "gpt-4", model, "Model should be mapped correctly")
})
// 测试Azure OpenAI Response API 处理
t.Run("azure response api request body", func(t *testing.T) {
host, status := test.NewTestHost(azureResponseAPIConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
// 设置请求头
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/responses/v1/responses"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.HeaderStopIteration, action)
// 设置请求体
requestBody := `{
"input": [
{
"role": "user",
"content": [
{
"type": "input_text",
"text": "Explain quantum computing"
}
]
}
],
"model": "gpt-5",
"reasoning": {
"effort": "medium"
}
}`
action = host.CallOnHttpRequestBody([]byte(requestBody))
require.Equal(t, types.ActionContinue, action)
// 验证请求体是否被正确处理
transformedBody := host.GetRequestBody()
require.NotNil(t, transformedBody)
var bodyMap map[string]interface{}
err := json.Unmarshal(transformedBody, &bodyMap)
require.NoError(t, err)
model, exists := bodyMap["model"]
require.True(t, exists, "Model should exist in request body")
require.Equal(t, "gpt-5", model, "Model should be mapped correctly")
// 验证请求路径是否被正确转换
requestHeaders := host.GetRequestHeaders()
pathValue, hasPath := test.GetHeaderValue(requestHeaders, ":path")
require.True(t, hasPath, "Path header should exist")
require.Equal(t, pathValue, "/openai/responses?api-version=2025-04-01-preview", "Path should not equal Azure response api path")
})
// 测试Azure OpenAI请求体处理仅部署配置
t.Run("azure deployment only request body", func(t *testing.T) {
host, status := test.NewTestHost(azureDeploymentOnlyConfig)
@@ -566,6 +646,10 @@ func RunAzureOnHttpResponseBodyTests(t *testing.T) {
}
]
}`
action = host.CallOnHttpResponseHeaders([][2]string{
{"Content-Type", "application/json"},
})
require.Equal(t, types.ActionContinue, action)
action = host.CallOnHttpRequestBody([]byte(requestBody))
require.Equal(t, types.ActionContinue, action)

View File

@@ -81,11 +81,11 @@ type Response struct {
}
type Data struct {
RiskLevel string `json:"RiskLevel"`
AttackLevel string `json:"AttackLevel,omitempty"`
Result []Result `json:"Result,omitempty"`
Advice []Advice `json:"Advice,omitempty"`
Detail []Detail `json:"Detail,omitempty"`
RiskLevel string `json:"RiskLevel"`
AttackLevel string `json:"AttackLevel,omitempty"`
Result []Result `json:"Result,omitempty"`
Advice []Advice `json:"Advice,omitempty"`
Detail []Detail `json:"Detail,omitempty"`
}
type Result struct {
@@ -123,6 +123,7 @@ type AISecurityConfig struct {
denyCode int64
denyMessage string
protocolOriginal bool
riskLevelBar string
contentModerationLevelBar string
promptAttackLevelBar string
sensitiveDataLevelBar string
@@ -224,7 +225,7 @@ func isRiskLevelAcceptable(action string, data Data, config AISecurityConfig) bo
}
return true
} else {
return levelToInt(data.RiskLevel) < levelToInt(config.contentModerationLevelBar)
return levelToInt(data.RiskLevel) < levelToInt(config.riskLevelBar)
}
}
@@ -281,8 +282,17 @@ func parseConfig(json gjson.Result, config *AISecurityConfig) error {
if config.ak == "" || config.sk == "" {
return errors.New("invalid AK/SK config")
}
if obj := json.Get("riskLevelBar"); obj.Exists() {
config.riskLevelBar = obj.String()
} else {
config.riskLevelBar = HighRisk
}
config.token = json.Get("securityToken").String()
config.action = json.Get("action").String()
if obj := json.Get("action"); obj.Exists() {
config.action = json.Get("action").String()
} else {
config.action = "TextModerationPlus"
}
config.checkRequest = json.Get("checkRequest").Bool()
config.checkResponse = json.Get("checkResponse").Bool()
config.protocolOriginal = json.Get("protocol").String() == "original"

View File

@@ -28,7 +28,6 @@ func init() {
wrapper.ProcessResponseHeaders(onHttpResponseHeaders),
wrapper.ProcessStreamingResponseBody(onHttpStreamingBody),
wrapper.ProcessResponseBody(onHttpResponseBody),
wrapper.WithRebuildAfterRequests[AIStatisticsConfig](1000),
)
}

View File

@@ -291,6 +291,8 @@ func onHttpRequestHeaders(ctx wrapper.HttpContext, config KeyAuthConfig, log log
return deniedUnauthorizedConsumer()
}
proxywasm.AddHttpRequestHeader("X-Mse-Consumer", name)
// 全局生效:
// - global_auth == true 且 当前 domain/route 未配置该插件
// - global_auth 未设置 且 没有任何一个 domain/route 配置该插件
@@ -343,7 +345,6 @@ func deniedUnauthorizedConsumer() types.Action {
}
func authenticated(name string) types.Action {
_ = proxywasm.AddHttpRequestHeader("X-Mse-Consumer", name)
return types.ActionContinue
}

View File

@@ -63,7 +63,7 @@ type McpTool struct {
}
type ToolSecurity struct {
Id string `json:"type,omitempty"`
Id string `json:"id,omitempty"`
PassThrough bool `json:"passthrough,omitempty"`
Credential string `json:"credential"`
}

View File

@@ -0,0 +1,367 @@
# Higress
## 📋 Overview of This Release
This release includes **30** updates, covering various aspects such as feature enhancements, bug fixes, performance optimizations, and more.
### Update Distribution
- **New Features**: 13
- **Bug Fixes**: 7
- **Refactoring and Optimization**: 5
- **Documentation Updates**: 4
- **Testing Improvements**: 1
### ⭐ Key Highlights
This release includes **2** major updates, which are highly recommended for your attention:
- **feat: add rag mcp server** ([#2930](https://github.com/alibaba/higress/pull/2930)): By introducing the RAG MCP server, this update provides a new way for users to manage and retrieve knowledge, enhancing the functionality and practicality of the system.
- **refactor(mcp): use ECDS for golang filter configuration to avoid connection drain** ([#2931](https://github.com/alibaba/higress/pull/2931)): Using ECDS for filter configuration avoids instability caused by directly embedding golang filter configurations, improving the system's stability and maintainability, and reducing unnecessary service interruptions for users.
For more details, please refer to the important features section below.
---
## 🌟 Detailed Description of Important Features
Below is a detailed description of the key features and improvements in this release:
### 1. feat: add rag mcp server
**Related PR**: [#2930](https://github.com/alibaba/higress/pull/2930) | **Contributor**: [@2456868764](https://github.com/2456868764)
**Use Case**
In modern applications, knowledge management and retrieval have become increasingly important. Many systems require fast and accurate extraction and retrieval of information from large volumes of text data. RAG (Retrieval-Augmented Generation) technology combines retrieval and generation models to effectively enhance the efficiency and accuracy of knowledge management. This PR introduces a Model Context Protocol (MCP) server specifically for knowledge management and retrieval, meeting the needs of users for efficient information processing. The target user group includes enterprises and developers who need to handle large amounts of text data, especially in the fields of natural language processing (NLP) and machine learning.
**Feature Details**
This PR implements the RAG MCP server, adding multiple functional modules, including knowledge management, block management, search, and chat functions. The core features include:
1. **Knowledge Management**: Supports creating knowledge blocks from text.
2. **Block Management**: Provides functionalities for listing and deleting knowledge blocks.
3. **Search**: Supports keyword-based search.
4. **Chat Function**: Allows users to send chat messages and receive responses.
Technically, the server uses several external libraries, such as `github.com/dlclark/regexp2`, `github.com/milvus-io/milvus-sdk-go/v2`, and `github.com/pkoukk/tiktoken-go`, which provide regular expression handling, vector database management, and text encoding functionalities. Key code changes include adding an HTTP client, configuration files, and multiple processing functions to ensure the flexibility and configurability of the system.
**Usage Instructions**
To enable and configure the RAG MCP server, follow these steps:
1. Enable the MCP server in the `higress-config` configuration file and set the corresponding path and configuration items.
2. Configure the basic parameters of the RAG system, such as splitter type, chunk size, and overlap.
3. Configure the LLM (Large Language Model) provider and its API key, model name, etc.
4. Configure the embedding model provider and its API key, model name, etc.
5. Configure the vector database provider and its connection information.
Example configuration:
```yaml
rag:
splitter:
type: "recursive"
chunk_size: 500
chunk_overlap: 50
top_k: 5
threshold: 0.5
llm:
provider: "openai"
api_key: "your-llm-api-key"
model: "gpt-3.5-turbo"
embedding:
provider: "openai"
api_key: "your-embedding-api-key"
model: "text-embedding-ada-002"
vectordb:
provider: "milvus"
host: "localhost"
port: 19530
collection: "test_collection"
```
Notes:
- Ensure all configuration items are correct, especially API keys and model names.
- In production environments, it is recommended to adjust parameters such as timeout appropriately to adapt to different network conditions.
**Feature Value**
The RAG MCP server provides a complete solution for knowledge management and retrieval, enhancing the intelligence and automation of the system. Specific benefits include:
1. **Improved Efficiency**: Through integrated knowledge management and retrieval functions, users can quickly process and retrieve large volumes of text data, saving time and resources.
2. **Enhanced Accuracy**: Combining RAG technology, the system can more accurately extract and retrieve information, reducing error rates.
3. **Flexible Configuration**: Provides rich configuration options, allowing users to flexibly adjust according to actual needs, meeting the requirements of different scenarios.
4. **High Scalability**: Supports multiple providers and models, making it easy for users to choose suitable components and technology stacks based on business needs.
5. **Stability Improvement**: Through detailed configuration validation and error handling mechanisms, the stability and robustness of the system are ensured.
---
### 2. refactor(mcp): use ECDS for golang filter configuration to avoid connection drain
**Related PR**: [#2931](https://github.com/alibaba/higress/pull/2931) | **Contributor**: [@johnlanni](https://github.com/johnlanni)
**Use Case**
In the current implementation, Golang filter configurations are directly embedded in the HTTP_FILTER patch, which can lead to connection drain when configurations change. The main reasons are the non-deterministic ordering of Go maps in the `map[string]any` field and the listener configuration changes triggered by HTTP_FILTER updates. This issue affects the stability and user experience of the system. The target user group is developers and operations personnel using Higress for service mesh management.
**Feature Details**
This PR splits the configuration into two parts: HTTP_FILTER only contains filter references with `config_discovery`, while EXTENSION_CONFIG contains the actual Golang filter configuration. This way, configuration changes do not directly cause connection drain. The specific implementation includes updating the `constructMcpSessionStruct` and `constructMcpServerStruct` methods to return formats compatible with EXTENSION_CONFIG and updating unit tests to match the new configuration structure. The core innovation lies in using the ECDS mechanism to separate configurations, making configuration changes smoother.
**Usage Instructions**
Enabling and configuring this feature does not require any additional operations as it is automatically handled in the background. A typical use case is when configuring Golang filters in Higress; the system will automatically split them into HTTP_FILTER and EXTENSION_CONFIG. Users only need to configure Golang filters as usual. Note that when upgrading to the new version, ensure all related configuration files are updated and thoroughly tested in the production environment to ensure that configuration changes do not introduce other issues.
**Feature Value**
By separating configurations and using ECDS, this feature eliminates the connection drain problem during configuration changes, significantly improving the system's stability and user experience. Additionally, this design makes configurations easier to manage and maintain, reducing potential issues caused by configuration changes. For large-scale service mesh deployments, this improvement is particularly important as it reduces service interruptions caused by configuration changes, thereby enhancing the overall reliability and availability of the system.
---
## 📝 Full Changelog
### 🚀 New Features (Features)
- **Related PR**: [#2926](https://github.com/alibaba/higress/pull/2926) \
**Contributor**: @rinfx \
**Change Log**: This PR adds support for multimodal, function calls, and thinking in vertex-ai, involving the introduction of a regular expression library and improvements to the processing logic. \
**Feature Value**: By adding new features, vertex-ai can better support application needs in complex scenarios, such as multimodal data processing and more flexible function call methods, enhancing the system's flexibility and practicality.
- **Related PR**: [#2917](https://github.com/alibaba/higress/pull/2917) \
**Contributor**: @Aias00 \
**Change Log**: This PR adds support for Fireworks AI, expanding the functionality of the AI agent plugin, including the addition of necessary configuration files and test code. \
**Feature Value**: Adding support for Fireworks AI allows users to leverage the AI features provided by the platform, broadening the range of AI services that applications can integrate with, and enhancing the user experience.
- **Related PR**: [#2907](https://github.com/alibaba/higress/pull/2907) \
**Contributor**: @Aias00 \
**Change Log**: This PR upgrades wasm-go to support outputSchema, involving dependency updates for jsonrpc-converter and oidc plugins. \
**Feature Value**: By supporting outputSchema, the functionality and flexibility of the wasm-go plugin are enhanced, making it easier for users to handle and define output data structures.
- **Related PR**: [#2897](https://github.com/alibaba/higress/pull/2897) \
**Contributor**: @rinfx \
**Change Log**: This PR adds multimodal support and thinking functionality to the ai-proxy bedrock, achieved by extending the relevant code in bedrock.go. \
**Feature Value**: The added multimodal and thinking support enriches the ai-proxy's feature set, enabling users to utilize more advanced AI technologies for complex scenarios, enhancing the system's flexibility and practicality.
- **Related PR**: [#2891](https://github.com/alibaba/higress/pull/2891) \
**Contributor**: @rinfx \
**Change Log**: This PR adds the ability to configure specific detection services for different consumers in the AI content security plugin, allowing users to customize request and response check rules according to their needs. \
**Feature Value**: By supporting independent detection services for different consumers, this feature enhances the system's flexibility and security, enabling users to control the content review process more precisely, thus meeting diverse security policy requirements.
- **Related PR**: [#2883](https://github.com/alibaba/higress/pull/2883) \
**Contributor**: @Aias00 \
**Change Log**: This PR adds support for Meituan Longcat, including integration with the Longcat platform and related unit tests. \
**Feature Value**: Adding support for Meituan Longcat expands the plugin's functionality, allowing users to leverage more AI service providers' technologies, enhancing the flexibility and diversity of the application.
- **Related PR**: [#2867](https://github.com/alibaba/higress/pull/2867) \
**Contributor**: @Aias00 \
**Change Log**: This PR adds support for Gzip configuration and updates the default settings. By adding gzip options in the Helm configuration file, users can customize compression parameters to optimize response performance. \
**Feature Value**: Adding support for Gzip configuration allows users to adjust the compression level of HTTP responses according to their needs, helping to reduce the amount of transmitted data, speed up page loading, and improve the user experience.
- **Related PR**: [#2844](https://github.com/alibaba/higress/pull/2844) \
**Contributor**: @Aias00 \
**Change Log**: This PR enhances the consistent hashing algorithm for load balancing by supporting useSourceIp, modifying the relevant Go code files, and adding an example configuration file. \
**Feature Value**: The newly added useSourceIp option allows users to perform consistent hash load balancing based on source IP addresses, which helps to improve the stability and reliability of services under specific network conditions.
- **Related PR**: [#2843](https://github.com/alibaba/higress/pull/2843) \
**Contributor**: @erasernoob \
**Change Log**: This PR adds NVIDIA Triton server support to the AI agent plugin, including related configuration instructions and code implementation. \
**Feature Value**: Adding support for the Triton server expands the AI agent plugin's feature set, allowing users to leverage high-performance machine learning inference services.
- **Related PR**: [#2806](https://github.com/alibaba/higress/pull/2806) \
**Contributor**: @C-zhaozhou \
**Change Log**: This PR makes ai-security-guard compatible with the MultiModalGuard interface, adding support for multimodal APIs and updating the relevant documentation. \
**Feature Value**: By supporting multimodal APIs, the functionality of ai-security-guard is enhanced, enabling it to handle more complex content security scenarios, improving the user experience and security.
- **Related PR**: [#2727](https://github.com/alibaba/higress/pull/2727) \
**Contributor**: @Aias00 \
**Change Log**: This PR adds end-to-end testing support for OpenAI, including test cases for non-streaming and streaming requests. \
**Feature Value**: The added end-to-end testing for OpenAI ensures the system remains stable and accurate when handling different types of requests, improving the user experience.
- **Related PR**: [#2593](https://github.com/alibaba/higress/pull/2593) \
**Contributor**: @Xscaperrr \
**Change Log**: Adds the WorkloadSelector field to limit the scope of EnvoyFilter, ensuring that it does not affect other components in the same namespace in an open-source istio environment. \
**Feature Value**: By limiting EnvoyFilter to only apply to the Higress Gateway, this feature prevents interference with other istio gateways/sidecars in the environment, enhancing the security and isolation of the configuration.
### 🐛 Bug Fixes (Bug Fixes)
- **Related PR**: [#2938](https://github.com/alibaba/higress/pull/2938) \
**Contributor**: @wydream \
**Change Log**: This PR fixes the issue where prompt attack detection fails due to the lack of AttackLevel field support in MultiModalGuard mode, ensuring that all levels of attacks are correctly identified. \
**Feature Value**: By adding support for the AttackLevel field, the system's security is improved, preventing high-risk-level prompt attacks from going undetected, ensuring user experience and security.
- **Related PR**: [#2904](https://github.com/alibaba/higress/pull/2904) \
**Contributor**: @johnlanni \
**Change Log**: This PR fixes the issue where the original Authorization header might be overwritten when processing HTTP requests. By unconditionally saving the original header and checking that it is non-empty before writing it to the context, it ensures the accuracy and security of authentication information. \
**Feature Value**: This fix improves the system's security and stability, preventing potential authentication failures or security vulnerabilities due to lost authentication information, enhancing user experience and trust.
- **Related PR**: [#2899](https://github.com/alibaba/higress/pull/2899) \
**Contributor**: @Jing-ze \
**Change Log**: This PR optimizes the MCP server, including pre-parsing the host pattern to reduce runtime overhead and removing the unused DomainList field. It also fixes the SSE message format issue, particularly the handling of extra newline characters. \
**Feature Value**: By improving pattern matching efficiency and memory usage, as well as correcting errors in SSE messages, the user experience and service stability are enhanced, ensuring the correctness and integrity of data transmission.
- **Related PR**: [#2892](https://github.com/alibaba/higress/pull/2892) \
**Contributor**: @johnlanni \
**Change Log**: This PR corrects the JSON unmarshalling error when Claude API returns content in array format and removes redundant code structures, improving code quality and maintainability. \
**Feature Value**: This resolves the message parsing failure due to incorrect data types, enhancing the system's stability and user experience. For users using array as the content format, this fix ensures a smooth message processing flow.
- **Related PR**: [#2882](https://github.com/alibaba/higress/pull/2882) \
**Contributor**: @johnlanni \
**Change Log**: This PR addresses the SSE event chunking issue in Claude's streaming response conversion logic, improving protocol auto-conversion and tool invocation state tracking. \
**Feature Value**: It enhances the bidirectional conversion reliability between Claude and OpenAI-compatible providers, avoiding connection blocking, and enhancing the user experience.
- **Related PR**: [#2865](https://github.com/alibaba/higress/pull/2865) \
**Contributor**: @Thomas-Eliot \
**Change Log**: This PR solves the issue where SSE connections would be blocked when SSE events were split into multiple chunks. By adding a caching mechanism in the proxy mcp server scenario, it ensures the continuity of data stream processing. \
**Feature Value**: This fix resolves the potential issue of SSE connection interruption, enhancing the system's stability and user experience. Users will no longer encounter incomplete data reception due to network conditions or server response methods.
- **Related PR**: [#2859](https://github.com/alibaba/higress/pull/2859) \
**Contributor**: @lcfang \
**Change Log**: This PR solves the issue of route configuration failure when the registered service instance ports are inconsistent by adding a vport element in the mcpbridge. The main changes include updating the CRD definition, protobuf files, and related generated code. \
**Feature Value**: This feature ensures that even if the backend instance ports change, the service route configuration remains valid, thereby improving the system's stability and compatibility, providing a more reliable service experience for users.
### ♻️ Refactoring and Optimization (Refactoring)
- **Related PR**: [#2933](https://github.com/alibaba/higress/pull/2933) \
**Contributor**: @rinfx \
**Change Log**: This PR removes duplicate think tags in bedrock and vertex, reducing redundant code and improving code readability and maintainability. \
**Feature Value**: By removing unnecessary duplicate code, the overall quality and development efficiency of the project are improved, making the code structure clearer and easier to maintain and extend.
- **Related PR**: [#2927](https://github.com/alibaba/higress/pull/2927) \
**Contributor**: @rinfx \
**Change Log**: This PR modifies the API name extraction logic in the ai-statistics plugin, adjusting the check condition from a fixed length of 5 to at least 3 parts to enhance flexibility and compatibility. \
**Feature Value**: By relaxing the restriction on API string splitting, the system's support for different format API strings is enhanced, improving the system's adaptability and stability.
- **Related PR**: [#2922](https://github.com/alibaba/higress/pull/2922) \
**Contributor**: @daixijun \
**Change Log**: This PR upgrades the Higress SDK package reference in the project from `github.com/alibaba/higress` to `github.com/alibaba/higress/v2` to be compatible with the latest version. \
**Feature Value**: By updating the package name, the project can introduce and use the latest features and improvements of Higress, enhancing development efficiency and code quality.
- **Related PR**: [#2890](https://github.com/alibaba/higress/pull/2890) \
**Contributor**: @johnlanni \
**Change Log**: This PR refactors the `matchDomain` function, introduces the HostMatcher struct and matching types, replaces regular expressions with simple string operations to improve performance, and implements port stripping logic. \
**Feature Value**: By optimizing the host matching logic, the system performance and code maintainability are improved, making the handling of host headers with port numbers more accurate and efficient, enhancing the user experience.
### 📚 Documentation Updates (Documentation)
- **Related PR**: [#2915](https://github.com/alibaba/higress/pull/2915) \
**Contributor**: @a6d9a6m \
**Change Log**: This PR fixes a broken link in README_JP.md and adds missing parts in README.md, making the multilingual documentation more consistent. \
**Feature Value**: This improves the accuracy and consistency of the documentation, helping users find relevant information more easily, enhancing the user experience.
- **Related PR**: [#2912](https://github.com/alibaba/higress/pull/2912) \
**Contributor**: @hanxiantao \
**Change Log**: This PR optimizes the English and Chinese documentation for the hmac-auth-apisix plugin, adding more detailed configuration explanations, and improving the clarity of the documentation. \
**Feature Value**: By providing more detailed documentation, it helps developers better understand and use the hmac-auth-apisix plugin, improving the user experience.
- **Related PR**: [#2880](https://github.com/alibaba/higress/pull/2880) \
**Contributor**: @a6d9a6m \
**Change Log**: This PR fixes grammatical errors in README.md, README_JP.md, and README_ZH.md files, ensuring the correctness and consistency of the documentation. \
**Feature Value**: By correcting language errors in the documentation, the quality and readability of the documentation are improved, helping users better understand project information.
- **Related PR**: [#2873](https://github.com/alibaba/higress/pull/2873) \
**Contributor**: @CH3CHO \
**Change Log**: This PR adds methods to obtain Higress runtime logs and configurations in the non-crash-safe vulnerability issue template, helping to better investigate problems. \
**Feature Value**: By providing more detailed log and configuration information, users can more easily diagnose and resolve issues, improving the efficiency and accuracy of problem handling.
### 🧪 Testing Improvements (Testing)
- **Related PR**: [#2928](https://github.com/alibaba/higress/pull/2928) \
**Contributor**: @rinfx \
**Change Log**: This PR updates the test code for the ai-security-guard component, adding new test cases and adjusting some existing test logic. \
**Feature Value**: By improving the test coverage and accuracy of ai-security-guard, the stability and reliability of the entire project are enhanced, helping developers better understand and maintain related features.
---
## 📊 Release Statistics
- 🚀 New Features: 13
- 🐛 Bug Fixes: 7
- ♻️ Refactoring and Optimization: 5
- 📚 Documentation Updates: 4
- 🧪 Testing Improvements: 1
**Total**: 30 changes (including 2 major updates)
Thank you to all contributors for your hard work! 🎉
# Higress Console
## 📋 Overview of This Release
This release includes **4** updates, covering aspects such as feature enhancements, bug fixes, and performance optimizations.
### Update Content Distribution
- **New Features**: 1 item
- **Bug Fixes**: 2 items
- **Documentation Updates**: 1 item
### ⭐ Key Focus
This release contains **1** significant update, which is recommended for special attention:
- **feat: Support using a known service in OpenAI LLM provider** ([#589](https://github.com/higress-group/higress-console/pull/589)): This feature allows users to utilize existing service resources within the OpenAI LLM provider, thereby enhancing the flexibility and usability of the system, offering more options to users.
For more details, please refer to the "Important Features in Detail" section below.
---
## 🌟 Important Features in Detail
Here are detailed explanations of the important features and improvements in this release:
### 1. feat: Support using a known service in OpenAI LLM provider
**Related PR**: [#589](https://github.com/higress-group/higress-console/pull/589) | **Contributor**: [@CH3CHO](https://github.com/CH3CHO)
**Usage Background**
In many application scenarios, developers may wish to use their own custom OpenAI service instance instead of the default one. This could be due to specific security requirements, performance optimizations, or infrastructure constraints. This PR meets these needs by introducing support for known services. The target user group includes enterprise-level users and technical experts who require highly customized configurations. This feature addresses the issue of users not being able to flexibly choose and configure OpenAI services, improving the adaptability and user experience of the system.
**Feature Details**
This PR mainly implements the following:
1. Allows users to specify a custom service when configuring the OpenAI LLM provider.
2. Modifies the `OpenaiLlmProviderHandler` class, adding the `buildServiceSource` and `buildUpstreamService` methods to handle the logic for custom services.
3. Adds a delete method with an `internal` parameter to the `WasmPluginInstanceService` interface, supporting finer-grained control.
4. Updates the frontend internationalization resource files, adding prompts related to custom services. The key technical point lies in extending the existing architecture so that the system can recognize and use user-provided custom services while maintaining backward compatibility.
**Usage Instructions**
Enabling and configuring this feature is straightforward. First, when creating or updating an LLM provider, select the "Custom OpenAI Service" option and enter the corresponding service host and service path. Then, the system will automatically use these custom configurations to connect to the OpenAI service. Typical use cases include internally deployed OpenAI service instances within enterprises or environments requiring specific security policies. It's important to ensure that the entered URL is valid and that the service host and service path are correct. Best practice involves thorough testing to ensure that the custom configuration works as expected.
**Feature Value**
This new feature significantly enhances the flexibility and configurability of the system, allowing users to choose the most suitable OpenAI service based on their needs. For enterprise-level users who require high levels of customization, this flexibility is particularly crucial. Additionally, by supporting custom services, the system can better integrate into existing infrastructures, improving overall stability and performance. This is of great significance for maintaining and scaling large application systems. Overall, this feature not only enhances the user experience but also brings higher scalability and reliability to the system.
---
## 📝 Full Changelog
### 🐛 Bug Fixes
- **Related PR**: [#591](https://github.com/higress-group/higress-console/pull/591) \
**Contributor**: @CH3CHO \
**Change Log**: This PR fixes the issue where mandatory fields were not properly validated when enabling route rewrite configuration, ensuring that both `host` and `newPath.path` must provide valid values to avoid configuration errors. \
**Feature Value**: By correcting the validation logic for route rewrites, it prevents potential errors caused by incomplete configurations, enhancing the system's stability and user experience.
- **Related PR**: [#590](https://github.com/higress-group/higress-console/pull/590) \
**Contributor**: @CH3CHO \
**Change Log**: Fixed an error in the Route.customLabels handling logic, ensuring that built-in labels are correctly excluded during updates. \
**Feature Value**: Resolved the conflict between custom labels and built-in labels, ensuring flexibility and accuracy for users when updating route settings.
### 📚 Documentation
- **Related PR**: [#595](https://github.com/higress-group/higress-console/pull/595) \
**Contributor**: @CH3CHO \
**Change Log**: Removed irrelevant descriptions from README.md and added a code formatting guide, making the documentation more focused on the project itself. \
**Feature Value**: By updating README.md, users can more clearly understand the project structure and code formatting requirements, helping new contributors get up to speed quickly.
---
## 📊 Release Statistics
- 🚀 New Features: 1 item
- 🐛 Bug Fixes: 2 items
- 📚 Documentation Updates: 1 item
**Total**: 4 changes (including 1 significant update)
Thank you to all contributors for their hard work! 🎉

View File

@@ -0,0 +1,365 @@
# Higress
## 📋 本次发布概览
本次发布包含 **30** 项更新,涵盖了功能增强、Bug修复、性能优化等多个方面。
### 更新内容分布
- **新功能**: 13项
- **Bug修复**: 7项
- **重构优化**: 5项
- **文档更新**: 4项
- **测试改进**: 1项
### ⭐ 重点关注
本次发布包含 **2** 项重要更新,建议重点关注:
- **feat: add rag mcp server** ([#2930](https://github.com/alibaba/higress/pull/2930)): 通过引入RAG MCP服务器为用户提供了一种新的方式来管理与检索知识增强了系统的功能性和实用性。
- **refactor(mcp): use ECDS for golang filter configuration to avoid connection drain** ([#2931](https://github.com/alibaba/higress/pull/2931)): 采用ECDS进行过滤器配置避免了直接嵌入golang过滤器配置带来的不稳定因素提高了系统的稳定性和可维护性对用户而言减少了不必要的服务中断。
详细信息请查看下方重要功能详述部分。
---
## 🌟 重要功能详述
以下是本次发布中的重要功能和改进的详细说明:
### 1. feat: add rag mcp server
**相关PR**: [#2930](https://github.com/alibaba/higress/pull/2930) | **贡献者**: [@2456868764](https://github.com/2456868764)
**使用背景**
在现代应用中知识管理和检索变得越来越重要。许多系统需要快速、准确地从大量文本数据中提取和检索信息。RAG (Retrieval-Augmented Generation) 技术结合了检索和生成模型能够有效提升知识管理的效率和准确性。本PR引入了一个Model Context Protocol (MCP) 服务器专门用于知识管理和检索满足了用户对高效信息处理的需求。目标用户群体包括需要处理大量文本数据的企业和开发者尤其是在自然语言处理NLP和机器学习领域。
**功能详述**
该PR实现了RAG MCP服务器新增了多个功能模块包括知识管理、块管理、搜索和聊天功能。核心功能包括
1. **知识管理**:支持从文本创建知识块。
2. **块管理**:提供列表显示和删除知识块的功能。
3. **搜索**:支持基于关键词的搜索功能。
4. **聊天功能**:允许用户发送聊天消息并获取响应。
技术实现上,该服务器使用了多种外部库,如`github.com/dlclark/regexp2``github.com/milvus-io/milvus-sdk-go/v2``github.com/pkoukk/tiktoken-go`这些库提供了正则表达式处理、向量数据库管理和文本编码等功能。关键代码变更包括新增HTTP客户端、配置文件和多个处理函数确保了系统的灵活性和可配置性。
**使用方式**
启用和配置RAG MCP服务器的步骤如下
1.`higress-config`配置文件中启用MCP服务器并设置相应的路径和配置项。
2. 配置RAG系统的基础参数如分块器类型、块大小和重叠等。
3. 配置LLM大语言模型提供商及其API密钥、模型名称等。
4. 配置嵌入模型提供商及其API密钥、模型名称等。
5. 配置向量数据库提供商及其连接信息。
示例配置如下:
```yaml
rag:
splitter:
type: "recursive"
chunk_size: 500
chunk_overlap: 50
top_k: 5
threshold: 0.5
llm:
provider: "openai"
api_key: "your-llm-api-key"
model: "gpt-3.5-turbo"
embedding:
provider: "openai"
api_key: "your-embedding-api-key"
model: "text-embedding-ada-002"
vectordb:
provider: "milvus"
host: "localhost"
port: 19530
collection: "test_collection"
```
注意事项:
- 确保所有配置项正确无误特别是API密钥和模型名称。
- 在生产环境中,建议对超时时间等参数进行适当调整以适应不同网络环境。
**功能价值**
RAG MCP服务器为用户提供了一套完整的知识管理和检索解决方案提升了系统的智能化和自动化水平。具体好处包括
1. **提高效率**:通过集成的知识管理和检索功能,用户可以快速处理和检索大量文本数据,节省时间和资源。
2. **增强准确性**结合RAG技术系统能够更准确地提取和检索信息减少错误率。
3. **灵活配置**:提供了丰富的配置选项,用户可以根据实际需求进行灵活调整,满足不同场景下的需求。
4. **扩展性强**:支持多种提供商和模型,方便用户根据业务需求选择合适的组件和技术栈。
5. **稳定性提升**:通过详细的配置验证和错误处理机制,确保系统的稳定性和健壮性。
---
### 2. refactor(mcp): use ECDS for golang filter configuration to avoid connection drain
**相关PR**: [#2931](https://github.com/alibaba/higress/pull/2931) | **贡献者**: [@johnlanni](https://github.com/johnlanni)
**使用背景**
当前实现中Golang过滤器配置直接嵌入在HTTP_FILTER补丁中这会导致配置更改时出现连接耗尽的问题。主要原因是Go map在`map[string]any`字段中的排序不一致以及HTTP_FILTER更新触发的监听器配置更改。这个问题影响了系统的稳定性和用户体验。目标用户群体是使用Higress进行服务网格管理的开发者和运维人员。
**功能详述**
此PR将配置分为两部分HTTP_FILTER仅包含带有`config_discovery`的过滤器引用而EXTENSION_CONFIG则包含实际的Golang过滤器配置。通过这种方式配置更改不会直接导致连接耗尽。具体实现包括更新`constructMcpSessionStruct``constructMcpServerStruct`方法以返回与EXTENSION_CONFIG兼容的格式并更新单元测试以匹配新的配置结构。核心技术创新在于利用ECDS机制分离配置使配置更改更加平滑。
**使用方式**
启用和配置这个功能不需要额外的操作因为它是在后台自动处理的。典型的使用场景是在Higress中配置Golang过滤器时系统会自动将其分为HTTP_FILTER和EXTENSION_CONFIG两部分。用户只需按照常规方式配置Golang过滤器即可。需要注意的是在升级到新版本时确保所有相关的配置文件都已更新并且在生产环境中进行充分的测试以确保配置更改不会引入其他问题。
**功能价值**
通过分离配置并使用ECDS此功能消除了配置更改时的连接耗尽问题显著提高了系统的稳定性和用户体验。此外这种设计使得配置更易于管理和维护减少了因配置更改引起的潜在问题。对于大规模的服务网格部署这一改进尤为重要因为它可以减少因配置更改导致的服务中断从而提高整体系统的可靠性和可用性。
---
## 📝 完整变更日志
### 🚀 新功能 (Features)
- **Related PR**: [#2926](https://github.com/alibaba/higress/pull/2926) \
**Contributor**: @rinfx \
**Change Log**: 此PR在vertex-ai中添加了对多模态、函数调用和思考的支持涉及引入正则表达式库及处理逻辑的改进。 \
**Feature Value**: 通过增加新功能使得vertex-ai能够更好地支持复杂场景下的应用需求如多模态数据处理和更灵活的功能调用方式提升了系统的灵活性与实用性。
- **Related PR**: [#2917](https://github.com/alibaba/higress/pull/2917) \
**Contributor**: @Aias00 \
**Change Log**: 此次PR新增了对Fireworks AI的支持扩展了AI代理插件的功能包括必要的配置文件和测试代码的添加。 \
**Feature Value**: 增加对Fireworks AI的支持使用户能够利用该平台提供的AI功能拓宽了应用程序可以集成的AI服务范围增强了用户体验。
- **Related PR**: [#2907](https://github.com/alibaba/higress/pull/2907) \
**Contributor**: @Aias00 \
**Change Log**: 此PR升级了wasm-go以支持outputSchema功能涉及jsonrpc-converter和oidc插件的依赖更新。 \
**Feature Value**: 通过支持outputSchema增强了wasm-go插件的功能性和灵活性使用户能够更方便地处理和定义输出数据结构。
- **Related PR**: [#2897](https://github.com/alibaba/higress/pull/2897) \
**Contributor**: @rinfx \
**Change Log**: 此次PR为ai-proxy bedrock添加了多模态支持及thinking功能通过扩展bedrock.go中的相关代码来实现。 \
**Feature Value**: 新增的多模态和thinking支持丰富了ai-proxy的功能集使得用户能够利用更先进的AI技术处理复杂场景提升了系统的灵活性与实用性。
- **Related PR**: [#2891](https://github.com/alibaba/higress/pull/2891) \
**Contributor**: @rinfx \
**Change Log**: 此PR在AI内容安全插件中添加了针对不同消费者配置特定检测服务的功能允许用户根据需求自定义请求和响应的检查规则。 \
**Feature Value**: 通过支持为不同消费者设置独立的检测服务,该功能增强了系统的灵活性与安全性,使用户能够更精确地控制内容审查过程,从而满足多样化的安全策略需求。
- **Related PR**: [#2883](https://github.com/alibaba/higress/pull/2883) \
**Contributor**: @Aias00 \
**Change Log**: 此PR为美团Longcat增加了支持包括实现与Longcat平台的集成和相关的单元测试。 \
**Feature Value**: 新增对美团Longcat的支持扩展了插件的功能范围使得用户能够利用更多AI服务提供商的技术增强了应用的灵活性和多样性。
- **Related PR**: [#2867](https://github.com/alibaba/higress/pull/2867) \
**Contributor**: @Aias00 \
**Change Log**: 此PR新增了Gzip配置支持并更新了默认设置。通过在Helm配置文件中添加gzip选项用户可以自定义压缩参数以优化响应性能。 \
**Feature Value**: 增加了对Gzip配置的支持使得用户可以根据需求调整HTTP响应的压缩级别有助于减少传输的数据量加快页面加载速度提升用户体验。
- **Related PR**: [#2844](https://github.com/alibaba/higress/pull/2844) \
**Contributor**: @Aias00 \
**Change Log**: 此PR通过支持useSourceIp增强了负载均衡的一致性哈希算法修改了相关的Go代码文件以及添加了一个示例配置文件。 \
**Feature Value**: 新增的useSourceIp选项允许用户基于源IP地址进行一致性哈希负载均衡这有助于提高服务在特定网络条件下的稳定性和可靠性。
- **Related PR**: [#2843](https://github.com/alibaba/higress/pull/2843) \
**Contributor**: @erasernoob \
**Change Log**: 此PR为AI代理插件添加了NVIDIA Triton服务器支持包括相关配置说明和代码实现。 \
**Feature Value**: 新增对Triton服务器的支持扩展了AI代理插件的功能集使用户能够利用高性能的机器学习推理服务。
- **Related PR**: [#2806](https://github.com/alibaba/higress/pull/2806) \
**Contributor**: @C-zhaozhou \
**Change Log**: 此PR使ai-security-guard兼容MultiModalGuard接口增加了多模态API的支持并更新了相关文档。 \
**Feature Value**: 通过支持多模态API增强了ai-security-guard的功能使其能够处理更复杂的内容安全场景提升了用户体验和安全性。
- **Related PR**: [#2727](https://github.com/alibaba/higress/pull/2727) \
**Contributor**: @Aias00 \
**Change Log**: 本PR为OpenAI添加了端到端测试支持包括非流式和流式请求的测试用例。 \
**Feature Value**: 新增的OpenAI端到端测试有助于确保系统在处理不同类型的请求时保持稳定性和准确性提升了用户体验。
- **Related PR**: [#2593](https://github.com/alibaba/higress/pull/2593) \
**Contributor**: @Xscaperrr \
**Change Log**: 增加了WorkloadSelector字段以限制EnvoyFilter的作用范围确保在存在开源istio环境下不影响同命名空间的其他组件。 \
**Feature Value**: 通过限定EnvoyFilter仅作用于Higress Gateway避免了对环境内其他istio gateway/sidecar造成干扰提升了配置的安全性和隔离性。
### 🐛 Bug修复 (Bug Fixes)
- **Related PR**: [#2938](https://github.com/alibaba/higress/pull/2938) \
**Contributor**: @wydream \
**Change Log**: 此PR修复了MultiModalGuard模式下因缺少AttackLevel字段支持而导致的提示攻击检测失效问题确保所有级别的攻击都能被正确识别。 \
**Feature Value**: 通过增加对AttackLevel字段的支持提高了系统安全性防止高风险级别的提示攻击未被拦截的情况发生保障了用户体验和安全。
- **Related PR**: [#2904](https://github.com/alibaba/higress/pull/2904) \
**Contributor**: @johnlanni \
**Change Log**: 修复了在处理HTTP请求时原始Authorization头可能被覆盖的问题。通过无条件保存并检查非空后再写入上下文确保认证信息的准确性和安全性。 \
**Feature Value**: 该修复提升了系统的安全性和稳定性,避免了因认证信息丢失而导致的潜在认证失败或安全漏洞问题,增强了用户体验和信任度。
- **Related PR**: [#2899](https://github.com/alibaba/higress/pull/2899) \
**Contributor**: @Jing-ze \
**Change Log**: 此PR对MCP服务器进行了优化包括提前解析主机模式以减少运行时开销和移除未使用的DomainList字段。同时修复了SSE消息格式问题特别是处理多余换行符的问题。 \
**Feature Value**: 通过提高模式匹配效率和内存使用率以及修正SSE消息中的错误提升了用户体验和服务稳定性确保了数据传输的正确性和完整性。
- **Related PR**: [#2892](https://github.com/alibaba/higress/pull/2892) \
**Contributor**: @johnlanni \
**Change Log**: 修正了Claude API返回数组格式content时的JSON解组错误并移除了重复的代码结构提升了代码质量和维护性。 \
**Feature Value**: 解决了由于不正确的数据类型而导致的消息解析失败问题增强了系统的稳定性和用户体验对于使用数组作为content格式的用户来说这修复确保了消息处理流程的顺畅。
- **Related PR**: [#2882](https://github.com/alibaba/higress/pull/2882) \
**Contributor**: @johnlanni \
**Change Log**: 解决了Claude流式响应转换逻辑中的SSE事件分块问题改进了协议自动转换和工具调用状态跟踪。 \
**Feature Value**: 提高了Claude与OpenAI兼容提供者之间的双向转换可靠性避免了连接阻塞增强了用户体验。
- **Related PR**: [#2865](https://github.com/alibaba/higress/pull/2865) \
**Contributor**: @Thomas-Eliot \
**Change Log**: 该PR解决了当SSE事件被分割成多个chunk时SSE连接会被阻塞的问题。通过在代理mcp server场景下增加缓存机制来确保数据流处理的连续性。 \
**Feature Value**: 修复了可能导致SSE连接中断的问题增强了系统的稳定性和用户体验。用户不再会因为网络条件或服务器响应方式而遇到数据接收不完整的情况。
- **Related PR**: [#2859](https://github.com/alibaba/higress/pull/2859) \
**Contributor**: @lcfang \
**Change Log**: 此PR通过在mcpbridge中新增vport元素解决了当注册服务实例端口不一致时路由配置失效的问题。主要改动包括更新CRD定义、protobuf文件及相关生成代码。 \
**Feature Value**: 该功能确保了即使后端实例端口发生变化,服务的路由配置也能保持有效,从而提高了系统的稳定性和兼容性,为用户提供了更加可靠的服务体验。
### ♻️ 重构优化 (Refactoring)
- **Related PR**: [#2933](https://github.com/alibaba/higress/pull/2933) \
**Contributor**: @rinfx \
**Change Log**: 移除了bedrock和vertex中重复的think标签减少了冗余代码提高了代码的可读性和维护性。 \
**Feature Value**: 通过去除不必要的重复代码,提升了项目的整体质量和开发效率,使得代码结构更加清晰,方便后续的维护和扩展。
- **Related PR**: [#2927](https://github.com/alibaba/higress/pull/2927) \
**Contributor**: @rinfx \
**Change Log**: 此PR修改了ai-statistics插件中API名称提取逻辑将检查条件从固定长度5调整为至少3个部分以提高灵活性和兼容性。 \
**Feature Value**: 通过放宽API字符串分割的限制条件增强了系统对不同格式API字符串的支持能力提升了系统的适应性和稳定性。
- **Related PR**: [#2922](https://github.com/alibaba/higress/pull/2922) \
**Contributor**: @daixijun \
**Change Log**: 该PR将项目中引用的Higress SDK包名从github.com/alibaba/higress升级为github.com/alibaba/higress/v2以兼容最新版本。 \
**Feature Value**: 通过更新包名确保项目可以引入并使用Higress的最新功能和改进提升开发效率和代码质量。
- **Related PR**: [#2890](https://github.com/alibaba/higress/pull/2890) \
**Contributor**: @johnlanni \
**Change Log**: 重构了`matchDomain`函数引入HostMatcher结构及匹配类型替换正则表达式以简单字符串操作提高性能并实现端口剥离逻辑。 \
**Feature Value**: 通过优化主机匹配逻辑提高了系统性能和代码可维护性,使得处理包含端口号的主机头更加准确高效,提升了用户体验。
### 📚 文档更新 (Documentation)
- **Related PR**: [#2915](https://github.com/alibaba/higress/pull/2915) \
**Contributor**: @a6d9a6m \
**Change Log**: 修复了README_JP.md中的一个失效链接并在README.md中添加了缺失的部分使多语言文档内容更加一致。 \
**Feature Value**: 提高了文档的准确性和一致性,帮助用户更容易地找到相关信息,提升了用户体验。
- **Related PR**: [#2912](https://github.com/alibaba/higress/pull/2912) \
**Contributor**: @hanxiantao \
**Change Log**: 优化了hmac-auth-apisix插件的英文和中文文档增加了更多配置说明细节提升了文档清晰度。 \
**Feature Value**: 通过更详细的文档解释帮助开发者更好地理解和使用hmac-auth-apisix插件提高了用户体验。
- **Related PR**: [#2880](https://github.com/alibaba/higress/pull/2880) \
**Contributor**: @a6d9a6m \
**Change Log**: 此PR修复了README.md、README_JP.md和README_ZH.md文件中的语法错误确保文档的正确性和一致性。 \
**Feature Value**: 通过修正文档中的语言错误,提升了文档的质量与可读性,帮助用户更好地理解项目信息。
- **Related PR**: [#2873](https://github.com/alibaba/higress/pull/2873) \
**Contributor**: @CH3CHO \
**Change Log**: 此PR在非崩溃安全漏洞问题模板中增加了获取Higress运行时日志和配置的方法帮助更好地调查问题。 \
**Feature Value**: 通过提供更详细的日志和配置信息,用户可以更容易地诊断和解决问题,提高了问题处理的效率和准确性。
### 🧪 测试改进 (Testing)
- **Related PR**: [#2928](https://github.com/alibaba/higress/pull/2928) \
**Contributor**: @rinfx \
**Change Log**: 该PR更新了ai-security-guard组件的测试代码增加了新的测试用例并调整了一些现有的测试逻辑。 \
**Feature Value**: 通过改进ai-security-guard的测试覆盖率和准确性提高了整个项目的稳定性和可靠性有助于开发者更好地理解和维护相关功能。
---
## 📊 发布统计
- 🚀 新功能: 13项
- 🐛 Bug修复: 7项
- ♻️ 重构优化: 5项
- 📚 文档更新: 4项
- 🧪 测试改进: 1项
**总计**: 30项更改(包含2项重要更新)
感谢所有贡献者的辛勤付出!🎉
# Higress Console
## 📋 本次发布概览
本次发布包含 **4** 项更新,涵盖了功能增强、Bug修复、性能优化等多个方面。
### 更新内容分布
- **新功能**: 1项
- **Bug修复**: 2项
- **文档更新**: 1项
### ⭐ 重点关注
本次发布包含 **1** 项重要更新,建议重点关注:
- **feat: Support using a known service in OpenAI LLM provider** ([#589](https://github.com/higress-group/higress-console/pull/589)): 该功能允许用户在OpenAI LLM提供者中利用现有的服务资源从而扩展了系统的灵活性和可用性为用户提供更多选择。
详细信息请查看下方重要功能详述部分。
---
## 🌟 重要功能详述
以下是本次发布中的重要功能和改进的详细说明:
### 1. feat: Support using a known service in OpenAI LLM provider
**相关PR**: [#589](https://github.com/higress-group/higress-console/pull/589) | **贡献者**: [@CH3CHO](https://github.com/CH3CHO)
**使用背景**
在许多应用场景中开发者可能希望使用自定义的OpenAI服务实例而不是默认的服务。这可能是由于特定的安全要求、性能优化或基础设施限制。此PR通过引入对已知服务的支持满足了这些需求。目标用户群体包括需要高度定制化配置的企业级用户和技术专家。此功能解决了用户无法灵活选择和配置OpenAI服务的问题提升了系统的适应性和用户体验。
**功能详述**
该PR主要实现了以下功能1. 允许用户在配置OpenAI LLM提供者时指定自定义的服务。2. 修改了`OpenaiLlmProviderHandler`类,添加了`buildServiceSource``buildUpstreamService`方法以处理自定义服务的逻辑。3. 在`WasmPluginInstanceService`接口中新增了带`internal`参数的删除方法以支持更细粒度的控制。4. 更新了前端国际化资源文件,增加了与自定义服务相关的提示信息。核心技术要点在于对现有架构的扩展,使得系统能够识别并使用用户提供的自定义服务,同时保持了向后兼容性。
**使用方式**
启用和配置这个功能非常简单。首先在创建或更新LLM提供者时选择“自定义OpenAI服务”选项并填写相应的服务主机和服务路径。然后系统会自动使用这些自定义配置来连接OpenAI服务。典型的使用场景包括企业内部部署的OpenAI服务实例或者需要特定安全策略的环境。注意事项包括确保输入的URL是有效的并且服务主机和服务路径正确。最佳实践是进行充分的测试确保自定义配置能够正常工作。
**功能价值**
这一新功能显著提升了系统的灵活性和可配置性使用户能够根据自身需求选择最合适的OpenAI服务。对于需要高度定制化的企业级用户来说这种灵活性尤为重要。此外通过支持自定义服务系统可以更好地集成到现有的基础设施中提高了整体的稳定性和性能。这对于维护和扩展大型应用系统具有重要意义。总体而言这一功能不仅增强了用户体验还为系统带来了更高的可扩展性和可靠性。
---
## 📝 完整变更日志
### 🐛 Bug修复 (Bug Fixes)
- **Related PR**: [#591](https://github.com/higress-group/higress-console/pull/591) \
**Contributor**: @CH3CHO \
**Change Log**: 此PR修复了在启用路由重写配置时未正确验证必填字段的问题确保`host``newPath.path`都必须提供有效值以避免配置错误。 \
**Feature Value**: 通过修正路由重写的验证逻辑,防止因配置不完整而导致的潜在错误,提升了系统的稳定性和用户体验。
- **Related PR**: [#590](https://github.com/higress-group/higress-console/pull/590) \
**Contributor**: @CH3CHO \
**Change Log**: 修正了Route.customLabels处理逻辑中的错误确保内置标签在更新时能够被正确排除。 \
**Feature Value**: 解决了自定义标签与内置标签冲突的问题,保证了用户在更新路由设置时的灵活性和准确性。
### 📚 文档更新 (Documentation)
- **Related PR**: [#595](https://github.com/higress-group/higress-console/pull/595) \
**Contributor**: @CH3CHO \
**Change Log**: 移除了README.md中与项目无关的描述并添加了代码格式指南使得文档更加专注于项目本身。 \
**Feature Value**: 通过更新README.md使用户能够更清晰地了解项目的结构和代码规范要求有助于新贡献者快速上手。
---
## 📊 发布统计
- 🚀 新功能: 1项
- 🐛 Bug修复: 2项
- 📚 文档更新: 1项
**总计**: 4项更改(包含1项重要更新)
感谢所有贡献者的辛勤付出!🎉