Compare commits

...

14 Commits

Author SHA1 Message Date
lvshui
13261bdc3d release: v2.1.9-rc.1 (#2984) 2025-10-09 17:32:35 +08:00
rinfx
ac2f7dedaa [key-auth] record consumer name once the consumer name is determined (#2978) 2025-10-09 11:22:09 +08:00
EricaLiu
742b9498e4 fix ToolSecurity field (#2952) 2025-10-06 15:10:08 +08:00
Kent Dong
b351dc45e3 doc: Update the description of azureServiceUrl in ai-proxy README files (#2965) 2025-10-06 15:09:53 +08:00
Kent Dong
096b97e433 fix: Eliminate compatibility risk of matching all domains for an MCP server (#2973) 2025-10-06 15:09:21 +08:00
Jun
aebe354055 add vectordb mapping (#2968) 2025-10-06 15:08:13 +08:00
johnlanni
45a11734bd remove rebuild logic in ai-proxy&ai-statistics 2025-09-26 16:26:06 +08:00
johnlanni
063bfbfcfe fix(ai-proxy): fix streaming process 2025-09-23 19:44:30 +08:00
rinfx
9a3ccff4c8 opt(ai-load-balancer): update global least request lua script for ai-load-balancer (#2945) 2025-09-23 19:24:33 +08:00
澄潭
623c8da8d8 fix(ai-proxy): Fix Azure OpenAI Response API handling and service URL type detection (#2948) 2025-09-23 18:49:55 +08:00
Jun
e2d00da861 fix: llm can be empty and optimize document and prompt (#2942) 2025-09-23 14:03:00 +08:00
GuoChenxu
bfca4667bb release note supports system prompt (#2943)
Signed-off-by: guochenxu <guochenxu11@outlook.com>
2025-09-23 14:00:40 +08:00
rinfx
732aacdbc5 fix(ai-security-guard): compatible with old configs (#2941) 2025-09-23 10:23:25 +08:00
github-actions[bot]
a694865f72 Add release notes (#2940)
Co-authored-by: johnlanni <6763318+johnlanni@users.noreply.github.com>
2025-09-21 16:18:23 +08:00
37 changed files with 2483 additions and 803 deletions

View File

@@ -66,6 +66,40 @@ jobs:
"https://github.com/${GITHUB_REPO_OWNER}/${GITHUB_REPO_NAME}/releases/tag/${RELEASE_VERSION}" \
-o release_page.html
# Extract system prompt content from HTML
echo "Extracting system prompt content..."
pip install beautifulsoup4 markdownify
SYSTEM_PROMPT=$(python3 -c "
import sys
from bs4 import BeautifulSoup
from markdownify import markdownify
with open('release_page.html', 'r') as f:
soup = BeautifulSoup(f, 'html.parser')
system_prompt_header = soup.find('h2', string='system prompt')
if system_prompt_header:
content = []
for sibling in system_prompt_header.next_siblings:
if sibling.name == 'h2':
break
content.append(str(sibling))
html_content = ''.join(content).strip()
# Convert HTML to Markdown
if html_content:
markdown_content = markdownify(html_content)
print(markdown_content.strip())
else:
print('')
else:
print('')
")
if [ -z "${SYSTEM_PROMPT}" ]; then
echo "No system prompt found in release notes."
else
echo "System prompt content: ${SYSTEM_PROMPT}"
fi
echo "Extracting PR numbers from ${GITHUB_REPO_OWNER}/${GITHUB_REPO_NAME} release notes..."
PR_NUMS=$(cat release_page.html | grep -o "/${GITHUB_REPO_OWNER}/${GITHUB_REPO_NAME}/pull/[0-9]*" | grep -o "[0-9]*$" | sort -n | uniq | tr '\n' ',')
PR_NUMS=${PR_NUMS%,}
@@ -88,11 +122,24 @@ jobs:
cd higress-report-agent
pip install uv
uv sync
# Build command
CMD_ARGS="--mode 2 --choice 2 --pr_nums ${PR_NUMS}"
if [ -n "${IMPORTANT_PR_NUMS}" ]; then
uv run report_main.py --mode 2 --choice 2 --pr_nums ${PR_NUMS} --important_prs ${IMPORTANT_PR_NUMS}
else
uv run report_main.py --mode 2 --choice 2 --pr_nums ${PR_NUMS}
CMD_ARGS="${CMD_ARGS} --important_prs ${IMPORTANT_PR_NUMS}"
fi
if [ -n "${SYSTEM_PROMPT}" ]; then
echo "${SYSTEM_PROMPT}" > temp_system_prompt.txt
CMD_ARGS="${CMD_ARGS} --sys_prompt_file temp_system_prompt.txt"
fi
uv run report_main.py ${CMD_ARGS}
# Clean up temporary file
if [ -f "temp_system_prompt.txt" ]; then
rm temp_system_prompt.txt
fi
cp report.md ../
cp report.EN.md ../
cd ..

View File

@@ -1 +1 @@
v2.1.8
v2.1.9-rc.1

View File

@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 2.1.8
appVersion: 2.1.9-rc.1
description: Helm chart for deploying higress gateways
icon: https://higress.io/img/higress_logo_small.png
home: http://higress.io/
@@ -15,4 +15,4 @@ dependencies:
repository: "file://../redis"
version: 0.0.1
type: application
version: 2.1.8
version: 2.1.9-rc.1

View File

@@ -1,9 +1,9 @@
dependencies:
- name: higress-core
repository: file://../core
version: 2.1.8
version: 2.1.9-rc.1
- name: higress-console
repository: https://higress.io/helm-charts/
version: 2.1.8
digest: sha256:20abb8842774217237293b49a46e885dd3e5edd3da118c5fe449e50d19e9c9e3
generated: "2025-09-21T14:55:25.084965+08:00"
digest: sha256:0899e57f8744790bef3061413d6ce43ca4a54ac21fbe44fc0af7db973da28a79
generated: "2025-10-09T17:25:21.377573+08:00"

View File

@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 2.1.8
appVersion: 2.1.9-rc.1
description: Helm chart for deploying Higress gateways
icon: https://higress.io/img/higress_logo_small.png
home: http://higress.io/
@@ -12,9 +12,9 @@ sources:
dependencies:
- name: higress-core
repository: "file://../core"
version: 2.1.8
version: 2.1.9-rc.1
- name: higress-console
repository: "https://higress.io/helm-charts/"
version: 2.1.8
type: application
version: 2.1.8
version: 2.1.9-rc.1

View File

@@ -53,7 +53,8 @@ func (a mcpServer) Parse(annotations Annotations, config *Ingress, globalContext
var matchRuleDomains []string
rawMatchRuleDomains, _ := annotations.ParseStringASAP(mcpServerMatchRuleDomains)
if rawMatchRuleDomains == "" || rawMatchRuleDomains == "*" {
// Match all domains. Leave an empty slice.
// Use wildcard to match all domains so we don't rely on the default behavior of empty domain list
matchRuleDomains = []string{"*"}
} else if strings.Contains(rawMatchRuleDomains, ",") {
matchRuleDomains = strings.Split(rawMatchRuleDomains, ",")
} else {

View File

@@ -132,7 +132,7 @@ func TestMCPServer_Parse(t *testing.T) {
},
expect: &mcpserver.McpServer{
Name: "default/route",
Domains: nil,
Domains: []string{"*"},
PathMatchType: "prefix",
PathMatchValue: "/mcp",
UpstreamType: "rest",
@@ -153,7 +153,7 @@ func TestMCPServer_Parse(t *testing.T) {
},
expect: &mcpserver.McpServer{
Name: "default/route",
Domains: nil,
Domains: []string{"*"},
PathMatchType: "prefix",
PathMatchValue: "/mcp",
UpstreamType: "rest",

View File

@@ -55,16 +55,21 @@ require (
github.com/milvus-io/milvus-sdk-go/v2 v2.4.2 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/openai/openai-go/v2 v2.7.0 // indirect
github.com/orcaman/concurrent-map v0.0.0-20210501183033-44dafcb38ecc // indirect
github.com/pkoukk/tiktoken-go v0.1.8 // indirect
github.com/prometheus/client_golang v1.14.0 // indirect
github.com/prometheus/client_model v0.4.0 // indirect
github.com/prometheus/common v0.37.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
github.com/tidwall/gjson v1.18.0 // indirect
github.com/tidwall/match v1.2.0 // indirect
github.com/tidwall/pretty v1.2.1 // indirect
github.com/tidwall/sjson v1.2.5 // indirect
github.com/tjfoc/gmsm v1.4.1 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.27.0 // indirect
golang.org/x/net v0.33.0 // indirect
golang.org/x/net v0.34.0 // indirect
golang.org/x/time v0.3.0 // indirect
google.golang.org/grpc v1.59.0 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
@@ -99,9 +104,9 @@ require (
github.com/shopspring/decimal v1.4.0 // indirect
go.opentelemetry.io/otel v1.26.0 // indirect
go.opentelemetry.io/otel/trace v1.26.0 // indirect
golang.org/x/crypto v0.31.0 // indirect
golang.org/x/crypto v0.32.0 // indirect
golang.org/x/sync v0.10.0 // indirect
golang.org/x/sys v0.28.0 // indirect
golang.org/x/sys v0.29.0 // indirect
golang.org/x/text v0.21.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 // indirect

View File

@@ -311,6 +311,8 @@ github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU=
github.com/onsi/gomega v1.24.2 h1:J/tulyYK6JwBldPViHJReihxxZ+22FHs0piGjQAvoUE=
github.com/onsi/gomega v1.24.2/go.mod h1:gs3J10IS7Z7r7eXRoNJIrNqU4ToQukCJhFtKrWgHWnk=
github.com/openai/openai-go/v2 v2.7.0 h1:/8MSFCXcasin7AyuWQ2au6FraXL71gzAs+VfbMv+J3k=
github.com/openai/openai-go/v2 v2.7.0/go.mod h1:jrJs23apqJKKbT+pqtFgNKpRju/KP9zpUTZhz3GElQE=
github.com/orcaman/concurrent-map v0.0.0-20210501183033-44dafcb38ecc h1:Ak86L+yDSOzKFa7WM5bf5itSOo1e3Xh8bm5YCMUXIjQ=
github.com/orcaman/concurrent-map v0.0.0-20210501183033-44dafcb38ecc/go.mod h1:Lu3tH6HLW3feq74c2GC+jIMS/K2CFcDWnWD9XkenwhI=
github.com/paulmach/orb v0.11.1 h1:3koVegMC4X/WeiXYz9iswopaTwMem53NzTJuTF20JzU=
@@ -377,7 +379,18 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/match v1.2.0 h1:0pt8FlkOwjN2fPt4bIl4BoNxb98gGHN2ObFEDkrfZnM=
github.com/tidwall/match v1.2.0/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
github.com/tjfoc/gmsm v1.3.2/go.mod h1:HaUcFuY0auTiaHB9MHFGCPx5IaLhTUd2atbCFBQXn9w=
github.com/tjfoc/gmsm v1.4.1 h1:aMe1GlZb+0bLjn+cKTPEvvn9oUEBlJitaZiiBwsbgho=
github.com/tjfoc/gmsm v1.4.1/go.mod h1:j4INPkHWMrhJb38G+J6W4Tw0AbuN8Thu3PbdVYhVcTE=
@@ -426,6 +439,8 @@ golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDf
golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc=
golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@@ -506,6 +521,8 @@ golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0=
golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -581,6 +598,8 @@ golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=

View File

@@ -1,173 +0,0 @@
# Higress RAG MCP Server
这是一个 Model Context Protocol (MCP) 服务器,提供知识管理和检索功能。
该 MCP 服务器提供以下工具:
## MCP Tools
### 知识管理
- `create-chunks-from-text` - 从 Text 创建知识 (p1)
### 块管理
- `list-chunks` - 列出知识块
- `delete-chunk` - 删除知识块
### 搜索
- `search` - 搜索
### 聊天功能
- `chat` - 发送聊天消息
## 配置说明
### 配置结构
```yaml
rag:
# RAG系统基础配置
splitter:
type: "recursive" # 递归分块器 recursive 和 nosplitter
chunk_size: 500
chunk_overlap: 50
top_k: 5 # 搜索返回的知识块数量
threshold: 0.5 # 搜索阈值
llm:
provider: "openai" # openai
api_key: "your-llm-api-key"
base_url: "https://api.openai.com/v1" # 可选
model: "gpt-3.5-turbo" # LLM模型
max_tokens: 2048 # 最大令牌数
temperature: 0.5 # 温度参数
embedding:
provider: "openai" # openai, dashscope
api_key: "your-embedding-api-key"
base_url: "https://api.openai.com/v1" # 可选
model: "text-embedding-ada-002" # 嵌入模型
vectordb:
provider: "milvus" # milvus
host: "localhost"
port: 19530
database: "default"
collection: "test_collection"
username: "" # 可选
password: "" # 可选
```
### higress-config 配置样例
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: higress-config
namespace: higress-system
data:
higress: |
mcpServer:
enable: true
sse_path_suffix: "/sse"
redis:
address: "<Redis IP>:6379"
username: ""
password: ""
db: 0
match_list:
- path_rewrite_prefix: ""
upstream_type: ""
enable_path_rewrite: false
match_rule_domain: ""
match_rule_path: "/mcp-servers/rag"
match_rule_type: "prefix"
servers:
- path: "/mcp-servers/rag"
name: "rag"
type: "rag"
config:
rag:
splitter:
provider: recursive
chunk_size: 500
chunk_overlap: 50
top_k: 10
threshold: 0.5
llm:
provider: openai
api_key: sk-XXX
base_url: https://openrouter.ai/api/v1
model: openai/gpt-4o
temperature: 0.5
max_tokens: 2048
embedding:
provider: dashscope
api_key: sk-xxx
model: text-embedding-v4
vectordb:
provider: milvus
host: <milvus IP>
port: 19530
database: default
collection: test_collection
```
### 支持的提供商
#### Embedding
- **OpenAI**
- **DashScope**
#### Vector Database
- **Milvus**
#### LLM
- **OpenAI**
## Milvus 安装
### Docker 配置
配置 Docker Desktop 镜像加速器
编辑 daemon.json 配置,加上镜像加速器,例如:
```
{
"registry-mirrors": [
"https://docker.m.daocloud.io",
"https://mirror.ccs.tencentyun.com",
"https://hub-mirror.c.163.com"
],
"dns": ["8.8.8.8", "1.1.1.1"]
}
```
### 安装 milvus
```
v2.6.0
Download the configuration file
wget https://github.com/milvus-io/milvus/releases/download/v2.6.0/milvus-standalone-docker-compose.yml -O docker-compose.yml
v2.4
$ wget https://github.com/milvus-io/milvus/releases/download/v2.4.23/milvus-standalone-docker-compose.yml -O docker-compose.yml
# Start Milvus
$ sudo docker compose up -d
Creating milvus-etcd ... done
Creating milvus-minio ... done
Creating milvus-standalone ... done
```
### 安装 attu
Attu 是 Milvus 的可视化管理工具,用于查看和管理 Milvus 中的数据。
```
docker run -p 8000:3000 -e MILVUS_URL=http://<本机 IP>:19530 zilliz/attu:v2.6
Open your browser and navigate to http://localhost:8000
```

View File

@@ -0,0 +1,327 @@
# Higress RAG MCP Server
这是一个 Model Context Protocol (MCP) 服务器,提供知识管理和检索功能。
## MCP 工具说明
Higress RAG MCP Server 提供以下工具,根据配置不同,可用工具也会有所差异:
| 工具名称 | 功能描述 | 依赖配置 | 必选/可选 |
|---------|---------|---------|----------|
| `create-chunks-from-text` | 将文本内容分块并存储到向量数据库,用于知识库构建 | embedding, vectordb | **必选** |
| `list-chunks` | 列出已存储的知识块,用于知识库管理 | vectordb | **必选** |
| `delete-chunk` | 删除指定的知识块,用于知识库维护 | vectordb | **必选** |
| `search` | 基于语义相似度搜索知识库中的内容 | embedding, vectordb | **必选** |
| `chat` | 基于检索增强生成(RAG)回答用户问题,结合知识库内容生成回答 | embedding, vectordb, llm | **可选** |
### 工具与配置的关系
- **基础功能**(知识管理、搜索):只需配置 `embedding` 和 `vectordb`
- **高级功能**(聊天问答):需额外配置 `llm`
具体关系如下:
- 未配置 `llm` 时,`chat` 工具将不可用
- 所有工具都依赖 `embedding` 和 `vectordb` 配置
- `rag` 配置用于调整分块和检索参数,影响所有工具的行为
## 典型使用场景
### 最小工具集场景(无 LLM 配置)
适用于仅需要知识库管理和检索的场景,不需要生成式回答。
**可用工具**:`create-chunks-from-text`、`list-chunks`、`delete-chunk`、`search`
**典型用例**
1. 构建企业文档库,仅需检索相关文档片段
2. 数据索引系统,通过语义搜索快速定位信息
3. 内容管理系统,管理和检索结构化/非结构化内容
**示例流程**
```
1. 使用 create-chunks-from-text 导入文档
2. 使用 search 检索相关内容
3. 使用 list-chunks 和 delete-chunk 管理知识库
```
### 完整工具集场景(含 LLM 配置)
适用于需要智能问答和内容生成的高级场景。
**可用工具**:`create-chunks-from-text`、`list-chunks`、`delete-chunk`、`search`、`chat`
**典型用例**
1. 智能客服系统,基于企业知识库回答用户问题
2. 文档助手,帮助用户理解和分析复杂文档
3. 专业领域问答系统,如法律、金融、技术支持等
**示例流程**
```
1. 使用 create-chunks-from-text 导入专业领域文档
2. 用户通过 chat 工具提问
3. 系统使用 search 检索相关知识
4. LLM 结合检索结果生成回答
5. 管理员使用 list-chunks 和 delete-chunk 维护知识库
```
## 配置说明
### 配置结构
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|----------------------------|----------|-----------|---------|--------|
| **rag** | object | 必填 | - | RAG系统基础配置 |
| rag.splitter.provider | string | 必填 | recursive | 分块器类型recursive或nosplitter |
| rag.splitter.chunk_size | integer | 可选 | 500 | 块大小 |
| rag.splitter.chunk_overlap | integer | 可选 | 50 | 块重叠大小 |
| rag.top_k | integer | 可选 | 10 | 搜索返回的知识块数量 |
| rag.threshold | float | 可选 | 0.5 | 搜索阈值 |
| **llm** | object | 可选 | - | LLM配置不配置则无chat功能 |
| llm.provider | string | 可选 | openai | LLM提供商 |
| llm.api_key | string | 可选 | - | LLM API密钥 |
| llm.base_url | string | 可选 | | LLM API基础URL |
| llm.model | string | 可选 | gpt-4o | LLM模型名称 |
| llm.max_tokens | integer | 可选 | 2048 | 最大令牌数 |
| llm.temperature | float | 可选 | 0.5 | 温度参数 |
| **embedding** | object | 必填 | - | 嵌入配置(所有工具必需) |
| embedding.provider | string | 必填 | openai | 嵌入提供商支持openai协议的任意供应商 |
| embedding.api_key | string | 必填 | - | 嵌入API密钥 |
| embedding.base_url | string | 可选 | | 嵌入API基础URL |
| embedding.model | string | 必填 | text-embedding-ada-002 | 嵌入模型名称 |
| embedding.dimensions | integer | 可选 | 1536 | 嵌入维度 |
| **vectordb** | object | 必填 | - | 向量数据库配置(所有工具必需) |
| vectordb.provider | string | 必填 | milvus | 向量数据库提供商 |
| vectordb.host | string | 必填 | localhost | 数据库主机地址 |
| vectordb.port | integer | 必填 | 19530 | 数据库端口 |
| vectordb.database | string | 必填 | default | 数据库名称 |
| vectordb.collection | string | 必填 | test_collection | 集合名称 |
| vectordb.username | string | 可选 | - | 数据库用户名 |
| vectordb.password | string | 可选 | - | 数据库密码 |
| **vectordb.mapping** | object | 可选 | - | 字段映射配置 |
| vectordb.mapping.fields | array | 可选 | - | 字段映射列表 |
| vectordb.mapping.fields[].standard_name | string | 必填 | - | 标准字段名称(如 id, content, vector 等) |
| vectordb.mapping.fields[].raw_name | string | 必填 | - | 原始字段名称(数据库中的实际字段名) |
| vectordb.mapping.fields[].properties | object | 可选 | - | 字段属性(如 auto_id, max_length 等) |
| vectordb.mapping.index | object | 可选 | - | 索引配置 |
| vectordb.mapping.index.index_type | string | 必填 | - | 索引类型(如 FLAT, IVF_FLAT, HNSW 等) |
| vectordb.mapping.index.params | object | 可选 | - | 索引参数(根据索引类型不同而异) |
| vectordb.mapping.search | object | 可选 | - | 搜索配置 |
| vectordb.mapping.search.metric_type | string | 可选 | L2 | 度量类型(如 L2, IP, COSINE 等) |
| vectordb.mapping.search.params | object | 可选 | - | 搜索参数(如 nprobe, ef_search 等)
### higress-config 配置样例
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: higress-config
namespace: higress-system
data:
higress: |
mcpServer:
enable: true
sse_path_suffix: "/sse"
redis:
address: "<Redis IP>:6379"
username: ""
password: ""
db: 0
match_list:
- path_rewrite_prefix: ""
upstream_type: ""
enable_path_rewrite: false
match_rule_domain: ""
match_rule_path: "/mcp-servers/rag"
match_rule_type: "prefix"
servers:
- path: "/mcp-servers/rag"
name: "rag"
type: "rag"
config:
rag:
splitter:
provider: recursive
chunk_size: 500
chunk_overlap: 50
top_k: 10
threshold: 0.5
llm:
provider: openai
api_key: sk-XXX
base_url: https://openrouter.ai/api/v1
model: openai/gpt-4o
temperature: 0.5
max_tokens: 2048
embedding:
provider: openai
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: sk-xxx
model: text-embedding-v4
dimensions: 1536
vectordb:
provider: milvus
host: localhost
port: 19530
database: default
collection: test_rag
mapping:
fields:
- standard_name: id
raw_name: id
properties:
auto_id: false
max_length: 256
- standard_name: content
raw_name: content
properties:
max_length: 8192
- standard_name: vector
raw_name: vector
- standard_name: metadata
raw_name: metadata
- standard_name: created_at
raw_name: created_at
index:
index_type: HNSW
params:
M: 4
efConstruction: 32
search:
metric_type: IP
params:
ef: 32
```
### 支持的提供商
#### Embedding
- **OpenAI 兼容**
#### Vector Database
- **Milvus**
#### LLM
- **OpenAI 兼容**
## 如何测试数据集的效果
测试数据集的效果分两步:第一步,导入数据集语料;第二步,测试 Chat 效果。
### 导入数据集语料
使用 `RAGClient.CreateChunkFromText` 工具导入数据集语料,比如数据集语料格式为 JSON每个 JSON 对象包含 `body``title``url` 等字段。样例代码如下:
```golang
func TestRAGClient_LoadChunks(t *testing.T) {
t.Logf("TestRAGClient_LoadChunks")
ragClient, err := getRAGClient()
if err != nil {
t.Errorf("getRAGClient() error = %v", err)
return
}
// load json output/corpus.json and then call ragclient CreateChunkFromText to insert chunks
file, err := os.Open("/dataset/corpus.json")
if err != nil {
t.Errorf("LoadData() error = %v", err)
return
}
defer file.Close()
decoder := json.NewDecoder(file)
var data []struct {
Body string `json:"body"`
Title string `json:"title"`
Url string `json:"url"`
}
if err := decoder.Decode(&data); err != nil {
t.Errorf("LoadData() error = %v", err)
return
}
for _, item := range data {
t.Logf("LoadData() url = %s", item.Url)
t.Logf("LoadData() title = %s", item.Title)
t.Logf("LoadData() len body = %d", len(item.Body))
chunks, err := ragClient.CreateChunkFromText(item.Body, item.Title)
if err != nil {
t.Errorf("LoadData() error = %v", err)
continue
} else {
t.Logf("LoadData() chunks len = %d", len(chunks))
}
}
t.Logf("TestRAGClient_LoadChunks done")
}
```
### 测试Chat效果
使用 `RAGClient.Chat` 工具测试 Chat 效果。样例代码如下:
```golang
func TestRAGClient_Chat(t *testing.T) {
ragClient, err := getRAGClient()
if err != nil {
t.Errorf("getRAGClient() error = %v", err)
return
}
query := "Which online betting platform provides a welcome bonus of up to $1000 in bonus bets for new customers' first losses, runs NBA betting promotions, and is anticipated to extend the same sign-up offer to new users in Vermont, as reported by both CBSSports.com and Sporting News?"
resp, err := ragClient.Chat(query)
if err != nil {
t.Errorf("Chat() error = %v", err)
return
}
if resp == "" {
t.Errorf("Chat() resp = %s, want not empty", resp)
return
}
t.Logf("Chat() resp = %s", resp)
}
```
## Milvus 安装
### Docker 配置
配置 Docker Desktop 镜像加速器
编辑 daemon.json 配置,加上镜像加速器,例如:
```
{
"registry-mirrors": [
"https://docker.m.daocloud.io",
"https://mirror.ccs.tencentyun.com",
"https://hub-mirror.c.163.com"
],
"dns": ["8.8.8.8", "1.1.1.1"]
}
```
### 安装 milvus
```
v2.6.0
Download the configuration file
wget https://github.com/milvus-io/milvus/releases/download/v2.6.0/milvus-standalone-docker-compose.yml -O docker-compose.yml
v2.4
$ wget https://github.com/milvus-io/milvus/releases/download/v2.4.23/milvus-standalone-docker-compose.yml -O docker-compose.yml
# Start Milvus
$ sudo docker compose up -d
Creating milvus-etcd ... done
Creating milvus-minio ... done
Creating milvus-standalone ... done
```
### 安装 attu
Attu 是 Milvus 的可视化管理工具,用于查看和管理 Milvus 中的数据。
```
docker run -p 8000:3000 -e MILVUS_URL=http://<本机 IP>:19530 zilliz/attu:v2.6
Open your browser and navigate to http://localhost:8000
```

View File

@@ -1,5 +1,7 @@
package config
import "fmt"
// Config represents the main configuration structure for the MCP server
type Config struct {
RAG RAGConfig `json:"rag" yaml:"rag"`
@@ -34,20 +36,148 @@ type LLMConfig struct {
// EmbeddingConfig defines configuration for embedding models.
// (The rendered span duplicated the pre- and post-change field lists; collapsed
// to the post-change struct.)
type EmbeddingConfig struct {
	Provider string `json:"provider" yaml:"provider"` // Available options: openai, dashscope
	APIKey   string `json:"api_key,omitempty" yaml:"api_key,omitempty"`
	BaseURL  string `json:"base_url,omitempty" yaml:"base_url,omitempty"`
	Model    string `json:"model,omitempty" yaml:"model,omitempty"`
	// Dimensions is the embedding vector dimension. The YAML tag is fixed to
	// "dimensions" (plural) to match the JSON tag and the documented config
	// key used in the README and the higress-config sample.
	Dimensions int `json:"dimensions,omitempty" yaml:"dimensions,omitempty"`
}
// VectorDBConfig defines configuration for vector databases
type VectorDBConfig struct {
Provider string `json:"provider" yaml:"provider"` // Available options: milvus, qdrant, chroma
Host string `json:"host,omitempty" yaml:"host,omitempty"`
Port int `json:"port,omitempty" yaml:"port,omitempty"`
Database string `json:"database,omitempty" yaml:"database,omitempty"`
Collection string `json:"collection,omitempty" yaml:"collection,omitempty"`
Username string `json:"username,omitempty" yaml:"username,omitempty"`
Password string `json:"password,omitempty" yaml:"password,omitempty"`
Provider string `json:"provider" yaml:"provider"` // Available options: milvus, qdrant, chroma
Host string `json:"host,omitempty" yaml:"host,omitempty"`
Port int `json:"port,omitempty" yaml:"port,omitempty"`
Database string `json:"database,omitempty" yaml:"database,omitempty"`
Collection string `json:"collection,omitempty" yaml:"collection,omitempty"`
Username string `json:"username,omitempty" yaml:"username,omitempty"`
Password string `json:"password,omitempty" yaml:"password,omitempty"`
Mapping MappingConfig `json:"mapping,omitempty" yaml:"mapping,omitempty"`
}
// MappingConfig defines field mapping configuration for vector databases.
// It groups three optional customizations: per-field name mapping (Fields),
// index construction parameters (Index), and search-time parameters (Search).
type MappingConfig struct {
Fields []FieldMapping `json:"fields,omitempty" yaml:"fields,omitempty"`
Index IndexConfig `json:"index,omitempty" yaml:"index,omitempty"`
Search SearchConfig `json:"search,omitempty" yaml:"search,omitempty"`
}
// // CollectionMapping defines field mapping for collection
// type CollectionMapping struct {
// Fields []FieldMapping `json:"fields,omitempty" yaml:"fields,omitempty"`
// }
// FieldMapping defines mapping for a single field: a standard field name
// (e.g. "id", "content", "vector") mapped to the raw field name used in the
// vector database, plus optional free-form properties.
type FieldMapping struct {
	StandardName string                 `json:"standard_name" yaml:"standard_name"`
	RawName      string                 `json:"raw_name" yaml:"raw_name"`
	Properties   map[string]interface{} `json:"properties,omitempty" yaml:"properties,omitempty"`
}

// IsPrimaryKey reports whether this field maps to the standard "id" field.
func (f FieldMapping) IsPrimaryKey() bool {
	return f.StandardName == "id"
}

// IsAutoID reports whether the "auto_id" property is present and true.
func (f FieldMapping) IsAutoID() bool {
	if f.Properties == nil {
		return false
	}
	autoID, ok := f.Properties["auto_id"].(bool)
	return ok && autoID
}

// IsVectorField reports whether this field maps to the standard "vector" field.
func (f FieldMapping) IsVectorField() bool {
	return f.StandardName == "vector"
}

// MaxLength returns the "max_length" property, defaulting to 256 when it is
// absent. Previously a nil Properties map returned 0 while a missing key
// returned 256; both "unset" cases now use the same default. Numbers decoded
// from JSON arrive as float64 and numbers decoded from YAML may arrive as
// int or int64, so all three representations are accepted.
func (f FieldMapping) MaxLength() int {
	const defaultMaxLength = 256
	if f.Properties == nil {
		return defaultMaxLength
	}
	switch v := f.Properties["max_length"].(type) {
	case int:
		return v
	case int64:
		return int(v)
	case float64:
		return int(v)
	default:
		return defaultMaxLength
	}
}
// IndexConfig defines configuration for index parameters.
type IndexConfig struct {
	// Index type, e.g., IVF_FLAT, IVF_SQ8, HNSW, etc.
	IndexType string `json:"index_type" yaml:"index_type"`
	// Index parameter configuration (free-form key/value pairs).
	Params map[string]interface{} `json:"params" yaml:"params"`
}

// ParamsString returns the string parameter stored under key, or an error
// when the key is absent or not a string.
func (i IndexConfig) ParamsString(key string) (string, error) {
	if v, ok := i.Params[key].(string); ok {
		return v, nil
	}
	return "", fmt.Errorf("params %s not found", key)
}

// ParamsInt64 returns the integer parameter stored under key. Numbers decoded
// from JSON arrive as float64 and numbers decoded from YAML may arrive as int
// or int64, so all three representations are accepted.
func (i IndexConfig) ParamsInt64(key string) (int64, error) {
	switch v := i.Params[key].(type) {
	case int64:
		return v, nil
	case int:
		return int64(v), nil
	case float64:
		return int64(v), nil
	}
	return 0, fmt.Errorf("params %s not found", key)
}

// ParamsFloat64 returns the floating-point parameter stored under key,
// accepting float64, float32, and integer representations.
func (i IndexConfig) ParamsFloat64(key string) (float64, error) {
	switch v := i.Params[key].(type) {
	case float64:
		return v, nil
	case float32:
		return float64(v), nil
	case int:
		return float64(v), nil
	case int64:
		return float64(v), nil
	}
	return 0, fmt.Errorf("params %s not found", key)
}

// ParamsBool returns the boolean parameter stored under key, or an error when
// the key is absent or not a bool.
func (i IndexConfig) ParamsBool(key string) (bool, error) {
	if v, ok := i.Params[key].(bool); ok {
		return v, nil
	}
	return false, fmt.Errorf("params %s not found", key)
}
// SearchConfig defines configuration for search parameters.
type SearchConfig struct {
	// Metric type, e.g., L2, IP, etc.
	MetricType string `json:"metric_type,omitempty" yaml:"metric_type,omitempty"`
	// Search parameter configuration (free-form key/value pairs).
	Params map[string]interface{} `json:"params" yaml:"params"`
}

// ParamsString returns the string parameter stored under key, or an error
// when the key is absent or not a string.
func (s SearchConfig) ParamsString(key string) (string, error) {
	if v, ok := s.Params[key].(string); ok {
		return v, nil
	}
	return "", fmt.Errorf("params %s not found", key)
}

// ParamsInt64 returns the integer parameter stored under key. It now accepts
// int and float64 (JSON-decoded numbers) in addition to int64, matching the
// sibling IndexConfig.ParamsInt64 behavior.
func (s SearchConfig) ParamsInt64(key string) (int64, error) {
	switch v := s.Params[key].(type) {
	case int64:
		return v, nil
	case int:
		return int64(v), nil
	case float64:
		return int64(v), nil
	}
	return 0, fmt.Errorf("params %s not found", key)
}

// ParamsFloat64 returns the floating-point parameter stored under key,
// accepting float64, float32, and integer representations.
func (s SearchConfig) ParamsFloat64(key string) (float64, error) {
	switch v := s.Params[key].(type) {
	case float64:
		return v, nil
	case float32:
		return float64(v), nil
	case int:
		return float64(v), nil
	case int64:
		return float64(v), nil
	}
	return 0, fmt.Errorf("params %s not found", key)
}

// ParamsBool returns the boolean parameter stored under key, or an error when
// the key is absent or not a bool.
func (s SearchConfig) ParamsBool(key string) (bool, error) {
	if v, ok := s.Params[key].(bool); ok {
		return v, nil
	}
	return false, fmt.Errorf("params %s not found", key)
}

View File

@@ -1,169 +0,0 @@
package embedding
import (
"context"
"encoding/json"
"errors"
"fmt"
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/common"
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/config"
)
const (
DASHSCOPE_DOMAIN = "dashscope.aliyuncs.com"
DASHSCOPE_PORT = 443
DASHSCOPE_DEFAULT_MODEL_NAME = "text-embedding-v4"
DASHSCOPE_ENDPOINT = "/api/v1/services/embeddings/text-embedding/text-embedding"
)
var dashScopeConfig dashScopeProviderConfig
type dashScopeProviderInitializer struct {
}
type dashScopeProviderConfig struct {
apiKey string
model string
}
func (c *dashScopeProviderInitializer) InitConfig(config config.EmbeddingConfig) {
dashScopeConfig.apiKey = config.APIKey
dashScopeConfig.model = config.Model
}
func (c *dashScopeProviderInitializer) ValidateConfig() error {
if dashScopeConfig.apiKey == "" {
return errors.New("[DashScope] apiKey is required")
}
return nil
}
func (c *dashScopeProviderInitializer) CreateProvider(config config.EmbeddingConfig) (Provider, error) {
c.InitConfig(config)
err := c.ValidateConfig()
if err != nil {
return nil, err
}
headers := map[string]string{
"Authorization": "Bearer " + config.APIKey,
"Content-Type": "application/json",
}
httpClient := common.NewHTTPClient(fmt.Sprintf("https://%s", DASHSCOPE_DOMAIN), headers)
return &DashScopeProvider{
config: dashScopeConfig,
client: httpClient,
}, nil
}
// GetProviderType returns the DashScope provider type identifier.
func (d *DashScopeProvider) GetProviderType() string {
return PROVIDER_TYPE_DASHSCOPE
}
type Embedding struct {
Embedding []float32 `json:"embedding"`
TextIndex int `json:"text_index"`
}
type Input struct {
Texts []string `json:"texts"`
}
type Params struct {
TextType string `json:"text_type"`
}
type Response struct {
RequestID string `json:"request_id"`
Output Output `json:"output"`
Usage Usage `json:"usage"`
}
type Output struct {
Embeddings []Embedding `json:"embeddings"`
}
type Usage struct {
TotalTokens int `json:"total_tokens"`
}
type EmbeddingRequest struct {
Model string `json:"model"`
Input Input `json:"input"`
Parameters Params `json:"parameters"`
}
type Document struct {
Vector []float64 `json:"vector"`
Fields map[string]string `json:"fields"`
}
type DashScopeProvider struct {
config dashScopeProviderConfig
client *common.HTTPClient
}
// constructRequestData builds the DashScope text-embedding request body for
// the given texts. It falls back to DASHSCOPE_DEFAULT_MODEL_NAME when no
// model is configured and fails fast when the API key is missing.
func (d *DashScopeProvider) constructRequestData(texts []string) (EmbeddingRequest, error) {
model := d.config.model
if model == "" {
model = DASHSCOPE_DEFAULT_MODEL_NAME
}
// NOTE(review): reads the package-level dashScopeConfig rather than
// d.config — presumably equivalent because CreateProvider copies the same
// value into both, but verify before relying on it.
if dashScopeConfig.apiKey == "" {
return EmbeddingRequest{}, errors.New("dashScopeKey is empty")
}
data := EmbeddingRequest{
Model: model,
Input: Input{
Texts: texts,
},
// TextType "query" is hard-coded; all embeddings are requested as
// query-side vectors.
Parameters: Params{
TextType: "query",
},
}
return data, nil
}
type Result struct {
ID string `json:"id"`
Vector []float32 `json:"vector,omitempty"`
Fields map[string]interface{} `json:"fields"`
Score float64 `json:"score"`
}
// parseTextEmbedding decodes a raw DashScope response body into a Response.
func (d *DashScopeProvider) parseTextEmbedding(responseBody []byte) (*Response, error) {
	resp := &Response{}
	if err := json.Unmarshal(responseBody, resp); err != nil {
		return nil, err
	}
	return resp, nil
}
// GetEmbedding requests an embedding for queryString from DashScope and
// returns the vector of the first (and only) input text.
//
// Fix: wrap underlying errors with %w instead of %v so callers can inspect
// them with errors.Is/errors.As; the rendered messages are unchanged.
// NOTE(review): ctx is accepted but not propagated to the HTTP client —
// confirm whether common.HTTPClient supports context-aware requests.
func (d *DashScopeProvider) GetEmbedding(
	ctx context.Context,
	queryString string) ([]float32, error) {
	requestData, err := d.constructRequestData([]string{queryString})
	if err != nil {
		return nil, fmt.Errorf("failed to construct request data: %w", err)
	}
	responseBody, err := d.client.Post(DASHSCOPE_ENDPOINT, requestData)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	embeddingResp, err := d.parseTextEmbedding(responseBody)
	if err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}
	if len(embeddingResp.Output.Embeddings) == 0 {
		return nil, errors.New("no embedding found in response")
	}
	return embeddingResp.Output.Embeddings[0].Embedding, nil
}

View File

@@ -2,160 +2,93 @@ package embedding
import (
"context"
"encoding/json"
"errors"
"fmt"
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/common"
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/config"
"github.com/openai/openai-go/v2"
"github.com/openai/openai-go/v2/option"
)
const (
OPENAI_DOMAIN = "api.openai.com"
OPENAI_PORT = 443
OPENAI_DEFAULT_MODEL_NAME = "text-embedding-3-small"
OPENAI_ENDPOINT = "/v1/embeddings"
OPENAI_DEFAULT_MODEL_NAME = "text-embedding-ada-002"
)
type openAIProviderInitializer struct {
}
var openAIConfig openAIProviderConfig
type openAIProviderConfig struct {
baseUrl string
apiKey string
model string
}
func (c *openAIProviderInitializer) InitConfig(config config.EmbeddingConfig) {
openAIConfig.apiKey = config.APIKey
openAIConfig.model = config.Model
openAIConfig.baseUrl = config.BaseURL
}
func (c *openAIProviderInitializer) ValidateConfig() error {
if openAIConfig.apiKey == "" {
return errors.New("[openAI] apiKey is required")
func (c *openAIProviderInitializer) validateConfig(config *config.EmbeddingConfig) error {
if config.APIKey == "" {
return errors.New("[openai embbeding] apiKey is required")
}
if config.Model == "" {
config.Model = OPENAI_DEFAULT_MODEL_NAME
}
if config.Dimensions <= 0 {
config.Dimensions = 1536
}
return nil
}
func (c *openAIProviderInitializer) CreateProvider(config config.EmbeddingConfig) (Provider, error) {
c.InitConfig(config)
err := c.ValidateConfig()
if err != nil {
if err := c.validateConfig(&config); err != nil {
return nil, err
}
// 创建 OpenAI 客户端
var clientOptions []option.RequestOption
clientOptions = append(clientOptions, option.WithAPIKey(config.APIKey))
if openAIConfig.model == "" {
openAIConfig.model = OPENAI_DEFAULT_MODEL_NAME
// 如果设置了自定义 baseURL则使用它
if config.BaseURL != "" {
clientOptions = append(clientOptions, option.WithBaseURL(config.BaseURL))
}
if openAIConfig.baseUrl == "" {
openAIConfig.baseUrl = fmt.Sprintf("https://%s", OPENAI_DOMAIN)
}
headers := map[string]string{
"Authorization": "Bearer " + config.APIKey,
"Content-Type": "application/json",
}
httpClient := common.NewHTTPClient(openAIConfig.baseUrl, headers)
// 创建 OpenAI 客户端
client := openai.NewClient(clientOptions...)
return &OpenAIProvider{
config: openAIConfig,
client: httpClient,
client: &client,
model: config.Model,
dimensions: config.Dimensions,
}, nil
}
func (o *OpenAIProvider) GetProviderType() string {
// EmbeddingClient handles vector embedding generation using OpenAI-compatible APIs
type OpenAIProvider struct {
client *openai.Client
model string
dimensions int
}
func (e *OpenAIProvider) GetProviderType() string {
return PROVIDER_TYPE_OPENAI
}
type OpenAIResponse struct {
Object string `json:"object"`
Data []OpenAIResult `json:"data"`
Model string `json:"model"`
Error *OpenAIError `json:"error"`
}
type OpenAIResult struct {
Object string `json:"object"`
Embedding []float32 `json:"embedding"`
Index int `json:"index"`
}
type OpenAIError struct {
Message string `json:"prompt_tokens"`
Type string `json:"type"`
Code string `json:"code"`
Param string `json:"param"`
}
type OpenAIEmbeddingRequest struct {
Input string `json:"input"`
Model string `json:"model"`
}
type OpenAIProvider struct {
config openAIProviderConfig
client *common.HTTPClient
}
func (o *OpenAIProvider) constructRequestData(text string) (OpenAIEmbeddingRequest, error) {
if text == "" {
return OpenAIEmbeddingRequest{}, errors.New("queryString text cannot be empty")
// GetEmbedding generates vector embedding for the given text
func (e *OpenAIProvider) GetEmbedding(ctx context.Context, text string) ([]float32, error) {
params := openai.EmbeddingNewParams{
Model: e.model,
Input: openai.EmbeddingNewParamsInputUnion{
OfString: openai.String(text),
},
Dimensions: openai.Int(int64(e.dimensions)),
EncodingFormat: openai.EmbeddingNewParamsEncodingFormatFloat,
}
if openAIConfig.apiKey == "" {
return OpenAIEmbeddingRequest{}, errors.New("openAI apiKey is empty")
}
model := o.config.model
if model == "" {
model = OPENAI_DEFAULT_MODEL_NAME
}
data := OpenAIEmbeddingRequest{
Input: text,
Model: model,
}
return data, nil
}
func (o *OpenAIProvider) parseTextEmbedding(responseBody []byte) (*OpenAIResponse, error) {
var resp OpenAIResponse
err := json.Unmarshal(responseBody, &resp)
embeddingResp, err := e.client.Embeddings.New(ctx, params)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to generate embedding: %w", err)
}
return &resp, nil
}
func (o *OpenAIProvider) GetEmbedding(ctx context.Context, queryString string) ([]float32, error) {
requestData, err := o.constructRequestData(queryString)
if err != nil {
return nil, fmt.Errorf("failed to construct request data: %v", err)
}
responseBody, err := o.client.Post(OPENAI_ENDPOINT, requestData)
if err != nil {
return nil, fmt.Errorf("failed to send request: %v", err)
}
resp, err := o.parseTextEmbedding(responseBody)
if err != nil {
return nil, fmt.Errorf("failed to parse response: %v", err)
}
if resp.Error != nil {
return nil, fmt.Errorf("OpenAI API error: %s - %s", resp.Error.Type, resp.Error.Message)
}
if len(resp.Data) == 0 {
return nil, errors.New("no embedding found in response")
}
return resp.Data[0].Embedding, nil
if len(embeddingResp.Data) == 0 {
return nil, fmt.Errorf("empty embedding response")
}
// Convert []float64 to []float32
embedding := make([]float32, len(embeddingResp.Data[0].Embedding))
for i, v := range embeddingResp.Data[0].Embedding {
embedding[i] = float32(v)
}
return embedding, nil
}

View File

@@ -10,21 +10,21 @@ import (
// Provider type constants for different embedding services
const (
// DashScope embedding service
PROVIDER_TYPE_DASHSCOPE = "dashscope"
PROVIDER_TYPE_DASHSCOPE = "dashscope"
// TextIn embedding service
PROVIDER_TYPE_TEXTIN = "textin"
PROVIDER_TYPE_TEXTIN = "textin"
// Cohere embedding service
PROVIDER_TYPE_COHERE = "cohere"
PROVIDER_TYPE_COHERE = "cohere"
// OpenAI embedding service
PROVIDER_TYPE_OPENAI = "openai"
PROVIDER_TYPE_OPENAI = "openai"
// Ollama embedding service
PROVIDER_TYPE_OLLAMA = "ollama"
PROVIDER_TYPE_OLLAMA = "ollama"
// HuggingFace embedding service
PROVIDER_TYPE_HUGGINGFACE = "huggingface"
// XFYun embedding service
PROVIDER_TYPE_XFYUN = "xfyun"
PROVIDER_TYPE_XFYUN = "xfyun"
// Azure embedding service
PROVIDER_TYPE_AZURE = "azure"
PROVIDER_TYPE_AZURE = "azure"
)
// Factory interface for creating Provider instances
@@ -36,8 +36,7 @@ type providerInitializer interface {
// Maps provider types to their initializers
var (
providerInitializers = map[string]providerInitializer{
PROVIDER_TYPE_DASHSCOPE: &dashScopeProviderInitializer{},
PROVIDER_TYPE_OPENAI: &openAIProviderInitializer{},
PROVIDER_TYPE_OPENAI: &openAIProviderInitializer{},
}
)

View File

@@ -2,133 +2,105 @@ package llm
import (
"context"
"encoding/json"
"errors"
"fmt"
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/common"
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/config"
"github.com/openai/openai-go/v2"
"github.com/openai/openai-go/v2/option"
"github.com/openai/openai-go/v2/packages/param"
)
const (
OPENAI_CHAT_ENDPOINT = "/chat/completions"
OPENAI_DEFAULT_MODEL = "gpt-3.5-turbo"
OPENAI_DEFAULT_MODEL = "gpt-4o"
)
// openAI specific configuration captured after initialization.
type openAIProviderConfig struct {
apiKey string
baseURL string
type OpenAIProvider struct {
client *openai.Client
model string
maxTokens int
temperature float64
maxTokens int
}
type openAIProviderInitializer struct{}
var openAIConfig openAIProviderConfig
func (i *openAIProviderInitializer) initConfig(c config.LLMConfig) {
openAIConfig.apiKey = c.APIKey
openAIConfig.baseURL = c.BaseURL
openAIConfig.model = c.Model
if openAIConfig.model == "" {
openAIConfig.model = OPENAI_DEFAULT_MODEL
}
if openAIConfig.baseURL == "" {
openAIConfig.baseURL = "https://api.openai.com/v1" // default public endpoint
}
openAIConfig.maxTokens = c.MaxTokens
openAIConfig.temperature = c.Temperature
}
func (i *openAIProviderInitializer) validateConfig() error {
if openAIConfig.apiKey == "" {
func (i *openAIProviderInitializer) validateConfig(cfg *config.LLMConfig) error {
if cfg.APIKey == "" {
return errors.New("[openai llm] apiKey is required")
}
if cfg.Model == "" {
cfg.Model = OPENAI_DEFAULT_MODEL
}
if cfg.Temperature <= 0 || cfg.Temperature > 2 {
cfg.Temperature = 0.5
}
if cfg.MaxTokens <= 0 {
cfg.MaxTokens = 2048
}
return nil
}
func (i *openAIProviderInitializer) CreateProvider(cfg config.LLMConfig) (Provider, error) {
i.initConfig(cfg)
if err := i.validateConfig(); err != nil {
if err := i.validateConfig(&cfg); err != nil {
return nil, err
}
headers := map[string]string{
"Authorization": "Bearer " + openAIConfig.apiKey,
"Content-Type": "application/json",
// Create OpenAI client
var clientOptions []option.RequestOption
clientOptions = append(clientOptions, option.WithAPIKey(cfg.APIKey))
// If a custom baseURL is set, use it
if cfg.BaseURL != "" {
clientOptions = append(clientOptions, option.WithBaseURL(cfg.BaseURL))
}
client := common.NewHTTPClient(openAIConfig.baseURL, headers)
return &OpenAIProvider{client: client, cfg: openAIConfig}, nil
}
type OpenAIProvider struct {
client *common.HTTPClient
cfg openAIProviderConfig
}
// Create OpenAI client
client := openai.NewClient(clientOptions...)
type openAIChatCompletionRequest struct {
Model string `json:"model"`
Messages []openAIChatMessage `json:"messages"`
Temperature float64 `json:"temperature,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
}
type openAIChatMessage struct {
Role string `json:"role"`
Content string `json:"content"`
}
type openAIChatCompletionResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Choices []openAIChatCompletionResponseChoice `json:"choices"`
Error *openAIError `json:"error,omitempty"`
}
type openAIChatCompletionResponseChoice struct {
Index int `json:"index"`
Message openAIChatMessage `json:"message"`
FinishReason string `json:"finish_reason"`
}
type openAIError struct {
Message string `json:"message"`
Type string `json:"type"`
Code string `json:"code"`
Param string `json:"param"`
return &OpenAIProvider{
client: &client,
model: cfg.Model,
temperature: cfg.Temperature,
maxTokens: cfg.MaxTokens,
}, nil
}
// GenerateCompletion implements Provider interface.
func (o *OpenAIProvider) GenerateCompletion(ctx context.Context, prompt string) (string, error) {
req := openAIChatCompletionRequest{
Model: o.cfg.model,
Messages: []openAIChatMessage{
{Role: "user", Content: prompt},
// Create chat request
params := openai.ChatCompletionNewParams{
Model: o.model,
Messages: []openai.ChatCompletionMessageParamUnion{
openai.UserMessage(prompt),
},
Temperature: o.cfg.temperature,
MaxTokens: o.cfg.maxTokens,
}
body, err := o.client.Post(OPENAI_CHAT_ENDPOINT, req)
// Set optional parameters
if o.temperature > 0 {
temperature := float64(o.temperature)
params.Temperature = param.Opt[float64]{Value: temperature}
}
if o.maxTokens > 0 {
maxTokens := int64(o.maxTokens)
params.MaxTokens = param.Opt[int64]{Value: maxTokens}
}
// Send request
response, err := o.client.Chat.Completions.New(ctx, params)
if err != nil {
return "", fmt.Errorf("openai llm post error: %w", err)
// Handle error
return "", fmt.Errorf("openai llm error: %w", err)
}
var resp openAIChatCompletionResponse
if err := json.Unmarshal(body, &resp); err != nil {
return "", fmt.Errorf("openai llm unmarshal error: %w", err)
}
if resp.Error != nil {
return "", fmt.Errorf("openai llm api error: %s - %s", resp.Error.Type, resp.Error.Message)
}
if len(resp.Choices) == 0 {
// Check response
if len(response.Choices) == 0 {
return "", errors.New("openai llm: empty choices")
}
return resp.Choices[0].Message.Content, nil
// Return generated content
return response.Choices[0].Message.Content, nil
}
func (o *OpenAIProvider) GetProviderType() string {

View File

@@ -4,7 +4,7 @@ import (
"strings"
)
const RAGPromptTemplate = `You are a professional knowledge Q&A assistant. Your task is to provide accurate, complete, and strictly relevant answers based on the user's question and retrieved context.
const RAGPromptTemplate = `You are a professional knowledge Q&A assistant. Your task is to provide direct and concise answers based on the user's question and retrieved context.
Retrieved relevant context (may be empty, multiple segments separated by line breaks):
{contexts}
@@ -13,9 +13,11 @@ User question:
{query}
Requirements:
1. If the context provides sufficient information, answer directly based on the context. You may use domain knowledge to supplement, but do not fabricate facts beyond the context.
2. If the context is insufficient or unrelated to the question, respond with: "I am unable to answer this question."
3. Your response must correctly answer the user's question and must not contain any irrelevant or unrelated content.`
1. Provide ONLY the direct answer without any explanation, reasoning, or additional context.
2. If the context provides sufficient information, output the answer in the most concise form possible.
3. If the context is insufficient or unrelated to the question, respond with: "I am unable to answer this question."
4. Do not include any phrases like "The answer is", "Based on the context", etc. Just output the answer directly.
`
func BuildPrompt(query string, contexts []string, join string) string {
rendered := strings.ReplaceAll(RAGPromptTemplate, "{query}", query)

View File

@@ -46,24 +46,22 @@ func NewRAGClient(config *config.Config) (*RAGClient, error) {
}
ragclient.embeddingProvider = embeddingProvider
llmProvider, err := llm.NewLLMProvider(ragclient.config.LLM)
if err != nil {
return nil, fmt.Errorf("create llm provider failed, err: %w", err)
if ragclient.config.LLM.Provider == "" {
ragclient.llmProvider = nil
} else {
llmProvider, err := llm.NewLLMProvider(ragclient.config.LLM)
if err != nil {
return nil, fmt.Errorf("create llm provider failed, err: %w", err)
}
ragclient.llmProvider = llmProvider
}
ragclient.llmProvider = llmProvider
demoVector, err := embeddingProvider.GetEmbedding(context.Background(), "initialization")
if err != nil {
return nil, fmt.Errorf("create init embedding failed, err: %w", err)
}
dim := len(demoVector)
dim := ragclient.config.Embedding.Dimensions
provider, err := vectordb.NewVectorDBProvider(&ragclient.config.VectorDB, dim)
if err != nil {
return nil, fmt.Errorf("create vector store provider failed, err: %w", err)
}
ragclient.vectordbProvider = provider
return ragclient, nil
}

View File

@@ -1,6 +1,8 @@
package rag
import (
"encoding/json"
"os"
"testing"
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/config"
@@ -20,15 +22,17 @@ func getRAGClient() (*RAGClient, error) {
LLM: config.LLMConfig{
Provider: "openai",
APIKey: "sk-xxxx",
APIKey: "sk-xxx",
BaseURL: "https://openrouter.ai/api/v1",
Model: "openai/gpt-4o",
},
Embedding: config.EmbeddingConfig{
Provider: "dashscope",
APIKey: "sk-xxxx",
Model: "text-embedding-v4",
Provider: "openai",
BaseURL: "https://dashscope.aliyuncs.com/compatible-mode/v1",
APIKey: "sk-xxxx",
Model: "text-embedding-v4",
Dimensions: 1536,
},
VectorDB: config.VectorDBConfig{
@@ -36,7 +40,49 @@ func getRAGClient() (*RAGClient, error) {
Host: "localhost",
Port: 19530,
Database: "default",
Collection: "test_collection",
Collection: "test_collection3",
Mapping: config.MappingConfig{
Fields: []config.FieldMapping{
{
StandardName: "id",
RawName: "pk",
Properties: map[string]interface{}{
"max_length": 256,
"auto_id": false,
},
},
{
StandardName: "content",
RawName: "page_content",
Properties: map[string]interface{}{
"max_length": 8192,
},
},
{
StandardName: "vector",
RawName: "page_vector",
Properties: make(map[string]interface{}),
},
{
StandardName: "metadata",
RawName: "metadata",
Properties: make(map[string]interface{}),
},
{
StandardName: "created_at",
RawName: "created_at",
Properties: make(map[string]interface{}),
},
},
Index: config.IndexConfig{
IndexType: "IVF_FLAT",
Params: map[string]interface{}{"nlist": 64},
},
Search: config.SearchConfig{
MetricType: "COSINE",
Params: map[string]interface{}{"nprobe": 32},
},
},
},
}
@@ -46,7 +92,6 @@ func getRAGClient() (*RAGClient, error) {
}
return ragClient, nil
}
func TestNewRAGClient(t *testing.T) {
@@ -102,7 +147,7 @@ func TestRAGClient_DeleteChunk(t *testing.T) {
return
}
chunk_id := "63ee25d7-41b9-4455-8066-075ca5c803b2"
chunk_id := "2a06679c-a8ea-46dc-bf1c-7e7b164a73c8"
err = ragClient.DeleteChunk(chunk_id)
if err != nil {
t.Errorf("DeleteChunk() error = %v", err)
@@ -137,7 +182,11 @@ func TestRAGClient_Chat(t *testing.T) {
t.Errorf("getRAGClient() error = %v", err)
return
}
query := "what is the competition about?"
// query := "Who is the individual associated with the cryptocurrency industry facing a criminal trial on fraud and conspiracy charges, as reported by both The Verge and TechCrunch, and is accused by prosecutors of committing fraud for personal gain?"
// query := "Which individual is implicated in both inflating the value of a Manhattan apartment to a figure not yet achieved in New York City's real estate history, according to 'Fortune', and is also accused of adjusting this apartment's valuation to compensate for a loss in another asset's worth, as reported by 'The Age'?"
// query := "Who is the figure associated with generative AI technology whose departure from OpenAI was considered shocking according to Fortune, and is also the subject of a prevailing theory suggesting a lack of full truthfulness with the board as reported by TechCrunch?"
// query := "Do the TechCrunch article on software companies and the Hacker News article on The Epoch Times both report an increase in revenue related to payment and subscription models, respectively?"
query := "Which online betting platform provides a welcome bonus of up to $1000 in bonus bets for new customers' first losses, runs NBA betting promotions, and is anticipated to extend the same sign-up offer to new users in Vermont, as reported by both CBSSports.com and Sporting News?"
resp, err := ragClient.Chat(query)
if err != nil {
t.Errorf("Chat() error = %v", err)
@@ -147,4 +196,45 @@ func TestRAGClient_Chat(t *testing.T) {
t.Errorf("Chat() resp = %s, want not empty", resp)
return
}
t.Logf("Chat() resp = %s", resp)
}
func TestRAGClient_LoadChunks(t *testing.T) {
t.Logf("TestRAGClient_LoadChunks")
ragClient, err := getRAGClient()
if err != nil {
t.Errorf("getRAGClient() error = %v", err)
return
}
// load json output/corpus.json and then call ragclient CreateChunkFromText to insert chunks
file, err := os.Open("/dataset/corpus.json")
if err != nil {
t.Errorf("LoadData() error = %v", err)
return
}
defer file.Close()
decoder := json.NewDecoder(file)
var data []struct {
Body string `json:"body"`
Title string `json:"title"`
Url string `json:"url"`
}
if err := decoder.Decode(&data); err != nil {
t.Errorf("LoadData() error = %v", err)
return
}
for _, item := range data {
t.Logf("LoadData() url = %s", item.Url)
t.Logf("LoadData() title = %s", item.Title)
t.Logf("LoadData() len body = %d", len(item.Body))
chunks, err := ragClient.CreateChunkFromText(item.Body, item.Title)
if err != nil {
t.Errorf("LoadData() error = %v", err)
continue
} else {
t.Logf("LoadData() chunks len = %d", len(chunks))
}
}
t.Logf("TestRAGClient_LoadChunks done")
}

View File

@@ -28,7 +28,7 @@ func init() {
TopK: 10,
},
LLM: config.LLMConfig{
Provider: "openai",
Provider: "",
APIKey: "",
BaseURL: "",
Model: "gpt-4o",
@@ -36,11 +36,11 @@ func init() {
MaxTokens: 2048,
},
Embedding: config.EmbeddingConfig{
Provider: "dashscope",
APIKey: "",
BaseURL: "",
Model: "text-embedding-v4",
Dimension: 1024,
Provider: "openai",
APIKey: "",
BaseURL: "",
Model: "text-embedding-ada-002",
Dimensions: 1536,
},
VectorDB: config.VectorDBConfig{
Provider: "milvus",
@@ -50,14 +50,56 @@ func init() {
Collection: "rag",
Username: "",
Password: "",
Mapping: config.MappingConfig{
Fields: []config.FieldMapping{
{
StandardName: "id",
RawName: "id",
Properties: map[string]interface{}{
"max_length": 256,
"auto_id": false,
},
},
{
StandardName: "content",
RawName: "content",
Properties: map[string]interface{}{
"max_length": 8192,
},
},
{
StandardName: "vector",
RawName: "vector",
Properties: make(map[string]interface{}),
},
{
StandardName: "metadata",
RawName: "metadata",
Properties: make(map[string]interface{}),
},
{
StandardName: "created_at",
RawName: "created_at",
Properties: make(map[string]interface{}),
},
},
Index: config.IndexConfig{
IndexType: "HNSW",
Params: map[string]interface{}{"M": 8, "efConstruction": 64},
},
Search: config.SearchConfig{
MetricType: "IP",
Params: make(map[string]interface{}),
},
},
},
},
})
}
func (c *RAGConfig) ParseConfig(config map[string]any) error {
func (c *RAGConfig) ParseConfig(cfg map[string]any) error {
// Parse RAG configuration
if ragConfig, ok := config["rag"].(map[string]any); ok {
if ragConfig, ok := cfg["rag"].(map[string]any); ok {
if splitter, exists := ragConfig["splitter"].(map[string]any); exists {
if splitterType, exists := splitter["provider"].(string); exists {
c.config.RAG.Splitter.Provider = splitterType
@@ -78,7 +120,7 @@ func (c *RAGConfig) ParseConfig(config map[string]any) error {
}
// Parse Embedding configuration
if embeddingConfig, ok := config["embedding"].(map[string]any); ok {
if embeddingConfig, ok := cfg["embedding"].(map[string]any); ok {
if provider, exists := embeddingConfig["provider"].(string); exists {
c.config.Embedding.Provider = provider
} else {
@@ -94,17 +136,15 @@ func (c *RAGConfig) ParseConfig(config map[string]any) error {
if model, exists := embeddingConfig["model"].(string); exists {
c.config.Embedding.Model = model
}
if dimension, exists := embeddingConfig["dimension"].(float64); exists {
c.config.Embedding.Dimension = int(dimension)
if dimensions, exists := embeddingConfig["dimensions"].(float64); exists {
c.config.Embedding.Dimensions = int(dimensions)
}
}
// Parse llm configuration
if llmConfig, ok := config["llm"].(map[string]any); ok {
if llmConfig, ok := cfg["llm"].(map[string]any); ok {
if provider, exists := llmConfig["provider"].(string); exists {
c.config.LLM.Provider = provider
} else {
return errors.New("missing llm provider")
}
if apiKey, exists := llmConfig["api_key"].(string); exists {
c.config.LLM.APIKey = apiKey
@@ -124,7 +164,7 @@ func (c *RAGConfig) ParseConfig(config map[string]any) error {
}
// Parse VectorDB configuration
if vectordbConfig, ok := config["vectordb"].(map[string]any); ok {
if vectordbConfig, ok := cfg["vectordb"].(map[string]any); ok {
if provider, exists := vectordbConfig["provider"].(string); exists {
c.config.VectorDB.Provider = provider
} else {
@@ -148,8 +188,59 @@ func (c *RAGConfig) ParseConfig(config map[string]any) error {
if password, exists := vectordbConfig["password"].(string); exists {
c.config.VectorDB.Password = password
}
}
// Parse mapping here
if mapping, exists := vectordbConfig["mapping"].(map[string]any); exists {
// Parse field mappings
if fields, ok := mapping["fields"].([]any); ok {
c.config.VectorDB.Mapping.Fields = []config.FieldMapping{}
for _, field := range fields {
if fieldMap, ok := field.(map[string]any); ok {
fieldMapping := config.FieldMapping{
Properties: make(map[string]interface{}),
}
if standardName, ok := fieldMap["standard_name"].(string); ok {
fieldMapping.StandardName = standardName
}
if rawName, ok := fieldMap["raw_name"].(string); ok {
fieldMapping.RawName = rawName
}
// Parse properties
if properties, ok := fieldMap["properties"].(map[string]any); ok {
for key, value := range properties {
fieldMapping.Properties[key] = value
}
}
c.config.VectorDB.Mapping.Fields = append(c.config.VectorDB.Mapping.Fields, fieldMapping)
}
}
}
// Parse index configuration
if index, ok := mapping["index"].(map[string]any); ok {
if indexType, ok := index["index_type"].(string); ok {
c.config.VectorDB.Mapping.Index.IndexType = indexType
}
// Parse index parameters
if params, ok := index["params"].(map[string]any); ok {
c.config.VectorDB.Mapping.Index.Params = params
}
}
// Parse search configuration
if search, ok := mapping["search"].(map[string]any); ok {
if metricType, ok := search["metric_type"].(string); ok {
c.config.VectorDB.Mapping.Search.MetricType = metricType
}
// Parse search parameters
if params, ok := search["params"].(map[string]any); ok {
c.config.VectorDB.Mapping.Search.Params = params
}
}
}
}
return nil
}
@@ -190,7 +281,7 @@ func (c *RAGConfig) NewServer(serverName string) (*common.MCPServer, error) {
// Intelligent Q&A Tool
mcpServer.AddTool(
mcp.NewToolWithRawSchema("chat", "Generate contextually relevant responses using RAG system with LLM integration", GetChatSchema()),
mcp.NewToolWithRawSchema("chat", "Answer user questions by retrieving relevant knowledge from the database and generating responses using RAG-enhanced LLM", GetChatSchema()),
HandleChat(ragClient),
)

View File

@@ -28,11 +28,11 @@ func TestRAGConfig_ParseConfig(t *testing.T) {
MaxTokens: 2048,
},
Embedding: config.EmbeddingConfig{
Provider: "dashscope",
APIKey: "sk-XXX",
BaseURL: "",
Model: "text-embedding-v4",
Dimension: 1024,
Provider: "dashscope",
APIKey: "sk-XXX",
BaseURL: "",
Model: "text-embedding-v4",
Dimensions: 1024,
},
VectorDB: config.VectorDBConfig{
Provider: "milvus",
@@ -42,6 +42,48 @@ func TestRAGConfig_ParseConfig(t *testing.T) {
Collection: "test_rag",
Username: "",
Password: "",
Mapping: config.MappingConfig{
Fields: []config.FieldMapping{
{
StandardName: "id",
RawName: "id",
Properties: map[string]interface{}{
"max_length": 256,
"auto_id": false,
},
},
{
StandardName: "content",
RawName: "content",
Properties: map[string]interface{}{
"max_length": 8192,
},
},
{
StandardName: "vector",
RawName: "vector",
Properties: make(map[string]interface{}),
},
{
StandardName: "metadata",
RawName: "metadata",
Properties: make(map[string]interface{}),
},
{
StandardName: "created_at",
RawName: "created_at",
Properties: make(map[string]interface{}),
},
},
Index: config.IndexConfig{
IndexType: "HNSW",
Params: map[string]interface{}{"M": 4, "efConstruction": 32},
},
Search: config.SearchConfig{
MetricType: "IP",
Params: map[string]interface{}{"ef": 32},
},
},
},
}
// 把 config 输出 yaml 格式

View File

@@ -169,6 +169,10 @@ func HandleChat(ragClient *RAGClient) common.ToolHandlerFunc {
if !ok {
return nil, fmt.Errorf("invalid query argument")
}
// check llm provider
if ragClient.llmProvider == nil {
return nil, fmt.Errorf("llm provider is empty, please check the llm configuration")
}
// Generate response using RAGClient's LLM
reply, err := ragClient.Chat(query)
if err != nil {

View File

@@ -0,0 +1,182 @@
package vectordb
import (
"errors"
"fmt"
"github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/config"
)
// Sentinel errors shared by vector database mapper implementations; callers
// can match them with errors.Is (e.g. lookups wrap ErrFieldNotFound).
var (
	ErrFieldNotFound        = errors.New("field not found")
	ErrInvalidFieldType     = errors.New("invalid field type")
	ErrInvalidIndexType     = errors.New("invalid index type")
	ErrInvalidMetricType    = errors.New("invalid metric type")
	ErrInvalidSearchParams  = errors.New("invalid search parameters")
	ErrCollectionNotFound   = errors.New("collection not found")
	ErrUnsupportedOperation = errors.New("unsupported operation")
)
// VectorDBMapper abstracts the mapping between the pipeline's standard field
// model (id/content/vector/metadata/...) and the raw field names of a
// concrete vector database collection.
type VectorDBMapper interface {
	// ParseMapping parses the mapping configuration for the given provider.
	ParseMapping(provider string, cfg config.MappingConfig) error
	// GetIndexConfig returns the index configuration.
	GetIndexConfig() (config.IndexConfig, error)
	// GetSearchConfig returns the search configuration.
	GetSearchConfig() (config.SearchConfig, error)
	// GetRawAllFieldNames returns every raw (provider-side) field name.
	GetRawAllFieldNames() ([]string, error)
	// GetIDField returns the ID field mapping.
	GetIDField() (*config.FieldMapping, error)
	// GetVectorField returns the vector field mapping.
	GetVectorField() (*config.FieldMapping, error)
	// GetRawField returns the field mapping for a standard field name.
	GetRawField(standardFieldName string) (*config.FieldMapping, error)
	// GetField returns the field mapping for a raw field name.
	GetField(rawFieldName string) (*config.FieldMapping, error)
	// GetFieldMappings returns all configured field mappings.
	GetFieldMappings() ([]config.FieldMapping, error)
}
// DefaultVectorDBMapper is the default implementation of the VectorDBMapper
// interface. Besides the parsed mapping configuration it keeps two lookup
// tables so a field can be resolved by either name in O(1).
type DefaultVectorDBMapper struct {
	// Mapping configuration as stored by ParseMapping.
	mappingConfig config.MappingConfig
	// Map from standard field name to field mapping.
	standardFieldMap map[string]*config.FieldMapping
	// Map from raw field name to field mapping.
	rawFieldMap map[string]*config.FieldMapping
}
// NewDefaultVectorDBMapper constructs a DefaultVectorDBMapper for the given
// provider and mapping configuration. It fails when the configuration does
// not parse (e.g. a mandatory field is missing).
func NewDefaultVectorDBMapper(provider string, mappingConfig config.MappingConfig) (*DefaultVectorDBMapper, error) {
	m := &DefaultVectorDBMapper{
		standardFieldMap: map[string]*config.FieldMapping{},
		rawFieldMap:      map[string]*config.FieldMapping{},
	}
	if err := m.ParseMapping(provider, mappingConfig); err != nil {
		return nil, err
	}
	return m, nil
}
// ParseMapping parses the mapping configuration, applies the default field
// mappings when none are configured, and rebuilds the standard-name and
// raw-name lookup tables.
//
// It returns an error when the mandatory fields (id, content, vector) are
// missing from the resulting mapping. The provider argument is currently
// unused; it is kept for provider-specific parsing in the future.
//
// Fix: the configuration is now stored AFTER defaults are filled in. The
// previous code assigned m.mappingConfig = cfg first, so whenever the default
// fields were applied, GetFieldMappings() reported an empty field list even
// though the lookup maps were populated. The error message also no longer
// claims a "varchar type" check that is never performed.
func (m *DefaultVectorDBMapper) ParseMapping(provider string, cfg config.MappingConfig) error {
	// Clear existing mappings so ParseMapping can be called repeatedly.
	m.standardFieldMap = make(map[string]*config.FieldMapping)
	m.rawFieldMap = make(map[string]*config.FieldMapping)

	// Fill in the default field mappings when the user configured none.
	if len(cfg.Fields) == 0 {
		cfg.Fields = []config.FieldMapping{
			{
				StandardName: "id",
				RawName:      "id",
				Properties: map[string]interface{}{
					"max_length": 256,
					"auto_id":    false,
				},
			},
			{
				StandardName: "content",
				RawName:      "content",
				Properties: map[string]interface{}{
					"max_length": 8192,
				},
			},
			{
				StandardName: "vector",
				RawName:      "vector",
			},
			{
				StandardName: "metadata",
				RawName:      "metadata",
			},
			{
				StandardName: "created_at",
				RawName:      "created_at",
			},
		}
	}

	// Store the (possibly defaulted) configuration so GetFieldMappings,
	// GetIndexConfig and GetSearchConfig reflect what is actually in use.
	m.mappingConfig = cfg

	// Build the lookup tables. Pointers into cfg.Fields share the same
	// backing array as m.mappingConfig.Fields (slice assignment copies the
	// header, not the elements), so both views stay consistent.
	for i := range cfg.Fields {
		fieldPtr := &cfg.Fields[i]
		m.standardFieldMap[fieldPtr.StandardName] = fieldPtr
		m.rawFieldMap[fieldPtr.RawName] = fieldPtr
	}

	// The mapping must include id, content, and vector fields.
	requiredFields := []string{"id", "content", "vector"}
	for _, fieldName := range requiredFields {
		if _, err := m.GetRawField(fieldName); err != nil {
			return fmt.Errorf("[vector db mapper] required field %s not found", fieldName)
		}
	}
	return nil
}
// GetIndexConfig returns the configured index settings (index type and
// parameters) from the stored mapping configuration. It never fails; the
// error return exists to satisfy the VectorDBMapper interface.
func (m *DefaultVectorDBMapper) GetIndexConfig() (config.IndexConfig, error) {
	return m.mappingConfig.Index, nil
}
// GetSearchConfig returns the configured search settings (metric type and
// parameters) from the stored mapping configuration. It never fails; the
// error return exists to satisfy the VectorDBMapper interface.
func (m *DefaultVectorDBMapper) GetSearchConfig() (config.SearchConfig, error) {
	return m.mappingConfig.Search, nil
}
// GetRawAllFieldNames returns the raw (provider-side) name of every mapped
// field. The order is unspecified because it comes from map iteration.
func (m *DefaultVectorDBMapper) GetRawAllFieldNames() ([]string, error) {
	names := make([]string, 0, len(m.rawFieldMap))
	for rawName := range m.rawFieldMap {
		names = append(names, rawName)
	}
	return names, nil
}
// GetIDField returns the mapping for the standard "id" field.
func (m *DefaultVectorDBMapper) GetIDField() (*config.FieldMapping, error) {
	return m.GetRawField("id")
}
// GetVectorField returns the mapping for the standard "vector" field.
func (m *DefaultVectorDBMapper) GetVectorField() (*config.FieldMapping, error) {
	return m.GetRawField("vector")
}
// GetRawField resolves a field mapping by its standard (canonical) name.
// The returned error wraps ErrFieldNotFound when the name is unknown.
func (m *DefaultVectorDBMapper) GetRawField(standardFieldName string) (*config.FieldMapping, error) {
	if field, ok := m.standardFieldMap[standardFieldName]; ok {
		return field, nil
	}
	return nil, fmt.Errorf("%w: standard field %s not found", ErrFieldNotFound, standardFieldName)
}
// GetField looks up a field mapping by its raw (backend) name.
// It returns an error wrapping ErrFieldNotFound when no mapping exists.
func (m *DefaultVectorDBMapper) GetField(rawFieldName string) (*config.FieldMapping, error) {
	if field, ok := m.rawFieldMap[rawFieldName]; ok {
		return field, nil
	}
	return nil, fmt.Errorf("%w: raw field %s not found", ErrFieldNotFound, rawFieldName)
}
// GetFieldMappings returns every configured field mapping.
func (m *DefaultVectorDBMapper) GetFieldMappings() ([]config.FieldMapping, error) {
	mappings := m.mappingConfig.Fields
	return mappings, nil
}

View File

@@ -80,16 +80,17 @@ func (m *milvusProviderInitializer) CreateProvider(cfg *config.VectorDBConfig, d
type MilvusProvider struct {
client client.Client
config *config.VectorDBConfig
Collection string
collection string
mapper VectorDBMapper
dimensions int
}
// NewMilvusProvider creates a new instance of MilvusProvider
func NewMilvusProvider(cfg *config.VectorDBConfig, dim int) (VectorStoreProvider, error) {
func NewMilvusProvider(cfg *config.VectorDBConfig, dimensions int) (VectorStoreProvider, error) {
// Create Milvus client
connectParam := client.Config{
Address: fmt.Sprintf("%s:%d", cfg.Host, cfg.Port),
}
connectParam.DBName = cfg.Database
// Add authentication if credentials are provided
if cfg.Username != "" && cfg.Password != "" {
@@ -102,92 +103,301 @@ func NewMilvusProvider(cfg *config.VectorDBConfig, dim int) (VectorStoreProvider
return nil, fmt.Errorf("failed to create milvus client: %w", err)
}
mapper, err := NewDefaultVectorDBMapper(MILVUS_PROVIDER_TYPE, cfg.Mapping)
if err != nil {
return nil, fmt.Errorf("failed to create default vector db mapper: %w", err)
}
provider := &MilvusProvider{
client: milvusClient,
config: cfg,
Collection: cfg.Collection,
collection: cfg.Collection,
mapper: mapper,
dimensions: dimensions,
}
ctx := context.Background()
if err := provider.CreateCollection(ctx, dim); err != nil {
if err := provider.CreateCollection(ctx, dimensions); err != nil {
return nil, err
}
return provider, nil
}
// buildSchema assembles the Milvus collection schema from the configured
// field mappings. Only the five standard field names (id, content, vector,
// metadata, created_at) are materialized; mappings with any other
// StandardName are silently skipped.
// NOTE(review): errors from GetIDField/GetFieldMappings are ignored here —
// presumably the mapper validated required fields earlier; confirm.
func (m *MilvusProvider) buildSchema() (*entity.Schema, error) {
// Create Milvus collection Schema
idField, _ := m.mapper.GetIDField()
isIDAuto := idField.IsAutoID()
schema := entity.NewSchema().
WithName(m.collection).
WithDescription("Knowledge document collection").
WithAutoID(isIDAuto).
WithDynamicFieldEnabled(false)
// Add fields
var fieldEntity *entity.Field
fieldMappings, _ := m.mapper.GetFieldMappings()
for _, field := range fieldMappings {
fieldEntity = nil
maxLength := field.MaxLength()
switch field.StandardName {
case "id":
// Primary key: varchar, optionally auto-generated by Milvus.
isIDAuto := field.IsAutoID()
fieldEntity = entity.NewField().
WithName(field.RawName).
WithDataType(entity.FieldTypeVarChar).
WithMaxLength(int64(maxLength)).
WithIsPrimaryKey(true)
if isIDAuto {
fieldEntity.WithIsAutoID(true)
}
schema.WithField(fieldEntity)
case "content":
// Document text, bounded by the configured max_length.
fieldEntity = entity.NewField().
WithName(field.RawName).
WithDataType(entity.FieldTypeVarChar).
WithMaxLength(int64(maxLength))
schema.WithField(fieldEntity)
case "vector":
// Embedding vector; dimension fixed at provider construction time.
fieldEntity = entity.NewField().
WithName(field.RawName).
WithDataType(entity.FieldTypeFloatVector).
WithDim(int64(m.dimensions))
schema.WithField(fieldEntity)
case "metadata":
// Arbitrary per-document metadata stored as JSON.
fieldEntity = entity.NewField().
WithName(field.RawName).
WithDataType(entity.FieldTypeJSON)
schema.WithField(fieldEntity)
case "created_at":
// Creation timestamp stored as an int64 (Unix milliseconds at insert time).
fieldEntity = entity.NewField().
WithName(field.RawName).
WithDataType(entity.FieldTypeInt64)
schema.WithField(fieldEntity)
}
}
return schema, nil
}
// GetMetricType translates a case-insensitive metric name into the
// corresponding Milvus metric constant. Unknown names fall back to
// inner product (IP).
func (m *MilvusProvider) GetMetricType(metricType string) entity.MetricType {
	result := entity.IP // default metric
	switch strings.ToUpper(metricType) {
	case "L2":
		result = entity.L2
	case "IP":
		result = entity.IP
	case "COSINE":
		result = entity.COSINE
	case "HAMMING":
		result = entity.HAMMING
	case "JACCARD":
		result = entity.JACCARD
	case "TANIMOTO":
		result = entity.TANIMOTO
	case "SUBSTRUCTURE":
		result = entity.SUBSTRUCTURE
	case "SUPERSTRUCTURE":
		result = entity.SUPERSTRUCTURE
	}
	return result
}
// buildVectorIndex constructs the Milvus vector index described by the
// mapper's index configuration, using the metric type from the search
// configuration. The index type defaults to HNSW when unset; an unknown
// type is an error. Missing numeric parameters fall back to per-index
// defaults matching the Milvus SDK conventions.
func (m *MilvusProvider) buildVectorIndex() (entity.Index, error) {
	indexConfig, _ := m.mapper.GetIndexConfig()
	searchConfig, _ := m.mapper.GetSearchConfig()

	milvusIndexType := strings.ToUpper(indexConfig.IndexType)
	if milvusIndexType == "" {
		milvusIndexType = "HNSW" // default index type
	}
	metricType := m.GetMetricType(searchConfig.MetricType)

	// intParam reads an integer index parameter, falling back to def when
	// the key is absent or unparsable.
	intParam := func(key string, def int) int {
		if v, err := indexConfig.ParamsInt64(key); err == nil {
			return int(v)
		}
		return def
	}
	// wrap attaches the index type name to a constructor error.
	wrap := func(idx entity.Index, err error) (entity.Index, error) {
		if err != nil {
			return nil, fmt.Errorf("failed to create %s index: %w", milvusIndexType, err)
		}
		return idx, nil
	}

	switch milvusIndexType {
	case "FLAT":
		// FLAT needs no additional parameters.
		return wrap(entity.NewIndexFlat(metricType))
	case "BIN_FLAT":
		return wrap(entity.NewIndexBinFlat(metricType, intParam("nlist", 128)))
	case "IVF_FLAT":
		return wrap(entity.NewIndexIvfFlat(metricType, intParam("nlist", 128)))
	case "BIN_IVF_FLAT":
		return wrap(entity.NewIndexBinIvfFlat(metricType, intParam("nlist", 128)))
	case "IVF_SQ8":
		return wrap(entity.NewIndexIvfSQ8(metricType, intParam("nlist", 128)))
	case "IVF_PQ":
		// NOTE: "m" is read via ParamsFloat64 for backward compatibility
		// with existing configs; do not change to ParamsInt64.
		pqM := 4
		if v, err := indexConfig.ParamsFloat64("m"); err == nil {
			pqM = int(v)
		}
		return wrap(entity.NewIndexIvfPQ(metricType, intParam("nlist", 128), pqM, intParam("nbits", 8)))
	case "HNSW":
		return wrap(entity.NewIndexHNSW(metricType, intParam("M", 8), intParam("efConstruction", 64)))
	case "IVF_HNSW":
		return wrap(entity.NewIndexIvfHNSW(metricType, intParam("nlist", 128), intParam("M", 8), intParam("efConstruction", 64)))
	case "DISKANN":
		// DISKANN takes no extra construction parameters.
		return wrap(entity.NewIndexDISKANN(metricType))
	case "SCANN":
		withRawData := false
		if v, err := indexConfig.ParamsBool("with_raw_data"); err == nil {
			withRawData = v
		}
		return wrap(entity.NewIndexSCANN(metricType, intParam("nlist", 128), withRawData))
	case "AUTOINDEX":
		return wrap(entity.NewIndexAUTOINDEX(metricType))
	default:
		return nil, fmt.Errorf("unsupported index type: %s", milvusIndexType)
	}
}
// CreateCollection creates a new collection with the specified dimension
func (m *MilvusProvider) CreateCollection(ctx context.Context, dim int) error {
// Check if collection exists
document_exists, err := m.client.HasCollection(ctx, m.Collection)
document_exists, err := m.client.HasCollection(ctx, m.collection)
if err != nil {
return fmt.Errorf("failed to check %s collection existence: %w", m.Collection, err)
return fmt.Errorf("failed to check %s collection existence: %w", m.collection, err)
}
if !document_exists {
fmt.Printf("create collection %s\n", m.Collection)
fmt.Printf("create collection %s\n", m.collection)
// Create schema
schema := entity.NewSchema().
WithName(m.Collection).
WithDescription("Knowledge document collection").
WithAutoID(false).
WithDynamicFieldEnabled(false)
// Add fields based on schema.Document structure
// Primary key field - ID
pkField := entity.NewField().
WithName("id").
WithDataType(entity.FieldTypeVarChar).
WithMaxLength(256).
WithIsPrimaryKey(true).
WithIsAutoID(false)
schema.WithField(pkField)
// Content field
contentField := entity.NewField().
WithName("content").
WithDataType(entity.FieldTypeVarChar).
WithMaxLength(8192)
schema.WithField(contentField)
// Vector field
vectorField := entity.NewField().
WithName("vector").
WithDataType(entity.FieldTypeFloatVector).
WithDim(int64(dim))
schema.WithField(vectorField)
// Metadata field
metadataField := entity.NewField().
WithName("metadata").
WithDataType(entity.FieldTypeJSON)
schema.WithField(metadataField)
// CreatedAt field (stored as Unix timestamp)
createdAtField := entity.NewField().
WithName("created_at").
WithDataType(entity.FieldTypeInt64)
schema.WithField(createdAtField)
schema, err := m.buildSchema()
if err != nil {
return fmt.Errorf("failed to build schema: %w", err)
}
// Create collection
err = m.client.CreateCollection(ctx, schema, entity.DefaultShardNumber)
if err != nil {
return fmt.Errorf("failed to create collection: %w", err)
}
// Create vector index
vectorIndex, err := entity.NewIndexHNSW(entity.IP, 8, 64)
vectorIndex, err := m.buildVectorIndex()
vectorField, _ := m.mapper.GetVectorField()
if err != nil {
return fmt.Errorf("failed to create vector index: %w", err)
}
err = m.client.CreateIndex(ctx, m.Collection, "vector", vectorIndex, false, client.WithIndexName("vector_index"))
err = m.client.CreateIndex(ctx, m.collection, vectorField.RawName, vectorIndex, false, client.WithIndexName("vector_index"))
if err != nil {
return fmt.Errorf("failed to create vector index: %w", err)
}
}
// Load collection
err = m.client.LoadCollection(ctx, m.Collection, false)
err = m.client.LoadCollection(ctx, m.collection, false)
if err != nil {
return fmt.Errorf("failed to load document collection: %w", err)
}
@@ -197,15 +407,15 @@ func (m *MilvusProvider) CreateCollection(ctx context.Context, dim int) error {
// DropCollection removes the collection from the database
func (m *MilvusProvider) DropCollection(ctx context.Context) error {
// Check if collection exists
exists, err := m.client.HasCollection(ctx, m.Collection)
exists, err := m.client.HasCollection(ctx, m.collection)
if err != nil {
return fmt.Errorf("failed to check %s collection existence: %w", m.Collection, err)
return fmt.Errorf("failed to check %s collection existence: %w", m.collection, err)
}
if !exists {
return fmt.Errorf("collection %s does not exist", m.Collection)
return fmt.Errorf("collection %s does not exist", m.collection)
}
// Drop collection
err = m.client.DropCollection(ctx, m.Collection)
err = m.client.DropCollection(ctx, m.collection)
if err != nil {
return fmt.Errorf("failed to drop collection: %w", err)
}
@@ -217,51 +427,71 @@ func (m *MilvusProvider) AddDoc(ctx context.Context, docs []schema.Document) err
if len(docs) == 0 {
return nil
}
// Prepare data
ids := make([]string, len(docs))
contents := make([]string, len(docs))
vectors := make([][]float32, len(docs))
metadatas := make([][]byte, len(docs))
createdAts := make([]int64, len(docs))
for i, doc := range docs {
ids[i] = doc.ID
contents[i] = doc.Content
// Convert vector type
vectorFloat32 := make([]float32, len(doc.Vector))
for j, v := range doc.Vector {
vectorFloat32[j] = float32(v)
}
vectors[i] = vectorFloat32
// Serialize metadata
metadataBytes, err := json.Marshal(doc.Metadata)
if err != nil {
return fmt.Errorf("failed to marshal metadata for doc %s: %w", doc.ID, err)
}
metadatas[i] = metadataBytes
createdAts[i] = doc.CreatedAt.UnixMilli()
// Get field mappings
fieldMappings, err := m.mapper.GetFieldMappings()
if err != nil {
return fmt.Errorf("failed to get field mappings: %w", err)
}
// Prepare data and columns
columns := make([]entity.Column, 0, len(fieldMappings))
// Create corresponding column data for each field
for _, field := range fieldMappings {
// Skip ID field if configured as auto ID
if field.IsPrimaryKey() && field.IsAutoID() {
continue
}
switch field.StandardName {
case "id":
// Handle string type fields
values := make([]string, len(docs))
for i, doc := range docs {
values[i] = doc.ID
}
columns = append(columns, entity.NewColumnVarChar(field.RawName, values))
case "content":
values := make([]string, len(docs))
for i, doc := range docs {
values[i] = doc.Content
}
columns = append(columns, entity.NewColumnVarChar(field.RawName, values))
// Build insert data
columns := []entity.Column{
entity.NewColumnVarChar("id", ids),
entity.NewColumnVarChar("content", contents),
entity.NewColumnFloatVector("vector", len(vectors[0]), vectors),
entity.NewColumnJSONBytes("metadata", metadatas),
entity.NewColumnInt64("created_at", createdAts),
case "vector":
// Handle vector fields
vectors := make([][]float32, len(docs))
for i, doc := range docs {
vectors[i] = doc.Vector
}
columns = append(columns, entity.NewColumnFloatVector(field.RawName, len(vectors[0]), vectors))
case "metadata":
// Handle JSON type fields (like metadata)
values := make([][]byte, len(docs))
for i, doc := range docs {
// Serialize metadata
metadataBytes, err := json.Marshal(doc.Metadata)
if err != nil {
return fmt.Errorf("failed to marshal metadata for doc %s: %w", doc.ID, err)
}
values[i] = metadataBytes
}
columns = append(columns, entity.NewColumnJSONBytes(field.RawName, values))
case "created_at":
// Handle integer type fields
values := make([]int64, len(docs))
for i, doc := range docs {
values[i] = doc.CreatedAt.UnixMilli()
}
columns = append(columns, entity.NewColumnInt64(field.RawName, values))
}
}
// Insert data
_, err := m.client.Insert(ctx, m.Collection, "", columns...)
_, err = m.client.Insert(ctx, m.collection, "", columns...)
if err != nil {
return fmt.Errorf("failed to insert documents: %w", err)
}
// Flush data
err = m.client.Flush(ctx, m.Collection, false)
err = m.client.Flush(ctx, m.collection, false)
if err != nil {
return fmt.Errorf("failed to flush collection: %w", err)
}
@@ -271,16 +501,19 @@ func (m *MilvusProvider) AddDoc(ctx context.Context, docs []schema.Document) err
// DeleteDoc deletes a document by its ID
func (m *MilvusProvider) DeleteDoc(ctx context.Context, id string) error {
// Build delete expression
expr := fmt.Sprintf(`id == "%s"`, id)
// Get ID field
idField, _ := m.mapper.GetIDField()
// Build delete expression using the RawName of ID field
expr := fmt.Sprintf(`%s == "%s"`, idField.RawName, id)
// Delete data
err := m.client.Delete(ctx, m.Collection, "", expr)
err := m.client.Delete(ctx, m.collection, "", expr)
if err != nil {
return fmt.Errorf("failed to delete documents for id %s: %w", id, err)
}
// Flush data
err = m.client.Flush(ctx, m.Collection, false)
err = m.client.Flush(ctx, m.collection, false)
if err != nil {
return fmt.Errorf("failed to flush collection after delete: %w", err)
}
@@ -306,24 +539,127 @@ func (m *MilvusProvider) UpdateDoc(ctx context.Context, docs []schema.Document)
return nil
}
// buildSearchParam builds the Milvus search parameters appropriate for the
// configured index type. The index type defaults to HNSW when unset, and an
// unknown type falls back to HNSW defaults rather than failing, so a search
// can still proceed. Missing numeric parameters use per-index defaults.
func (m *MilvusProvider) buildSearchParam() (entity.SearchParam, error) {
	// Get index configuration
	indexConfig, err := m.mapper.GetIndexConfig()
	if err != nil {
		return nil, fmt.Errorf("failed to get index config: %w", err)
	}
	// Get search configuration
	searchConfig, err := m.mapper.GetSearchConfig()
	if err != nil {
		return nil, fmt.Errorf("failed to get search config: %w", err)
	}

	milvusIndexType := strings.ToUpper(indexConfig.IndexType)
	if milvusIndexType == "" {
		milvusIndexType = "HNSW" // default to HNSW index
	}

	// floatParam/intParam read a search parameter, falling back to def when
	// the key is absent or unparsable. Some params are historically read as
	// floats for config compatibility; keep the accessor choice per key.
	floatParam := func(key string, def int) int {
		if v, err := searchConfig.ParamsFloat64(key); err == nil {
			return int(v)
		}
		return def
	}
	intParam := func(key string, def int) int {
		if v, err := searchConfig.ParamsInt64(key); err == nil {
			return int(v)
		}
		return def
	}

	switch milvusIndexType {
	case "FLAT":
		// FLAT needs no additional search parameters.
		return entity.NewIndexFlatSearchParam()
	case "BIN_FLAT", "IVF_FLAT", "BIN_IVF_FLAT", "IVF_SQ8":
		// IVF-family indices are tuned by the number of probed clusters.
		return entity.NewIndexIvfFlatSearchParam(floatParam("nprobe", 16))
	case "IVF_PQ":
		return entity.NewIndexIvfPQSearchParam(floatParam("nprobe", 16))
	case "HNSW":
		return entity.NewIndexHNSWSearchParam(floatParam("ef", 16))
	case "IVF_HNSW":
		return entity.NewIndexIvfHNSWSearchParam(floatParam("nprobe", 16), floatParam("ef", 64))
	case "SCANN":
		return entity.NewIndexSCANNSearchParam(floatParam("nprobe", 16), intParam("reorder_k", 64))
	case "DISKANN":
		return entity.NewIndexDISKANNSearchParam(intParam("search_list", 100))
	case "AUTOINDEX":
		return entity.NewIndexAUTOINDEXSearchParam(intParam("level", 8))
	default:
		// Unknown index type: fall back to HNSW search defaults.
		return entity.NewIndexHNSWSearchParam(16)
	}
}
// SearchDocs performs similarity search for documents
func (m *MilvusProvider) SearchDocs(ctx context.Context, vector []float32, options *schema.SearchOptions) ([]schema.SearchResult, error) {
if options == nil {
options = &schema.SearchOptions{TopK: 10}
}
// Build search parameters
sp, _ := entity.NewIndexHNSWSearchParam(16)
sp, err := m.buildSearchParam()
if err != nil {
return nil, fmt.Errorf("failed to build search param: %w", err)
}
outputFields, _ := m.mapper.GetRawAllFieldNames()
vectorField, _ := m.mapper.GetVectorField()
searchConfig, _ := m.mapper.GetSearchConfig()
metricType := m.GetMetricType(searchConfig.MetricType)
// Build filter expression
expr := ""
searchResults, err := m.client.Search(
ctx,
m.Collection,
[]string{}, // partition names
expr, // filter expression
[]string{"id", "content", "metadata", "created_at"}, // output fields
m.collection,
[]string{}, // partition names
expr, // filter expression
outputFields, // output fields
[]entity.Vector{entity.FloatVector(vector)},
"vector", // anns_field
entity.IP, // metric_type
vectorField.RawName, // anns_field
metricType, // metric_type
options.TopK,
sp,
)
@@ -341,9 +677,13 @@ func (m *MilvusProvider) SearchDocs(ctx context.Context, vector []float32, optio
// Get field data
var content string
var metadata map[string]interface{}
for _, field := range result.Fields {
switch field.Name() {
fieldMapping, err := m.mapper.GetField(field.Name())
if err != nil {
continue
}
fieldName := strings.ToLower(fieldMapping.StandardName)
switch fieldName {
case "content":
if contentCol, ok := field.(*entity.ColumnVarChar); ok {
if contentVal, err := contentCol.Get(i); err == nil {
@@ -364,7 +704,6 @@ func (m *MilvusProvider) SearchDocs(ctx context.Context, vector []float32, optio
}
}
}
searchResult := schema.SearchResult{
Document: schema.Document{
ID: fmt.Sprintf("%s", id),
@@ -392,15 +731,17 @@ func (m *MilvusProvider) DeleteDocs(ctx context.Context, ids []string) error {
for i, id := range ids {
quotedIDs[i] = fmt.Sprintf("\"%s\"", id)
}
expr := fmt.Sprintf("id in [%s]", strings.Join(quotedIDs, ","))
idField, _ := m.mapper.GetIDField()
expr := fmt.Sprintf("%s in [%s]", idField.RawName, strings.Join(quotedIDs, ","))
// Delete data
err := m.client.Delete(ctx, m.Collection, "", expr)
err := m.client.Delete(ctx, m.collection, "", expr)
if err != nil {
return fmt.Errorf("failed to delete documents: %w", err)
}
// Flush data
err = m.client.Flush(ctx, m.Collection, false)
err = m.client.Flush(ctx, m.collection, false)
if err != nil {
return fmt.Errorf("failed to flush collection after delete: %w", err)
}
@@ -413,12 +754,13 @@ func (m *MilvusProvider) ListDocs(ctx context.Context, limit int) ([]schema.Docu
// Build query expression
expr := ""
// Query all relevant documents
outputFields, _ := m.mapper.GetRawAllFieldNames()
queryResult, err := m.client.Query(
ctx,
m.Collection,
m.collection,
[]string{}, // partitions
expr, // filter condition
[]string{"id", "content", "metadata", "created_at"},
outputFields,
client.WithOffset(0), client.WithLimit(int64(limit)),
)
@@ -443,7 +785,12 @@ func (m *MilvusProvider) ListDocs(ctx context.Context, limit int) ([]schema.Docu
)
for _, col := range queryResult {
switch col.Name() {
fieldMapping, err := m.mapper.GetField(col.Name())
if err != nil {
continue
}
fieldName := strings.ToLower(fieldMapping.StandardName)
switch fieldName {
case "id":
if v, err := col.(*entity.ColumnVarChar).Get(i); err == nil {
id = v.(string)
@@ -488,8 +835,3 @@ func (m *MilvusProvider) Close() error {
}
return nil
}
// joinStrings concatenates elems, inserting sep between consecutive elements.
// An empty or nil slice yields the empty string.
func joinStrings(elems []string, sep string) string {
	var b strings.Builder
	for i, e := range elems {
		if i > 0 {
			b.WriteString(sep)
		}
		b.WriteString(e)
	}
	return b.String()
}

View File

@@ -28,31 +28,19 @@ local function randomBool()
return math.random() >= 0.5
end
-- is_healthy reports whether addr appears among the entries of the Redis
-- script's KEYS table starting at index 4.
-- NOTE(review): this assumes healthy endpoint addresses are passed as
-- KEYS[4..] and KEYS[1..3] carry other arguments — confirm the caller's
-- EVAL contract before relying on this.
local function is_healthy(addr)
for i = 4, #KEYS do
if addr == KEYS[i] then
return true
end
end
return false
end
if redis.call('HEXISTS', hset_key, current_target) == 1 then
current_count = redis.call('HGET', hset_key, current_target)
local hash = redis.call('HGETALL', hset_key)
for i = 1, #hash, 2 do
local addr = hash[i]
local count = hash[i+1]
if is_healthy(addr) then
if tonumber(count) < tonumber(current_count) then
current_target = addr
current_count = count
elseif count == current_count and randomBool() then
current_target = addr
current_count = count
end
end
end
for i = 4, #KEYS do
if redis.call('HEXISTS', hset_key, KEYS[i]) == 1 then
local count = redis.call('HGET', hset_key, KEYS[i])
if tonumber(count) < tonumber(current_count) then
current_target = KEYS[i]
current_count = count
elseif count == current_count and randomBool() then
current_target = KEYS[i]
end
end
end
end
redis.call("HINCRBY", hset_key, current_target, 1)

View File

@@ -121,17 +121,14 @@ if target == "" then
target = default_target
if redis.call('HEXISTS', hset_key, target) == 1 then
current_count = redis.call('HGET', hset_key, target)
local hash = redis.call('HGETALL', hset_key)
for i = 1, #hash, 2 do
local addr = hash[i]
local count = hash[i+1]
if is_healthy(addr) then
for i = 4, #KEYS do
if redis.call('HEXISTS', hset_key, KEYS[i]) == 1 then
local count = redis.call('HGET', hset_key, KEYS[i])
if tonumber(count) < tonumber(current_count) then
target = addr
target = KEYS[i]
current_count = count
elseif count == current_count and randomBool() then
target = addr
current_count = count
target = KEYS[i]
end
end
end

View File

@@ -129,7 +129,15 @@ Azure OpenAI 所对应的 `type` 为 `azure`。它特有的配置字段如下:
| ----------------- | -------- | -------- | ------ | -------------------------------------------------------- |
| `azureServiceUrl` | string | 必填 | - | Azure OpenAI 服务的 URL须包含 `api-version` 查询参数。 |
**注意:** Azure OpenAI 只支持配置一个 API Token。
**注意:**
1. Azure OpenAI 只支持配置一个 API Token。
2. `azureServiceUrl` 支持以下三种配置格式:
1. 完整路径格式,例如:`https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME/chat/completions?api-version=2024-02-15-preview`
- 插件会直接将请求转发至该 URL不会参考实际的请求路径。
2. 部署名称格式,例如:`https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME?api-version=2024-02-15-preview`
- 插件会根据实际的请求路径拼接后续路径。路径中的部署名称会保留不变,不会按照模型映射规则进行修改。同时支持 URL 中不包含部署名称的接口。
3. 资源名称格式,例如:`https://YOUR_RESOURCE_NAME.openai.azure.com?api-version=2024-02-15-preview`
- 插件会根据实际的请求路径拼接后续路径。路径中的部署名称会根据请求中的模型名称结合模型映射规则进行填入。同时支持 URL 中不包含部署名称的接口。
#### 月之暗面Moonshot

View File

@@ -100,7 +100,15 @@ For Azure OpenAI, the corresponding `type` is `azure`. Its unique configuration
|---------------------|-------------|----------------------|---------------|---------------------------------------------------------------------------------------------------------------|
| `azureServiceUrl` | string | Required | - | The URL of the Azure OpenAI service, must include the `api-version` query parameter. |
**Note:** Azure OpenAI only supports configuring one API Token.
**Note:**
1. Azure OpenAI only supports configuring one API Token.
2. `azureServiceUrl` accepts three formats
1. Full URL. e.g. `https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME/chat/completions?api-version=2024-02-15-preview`
- Request will be forwarded to the given URL, no matter what original path the request uses.
2. Resource name + deployment name, e.g. `https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME?api-version=2024-02-15-preview`
- The path will be updated based on the actual request path, leaving the deployment name unchanged. APIs with no deployment name in the path are also supported.
3. Resource name only, e.g. `https://YOUR_RESOURCE_NAME.openai.azure.com?api-version=2024-02-15-preview`
- The path will be updated based on the actual request path. The deployment name will be filled in based on the model name in the request and the configured model mapping rules. APIs with no deployment name in the path are also supported.
#### Moonshot

View File

@@ -97,7 +97,6 @@ func init() {
wrapper.ProcessResponseHeaders(onHttpResponseHeaders),
wrapper.ProcessStreamingResponseBody(onStreamingResponseBody),
wrapper.ProcessResponseBody(onHttpResponseBody),
wrapper.WithRebuildAfterRequests[config.PluginConfig](1000),
)
}
@@ -433,6 +432,10 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
return claudeChunk
}
if !needsClaudeResponseConversion(ctx) {
return chunk
}
// If provider doesn't implement any streaming handlers but we need Claude conversion
// First extract complete events from the chunk
events := provider.ExtractStreamingEvents(ctx, chunk)

View File

@@ -38,6 +38,7 @@ var (
ApiNameFiles: true,
ApiNameRetrieveFile: true,
ApiNameRetrieveFileContent: true,
ApiNameResponses: true,
}
regexAzureModelWithPath = regexp.MustCompile("/openai/deployments/(.+?)(?:/(.*)|$)")
)
@@ -100,8 +101,15 @@ func (m *azureProviderInitializer) CreateProvider(config ProviderConfig) (Provid
}
log.Debugf("azureProvider: found default model from serviceUrl: %s", defaultModel)
} else {
serviceUrlType = azureServiceUrlTypeDomainOnly
log.Debugf("azureProvider: no default model found in serviceUrl")
// If path doesn't match the /openai/deployments pattern,
// check if it's a custom full path or domain only
if serviceUrl.Path != "" && serviceUrl.Path != "/" {
serviceUrlType = azureServiceUrlTypeFull
log.Debugf("azureProvider: using custom full path: %s", serviceUrl.Path)
} else {
serviceUrlType = azureServiceUrlTypeDomainOnly
log.Debugf("azureProvider: no default model found in serviceUrl")
}
}
log.Debugf("azureProvider: serviceUrlType=%d", serviceUrlType)

View File

@@ -146,6 +146,20 @@ var azureInvalidConfigMissingToken = func() json.RawMessage {
return data
}()
// 测试配置Azure OpenAI Response API配置
var azureResponseAPIConfig = func() json.RawMessage {
data, _ := json.Marshal(map[string]interface{}{
"provider": map[string]interface{}{
"type": "azure",
"apiTokens": []string{
"sk-azure-multi",
},
"azureServiceUrl": "https://multi-resource.openai.azure.com/openai/responses?api-version=2025-04-01-preview",
},
})
return data
}()
func RunAzureParseConfigTests(t *testing.T) {
test.RunGoTest(t, func(t *testing.T) {
// 测试基本Azure OpenAI配置解析
@@ -203,6 +217,17 @@ func RunAzureParseConfigTests(t *testing.T) {
require.NotNil(t, config)
})
// 测试Azure Response API 配置解析
t.Run("azure response api config", func(t *testing.T) {
host, status := test.NewTestHost(azureResponseAPIConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
config, err := host.GetMatchConfig()
require.NoError(t, err)
require.NotNil(t, config)
})
// 测试Azure OpenAI无效配置缺少azureServiceUrl
t.Run("azure invalid config missing url", func(t *testing.T) {
host, status := test.NewTestHost(azureInvalidConfigMissingUrl)
@@ -411,6 +436,61 @@ func RunAzureOnHttpRequestBodyTests(t *testing.T) {
require.Equal(t, "gpt-4", model, "Model should be mapped correctly")
})
// 测试Azure OpenAI Response API 处理
t.Run("azure response api request body", func(t *testing.T) {
host, status := test.NewTestHost(azureResponseAPIConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
// 设置请求头
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/responses/v1/responses"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.HeaderStopIteration, action)
// 设置请求体
requestBody := `{
"input": [
{
"role": "user",
"content": [
{
"type": "input_text",
"text": "Explain quantum computing"
}
]
}
],
"model": "gpt-5",
"reasoning": {
"effort": "medium"
}
}`
action = host.CallOnHttpRequestBody([]byte(requestBody))
require.Equal(t, types.ActionContinue, action)
// 验证请求体是否被正确处理
transformedBody := host.GetRequestBody()
require.NotNil(t, transformedBody)
var bodyMap map[string]interface{}
err := json.Unmarshal(transformedBody, &bodyMap)
require.NoError(t, err)
model, exists := bodyMap["model"]
require.True(t, exists, "Model should exist in request body")
require.Equal(t, "gpt-5", model, "Model should be mapped correctly")
// 验证请求路径是否被正确转换
requestHeaders := host.GetRequestHeaders()
pathValue, hasPath := test.GetHeaderValue(requestHeaders, ":path")
require.True(t, hasPath, "Path header should exist")
require.Equal(t, pathValue, "/openai/responses?api-version=2025-04-01-preview", "Path should not equal Azure response api path")
})
// 测试Azure OpenAI请求体处理仅部署配置
t.Run("azure deployment only request body", func(t *testing.T) {
host, status := test.NewTestHost(azureDeploymentOnlyConfig)
@@ -566,6 +646,10 @@ func RunAzureOnHttpResponseBodyTests(t *testing.T) {
}
]
}`
action = host.CallOnHttpResponseHeaders([][2]string{
{"Content-Type", "application/json"},
})
require.Equal(t, types.ActionContinue, action)
action = host.CallOnHttpRequestBody([]byte(requestBody))
require.Equal(t, types.ActionContinue, action)

View File

@@ -81,11 +81,11 @@ type Response struct {
}
type Data struct {
RiskLevel string `json:"RiskLevel"`
AttackLevel string `json:"AttackLevel,omitempty"`
Result []Result `json:"Result,omitempty"`
Advice []Advice `json:"Advice,omitempty"`
Detail []Detail `json:"Detail,omitempty"`
RiskLevel string `json:"RiskLevel"`
AttackLevel string `json:"AttackLevel,omitempty"`
Result []Result `json:"Result,omitempty"`
Advice []Advice `json:"Advice,omitempty"`
Detail []Detail `json:"Detail,omitempty"`
}
type Result struct {
@@ -123,6 +123,7 @@ type AISecurityConfig struct {
denyCode int64
denyMessage string
protocolOriginal bool
riskLevelBar string
contentModerationLevelBar string
promptAttackLevelBar string
sensitiveDataLevelBar string
@@ -224,7 +225,7 @@ func isRiskLevelAcceptable(action string, data Data, config AISecurityConfig) bo
}
return true
} else {
return levelToInt(data.RiskLevel) < levelToInt(config.contentModerationLevelBar)
return levelToInt(data.RiskLevel) < levelToInt(config.riskLevelBar)
}
}
@@ -281,8 +282,17 @@ func parseConfig(json gjson.Result, config *AISecurityConfig) error {
if config.ak == "" || config.sk == "" {
return errors.New("invalid AK/SK config")
}
if obj := json.Get("riskLevelBar"); obj.Exists() {
config.riskLevelBar = obj.String()
} else {
config.riskLevelBar = HighRisk
}
config.token = json.Get("securityToken").String()
config.action = json.Get("action").String()
if obj := json.Get("action"); obj.Exists() {
config.action = json.Get("action").String()
} else {
config.action = "TextModerationPlus"
}
config.checkRequest = json.Get("checkRequest").Bool()
config.checkResponse = json.Get("checkResponse").Bool()
config.protocolOriginal = json.Get("protocol").String() == "original"

View File

@@ -28,7 +28,6 @@ func init() {
wrapper.ProcessResponseHeaders(onHttpResponseHeaders),
wrapper.ProcessStreamingResponseBody(onHttpStreamingBody),
wrapper.ProcessResponseBody(onHttpResponseBody),
wrapper.WithRebuildAfterRequests[AIStatisticsConfig](1000),
)
}

View File

@@ -291,6 +291,8 @@ func onHttpRequestHeaders(ctx wrapper.HttpContext, config KeyAuthConfig, log log
return deniedUnauthorizedConsumer()
}
proxywasm.AddHttpRequestHeader("X-Mse-Consumer", name)
// 全局生效:
// - global_auth == true 且 当前 domain/route 未配置该插件
// - global_auth 未设置 且 没有任何一个 domain/route 配置该插件
@@ -343,7 +345,6 @@ func deniedUnauthorizedConsumer() types.Action {
}
func authenticated(name string) types.Action {
_ = proxywasm.AddHttpRequestHeader("X-Mse-Consumer", name)
return types.ActionContinue
}

View File

@@ -63,7 +63,7 @@ type McpTool struct {
}
type ToolSecurity struct {
Id string `json:"type,omitempty"`
Id string `json:"id,omitempty"`
PassThrough bool `json:"passthrough,omitempty"`
Credential string `json:"credential"`
}

View File

@@ -0,0 +1,367 @@
# Higress
## 📋 Overview of This Release
This release includes **30** updates, covering various aspects such as feature enhancements, bug fixes, performance optimizations, and more.
### Update Distribution
- **New Features**: 13
- **Bug Fixes**: 7
- **Refactoring and Optimization**: 5
- **Documentation Updates**: 4
- **Testing Improvements**: 1
### ⭐ Key Highlights
This release includes **2** major updates, which are highly recommended for your attention:
- **feat: add rag mcp server** ([#2930](https://github.com/alibaba/higress/pull/2930)): By introducing the RAG MCP server, this update provides a new way for users to manage and retrieve knowledge, enhancing the functionality and practicality of the system.
- **refactor(mcp): use ECDS for golang filter configuration to avoid connection drain** ([#2931](https://github.com/alibaba/higress/pull/2931)): Using ECDS for filter configuration avoids instability caused by directly embedding golang filter configurations, improving the system's stability and maintainability, and reducing unnecessary service interruptions for users.
For more details, please refer to the important features section below.
---
## 🌟 Detailed Description of Important Features
Below is a detailed description of the key features and improvements in this release:
### 1. feat: add rag mcp server
**Related PR**: [#2930](https://github.com/alibaba/higress/pull/2930) | **Contributor**: [@2456868764](https://github.com/2456868764)
**Use Case**
In modern applications, knowledge management and retrieval have become increasingly important. Many systems require fast and accurate extraction and retrieval of information from large volumes of text data. RAG (Retrieval-Augmented Generation) technology combines retrieval and generation models to effectively enhance the efficiency and accuracy of knowledge management. This PR introduces a Model Context Protocol (MCP) server specifically for knowledge management and retrieval, meeting the needs of users for efficient information processing. The target user group includes enterprises and developers who need to handle large amounts of text data, especially in the fields of natural language processing (NLP) and machine learning.
**Feature Details**
This PR implements the RAG MCP server, adding multiple functional modules, including knowledge management, block management, search, and chat functions. The core features include:
1. **Knowledge Management**: Supports creating knowledge blocks from text.
2. **Block Management**: Provides functionalities for listing and deleting knowledge blocks.
3. **Search**: Supports keyword-based search.
4. **Chat Function**: Allows users to send chat messages and receive responses.
Technically, the server uses several external libraries, such as `github.com/dlclark/regexp2`, `github.com/milvus-io/milvus-sdk-go/v2`, and `github.com/pkoukk/tiktoken-go`, which provide regular expression handling, vector database management, and text encoding functionalities. Key code changes include adding an HTTP client, configuration files, and multiple processing functions to ensure the flexibility and configurability of the system.
**Usage Instructions**
To enable and configure the RAG MCP server, follow these steps:
1. Enable the MCP server in the `higress-config` configuration file and set the corresponding path and configuration items.
2. Configure the basic parameters of the RAG system, such as splitter type, chunk size, and overlap.
3. Configure the LLM (Large Language Model) provider and its API key, model name, etc.
4. Configure the embedding model provider and its API key, model name, etc.
5. Configure the vector database provider and its connection information.
Example configuration:
```yaml
rag:
splitter:
type: "recursive"
chunk_size: 500
chunk_overlap: 50
top_k: 5
threshold: 0.5
llm:
provider: "openai"
api_key: "your-llm-api-key"
model: "gpt-3.5-turbo"
embedding:
provider: "openai"
api_key: "your-embedding-api-key"
model: "text-embedding-ada-002"
vectordb:
provider: "milvus"
host: "localhost"
port: 19530
collection: "test_collection"
```
Notes:
- Ensure all configuration items are correct, especially API keys and model names.
- In production environments, it is recommended to adjust parameters such as timeout appropriately to adapt to different network conditions.
**Feature Value**
The RAG MCP server provides a complete solution for knowledge management and retrieval, enhancing the intelligence and automation of the system. Specific benefits include:
1. **Improved Efficiency**: Through integrated knowledge management and retrieval functions, users can quickly process and retrieve large volumes of text data, saving time and resources.
2. **Enhanced Accuracy**: Combining RAG technology, the system can more accurately extract and retrieve information, reducing error rates.
3. **Flexible Configuration**: Provides rich configuration options, allowing users to flexibly adjust according to actual needs, meeting the requirements of different scenarios.
4. **High Scalability**: Supports multiple providers and models, making it easy for users to choose suitable components and technology stacks based on business needs.
5. **Stability Improvement**: Through detailed configuration validation and error handling mechanisms, the stability and robustness of the system are ensured.
---
### 2. refactor(mcp): use ECDS for golang filter configuration to avoid connection drain
**Related PR**: [#2931](https://github.com/alibaba/higress/pull/2931) | **Contributor**: [@johnlanni](https://github.com/johnlanni)
**Use Case**
In the current implementation, Golang filter configurations are directly embedded in the HTTP_FILTER patch, which can lead to connection drain when configurations change. The main reasons are the non-deterministic ordering of Go maps in the `map[string]any` field and the listener configuration changes triggered by HTTP_FILTER updates. This issue affects the stability and user experience of the system. The target user group is developers and operations personnel using Higress for service mesh management.
**Feature Details**
This PR splits the configuration into two parts: HTTP_FILTER only contains filter references with `config_discovery`, while EXTENSION_CONFIG contains the actual Golang filter configuration. This way, configuration changes do not directly cause connection drain. The specific implementation includes updating the `constructMcpSessionStruct` and `constructMcpServerStruct` methods to return formats compatible with EXTENSION_CONFIG and updating unit tests to match the new configuration structure. The core innovation lies in using the ECDS mechanism to separate configurations, making configuration changes smoother.
**Usage Instructions**
Enabling and configuring this feature does not require any additional operations as it is automatically handled in the background. A typical use case is when configuring Golang filters in Higress; the system will automatically split them into HTTP_FILTER and EXTENSION_CONFIG. Users only need to configure Golang filters as usual. Note that when upgrading to the new version, ensure all related configuration files are updated and thoroughly tested in the production environment to ensure that configuration changes do not introduce other issues.
**Feature Value**
By separating configurations and using ECDS, this feature eliminates the connection drain problem during configuration changes, significantly improving the system's stability and user experience. Additionally, this design makes configurations easier to manage and maintain, reducing potential issues caused by configuration changes. For large-scale service mesh deployments, this improvement is particularly important as it reduces service interruptions caused by configuration changes, thereby enhancing the overall reliability and availability of the system.
---
## 📝 Full Changelog
### 🚀 New Features (Features)
- **Related PR**: [#2926](https://github.com/alibaba/higress/pull/2926) \
**Contributor**: @rinfx \
**Change Log**: This PR adds support for multimodal, function calls, and thinking in vertex-ai, involving the introduction of a regular expression library and improvements to the processing logic. \
**Feature Value**: By adding new features, vertex-ai can better support application needs in complex scenarios, such as multimodal data processing and more flexible function call methods, enhancing the system's flexibility and practicality.
- **Related PR**: [#2917](https://github.com/alibaba/higress/pull/2917) \
**Contributor**: @Aias00 \
**Change Log**: This PR adds support for Fireworks AI, expanding the functionality of the AI agent plugin, including the addition of necessary configuration files and test code. \
**Feature Value**: Adding support for Fireworks AI allows users to leverage the AI features provided by the platform, broadening the range of AI services that applications can integrate with, and enhancing the user experience.
- **Related PR**: [#2907](https://github.com/alibaba/higress/pull/2907) \
**Contributor**: @Aias00 \
**Change Log**: This PR upgrades wasm-go to support outputSchema, involving dependency updates for jsonrpc-converter and oidc plugins. \
**Feature Value**: By supporting outputSchema, the functionality and flexibility of the wasm-go plugin are enhanced, making it easier for users to handle and define output data structures.
- **Related PR**: [#2897](https://github.com/alibaba/higress/pull/2897) \
**Contributor**: @rinfx \
**Change Log**: This PR adds multimodal support and thinking functionality to the ai-proxy bedrock, achieved by extending the relevant code in bedrock.go. \
**Feature Value**: The added multimodal and thinking support enriches the ai-proxy's feature set, enabling users to utilize more advanced AI technologies for complex scenarios, enhancing the system's flexibility and practicality.
- **Related PR**: [#2891](https://github.com/alibaba/higress/pull/2891) \
**Contributor**: @rinfx \
**Change Log**: This PR adds the ability to configure specific detection services for different consumers in the AI content security plugin, allowing users to customize request and response check rules according to their needs. \
**Feature Value**: By supporting independent detection services for different consumers, this feature enhances the system's flexibility and security, enabling users to control the content review process more precisely, thus meeting diverse security policy requirements.
- **Related PR**: [#2883](https://github.com/alibaba/higress/pull/2883) \
**Contributor**: @Aias00 \
**Change Log**: This PR adds support for Meituan Longcat, including integration with the Longcat platform and related unit tests. \
**Feature Value**: Adding support for Meituan Longcat expands the plugin's functionality, allowing users to leverage more AI service providers' technologies, enhancing the flexibility and diversity of the application.
- **Related PR**: [#2867](https://github.com/alibaba/higress/pull/2867) \
**Contributor**: @Aias00 \
**Change Log**: This PR adds support for Gzip configuration and updates the default settings. By adding gzip options in the Helm configuration file, users can customize compression parameters to optimize response performance. \
**Feature Value**: Adding support for Gzip configuration allows users to adjust the compression level of HTTP responses according to their needs, helping to reduce the amount of transmitted data, speed up page loading, and improve the user experience.
- **Related PR**: [#2844](https://github.com/alibaba/higress/pull/2844) \
**Contributor**: @Aias00 \
**Change Log**: This PR enhances the consistent hashing algorithm for load balancing by supporting useSourceIp, modifying the relevant Go code files, and adding an example configuration file. \
**Feature Value**: The newly added useSourceIp option allows users to perform consistent hash load balancing based on source IP addresses, which helps to improve the stability and reliability of services under specific network conditions.
- **Related PR**: [#2843](https://github.com/alibaba/higress/pull/2843) \
**Contributor**: @erasernoob \
**Change Log**: This PR adds NVIDIA Triton server support to the AI agent plugin, including related configuration instructions and code implementation. \
**Feature Value**: Adding support for the Triton server expands the AI agent plugin's feature set, allowing users to leverage high-performance machine learning inference services.
- **Related PR**: [#2806](https://github.com/alibaba/higress/pull/2806) \
**Contributor**: @C-zhaozhou \
**Change Log**: This PR makes ai-security-guard compatible with the MultiModalGuard interface, adding support for multimodal APIs and updating the relevant documentation. \
**Feature Value**: By supporting multimodal APIs, the functionality of ai-security-guard is enhanced, enabling it to handle more complex content security scenarios, improving the user experience and security.
- **Related PR**: [#2727](https://github.com/alibaba/higress/pull/2727) \
**Contributor**: @Aias00 \
**Change Log**: This PR adds end-to-end testing support for OpenAI, including test cases for non-streaming and streaming requests. \
**Feature Value**: The added end-to-end testing for OpenAI ensures the system remains stable and accurate when handling different types of requests, improving the user experience.
- **Related PR**: [#2593](https://github.com/alibaba/higress/pull/2593) \
**Contributor**: @Xscaperrr \
**Change Log**: Adds the WorkloadSelector field to limit the scope of EnvoyFilter, ensuring that it does not affect other components in the same namespace in an open-source istio environment. \
**Feature Value**: By limiting EnvoyFilter to only apply to the Higress Gateway, this feature prevents interference with other istio gateways/sidecars in the environment, enhancing the security and isolation of the configuration.
### 🐛 Bug Fixes (Bug Fixes)
- **Related PR**: [#2938](https://github.com/alibaba/higress/pull/2938) \
**Contributor**: @wydream \
**Change Log**: This PR fixes the issue where prompt attack detection fails due to the lack of AttackLevel field support in MultiModalGuard mode, ensuring that all levels of attacks are correctly identified. \
**Feature Value**: By adding support for the AttackLevel field, the system's security is improved, preventing high-risk-level prompt attacks from going undetected, ensuring user experience and security.
- **Related PR**: [#2904](https://github.com/alibaba/higress/pull/2904) \
**Contributor**: @johnlanni \
**Change Log**: This PR fixes the issue where the original Authorization header might be overwritten when processing HTTP requests. By unconditionally saving the original header and checking that it is non-empty before writing it to the context, it ensures the accuracy and security of authentication information. \
**Feature Value**: This fix improves the system's security and stability, preventing potential authentication failures or security vulnerabilities due to lost authentication information, enhancing user experience and trust.
- **Related PR**: [#2899](https://github.com/alibaba/higress/pull/2899) \
**Contributor**: @Jing-ze \
**Change Log**: This PR optimizes the MCP server, including pre-parsing the host pattern to reduce runtime overhead and removing the unused DomainList field. It also fixes the SSE message format issue, particularly the handling of extra newline characters. \
**Feature Value**: By improving pattern matching efficiency and memory usage, as well as correcting errors in SSE messages, the user experience and service stability are enhanced, ensuring the correctness and integrity of data transmission.
- **Related PR**: [#2892](https://github.com/alibaba/higress/pull/2892) \
**Contributor**: @johnlanni \
**Change Log**: This PR corrects the JSON unmarshalling error when Claude API returns content in array format and removes redundant code structures, improving code quality and maintainability. \
**Feature Value**: This resolves the message parsing failure due to incorrect data types, enhancing the system's stability and user experience. For users using array as the content format, this fix ensures a smooth message processing flow.
- **Related PR**: [#2882](https://github.com/alibaba/higress/pull/2882) \
**Contributor**: @johnlanni \
**Change Log**: This PR addresses the SSE event chunking issue in Claude's streaming response conversion logic, improving protocol auto-conversion and tool invocation state tracking. \
**Feature Value**: It enhances the bidirectional conversion reliability between Claude and OpenAI-compatible providers, avoiding connection blocking, and enhancing the user experience.
- **Related PR**: [#2865](https://github.com/alibaba/higress/pull/2865) \
**Contributor**: @Thomas-Eliot \
**Change Log**: This PR solves the issue where SSE connections would be blocked when SSE events were split into multiple chunks. By adding a caching mechanism in the proxy mcp server scenario, it ensures the continuity of data stream processing. \
**Feature Value**: This fix resolves the potential issue of SSE connection interruption, enhancing the system's stability and user experience. Users will no longer encounter incomplete data reception due to network conditions or server response methods.
- **Related PR**: [#2859](https://github.com/alibaba/higress/pull/2859) \
**Contributor**: @lcfang \
**Change Log**: This PR solves the issue of route configuration failure when the registered service instance ports are inconsistent by adding a vport element in the mcpbridge. The main changes include updating the CRD definition, protobuf files, and related generated code. \
**Feature Value**: This feature ensures that even if the backend instance ports change, the service route configuration remains valid, thereby improving the system's stability and compatibility, providing a more reliable service experience for users.
### ♻️ Refactoring and Optimization (Refactoring)
- **Related PR**: [#2933](https://github.com/alibaba/higress/pull/2933) \
**Contributor**: @rinfx \
**Change Log**: This PR removes duplicate think tags in bedrock and vertex, reducing redundant code and improving code readability and maintainability. \
**Feature Value**: By removing unnecessary duplicate code, the overall quality and development efficiency of the project are improved, making the code structure clearer and easier to maintain and extend.
- **Related PR**: [#2927](https://github.com/alibaba/higress/pull/2927) \
**Contributor**: @rinfx \
**Change Log**: This PR modifies the API name extraction logic in the ai-statistics plugin, adjusting the check condition from a fixed length of 5 to at least 3 parts to enhance flexibility and compatibility. \
**Feature Value**: By relaxing the restriction on API string splitting, the system's support for different format API strings is enhanced, improving the system's adaptability and stability.
- **Related PR**: [#2922](https://github.com/alibaba/higress/pull/2922) \
**Contributor**: @daixijun \
**Change Log**: This PR upgrades the Higress SDK package reference in the project from `github.com/alibaba/higress` to `github.com/alibaba/higress/v2` to be compatible with the latest version. \
**Feature Value**: By updating the package name, the project can introduce and use the latest features and improvements of Higress, enhancing development efficiency and code quality.
- **Related PR**: [#2890](https://github.com/alibaba/higress/pull/2890) \
**Contributor**: @johnlanni \
**Change Log**: This PR refactors the `matchDomain` function, introduces the HostMatcher struct and matching types, replaces regular expressions with simple string operations to improve performance, and implements port stripping logic. \
**Feature Value**: By optimizing the host matching logic, the system performance and code maintainability are improved, making the handling of host headers with port numbers more accurate and efficient, enhancing the user experience.
### 📚 Documentation Updates (Documentation)
- **Related PR**: [#2915](https://github.com/alibaba/higress/pull/2915) \
**Contributor**: @a6d9a6m \
**Change Log**: This PR fixes a broken link in README_JP.md and adds missing parts in README.md, making the multilingual documentation more consistent. \
**Feature Value**: This improves the accuracy and consistency of the documentation, helping users find relevant information more easily, enhancing the user experience.
- **Related PR**: [#2912](https://github.com/alibaba/higress/pull/2912) \
**Contributor**: @hanxiantao \
**Change Log**: This PR optimizes the English and Chinese documentation for the hmac-auth-apisix plugin, adding more detailed configuration explanations, and improving the clarity of the documentation. \
**Feature Value**: By providing more detailed documentation, it helps developers better understand and use the hmac-auth-apisix plugin, improving the user experience.
- **Related PR**: [#2880](https://github.com/alibaba/higress/pull/2880) \
**Contributor**: @a6d9a6m \
**Change Log**: This PR fixes grammatical errors in README.md, README_JP.md, and README_ZH.md files, ensuring the correctness and consistency of the documentation. \
**Feature Value**: By correcting language errors in the documentation, the quality and readability of the documentation are improved, helping users better understand project information.
- **Related PR**: [#2873](https://github.com/alibaba/higress/pull/2873) \
**Contributor**: @CH3CHO \
**Change Log**: This PR adds methods to obtain Higress runtime logs and configurations in the non-crash-safe vulnerability issue template, helping to better investigate problems. \
**Feature Value**: By providing more detailed log and configuration information, users can more easily diagnose and resolve issues, improving the efficiency and accuracy of problem handling.
### 🧪 Testing Improvements (Testing)
- **Related PR**: [#2928](https://github.com/alibaba/higress/pull/2928) \
**Contributor**: @rinfx \
**Change Log**: This PR updates the test code for the ai-security-guard component, adding new test cases and adjusting some existing test logic. \
**Feature Value**: By improving the test coverage and accuracy of ai-security-guard, the stability and reliability of the entire project are enhanced, helping developers better understand and maintain related features.
---
## 📊 Release Statistics
- 🚀 New Features: 13
- 🐛 Bug Fixes: 7
- ♻️ Refactoring and Optimization: 5
- 📚 Documentation Updates: 4
- 🧪 Testing Improvements: 1
**Total**: 30 changes (including 2 major updates)
Thank you to all contributors for your hard work! 🎉
# Higress Console
## 📋 Overview of This Release
This release includes **4** updates, covering aspects such as feature enhancements, bug fixes, and performance optimizations.
### Update Content Distribution
- **New Features**: 1 item
- **Bug Fixes**: 2 items
- **Documentation Updates**: 1 item
### ⭐ Key Focus
This release contains **1** significant update, which is recommended for special attention:
- **feat: Support using a known service in OpenAI LLM provider** ([#589](https://github.com/higress-group/higress-console/pull/589)): This feature allows users to utilize existing service resources within the OpenAI LLM provider, thereby enhancing the flexibility and usability of the system, offering more options to users.
For more details, please refer to the "Important Features in Detail" section below.
---
## 🌟 Important Features in Detail
Here are detailed explanations of the important features and improvements in this release:
### 1. feat: Support using a known service in OpenAI LLM provider
**Related PR**: [#589](https://github.com/higress-group/higress-console/pull/589) | **Contributor**: [@CH3CHO](https://github.com/CH3CHO)
**Usage Background**
In many application scenarios, developers may wish to use their own custom OpenAI service instance instead of the default one. This could be due to specific security requirements, performance optimizations, or infrastructure constraints. This PR meets these needs by introducing support for known services. The target user group includes enterprise-level users and technical experts who require highly customized configurations. This feature addresses the issue of users not being able to flexibly choose and configure OpenAI services, improving the adaptability and user experience of the system.
**Feature Details**
This PR mainly implements the following:
1. Allows users to specify a custom service when configuring the OpenAI LLM provider.
2. Modifies the `OpenaiLlmProviderHandler` class, adding the `buildServiceSource` and `buildUpstreamService` methods to handle the logic for custom services.
3. Adds a delete method with an `internal` parameter to the `WasmPluginInstanceService` interface, supporting finer-grained control.
4. Updates the frontend internationalization resource files, adding prompts related to custom services. The key technical point lies in extending the existing architecture so that the system can recognize and use user-provided custom services while maintaining backward compatibility.
**Usage Instructions**
Enabling and configuring this feature is straightforward. First, when creating or updating an LLM provider, select the "Custom OpenAI Service" option and enter the corresponding service host and service path. Then, the system will automatically use these custom configurations to connect to the OpenAI service. Typical use cases include internally deployed OpenAI service instances within enterprises or environments requiring specific security policies. It's important to ensure that the entered URL is valid and that the service host and service path are correct. Best practice involves thorough testing to ensure that the custom configuration works as expected.
**Feature Value**
This new feature significantly enhances the flexibility and configurability of the system, allowing users to choose the most suitable OpenAI service based on their needs. For enterprise-level users who require high levels of customization, this flexibility is particularly crucial. Additionally, by supporting custom services, the system can better integrate into existing infrastructures, improving overall stability and performance. This is of great significance for maintaining and scaling large application systems. Overall, this feature not only enhances the user experience but also brings higher scalability and reliability to the system.
---
## 📝 Full Changelog
### 🐛 Bug Fixes
- **Related PR**: [#591](https://github.com/higress-group/higress-console/pull/591) \
**Contributor**: @CH3CHO \
**Change Log**: This PR fixes the issue where mandatory fields were not properly validated when enabling route rewrite configuration, ensuring that both `host` and `newPath.path` must provide valid values to avoid configuration errors. \
**Feature Value**: By correcting the validation logic for route rewrites, it prevents potential errors caused by incomplete configurations, enhancing the system's stability and user experience.
- **Related PR**: [#590](https://github.com/higress-group/higress-console/pull/590) \
**Contributor**: @CH3CHO \
**Change Log**: Fixed an error in the Route.customLabels handling logic, ensuring that built-in labels are correctly excluded during updates. \
**Feature Value**: Resolved the conflict between custom labels and built-in labels, ensuring flexibility and accuracy for users when updating route settings.
### 📚 Documentation
- **Related PR**: [#595](https://github.com/higress-group/higress-console/pull/595) \
**Contributor**: @CH3CHO \
**Change Log**: Removed irrelevant descriptions from README.md and added a code formatting guide, making the documentation more focused on the project itself. \
**Feature Value**: By updating README.md, users can more clearly understand the project structure and code formatting requirements, helping new contributors get up to speed quickly.
---
## 📊 Release Statistics
- 🚀 New Features: 1 item
- 🐛 Bug Fixes: 2 items
- 📚 Documentation Updates: 1 item
**Total**: 4 changes (including 1 significant update)
Thank you to all contributors for their hard work! 🎉

View File

@@ -0,0 +1,365 @@
# Higress
## 📋 本次发布概览
本次发布包含 **30** 项更新,涵盖了功能增强、Bug修复、性能优化等多个方面。
### 更新内容分布
- **新功能**: 13项
- **Bug修复**: 7项
- **重构优化**: 5项
- **文档更新**: 4项
- **测试改进**: 1项
### ⭐ 重点关注
本次发布包含 **2** 项重要更新,建议重点关注:
- **feat: add rag mcp server** ([#2930](https://github.com/alibaba/higress/pull/2930)): 通过引入RAG MCP服务器为用户提供了一种新的方式来管理与检索知识增强了系统的功能性和实用性。
- **refactor(mcp): use ECDS for golang filter configuration to avoid connection drain** ([#2931](https://github.com/alibaba/higress/pull/2931)): 采用ECDS进行过滤器配置避免了直接嵌入golang过滤器配置带来的不稳定因素提高了系统的稳定性和可维护性对用户而言减少了不必要的服务中断。
详细信息请查看下方重要功能详述部分。
---
## 🌟 重要功能详述
以下是本次发布中的重要功能和改进的详细说明:
### 1. feat: add rag mcp server
**相关PR**: [#2930](https://github.com/alibaba/higress/pull/2930) | **贡献者**: [@2456868764](https://github.com/2456868764)
**使用背景**
在现代应用中知识管理和检索变得越来越重要。许多系统需要快速、准确地从大量文本数据中提取和检索信息。RAG (Retrieval-Augmented Generation) 技术结合了检索和生成模型能够有效提升知识管理的效率和准确性。本PR引入了一个Model Context Protocol (MCP) 服务器专门用于知识管理和检索满足了用户对高效信息处理的需求。目标用户群体包括需要处理大量文本数据的企业和开发者尤其是在自然语言处理NLP和机器学习领域。
**功能详述**
该PR实现了RAG MCP服务器新增了多个功能模块包括知识管理、块管理、搜索和聊天功能。核心功能包括
1. **知识管理**:支持从文本创建知识块。
2. **块管理**:提供列表显示和删除知识块的功能。
3. **搜索**:支持基于关键词的搜索功能。
4. **聊天功能**:允许用户发送聊天消息并获取响应。
技术实现上,该服务器使用了多种外部库,如`github.com/dlclark/regexp2``github.com/milvus-io/milvus-sdk-go/v2``github.com/pkoukk/tiktoken-go`这些库提供了正则表达式处理、向量数据库管理和文本编码等功能。关键代码变更包括新增HTTP客户端、配置文件和多个处理函数确保了系统的灵活性和可配置性。
**使用方式**
启用和配置RAG MCP服务器的步骤如下
1.`higress-config`配置文件中启用MCP服务器并设置相应的路径和配置项。
2. 配置RAG系统的基础参数如分块器类型、块大小和重叠等。
3. 配置LLM大语言模型提供商及其API密钥、模型名称等。
4. 配置嵌入模型提供商及其API密钥、模型名称等。
5. 配置向量数据库提供商及其连接信息。
示例配置如下:
```yaml
rag:
splitter:
type: "recursive"
chunk_size: 500
chunk_overlap: 50
top_k: 5
threshold: 0.5
llm:
provider: "openai"
api_key: "your-llm-api-key"
model: "gpt-3.5-turbo"
embedding:
provider: "openai"
api_key: "your-embedding-api-key"
model: "text-embedding-ada-002"
vectordb:
provider: "milvus"
host: "localhost"
port: 19530
collection: "test_collection"
```
注意事项:
- 确保所有配置项正确无误特别是API密钥和模型名称。
- 在生产环境中,建议对超时时间等参数进行适当调整以适应不同网络环境。
**功能价值**
RAG MCP服务器为用户提供了一套完整的知识管理和检索解决方案提升了系统的智能化和自动化水平。具体好处包括
1. **提高效率**:通过集成的知识管理和检索功能,用户可以快速处理和检索大量文本数据,节省时间和资源。
2. **增强准确性**结合RAG技术系统能够更准确地提取和检索信息减少错误率。
3. **灵活配置**:提供了丰富的配置选项,用户可以根据实际需求进行灵活调整,满足不同场景下的需求。
4. **扩展性强**:支持多种提供商和模型,方便用户根据业务需求选择合适的组件和技术栈。
5. **稳定性提升**:通过详细的配置验证和错误处理机制,确保系统的稳定性和健壮性。
---
### 2. refactor(mcp): use ECDS for golang filter configuration to avoid connection drain
**相关PR**: [#2931](https://github.com/alibaba/higress/pull/2931) | **贡献者**: [@johnlanni](https://github.com/johnlanni)
**使用背景**
当前实现中Golang过滤器配置直接嵌入在HTTP_FILTER补丁中这会导致配置更改时出现连接耗尽的问题。主要原因是Go map在`map[string]any`字段中的排序不一致以及HTTP_FILTER更新触发的监听器配置更改。这个问题影响了系统的稳定性和用户体验。目标用户群体是使用Higress进行服务网格管理的开发者和运维人员。
**功能详述**
此PR将配置分为两部分HTTP_FILTER仅包含带有`config_discovery`的过滤器引用而EXTENSION_CONFIG则包含实际的Golang过滤器配置。通过这种方式配置更改不会直接导致连接耗尽。具体实现包括更新`constructMcpSessionStruct``constructMcpServerStruct`方法以返回与EXTENSION_CONFIG兼容的格式并更新单元测试以匹配新的配置结构。核心技术创新在于利用ECDS机制分离配置使配置更改更加平滑。
**使用方式**
启用和配置这个功能不需要额外的操作因为它是在后台自动处理的。典型的使用场景是在Higress中配置Golang过滤器时系统会自动将其分为HTTP_FILTER和EXTENSION_CONFIG两部分。用户只需按照常规方式配置Golang过滤器即可。需要注意的是在升级到新版本时确保所有相关的配置文件都已更新并且在生产环境中进行充分的测试以确保配置更改不会引入其他问题。
**功能价值**
通过分离配置并使用ECDS此功能消除了配置更改时的连接耗尽问题显著提高了系统的稳定性和用户体验。此外这种设计使得配置更易于管理和维护减少了因配置更改引起的潜在问题。对于大规模的服务网格部署这一改进尤为重要因为它可以减少因配置更改导致的服务中断从而提高整体系统的可靠性和可用性。
---
## 📝 完整变更日志
### 🚀 新功能 (Features)
- **Related PR**: [#2926](https://github.com/alibaba/higress/pull/2926) \
**Contributor**: @rinfx \
**Change Log**: 此PR在vertex-ai中添加了对多模态、函数调用和思考的支持涉及引入正则表达式库及处理逻辑的改进。 \
**Feature Value**: 通过增加新功能使得vertex-ai能够更好地支持复杂场景下的应用需求如多模态数据处理和更灵活的功能调用方式提升了系统的灵活性与实用性。
- **Related PR**: [#2917](https://github.com/alibaba/higress/pull/2917) \
**Contributor**: @Aias00 \
**Change Log**: 此次PR新增了对Fireworks AI的支持扩展了AI代理插件的功能包括必要的配置文件和测试代码的添加。 \
**Feature Value**: 增加对Fireworks AI的支持使用户能够利用该平台提供的AI功能拓宽了应用程序可以集成的AI服务范围增强了用户体验。
- **Related PR**: [#2907](https://github.com/alibaba/higress/pull/2907) \
**Contributor**: @Aias00 \
**Change Log**: 此PR升级了wasm-go以支持outputSchema功能涉及jsonrpc-converter和oidc插件的依赖更新。 \
**Feature Value**: 通过支持outputSchema增强了wasm-go插件的功能性和灵活性使用户能够更方便地处理和定义输出数据结构。
- **Related PR**: [#2897](https://github.com/alibaba/higress/pull/2897) \
**Contributor**: @rinfx \
**Change Log**: 此次PR为ai-proxy bedrock添加了多模态支持及thinking功能通过扩展bedrock.go中的相关代码来实现。 \
**Feature Value**: 新增的多模态和thinking支持丰富了ai-proxy的功能集使得用户能够利用更先进的AI技术处理复杂场景提升了系统的灵活性与实用性。
- **Related PR**: [#2891](https://github.com/alibaba/higress/pull/2891) \
**Contributor**: @rinfx \
**Change Log**: 此PR在AI内容安全插件中添加了针对不同消费者配置特定检测服务的功能允许用户根据需求自定义请求和响应的检查规则。 \
**Feature Value**: 通过支持为不同消费者设置独立的检测服务,该功能增强了系统的灵活性与安全性,使用户能够更精确地控制内容审查过程,从而满足多样化的安全策略需求。
- **Related PR**: [#2883](https://github.com/alibaba/higress/pull/2883) \
**Contributor**: @Aias00 \
**Change Log**: 此PR为美团Longcat增加了支持包括实现与Longcat平台的集成和相关的单元测试。 \
**Feature Value**: 新增对美团Longcat的支持扩展了插件的功能范围使得用户能够利用更多AI服务提供商的技术增强了应用的灵活性和多样性。
- **Related PR**: [#2867](https://github.com/alibaba/higress/pull/2867) \
**Contributor**: @Aias00 \
**Change Log**: 此PR新增了Gzip配置支持并更新了默认设置。通过在Helm配置文件中添加gzip选项用户可以自定义压缩参数以优化响应性能。 \
**Feature Value**: 增加了对Gzip配置的支持使得用户可以根据需求调整HTTP响应的压缩级别有助于减少传输的数据量加快页面加载速度提升用户体验。
- **Related PR**: [#2844](https://github.com/alibaba/higress/pull/2844) \
**Contributor**: @Aias00 \
**Change Log**: 此PR通过支持useSourceIp增强了负载均衡的一致性哈希算法修改了相关的Go代码文件以及添加了一个示例配置文件。 \
**Feature Value**: 新增的useSourceIp选项允许用户基于源IP地址进行一致性哈希负载均衡这有助于提高服务在特定网络条件下的稳定性和可靠性。
- **Related PR**: [#2843](https://github.com/alibaba/higress/pull/2843) \
**Contributor**: @erasernoob \
**Change Log**: 此PR为AI代理插件添加了NVIDIA Triton服务器支持包括相关配置说明和代码实现。 \
**Feature Value**: 新增对Triton服务器的支持扩展了AI代理插件的功能集使用户能够利用高性能的机器学习推理服务。
- **Related PR**: [#2806](https://github.com/alibaba/higress/pull/2806) \
**Contributor**: @C-zhaozhou \
**Change Log**: 此PR使ai-security-guard兼容MultiModalGuard接口增加了多模态API的支持并更新了相关文档。 \
**Feature Value**: 通过支持多模态API增强了ai-security-guard的功能使其能够处理更复杂的内容安全场景提升了用户体验和安全性。
- **Related PR**: [#2727](https://github.com/alibaba/higress/pull/2727) \
**Contributor**: @Aias00 \
**Change Log**: 本PR为OpenAI添加了端到端测试支持包括非流式和流式请求的测试用例。 \
**Feature Value**: 新增的OpenAI端到端测试有助于确保系统在处理不同类型的请求时保持稳定性和准确性提升了用户体验。
- **Related PR**: [#2593](https://github.com/alibaba/higress/pull/2593) \
**Contributor**: @Xscaperrr \
**Change Log**: 增加了WorkloadSelector字段以限制EnvoyFilter的作用范围确保在存在开源istio环境下不影响同命名空间的其他组件。 \
**Feature Value**: 通过限定EnvoyFilter仅作用于Higress Gateway避免了对环境内其他istio gateway/sidecar造成干扰提升了配置的安全性和隔离性。
### 🐛 Bug修复 (Bug Fixes)
- **Related PR**: [#2938](https://github.com/alibaba/higress/pull/2938) \
**Contributor**: @wydream \
**Change Log**: 此PR修复了MultiModalGuard模式下因缺少AttackLevel字段支持而导致的提示攻击检测失效问题确保所有级别的攻击都能被正确识别。 \
**Feature Value**: 通过增加对AttackLevel字段的支持提高了系统安全性防止高风险级别的提示攻击未被拦截的情况发生保障了用户体验和安全。
- **Related PR**: [#2904](https://github.com/alibaba/higress/pull/2904) \
**Contributor**: @johnlanni \
**Change Log**: 修复了在处理HTTP请求时原始Authorization头可能被覆盖的问题。通过无条件保存并检查非空后再写入上下文确保认证信息的准确性和安全性。 \
**Feature Value**: 该修复提升了系统的安全性和稳定性,避免了因认证信息丢失而导致的潜在认证失败或安全漏洞问题,增强了用户体验和信任度。
- **Related PR**: [#2899](https://github.com/alibaba/higress/pull/2899) \
**Contributor**: @Jing-ze \
**Change Log**: 此PR对MCP服务器进行了优化包括提前解析主机模式以减少运行时开销和移除未使用的DomainList字段。同时修复了SSE消息格式问题特别是处理多余换行符的问题。 \
**Feature Value**: 通过提高模式匹配效率和内存使用率以及修正SSE消息中的错误提升了用户体验和服务稳定性确保了数据传输的正确性和完整性。
- **Related PR**: [#2892](https://github.com/alibaba/higress/pull/2892) \
**Contributor**: @johnlanni \
**Change Log**: 修正了Claude API返回数组格式content时的JSON解组错误并移除了重复的代码结构提升了代码质量和维护性。 \
**Feature Value**: 解决了由于不正确的数据类型而导致的消息解析失败问题增强了系统的稳定性和用户体验对于使用数组作为content格式的用户来说这修复确保了消息处理流程的顺畅。
- **Related PR**: [#2882](https://github.com/alibaba/higress/pull/2882) \
**Contributor**: @johnlanni \
**Change Log**: 解决了Claude流式响应转换逻辑中的SSE事件分块问题改进了协议自动转换和工具调用状态跟踪。 \
**Feature Value**: 提高了Claude与OpenAI兼容提供者之间的双向转换可靠性避免了连接阻塞增强了用户体验。
- **Related PR**: [#2865](https://github.com/alibaba/higress/pull/2865) \
**Contributor**: @Thomas-Eliot \
**Change Log**: 该PR解决了当SSE事件被分割成多个chunk时SSE连接会被阻塞的问题。通过在代理mcp server场景下增加缓存机制来确保数据流处理的连续性。 \
**Feature Value**: 修复了可能导致SSE连接中断的问题增强了系统的稳定性和用户体验。用户不再会因为网络条件或服务器响应方式而遇到数据接收不完整的情况。
- **Related PR**: [#2859](https://github.com/alibaba/higress/pull/2859) \
**Contributor**: @lcfang \
**Change Log**: 此PR通过在mcpbridge中新增vport元素解决了当注册服务实例端口不一致时路由配置失效的问题。主要改动包括更新CRD定义、protobuf文件及相关生成代码。 \
**Feature Value**: 该功能确保了即使后端实例端口发生变化,服务的路由配置也能保持有效,从而提高了系统的稳定性和兼容性,为用户提供了更加可靠的服务体验。
### ♻️ 重构优化 (Refactoring)
- **Related PR**: [#2933](https://github.com/alibaba/higress/pull/2933) \
**Contributor**: @rinfx \
**Change Log**: 移除了bedrock和vertex中重复的think标签减少了冗余代码提高了代码的可读性和维护性。 \
**Feature Value**: 通过去除不必要的重复代码,提升了项目的整体质量和开发效率,使得代码结构更加清晰,方便后续的维护和扩展。
- **Related PR**: [#2927](https://github.com/alibaba/higress/pull/2927) \
**Contributor**: @rinfx \
**Change Log**: 此PR修改了ai-statistics插件中API名称提取逻辑将检查条件从固定长度5调整为至少3个部分以提高灵活性和兼容性。 \
**Feature Value**: 通过放宽API字符串分割的限制条件增强了系统对不同格式API字符串的支持能力提升了系统的适应性和稳定性。
- **Related PR**: [#2922](https://github.com/alibaba/higress/pull/2922) \
**Contributor**: @daixijun \
**Change Log**: 该PR将项目中引用的Higress SDK包名从github.com/alibaba/higress升级为github.com/alibaba/higress/v2以兼容最新版本。 \
**Feature Value**: 通过更新包名确保项目可以引入并使用Higress的最新功能和改进提升开发效率和代码质量。
- **Related PR**: [#2890](https://github.com/alibaba/higress/pull/2890) \
**Contributor**: @johnlanni \
**Change Log**: 重构了`matchDomain`函数引入HostMatcher结构及匹配类型替换正则表达式以简单字符串操作提高性能并实现端口剥离逻辑。 \
**Feature Value**: 通过优化主机匹配逻辑提高了系统性能和代码可维护性,使得处理包含端口号的主机头更加准确高效,提升了用户体验。
### 📚 文档更新 (Documentation)
- **Related PR**: [#2915](https://github.com/alibaba/higress/pull/2915) \
**Contributor**: @a6d9a6m \
**Change Log**: 修复了README_JP.md中的一个失效链接并在README.md中添加了缺失的部分使多语言文档内容更加一致。 \
**Feature Value**: 提高了文档的准确性和一致性,帮助用户更容易地找到相关信息,提升了用户体验。
- **Related PR**: [#2912](https://github.com/alibaba/higress/pull/2912) \
**Contributor**: @hanxiantao \
**Change Log**: 优化了hmac-auth-apisix插件的英文和中文文档增加了更多配置说明细节提升了文档清晰度。 \
**Feature Value**: 通过更详细的文档解释帮助开发者更好地理解和使用hmac-auth-apisix插件提高了用户体验。
- **Related PR**: [#2880](https://github.com/alibaba/higress/pull/2880) \
**Contributor**: @a6d9a6m \
**Change Log**: 此PR修复了README.md、README_JP.md和README_ZH.md文件中的语法错误确保文档的正确性和一致性。 \
**Feature Value**: 通过修正文档中的语言错误,提升了文档的质量与可读性,帮助用户更好地理解项目信息。
- **Related PR**: [#2873](https://github.com/alibaba/higress/pull/2873) \
**Contributor**: @CH3CHO \
**Change Log**: 此PR在非崩溃安全漏洞问题模板中增加了获取Higress运行时日志和配置的方法帮助更好地调查问题。 \
**Feature Value**: 通过提供更详细的日志和配置信息,用户可以更容易地诊断和解决问题,提高了问题处理的效率和准确性。
### 🧪 测试改进 (Testing)
- **Related PR**: [#2928](https://github.com/alibaba/higress/pull/2928) \
**Contributor**: @rinfx \
**Change Log**: 该PR更新了ai-security-guard组件的测试代码增加了新的测试用例并调整了一些现有的测试逻辑。 \
**Feature Value**: 通过改进ai-security-guard的测试覆盖率和准确性提高了整个项目的稳定性和可靠性有助于开发者更好地理解和维护相关功能。
---
## 📊 发布统计
- 🚀 新功能: 13项
- 🐛 Bug修复: 7项
- ♻️ 重构优化: 5项
- 📚 文档更新: 4项
- 🧪 测试改进: 1项
**总计**: 30项更改(包含2项重要更新)
感谢所有贡献者的辛勤付出!🎉
# Higress Console
## 📋 本次发布概览
本次发布包含 **4** 项更新,涵盖了功能增强、Bug修复、性能优化等多个方面。
### 更新内容分布
- **新功能**: 1项
- **Bug修复**: 2项
- **文档更新**: 1项
### ⭐ 重点关注
本次发布包含 **1** 项重要更新,建议重点关注:
- **feat: Support using a known service in OpenAI LLM provider** ([#589](https://github.com/higress-group/higress-console/pull/589)): 该功能允许用户在OpenAI LLM提供者中利用现有的服务资源从而扩展了系统的灵活性和可用性为用户提供更多选择。
详细信息请查看下方重要功能详述部分。
---
## 🌟 重要功能详述
以下是本次发布中的重要功能和改进的详细说明:
### 1. feat: Support using a known service in OpenAI LLM provider
**相关PR**: [#589](https://github.com/higress-group/higress-console/pull/589) | **贡献者**: [@CH3CHO](https://github.com/CH3CHO)
**使用背景**
在许多应用场景中开发者可能希望使用自定义的OpenAI服务实例而不是默认的服务。这可能是由于特定的安全要求、性能优化或基础设施限制。此PR通过引入对已知服务的支持满足了这些需求。目标用户群体包括需要高度定制化配置的企业级用户和技术专家。此功能解决了用户无法灵活选择和配置OpenAI服务的问题提升了系统的适应性和用户体验。
**功能详述**
该PR主要实现了以下功能1. 允许用户在配置OpenAI LLM提供者时指定自定义的服务。2. 修改了`OpenaiLlmProviderHandler`类,添加了`buildServiceSource``buildUpstreamService`方法以处理自定义服务的逻辑。3. 在`WasmPluginInstanceService`接口中新增了带`internal`参数的删除方法以支持更细粒度的控制。4. 更新了前端国际化资源文件,增加了与自定义服务相关的提示信息。核心技术要点在于对现有架构的扩展,使得系统能够识别并使用用户提供的自定义服务,同时保持了向后兼容性。
**使用方式**
启用和配置这个功能非常简单。首先在创建或更新LLM提供者时选择“自定义OpenAI服务”选项并填写相应的服务主机和服务路径。然后系统会自动使用这些自定义配置来连接OpenAI服务。典型的使用场景包括企业内部部署的OpenAI服务实例或者需要特定安全策略的环境。注意事项包括确保输入的URL是有效的并且服务主机和服务路径正确。最佳实践是进行充分的测试确保自定义配置能够正常工作。
**功能价值**
这一新功能显著提升了系统的灵活性和可配置性使用户能够根据自身需求选择最合适的OpenAI服务。对于需要高度定制化的企业级用户来说这种灵活性尤为重要。此外通过支持自定义服务系统可以更好地集成到现有的基础设施中提高了整体的稳定性和性能。这对于维护和扩展大型应用系统具有重要意义。总体而言这一功能不仅增强了用户体验还为系统带来了更高的可扩展性和可靠性。
---
## 📝 完整变更日志
### 🐛 Bug修复 (Bug Fixes)
- **Related PR**: [#591](https://github.com/higress-group/higress-console/pull/591) \
**Contributor**: @CH3CHO \
**Change Log**: 此PR修复了在启用路由重写配置时未正确验证必填字段的问题确保`host``newPath.path`都必须提供有效值以避免配置错误。 \
**Feature Value**: 通过修正路由重写的验证逻辑,防止因配置不完整而导致的潜在错误,提升了系统的稳定性和用户体验。
- **Related PR**: [#590](https://github.com/higress-group/higress-console/pull/590) \
**Contributor**: @CH3CHO \
**Change Log**: 修正了Route.customLabels处理逻辑中的错误确保内置标签在更新时能够被正确排除。 \
**Feature Value**: 解决了自定义标签与内置标签冲突的问题,保证了用户在更新路由设置时的灵活性和准确性。
### 📚 文档更新 (Documentation)
- **Related PR**: [#595](https://github.com/higress-group/higress-console/pull/595) \
**Contributor**: @CH3CHO \
**Change Log**: 移除了README.md中与项目无关的描述并添加了代码格式指南使得文档更加专注于项目本身。 \
**Feature Value**: 通过更新README.md使用户能够更清晰地了解项目的结构和代码规范要求有助于新贡献者快速上手。
---
## 📊 发布统计
- 🚀 新功能: 1项
- 🐛 Bug修复: 2项
- 📚 文档更新: 1项
**总计**: 4项更改(包含1项重要更新)
感谢所有贡献者的辛勤付出!🎉