fix: llm can be empty and optimize document and prompt (#2942)

Author: Jun
Date: 2025-09-23 14:03:00 +08:00
Committed by: GitHub
Parent: bfca4667bb
Commit: e2d00da861
8 changed files with 357 additions and 187 deletions

View File

@@ -1,173 +0,0 @@
# Higress RAG MCP Server
This is a Model Context Protocol (MCP) server that provides knowledge management and retrieval capabilities.
This MCP server provides the following tools:
## MCP Tools
### Knowledge Management
- `create-chunks-from-text` - Create knowledge chunks from text (p1)
### Chunk Management
- `list-chunks` - List knowledge chunks
- `delete-chunk` - Delete a knowledge chunk
### Search
- `search` - Search the knowledge base
### Chat
- `chat` - Send a chat message
## Configuration
### Configuration Structure
```yaml
rag:
  # base RAG configuration
  splitter:
    type: "recursive"  # splitter type: recursive or nosplitter
    chunk_size: 500
    chunk_overlap: 50
  top_k: 5             # number of chunks returned by search
  threshold: 0.5       # search threshold
llm:
  provider: "openai"   # openai
  api_key: "your-llm-api-key"
  base_url: "https://api.openai.com/v1"  # optional
  model: "gpt-3.5-turbo"                 # LLM model
  max_tokens: 2048                       # maximum tokens
  temperature: 0.5                       # temperature
embedding:
  provider: "openai"   # openai, dashscope
  api_key: "your-embedding-api-key"
  base_url: "https://api.openai.com/v1"  # optional
  model: "text-embedding-ada-002"        # embedding model
vectordb:
  provider: "milvus"   # milvus
  host: "localhost"
  port: 19530
  database: "default"
  collection: "test_collection"
  username: ""         # optional
  password: ""         # optional
```
### Sample higress-config
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: higress-config
  namespace: higress-system
data:
  higress: |
    mcpServer:
      enable: true
      sse_path_suffix: "/sse"
      redis:
        address: "<Redis IP>:6379"
        username: ""
        password: ""
        db: 0
      match_list:
        - path_rewrite_prefix: ""
          upstream_type: ""
          enable_path_rewrite: false
          match_rule_domain: ""
          match_rule_path: "/mcp-servers/rag"
          match_rule_type: "prefix"
      servers:
        - path: "/mcp-servers/rag"
          name: "rag"
          type: "rag"
          config:
            rag:
              splitter:
                provider: recursive
                chunk_size: 500
                chunk_overlap: 50
              top_k: 10
              threshold: 0.5
            llm:
              provider: openai
              api_key: sk-XXX
              base_url: https://openrouter.ai/api/v1
              model: openai/gpt-4o
              temperature: 0.5
              max_tokens: 2048
            embedding:
              provider: dashscope
              api_key: sk-xxx
              model: text-embedding-v4
            vectordb:
              provider: milvus
              host: <milvus IP>
              port: 19530
              database: default
              collection: test_collection
```
### Supported Providers
#### Embedding
- **OpenAI**
- **DashScope**
#### Vector Database
- **Milvus**
#### LLM
- **OpenAI**
## Installing Milvus
### Docker Configuration
Configure registry mirrors for Docker Desktop.
Edit daemon.json and add registry mirrors, for example:
```
{
  "registry-mirrors": [
    "https://docker.m.daocloud.io",
    "https://mirror.ccs.tencentyun.com",
    "https://hub-mirror.c.163.com"
  ],
  "dns": ["8.8.8.8", "1.1.1.1"]
}
```
### Install Milvus
```
# v2.6.0: download the configuration file
wget https://github.com/milvus-io/milvus/releases/download/v2.6.0/milvus-standalone-docker-compose.yml -O docker-compose.yml

# v2.4.23: download the configuration file
wget https://github.com/milvus-io/milvus/releases/download/v2.4.23/milvus-standalone-docker-compose.yml -O docker-compose.yml

# Start Milvus
sudo docker compose up -d
Creating milvus-etcd  ... done
Creating milvus-minio ... done
Creating milvus-standalone ... done
```
### Install Attu
Attu is a visual administration tool for Milvus, used to view and manage the data stored in it.
```
docker run -p 8000:3000 -e MILVUS_URL=http://<host IP>:19530 zilliz/attu:v2.6
# Open your browser and navigate to http://localhost:8000
```

View File

@@ -0,0 +1,288 @@
# Higress RAG MCP Server
This is a Model Context Protocol (MCP) server that provides knowledge management and retrieval capabilities.
## MCP Tools
Higress RAG MCP Server provides the following tools; depending on the configuration, the set of available tools differs:
| Tool | Description | Required Config | Required/Optional |
|---------|---------|---------|----------|
| `create-chunks-from-text` | Splits text into chunks and stores them in the vector database; used to build the knowledge base | embedding, vectordb | **Required** |
| `list-chunks` | Lists stored knowledge chunks; used for knowledge base management | vectordb | **Required** |
| `delete-chunk` | Deletes a specified knowledge chunk; used for knowledge base maintenance | vectordb | **Required** |
| `search` | Searches the knowledge base by semantic similarity | embedding, vectordb | **Required** |
| `chat` | Answers user questions via retrieval-augmented generation (RAG), grounding generated answers in knowledge base content | embedding, vectordb, llm | **Optional** |
### How Tools Relate to Configuration
- **Basic features** (knowledge management, search): only `embedding` and `vectordb` need to be configured
- **Advanced features** (chat Q&A): additionally require `llm`
Specifically:
- When `llm` is not configured, the `chat` tool is unavailable
- All tools depend on the `embedding` and `vectordb` configuration
- The `rag` section tunes chunking and retrieval parameters and affects the behavior of all tools; a minimal configuration without `llm` is sketched below
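A minimal sketch of the `config` section with `llm` omitted, so that only the knowledge-management and search tools are exposed. The values are placeholders; the field names follow the configuration table later in this document:
```yaml
config:
  rag:
    splitter:
      provider: recursive
      chunk_size: 500
      chunk_overlap: 50
    top_k: 10
    threshold: 0.5
  embedding:
    provider: dashscope
    api_key: sk-xxx            # placeholder key
    model: text-embedding-v4
  vectordb:
    provider: milvus
    host: <milvus IP>
    port: 19530
    database: default
    collection: test_collection
```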
## Typical Usage Scenarios
### Minimal Tool Set (no LLM configured)
Suitable for scenarios that only need knowledge base management and retrieval, without generated answers.
**Available tools**: `create-chunks-from-text`, `list-chunks`, `delete-chunk`, `search`
**Typical use cases**:
1. Enterprise document libraries that only need to retrieve relevant document fragments
2. Data indexing systems that locate information quickly via semantic search
3. Content management systems that manage and retrieve structured and unstructured content
**Example flow**:
```
1. Import documents with create-chunks-from-text
2. Retrieve relevant content with search
3. Manage the knowledge base with list-chunks and delete-chunk
```
### Full Tool Set (LLM configured)
Suitable for advanced scenarios that require intelligent Q&A and content generation.
**Available tools**: `create-chunks-from-text`, `list-chunks`, `delete-chunk`, `search`, `chat`
**Typical use cases**:
1. Intelligent customer service systems that answer user questions from an enterprise knowledge base
2. Document assistants that help users understand and analyze complex documents
3. Domain-specific Q&A systems, e.g. for legal, finance, or technical support
**Example flow** (a Go sketch of this flow follows the block):
```
1. Import domain documents with create-chunks-from-text
2. The user asks a question through the chat tool
3. The system retrieves relevant knowledge with search
4. The LLM generates an answer from the retrieved results
5. Administrators maintain the knowledge base with list-chunks and delete-chunk
```
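A minimal sketch of this flow against the `RAGClient` API. `CreateChunkFromText` and `Chat` are the methods exercised by the test code later in this document; the `getRAGClient` helper mirrors the one used in those tests and is assumed to wire up the RAG configuration, and `Chat` internally performs the retrieval step:
```golang
func runRAGFlow() error {
    // Construct the client (getRAGClient is assumed to load the RAG
    // configuration; see the test code later in this document).
    ragClient, err := getRAGClient()
    if err != nil {
        return fmt.Errorf("getRAGClient() error: %w", err)
    }
    // Step 1: import a document into the knowledge base.
    chunks, err := ragClient.CreateChunkFromText("document body ...", "document title")
    if err != nil {
        return fmt.Errorf("CreateChunkFromText() error: %w", err)
    }
    fmt.Printf("stored %d chunks\n", len(chunks))
    // Steps 2-4: Chat retrieves relevant chunks and asks the LLM to answer.
    reply, err := ragClient.Chat("What does the imported document say?")
    if err != nil {
        return fmt.Errorf("Chat() error: %w", err)
    }
    fmt.Println(reply)
    return nil
}
```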
## Configuration
### Configuration Structure
| Name | Type | Required | Default | Description |
|----------------------------|----------|-----------|---------|--------|
| **rag** | object | Required | - | Base RAG configuration |
| rag.splitter.provider | string | Required | recursive | Splitter type: recursive or nosplitter |
| rag.splitter.chunk_size | integer | Optional | 500 | Chunk size |
| rag.splitter.chunk_overlap | integer | Optional | 50 | Chunk overlap |
| rag.top_k | integer | Optional | 10 | Number of chunks returned by search |
| rag.threshold | float | Optional | 0.5 | Search threshold |
| **llm** | object | Optional | - | LLM configuration; if omitted, the chat tool is unavailable (see the sketch after this table) |
| llm.provider | string | Optional | openai | LLM provider |
| llm.api_key | string | Optional | - | LLM API key |
| llm.base_url | string | Optional | - | LLM API base URL |
| llm.model | string | Optional | gpt-4o | LLM model name |
| llm.max_tokens | integer | Optional | 2048 | Maximum tokens |
| llm.temperature | float | Optional | 0.5 | Temperature |
| **embedding** | object | Required | - | Embedding configuration (required by all tools) |
| embedding.provider | string | Required | dashscope | Embedding provider: openai or dashscope |
| embedding.api_key | string | Required | - | Embedding API key |
| embedding.base_url | string | Optional | - | Embedding API base URL |
| embedding.model | string | Required | text-embedding-v4 | Embedding model name |
| **vectordb** | object | Required | - | Vector database configuration (required by all tools) |
| vectordb.provider | string | Required | milvus | Vector database provider |
| vectordb.host | string | Required | localhost | Database host |
| vectordb.port | integer | Required | 19530 | Database port |
| vectordb.database | string | Required | default | Database name |
| vectordb.collection | string | Required | test_collection | Collection name |
| vectordb.username | string | Optional | - | Database username |
| vectordb.password | string | Optional | - | Database password |
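To enable the optional `chat` tool, an `llm` block is added alongside `embedding` and `vectordb`, mirroring the full sample below; the key and base_url values here are placeholders:
```yaml
llm:
  provider: openai
  api_key: sk-XXX                         # placeholder key
  base_url: https://openrouter.ai/api/v1  # optional OpenAI-compatible endpoint
  model: openai/gpt-4o
  temperature: 0.5
  max_tokens: 2048
```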
### Sample higress-config
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: higress-config
  namespace: higress-system
data:
  higress: |
    mcpServer:
      enable: true
      sse_path_suffix: "/sse"
      redis:
        address: "<Redis IP>:6379"
        username: ""
        password: ""
        db: 0
      match_list:
        - path_rewrite_prefix: ""
          upstream_type: ""
          enable_path_rewrite: false
          match_rule_domain: ""
          match_rule_path: "/mcp-servers/rag"
          match_rule_type: "prefix"
      servers:
        - path: "/mcp-servers/rag"
          name: "rag"
          type: "rag"
          config:
            rag:
              splitter:
                provider: recursive
                chunk_size: 500
                chunk_overlap: 50
              top_k: 10
              threshold: 0.5
            llm:
              provider: openai
              api_key: sk-XXX
              base_url: https://openrouter.ai/api/v1
              model: openai/gpt-4o
              temperature: 0.5
              max_tokens: 2048
            embedding:
              provider: dashscope
              api_key: sk-xxx
              model: text-embedding-v4
            vectordb:
              provider: milvus
              host: <milvus IP>
              port: 19530
              database: default
              collection: test_collection
```
### Supported Providers
#### Embedding
- **OpenAI**
- **DashScope**
#### Vector Database
- **Milvus**
#### LLM
- **OpenAI**
## How to Evaluate a Dataset
Evaluating a dataset takes two steps: first import the dataset corpus, then test the chat quality.
### Importing the Dataset Corpus
Import the corpus with `RAGClient.CreateChunkFromText`. For example, the corpus may be in JSON format, where each JSON object contains `body`, `title`, and `url` fields.
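A corpus record of this shape might look like the following (a sketch; the values are placeholders):
```json
[
  {
    "body": "Full article text ...",
    "title": "Article title",
    "url": "https://example.com/article"
  }
]
```
Sample loading code: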
```golang
func TestRAGClient_LoadChunks(t *testing.T) {
    t.Logf("TestRAGClient_LoadChunks")
    ragClient, err := getRAGClient()
    if err != nil {
        t.Errorf("getRAGClient() error = %v", err)
        return
    }
    // load json output/corpus.json and then call ragclient CreateChunkFromText to insert chunks
    file, err := os.Open("/dataset/corpus.json")
    if err != nil {
        t.Errorf("LoadData() error = %v", err)
        return
    }
    defer file.Close()
    decoder := json.NewDecoder(file)
    var data []struct {
        Body  string `json:"body"`
        Title string `json:"title"`
        Url   string `json:"url"`
    }
    if err := decoder.Decode(&data); err != nil {
        t.Errorf("LoadData() error = %v", err)
        return
    }
    for _, item := range data {
        t.Logf("LoadData() url = %s", item.Url)
        t.Logf("LoadData() title = %s", item.Title)
        t.Logf("LoadData() len body = %d", len(item.Body))
        chunks, err := ragClient.CreateChunkFromText(item.Body, item.Title)
        if err != nil {
            t.Errorf("LoadData() error = %v", err)
            continue
        } else {
            t.Logf("LoadData() chunks len = %d", len(chunks))
        }
    }
    t.Logf("TestRAGClient_LoadChunks done")
}
```
### Testing Chat Quality
Test the chat quality with `RAGClient.Chat`. Sample code:
```golang
func TestRAGClient_Chat(t *testing.T) {
    ragClient, err := getRAGClient()
    if err != nil {
        t.Errorf("getRAGClient() error = %v", err)
        return
    }
    query := "Which online betting platform provides a welcome bonus of up to $1000 in bonus bets for new customers' first losses, runs NBA betting promotions, and is anticipated to extend the same sign-up offer to new users in Vermont, as reported by both CBSSports.com and Sporting News?"
    resp, err := ragClient.Chat(query)
    if err != nil {
        t.Errorf("Chat() error = %v", err)
        return
    }
    if resp == "" {
        t.Errorf("Chat() resp = %s, want not empty", resp)
        return
    }
    t.Logf("Chat() resp = %s", resp)
}
```
## Installing Milvus
### Docker Configuration
Configure registry mirrors for Docker Desktop.
Edit daemon.json and add registry mirrors, for example:
```
{
  "registry-mirrors": [
    "https://docker.m.daocloud.io",
    "https://mirror.ccs.tencentyun.com",
    "https://hub-mirror.c.163.com"
  ],
  "dns": ["8.8.8.8", "1.1.1.1"]
}
```
### Install Milvus
```
# v2.6.0: download the configuration file
wget https://github.com/milvus-io/milvus/releases/download/v2.6.0/milvus-standalone-docker-compose.yml -O docker-compose.yml

# v2.4.23: download the configuration file
wget https://github.com/milvus-io/milvus/releases/download/v2.4.23/milvus-standalone-docker-compose.yml -O docker-compose.yml

# Start Milvus
sudo docker compose up -d
Creating milvus-etcd  ... done
Creating milvus-minio ... done
Creating milvus-standalone ... done
```
### Install Attu
Attu is a visual administration tool for Milvus, used to view and manage the data stored in it.
```
docker run -p 8000:3000 -e MILVUS_URL=http://<host IP>:19530 zilliz/attu:v2.6
# Open your browser and navigate to http://localhost:8000
```

View File

@@ -12,7 +12,7 @@ import (
const (
    OPENAI_CHAT_ENDPOINT = "/chat/completions"
-   OPENAI_DEFAULT_MODEL = "gpt-3.5-turbo"
+   OPENAI_DEFAULT_MODEL = "gpt-4o"
)
// openAI specific configuration captured after initialization.

View File

@@ -4,7 +4,7 @@ import (
"strings"
)
const RAGPromptTemplate = `You are a professional knowledge Q&A assistant. Your task is to provide accurate, complete, and strictly relevant answers based on the user's question and retrieved context.
const RAGPromptTemplate = `You are a professional knowledge Q&A assistant. Your task is to provide direct and concise answers based on the user's question and retrieved context.
Retrieved relevant context (may be empty, multiple segments separated by line breaks):
{contexts}
@@ -13,9 +13,11 @@ User question:
{query}
Requirements:
-1. If the context provides sufficient information, answer directly based on the context. You may use domain knowledge to supplement, but do not fabricate facts beyond the context.
-2. If the context is insufficient or unrelated to the question, respond with: "I am unable to answer this question."
-3. Your response must correctly answer the user's question and must not contain any irrelevant or unrelated content.`
+1. Provide ONLY the direct answer without any explanation, reasoning, or additional context.
+2. If the context provides sufficient information, output the answer in the most concise form possible.
+3. If the context is insufficient or unrelated to the question, respond with: "I am unable to answer this question."
+4. Do not include any phrases like "The answer is", "Based on the context", etc. Just output the answer directly.
+`
func BuildPrompt(query string, contexts []string, join string) string {
rendered := strings.ReplaceAll(RAGPromptTemplate, "{query}", query)
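For orientation, a minimal sketch of how `BuildPrompt` might be invoked; only the signature above is confirmed by this diff, so the call site, arguments, and the `"\n"` join separator are assumptions:
```golang
func buildExamplePrompt() string {
    // Hypothetical call site: render the RAG prompt from the user query
    // and the retrieved chunks, joining context segments with line breaks.
    return BuildPrompt(
        "Which platform offers the welcome bonus?",
        []string{"retrieved chunk one", "retrieved chunk two"},
        "\n",
    )
}
```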

View File

@@ -46,11 +46,15 @@ func NewRAGClient(config *config.Config) (*RAGClient, error) {
    }
    ragclient.embeddingProvider = embeddingProvider
-   llmProvider, err := llm.NewLLMProvider(ragclient.config.LLM)
-   if err != nil {
-       return nil, fmt.Errorf("create llm provider failed, err: %w", err)
+   if ragclient.config.LLM.Provider == "" {
+       ragclient.llmProvider = nil
+   } else {
+       llmProvider, err := llm.NewLLMProvider(ragclient.config.LLM)
+       if err != nil {
+           return nil, fmt.Errorf("create llm provider failed, err: %w", err)
+       }
+       ragclient.llmProvider = llmProvider
    }
-   ragclient.llmProvider = llmProvider
    demoVector, err := embeddingProvider.GetEmbedding(context.Background(), "initialization")
    if err != nil {

View File

@@ -1,6 +1,8 @@
package rag

import (
+   "encoding/json"
+   "os"
    "testing"

    "github.com/alibaba/higress/plugins/golang-filter/mcp-server/servers/rag/config"
@@ -137,7 +139,11 @@ func TestRAGClient_Chat(t *testing.T) {
t.Errorf("getRAGClient() error = %v", err)
return
}
query := "what is the competition about?"
// query := "Who is the individual associated with the cryptocurrency industry facing a criminal trial on fraud and conspiracy charges, as reported by both The Verge and TechCrunch, and is accused by prosecutors of committing fraud for personal gain?"
// query := "Which individual is implicated in both inflating the value of a Manhattan apartment to a figure not yet achieved in New York City's real estate history, according to 'Fortune', and is also accused of adjusting this apartment's valuation to compensate for a loss in another asset's worth, as reported by 'The Age'?"
// query := "Who is the figure associated with generative AI technology whose departure from OpenAI was considered shocking according to Fortune, and is also the subject of a prevailing theory suggesting a lack of full truthfulness with the board as reported by TechCrunch?"
// query := "Do the TechCrunch article on software companies and the Hacker News article on The Epoch Times both report an increase in revenue related to payment and subscription models, respectively?"
query := "Which online betting platform provides a welcome bonus of up to $1000 in bonus bets for new customers' first losses, runs NBA betting promotions, and is anticipated to extend the same sign-up offer to new users in Vermont, as reported by both CBSSports.com and Sporting News?"
resp, err := ragClient.Chat(query)
if err != nil {
t.Errorf("Chat() error = %v", err)
@@ -147,4 +153,45 @@ func TestRAGClient_Chat(t *testing.T) {
t.Errorf("Chat() resp = %s, want not empty", resp)
return
}
t.Logf("Chat() resp = %s", resp)
}
func TestRAGClient_LoadChunks(t *testing.T) {
t.Logf("TestRAGClient_LoadChunks")
ragClient, err := getRAGClient()
if err != nil {
t.Errorf("getRAGClient() error = %v", err)
return
}
// load json output/corpus.json and then call ragclient CreateChunkFromText to insert chunks
file, err := os.Open("/dataset/corpus.json")
if err != nil {
t.Errorf("LoadData() error = %v", err)
return
}
defer file.Close()
decoder := json.NewDecoder(file)
var data []struct {
Body string `json:"body"`
Title string `json:"title"`
Url string `json:"url"`
}
if err := decoder.Decode(&data); err != nil {
t.Errorf("LoadData() error = %v", err)
return
}
for _, item := range data {
t.Logf("LoadData() url = %s", item.Url)
t.Logf("LoadData() title = %s", item.Title)
t.Logf("LoadData() len body = %d", len(item.Body))
chunks, err := ragClient.CreateChunkFromText(item.Body, item.Title)
if err != nil {
t.Errorf("LoadData() error = %v", err)
continue
} else {
t.Logf("LoadData() chunks len = %d", len(chunks))
}
}
t.Logf("TestRAGClient_LoadChunks done")
}

View File

@@ -28,7 +28,7 @@ func init() {
            TopK: 10,
        },
        LLM: config.LLMConfig{
-           Provider: "openai",
+           Provider: "",
            APIKey:  "",
            BaseURL: "",
            Model:   "gpt-4o",
@@ -103,8 +103,6 @@ func (c *RAGConfig) ParseConfig(config map[string]any) error {
    if llmConfig, ok := config["llm"].(map[string]any); ok {
        if provider, exists := llmConfig["provider"].(string); exists {
            c.config.LLM.Provider = provider
-       } else {
-           return errors.New("missing llm provider")
        }
        if apiKey, exists := llmConfig["api_key"].(string); exists {
            c.config.LLM.APIKey = apiKey
@@ -190,7 +188,7 @@ func (c *RAGConfig) NewServer(serverName string) (*common.MCPServer, error) {
    // Intelligent Q&A Tool
    mcpServer.AddTool(
-       mcp.NewToolWithRawSchema("chat", "Generate contextually relevant responses using RAG system with LLM integration", GetChatSchema()),
+       mcp.NewToolWithRawSchema("chat", "Answer user questions by retrieving relevant knowledge from the database and generating responses using RAG-enhanced LLM", GetChatSchema()),
        HandleChat(ragClient),
    )

View File

@@ -169,6 +169,10 @@ func HandleChat(ragClient *RAGClient) common.ToolHandlerFunc {
    if !ok {
        return nil, fmt.Errorf("invalid query argument")
    }
+   // check llm provider
+   if ragClient.llmProvider == nil {
+       return nil, fmt.Errorf("llm provider is empty, please check the llm configuration")
+   }
    // Generate response using RAGClient's LLM
    reply, err := ragClient.Chat(query)
    if err != nil {