mirror of
https://github.com/alibaba/higress.git
synced 2026-02-06 15:10:54 +08:00
feat: optimize elasticsearch ai-search plugin and update related docs (#2100)
This commit is contained in:
@@ -75,18 +75,22 @@ description: higress 支持通过集成搜索引擎(Google/Bing/Arxiv/Elastics
|
||||
|
||||
## Elasticsearch 特定配置
|
||||
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|
||||
|------|----------|----------|--------|-----------------------|
|
||||
| index | string | 必填 | - | 要搜索的Elasticsearch索引名称 |
|
||||
| contentField | string | 必填 | - | 要查询的内容字段名称 |
|
||||
| semanticTextField | string | 必填 | - | 要查询的 embedding 字段名称 |
|
||||
| linkField | string | 必填 | - | 结果链接字段名称 |
|
||||
| titleField | string | 必填 | - | 结果标题字段名称 |
|
||||
| username | string | 选填 | - | Elasticsearch 用户名 |
|
||||
| password | string | 选填 | - | Elasticsearch 密码 |
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|
||||
|------|----------|------|--------|------------------------------------|
|
||||
| index | string | 必填 | - | 要搜索的 Elasticsearch 索引名称 |
|
||||
| contentField | string | 必填 | - | 要查询的内容字段名称 |
|
||||
| semanticTextField | string | 必填 | - | 要查询的 embedding 字段名称 |
|
||||
| linkField | string | 选填 | - | 结果链接字段名称,启用 `needReference` 时必填 |
|
||||
| titleField | string | 选填 | - | 结果标题字段名称,启用 `needReference` 时必填 |
|
||||
| username | string | 选填 | - | Elasticsearch 用户名 |
|
||||
| password | string | 选填 | - | Elasticsearch 密码 |
|
||||
|
||||
混合搜索中使用的 [Reciprocal Rank Fusion (RRF)](https://www.elastic.co/guide/en/elasticsearch/reference/8.17/rrf.html) 查询要求 Elasticsearch 的版本在 8.8 及以上。
|
||||
|
||||
目前文档向量化依赖于 Elasticsearch 的 Embedding 模型,该功能需要 Elasticsearch 企业版 License,或可使用 30 天的 Trial License。安装 Elasticsearch 内置 Embedding 模型的步骤可参考[该文档](https://www.elastic.co/docs/explore-analyze/machine-learning/nlp/ml-nlp-elser#alternative-download-deploy);若需安装第三方 Embedding 模型,可参考[该文档](https://www.elastic.co/docs/explore-analyze/machine-learning/nlp/ml-nlp-text-emb-vector-search-example)。
|
||||
|
||||
有关 ai-search 插件集成 Elasticsearch 的完整教程,请参考:[使用 LangChain + Higress + Elasticsearch 构建 RAG 应用](https://cr7258.github.io/blogs/original/2025/15-rag-higress-es-langchain)。
|
||||
|
||||
## Quark 特定配置
|
||||
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|
||||
@@ -204,13 +208,9 @@ searchFrom:
|
||||
searchFrom:
|
||||
- type: elasticsearch
|
||||
serviceName: "es-svc.static"
|
||||
# 固定地址服务的端口默认是80
|
||||
servicePort: 80
|
||||
index: "knowledge_base"
|
||||
contentField: "content"
|
||||
semanticTextField: "semantic_text"
|
||||
linkField: "url"
|
||||
titleField: "title"
|
||||
# username: "elastic"
|
||||
# password: "password"
|
||||
```
|
||||
|
||||
@@ -80,13 +80,17 @@ It is strongly recommended to enable this feature when using Arxiv or Elasticsea
|
||||
| index | string | Required | - | Elasticsearch index name to search |
|
||||
| contentField | string | Required | - | Content field name to query |
|
||||
| semanticTextField | string | Required | - | Embedding field name to query |
|
||||
| linkField | string | Required | - | Result link field name |
|
||||
| titleField | string | Required | - | Result title field name |
|
||||
| linkField | string | Optional | - | Result link field name; required when `needReference` is enabled |
|
||||
| titleField | string | Optional | - | Result title field name; required when `needReference` is enabled |
|
||||
| username | string | Optional | - | Elasticsearch username |
|
||||
| password | string | Optional | - | Elasticsearch password |
|
||||
|
||||
The [Reciprocal Rank Fusion (RRF)](https://www.elastic.co/guide/en/elasticsearch/reference/8.17/rrf.html) query used in hybrid search requires Elasticsearch version 8.8 or higher.
|
||||
|
||||
Currently, document vectorization relies on Elasticsearch's embedding model, which requires an Elasticsearch Enterprise license or a 30-day Trial license. To install the built-in embedding model in Elasticsearch, please refer to [this documentation](https://www.elastic.co/docs/explore-analyze/machine-learning/nlp/ml-nlp-elser#alternative-download-deploy). If you want to install a third-party embedding model, please refer to [this guide](https://www.elastic.co/docs/explore-analyze/machine-learning/nlp/ml-nlp-text-emb-vector-search-example).
|
||||
|
||||
For a complete tutorial on integrating the ai-search plugin with Elasticsearch, please refer to: [Building a RAG Application with LangChain + Higress + Elasticsearch](https://cr7258.github.io/blogs/original/2025/15-rag-higress-es-langchain).
|
||||
|
||||
## Quark Specific Configuration
|
||||
|
||||
| Name | Data Type | Requirement | Default Value | Description |
|
||||
@@ -203,13 +207,9 @@ Note that excessive concurrency may lead to rate limiting, adjust according to a
|
||||
searchFrom:
|
||||
- type: elasticsearch
|
||||
serviceName: "es-svc.static"
|
||||
# static IP services use port 80 by default
|
||||
servicePort: 80
|
||||
index: "knowledge_base"
|
||||
contentField: "content"
|
||||
semanticTextField: "semantic_text"
|
||||
linkField: "url"
|
||||
titleField: "title"
|
||||
# username: "elastic"
|
||||
# password: "password"
|
||||
```
|
||||
|
||||
@@ -27,7 +27,7 @@ type ElasticsearchSearch struct {
|
||||
password string
|
||||
}
|
||||
|
||||
func NewElasticsearchSearch(config *gjson.Result) (*ElasticsearchSearch, error) {
|
||||
func NewElasticsearchSearch(config *gjson.Result, needReference bool) (*ElasticsearchSearch, error) {
|
||||
engine := &ElasticsearchSearch{}
|
||||
serviceName := config.Get("serviceName").String()
|
||||
if serviceName == "" {
|
||||
@@ -35,7 +35,13 @@ func NewElasticsearchSearch(config *gjson.Result) (*ElasticsearchSearch, error)
|
||||
}
|
||||
servicePort := config.Get("servicePort").Int()
|
||||
if servicePort == 0 {
|
||||
return nil, errors.New("servicePort not found")
|
||||
if strings.HasSuffix(serviceName, ".static") {
|
||||
servicePort = 80
|
||||
} else if strings.HasSuffix(serviceName, ".dns") {
|
||||
servicePort = 443
|
||||
} else {
|
||||
return nil, errors.New("servicePort not found")
|
||||
}
|
||||
}
|
||||
engine.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
|
||||
FQDN: serviceName,
|
||||
@@ -54,14 +60,18 @@ func NewElasticsearchSearch(config *gjson.Result) (*ElasticsearchSearch, error)
|
||||
if engine.semanticTextField == "" {
|
||||
return nil, errors.New("semanticTextField not found")
|
||||
}
|
||||
engine.linkField = config.Get("linkField").String()
|
||||
if engine.linkField == "" {
|
||||
return nil, errors.New("linkField not found")
|
||||
}
|
||||
engine.titleField = config.Get("titleField").String()
|
||||
if engine.titleField == "" {
|
||||
return nil, errors.New("titleField not found")
|
||||
|
||||
if needReference {
|
||||
engine.linkField = config.Get("linkField").String()
|
||||
if engine.linkField == "" {
|
||||
return nil, errors.New("linkField not found")
|
||||
}
|
||||
engine.titleField = config.Get("titleField").String()
|
||||
if engine.titleField == "" {
|
||||
return nil, errors.New("titleField not found")
|
||||
}
|
||||
}
|
||||
|
||||
engine.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint())
|
||||
if engine.timeoutMillisecond == 0 {
|
||||
engine.timeoutMillisecond = 5000
|
||||
@@ -93,6 +103,9 @@ func (e ElasticsearchSearch) generateAuthorizationHeader() string {
|
||||
func (e ElasticsearchSearch) generateQueryBody(ctx engine.SearchContext) string {
|
||||
queryText := strings.Join(ctx.Querys, " ")
|
||||
return fmt.Sprintf(`{
|
||||
"_source":{
|
||||
"excludes": "%s"
|
||||
},
|
||||
"retriever": {
|
||||
"rrf": {
|
||||
"retrievers": [
|
||||
@@ -118,7 +131,7 @@ func (e ElasticsearchSearch) generateQueryBody(ctx engine.SearchContext) string
|
||||
]
|
||||
}
|
||||
}
|
||||
}`, e.contentField, queryText, e.semanticTextField, queryText)
|
||||
}`, e.semanticTextField, e.contentField, queryText, e.semanticTextField, queryText)
|
||||
}
|
||||
|
||||
func (e ElasticsearchSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
|
||||
@@ -145,9 +158,7 @@ func (e ElasticsearchSearch) ParseResult(ctx engine.SearchContext, response []by
|
||||
Link: source.Get(e.linkField).String(),
|
||||
Content: source.Get(e.contentField).String(),
|
||||
}
|
||||
if result.Valid() {
|
||||
results = append(results, result)
|
||||
}
|
||||
results = append(results, result)
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
@@ -185,7 +185,7 @@ func parseConfig(json gjson.Result, config *Config, log wrapper.Log) error {
|
||||
arxivExists = true
|
||||
onlyQuark = false
|
||||
case "elasticsearch":
|
||||
searchEngine, err := elasticsearch.NewElasticsearchSearch(&e)
|
||||
searchEngine, err := elasticsearch.NewElasticsearchSearch(&e, config.needReference)
|
||||
if err != nil {
|
||||
return fmt.Errorf("elasticsearch search engine init failed:%s", err)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user