diff --git a/plugins/wasm-go/extensions/ai-search/README.md b/plugins/wasm-go/extensions/ai-search/README.md index 4cd0c6ed9..9229fb165 100644 --- a/plugins/wasm-go/extensions/ai-search/README.md +++ b/plugins/wasm-go/extensions/ai-search/README.md @@ -75,18 +75,22 @@ description: higress 支持通过集成搜索引擎(Google/Bing/Arxiv/Elastics ## Elasticsearch 特定配置 -| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | -|------|----------|----------|--------|-----------------------| -| index | string | 必填 | - | 要搜索的Elasticsearch索引名称 | -| contentField | string | 必填 | - | 要查询的内容字段名称 | -| semanticTextField | string | 必填 | - | 要查询的 embedding 字段名称 | -| linkField | string | 必填 | - | 结果链接字段名称 | -| titleField | string | 必填 | - | 结果标题字段名称 | -| username | string | 选填 | - | Elasticsearch 用户名 | -| password | string | 选填 | - | Elasticsearch 密码 | +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|------|----------|------|--------|------------------------------------| +| index | string | 必填 | - | 要搜索的 Elasticsearch 索引名称 | +| contentField | string | 必填 | - | 要查询的内容字段名称 | +| semanticTextField | string | 必填 | - | 要查询的 embedding 字段名称 | +| linkField | string | 选填 | - | 结果链接字段名称,当配置 `needReference` 时需要填写 | +| titleField | string | 选填 | - | 结果标题字段名称,当配置 `needReference` 时需要填写 | +| username | string | 选填 | - | Elasticsearch 用户名 | +| password | string | 选填 | - | Elasticsearch 密码 | 混合搜索中使用的 [Reciprocal Rank Fusion (RRF)](https://www.elastic.co/guide/en/elasticsearch/reference/8.17/rrf.html) 查询要求 Elasticsearch 的版本在 8.8 及以上。 +目前文档向量化依赖于 Elasticsearch 的 Embedding 模型,该功能需要 Elasticsearch 企业版 License,或可使用 30 天的 Trial License。安装 Elasticsearch 内置 Embedding 模型的步骤可参考[该文档](https://www.elastic.co/docs/explore-analyze/machine-learning/nlp/ml-nlp-elser#alternative-download-deploy);若需安装第三方 Embedding 模型,可参考[该文档](https://www.elastic.co/docs/explore-analyze/machine-learning/nlp/ml-nlp-text-emb-vector-search-example)。 + +有关 ai-search 插件集成 Elasticsearch 的完整教程,请参考:[使用 LangChain + Higress + Elasticsearch 构建 RAG 应用](https://cr7258.github.io/blogs/original/2025/15-rag-higress-es-langchain)。 + ## Quark 特定配置 | 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | @@ -204,13 +208,9 @@ searchFrom: searchFrom: - type: elasticsearch serviceName: "es-svc.static" - # 固定地址服务的端口默认是80 - servicePort: 80 index: "knowledge_base" contentField: "content" semanticTextField: "semantic_text" - linkField: "url" - titleField: "title" # username: "elastic" # password: "password" ``` diff --git a/plugins/wasm-go/extensions/ai-search/README_EN.md b/plugins/wasm-go/extensions/ai-search/README_EN.md index cac9539e9..03fbe165d 100644 --- a/plugins/wasm-go/extensions/ai-search/README_EN.md +++ b/plugins/wasm-go/extensions/ai-search/README_EN.md @@ -80,13 +80,17 @@ It is strongly recommended to enable this feature when using Arxiv or Elasticsea | index | string | Required | - | Elasticsearch index name to search | | contentField | string | Required | - | Content field name to query | | semanticTextField | string | Required | - | Embedding field name to query | -| linkField | string | Required | - | Result link field name | -| titleField | string | Required | - | Result title field name | +| linkField | string | Optional | - | Result link field name, needed when `needReference` is configured | +| titleField | string | Optional | - | Result title field name, needed when `needReference` is configured | | username | string | Optional | - | Elasticsearch username | | password | string | Optional | - | Elasticsearch password | The [Reciprocal Rank Fusion (RRF)](https://www.elastic.co/guide/en/elasticsearch/reference/8.17/rrf.html) query used in hybrid search requires Elasticsearch version 8.8 or higher. +Currently, document vectorization relies on Elasticsearch's embedding model, which requires an Elasticsearch Enterprise license or a 30-day Trial license. To install the built-in embedding model in Elasticsearch, please refer to [this documentation](https://www.elastic.co/docs/explore-analyze/machine-learning/nlp/ml-nlp-elser#alternative-download-deploy). If you want to install a third-party embedding model, please refer to [this guide](https://www.elastic.co/docs/explore-analyze/machine-learning/nlp/ml-nlp-text-emb-vector-search-example). + +For a complete tutorial on integrating the ai-search plugin with Elasticsearch, please refer to: [Building a RAG Application with LangChain + Higress + Elasticsearch](https://cr7258.github.io/blogs/original/2025/15-rag-higress-es-langchain). + ## Quark Specific Configuration | Name | Data Type | Requirement | Default Value | Description | @@ -203,13 +207,9 @@ Note that excessive concurrency may lead to rate limiting, adjust according to a searchFrom: - type: elasticsearch serviceName: "es-svc.static" - # static ip service use 80 as default port - servicePort: 80 index: "knowledge_base" contentField: "content" semanticTextField: "semantic_text" - linkField: "url" - titleField: "title" # username: "elastic" # password: "password" ``` diff --git a/plugins/wasm-go/extensions/ai-search/engine/elasticsearch/elasticsearch.go b/plugins/wasm-go/extensions/ai-search/engine/elasticsearch/elasticsearch.go index 85fc1055f..164df9838 100644 --- a/plugins/wasm-go/extensions/ai-search/engine/elasticsearch/elasticsearch.go +++ b/plugins/wasm-go/extensions/ai-search/engine/elasticsearch/elasticsearch.go @@ -27,7 +27,7 @@ type ElasticsearchSearch struct { password string } -func NewElasticsearchSearch(config *gjson.Result) (*ElasticsearchSearch, error) { +func NewElasticsearchSearch(config *gjson.Result, needReference bool) (*ElasticsearchSearch, error) { engine := &ElasticsearchSearch{} serviceName := config.Get("serviceName").String() if serviceName == "" { @@ -35,7 +35,13 @@ func NewElasticsearchSearch(config *gjson.Result) (*ElasticsearchSearch, error) } servicePort := config.Get("servicePort").Int() if servicePort == 0 { - return nil, errors.New("servicePort not found") + if strings.HasSuffix(serviceName, ".static") { + servicePort = 80 + } else if strings.HasSuffix(serviceName, ".dns") { + servicePort = 443 + } else { + return nil, errors.New("servicePort not found") + } } engine.client = wrapper.NewClusterClient(wrapper.FQDNCluster{ FQDN: serviceName, @@ -54,14 +60,18 @@ func NewElasticsearchSearch(config *gjson.Result) (*ElasticsearchSearch, error) if engine.semanticTextField == "" { return nil, errors.New("semanticTextField not found") } - engine.linkField = config.Get("linkField").String() - if engine.linkField == "" { - return nil, errors.New("linkField not found") - } - engine.titleField = config.Get("titleField").String() - if engine.titleField == "" { - return nil, errors.New("titleField not found") + + if needReference { + engine.linkField = config.Get("linkField").String() + if engine.linkField == "" { + return nil, errors.New("linkField not found") + } + engine.titleField = config.Get("titleField").String() + if engine.titleField == "" { + return nil, errors.New("titleField not found") + } } + engine.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint()) if engine.timeoutMillisecond == 0 { engine.timeoutMillisecond = 5000 @@ -93,6 +103,9 @@ func (e ElasticsearchSearch) generateAuthorizationHeader() string { func (e ElasticsearchSearch) generateQueryBody(ctx engine.SearchContext) string { queryText := strings.Join(ctx.Querys, " ") return fmt.Sprintf(`{ + "_source":{ + "excludes": "%s" + }, "retriever": { "rrf": { "retrievers": [ @@ -118,7 +131,7 @@ func (e ElasticsearchSearch) generateQueryBody(ctx engine.SearchContext) string ] } } - }`, e.contentField, queryText, e.semanticTextField, queryText) + }`, e.semanticTextField, e.contentField, queryText, e.semanticTextField, queryText) } func (e ElasticsearchSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs { @@ -145,9 +158,7 @@ func (e ElasticsearchSearch) ParseResult(ctx engine.SearchContext, response []by Link: source.Get(e.linkField).String(), Content: source.Get(e.contentField).String(), } - if result.Valid() { - results = append(results, result) - } + results = append(results, result) } return results } diff --git a/plugins/wasm-go/extensions/ai-search/main.go b/plugins/wasm-go/extensions/ai-search/main.go index 80a7aff28..d19d09b5a 100644 --- a/plugins/wasm-go/extensions/ai-search/main.go +++ b/plugins/wasm-go/extensions/ai-search/main.go @@ -185,7 +185,7 @@ func parseConfig(json gjson.Result, config *Config, log wrapper.Log) error { arxivExists = true onlyQuark = false case "elasticsearch": - searchEngine, err := elasticsearch.NewElasticsearchSearch(&e) + searchEngine, err := elasticsearch.NewElasticsearchSearch(&e, config.needReference) if err != nil { return fmt.Errorf("elasticsearch search engine init failed:%s", err) }