Add ai search plugin (#1804)

This commit is contained in:
澄潭
2025-02-24 11:14:47 +08:00
committed by GitHub
parent 2328e19c9d
commit 2e6ddd7e35
17 changed files with 2252 additions and 2 deletions

View File

@@ -0,0 +1,134 @@
package arxiv
import (
"bytes"
"errors"
"fmt"
"net/http"
"net/url"
"strings"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/antchfx/xmlquery"
"github.com/tidwall/gjson"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
)
// ArxivSearch is a search engine that queries the arXiv export API.
type ArxivSearch struct {
	// client issues the HTTP calls through the configured service cluster.
	client wrapper.HttpClient
	// arxivCategory is the configured category, used when the search
	// context does not provide one.
	arxivCategory string
	// optionArgs holds extra query parameters appended to every request.
	optionArgs map[string]string
	// start is sent as the "start" query parameter.
	start int
	// count is sent as the "max_results" query parameter.
	count int
	// timeoutMillisecond is the per-request timeout in milliseconds.
	timeoutMillisecond uint32
}
// NewArxivSearch builds an ArxivSearch from the plugin configuration.
//
// serviceName and servicePort are required; an error is returned when either
// is missing. count defaults to 10 and timeoutMillisecond to 5000 when unset
// or zero. Entries of optionArgs with an empty value are ignored.
func NewArxivSearch(config *gjson.Result) (*ArxivSearch, error) {
	serviceName := config.Get("serviceName").String()
	if serviceName == "" {
		return nil, errors.New("serviceName not found")
	}
	servicePort := config.Get("servicePort").Int()
	if servicePort == 0 {
		return nil, errors.New("servicePort not found")
	}

	count := int(config.Get("count").Uint())
	if count == 0 {
		count = 10
	}
	timeout := uint32(config.Get("timeoutMillisecond").Uint())
	if timeout == 0 {
		timeout = 5000
	}

	// Keep only options that carry a non-empty value.
	options := map[string]string{}
	for name, raw := range config.Get("optionArgs").Map() {
		if text := raw.String(); text != "" {
			options[name] = text
		}
	}

	return &ArxivSearch{
		client: wrapper.NewClusterClient(wrapper.FQDNCluster{
			FQDN: serviceName,
			Port: servicePort,
		}),
		arxivCategory:      config.Get("arxivCategory").String(),
		optionArgs:         options,
		start:              int(config.Get("start").Uint()),
		count:              count,
		timeoutMillisecond: timeout,
	}, nil
}
// NeedExectue reports whether this engine should handle the given search
// context, i.e. whether the requested engine type is "arxiv".
func (a ArxivSearch) NeedExectue(ctx engine.SearchContext) bool {
	const engineType = "arxiv"
	return ctx.EngineType == engineType
}
// Client returns the HTTP client created for the configured service cluster.
func (a ArxivSearch) Client() wrapper.HttpClient {
	return a.client
}
// CallArgs builds the GET request sent to the arXiv export API.
//
// Every entry of ctx.Querys becomes an "all:<term>" clause. The category
// (ctx.ArxivCategory, falling back to the configured arxivCategory) becomes a
// "cat:<category>" clause. All clauses are joined with "+AND+", so a request
// that only carries a category no longer starts with a dangling "+AND+".
// The category and the keys/values of optionArgs are query-escaped so they
// cannot break out of their parameter. The order of the extra arguments
// follows map iteration and is therefore unspecified.
func (a ArxivSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
	clauses := make([]string, 0, len(ctx.Querys)+1)
	for _, q := range ctx.Querys {
		clauses = append(clauses, fmt.Sprintf("all:%s", url.QueryEscape(q)))
	}
	category := ctx.ArxivCategory
	if category == "" {
		category = a.arxivCategory
	}
	if category != "" {
		// Added as a regular clause so the join below handles the separator.
		clauses = append(clauses, fmt.Sprintf("cat:%s", url.QueryEscape(category)))
	}
	queryUrl := fmt.Sprintf("https://export.arxiv.org/api/query?search_query=%s&max_results=%d&start=%d",
		strings.Join(clauses, "+AND+"), a.count, a.start)

	extraArgs := make([]string, 0, len(a.optionArgs))
	for key, value := range a.optionArgs {
		extraArgs = append(extraArgs, fmt.Sprintf("%s=%s", url.QueryEscape(key), url.QueryEscape(value)))
	}
	if len(extraArgs) > 0 {
		queryUrl = fmt.Sprintf("%s&%s", queryUrl, strings.Join(extraArgs, "&"))
	}
	return engine.CallArgs{
		Method:             http.MethodGet,
		Url:                queryUrl,
		Headers:            [][2]string{{"Accept", "application/atom+xml"}},
		TimeoutMillisecond: a.timeoutMillisecond,
	}
}
// ParseResult converts an arXiv Atom response into search results.
//
// Each <entry> element yields one result: the title, the href of the link
// with rel="alternate" and type="text/html", and a content string made of the
// summary, the author names and the publication time. Entries that end up
// without a title, link or content are dropped by SearchResult.Valid.
// A response that cannot be parsed as XML yields no results.
//
// Missing child elements are treated as empty text. SelectElement returns nil
// for an absent child, and calling InnerText on that nil node would panic.
func (a ArxivSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
	var results []engine.SearchResult
	doc, err := xmlquery.Parse(bytes.NewReader(response))
	if err != nil {
		// Best effort: an unparsable body simply contributes nothing.
		return results
	}

	// childText returns the inner text of the named child, or "" when the
	// child does not exist.
	childText := func(parent *xmlquery.Node, name string) string {
		if child := parent.SelectElement(name); child != nil {
			return child.InnerText()
		}
		return ""
	}

	for _, entry := range xmlquery.Find(doc, "//entry") {
		link := ""
		for _, l := range entry.SelectElements("link") {
			if l.SelectAttr("rel") == "alternate" && l.SelectAttr("type") == "text/html" {
				link = l.SelectAttr("href")
				break
			}
		}

		authors := entry.SelectElements("author")
		authorNames := make([]string, 0, len(authors))
		for _, author := range authors {
			authorNames = append(authorNames, childText(author, "name"))
		}

		content := fmt.Sprintf("%s\nAuthors: %s\nPublication time: %s",
			childText(entry, "summary"), strings.Join(authorNames, ", "), childText(entry, "published"))
		result := engine.SearchResult{
			Title:   childText(entry, "title"),
			Link:    link,
			Content: content,
		}
		if result.Valid() {
			results = append(results, result)
		}
	}
	return results
}

View File

@@ -0,0 +1,128 @@
package bing
import (
"errors"
"fmt"
"net/http"
"net/url"
"strings"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/tidwall/gjson"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
)
// BingSearch is a search engine that queries the Bing Web Search v7 API.
type BingSearch struct {
	// client issues the HTTP calls through the configured service cluster.
	client wrapper.HttpClient
	// apiKey is sent in the Ocp-Apim-Subscription-Key request header.
	apiKey string
	// optionArgs holds extra query parameters appended to every request.
	optionArgs map[string]string
	// start is sent as the "offset" query parameter.
	start int
	// count is sent as the "count" query parameter.
	count int
	// timeoutMillisecond is the per-request timeout in milliseconds.
	timeoutMillisecond uint32
}
// NewBingSearch builds a BingSearch from the plugin configuration.
//
// apiKey, serviceName and servicePort are required and are validated in that
// order; the first missing one is reported as an error. count defaults to 10
// and timeoutMillisecond to 5000 when unset or zero. Entries of optionArgs
// with an empty value are ignored.
func NewBingSearch(config *gjson.Result) (*BingSearch, error) {
	apiKey := config.Get("apiKey").String()
	if apiKey == "" {
		return nil, errors.New("apiKey not found")
	}
	serviceName := config.Get("serviceName").String()
	if serviceName == "" {
		return nil, errors.New("serviceName not found")
	}
	servicePort := config.Get("servicePort").Int()
	if servicePort == 0 {
		return nil, errors.New("servicePort not found")
	}

	count := int(config.Get("count").Uint())
	if count == 0 {
		count = 10
	}
	timeout := uint32(config.Get("timeoutMillisecond").Uint())
	if timeout == 0 {
		timeout = 5000
	}

	// Keep only options that carry a non-empty value.
	options := map[string]string{}
	for name, raw := range config.Get("optionArgs").Map() {
		if text := raw.String(); text != "" {
			options[name] = text
		}
	}

	return &BingSearch{
		client: wrapper.NewClusterClient(wrapper.FQDNCluster{
			FQDN: serviceName,
			Port: servicePort,
		}),
		apiKey:             apiKey,
		optionArgs:         options,
		start:              int(config.Get("start").Uint()),
		count:              count,
		timeoutMillisecond: timeout,
	}, nil
}
// NeedExectue reports whether this engine should handle the given search
// context, i.e. whether the requested engine type is "internet".
func (b BingSearch) NeedExectue(ctx engine.SearchContext) bool {
	const engineType = "internet"
	return ctx.EngineType == engineType
}
// Client returns the HTTP client created for the configured service cluster.
func (b BingSearch) Client() wrapper.HttpClient {
	return b.client
}
// CallArgs builds the GET request sent to the Bing Web Search API.
//
// All entries of ctx.Querys are joined with a space into the "q" parameter.
// optionArgs are appended as extra parameters and, when ctx.Language is set,
// it is sent as "mkt". Keys, values and the language are query-escaped so a
// value containing "&", "=" or "#" cannot inject or truncate parameters.
// The order of the extra arguments follows map iteration and is unspecified.
// The API key travels in the Ocp-Apim-Subscription-Key header.
func (b BingSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
	queryUrl := fmt.Sprintf("https://api.bing.microsoft.com/v7.0/search?q=%s&count=%d&offset=%d",
		url.QueryEscape(strings.Join(ctx.Querys, " ")), b.count, b.start)

	extraArgs := make([]string, 0, len(b.optionArgs)+1)
	for key, value := range b.optionArgs {
		extraArgs = append(extraArgs, fmt.Sprintf("%s=%s", url.QueryEscape(key), url.QueryEscape(value)))
	}
	if ctx.Language != "" {
		extraArgs = append(extraArgs, fmt.Sprintf("mkt=%s", url.QueryEscape(ctx.Language)))
	}
	if len(extraArgs) > 0 {
		queryUrl = fmt.Sprintf("%s&%s", queryUrl, strings.Join(extraArgs, "&"))
	}
	return engine.CallArgs{
		Method:             http.MethodGet,
		Url:                queryUrl,
		Headers:            [][2]string{{"Ocp-Apim-Subscription-Key", b.apiKey}},
		TimeoutMillisecond: b.timeoutMillisecond,
	}
}
// ParseResult converts a Bing JSON response into search results.
//
// Results are emitted in this order: every web page from "webPages.value",
// each immediately followed by its "deepLinks", then every article from
// "news.value". Items lacking a title, link or content are skipped.
func (b BingSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
	var results []engine.SearchResult

	// collect appends item as a result when it is valid; contentKey names the
	// JSON field that carries the item's text.
	collect := func(item gjson.Result, contentKey string) {
		candidate := engine.SearchResult{
			Title:   item.Get("name").String(),
			Link:    item.Get("url").String(),
			Content: item.Get(contentKey).String(),
		}
		if candidate.Valid() {
			results = append(results, candidate)
		}
	}

	root := gjson.ParseBytes(response)
	for _, page := range root.Get("webPages.value").Array() {
		collect(page, "snippet")
		for _, deepLink := range page.Get("deepLinks").Array() {
			collect(deepLink, "snippet")
		}
	}
	for _, article := range root.Get("news.value").Array() {
		collect(article, "description")
	}
	return results
}

View File

@@ -0,0 +1,114 @@
package elasticsearch
import (
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"strings"

	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
	"github.com/tidwall/gjson"
)
// ElasticsearchSearch is a search engine that runs match queries against an
// Elasticsearch index.
type ElasticsearchSearch struct {
	// client issues the HTTP calls through the configured service cluster.
	client wrapper.HttpClient
	// index is the name of the index whose _search endpoint is called.
	index string
	// titleField is the _source field mapped to the result title.
	titleField string
	// linkField is the _source field mapped to the result link.
	linkField string
	// contentField is the field that is matched against the query and
	// mapped to the result content.
	contentField string
	// start is sent as the "from" query parameter.
	start int
	// count is sent as the "size" query parameter.
	count int
	// timeoutMillisecond is the per-request timeout in milliseconds.
	timeoutMillisecond uint32
}
// NewElasticsearchSearch builds an ElasticsearchSearch from the plugin
// configuration.
//
// serviceName, servicePort, index, contentField, linkField and titleField are
// required and are validated in that order; the first missing one is reported
// as "<key> not found". timeoutMillisecond defaults to 5000 and count to 10
// when unset or zero.
func NewElasticsearchSearch(config *gjson.Result) (*ElasticsearchSearch, error) {
	// required reads a mandatory string setting.
	required := func(key string) (string, error) {
		value := config.Get(key).String()
		if value == "" {
			return "", errors.New(key + " not found")
		}
		return value, nil
	}

	serviceName, err := required("serviceName")
	if err != nil {
		return nil, err
	}
	servicePort := config.Get("servicePort").Int()
	if servicePort == 0 {
		return nil, errors.New("servicePort not found")
	}
	index, err := required("index")
	if err != nil {
		return nil, err
	}
	contentField, err := required("contentField")
	if err != nil {
		return nil, err
	}
	linkField, err := required("linkField")
	if err != nil {
		return nil, err
	}
	titleField, err := required("titleField")
	if err != nil {
		return nil, err
	}

	timeout := uint32(config.Get("timeoutMillisecond").Uint())
	if timeout == 0 {
		timeout = 5000
	}
	count := int(config.Get("count").Uint())
	if count == 0 {
		count = 10
	}

	return &ElasticsearchSearch{
		client: wrapper.NewClusterClient(wrapper.FQDNCluster{
			FQDN: serviceName,
			Port: servicePort,
		}),
		index:              index,
		contentField:       contentField,
		linkField:          linkField,
		titleField:         titleField,
		start:              int(config.Get("start").Uint()),
		count:              count,
		timeoutMillisecond: timeout,
	}, nil
}
// NeedExectue reports whether this engine should handle the given search
// context, i.e. whether the requested engine type is "private".
func (e ElasticsearchSearch) NeedExectue(ctx engine.SearchContext) bool {
	const engineType = "private"
	return ctx.EngineType == engineType
}
// Client returns the HTTP client created for the configured service cluster.
func (e ElasticsearchSearch) Client() wrapper.HttpClient {
	return e.client
}
// CallArgs builds the POST request for the index's _search endpoint.
//
// The body is a "match" query on contentField with operator AND, whose query
// text is all entries of ctx.Querys joined with a space. Paging is passed
// through the "from" and "size" URL parameters.
//
// The field name and the query text are encoded as JSON strings rather than
// pasted between quotes, so quotes, backslashes or control characters in the
// user query can neither produce invalid JSON nor alter the query structure.
func (e ElasticsearchSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
	// json.Marshal cannot fail for a plain string (invalid UTF-8 is replaced
	// rather than rejected), so the errors are deliberately ignored.
	fieldJSON, _ := json.Marshal(e.contentField)
	queryJSON, _ := json.Marshal(strings.Join(ctx.Querys, " "))

	// fieldJSON and queryJSON already include their surrounding quotes.
	searchBody := fmt.Sprintf(`{
  "query": {
    "match": {
      %s: {
        "query": %s,
        "operator": "AND"
      }
    }
  }
}`, fieldJSON, queryJSON)

	return engine.CallArgs{
		Method: http.MethodPost,
		Url:    fmt.Sprintf("/%s/_search?from=%d&size=%d", e.index, e.start, e.count),
		Headers: [][2]string{
			{"Content-Type", "application/json"},
		},
		Body:               []byte(searchBody),
		TimeoutMillisecond: e.timeoutMillisecond,
	}
}
// ParseResult converts an Elasticsearch _search response into search results.
//
// For every element of "hits.hits" the configured title, link and content
// fields are read from "_source". Hits lacking any of the three are skipped.
func (e ElasticsearchSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
	hits := gjson.ParseBytes(response).Get("hits.hits").Array()

	var results []engine.SearchResult
	for i := range hits {
		source := hits[i].Get("_source")
		candidate := engine.SearchResult{
			Title:   source.Get(e.titleField).String(),
			Link:    source.Get(e.linkField).String(),
			Content: source.Get(e.contentField).String(),
		}
		if !candidate.Valid() {
			continue
		}
		results = append(results, candidate)
	}
	return results
}

View File

@@ -0,0 +1,120 @@
package google
import (
"errors"
"fmt"
"net/http"
"net/url"
"strings"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/tidwall/gjson"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
)
// GoogleSearch is a search engine that queries the Google Custom Search JSON
// API.
type GoogleSearch struct {
	// client issues the HTTP calls through the configured service cluster.
	client wrapper.HttpClient
	// apiKey is sent as the "key" query parameter.
	apiKey string
	// cx is sent as the "cx" query parameter.
	cx string
	// optionArgs holds extra query parameters appended to every request.
	optionArgs map[string]string
	// start is the configured offset; it is sent incremented by one as the
	// "start" query parameter.
	start int
	// count is sent as the "num" query parameter.
	count int
	// timeoutMillisecond is the per-request timeout in milliseconds.
	timeoutMillisecond uint32
}
// NewGoogleSearch builds a GoogleSearch from the plugin configuration.
//
// apiKey, cx, serviceName and servicePort are required and are validated in
// that order; the first missing one is reported as an error. count defaults
// to 10 and timeoutMillisecond to 5000 when unset or zero. Entries of
// optionArgs with an empty value are ignored.
//
// An error is also returned when count exceeds 10 or start+count exceeds 100.
// The message now states the limits the check actually enforces: a count of
// exactly 10 is accepted.
func NewGoogleSearch(config *gjson.Result) (*GoogleSearch, error) {
	apiKey := config.Get("apiKey").String()
	if apiKey == "" {
		return nil, errors.New("apiKey not found")
	}
	cx := config.Get("cx").String()
	if cx == "" {
		return nil, errors.New("cx not found")
	}
	serviceName := config.Get("serviceName").String()
	if serviceName == "" {
		return nil, errors.New("serviceName not found")
	}
	servicePort := config.Get("servicePort").Int()
	if servicePort == 0 {
		return nil, errors.New("servicePort not found")
	}

	start := int(config.Get("start").Uint())
	count := int(config.Get("count").Uint())
	if count == 0 {
		count = 10
	}
	if count > 10 || start+count > 100 {
		return nil, errors.New("count must be less than or equal to 10, and start + count must be less than or equal to 100")
	}

	timeout := uint32(config.Get("timeoutMillisecond").Uint())
	if timeout == 0 {
		timeout = 5000
	}

	// Keep only options that carry a non-empty value.
	options := map[string]string{}
	for name, raw := range config.Get("optionArgs").Map() {
		if text := raw.String(); text != "" {
			options[name] = text
		}
	}

	return &GoogleSearch{
		client: wrapper.NewClusterClient(wrapper.FQDNCluster{
			FQDN: serviceName,
			Port: servicePort,
		}),
		apiKey:             apiKey,
		cx:                 cx,
		optionArgs:         options,
		start:              start,
		count:              count,
		timeoutMillisecond: timeout,
	}, nil
}
// NeedExectue reports whether this engine should handle the given search
// context, i.e. whether the requested engine type is "internet".
func (g GoogleSearch) NeedExectue(ctx engine.SearchContext) bool {
	const engineType = "internet"
	return ctx.EngineType == engineType
}
// Client returns the HTTP client created for the configured service cluster.
func (g GoogleSearch) Client() wrapper.HttpClient {
	return g.client
}
// CallArgs builds the GET request sent to the Google Custom Search API.
//
// All entries of ctx.Querys are joined with a space into the "q" parameter.
// The configured start is incremented by one before being sent as "start".
// optionArgs are appended as extra parameters and, when ctx.Language is set,
// it is sent as "lr=lang_<language>".
//
// cx, the API key, option keys/values and the language are query-escaped so
// that reserved characters such as "&", "=" or "#" cannot inject or truncate
// parameters. The order of the extra arguments follows map iteration and is
// unspecified.
func (g GoogleSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
	queryUrl := fmt.Sprintf("https://customsearch.googleapis.com/customsearch/v1?cx=%s&q=%s&num=%d&key=%s&start=%d",
		url.QueryEscape(g.cx), url.QueryEscape(strings.Join(ctx.Querys, " ")), g.count,
		url.QueryEscape(g.apiKey), g.start+1)

	extraArgs := make([]string, 0, len(g.optionArgs)+1)
	for key, value := range g.optionArgs {
		extraArgs = append(extraArgs, fmt.Sprintf("%s=%s", url.QueryEscape(key), url.QueryEscape(value)))
	}
	if ctx.Language != "" {
		extraArgs = append(extraArgs, fmt.Sprintf("lr=lang_%s", url.QueryEscape(ctx.Language)))
	}
	if len(extraArgs) > 0 {
		queryUrl = fmt.Sprintf("%s&%s", queryUrl, strings.Join(extraArgs, "&"))
	}
	return engine.CallArgs{
		Method: http.MethodGet,
		Url:    queryUrl,
		Headers: [][2]string{
			{"Accept", "application/json"},
		},
		TimeoutMillisecond: g.timeoutMillisecond,
	}
}
// ParseResult converts a Google Custom Search JSON response into search
// results.
//
// Each element of "items" yields one result built from its title, link and
// snippet. When the first metatags entry carries an "og:description", it is
// appended to the snippet after a "\n...\n" separator. Items lacking a title,
// link or content are skipped.
func (g GoogleSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
	items := gjson.ParseBytes(response).Get("items").Array()

	var results []engine.SearchResult
	for i := range items {
		item := items[i]
		content := item.Get("snippet").String()
		if description := item.Get("pagemap.metatags.0.og:description").String(); description != "" {
			content = content + "\n...\n" + description
		}
		candidate := engine.SearchResult{
			Title:   item.Get("title").String(),
			Link:    item.Get("link").String(),
			Content: content,
		}
		if !candidate.Valid() {
			continue
		}
		results = append(results, candidate)
	}
	return results
}

View File

@@ -0,0 +1,37 @@
package engine
import (
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
)
// SearchResult is a single hit returned by a search engine.
type SearchResult struct {
	// Title is the headline of the hit.
	Title string
	// Link is the URL of the hit.
	Link string
	// Content is the text of the hit.
	Content string
}

// Valid reports whether the result is complete, meaning that Title, Link and
// Content are all non-empty.
func (result SearchResult) Valid() bool {
	if result.Title == "" || result.Link == "" {
		return false
	}
	return result.Content != ""
}
// SearchContext describes one search request handed to the engines.
type SearchContext struct {
	// EngineType selects which engines run; each engine compares it with its
	// own type name in NeedExectue.
	EngineType string
	// Querys holds the query terms of the request.
	Querys []string
	// Language is an optional language hint; engines that support it forward
	// it to the backend when it is non-empty.
	Language string
	// ArxivCategory is an optional arXiv category for the request.
	ArxivCategory string
}
// CallArgs describes the HTTP request an engine wants to have issued.
type CallArgs struct {
	// Method is the HTTP method of the request.
	Method string
	// Url is the request URL or path, including the query string.
	Url string
	// Headers lists the request headers as name/value pairs.
	Headers [][2]string
	// Body is the request payload; it is left unset by engines that issue
	// GET requests.
	Body []byte
	// TimeoutMillisecond is the request timeout in milliseconds.
	TimeoutMillisecond uint32
}
// SearchEngine is the contract implemented by every search backend.
type SearchEngine interface {
	// NeedExectue reports whether the engine should run for ctx.
	NeedExectue(ctx SearchContext) bool
	// Client returns the HTTP client used to reach the engine's backend.
	Client() wrapper.HttpClient
	// CallArgs builds the HTTP request that performs the search for ctx.
	CallArgs(ctx SearchContext) CallArgs
	// ParseResult converts the raw response body into search results.
	ParseResult(ctx SearchContext, response []byte) []SearchResult
}