mirror of
https://github.com/alibaba/higress.git
synced 2026-04-21 12:07:26 +08:00
135 lines
3.8 KiB
Go
135 lines
3.8 KiB
Go
package arxiv
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
|
|
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
|
|
"github.com/antchfx/xmlquery"
|
|
"github.com/tidwall/gjson"
|
|
|
|
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
|
|
)
|
|
|
|
type ArxivSearch struct {
|
|
optionArgs map[string]string
|
|
start int
|
|
count int
|
|
timeoutMillisecond uint32
|
|
client wrapper.HttpClient
|
|
arxivCategory string
|
|
}
|
|
|
|
func NewArxivSearch(config *gjson.Result) (*ArxivSearch, error) {
|
|
engine := &ArxivSearch{}
|
|
serviceName := config.Get("serviceName").String()
|
|
if serviceName == "" {
|
|
return nil, errors.New("serviceName not found")
|
|
}
|
|
servicePort := config.Get("servicePort").Int()
|
|
if servicePort == 0 {
|
|
return nil, errors.New("servicePort not found")
|
|
}
|
|
engine.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
|
|
FQDN: serviceName,
|
|
Port: servicePort,
|
|
})
|
|
engine.start = int(config.Get("start").Uint())
|
|
engine.count = int(config.Get("count").Uint())
|
|
if engine.count == 0 {
|
|
engine.count = 10
|
|
}
|
|
engine.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint())
|
|
if engine.timeoutMillisecond == 0 {
|
|
engine.timeoutMillisecond = 5000
|
|
}
|
|
engine.optionArgs = map[string]string{}
|
|
for key, value := range config.Get("optionArgs").Map() {
|
|
valStr := value.String()
|
|
if valStr != "" {
|
|
engine.optionArgs[key] = value.String()
|
|
}
|
|
}
|
|
engine.arxivCategory = config.Get("arxivCategory").String()
|
|
return engine, nil
|
|
}
|
|
|
|
func (a ArxivSearch) NeedExectue(ctx engine.SearchContext) bool {
|
|
return ctx.EngineType == "arxiv"
|
|
}
|
|
|
|
func (a ArxivSearch) Client() wrapper.HttpClient {
|
|
return a.client
|
|
}
|
|
|
|
func (a ArxivSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
|
|
var searchQueryItems []string
|
|
for _, q := range ctx.Querys {
|
|
searchQueryItems = append(searchQueryItems, fmt.Sprintf("all:%s", url.QueryEscape(q)))
|
|
}
|
|
searchQuery := strings.Join(searchQueryItems, "+AND+")
|
|
category := ctx.ArxivCategory
|
|
if category == "" {
|
|
category = a.arxivCategory
|
|
}
|
|
if category != "" {
|
|
searchQuery = fmt.Sprintf("%s+AND+cat:%s", searchQuery, category)
|
|
}
|
|
queryUrl := fmt.Sprintf("https://export.arxiv.org/api/query?search_query=%s&max_results=%d&start=%d",
|
|
searchQuery, a.count, a.start)
|
|
var extraArgs []string
|
|
for key, value := range a.optionArgs {
|
|
extraArgs = append(extraArgs, fmt.Sprintf("%s=%s", key, url.QueryEscape(value)))
|
|
}
|
|
if len(extraArgs) > 0 {
|
|
queryUrl = fmt.Sprintf("%s&%s", queryUrl, strings.Join(extraArgs, "&"))
|
|
}
|
|
return engine.CallArgs{
|
|
Method: http.MethodGet,
|
|
Url: queryUrl,
|
|
Headers: [][2]string{{"Accept", "application/atom+xml"}},
|
|
TimeoutMillisecond: a.timeoutMillisecond,
|
|
}
|
|
}
|
|
|
|
func (a ArxivSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
|
|
var results []engine.SearchResult
|
|
doc, err := xmlquery.Parse(bytes.NewReader(response))
|
|
if err != nil {
|
|
return results
|
|
}
|
|
|
|
entries := xmlquery.Find(doc, "//entry")
|
|
for _, entry := range entries {
|
|
title := entry.SelectElement("title").InnerText()
|
|
link := ""
|
|
for _, l := range entry.SelectElements("link") {
|
|
if l.SelectAttr("rel") == "alternate" && l.SelectAttr("type") == "text/html" {
|
|
link = l.SelectAttr("href")
|
|
break
|
|
}
|
|
}
|
|
summary := entry.SelectElement("summary").InnerText()
|
|
publishTime := entry.SelectElement("published").InnerText()
|
|
authors := entry.SelectElements("author")
|
|
var authorNames []string
|
|
for _, author := range authors {
|
|
authorNames = append(authorNames, author.SelectElement("name").InnerText())
|
|
}
|
|
content := fmt.Sprintf("%s\nAuthors: %s\nPublication time: %s", summary, strings.Join(authorNames, ", "), publishTime)
|
|
result := engine.SearchResult{
|
|
Title: title,
|
|
Link: link,
|
|
Content: content,
|
|
}
|
|
if result.Valid() {
|
|
results = append(results, result)
|
|
}
|
|
}
|
|
return results
|
|
}
|