mirror of
https://github.com/alibaba/higress.git
synced 2026-02-23 04:00:51 +08:00
ai-search support quark (#1811)
This commit is contained in:
@@ -48,10 +48,10 @@ description: higress 支持通过集成搜索引擎(Google/Bing/Arxiv/Elastics
|
||||
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|
||||
|------|----------|----------|--------|------|
|
||||
| type | string | 必填 | - | 引擎类型(google/bing/arxiv/elasticsearch) |
|
||||
| apiKey | string | 必填 | - | 搜索引擎API密钥 |
|
||||
| type | string | 必填 | - | 引擎类型(google/bing/arxiv/elasticsearch/quark) |
|
||||
| serviceName | string | 必填 | - | 后端服务名称 |
|
||||
| servicePort | number | 必填 | - | 后端服务端口 |
|
||||
| apiKey | string | 必填 | - | 搜索引擎API密钥/Aliyun AccessKey |
|
||||
| count | number | 选填 | 10 | 单次搜索返回结果数量 |
|
||||
| start | number | 选填 | 0 | 搜索结果偏移量(从第start+1条结果开始返回) |
|
||||
| timeoutMillisecond | number | 选填 | 5000 | API调用超时时间(毫秒) |
|
||||
@@ -78,6 +78,12 @@ description: higress 支持通过集成搜索引擎(Google/Bing/Arxiv/Elastics
|
||||
| linkField | string | 必填 | - | 结果链接字段名称 |
|
||||
| titleField | string | 必填 | - | 结果标题字段名称 |
|
||||
|
||||
## Quark 特定配置
|
||||
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|
||||
|------|----------|----------|--------|------|
|
||||
| secretKey | string | 必填 | - | Aliyun SecretKey |
|
||||
| endpoint | string | 选填 | iqs.cn-zhangjiakou.aliyuncs.com | 请求搜索引擎服务时的接入点 |
|
||||
|
||||
## 配置示例
|
||||
|
||||
@@ -94,6 +100,7 @@ searchFrom:
|
||||
count: 5
|
||||
optionArgs:
|
||||
fileType: "pdf"
|
||||
```
|
||||
|
||||
### Arxiv搜索配置
|
||||
|
||||
@@ -106,6 +113,20 @@ searchFrom:
|
||||
count: 10
|
||||
```
|
||||
|
||||
|
||||
### 夸克搜索配置
|
||||
|
||||
```yaml
|
||||
searchFrom:
|
||||
- type: quark
|
||||
serviceName: "quark-svc.dns"
|
||||
servicePort: 443
|
||||
apiKey: "aliyun accessKey"
|
||||
count: 10 # 搜索网页数,最多10条
|
||||
secretKey: "aliyun secretKey"
|
||||
endpoint: "iqs.cn-zhangjiakou.aliyuncs.com"
|
||||
```
|
||||
|
||||
### 多搜索引擎配置
|
||||
|
||||
```yaml
|
||||
|
||||
@@ -48,8 +48,8 @@ It is strongly recommended to enable this feature when using Arxiv or Elasticsea
|
||||
|
||||
| Name | Data Type | Requirement | Default Value | Description |
|
||||
|------|-----------|-------------|---------------|-------------|
|
||||
| type | string | Required | - | Engine type (google/bing/arxiv/elasticsearch) |
|
||||
| apiKey | string | Required | - | Search engine API key |
|
||||
| type | string | Required | - | Engine type (google/bing/arxiv/elasticsearch/quark) |
|
||||
| apiKey | string | Required | - | Search engine API key/Aliyun AccessKey |
|
||||
| serviceName | string | Required | - | Backend service name |
|
||||
| servicePort | number | Required | - | Backend service port |
|
||||
| count | number | Optional | 10 | Number of results returned per search |
|
||||
@@ -78,6 +78,12 @@ It is strongly recommended to enable this feature when using Arxiv or Elasticsea
|
||||
| linkField | string | Required | - | Result link field name |
|
||||
| titleField | string | Required | - | Result title field name |
|
||||
|
||||
## Quark Specific Configuration
|
||||
|
||||
| Name | Data Type | Requirement | Default Value | Description |
|
||||
|------|----------|----------|--------|------|
|
||||
| secretKey | string | Required | - | Aliyun SecretKey |
|
||||
| endpoint | string | Optional | iqs.cn-zhangjiakou.aliyuncs.com | Endpoint used when requesting the Quark search service |
|
||||
|
||||
## Configuration Examples
|
||||
|
||||
@@ -107,6 +113,19 @@ searchFrom:
|
||||
count: 10
|
||||
```
|
||||
|
||||
### Quark Search Configuration
|
||||
|
||||
```yaml
|
||||
searchFrom:
|
||||
- type: quark
|
||||
serviceName: "quark-svc.dns"
|
||||
servicePort: 443
|
||||
apiKey: "aliyun accessKey"
|
||||
count: 10
|
||||
secretKey: "aliyun secretKey"
|
||||
endpoint: "iqs.cn-zhangjiakou.aliyuncs.com"
|
||||
```
|
||||
|
||||
### Multiple Search Engines Configuration
|
||||
|
||||
```yaml
|
||||
|
||||
@@ -60,7 +60,7 @@ func NewBingSearch(config *gjson.Result) (*BingSearch, error) {
|
||||
}
|
||||
|
||||
func (b BingSearch) NeedExectue(ctx engine.SearchContext) bool {
|
||||
return ctx.EngineType == "internet"
|
||||
return ctx.EngineType == "" || ctx.EngineType == "internet"
|
||||
}
|
||||
|
||||
func (b BingSearch) Client() wrapper.HttpClient {
|
||||
|
||||
@@ -68,7 +68,7 @@ func NewGoogleSearch(config *gjson.Result) (*GoogleSearch, error) {
|
||||
}
|
||||
|
||||
func (g GoogleSearch) NeedExectue(ctx engine.SearchContext) bool {
|
||||
return ctx.EngineType == "internet"
|
||||
return ctx.EngineType == "" || ctx.EngineType == "internet"
|
||||
}
|
||||
|
||||
func (g GoogleSearch) Client() wrapper.HttpClient {
|
||||
|
||||
194
plugins/wasm-go/extensions/ai-search/engine/quark/quark.go
Normal file
194
plugins/wasm-go/extensions/ai-search/engine/quark/quark.go
Normal file
@@ -0,0 +1,194 @@
|
||||
package quark
|
||||
|
||||
import (
|
||||
"crypto/hmac"
|
||||
"crypto/rand"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
|
||||
"github.com/tidwall/gjson"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
|
||||
)
|
||||
|
||||
type QuarkSearch struct {
|
||||
apiKey string
|
||||
secretKey string
|
||||
timeoutMillisecond uint32
|
||||
client wrapper.HttpClient
|
||||
count uint32
|
||||
endpoint string
|
||||
}
|
||||
|
||||
// Fixed values used when building and signing a search request.
const (
	// Path is the request path of the search API.
	Path = "/linked-retrieval/linked-retrieval-entry/v2/linkedRetrieval/commands/genericSearch"

	// ContentSha256 is the hex SHA-256 digest of an empty request body.
	ContentSha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

	// Action and Version are sent in the x-acs-action and x-acs-version headers.
	Action  = "GenericSearch"
	Version = "2024-11-11"

	// SignatureAlgorithm prefixes both the string to sign and the
	// Authorization header.
	SignatureAlgorithm = "ACS3-HMAC-SHA256"

	// SignedHeaders lists the header names covered by the signature.
	SignedHeaders = "host;x-acs-action;x-acs-content-sha256;x-acs-date;x-acs-signature-nonce;x-acs-version"
)
|
||||
|
||||
// urlEncoding percent-encodes rawStr so that only unreserved characters
// (letters, digits, '-', '_', '.', '~') stay literal and a space becomes
// "%20".
func urlEncoding(rawStr string) string {
	// QueryEscape already escapes every reserved character (a literal '+'
	// becomes "%2B"), so any '+' left in its output stands for a space.
	escaped := url.QueryEscape(rawStr)
	return strings.ReplaceAll(escaped, "+", "%20")
}
|
||||
|
||||
// getSignature returns the lowercase hex HMAC-SHA256 of stringToSign keyed
// with secret.
func getSignature(stringToSign, secret string) string {
	mac := hmac.New(sha256.New, []byte(secret))
	mac.Write([]byte(stringToSign))
	return hex.EncodeToString(mac.Sum(nil))
}
|
||||
|
||||
// getCanonicalHeaders renders params as "name:value" lines ordered by header
// name, each terminated by "\n". An empty or nil map yields a single "\n".
//
// Ordering is done on the header names themselves (not on the joined
// "name:value" text) so that a name which is a prefix of another always
// sorts first, independent of the values.
func getCanonicalHeaders(params map[string]string) string {
	names := make([]string, 0, len(params))
	for name := range params {
		names = append(names, name)
	}
	// sort.Strings uses a strict "<" ordering, as the sort package requires.
	sort.Strings(names)

	lines := make([]string, 0, len(names))
	for _, name := range names {
		lines = append(lines, name+":"+params[name])
	}
	return strings.Join(lines, "\n") + "\n"
}
|
||||
|
||||
// getHasedString returns the lowercase hex SHA-256 digest of input.
func getHasedString(input string) string {
	hasher := sha256.New()
	hasher.Write([]byte(input))
	return hex.EncodeToString(hasher.Sum(nil))
}
|
||||
|
||||
// generateHexID returns a random lowercase hex string of exactly length
// characters, read from crypto/rand.
//
// It returns an error when length is negative or when the random source
// fails. Odd lengths are honoured by generating one extra nibble and
// trimming it.
func generateHexID(length int) (string, error) {
	if length < 0 {
		return "", errors.New("hex id length must not be negative")
	}
	// Each byte encodes to two hex characters; round up so odd lengths
	// still get enough material.
	buf := make([]byte, (length+1)/2)
	if _, err := rand.Read(buf); err != nil {
		return "", err
	}
	return hex.EncodeToString(buf)[:length], nil
}
|
||||
|
||||
func NewQuarkSearch(config *gjson.Result) (*QuarkSearch, error) {
|
||||
engine := &QuarkSearch{}
|
||||
engine.apiKey = config.Get("apiKey").String()
|
||||
if engine.apiKey == "" {
|
||||
return nil, errors.New("apiKey not found")
|
||||
}
|
||||
engine.secretKey = config.Get("secretKey").String()
|
||||
if engine.secretKey == "" {
|
||||
return nil, errors.New("secretKey not found")
|
||||
}
|
||||
serviceName := config.Get("serviceName").String()
|
||||
if serviceName == "" {
|
||||
return nil, errors.New("serviceName not found")
|
||||
}
|
||||
servicePort := config.Get("servicePort").Int()
|
||||
if servicePort == 0 {
|
||||
return nil, errors.New("servicePort not found")
|
||||
}
|
||||
engine.endpoint = config.Get("endpoint").String()
|
||||
if engine.endpoint == "" {
|
||||
engine.endpoint = "iqs.cn-zhangjiakou.aliyuncs.com"
|
||||
}
|
||||
engine.count = uint32(config.Get("count").Int())
|
||||
if engine.count == 0 {
|
||||
engine.count = 10
|
||||
}
|
||||
engine.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
|
||||
FQDN: serviceName,
|
||||
Port: servicePort,
|
||||
})
|
||||
engine.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint())
|
||||
if engine.timeoutMillisecond == 0 {
|
||||
engine.timeoutMillisecond = 5000
|
||||
}
|
||||
return engine, nil
|
||||
}
|
||||
|
||||
func (g QuarkSearch) NeedExectue(ctx engine.SearchContext) bool {
|
||||
return ctx.EngineType == "" || ctx.EngineType == "internet"
|
||||
}
|
||||
|
||||
func (g QuarkSearch) Client() wrapper.HttpClient {
|
||||
return g.client
|
||||
}
|
||||
|
||||
func (g QuarkSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
|
||||
query := strings.Join(ctx.Querys, " ")
|
||||
canonicalURI := Path
|
||||
queryParams := map[string]string{
|
||||
"query": query,
|
||||
"timeRange": "NoLimit",
|
||||
}
|
||||
queryParamsStr := []string{}
|
||||
for k, v := range queryParams {
|
||||
queryParamsStr = append(queryParamsStr, k+"="+urlEncoding(v))
|
||||
}
|
||||
canonicalQueryString := strings.Join(queryParamsStr, "&")
|
||||
timeStamp := time.Now().UTC().Format("2006-01-02T15:04:05Z")
|
||||
randomID, _ := generateHexID(32)
|
||||
params := map[string]string{
|
||||
"host": g.endpoint,
|
||||
"x-acs-action": Action,
|
||||
"x-acs-content-sha256": ContentSha256,
|
||||
"x-acs-date": timeStamp,
|
||||
"x-acs-signature-nonce": randomID,
|
||||
"x-acs-version": Version,
|
||||
}
|
||||
canonicalHeaders := getCanonicalHeaders(params)
|
||||
canonicalRequest := http.MethodGet + "\n" + canonicalURI + "\n" + canonicalQueryString + "\n" + canonicalHeaders + "\n" + SignedHeaders + "\n" + ContentSha256
|
||||
stringToSign := SignatureAlgorithm + "\n" + getHasedString(canonicalRequest)
|
||||
|
||||
authHeaderFmt := "%s Credential=%s,SignedHeaders=%s,Signature=%s"
|
||||
authHeader := fmt.Sprintf(authHeaderFmt, SignatureAlgorithm, g.apiKey, SignedHeaders, getSignature(stringToSign, g.secretKey))
|
||||
|
||||
reqParams := url.Values{}
|
||||
for k, v := range queryParams {
|
||||
reqParams.Add(k, v)
|
||||
}
|
||||
requestURL := fmt.Sprintf("https://%s%s?%s", g.endpoint, Path, reqParams.Encode())
|
||||
|
||||
return engine.CallArgs{
|
||||
Method: http.MethodGet,
|
||||
Url: requestURL,
|
||||
Headers: [][2]string{
|
||||
{"x-acs-date", timeStamp},
|
||||
{"x-acs-signature-nonce", randomID},
|
||||
{"x-acs-content-sha256", ContentSha256},
|
||||
{"x-acs-version", Version},
|
||||
{"x-acs-action", Action},
|
||||
{"Authorization", authHeader},
|
||||
},
|
||||
Body: nil,
|
||||
TimeoutMillisecond: g.timeoutMillisecond,
|
||||
}
|
||||
}
|
||||
|
||||
func (g QuarkSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
|
||||
jsonObj := gjson.ParseBytes(response)
|
||||
var results []engine.SearchResult
|
||||
for index, item := range jsonObj.Get("pageItems").Array() {
|
||||
result := engine.SearchResult{
|
||||
Title: item.Get("title").String(),
|
||||
Link: item.Get("link").String(),
|
||||
Content: item.Get("mainText").String(),
|
||||
}
|
||||
if result.Valid() && index < int(g.count) {
|
||||
results = append(results, result)
|
||||
}
|
||||
}
|
||||
return results
|
||||
}
|
||||
@@ -34,6 +34,7 @@ import (
|
||||
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/bing"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/elasticsearch"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/google"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/quark"
|
||||
)
|
||||
|
||||
type SearchRewrite struct {
|
||||
@@ -166,6 +167,13 @@ func parseConfig(json gjson.Result, config *Config, log wrapper.Log) error {
|
||||
}
|
||||
config.engine = append(config.engine, searchEngine)
|
||||
privateExists = true
|
||||
case "quark":
|
||||
searchEngine, err := quark.NewQuarkSearch(&e)
|
||||
if err != nil {
|
||||
return fmt.Errorf("elasticsearch search engine init failed:%s", err)
|
||||
}
|
||||
config.engine = append(config.engine, searchEngine)
|
||||
internetExists = true
|
||||
default:
|
||||
return fmt.Errorf("unkown search engine:%s", e.Get("type").String())
|
||||
}
|
||||
@@ -541,7 +549,8 @@ func setReferencesToFirstMessage(ctx wrapper.HttpContext, chunk []byte, referenc
|
||||
if len(messages) > 1 {
|
||||
firstMessage := messages[0]
|
||||
log.Debugf("first message: %s", firstMessage)
|
||||
firstMessage = strings.TrimPrefix(firstMessage, "data: ")
|
||||
firstMessage = strings.TrimPrefix(firstMessage, "data:")
|
||||
firstMessage = strings.TrimPrefix(firstMessage, " ")
|
||||
firstMessage = strings.TrimSuffix(firstMessage, "\n")
|
||||
deltaContent := gjson.Get(firstMessage, "choices.0.delta.content")
|
||||
modifiedMessage, err := sjson.Set(firstMessage, "choices.0.delta.content", fmt.Sprintf("%s\n\n%s", references, deltaContent))
|
||||
|
||||
Reference in New Issue
Block a user