mirror of
https://github.com/alibaba/higress.git
synced 2026-06-10 05:07:30 +08:00
feat: add xfyun emb to ai-cache (#1921)
This commit is contained in:
@@ -100,14 +100,58 @@ LLM 结果缓存插件,默认配置方式可以直接用于 openai 协议的
|
||||
| responseTemplate | string | optional | `{"id":"ai-cache.hit","choices":[{"index":0,"message":{"role":"assistant","content":%s},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}` | 返回 HTTP 响应的模版,用 %s 标记需要被 cache value 替换的部分 |
|
||||
| streamResponseTemplate | string | optional | `data:{"id":"ai-cache.hit","choices":[{"index":0,"delta":{"role":"assistant","content":%s},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}\n\ndata:[DONE]\n\n` | 返回流式 HTTP 响应的模版,用 %s 标记需要被 cache value 替换的部分 |
|
||||
|
||||
# 向量数据库提供商特有配置
|
||||
## Chroma
|
||||
## 文本向量化提供商特有配置
|
||||
|
||||
### OpenAI
|
||||
|
||||
OpenAI 所对应的 `embedding.type` 为 `openai`。它并无特有的配置字段。需要提前创建 [API Key](https://platform.openai.com/settings/organization/api-keys),并将其填入`embedding.apiKey`,一个 API Key 的示例为` sk-xxxxxxx`。
|
||||
|
||||
### Ollama
|
||||
|
||||
Ollama 所对应的 `embedding.type` 为 `ollama`。它并无特有的配置字段。
|
||||
|
||||
### 讯飞星火
|
||||
|
||||
讯飞星火 所对应的 `embedding.type` 为 `xfyun`。它需要提前创建[应用](https://console.xfyun.cn/services/emb),获取`APPID` 、`APISecret`和`APIKey`,并将`APIKey`填入`embedding.apiKey`中。
|
||||
|
||||
它特有的配置字段如下:
|
||||
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | 填写值 |
|
||||
| --------------------- | -------- | -------- | ------ | -------------------- | ---------------- |
|
||||
| `embedding.appId` | string | 必填 | - | 应用 ID | 获取的 APPID |
|
||||
| `embedding.apiSecret` | string | 必填 | - | 调用 API 所需 Secret | 获取的 APISecret |
|
||||
|
||||
### Textln
|
||||
|
||||
Textln 所对应的 `embedding.type` 为 `textln`。它需要提前获取[`app-id` 和`secret-code`](https://www.textin.com/document/acge_text_embedding)。
|
||||
|
||||
它特有的配置字段如下:
|
||||
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | 填写值 |
|
||||
| ------------------------------- | -------- | -------- | ------ | -------------------- | ------------------ |
|
||||
| `embedding.textinAppId` | string | 必填 | - | 应用 ID | 获取的 app-id |
|
||||
| `embedding.textinSecretCode` | string | 必填 | - | 调用 API 所需 Secret | 获取的 secret-code |
|
||||
| `embedding.textinMatryoshkaDim` | int | 必填 | - | 返回的单个向量长度 | |
|
||||
|
||||
### Hugging Face
|
||||
|
||||
Hugging Face 所对应的 `embedding.type` 为 `huggingface`。它并无特有的配置字段。需要提前创建 [hf_token](https://huggingface.co/blog/getting-started-with-embeddings),并将其填入`embedding.apiKey`,一个 hf_token 的示例为` hf_xxxxxxx`。
|
||||
|
||||
`embedding.model`默认指定为`sentence-transformers/all-MiniLM-L6-v2`
|
||||
|
||||
### Cohere
|
||||
|
||||
Cohere 所对应的 `embedding.type` 为 `cohere`。它并无特有的配置字段。需要提前创建 [API Key](https://docs.cohere.com/reference/embed),并将其填入`embedding.apiKey`。
|
||||
|
||||
## 向量数据库提供商特有配置
|
||||
|
||||
### Chroma
|
||||
Chroma 所对应的 `vector.type` 为 `chroma`。它并无特有的配置字段。需要提前创建 Collection,并填写 Collection ID 至配置项 `vector.collectionID`,一个 Collection ID 的示例为 `52bbb8b3-724c-477b-a4ce-d5b578214612`。
|
||||
|
||||
## DashVector
|
||||
### DashVector
|
||||
DashVector 所对应的 `vector.type` 为 `dashvector`。它并无特有的配置字段。需要提前创建 Collection,并填写 `Collection 名称` 至配置项 `vector.collectionID`。
|
||||
|
||||
## ElasticSearch
|
||||
### ElasticSearch
|
||||
ElasticSearch 所对应的 `vector.type` 为 `elasticsearch`。需要提前创建 Index 并填写 Index Name 至配置项 `vector.collectionID` 。
|
||||
|
||||
当前依赖于 [KNN](https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html) 方法,请保证 ES 版本支持 `KNN`,当前已在 `8.16` 版本测试。
|
||||
@@ -121,18 +165,18 @@ ElasticSearch 所对应的 `vector.type` 为 `elasticsearch`。需要提前创
|
||||
|
||||
`vector.esUsername` 和 `vector.esPassword` 用于 Basic 认证。同时也支持 Api Key 认证,当填写了 `vector.apiKey` 时,则启用 Api Key 认证,如果使用 SaaS 版本需要填写 `encoded` 的值。
|
||||
|
||||
## Milvus
|
||||
### Milvus
|
||||
Milvus 所对应的 `vector.type` 为 `milvus`。它并无特有的配置字段。需要提前创建 Collection,并填写 Collection Name 至配置项 `vector.collectionID`。
|
||||
|
||||
## Pinecone
|
||||
### Pinecone
|
||||
Pinecone 所对应的 `vector.type` 为 `pinecone`。它并无特有的配置字段。需要提前创建 Index,并填写 Index 访问域名至 `vector.serviceHost`。
|
||||
|
||||
Pinecone 中的 `Namespace` 参数通过插件的 `vector.collectionID` 进行配置,如果不填写 `vector.collectionID`,则默认为 Default Namespace。
|
||||
|
||||
## Qdrant
|
||||
### Qdrant
|
||||
Qdrant 所对应的 `vector.type` 为 `qdrant`。它并无特有的配置字段。需要提前创建 Collection,并填写 Collection Name 至配置项 `vector.collectionID`。
|
||||
|
||||
## Weaviate
|
||||
### Weaviate
|
||||
Weaviate 所对应的 `vector.type` 为 `weaviate`。它并无特有的配置字段。
|
||||
需要提前创建 Collection,并填写 Collection Name 至配置项 `vector.collectionID`。
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ const (
|
||||
PROVIDER_TYPE_OPENAI = "openai"
|
||||
PROVIDER_TYPE_OLLAMA = "ollama"
|
||||
PROVIDER_TYPE_HUGGINGFACE = "huggingface"
|
||||
PROVIDER_TYPE_XFYUN = "xfyun"
|
||||
)
|
||||
|
||||
type providerInitializer interface {
|
||||
@@ -30,6 +31,7 @@ var (
|
||||
PROVIDER_TYPE_OPENAI: &openAIProviderInitializer{},
|
||||
PROVIDER_TYPE_OLLAMA: &ollamaProviderInitializer{},
|
||||
PROVIDER_TYPE_HUGGINGFACE: &HuggingFaceProviderInitializer{},
|
||||
PROVIDER_TYPE_XFYUN: &XfyunProviderInitializer{},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
310
plugins/wasm-go/extensions/ai-cache/embedding/xfyun.go
Normal file
310
plugins/wasm-go/extensions/ai-cache/embedding/xfyun.go
Normal file
@@ -0,0 +1,310 @@
|
||||
package embedding
|
||||
|
||||
import (
|
||||
"crypto/hmac"
|
||||
"crypto/sha256"
|
||||
"encoding/base64"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
|
||||
"github.com/tidwall/gjson"
|
||||
"math"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
|
||||
XFYUN_DOMAIN = "emb-cn-huabei-1.xf-yun.com"
|
||||
XFYUN_PORT = 443
|
||||
)
|
||||
|
||||
type XfyunProviderInitializer struct {
|
||||
}
|
||||
|
||||
var XfyunConfig XfyunProviderConfig
|
||||
|
||||
type XfyunProviderConfig struct {
|
||||
// @Title zh-CN 文本特征提取服务 API Key
|
||||
// @Description zh-CN 文本特征提取服务 API Key。
|
||||
apiKey string
|
||||
// @Title zh-CN 文本特征提取服务 APPID
|
||||
// @Description zh-CN 文本特征提取服务 APPID。仅适用与 Xfyun
|
||||
xfyunAppID string
|
||||
// @Title zh-CN 文本特征提取服务 APISecret
|
||||
// @Description zh-CN 文本特征提取服务 APISecret。仅适用与 Xfyun
|
||||
xfyunApiSecret string
|
||||
}
|
||||
|
||||
func (c *XfyunProviderInitializer) InitConfig(json gjson.Result) {
|
||||
XfyunConfig.xfyunAppID = json.Get("appId").String()
|
||||
XfyunConfig.xfyunApiSecret = json.Get("apiSecret").String()
|
||||
XfyunConfig.apiKey = json.Get("apiKey").String()
|
||||
}
|
||||
|
||||
func (c *XfyunProviderInitializer) ValidateConfig() error {
|
||||
if XfyunConfig.apiKey == "" {
|
||||
return errors.New("[Xfyun] apiKey is required")
|
||||
}
|
||||
if XfyunConfig.xfyunAppID == "" {
|
||||
return errors.New("[Xfyun] appId is required")
|
||||
}
|
||||
if XfyunConfig.xfyunApiSecret == "" {
|
||||
return errors.New("[Xfyun] apiSecret is required")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *XfyunProviderInitializer) CreateProvider(c ProviderConfig) (Provider, error) {
|
||||
if c.servicePort == 0 {
|
||||
c.servicePort = XFYUN_PORT
|
||||
}
|
||||
if c.serviceHost == "" {
|
||||
c.serviceHost = XFYUN_DOMAIN
|
||||
}
|
||||
|
||||
return &XfyunProvider{
|
||||
config: c,
|
||||
client: wrapper.NewClusterClient(wrapper.FQDNCluster{
|
||||
FQDN: c.serviceName,
|
||||
Host: c.serviceHost,
|
||||
Port: c.servicePort,
|
||||
}),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (t *XfyunProvider) GetProviderType() string {
|
||||
return PROVIDER_TYPE_XFYUN
|
||||
}
|
||||
|
||||
type XfyunProvider struct {
|
||||
config ProviderConfig
|
||||
client wrapper.HttpClient
|
||||
}
|
||||
|
||||
type XfyunHeader struct {
|
||||
AppID string `json:"app_id"`
|
||||
Status int `json:"status"`
|
||||
}
|
||||
|
||||
type ReqFeature struct {
|
||||
Encoding string `json:"encoding"`
|
||||
}
|
||||
|
||||
type XfyunEmb struct {
|
||||
Domain string `json:"domain"`
|
||||
Feature ReqFeature `json:"feature"`
|
||||
}
|
||||
|
||||
type XfyunParameter struct {
|
||||
Emb XfyunEmb `json:"emb"`
|
||||
}
|
||||
|
||||
type XfyunPayload struct {
|
||||
Messages struct {
|
||||
Text string `json:"text"`
|
||||
} `json:"messages"`
|
||||
}
|
||||
|
||||
type XfyunText struct {
|
||||
MainMessages []struct {
|
||||
Content *string `json:"content"`
|
||||
Role *string `json:"role"`
|
||||
} `json:"messages"`
|
||||
}
|
||||
|
||||
type XfyunReqBody struct {
|
||||
Header XfyunHeader `json:"header"`
|
||||
Parameter XfyunParameter `json:"parameter"`
|
||||
Payload XfyunPayload `json:"payload"`
|
||||
}
|
||||
|
||||
type XfyunResponse struct {
|
||||
Header XfyunResHeader `json:"header"`
|
||||
Payload XfyunResPayload `json:"payload"`
|
||||
}
|
||||
|
||||
type XfyunResHeader struct {
|
||||
Code int `json:"code"`
|
||||
Message string `json:"message"`
|
||||
Sid string `json:"sid"`
|
||||
}
|
||||
|
||||
type XfyunResPayload struct {
|
||||
Feature struct {
|
||||
Text string `json:"text"`
|
||||
} `json:"feature"`
|
||||
}
|
||||
|
||||
func constructAuth(requestURL, method, apiKey, apiSecret string) (string, error) {
|
||||
u, err := url.Parse(requestURL)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
now := time.Now().UTC().Format(http.TimeFormat)
|
||||
signatureOrigin := fmt.Sprintf("host: %s\ndate: %s\n%s %s HTTP/1.1", u.Host, now, method, u.Path)
|
||||
h := hmac.New(sha256.New, []byte(apiSecret))
|
||||
h.Write([]byte(signatureOrigin))
|
||||
signature := base64.StdEncoding.EncodeToString(h.Sum(nil))
|
||||
authorizationOrigin := fmt.Sprintf("api_key=\"%s\", algorithm=\"hmac-sha256\", headers=\"host date request-line\", signature=\"%s\"", apiKey, signature)
|
||||
authorization := base64.StdEncoding.EncodeToString([]byte(authorizationOrigin))
|
||||
|
||||
params := url.Values{}
|
||||
params.Add("host", u.Host)
|
||||
params.Add("date", now)
|
||||
params.Add("authorization", authorization)
|
||||
|
||||
return "?" + params.Encode(), nil
|
||||
}
|
||||
|
||||
func (t *XfyunProvider) constructParameters(text string, log wrapper.Log) (string, [][2]string, []byte, error) {
|
||||
if text == "" {
|
||||
err := errors.New("queryString text cannot be empty")
|
||||
return "", nil, nil, err
|
||||
}
|
||||
|
||||
host := "https://" + t.config.serviceHost + "/"
|
||||
auth, err := constructAuth(host, "POST", XfyunConfig.apiKey, XfyunConfig.xfyunApiSecret)
|
||||
if err != nil {
|
||||
return "", nil, nil, err
|
||||
}
|
||||
|
||||
role := "user"
|
||||
|
||||
xfyunText := XfyunText{
|
||||
MainMessages: []struct {
|
||||
Content *string `json:"content"`
|
||||
Role *string `json:"role"`
|
||||
}{
|
||||
{
|
||||
Content: &text,
|
||||
Role: &role,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// 将 XfyunText 转换为 JSON
|
||||
xfyunTextJSON, err := json.Marshal(xfyunText)
|
||||
if err != nil {
|
||||
log.Errorf("Error marshaling XfyunText: %v", err)
|
||||
return "", nil, nil, err
|
||||
}
|
||||
|
||||
// 将整个 XfyunText JSON 字符串转换为 Base64 编码
|
||||
encodedText := base64.StdEncoding.EncodeToString(xfyunTextJSON)
|
||||
|
||||
// 构建请求体
|
||||
data := XfyunReqBody{
|
||||
Header: XfyunHeader{
|
||||
AppID: XfyunConfig.xfyunAppID,
|
||||
Status: 3,
|
||||
},
|
||||
Parameter: XfyunParameter{
|
||||
Emb: XfyunEmb{
|
||||
Domain: "query",
|
||||
Feature: ReqFeature{
|
||||
Encoding: "utf8",
|
||||
},
|
||||
},
|
||||
},
|
||||
Payload: XfyunPayload{
|
||||
Messages: struct {
|
||||
Text string `json:"text"`
|
||||
}{Text: encodedText}, // 填充经过 Base64 编码的文本
|
||||
},
|
||||
}
|
||||
|
||||
// 序列化请求数据
|
||||
requestBody, err := json.Marshal(data)
|
||||
if err != nil {
|
||||
log.Errorf("failed to marshal request data: %v", err)
|
||||
return "", nil, nil, err
|
||||
}
|
||||
|
||||
// 构建请求头
|
||||
headers := [][2]string{
|
||||
{"Content-Type", "application/json"},
|
||||
}
|
||||
|
||||
return "/" + auth, headers, requestBody, nil
|
||||
}
|
||||
|
||||
func (t *XfyunProvider) parseTextEmbedding(responseBody []byte) ([]float32, error) {
|
||||
var resp XfyunResponse
|
||||
err := json.Unmarshal(responseBody, &resp)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
base64Text := resp.Payload.Feature.Text
|
||||
decodedBytes, err := base64.StdEncoding.DecodeString(base64Text)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(decodedBytes) == 0 {
|
||||
return nil, errors.New("decoded embedding is empty")
|
||||
}
|
||||
|
||||
if len(decodedBytes)%4 != 0 {
|
||||
return nil, errors.New("decoded data is not a valid float32 array")
|
||||
}
|
||||
|
||||
floatArray := make([]float32, len(decodedBytes)/4)
|
||||
for i := 0; i < len(floatArray); i++ {
|
||||
bits := binary.LittleEndian.Uint32(decodedBytes[i*4 : (i+1)*4])
|
||||
floatArray[i] = math.Float32frombits(bits)
|
||||
}
|
||||
|
||||
return floatArray, nil
|
||||
}
|
||||
|
||||
func (t *XfyunProvider) GetEmbedding(
|
||||
queryString string,
|
||||
ctx wrapper.HttpContext,
|
||||
log wrapper.Log,
|
||||
callback func(emb []float64, err error)) error {
|
||||
embUrl, embHeaders, embRequestBody, err := t.constructParameters(queryString, log)
|
||||
if err != nil {
|
||||
log.Errorf("failed to construct parameters: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
err = t.client.Post(embUrl, embHeaders, embRequestBody,
|
||||
func(statusCode int, responseHeaders http.Header, responseBody []byte) {
|
||||
|
||||
if statusCode != http.StatusOK {
|
||||
err = errors.New("failed to get embedding due to status code: " + strconv.Itoa(statusCode))
|
||||
callback(nil, err)
|
||||
return
|
||||
}
|
||||
|
||||
var resp []float32
|
||||
resp, err = t.parseTextEmbedding(responseBody)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("failed to parse response: %v", err)
|
||||
callback(nil, err)
|
||||
return
|
||||
}
|
||||
|
||||
log.Debugf("get embedding response: %d, %s", statusCode, responseBody)
|
||||
|
||||
if len(resp) == 0 {
|
||||
err = errors.New("no embedding found in response")
|
||||
callback(nil, err)
|
||||
return
|
||||
}
|
||||
|
||||
embedding := make([]float64, len(resp))
|
||||
for i, v := range resp {
|
||||
embedding[i] = float64(v)
|
||||
}
|
||||
|
||||
callback(embedding, nil)
|
||||
|
||||
}, t.config.timeout)
|
||||
return err
|
||||
}
|
||||
Reference in New Issue
Block a user