diff --git a/plugins/wasm-go/extensions/ai-cache/README.md b/plugins/wasm-go/extensions/ai-cache/README.md index 70f3e1b9d..c70241b63 100644 --- a/plugins/wasm-go/extensions/ai-cache/README.md +++ b/plugins/wasm-go/extensions/ai-cache/README.md @@ -100,14 +100,58 @@ LLM 结果缓存插件,默认配置方式可以直接用于 openai 协议的 | responseTemplate | string | optional | `{"id":"ai-cache.hit","choices":[{"index":0,"message":{"role":"assistant","content":%s},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}` | 返回 HTTP 响应的模版,用 %s 标记需要被 cache value 替换的部分 | | streamResponseTemplate | string | optional | `data:{"id":"ai-cache.hit","choices":[{"index":0,"delta":{"role":"assistant","content":%s},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}\n\ndata:[DONE]\n\n` | 返回流式 HTTP 响应的模版,用 %s 标记需要被 cache value 替换的部分 | -# 向量数据库提供商特有配置 -## Chroma +## 文本向量化提供商特有配置 + +### OpenAI + +OpenAI 所对应的 `embedding.type` 为 `openai`。它并无特有的配置字段。需要提前创建 [API Key](https://platform.openai.com/settings/organization/api-keys),并将其填入`embedding.apiKey`,一个 API Key 的示例为` sk-xxxxxxx`。 + +### Ollama + +Ollama 所对应的 `embedding.type` 为 `ollama`。它并无特有的配置字段。 + +### 讯飞星火 + +讯飞星火 所对应的 `embedding.type` 为 `xfyun`。它需要提前创建[应用](https://console.xfyun.cn/services/emb),获取`APPID` 、`APISecret`和`APIKey`,并将`APIKey`填入`embedding.apiKey`中。 + +它特有的配置字段如下: + +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | 填写值 | +| --------------------- | -------- | -------- | ------ | -------------------- | ---------------- | +| `embedding.appId` | string | 必填 | - | 应用 ID | 获取的 APPID | +| `embedding.apiSecret` | string | 必填 | - | 调用 API 所需 Secret | 获取的 APISecret | + +### Textln + +Textln 所对应的 `embedding.type` 为 `textln`。它需要提前获取[`app-id` 和`secret-code`](https://www.textin.com/document/acge_text_embedding)。 + +它特有的配置字段如下: + +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | 填写值 | +| ------------------------------- | -------- | -------- | ------ | -------------------- | ------------------ | +| `embedding.textinAppId` | string | 必填 | - | 应用 ID | 获取的 app-id | +| `embedding.textinSecretCode` | string | 必填 | - | 调用 API 所需 Secret | 获取的 secret-code | +| `embedding.textinMatryoshkaDim` | int | 必填 | - | 返回的单个向量长度 | | + +### Hugging Face + +Hugging Face 所对应的 `embedding.type` 为 `huggingface`。它并无特有的配置字段。需要提前创建 [hf_token](https://huggingface.co/blog/getting-started-with-embeddings),并将其填入`embedding.apiKey`,一个 hf_token 的示例为` hf_xxxxxxx`。 + +`embedding.model`默认指定为`sentence-transformers/all-MiniLM-L6-v2` + +### Cohere + +Cohere 所对应的 `embedding.type` 为 `cohere`。它并无特有的配置字段。需要提前创建 [API Key](https://docs.cohere.com/reference/embed),并将其填入`embedding.apiKey`。 + +## 向量数据库提供商特有配置 + +### Chroma Chroma 所对应的 `vector.type` 为 `chroma`。它并无特有的配置字段。需要提前创建 Collection,并填写 Collection ID 至配置项 `vector.collectionID`,一个 Collection ID 的示例为 `52bbb8b3-724c-477b-a4ce-d5b578214612`。 -## DashVector +### DashVector DashVector 所对应的 `vector.type` 为 `dashvector`。它并无特有的配置字段。需要提前创建 Collection,并填写 `Collection 名称` 至配置项 `vector.collectionID`。 -## ElasticSearch +### ElasticSearch ElasticSearch 所对应的 `vector.type` 为 `elasticsearch`。需要提前创建 Index 并填写 Index Name 至配置项 `vector.collectionID` 。 当前依赖于 [KNN](https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html) 方法,请保证 ES 版本支持 `KNN`,当前已在 `8.16` 版本测试。 @@ -121,18 +165,18 @@ ElasticSearch 所对应的 `vector.type` 为 `elasticsearch`。需要提前创 `vector.esUsername` 和 `vector.esPassword` 用于 Basic 认证。同时也支持 Api Key 认证,当填写了 `vector.apiKey` 时,则启用 Api Key 认证,如果使用 SaaS 版本需要填写 `encoded` 的值。 -## Milvus +### Milvus Milvus 所对应的 `vector.type` 为 `milvus`。它并无特有的配置字段。需要提前创建 Collection,并填写 Collection Name 至配置项 `vector.collectionID`。 -## Pinecone +### Pinecone Pinecone 所对应的 `vector.type` 为 `pinecone`。它并无特有的配置字段。需要提前创建 Index,并填写 Index 访问域名至 `vector.serviceHost`。 Pinecone 中的 `Namespace` 参数通过插件的 `vector.collectionID` 进行配置,如果不填写 `vector.collectionID`,则默认为 Default Namespace。 -## Qdrant +### Qdrant Qdrant 所对应的 `vector.type` 为 `qdrant`。它并无特有的配置字段。需要提前创建 Collection,并填写 Collection Name 至配置项 `vector.collectionID`。 -## Weaviate +### Weaviate Weaviate 所对应的 `vector.type` 为 `weaviate`。它并无特有的配置字段。 需要提前创建 Collection,并填写 Collection Name 至配置项 `vector.collectionID`。 diff --git a/plugins/wasm-go/extensions/ai-cache/embedding/provider.go b/plugins/wasm-go/extensions/ai-cache/embedding/provider.go index 3f3c027b0..62c3970fe 100644 --- a/plugins/wasm-go/extensions/ai-cache/embedding/provider.go +++ b/plugins/wasm-go/extensions/ai-cache/embedding/provider.go @@ -14,6 +14,7 @@ const ( PROVIDER_TYPE_OPENAI = "openai" PROVIDER_TYPE_OLLAMA = "ollama" PROVIDER_TYPE_HUGGINGFACE = "huggingface" + PROVIDER_TYPE_XFYUN = "xfyun" ) type providerInitializer interface { @@ -30,6 +31,7 @@ var ( PROVIDER_TYPE_OPENAI: &openAIProviderInitializer{}, PROVIDER_TYPE_OLLAMA: &ollamaProviderInitializer{}, PROVIDER_TYPE_HUGGINGFACE: &HuggingFaceProviderInitializer{}, + PROVIDER_TYPE_XFYUN: &XfyunProviderInitializer{}, } ) diff --git a/plugins/wasm-go/extensions/ai-cache/embedding/xfyun.go b/plugins/wasm-go/extensions/ai-cache/embedding/xfyun.go new file mode 100644 index 000000000..a3410d3dd --- /dev/null +++ b/plugins/wasm-go/extensions/ai-cache/embedding/xfyun.go @@ -0,0 +1,310 @@ +package embedding + +import ( + "crypto/hmac" + "crypto/sha256" + "encoding/base64" + "encoding/binary" + "encoding/json" + "errors" + "fmt" + "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper" + "github.com/tidwall/gjson" + "math" + "net/http" + "net/url" + "strconv" + "time" +) + +const ( + XFYUN_DOMAIN = "emb-cn-huabei-1.xf-yun.com" + XFYUN_PORT = 443 +) + +type XfyunProviderInitializer struct { +} + +var XfyunConfig XfyunProviderConfig + +type XfyunProviderConfig struct { + // @Title zh-CN 文本特征提取服务 API Key + // @Description zh-CN 文本特征提取服务 API Key。 + apiKey string + // @Title zh-CN 文本特征提取服务 APPID + // @Description zh-CN 文本特征提取服务 APPID。仅适用与 Xfyun + xfyunAppID string + // @Title zh-CN 文本特征提取服务 APISecret + // @Description zh-CN 文本特征提取服务 APISecret。仅适用与 Xfyun + xfyunApiSecret string +} + +func (c *XfyunProviderInitializer) InitConfig(json gjson.Result) { + XfyunConfig.xfyunAppID = json.Get("appId").String() + XfyunConfig.xfyunApiSecret = json.Get("apiSecret").String() + XfyunConfig.apiKey = json.Get("apiKey").String() +} + +func (c *XfyunProviderInitializer) ValidateConfig() error { + if XfyunConfig.apiKey == "" { + return errors.New("[Xfyun] apiKey is required") + } + if XfyunConfig.xfyunAppID == "" { + return errors.New("[Xfyun] appId is required") + } + if XfyunConfig.xfyunApiSecret == "" { + return errors.New("[Xfyun] apiSecret is required") + } + return nil +} + +func (t *XfyunProviderInitializer) CreateProvider(c ProviderConfig) (Provider, error) { + if c.servicePort == 0 { + c.servicePort = XFYUN_PORT + } + if c.serviceHost == "" { + c.serviceHost = XFYUN_DOMAIN + } + + return &XfyunProvider{ + config: c, + client: wrapper.NewClusterClient(wrapper.FQDNCluster{ + FQDN: c.serviceName, + Host: c.serviceHost, + Port: c.servicePort, + }), + }, nil +} + +func (t *XfyunProvider) GetProviderType() string { + return PROVIDER_TYPE_XFYUN +} + +type XfyunProvider struct { + config ProviderConfig + client wrapper.HttpClient +} + +type XfyunHeader struct { + AppID string `json:"app_id"` + Status int `json:"status"` +} + +type ReqFeature struct { + Encoding string `json:"encoding"` +} + +type XfyunEmb struct { + Domain string `json:"domain"` + Feature ReqFeature `json:"feature"` +} + +type XfyunParameter struct { + Emb XfyunEmb `json:"emb"` +} + +type XfyunPayload struct { + Messages struct { + Text string `json:"text"` + } `json:"messages"` +} + +type XfyunText struct { + MainMessages []struct { + Content *string `json:"content"` + Role *string `json:"role"` + } `json:"messages"` +} + +type XfyunReqBody struct { + Header XfyunHeader `json:"header"` + Parameter XfyunParameter `json:"parameter"` + Payload XfyunPayload `json:"payload"` +} + +type XfyunResponse struct { + Header XfyunResHeader `json:"header"` + Payload XfyunResPayload `json:"payload"` +} + +type XfyunResHeader struct { + Code int `json:"code"` + Message string `json:"message"` + Sid string `json:"sid"` +} + +type XfyunResPayload struct { + Feature struct { + Text string `json:"text"` + } `json:"feature"` +} + +func constructAuth(requestURL, method, apiKey, apiSecret string) (string, error) { + u, err := url.Parse(requestURL) + if err != nil { + return "", err + } + now := time.Now().UTC().Format(http.TimeFormat) + signatureOrigin := fmt.Sprintf("host: %s\ndate: %s\n%s %s HTTP/1.1", u.Host, now, method, u.Path) + h := hmac.New(sha256.New, []byte(apiSecret)) + h.Write([]byte(signatureOrigin)) + signature := base64.StdEncoding.EncodeToString(h.Sum(nil)) + authorizationOrigin := fmt.Sprintf("api_key=\"%s\", algorithm=\"hmac-sha256\", headers=\"host date request-line\", signature=\"%s\"", apiKey, signature) + authorization := base64.StdEncoding.EncodeToString([]byte(authorizationOrigin)) + + params := url.Values{} + params.Add("host", u.Host) + params.Add("date", now) + params.Add("authorization", authorization) + + return "?" + params.Encode(), nil +} + +func (t *XfyunProvider) constructParameters(text string, log wrapper.Log) (string, [][2]string, []byte, error) { + if text == "" { + err := errors.New("queryString text cannot be empty") + return "", nil, nil, err + } + + host := "https://" + t.config.serviceHost + "/" + auth, err := constructAuth(host, "POST", XfyunConfig.apiKey, XfyunConfig.xfyunApiSecret) + if err != nil { + return "", nil, nil, err + } + + role := "user" + + xfyunText := XfyunText{ + MainMessages: []struct { + Content *string `json:"content"` + Role *string `json:"role"` + }{ + { + Content: &text, + Role: &role, + }, + }, + } + + // 将 XfyunText 转换为 JSON + xfyunTextJSON, err := json.Marshal(xfyunText) + if err != nil { + log.Errorf("Error marshaling XfyunText: %v", err) + return "", nil, nil, err + } + + // 将整个 XfyunText JSON 字符串转换为 Base64 编码 + encodedText := base64.StdEncoding.EncodeToString(xfyunTextJSON) + + // 构建请求体 + data := XfyunReqBody{ + Header: XfyunHeader{ + AppID: XfyunConfig.xfyunAppID, + Status: 3, + }, + Parameter: XfyunParameter{ + Emb: XfyunEmb{ + Domain: "query", + Feature: ReqFeature{ + Encoding: "utf8", + }, + }, + }, + Payload: XfyunPayload{ + Messages: struct { + Text string `json:"text"` + }{Text: encodedText}, // 填充经过 Base64 编码的文本 + }, + } + + // 序列化请求数据 + requestBody, err := json.Marshal(data) + if err != nil { + log.Errorf("failed to marshal request data: %v", err) + return "", nil, nil, err + } + + // 构建请求头 + headers := [][2]string{ + {"Content-Type", "application/json"}, + } + + return "/" + auth, headers, requestBody, nil +} + +func (t *XfyunProvider) parseTextEmbedding(responseBody []byte) ([]float32, error) { + var resp XfyunResponse + err := json.Unmarshal(responseBody, &resp) + if err != nil { + return nil, err + } + + base64Text := resp.Payload.Feature.Text + decodedBytes, err := base64.StdEncoding.DecodeString(base64Text) + if err != nil { + return nil, err + } + + if len(decodedBytes) == 0 { + return nil, errors.New("decoded embedding is empty") + } + + if len(decodedBytes)%4 != 0 { + return nil, errors.New("decoded data is not a valid float32 array") + } + + floatArray := make([]float32, len(decodedBytes)/4) + for i := 0; i < len(floatArray); i++ { + bits := binary.LittleEndian.Uint32(decodedBytes[i*4 : (i+1)*4]) + floatArray[i] = math.Float32frombits(bits) + } + + return floatArray, nil +} + +func (t *XfyunProvider) GetEmbedding( + queryString string, + ctx wrapper.HttpContext, + log wrapper.Log, + callback func(emb []float64, err error)) error { + embUrl, embHeaders, embRequestBody, err := t.constructParameters(queryString, log) + if err != nil { + log.Errorf("failed to construct parameters: %v", err) + return err + } + + err = t.client.Post(embUrl, embHeaders, embRequestBody, + func(statusCode int, responseHeaders http.Header, responseBody []byte) { + + if statusCode != http.StatusOK { + err = errors.New("failed to get embedding due to status code: " + strconv.Itoa(statusCode)) + callback(nil, err) + return + } + + var resp []float32 + resp, err = t.parseTextEmbedding(responseBody) + if err != nil { + err = fmt.Errorf("failed to parse response: %v", err) + callback(nil, err) + return + } + + log.Debugf("get embedding response: %d, %s", statusCode, responseBody) + + if len(resp) == 0 { + err = errors.New("no embedding found in response") + callback(nil, err) + return + } + + embedding := make([]float64, len(resp)) + for i, v := range resp { + embedding[i] = float64(v) + } + + callback(embedding, nil) + + }, t.config.timeout) + return err +}