feat: Support embeddings API for Qwen in the ai-proxy plugin (#1079)

This commit is contained in:
Kent Dong
2024-07-08 19:37:08 +08:00
committed by GitHub
parent 12a5612450
commit 5a2c6835f7
10 changed files with 254 additions and 39 deletions

View File

@@ -255,26 +255,72 @@ provider:
'gpt-3': "qwen-turbo"
'gpt-35-turbo': "qwen-plus"
'gpt-4-turbo': "qwen-max"
'qwen-*': ""
'gpt-4-*': "qwen-max"
'text-embedding-v1': 'text-embedding-v1'
'*': "qwen-turbo"
```
**AI 对话请求示例**
URL: http://your-domain/v1/chat/completions
请求体:
```json
{
"model": "text-embedding-v1",
"input": "Hello"
}
```
响应体示例:
```json
{
"object": "list",
"data": [
{
"object": "embedding",
"index": 0,
"embedding": [
-1.0437825918197632,
5.208984375,
3.0483806133270264,
-1.7897135019302368,
-2.0107421875,
...,
0.8125,
-1.1759847402572632,
0.8174641728401184,
1.0432943105697632,
-0.5885213017463684
]
}
],
"model": "text-embedding-v1",
"usage": {
"prompt_tokens": 1,
"total_tokens": 1
}
}
```
**请求示例**
URL: http://your-domain/v1/embeddings
示例请求内容:
```json
{
"model": "gpt-3",
"messages": [
{
"role": "user",
"content": "你好,你是谁?"
}
],
"temperature": 0.3
"model": "text-embedding-v1",
"input": [
"Hello world!"
]
}
```
**响应示例**
示例响应内容:
```json
{

View File

@@ -61,7 +61,7 @@ func onHttpRequestHeader(ctx wrapper.HttpContext, pluginConfig config.PluginConf
rawPath := ctx.Path()
path, _ := url.Parse(rawPath)
apiName := getApiName(path.Path)
apiName := getOpenAiApiName(path.Path)
if apiName == "" {
log.Debugf("[onHttpRequestHeader] unsupported path: %s", path.Path)
_ = util.SendResponse(404, util.MimeTypeTextPlain, "API not found: "+path.Path)
@@ -77,7 +77,7 @@ func onHttpRequestHeader(ctx wrapper.HttpContext, pluginConfig config.PluginConf
if err == nil {
return action
}
_ = util.SendResponse(404, util.MimeTypeTextPlain, fmt.Sprintf("failed to process request headers: %v", err))
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to process request headers: %v", err))
return types.ActionContinue
}
@@ -104,7 +104,7 @@ func onHttpRequestBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig
if err == nil {
return action
}
_ = util.SendResponse(404, util.MimeTypeTextPlain, fmt.Sprintf("failed to process request body: %v", err))
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to process request body: %v", err))
return types.ActionContinue
}
return types.ActionContinue
@@ -144,7 +144,7 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo
if err == nil {
return action
}
_ = util.SendResponse(404, util.MimeTypeTextPlain, fmt.Sprintf("failed to process response headers: %v", err))
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to process response headers: %v", err))
return types.ActionContinue
}
@@ -198,15 +198,18 @@ func onHttpResponseBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfi
if err == nil {
return action
}
_ = util.SendResponse(404, util.MimeTypeTextPlain, fmt.Sprintf("failed to process response body: %v", err))
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to process response body: %v", err))
return types.ActionContinue
}
return types.ActionContinue
}
func getApiName(path string) provider.ApiName {
func getOpenAiApiName(path string) provider.ApiName {
if strings.HasSuffix(path, "/v1/chat/completions") {
return provider.ApiNameChatCompletion
}
if strings.HasSuffix(path, "/v1/embeddings") {
return provider.ApiNameEmbeddings
}
return ""
}

View File

@@ -302,7 +302,7 @@ func (b *baiduProvider) responseBaidu2OpenAI(ctx wrapper.HttpContext, response *
SystemFingerprint: "",
Object: objectChatCompletion,
Choices: []chatCompletionChoice{choice},
Usage: chatCompletionUsage{
Usage: usage{
PromptTokens: response.Usage.PromptTokens,
CompletionTokens: response.Usage.CompletionTokens,
TotalTokens: response.Usage.TotalTokens,
@@ -325,7 +325,7 @@ func (b *baiduProvider) streamResponseBaidu2OpenAI(ctx wrapper.HttpContext, resp
SystemFingerprint: "",
Object: objectChatCompletion,
Choices: []chatCompletionChoice{choice},
Usage: chatCompletionUsage{
Usage: usage{
PromptTokens: response.Usage.PromptTokens,
CompletionTokens: response.Usage.CompletionTokens,
TotalTokens: response.Usage.TotalTokens,

View File

@@ -296,7 +296,7 @@ func (c *claudeProvider) responseClaude2OpenAI(ctx wrapper.HttpContext, origResp
SystemFingerprint: "",
Object: objectChatCompletion,
Choices: []chatCompletionChoice{choice},
Usage: chatCompletionUsage{
Usage: usage{
PromptTokens: origResponse.Usage.InputTokens,
CompletionTokens: origResponse.Usage.OutputTokens,
TotalTokens: origResponse.Usage.InputTokens + origResponse.Usage.OutputTokens,

View File

@@ -354,7 +354,7 @@ func (m *hunyuanProvider) convertChunkFromHunyuanToOpenAI(ctx wrapper.HttpContex
Model: ctx.GetContext(ctxKeyFinalRequestModel).(string),
SystemFingerprint: "",
Object: objectChatCompletionChunk,
Usage: chatCompletionUsage{
Usage: usage{
PromptTokens: hunyuanFormattedChunk.Usage.PromptTokens,
CompletionTokens: hunyuanFormattedChunk.Usage.CompletionTokens,
TotalTokens: hunyuanFormattedChunk.Usage.TotalTokens,
@@ -474,7 +474,7 @@ func (m *hunyuanProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, h
SystemFingerprint: "",
Object: objectChatCompletion,
Choices: choices,
Usage: chatCompletionUsage{
Usage: usage{
PromptTokens: hunyuanResponse.Response.Usage.PromptTokens,
CompletionTokens: hunyuanResponse.Response.Usage.CompletionTokens,
TotalTokens: hunyuanResponse.Response.Usage.TotalTokens,

View File

@@ -461,7 +461,7 @@ func (m *minimaxProvider) responseV2ToOpenAI(response *minimaxChatCompletionV2Re
Created: response.Created,
Model: response.Model,
Choices: choices,
Usage: chatCompletionUsage{
Usage: usage{
TotalTokens: int(response.Usage.TotalTokens),
},
}

View File

@@ -60,7 +60,7 @@ type chatCompletionResponse struct {
Model string `json:"model,omitempty"`
SystemFingerprint string `json:"system_fingerprint,omitempty"`
Object string `json:"object,omitempty"`
Usage chatCompletionUsage `json:"usage,omitempty"`
Usage usage `json:"usage,omitempty"`
}
type chatCompletionChoice struct {
@@ -70,7 +70,7 @@ type chatCompletionChoice struct {
FinishReason string `json:"finish_reason,omitempty"`
}
type chatCompletionUsage struct {
type usage struct {
PromptTokens int `json:"prompt_tokens,omitempty"`
CompletionTokens int `json:"completion_tokens,omitempty"`
TotalTokens int `json:"total_tokens,omitempty"`
@@ -140,3 +140,24 @@ func (e *streamEvent) setValue(key, value string) {
}
}
}
type embeddingsRequest struct {
Input interface{} `json:"input"`
Model string `json:"model"`
EncodingFormat string `json:"encoding_format,omitempty"`
Dimensions int `json:"dimensions,omitempty"`
User string `json:"user,omitempty"`
}
type embeddingsResponse struct {
Object string `json:"object"`
Data []embedding `json:"data"`
Model string `json:"model"`
Usage usage `json:"usage"`
}
type embedding struct {
Object string `json:"object"`
Index int `json:"index"`
Embedding []float64 `json:"embedding"`
}

View File

@@ -14,6 +14,7 @@ import (
const (
openaiDomain = "api.openai.com"
openaiChatCompletionPath = "/v1/chat/completions"
openaiEmbeddingsPath = "/v1/chat/embeddings"
)
type openaiProviderInitializer struct {
@@ -40,14 +41,19 @@ func (m *openaiProvider) GetProviderType() string {
}
func (m *openaiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
if apiName != ApiNameChatCompletion {
return types.ActionContinue, errUnsupportedApiName
skipRequestBody := true
switch apiName {
case ApiNameChatCompletion:
_ = util.OverwriteRequestPath(openaiChatCompletionPath)
skipRequestBody = m.contextCache == nil
break
case ApiNameEmbeddings:
_ = util.OverwriteRequestPath(openaiEmbeddingsPath)
break
}
_ = util.OverwriteRequestPath(openaiChatCompletionPath)
_ = util.OverwriteRequestHost(openaiDomain)
_ = proxywasm.ReplaceHttpRequestHeader("Authorization", "Bearer "+m.config.GetRandomToken())
if m.contextCache == nil {
if skipRequestBody {
ctx.DontReadRequestBody()
} else {
_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
@@ -58,7 +64,8 @@ func (m *openaiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiNa
func (m *openaiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
if apiName != ApiNameChatCompletion {
return types.ActionContinue, errUnsupportedApiName
// We don't need to process the request body for other APIs.
return types.ActionContinue, nil
}
if m.contextCache == nil {
return types.ActionContinue, nil

View File

@@ -15,6 +15,7 @@ type Pointcut string
const (
ApiNameChatCompletion ApiName = "chatCompletion"
ApiNameEmbeddings ApiName = "embeddings"
providerTypeMoonshot = "moonshot"
providerTypeAzure = "azure"

View File

@@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"math"
"reflect"
"strings"
"time"
@@ -21,6 +22,7 @@ const (
qwenDomain = "dashscope.aliyuncs.com"
qwenChatCompletionPath = "/api/v1/services/aigc/text-generation/generation"
qwenTextEmbeddingPath = "/api/v1/services/embeddings/text-embedding/text-embedding"
qwenTopPMin = 0.000001
qwenTopPMax = 0.999999
@@ -58,14 +60,19 @@ func (m *qwenProvider) GetProviderType() string {
}
func (m *qwenProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
if apiName != ApiNameChatCompletion {
needRequestBody := false
if apiName == ApiNameChatCompletion {
_ = util.OverwriteRequestPath(qwenChatCompletionPath)
needRequestBody = m.config.context != nil
} else if apiName == ApiNameEmbeddings {
_ = util.OverwriteRequestPath(qwenTextEmbeddingPath)
} else {
return types.ActionContinue, errUnsupportedApiName
}
_ = util.OverwriteRequestPath(qwenChatCompletionPath)
_ = util.OverwriteRequestHost(qwenDomain)
_ = proxywasm.ReplaceHttpRequestHeader("Authorization", "Bearer "+m.config.GetRandomToken())
if m.config.protocol == protocolOriginal && m.config.context == nil {
if m.config.protocol == protocolOriginal && !needRequestBody {
ctx.DontReadRequestBody()
return types.ActionContinue, nil
}
@@ -78,10 +85,16 @@ func (m *qwenProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName
}
func (m *qwenProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
if apiName != ApiNameChatCompletion {
return types.ActionContinue, errUnsupportedApiName
if apiName == ApiNameChatCompletion {
return m.onChatCompletionRequestBody(ctx, body, log)
}
if apiName == ApiNameEmbeddings {
return m.onEmbeddingsRequestBody(ctx, body, log)
}
return types.ActionContinue, errUnsupportedApiName
}
func (m *qwenProvider) onChatCompletionRequestBody(ctx wrapper.HttpContext, body []byte, log wrapper.Log) (types.Action, error) {
if m.config.protocol == protocolOriginal {
if m.config.context == nil {
return types.ActionContinue, nil
@@ -169,6 +182,33 @@ func (m *qwenProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, b
return types.ActionContinue, err
}
func (m *qwenProvider) onEmbeddingsRequestBody(ctx wrapper.HttpContext, body []byte, log wrapper.Log) (types.Action, error) {
request := &embeddingsRequest{}
if err := json.Unmarshal(body, request); err != nil {
return types.ActionContinue, fmt.Errorf("unable to unmarshal request: %v", err)
}
log.Debugf("=== embeddings request: %v", request)
model := request.Model
if model == "" {
return types.ActionContinue, errors.New("missing model in the request")
}
ctx.SetContext(ctxKeyOriginalRequestModel, model)
mappedModel := getMappedModel(model, m.config.modelMapping, log)
if mappedModel == "" {
return types.ActionContinue, errors.New("model becomes empty after applying the configured mapping")
}
request.Model = mappedModel
ctx.SetContext(ctxKeyFinalRequestModel, request.Model)
if qwenRequest, err := m.buildQwenTextEmbeddingRequest(request); err == nil {
return types.ActionContinue, replaceJsonRequestBody(qwenRequest, log)
} else {
return types.ActionContinue, err
}
}
func (m *qwenProvider) OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
if m.config.protocol == protocolOriginal {
ctx.DontReadResponseBody()
@@ -180,6 +220,10 @@ func (m *qwenProvider) OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiNam
}
func (m *qwenProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
if name != ApiNameChatCompletion {
return chunk, nil
}
receivedBody := chunk
if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
receivedBody = append(bufferedStreamingBody, chunk...)
@@ -264,6 +308,16 @@ func (m *qwenProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name Api
}
func (m *qwenProvider) OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
if apiName == ApiNameChatCompletion {
return m.onChatCompletionResponseBody(ctx, body, log)
}
if apiName == ApiNameEmbeddings {
return m.onEmbeddingsResponseBody(ctx, body, log)
}
return types.ActionContinue, errUnsupportedApiName
}
func (m *qwenProvider) onChatCompletionResponseBody(ctx wrapper.HttpContext, body []byte, log wrapper.Log) (types.Action, error) {
qwenResponse := &qwenTextGenResponse{}
if err := json.Unmarshal(body, qwenResponse); err != nil {
return types.ActionContinue, fmt.Errorf("unable to unmarshal Qwen response: %v", err)
@@ -272,6 +326,15 @@ func (m *qwenProvider) OnResponseBody(ctx wrapper.HttpContext, apiName ApiName,
return types.ActionContinue, replaceJsonResponseBody(response, log)
}
func (m *qwenProvider) onEmbeddingsResponseBody(ctx wrapper.HttpContext, body []byte, log wrapper.Log) (types.Action, error) {
qwenResponse := &qwenTextEmbeddingResponse{}
if err := json.Unmarshal(body, qwenResponse); err != nil {
return types.ActionContinue, fmt.Errorf("unable to unmarshal Qwen response: %v", err)
}
response := m.buildEmbeddingsResponse(ctx, qwenResponse)
return types.ActionContinue, replaceJsonResponseBody(response, log)
}
func (m *qwenProvider) buildQwenTextGenerationRequest(origRequest *chatCompletionRequest, streaming bool) *qwenTextGenRequest {
messages := make([]qwenMessage, 0, len(origRequest.Messages))
for i := range origRequest.Messages {
@@ -328,7 +391,7 @@ func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwen
SystemFingerprint: "",
Object: objectChatCompletion,
Choices: choices,
Usage: chatCompletionUsage{
Usage: usage{
PromptTokens: qwenResponse.Usage.InputTokens,
CompletionTokens: qwenResponse.Usage.OutputTokens,
TotalTokens: qwenResponse.Usage.TotalTokens,
@@ -400,7 +463,7 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
usageResponse := *&baseMessage
usageResponse.Choices = []chatCompletionChoice{{Delta: &chatMessage{}}}
usageResponse.Usage = chatCompletionUsage{
usageResponse.Usage = usage{
PromptTokens: qwenResponse.Usage.InputTokens,
CompletionTokens: qwenResponse.Usage.OutputTokens,
TotalTokens: qwenResponse.Usage.TotalTokens,
@@ -485,6 +548,50 @@ func (m *qwenProvider) appendStreamEvent(responseBuilder *strings.Builder, event
responseBuilder.WriteString("\n\n")
}
func (m *qwenProvider) buildQwenTextEmbeddingRequest(request *embeddingsRequest) (*qwenTextEmbeddingRequest, error) {
var texts []string
if str, isString := request.Input.(string); isString {
texts = []string{str}
} else if strs, isArray := request.Input.([]interface{}); isArray {
texts = make([]string, 0, len(strs))
for _, item := range strs {
if str, isString := item.(string); isString {
texts = append(texts, str)
} else {
return nil, errors.New("unsupported input type in array: " + reflect.TypeOf(item).String())
}
}
} else {
return nil, errors.New("unsupported input type: " + reflect.TypeOf(request.Input).String())
}
return &qwenTextEmbeddingRequest{
Model: request.Model,
Input: qwenTextEmbeddingInput{
Texts: texts,
},
}, nil
}
func (m *qwenProvider) buildEmbeddingsResponse(ctx wrapper.HttpContext, qwenResponse *qwenTextEmbeddingResponse) *embeddingsResponse {
data := make([]embedding, 0, len(qwenResponse.Output.Embeddings))
for _, qwenEmbedding := range qwenResponse.Output.Embeddings {
data = append(data, embedding{
Object: "embedding",
Index: qwenEmbedding.TextIndex,
Embedding: qwenEmbedding.Embedding,
})
}
return &embeddingsResponse{
Object: "list",
Data: data,
Model: ctx.GetContext(ctxKeyFinalRequestModel).(string),
Usage: usage{
PromptTokens: qwenResponse.Usage.TotalTokens,
TotalTokens: qwenResponse.Usage.TotalTokens,
},
}
}
type qwenTextGenRequest struct {
Model string `json:"model"`
Input qwenTextGenInput `json:"input"`
@@ -511,7 +618,7 @@ type qwenTextGenParameters struct {
type qwenTextGenResponse struct {
RequestId string `json:"request_id"`
Output qwenTextGenOutput `json:"output"`
Usage qwenTextGenUsage `json:"usage"`
Usage qwenUsage `json:"usage"`
}
type qwenTextGenOutput struct {
@@ -524,7 +631,7 @@ type qwenTextGenChoice struct {
Message qwenMessage `json:"message"`
}
type qwenTextGenUsage struct {
type qwenUsage struct {
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
TotalTokens int `json:"total_tokens"`
@@ -537,6 +644,36 @@ type qwenMessage struct {
ToolCalls []toolCall `json:"tool_calls,omitempty"`
}
type qwenTextEmbeddingRequest struct {
Model string `json:"model"`
Input qwenTextEmbeddingInput `json:"input"`
Parameters qwenTextEmbeddingParameters `json:"parameters,omitempty"`
}
type qwenTextEmbeddingInput struct {
Texts []string `json:"texts"`
}
type qwenTextEmbeddingParameters struct {
TextType string `json:"text_type,omitempty"`
}
type qwenTextEmbeddingResponse struct {
RequestId string `json:"request_id"`
Output qwenTextEmbeddingOutput `json:"output"`
Usage qwenUsage `json:"usage"`
}
type qwenTextEmbeddingOutput struct {
RequestId string `json:"request_id"`
Embeddings []qwenTextEmbeddings `json:"embeddings"`
}
type qwenTextEmbeddings struct {
TextIndex int `json:"text_index"`
Embedding []float64 `json:"embedding"`
}
func qwenMessageToChatMessage(qwenMessage qwenMessage) chatMessage {
return chatMessage{
Name: qwenMessage.Name,