mirror of
https://github.com/alibaba/higress.git
synced 2026-06-09 20:57:32 +08:00
fix: resolve blocking issue with minimax responses in ai-proxy (#1663)
This commit is contained in:
@@ -93,10 +93,6 @@ func (m *minimaxProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *minimaxProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header, log wrapper.Log) ([]byte, error) {
|
|
||||||
return m.handleRequestBodyByChatCompletionV2(body, headers, log)
|
|
||||||
}
|
|
||||||
|
|
||||||
// handleRequestBodyByChatCompletionPro processes the request body using the chat completion Pro API.
|
// handleRequestBodyByChatCompletionPro processes the request body using the chat completion Pro API.
|
||||||
func (m *minimaxProvider) handleRequestBodyByChatCompletionPro(body []byte, log wrapper.Log) (types.Action, error) {
|
func (m *minimaxProvider) handleRequestBodyByChatCompletionPro(body []byte, log wrapper.Log) (types.Action, error) {
|
||||||
request := &chatCompletionRequest{}
|
request := &chatCompletionRequest{}
|
||||||
@@ -109,7 +105,7 @@ func (m *minimaxProvider) handleRequestBodyByChatCompletionPro(body []byte, log
|
|||||||
_ = util.OverwriteRequestPath(fmt.Sprintf("%s?GroupId=%s", minimaxChatCompletionProPath, m.config.minimaxGroupId))
|
_ = util.OverwriteRequestPath(fmt.Sprintf("%s?GroupId=%s", minimaxChatCompletionProPath, m.config.minimaxGroupId))
|
||||||
|
|
||||||
if m.config.context == nil {
|
if m.config.context == nil {
|
||||||
minimaxRequest := m.buildMinimaxChatCompletionV2Request(request, "")
|
minimaxRequest := m.buildMinimaxChatCompletionProRequest(request, "")
|
||||||
return types.ActionContinue, replaceJsonRequestBody(minimaxRequest, log)
|
return types.ActionContinue, replaceJsonRequestBody(minimaxRequest, log)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -124,7 +120,7 @@ func (m *minimaxProvider) handleRequestBodyByChatCompletionPro(body []byte, log
|
|||||||
// Since minimaxChatCompletionV2 (format consistent with OpenAI) and minimaxChatCompletionPro (different format from OpenAI) have different logic for insertHttpContextMessage, we cannot unify them within one provider.
|
// Since minimaxChatCompletionV2 (format consistent with OpenAI) and minimaxChatCompletionPro (different format from OpenAI) have different logic for insertHttpContextMessage, we cannot unify them within one provider.
|
||||||
// For minimaxChatCompletionPro, we need to manually handle context messages.
|
// For minimaxChatCompletionPro, we need to manually handle context messages.
|
||||||
// minimaxChatCompletionV2 uses the default defaultInsertHttpContextMessage method to insert context messages.
|
// minimaxChatCompletionV2 uses the default defaultInsertHttpContextMessage method to insert context messages.
|
||||||
minimaxRequest := m.buildMinimaxChatCompletionV2Request(request, content)
|
minimaxRequest := m.buildMinimaxChatCompletionProRequest(request, content)
|
||||||
if err := replaceJsonRequestBody(minimaxRequest, log); err != nil {
|
if err := replaceJsonRequestBody(minimaxRequest, log); err != nil {
|
||||||
util.ErrorHandler("ai-proxy.minimax.insert_ctx_failed", fmt.Errorf("failed to replace Request body: %v", err))
|
util.ErrorHandler("ai-proxy.minimax.insert_ctx_failed", fmt.Errorf("failed to replace Request body: %v", err))
|
||||||
}
|
}
|
||||||
@@ -135,6 +131,10 @@ func (m *minimaxProvider) handleRequestBodyByChatCompletionPro(body []byte, log
|
|||||||
return types.ActionContinue, err
|
return types.ActionContinue, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *minimaxProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header, log wrapper.Log) ([]byte, error) {
|
||||||
|
return m.handleRequestBodyByChatCompletionV2(body, headers, log)
|
||||||
|
}
|
||||||
|
|
||||||
// handleRequestBodyByChatCompletionV2 processes the request body using the chat completion V2 API.
|
// handleRequestBodyByChatCompletionV2 processes the request body using the chat completion V2 API.
|
||||||
func (m *minimaxProvider) handleRequestBodyByChatCompletionV2(body []byte, headers http.Header, log wrapper.Log) ([]byte, error) {
|
func (m *minimaxProvider) handleRequestBodyByChatCompletionV2(body []byte, headers http.Header, log wrapper.Log) ([]byte, error) {
|
||||||
util.OverwriteRequestPathHeader(headers, minimaxChatCompletionV2Path)
|
util.OverwriteRequestPathHeader(headers, minimaxChatCompletionV2Path)
|
||||||
@@ -144,15 +144,13 @@ func (m *minimaxProvider) handleRequestBodyByChatCompletionV2(body []byte, heade
|
|||||||
return sjson.SetBytes(body, "model", mappedModel)
|
return sjson.SetBytes(body, "model", mappedModel)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip OnStreamingResponseBody() and OnResponseBody() when using original protocol.
|
|
||||||
func (m *minimaxProvider) TransformResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
|
func (m *minimaxProvider) TransformResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header, log wrapper.Log) {
|
||||||
if m.config.protocol == protocolOriginal {
|
// Skip OnStreamingResponseBody() and OnResponseBody() when using the original protocol
|
||||||
ctx.DontReadResponseBody()
|
// or when the model corresponds to the chat completion V2 interface.
|
||||||
}
|
if m.config.protocol == protocolOriginal || minimaxApiTypePro != m.config.minimaxApiType {
|
||||||
|
|
||||||
// Skip OnStreamingResponseBody() and OnResponseBody() when the model corresponds to the chat completion V2 interface.
|
|
||||||
if minimaxApiTypePro != m.config.minimaxApiType {
|
|
||||||
ctx.DontReadResponseBody()
|
ctx.DontReadResponseBody()
|
||||||
|
} else {
|
||||||
|
headers.Del("Content-Length")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -174,12 +172,12 @@ func (m *minimaxProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
data = data[6:]
|
data = data[6:]
|
||||||
var minimaxResp minimaxChatCompletionV2Resp
|
var minimaxResp minimaxChatCompletionProResp
|
||||||
if err := json.Unmarshal([]byte(data), &minimaxResp); err != nil {
|
if err := json.Unmarshal([]byte(data), &minimaxResp); err != nil {
|
||||||
log.Errorf("unable to unmarshal minimax response: %v", err)
|
log.Errorf("unable to unmarshal minimax response: %v", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
response := m.responseV2ToOpenAI(&minimaxResp)
|
response := m.responseProToOpenAI(&minimaxResp)
|
||||||
responseBody, err := json.Marshal(response)
|
responseBody, err := json.Marshal(response)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("unable to marshal response: %v", err)
|
log.Errorf("unable to marshal response: %v", err)
|
||||||
@@ -192,21 +190,21 @@ func (m *minimaxProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name
|
|||||||
return []byte(modifiedResponseChunk), nil
|
return []byte(modifiedResponseChunk), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// OnResponseBody handles the final response body from the Minimax service only for requests using the OpenAI protocol and corresponding to the chat completion Pro API.
|
// TransformResponseBody handles the final response body from the Minimax service only for requests using the OpenAI protocol and corresponding to the chat completion Pro API.
|
||||||
func (m *minimaxProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
|
func (m *minimaxProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
|
||||||
minimaxResp := &minimaxChatCompletionV2Resp{}
|
minimaxResp := &minimaxChatCompletionProResp{}
|
||||||
if err := json.Unmarshal(body, minimaxResp); err != nil {
|
if err := json.Unmarshal(body, minimaxResp); err != nil {
|
||||||
return nil, fmt.Errorf("unable to unmarshal minimax response: %v", err)
|
return nil, fmt.Errorf("unable to unmarshal minimax response: %v", err)
|
||||||
}
|
}
|
||||||
if minimaxResp.BaseResp.StatusCode != 0 {
|
if minimaxResp.BaseResp.StatusCode != 0 {
|
||||||
return nil, fmt.Errorf("minimax response error, error_code: %d, error_message: %s", minimaxResp.BaseResp.StatusCode, minimaxResp.BaseResp.StatusMsg)
|
return nil, fmt.Errorf("minimax response error, error_code: %d, error_message: %s", minimaxResp.BaseResp.StatusCode, minimaxResp.BaseResp.StatusMsg)
|
||||||
}
|
}
|
||||||
response := m.responseV2ToOpenAI(minimaxResp)
|
response := m.responseProToOpenAI(minimaxResp)
|
||||||
return json.Marshal(response)
|
return json.Marshal(response)
|
||||||
}
|
}
|
||||||
|
|
||||||
// minimaxChatCompletionV2Request represents the structure of a chat completion V2 request.
|
// minimaxChatCompletionProRequest represents the structure of a chat completion Pro request.
|
||||||
type minimaxChatCompletionV2Request struct {
|
type minimaxChatCompletionProRequest struct {
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
Stream bool `json:"stream,omitempty"`
|
Stream bool `json:"stream,omitempty"`
|
||||||
TokensToGenerate int64 `json:"tokens_to_generate,omitempty"`
|
TokensToGenerate int64 `json:"tokens_to_generate,omitempty"`
|
||||||
@@ -237,15 +235,13 @@ type minimaxReplyConstraints struct {
|
|||||||
SenderName string `json:"sender_name"`
|
SenderName string `json:"sender_name"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// minimaxChatCompletionV2Resp represents the structure of a Minimax Chat Completion V2 response.
|
// minimaxChatCompletionProResp represents the structure of a Minimax Chat Completion Pro response.
|
||||||
type minimaxChatCompletionV2Resp struct {
|
type minimaxChatCompletionProResp struct {
|
||||||
Created int64 `json:"created"`
|
Created int64 `json:"created"`
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
Reply string `json:"reply"`
|
Reply string `json:"reply"`
|
||||||
InputSensitive bool `json:"input_sensitive,omitempty"`
|
InputSensitive bool `json:"input_sensitive,omitempty"`
|
||||||
InputSensitiveType int64 `json:"input_sensitive_type,omitempty"`
|
|
||||||
OutputSensitive bool `json:"output_sensitive,omitempty"`
|
OutputSensitive bool `json:"output_sensitive,omitempty"`
|
||||||
OutputSensitiveType int64 `json:"output_sensitive_type,omitempty"`
|
|
||||||
Choices []minimaxChoice `json:"choices,omitempty"`
|
Choices []minimaxChoice `json:"choices,omitempty"`
|
||||||
Usage minimaxUsage `json:"usage,omitempty"`
|
Usage minimaxUsage `json:"usage,omitempty"`
|
||||||
Id string `json:"id"`
|
Id string `json:"id"`
|
||||||
@@ -268,6 +264,8 @@ type minimaxChoice struct {
|
|||||||
// minimaxUsage represents token usage statistics.
|
// minimaxUsage represents token usage statistics.
|
||||||
type minimaxUsage struct {
|
type minimaxUsage struct {
|
||||||
TotalTokens int64 `json:"total_tokens"`
|
TotalTokens int64 `json:"total_tokens"`
|
||||||
|
PromptTokens int64 `json:"prompt_tokens"`
|
||||||
|
CompletionTokens int64 `json:"completion_tokens"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *minimaxProvider) parseModel(body []byte) (string, error) {
|
func (m *minimaxProvider) parseModel(body []byte) (string, error) {
|
||||||
@@ -282,7 +280,7 @@ func (m *minimaxProvider) parseModel(body []byte) (string, error) {
|
|||||||
return model, nil
|
return model, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *minimaxProvider) setBotSettings(request *minimaxChatCompletionV2Request, botSettingContent string) {
|
func (m *minimaxProvider) setBotSettings(request *minimaxChatCompletionProRequest, botSettingContent string) {
|
||||||
if len(request.BotSettings) == 0 {
|
if len(request.BotSettings) == 0 {
|
||||||
request.BotSettings = []minimaxBotSetting{
|
request.BotSettings = []minimaxBotSetting{
|
||||||
{
|
{
|
||||||
@@ -304,7 +302,7 @@ func (m *minimaxProvider) setBotSettings(request *minimaxChatCompletionV2Request
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *minimaxProvider) buildMinimaxChatCompletionV2Request(request *chatCompletionRequest, botSettingContent string) *minimaxChatCompletionV2Request {
|
func (m *minimaxProvider) buildMinimaxChatCompletionProRequest(request *chatCompletionRequest, botSettingContent string) *minimaxChatCompletionProRequest {
|
||||||
var messages []minimaxMessage
|
var messages []minimaxMessage
|
||||||
var botSetting []minimaxBotSetting
|
var botSetting []minimaxBotSetting
|
||||||
var botName string
|
var botName string
|
||||||
@@ -343,7 +341,7 @@ func (m *minimaxProvider) buildMinimaxChatCompletionV2Request(request *chatCompl
|
|||||||
SenderType: senderTypeBot,
|
SenderType: senderTypeBot,
|
||||||
SenderName: determineName(botName, defaultBotName),
|
SenderName: determineName(botName, defaultBotName),
|
||||||
}
|
}
|
||||||
result := &minimaxChatCompletionV2Request{
|
result := &minimaxChatCompletionProRequest{
|
||||||
Model: request.Model,
|
Model: request.Model,
|
||||||
Stream: request.Stream,
|
Stream: request.Stream,
|
||||||
TokensToGenerate: int64(request.MaxTokens),
|
TokensToGenerate: int64(request.MaxTokens),
|
||||||
@@ -359,7 +357,7 @@ func (m *minimaxProvider) buildMinimaxChatCompletionV2Request(request *chatCompl
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *minimaxProvider) responseV2ToOpenAI(response *minimaxChatCompletionV2Resp) *chatCompletionResponse {
|
func (m *minimaxProvider) responseProToOpenAI(response *minimaxChatCompletionProResp) *chatCompletionResponse {
|
||||||
var choices []chatCompletionChoice
|
var choices []chatCompletionChoice
|
||||||
messageIndex := 0
|
messageIndex := 0
|
||||||
for _, choice := range response.Choices {
|
for _, choice := range response.Choices {
|
||||||
@@ -385,6 +383,8 @@ func (m *minimaxProvider) responseV2ToOpenAI(response *minimaxChatCompletionV2Re
|
|||||||
Choices: choices,
|
Choices: choices,
|
||||||
Usage: usage{
|
Usage: usage{
|
||||||
TotalTokens: int(response.Usage.TotalTokens),
|
TotalTokens: int(response.Usage.TotalTokens),
|
||||||
|
PromptTokens: int(response.Usage.PromptTokens),
|
||||||
|
CompletionTokens: int(response.Usage.CompletionTokens),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user