higress/plugins/wasm-go/extensions/ai-proxy/provider/minimax.go

package provider

import (
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"strings"

	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
	"github.com/alibaba/higress/plugins/wasm-go/pkg/log"
	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

// minimaxProvider is the provider for minimax service.

const (
	minimaxApiTypeV2  = "v2"  // minimaxApiTypeV2 represents chat completion V2 API.
	minimaxApiTypePro = "pro" // minimaxApiTypePro represents chat completion Pro API.
	minimaxDomain     = "api.minimax.chat"
	// minimaxChatCompletionV2Path represents the API path for chat completion V2 API which has a response format similar to OpenAI's.
	minimaxChatCompletionV2Path = "/v1/text/chatcompletion_v2"
	// minimaxChatCompletionProPath represents the API path for chat completion Pro API which has a different response format from OpenAI's.
	minimaxChatCompletionProPath = "/v1/text/chatcompletion_pro"

	senderTypeUser string = "USER" // Content sent by the user.
	senderTypeBot  string = "BOT"  // Content generated by the model.

	// Default bot settings.
	defaultBotName           string = "MM智能助理"
	defaultBotSettingContent string = "MM智能助理是一款由MiniMax自研的，没有调用其他产品的接口的大型语言模型。MiniMax是一家中国科技公司，一直致力于进行大模型相关的研究。"
	defaultSenderName        string = "小明"
)

type minimaxProviderInitializer struct{}

func (m *minimaxProviderInitializer) ValidateConfig(config *ProviderConfig) error {
	// If using the chat completion Pro API, a group ID must be set.
	if minimaxApiTypePro == config.minimaxApiType && config.minimaxGroupId == "" {
		return fmt.Errorf("missing minimaxGroupId in provider config when minimaxApiType is %s", minimaxApiTypePro)
	}
	if config.apiTokens == nil || len(config.apiTokens) == 0 {
		return errors.New("no apiToken found in provider config")
	}
	return nil
}

func (m *minimaxProviderInitializer) DefaultCapabilities() map[string]string {
	return map[string]string{
		// minimax 替换path的时候，要根据modelmapping替换，这儿的配置无实质作用，只是为了保持和其他provider的一致性
		string(ApiNameChatCompletion): minimaxChatCompletionV2Path,
	}
}

func (m *minimaxProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
	config.setDefaultCapabilities(m.DefaultCapabilities())
	return &minimaxProvider{
		config:       config,
		contextCache: createContextCache(&config),
	}, nil
}

type minimaxProvider struct {
	config       ProviderConfig
	contextCache *contextCache
}

func (m *minimaxProvider) GetProviderType() string {
	return providerTypeMinimax
}

func (m *minimaxProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName) error {
	m.config.handleRequestHeaders(m, ctx, apiName)
	// Delay the header processing to allow changing streaming mode in OnRequestBody
	return nil
}

func (m *minimaxProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header) {
	util.OverwriteRequestHostHeader(headers, minimaxDomain)
	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+m.config.GetApiTokenInUse(ctx))
	headers.Del("Content-Length")
}

func (m *minimaxProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) (types.Action, error) {
	if !m.config.isSupportedAPI(apiName) {
		return types.ActionContinue, errUnsupportedApiName
	}
	if minimaxApiTypePro == m.config.minimaxApiType {
		// Use chat completion Pro API.
		return m.handleRequestBodyByChatCompletionPro(body)
	} else {
		// Use chat completion V2 API.
		return m.config.handleRequestBody(m, m.contextCache, ctx, apiName, body)
	}
}

// handleRequestBodyByChatCompletionPro processes the request body using the chat completion Pro API.
func (m *minimaxProvider) handleRequestBodyByChatCompletionPro(body []byte) (types.Action, error) {
	request := &chatCompletionRequest{}
	if err := decodeChatCompletionRequest(body, request); err != nil {
		return types.ActionContinue, err
	}

	// Map the model and rewrite the request path.
	request.Model = getMappedModel(request.Model, m.config.modelMapping)
	_ = util.OverwriteRequestPath(fmt.Sprintf("%s?GroupId=%s", minimaxChatCompletionProPath, m.config.minimaxGroupId))

	if m.config.context == nil {
		minimaxRequest := m.buildMinimaxChatCompletionProRequest(request, "")
		return types.ActionContinue, replaceJsonRequestBody(minimaxRequest)
	}

	err := m.contextCache.GetContent(func(content string, err error) {
		defer func() {
			_ = proxywasm.ResumeHttpRequest()
		}()
		if err != nil {
			log.Errorf("failed to load context file: %v", err)
			util.ErrorHandler("ai-proxy.minimax.load_ctx_failed", fmt.Errorf("failed to load context file: %v", err))
		}
		// Since minimaxChatCompletionV2 (format consistent with OpenAI) and minimaxChatCompletionPro (different format from OpenAI) have different logic for insertHttpContextMessage, we cannot unify them within one provider.
		// For minimaxChatCompletionPro, we need to manually handle context messages.
		// minimaxChatCompletionV2 uses the default defaultInsertHttpContextMessage method to insert context messages.
		minimaxRequest := m.buildMinimaxChatCompletionProRequest(request, content)
		if err := replaceJsonRequestBody(minimaxRequest); err != nil {
			util.ErrorHandler("ai-proxy.minimax.insert_ctx_failed", fmt.Errorf("failed to replace Request body: %v", err))
		}
	})
	if err == nil {
		return types.ActionPause, nil
	}
	return types.ActionContinue, err
}

func (m *minimaxProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header) ([]byte, error) {
	return m.handleRequestBodyByChatCompletionV2(body, headers)
}

// handleRequestBodyByChatCompletionV2 processes the request body using the chat completion V2 API.
func (m *minimaxProvider) handleRequestBodyByChatCompletionV2(body []byte, headers http.Header) ([]byte, error) {
	util.OverwriteRequestPathHeader(headers, minimaxChatCompletionV2Path)

	rawModel := gjson.GetBytes(body, "model").String()
	mappedModel := getMappedModel(rawModel, m.config.modelMapping)
	return sjson.SetBytes(body, "model", mappedModel)
}

func (m *minimaxProvider) TransformResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header) {
	// Skip OnStreamingResponseBody() and OnResponseBody() when using the original protocol
	// or when the model corresponds to the chat completion V2 interface.
	if m.config.protocol == protocolOriginal || minimaxApiTypePro != m.config.minimaxApiType {
		ctx.DontReadResponseBody()
	} else {
		headers.Del("Content-Length")
	}
}

// OnStreamingResponseBody handles streaming response chunks from the Minimax service only for requests using the OpenAI protocol and corresponding to the chat completion Pro API.
func (m *minimaxProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool) ([]byte, error) {
	if isLastChunk || len(chunk) == 0 {
		return nil, nil
	}
	if name != ApiNameChatCompletion {
		return chunk, nil
	}
	// Sample event response:
	// data: {"created":1689747645,"model":"abab6.5s-chat","reply":"","choices":[{"messages":[{"sender_type":"BOT","sender_name":"MM智能助理","text":"am from China."}]}],"output_sensitive":false}

	// Sample end event response:
	// data: {"created":1689747645,"model":"abab6.5s-chat","reply":"I am from China.","choices":[{"finish_reason":"stop","messages":[{"sender_type":"BOT","sender_name":"MM智能助理","text":"I am from China."}]}],"usage":{"total_tokens":187},"input_sensitive":false,"output_sensitive":false,"id":"0106b3bc9fd844a9f3de1aa06004e2ab","base_resp":{"status_code":0,"status_msg":""}}
	responseBuilder := &strings.Builder{}
	lines := strings.Split(string(chunk), "\n")
	for _, data := range lines {
		if len(data) < 6 {
			// Ignore blank line or improperly formatted lines.
			continue
		}
		data = data[6:]
		var minimaxResp minimaxChatCompletionProResp
		if err := json.Unmarshal([]byte(data), &minimaxResp); err != nil {
			log.Errorf("unable to unmarshal minimax response: %v", err)
			continue
		}
		response := m.responseProToOpenAI(&minimaxResp)
		responseBody, err := json.Marshal(response)
		if err != nil {
			log.Errorf("unable to marshal response: %v", err)
			return nil, err
		}
		m.appendResponse(responseBuilder, string(responseBody))
	}
	modifiedResponseChunk := responseBuilder.String()
	log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
	return []byte(modifiedResponseChunk), nil
}

// TransformResponseBody handles the final response body from the Minimax service only for requests using the OpenAI protocol and corresponding to the chat completion Pro API.
func (m *minimaxProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) ([]byte, error) {
	if apiName != ApiNameChatCompletion {
		return body, nil
	}
	minimaxResp := &minimaxChatCompletionProResp{}
	if err := json.Unmarshal(body, minimaxResp); err != nil {
		return nil, fmt.Errorf("unable to unmarshal minimax response: %v", err)
	}
	if minimaxResp.BaseResp.StatusCode != 0 {
		return nil, fmt.Errorf("minimax response error, error_code: %d, error_message: %s", minimaxResp.BaseResp.StatusCode, minimaxResp.BaseResp.StatusMsg)
	}
	response := m.responseProToOpenAI(minimaxResp)
	return json.Marshal(response)
}

// minimaxChatCompletionProRequest represents the structure of a chat completion Pro request.
type minimaxChatCompletionProRequest struct {
	Model             string                  `json:"model"`
	Stream            bool                    `json:"stream,omitempty"`
	TokensToGenerate  int64                   `json:"tokens_to_generate,omitempty"`
	Temperature       float64                 `json:"temperature,omitempty"`
	TopP              float64                 `json:"top_p,omitempty"`
	MaskSensitiveInfo bool                    `json:"mask_sensitive_info"` // Whether to mask sensitive information, defaults to true.
	Messages          []minimaxMessage        `json:"messages"`
	BotSettings       []minimaxBotSetting     `json:"bot_setting"`
	ReplyConstraints  minimaxReplyConstraints `json:"reply_constraints"`
}

// minimaxMessage represents a message in the conversation.
type minimaxMessage struct {
	SenderType string `json:"sender_type"`
	SenderName string `json:"sender_name"`
	Text       string `json:"text"`
}

// minimaxBotSetting represents the bot's settings.
type minimaxBotSetting struct {
	BotName string `json:"bot_name"`
	Content string `json:"content"`
}

// minimaxReplyConstraints represents requirements for model replies.
type minimaxReplyConstraints struct {
	SenderType string `json:"sender_type"`
	SenderName string `json:"sender_name"`
}

// minimaxChatCompletionProResp represents the structure of a Minimax Chat Completion Pro response.
type minimaxChatCompletionProResp struct {
	Created         int64           `json:"created"`
	Model           string          `json:"model"`
	Reply           string          `json:"reply"`
	InputSensitive  bool            `json:"input_sensitive,omitempty"`
	OutputSensitive bool            `json:"output_sensitive,omitempty"`
	Choices         []minimaxChoice `json:"choices,omitempty"`
	Usage           minimaxUsage    `json:"usage,omitempty"`
	Id              string          `json:"id"`
	BaseResp        minimaxBaseResp `json:"base_resp"`
}

// minimaxBaseResp contains error status code and details.
type minimaxBaseResp struct {
	StatusCode int64  `json:"status_code"`
	StatusMsg  string `json:"status_msg"`
}

// minimaxChoice represents a result option.
type minimaxChoice struct {
	Messages     []minimaxMessage `json:"messages"`
	Index        int64            `json:"index"`
	FinishReason string           `json:"finish_reason"`
}

// minimaxUsage represents token usage statistics.
type minimaxUsage struct {
	TotalTokens      int64 `json:"total_tokens"`
	PromptTokens     int64 `json:"prompt_tokens"`
	CompletionTokens int64 `json:"completion_tokens"`
}

func (m *minimaxProvider) setBotSettings(request *minimaxChatCompletionProRequest, botSettingContent string) {
	if len(request.BotSettings) == 0 {
		request.BotSettings = []minimaxBotSetting{
			{
				BotName: defaultBotName,
				Content: func() string {
					if botSettingContent != "" {
						return botSettingContent
					}
					return defaultBotSettingContent
				}(),
			},
		}
	} else if botSettingContent != "" {
		newSetting := minimaxBotSetting{
			BotName: request.BotSettings[0].BotName,
			Content: botSettingContent,
		}
		request.BotSettings = append([]minimaxBotSetting{newSetting}, request.BotSettings...)
	}
}

func (m *minimaxProvider) buildMinimaxChatCompletionProRequest(request *chatCompletionRequest, botSettingContent string) *minimaxChatCompletionProRequest {
	var messages []minimaxMessage
	var botSetting []minimaxBotSetting
	var botName string

	determineName := func(name string, defaultName string) string {
		if name != "" {
			return name
		}
		return defaultName
	}

	for _, message := range request.Messages {
		switch message.Role {
		case roleSystem:
			botName = determineName(message.Name, defaultBotName)
			botSetting = append(botSetting, minimaxBotSetting{
				BotName: botName,
				Content: message.StringContent(),
			})
		case roleAssistant:
			messages = append(messages, minimaxMessage{
				SenderType: senderTypeBot,
				SenderName: determineName(message.Name, defaultBotName),
				Text:       message.StringContent(),
			})
		case roleUser:
			messages = append(messages, minimaxMessage{
				SenderType: senderTypeUser,
				SenderName: determineName(message.Name, defaultSenderName),
				Text:       message.StringContent(),
			})
		}
	}

	replyConstraints := minimaxReplyConstraints{
		SenderType: senderTypeBot,
		SenderName: determineName(botName, defaultBotName),
	}
	result := &minimaxChatCompletionProRequest{
		Model:             request.Model,
		Stream:            request.Stream,
		TokensToGenerate:  int64(request.MaxTokens),
		Temperature:       request.Temperature,
		TopP:              request.TopP,
		MaskSensitiveInfo: true,
		Messages:          messages,
		BotSettings:       botSetting,
		ReplyConstraints:  replyConstraints,
	}

	m.setBotSettings(result, botSettingContent)
	return result
}

func (m *minimaxProvider) responseProToOpenAI(response *minimaxChatCompletionProResp) *chatCompletionResponse {
	var choices []chatCompletionChoice
	messageIndex := 0
	for _, choice := range response.Choices {
		for _, message := range choice.Messages {
			message := &chatMessage{
				Name:    message.SenderName,
				Role:    roleAssistant,
				Content: message.Text,
			}
			choices = append(choices, chatCompletionChoice{
				FinishReason: util.Ptr(choice.FinishReason),
				Index:        messageIndex,
				Message:      message,
			})
			messageIndex++
		}
	}
	return &chatCompletionResponse{
		Id:      response.Id,
		Object:  objectChatCompletion,
		Created: response.Created,
		Model:   response.Model,
		Choices: choices,
		Usage: &usage{
			TotalTokens:      int(response.Usage.TotalTokens),
			PromptTokens:     int(response.Usage.PromptTokens),
			CompletionTokens: int(response.Usage.CompletionTokens),
		},
	}
}

func (m *minimaxProvider) appendResponse(responseBuilder *strings.Builder, responseBody string) {
	responseBuilder.WriteString(fmt.Sprintf("%s %s\n\n", streamDataItemKey, responseBody))
}

func (m *minimaxProvider) GetApiName(path string) ApiName {
	if strings.Contains(path, minimaxChatCompletionV2Path) || strings.Contains(path, minimaxChatCompletionProPath) {
		return ApiNameChatCompletion
	}
	return ""
}