Mirror of https://github.com/alibaba/higress.git
feat(ai-proxy): Adjust the streaming response structure to keep it consistent with the OpenAI API (#2391)
Signed-off-by: Xijun Dai <daixijun1990@gmail.com>
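The commit switches `Usage` and `FinishReason` from value types to pointers so that fields OpenAI serializes as explicit `null` on intermediate stream chunks are no longer silently dropped (or emitted as zero values) by `omitempty`. A minimal sketch of the serialization difference, using trimmed stand-ins for the plugin's types (`choiceOld`/`choiceNew` are illustrative names, not from the diff):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Before: value field with omitempty — an unset finish reason vanishes from the JSON.
type choiceOld struct {
	Index        int    `json:"index"`
	FinishReason string `json:"finish_reason,omitempty"`
}

// After: pointer field without omitempty — an unset finish reason serializes as null.
type choiceNew struct {
	Index        int     `json:"index"`
	FinishReason *string `json:"finish_reason"`
}

func main() {
	a, _ := json.Marshal(choiceOld{Index: 0})
	b, _ := json.Marshal(choiceNew{Index: 0})
	fmt.Println(string(a)) // {"index":0}
	fmt.Println(string(b)) // {"index":0,"finish_reason":null}
}
```

OpenAI's streaming chunks carry `"finish_reason":null` on every delta until the final one, so clients that key on the field's presence break against the old encoding.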

plugins/wasm-go/extensions/ai-proxy/provider/bedrock.go

@@ -42,8 +42,7 @@ const (
     requestIdHeader = "X-Amzn-Requestid"
 )
 
-type bedrockProviderInitializer struct {
-}
+type bedrockProviderInitializer struct{}
 
 func (b *bedrockProviderInitializer) ValidateConfig(config *ProviderConfig) error {
     if len(config.awsAccessKey) == 0 || len(config.awsSecretKey) == 0 {
@@ -104,7 +103,7 @@ func (b *bedrockProvider) convertEventFromBedrockToOpenAI(ctx wrapper.HttpContex
         chatChoice.Delta = &chatMessage{Content: bedrockEvent.Delta.Text}
     }
     if bedrockEvent.StopReason != nil {
-        chatChoice.FinishReason = stopReasonBedrock2OpenAI(*bedrockEvent.StopReason)
+        chatChoice.FinishReason = util.Ptr(stopReasonBedrock2OpenAI(*bedrockEvent.StopReason))
     }
     choices = append(choices, chatChoice)
     requestId := ctx.GetStringContext(requestIdHeader, "")
@@ -118,7 +117,7 @@ func (b *bedrockProvider) convertEventFromBedrockToOpenAI(ctx wrapper.HttpContex
     }
     if bedrockEvent.Usage != nil {
         openAIFormattedChunk.Choices = choices[:0]
-        openAIFormattedChunk.Usage = usage{
+        openAIFormattedChunk.Usage = &usage{
             CompletionTokens: bedrockEvent.Usage.OutputTokens,
             PromptTokens:     bedrockEvent.Usage.InputTokens,
             TotalTokens:      bedrockEvent.Usage.TotalTokens,
@@ -756,7 +755,7 @@ func (b *bedrockProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, b
                 Role:    bedrockResponse.Output.Message.Role,
                 Content: outputContent,
             },
-            FinishReason: stopReasonBedrock2OpenAI(bedrockResponse.StopReason),
+            FinishReason: util.Ptr(stopReasonBedrock2OpenAI(bedrockResponse.StopReason)),
         },
     }
     requestId := ctx.GetStringContext(requestIdHeader, "")
@@ -768,7 +767,7 @@ func (b *bedrockProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, b
         SystemFingerprint: "",
         Object:            objectChatCompletion,
         Choices:           choices,
-        Usage: usage{
+        Usage: &usage{
             PromptTokens:     bedrockResponse.Usage.InputTokens,
             CompletionTokens: bedrockResponse.Usage.OutputTokens,
             TotalTokens:      bedrockResponse.Usage.TotalTokens,

plugins/wasm-go/extensions/ai-proxy/provider/claude.go

@@ -341,7 +341,7 @@ func (c *claudeProvider) responseClaude2OpenAI(ctx wrapper.HttpContext, origResp
     choice := chatCompletionChoice{
         Index:        0,
         Message:      &chatMessage{Role: roleAssistant, Content: origResponse.Content[0].Text},
-        FinishReason: stopReasonClaude2OpenAI(origResponse.StopReason),
+        FinishReason: util.Ptr(stopReasonClaude2OpenAI(origResponse.StopReason)),
     }
 
     return &chatCompletionResponse{
@@ -351,7 +351,7 @@ func (c *claudeProvider) responseClaude2OpenAI(ctx wrapper.HttpContext, origResp
         SystemFingerprint: "",
         Object:            objectChatCompletion,
         Choices:           []chatCompletionChoice{choice},
-        Usage: usage{
+        Usage: &usage{
             PromptTokens:     origResponse.Usage.InputTokens,
             CompletionTokens: origResponse.Usage.OutputTokens,
             TotalTokens:      origResponse.Usage.InputTokens + origResponse.Usage.OutputTokens,
@@ -404,7 +404,7 @@ func (c *claudeProvider) streamResponseClaude2OpenAI(ctx wrapper.HttpContext, or
         choice := chatCompletionChoice{
             Index:        origResponse.Index,
             Delta:        &chatMessage{},
-            FinishReason: stopReasonClaude2OpenAI(origResponse.Delta.StopReason),
+            FinishReason: util.Ptr(stopReasonClaude2OpenAI(origResponse.Delta.StopReason)),
         }
         return c.createChatCompletionResponse(ctx, origResponse, choice)
     case "message_stop":
@@ -415,7 +415,7 @@ func (c *claudeProvider) streamResponseClaude2OpenAI(ctx wrapper.HttpContext, or
             Object:      objectChatCompletionChunk,
             Choices:     []chatCompletionChoice{},
             ServiceTier: c.serviceTier,
-            Usage: usage{
+            Usage: &usage{
                 PromptTokens:     c.usage.PromptTokens,
                 CompletionTokens: c.usage.CompletionTokens,
                 TotalTokens:      c.usage.TotalTokens,

plugins/wasm-go/extensions/ai-proxy/provider/dify.go

@@ -116,23 +116,23 @@ func (d *difyProvider) responseDify2OpenAI(ctx wrapper.HttpContext, response *Di
         choice = chatCompletionChoice{
             Index:        0,
             Message:      &chatMessage{Role: roleAssistant, Content: response.Answer},
-            FinishReason: finishReasonStop,
+            FinishReason: util.Ptr(finishReasonStop),
         }
-        //add the conversationId field to the response header
+        // add the conversationId field to the response header
         _ = proxywasm.ReplaceHttpResponseHeader("ConversationId", response.ConversationId)
         id = response.ConversationId
     case BotTypeCompletion:
         choice = chatCompletionChoice{
             Index:        0,
             Message:      &chatMessage{Role: roleAssistant, Content: response.Answer},
-            FinishReason: finishReasonStop,
+            FinishReason: util.Ptr(finishReasonStop),
         }
         id = response.MessageId
     case BotTypeWorkflow:
         choice = chatCompletionChoice{
             Index:        0,
             Message:      &chatMessage{Role: roleAssistant, Content: response.Data.Outputs[d.config.outputVariable]},
-            FinishReason: finishReasonStop,
+            FinishReason: util.Ptr(finishReasonStop),
         }
         id = response.Data.WorkflowId
     }
@@ -143,7 +143,7 @@ func (d *difyProvider) responseDify2OpenAI(ctx wrapper.HttpContext, response *Di
         SystemFingerprint: "",
         Object:            objectChatCompletion,
         Choices:           []chatCompletionChoice{choice},
-        Usage:             response.MetaData.Usage,
+        Usage:             &response.MetaData.Usage,
     }
 }
 
@@ -188,7 +188,7 @@ func (d *difyProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name Api
 func (d *difyProvider) streamResponseDify2OpenAI(ctx wrapper.HttpContext, response *DifyChunkChatResponse) *chatCompletionResponse {
     var choice chatCompletionChoice
     var id string
-    var responseUsage usage
+    var responseUsage *usage
     switch d.config.botType {
     case BotTypeChat, BotTypeAgent:
         choice = chatCompletionChoice{
@@ -211,9 +211,9 @@ func (d *difyProvider) streamResponseDify2OpenAI(ctx wrapper.HttpContext, respon
         id = response.Data.WorkflowId
     }
     if response.Event == "message_end" || response.Event == "workflow_finished" {
-        choice.FinishReason = finishReasonStop
+        choice.FinishReason = util.Ptr(finishReasonStop)
         if response.Event == "message_end" {
-            responseUsage = usage{
+            responseUsage = &usage{
                 PromptTokens:     response.MetaData.Usage.PromptTokens,
                 CompletionTokens: response.MetaData.Usage.CompletionTokens,
                 TotalTokens:      response.MetaData.Usage.TotalTokens,

plugins/wasm-go/extensions/ai-proxy/provider/gemini.go

@@ -500,7 +500,7 @@ func (g *geminiProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, re
         Object:  objectChatCompletion,
         Created: time.Now().UnixMilli() / 1000,
         Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
-        Usage: usage{
+        Usage: &usage{
             PromptTokens:     response.UsageMetadata.PromptTokenCount,
             CompletionTokens: response.UsageMetadata.CandidatesTokenCount,
             TotalTokens:      response.UsageMetadata.TotalTokenCount,
@@ -514,7 +514,7 @@ func (g *geminiProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, re
             Message: &chatMessage{
                 Role: roleAssistant,
             },
-            FinishReason: finishReasonStop,
+            FinishReason: util.Ptr(finishReasonStop),
         }
         if part.FunctionCall != nil {
             choice.Message.ToolCalls = g.buildToolCalls(&candidate)
@@ -524,7 +524,7 @@ func (g *geminiProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, re
             choice.Message.Content = part.Text
         }
 
-        choice.FinishReason = candidate.FinishReason
+        choice.FinishReason = util.Ptr(candidate.FinishReason)
         fullTextResponse.Choices = append(fullTextResponse.Choices, choice)
         choiceIndex += 1
     }
@@ -567,7 +567,7 @@ func (g *geminiProvider) buildChatCompletionStreamResponse(ctx wrapper.HttpConte
         Created: time.Now().UnixMilli() / 1000,
         Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
         Choices: []chatCompletionChoice{choice},
-        Usage: usage{
+        Usage: &usage{
             PromptTokens:     geminiResp.UsageMetadata.PromptTokenCount,
             CompletionTokens: geminiResp.UsageMetadata.CandidatesTokenCount,
             TotalTokens:      geminiResp.UsageMetadata.TotalTokenCount,

plugins/wasm-go/extensions/ai-proxy/provider/hunyuan.go

@@ -387,7 +387,7 @@ func (m *hunyuanProvider) convertChunkFromHunyuanToOpenAI(ctx wrapper.HttpContex
         Model:             ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
         SystemFingerprint: "",
         Object:            objectChatCompletionChunk,
-        Usage: usage{
+        Usage: &usage{
             PromptTokens:     hunyuanFormattedChunk.Usage.PromptTokens,
             CompletionTokens: hunyuanFormattedChunk.Usage.CompletionTokens,
             TotalTokens:      hunyuanFormattedChunk.Usage.TotalTokens,
@@ -400,7 +400,7 @@ func (m *hunyuanProvider) convertChunkFromHunyuanToOpenAI(ctx wrapper.HttpContex
     if hunyuanFormattedChunk.Choices[0].FinishReason == hunyuanStreamEndMark {
         // log.Debugf("@@@ --- final chunk: ")
         openAIFormattedChunk.Choices = append(openAIFormattedChunk.Choices, chatCompletionChoice{
-            FinishReason: hunyuanFormattedChunk.Choices[0].FinishReason,
+            FinishReason: util.Ptr(hunyuanFormattedChunk.Choices[0].FinishReason),
         })
     } else {
         deltaMsg := chatMessage{
@@ -495,7 +495,7 @@ func (m *hunyuanProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, h
                 Content:   choice.Message.Content,
                 ToolCalls: nil,
             },
-            FinishReason: choice.FinishReason,
+            FinishReason: util.Ptr(choice.FinishReason),
         })
     }
     return &chatCompletionResponse{
@@ -505,7 +505,7 @@ func (m *hunyuanProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, h
         SystemFingerprint: "",
         Object:            objectChatCompletion,
         Choices:           choices,
-        Usage: usage{
+        Usage: &usage{
             PromptTokens:     hunyuanResponse.Response.Usage.PromptTokens,
             CompletionTokens: hunyuanResponse.Response.Usage.CompletionTokens,
             TotalTokens:      hunyuanResponse.Response.Usage.TotalTokens,

plugins/wasm-go/extensions/ai-proxy/provider/minimax.go

@@ -36,8 +36,7 @@ const (
     defaultSenderName string = "小明"
 )
 
-type minimaxProviderInitializer struct {
-}
+type minimaxProviderInitializer struct{}
 
 func (m *minimaxProviderInitializer) ValidateConfig(config *ProviderConfig) error {
     // If using the chat completion Pro API, a group ID must be set.
@@ -368,7 +367,7 @@ func (m *minimaxProvider) responseProToOpenAI(response *minimaxChatCompletionPro
             Content: message.Text,
         }
         choices = append(choices, chatCompletionChoice{
-            FinishReason: choice.FinishReason,
+            FinishReason: util.Ptr(choice.FinishReason),
             Index:        messageIndex,
             Message:      message,
         })
@@ -381,7 +380,7 @@ func (m *minimaxProvider) responseProToOpenAI(response *minimaxChatCompletionPro
         Created: response.Created,
         Model:   response.Model,
         Choices: choices,
-        Usage: usage{
+        Usage: &usage{
             TotalTokens:      int(response.Usage.TotalTokens),
             PromptTokens:     int(response.Usage.PromptTokens),
             CompletionTokens: int(response.Usage.CompletionTokens),

plugins/wasm-go/extensions/ai-proxy/provider/model.go

@@ -138,15 +138,15 @@ type chatCompletionResponse struct {
     ServiceTier       string `json:"service_tier,omitempty"`
     SystemFingerprint string `json:"system_fingerprint,omitempty"`
     Object            string `json:"object,omitempty"`
-    Usage             usage  `json:"usage,omitempty"`
+    Usage             *usage `json:"usage"`
 }
 
 type chatCompletionChoice struct {
     Index        int                    `json:"index"`
     Message      *chatMessage           `json:"message,omitempty"`
     Delta        *chatMessage           `json:"delta,omitempty"`
-    FinishReason string                 `json:"finish_reason,omitempty"`
-    Logprobs     map[string]interface{} `json:"logprobs,omitempty"`
+    FinishReason *string                `json:"finish_reason"`
+    Logprobs     map[string]interface{} `json:"logprobs"`
 }
 
 type usage struct {
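
Worth noting why the old `usage,omitempty` tag misbehaved: `encoding/json` never treats a zero struct as empty, so the value-typed field was always serialized, zero token counts and all, while a nil `*usage` marshals as `null`. A self-contained sketch (struct trimmed to the three token fields):

```go
package main

import (
	"encoding/json"
	"fmt"
)

type usage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`
}

// omitempty does not omit zero structs, so this always emits "usage".
type respOld struct {
	Usage usage `json:"usage,omitempty"`
}

// A nil pointer marshals as null, matching OpenAI's intermediate chunks.
type respNew struct {
	Usage *usage `json:"usage"`
}

func main() {
	a, _ := json.Marshal(respOld{})
	b, _ := json.Marshal(respNew{})
	fmt.Println(string(a)) // {"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
	fmt.Println(string(b)) // {"usage":null}
}
```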

plugins/wasm-go/extensions/ai-proxy/provider/qwen.go

@@ -302,7 +302,7 @@ func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwen
         message := qwenMessageToChatMessage(qwenChoice.Message, m.config.reasoningContentMode)
         choices = append(choices, chatCompletionChoice{
             Message:      &message,
-            FinishReason: qwenChoice.FinishReason,
+            FinishReason: util.Ptr(qwenChoice.FinishReason),
         })
     }
     return &chatCompletionResponse{
@@ -312,7 +312,7 @@ func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwen
         SystemFingerprint: "",
         Object:            objectChatCompletion,
         Choices:           choices,
-        Usage: usage{
+        Usage: &usage{
             PromptTokens:     qwenResponse.Usage.InputTokens,
             CompletionTokens: qwenResponse.Usage.OutputTokens,
             TotalTokens:      qwenResponse.Usage.TotalTokens,
@@ -413,11 +413,11 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
 
     if finished {
         finishResponse := *&baseMessage
-        finishResponse.Choices = append(finishResponse.Choices, chatCompletionChoice{Delta: &chatMessage{}, FinishReason: qwenChoice.FinishReason})
+        finishResponse.Choices = append(finishResponse.Choices, chatCompletionChoice{Delta: &chatMessage{}, FinishReason: util.Ptr(qwenChoice.FinishReason)})
 
         usageResponse := *&baseMessage
         usageResponse.Choices = []chatCompletionChoice{{Delta: &chatMessage{}}}
-        usageResponse.Usage = usage{
+        usageResponse.Usage = &usage{
             PromptTokens:     qwenResponse.Usage.InputTokens,
             CompletionTokens: qwenResponse.Usage.OutputTokens,
             TotalTokens:      qwenResponse.Usage.TotalTokens,

plugins/wasm-go/extensions/ai-proxy/provider/spark.go

@@ -150,7 +150,7 @@ func (p *sparkProvider) responseSpark2OpenAI(ctx wrapper.HttpContext, response *
         Object:  objectChatCompletion,
         Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
         Choices: choices,
-        Usage:   response.Usage,
+        Usage:   &response.Usage,
     }
 }
 
@@ -168,7 +168,7 @@ func (p *sparkProvider) streamResponseSpark2OpenAI(ctx wrapper.HttpContext, resp
         Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
         Object:  objectChatCompletion,
         Choices: choices,
-        Usage:   response.Usage,
+        Usage:   &response.Usage,
     }
 }

plugins/wasm-go/extensions/ai-proxy/provider/vertex.go

@@ -32,8 +32,7 @@ const (
     vertexEmbeddingAction = "predict"
 )
 
-type vertexProviderInitializer struct {
-}
+type vertexProviderInitializer struct{}
 
 func (v *vertexProviderInitializer) ValidateConfig(config *ProviderConfig) error {
     if config.vertexAuthKey == "" {
@@ -245,7 +244,7 @@ func (v *vertexProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, re
         Created: time.Now().UnixMilli() / 1000,
         Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
         Choices: make([]chatCompletionChoice, 0, len(response.Candidates)),
-        Usage: usage{
+        Usage: &usage{
             PromptTokens:     response.UsageMetadata.PromptTokenCount,
             CompletionTokens: response.UsageMetadata.CandidatesTokenCount,
             TotalTokens:      response.UsageMetadata.TotalTokenCount,
@@ -257,7 +256,7 @@ func (v *vertexProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, re
             Message: &chatMessage{
                 Role: roleAssistant,
             },
-            FinishReason: candidate.FinishReason,
+            FinishReason: util.Ptr(candidate.FinishReason),
         }
         if len(candidate.Content.Parts) > 0 {
             choice.Message.Content = candidate.Content.Parts[0].Text
@@ -310,7 +309,7 @@ func (v *vertexProvider) buildChatCompletionStreamResponse(ctx wrapper.HttpConte
         Created: time.Now().UnixMilli() / 1000,
         Model:   ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
         Choices: []chatCompletionChoice{choice},
-        Usage: usage{
+        Usage: &usage{
             PromptTokens:     vertexResp.UsageMetadata.PromptTokenCount,
             CompletionTokens: vertexResp.UsageMetadata.CandidatesTokenCount,
             TotalTokens:      vertexResp.UsageMetadata.TotalTokenCount,

plugins/wasm-go/extensions/ai-proxy/util/ptr.go (new file, +5 lines)

@@ -0,0 +1,5 @@
+package util
+
+func Ptr[T any](v T) *T {
+    return &v
+}
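
`Ptr` is the usual generics idiom for pointerizing an expression: Go forbids taking the address of a function result or a literal (`&stopReasonBedrock2OpenAI(...)` or `&"stop"` would not compile), but a function parameter is addressable, so `Ptr` returns the address of a fresh copy. A standalone sketch with an inlined copy of the helper and illustrative values:

```go
package main

import "fmt"

// Copy of util.Ptr: v is an addressable copy of the argument.
func Ptr[T any](v T) *T {
	return &v
}

func main() {
	reason := Ptr("stop") // *string, no throwaway variable needed
	tokens := Ptr(42)     // *int
	fmt.Println(*reason, *tokens) // prints: stop 42
}
```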