diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/model.go b/plugins/wasm-go/extensions/ai-proxy/provider/model.go index 72e1cc01d..7e709abd0 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/model.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/model.go @@ -54,7 +54,7 @@ type toolChoice struct { type chatCompletionResponse struct { Id string `json:"id,omitempty"` - Choices []chatCompletionChoice `json:"choices,omitempty"` + Choices []chatCompletionChoice `json:"choices"` Created int64 `json:"created,omitempty"` Model string `json:"model,omitempty"` SystemFingerprint string `json:"system_fingerprint,omitempty"` @@ -102,14 +102,15 @@ func (m *chatMessage) IsEmpty() bool { } type toolCall struct { + Index int `json:"index"` Id string `json:"id"` Type string `json:"type"` Function functionCall `json:"function"` } type functionCall struct { - Id string `json:"id,omitempty"` - Name string `json:"name,omitempty"` + Id string `json:"id"` + Name string `json:"name"` Arguments string `json:"arguments"` } diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go b/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go index 9697e8334..400dde9be 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go @@ -341,6 +341,7 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont Id: qwenResponse.RequestId, Created: time.Now().UnixMilli() / 1000, Model: ctx.GetContext(ctxKeyFinalRequestModel).(string), + Choices: make([]chatCompletionChoice, 0), SystemFingerprint: "", Object: objectChatCompletionChunk, } @@ -350,12 +351,16 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont qwenChoice := qwenResponse.Output.Choices[0] message := qwenChoice.Message - deltaMessage := &chatMessage{Role: message.Role, Content: message.Content, ToolCalls: append([]toolCall{}, message.ToolCalls...)} + deltaContentMessage := &chatMessage{Role: message.Role, Content: message.Content} + deltaToolCallsMessage := &chatMessage{Role: message.Role, ToolCalls: append([]toolCall{}, message.ToolCalls...)} if !incrementalStreaming { if pushedMessage, ok := ctx.GetContext(ctxKeyPushedMessage).(qwenMessage); ok { - deltaMessage.Content = util.StripPrefix(deltaMessage.Content, pushedMessage.Content) - if len(deltaMessage.ToolCalls) > 0 && pushedMessage.ToolCalls != nil { - for i, tc := range deltaMessage.ToolCalls { + if message.Content == "" { + message.Content = pushedMessage.Content + } + deltaContentMessage.Content = util.StripPrefix(deltaContentMessage.Content, pushedMessage.Content) + if len(deltaToolCallsMessage.ToolCalls) > 0 && pushedMessage.ToolCalls != nil { + for i, tc := range deltaToolCallsMessage.ToolCalls { if i >= len(pushedMessage.ToolCalls) { break } @@ -363,24 +368,37 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont tc.Function.Id = util.StripPrefix(tc.Function.Id, pushedFunction.Id) tc.Function.Name = util.StripPrefix(tc.Function.Name, pushedFunction.Name) tc.Function.Arguments = util.StripPrefix(tc.Function.Arguments, pushedFunction.Arguments) - deltaMessage.ToolCalls[i] = tc + deltaToolCallsMessage.ToolCalls[i] = tc } } } ctx.SetContext(ctxKeyPushedMessage, message) } - if !deltaMessage.IsEmpty() { - deltaResponse := *&baseMessage - deltaResponse.Choices = append(deltaResponse.Choices, chatCompletionChoice{Delta: deltaMessage}) - responses = append(responses, &deltaResponse) + if !deltaContentMessage.IsEmpty() { + response := *&baseMessage + response.Choices = append(response.Choices, chatCompletionChoice{Delta: deltaContentMessage}) + responses = append(responses, &response) + } + if !deltaToolCallsMessage.IsEmpty() { + response := *&baseMessage + response.Choices = append(response.Choices, chatCompletionChoice{Delta: deltaToolCallsMessage}) + responses = append(responses, &response) } // Yes, Qwen uses a string "null" as null. if qwenChoice.FinishReason != "" && qwenChoice.FinishReason != "null" { finishResponse := *&baseMessage finishResponse.Choices = append(finishResponse.Choices, chatCompletionChoice{FinishReason: qwenChoice.FinishReason}) - responses = append(responses, &finishResponse) + + usageResponse := *&baseMessage + usageResponse.Usage = chatCompletionUsage{ + PromptTokens: qwenResponse.Usage.InputTokens, + CompletionTokens: qwenResponse.Usage.OutputTokens, + TotalTokens: qwenResponse.Usage.TotalTokens, + } + + responses = append(responses, &finishResponse, &usageResponse) } return responses