mirror of
https://github.com/alibaba/higress.git
synced 2026-03-06 01:20:51 +08:00
Support disabling thinking and add reasoning token usage (#3261)
This commit is contained in:
@@ -290,6 +290,9 @@ func (v *vertexProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, re
|
||||
PromptTokens: response.UsageMetadata.PromptTokenCount,
|
||||
CompletionTokens: response.UsageMetadata.CandidatesTokenCount,
|
||||
TotalTokens: response.UsageMetadata.TotalTokenCount,
|
||||
CompletionTokensDetails: &completionTokensDetails{
|
||||
ReasoningTokens: response.UsageMetadata.ThoughtsTokenCount,
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, candidate := range response.Candidates {
|
||||
@@ -400,6 +403,9 @@ func (v *vertexProvider) buildChatCompletionStreamResponse(ctx wrapper.HttpConte
|
||||
PromptTokens: vertexResp.UsageMetadata.PromptTokenCount,
|
||||
CompletionTokens: vertexResp.UsageMetadata.CandidatesTokenCount,
|
||||
TotalTokens: vertexResp.UsageMetadata.TotalTokenCount,
|
||||
CompletionTokensDetails: &completionTokensDetails{
|
||||
ReasoningTokens: vertexResp.UsageMetadata.ThoughtsTokenCount,
|
||||
},
|
||||
},
|
||||
}
|
||||
return &streamResponse
|
||||
@@ -449,19 +455,22 @@ func (v *vertexProvider) buildVertexChatRequest(request *chatCompletionRequest)
|
||||
},
|
||||
}
|
||||
if request.ReasoningEffort != "" {
|
||||
thinkingBudget := 1024 // default
|
||||
switch request.ReasoningEffort {
|
||||
case "low":
|
||||
thinkingBudget = 1024
|
||||
case "medium":
|
||||
thinkingBudget = 4096
|
||||
case "high":
|
||||
thinkingBudget = 16384
|
||||
}
|
||||
vertexRequest.GenerationConfig.ThinkingConfig = vertexThinkingConfig{
|
||||
thinkingConfig := vertexThinkingConfig{
|
||||
IncludeThoughts: true,
|
||||
ThinkingBudget: thinkingBudget,
|
||||
ThinkingBudget: 1024,
|
||||
}
|
||||
switch request.ReasoningEffort {
|
||||
case "none":
|
||||
thinkingConfig.IncludeThoughts = false
|
||||
thinkingConfig.ThinkingBudget = 0
|
||||
case "low":
|
||||
thinkingConfig.ThinkingBudget = 1024
|
||||
case "medium":
|
||||
thinkingConfig.ThinkingBudget = 4096
|
||||
case "high":
|
||||
thinkingConfig.ThinkingBudget = 16384
|
||||
}
|
||||
vertexRequest.GenerationConfig.ThinkingConfig = thinkingConfig
|
||||
}
|
||||
if request.Tools != nil {
|
||||
functions := make([]function, 0, len(request.Tools))
|
||||
@@ -682,6 +691,7 @@ type vertexUsageMetadata struct {
|
||||
PromptTokenCount int `json:"promptTokenCount,omitempty"`
|
||||
CandidatesTokenCount int `json:"candidatesTokenCount,omitempty"`
|
||||
TotalTokenCount int `json:"totalTokenCount,omitempty"`
|
||||
ThoughtsTokenCount int `json:"thoughtsTokenCount,omitempty"`
|
||||
}
|
||||
|
||||
type vertexEmbeddingResponse struct {
|
||||
|
||||
Reference in New Issue
Block a user