support disabling thinking and add reasoning token usage (#3261)

This commit is contained in:
rinfx
2025-12-26 17:04:07 +08:00
committed by jingze
parent 8024a96881
commit 387c337654

View File

@@ -290,6 +290,9 @@ func (v *vertexProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, re
PromptTokens: response.UsageMetadata.PromptTokenCount,
CompletionTokens: response.UsageMetadata.CandidatesTokenCount,
TotalTokens: response.UsageMetadata.TotalTokenCount,
CompletionTokensDetails: &completionTokensDetails{
ReasoningTokens: response.UsageMetadata.ThoughtsTokenCount,
},
},
}
for _, candidate := range response.Candidates {
@@ -400,6 +403,9 @@ func (v *vertexProvider) buildChatCompletionStreamResponse(ctx wrapper.HttpConte
PromptTokens: vertexResp.UsageMetadata.PromptTokenCount,
CompletionTokens: vertexResp.UsageMetadata.CandidatesTokenCount,
TotalTokens: vertexResp.UsageMetadata.TotalTokenCount,
CompletionTokensDetails: &completionTokensDetails{
ReasoningTokens: vertexResp.UsageMetadata.ThoughtsTokenCount,
},
},
}
return &streamResponse
@@ -449,19 +455,22 @@ func (v *vertexProvider) buildVertexChatRequest(request *chatCompletionRequest)
},
}
if request.ReasoningEffort != "" {
thinkingBudget := 1024 // default
switch request.ReasoningEffort {
case "low":
thinkingBudget = 1024
case "medium":
thinkingBudget = 4096
case "high":
thinkingBudget = 16384
}
vertexRequest.GenerationConfig.ThinkingConfig = vertexThinkingConfig{
thinkingConfig := vertexThinkingConfig{
IncludeThoughts: true,
ThinkingBudget: thinkingBudget,
ThinkingBudget: 1024,
}
switch request.ReasoningEffort {
case "none":
thinkingConfig.IncludeThoughts = false
thinkingConfig.ThinkingBudget = 0
case "low":
thinkingConfig.ThinkingBudget = 1024
case "medium":
thinkingConfig.ThinkingBudget = 4096
case "high":
thinkingConfig.ThinkingBudget = 16384
}
vertexRequest.GenerationConfig.ThinkingConfig = thinkingConfig
}
if request.Tools != nil {
functions := make([]function, 0, len(request.Tools))
@@ -682,6 +691,7 @@ type vertexUsageMetadata struct {
PromptTokenCount int `json:"promptTokenCount,omitempty"`
CandidatesTokenCount int `json:"candidatesTokenCount,omitempty"`
TotalTokenCount int `json:"totalTokenCount,omitempty"`
ThoughtsTokenCount int `json:"thoughtsTokenCount,omitempty"`
}
type vertexEmbeddingResponse struct {