Support disabling thinking and add reasoning token usage (#3261)

This commit is contained in:
rinfx
2025-12-26 17:04:07 +08:00
committed by GitHub
parent 17e80b30fe
commit 5cc9f65aaa

View File

@@ -290,6 +290,9 @@ func (v *vertexProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, re
PromptTokens: response.UsageMetadata.PromptTokenCount, PromptTokens: response.UsageMetadata.PromptTokenCount,
CompletionTokens: response.UsageMetadata.CandidatesTokenCount, CompletionTokens: response.UsageMetadata.CandidatesTokenCount,
TotalTokens: response.UsageMetadata.TotalTokenCount, TotalTokens: response.UsageMetadata.TotalTokenCount,
CompletionTokensDetails: &completionTokensDetails{
ReasoningTokens: response.UsageMetadata.ThoughtsTokenCount,
},
}, },
} }
for _, candidate := range response.Candidates { for _, candidate := range response.Candidates {
@@ -400,6 +403,9 @@ func (v *vertexProvider) buildChatCompletionStreamResponse(ctx wrapper.HttpConte
PromptTokens: vertexResp.UsageMetadata.PromptTokenCount, PromptTokens: vertexResp.UsageMetadata.PromptTokenCount,
CompletionTokens: vertexResp.UsageMetadata.CandidatesTokenCount, CompletionTokens: vertexResp.UsageMetadata.CandidatesTokenCount,
TotalTokens: vertexResp.UsageMetadata.TotalTokenCount, TotalTokens: vertexResp.UsageMetadata.TotalTokenCount,
CompletionTokensDetails: &completionTokensDetails{
ReasoningTokens: vertexResp.UsageMetadata.ThoughtsTokenCount,
},
}, },
} }
return &streamResponse return &streamResponse
@@ -449,19 +455,22 @@ func (v *vertexProvider) buildVertexChatRequest(request *chatCompletionRequest)
}, },
} }
if request.ReasoningEffort != "" { if request.ReasoningEffort != "" {
thinkingBudget := 1024 // default thinkingConfig := vertexThinkingConfig{
switch request.ReasoningEffort {
case "low":
thinkingBudget = 1024
case "medium":
thinkingBudget = 4096
case "high":
thinkingBudget = 16384
}
vertexRequest.GenerationConfig.ThinkingConfig = vertexThinkingConfig{
IncludeThoughts: true, IncludeThoughts: true,
ThinkingBudget: thinkingBudget, ThinkingBudget: 1024,
} }
switch request.ReasoningEffort {
case "none":
thinkingConfig.IncludeThoughts = false
thinkingConfig.ThinkingBudget = 0
case "low":
thinkingConfig.ThinkingBudget = 1024
case "medium":
thinkingConfig.ThinkingBudget = 4096
case "high":
thinkingConfig.ThinkingBudget = 16384
}
vertexRequest.GenerationConfig.ThinkingConfig = thinkingConfig
} }
if request.Tools != nil { if request.Tools != nil {
functions := make([]function, 0, len(request.Tools)) functions := make([]function, 0, len(request.Tools))
@@ -682,6 +691,7 @@ type vertexUsageMetadata struct {
PromptTokenCount int `json:"promptTokenCount,omitempty"` PromptTokenCount int `json:"promptTokenCount,omitempty"`
CandidatesTokenCount int `json:"candidatesTokenCount,omitempty"` CandidatesTokenCount int `json:"candidatesTokenCount,omitempty"`
TotalTokenCount int `json:"totalTokenCount,omitempty"` TotalTokenCount int `json:"totalTokenCount,omitempty"`
ThoughtsTokenCount int `json:"thoughtsTokenCount,omitempty"`
} }
type vertexEmbeddingResponse struct { type vertexEmbeddingResponse struct {