fix(ai-proxy): resolve Claude streaming response conversion and SSE event chunking issues (#2882)
@@ -406,7 +406,8 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
 		return chunk
 	}
 	if len(outputEvents) == 0 {
-		responseBuilder.WriteString(event.ToHttpString())
+		// no need to convert, keep original events
+		responseBuilder.WriteString(event.RawEvent)
 	} else {
 		for _, outputEvent := range outputEvents {
 			responseBuilder.WriteString(outputEvent.ToHttpString())

@@ -138,7 +138,7 @@ type claudeSystemPrompt struct {
 	// Will be set to the string value if system is a simple string
 	StringValue string
 	// Will be set to the array value if system is an array of text blocks
-	ArrayValue []claudeTextGenContent
+	ArrayValue []claudeChatMessageContent
 	// Indicates which type this represents
 	IsArray bool
 }

@@ -154,7 +154,7 @@ func (csp *claudeSystemPrompt) UnmarshalJSON(data []byte) error {
 	}
 
 	// Try to unmarshal as array of text blocks
-	var arrayValue []claudeTextGenContent
+	var arrayValue []claudeChatMessageContent
 	if err := json.Unmarshal(data, &arrayValue); err == nil {
 		csp.ArrayValue = arrayValue
 		csp.IsArray = true
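
For context on the type swap above: Claude's `system` field accepts either a plain string or an array of text blocks, which is why `claudeSystemPrompt` carries a custom unmarshaller. A standalone sketch of the try-string-then-array pattern (type names mirror the diff, but the field sets are trimmed for illustration):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed stand-in for the plugin's content block type.
type claudeChatMessageContent struct {
	Type string `json:"type"`
	Text string `json:"text"`
}

type claudeSystemPrompt struct {
	StringValue string
	ArrayValue  []claudeChatMessageContent
	IsArray     bool
}

func (csp *claudeSystemPrompt) UnmarshalJSON(data []byte) error {
	// Try the simple string form first.
	var s string
	if err := json.Unmarshal(data, &s); err == nil {
		csp.StringValue = s
		return nil
	}
	// Fall back to the array-of-text-blocks form.
	var arr []claudeChatMessageContent
	if err := json.Unmarshal(data, &arr); err == nil {
		csp.ArrayValue = arr
		csp.IsArray = true
		return nil
	}
	return fmt.Errorf("system must be a string or an array of text blocks")
}

func main() {
	var a, b claudeSystemPrompt
	_ = json.Unmarshal([]byte(`"You are a helpful assistant."`), &a)
	_ = json.Unmarshal([]byte(`[{"type":"text","text":"xxx"},{"type":"text","text":"yyy"}]`), &b)
	fmt.Println(a.StringValue, b.IsArray, len(b.ArrayValue)) // You are a helpful assistant. true 2
}
```
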
@@ -196,7 +196,7 @@ type claudeThinkingConfig struct {
 type claudeTextGenRequest struct {
 	Model         string              `json:"model"`
 	Messages      []claudeChatMessage `json:"messages"`
-	System        claudeSystemPrompt  `json:"system,omitempty"`
+	System        *claudeSystemPrompt `json:"system,omitempty"`
 	MaxTokens     int                 `json:"max_tokens,omitempty"`
 	StopSequences []string            `json:"stop_sequences,omitempty"`
 	Stream        bool                `json:"stream,omitempty"`
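
The switch to `*claudeSystemPrompt` matters because encoding/json's `omitempty` never omits a non-pointer struct, so a value-typed `System` would always emit a `"system"` key even when no system prompt was set. A minimal demonstration:

```go
package main

import (
	"encoding/json"
	"fmt"
)

type prompt struct{ StringValue string }

type reqValue struct {
	System prompt `json:"system,omitempty"` // struct value: omitempty has no effect
}

type reqPtr struct {
	System *prompt `json:"system,omitempty"` // nil pointer: field is dropped
}

func main() {
	a, _ := json.Marshal(reqValue{})
	b, _ := json.Marshal(reqPtr{})
	fmt.Println(string(a)) // {"system":{"StringValue":""}}
	fmt.Println(string(b)) // {}
}
```
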
@@ -232,9 +232,11 @@ type claudeTextGenContent struct {
 }
 
 type claudeTextGenUsage struct {
-	InputTokens  int    `json:"input_tokens,omitempty"`
-	OutputTokens int    `json:"output_tokens,omitempty"`
-	ServiceTier  string `json:"service_tier,omitempty"`
+	InputTokens              int    `json:"input_tokens,omitempty"`
+	OutputTokens             int    `json:"output_tokens,omitempty"`
+	CacheReadInputTokens     int    `json:"cache_read_input_tokens,omitempty"`
+	CacheCreationInputTokens int    `json:"cache_creation_input_tokens,omitempty"`
+	ServiceTier              string `json:"service_tier,omitempty"`
 }
 
 type claudeTextGenError struct {

@@ -254,6 +256,7 @@ type claudeTextGenStreamResponse struct {
 type claudeTextGenDelta struct {
 	Type         string  `json:"type"`
 	Text         string  `json:"text,omitempty"`
+	PartialJson  string  `json:"partial_json,omitempty"`
 	StopReason   *string `json:"stop_reason,omitempty"`
 	StopSequence *string `json:"stop_sequence,omitempty"`
 }

@@ -401,7 +404,7 @@ func (c *claudeProvider) buildClaudeTextGenRequest(origRequest *chatCompletionRe
 
 	for _, message := range origRequest.Messages {
 		if message.Role == roleSystem {
-			claudeRequest.System = claudeSystemPrompt{
+			claudeRequest.System = &claudeSystemPrompt{
 				StringValue: message.StringContent(),
 				IsArray:     false,
 			}

@@ -622,12 +625,12 @@ func (c *claudeProvider) insertHttpContextMessage(body []byte, content string, o
 
 	systemStr := request.System.String()
 	if systemStr == "" {
-		request.System = claudeSystemPrompt{
+		request.System = &claudeSystemPrompt{
 			StringValue: content,
 			IsArray:     false,
 		}
 	} else {
-		request.System = claudeSystemPrompt{
+		request.System = &claudeSystemPrompt{
 			StringValue: content + "\n" + systemStr,
 			IsArray:     false,
 		}

@@ -29,7 +29,18 @@ type ClaudeToOpenAIConverter struct {
 	toolBlockStarted bool
 	toolBlockStopped bool
 	// Tool call state tracking
-	toolCallStates map[string]*toolCallState
+	toolCallStates  map[int]*toolCallInfo // Map of OpenAI index to tool call state
+	activeToolIndex *int                  // Currently active tool call index (for Claude serialization)
 }
 
+// toolCallInfo tracks tool call state
+type toolCallInfo struct {
+	id                  string // Tool call ID
+	name                string // Tool call name
+	claudeContentIndex  int    // Claude content block index
+	contentBlockStarted bool   // Whether content_block_start has been sent
+	contentBlockStopped bool   // Whether content_block_stop has been sent
+	cachedArguments     string // Cache arguments for this tool call
+}
+
 // contentConversionResult represents the result of converting Claude content to OpenAI format
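
Background for the `map[string]*toolCallState` to `map[int]*toolCallInfo` change: in OpenAI streaming, continuation fragments of a tool call repeat only the `index` and usually omit the `id` and function name, so the index is the reliable join key. Hypothetical deltas showing the shape:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed shape of an OpenAI streaming tool-call delta. Later fragments of
// the same call carry the index but typically no id or name, which is why
// the converter now keys its state map by index rather than id.
type toolCallDelta struct {
	Index    int    `json:"index"`
	Id       string `json:"id,omitempty"`
	Function struct {
		Name      string `json:"name,omitempty"`
		Arguments string `json:"arguments,omitempty"`
	} `json:"function"`
}

func main() {
	chunks := []string{
		`{"index":0,"id":"call_abc","function":{"name":"get_weather","arguments":""}}`,
		`{"index":0,"function":{"arguments":"{\"city\":\"Paris\"}"}}`, // no id here
	}
	for _, c := range chunks {
		var d toolCallDelta
		_ = json.Unmarshal([]byte(c), &d)
		fmt.Printf("index=%d id=%q args=%q\n", d.Index, d.Id, d.Function.Arguments)
	}
}
```
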
@@ -41,14 +52,6 @@ type contentConversionResult struct {
 	hasNonTextContent bool
 }
 
-// toolCallState tracks the state of a tool call during streaming
-type toolCallState struct {
-	id              string
-	name            string
-	argumentsBuffer string
-	isComplete      bool
-}
-
 // ConvertClaudeRequestToOpenAI converts a Claude chat completion request to OpenAI format
 func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAI(body []byte) ([]byte, error) {
 	log.Debugf("[Claude->OpenAI] Original Claude request body: %s", string(body))

@@ -114,18 +117,9 @@ func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAI(body []byte) ([]b
 
 		// Handle regular content if no tool calls or tool results
 		if len(conversionResult.toolCalls) == 0 && len(conversionResult.toolResults) == 0 {
-			var content interface{}
-			if !conversionResult.hasNonTextContent && len(conversionResult.textParts) > 0 {
-				// Simple text content
-				content = strings.Join(conversionResult.textParts, "\n\n")
-			} else {
-				// Multi-modal content or empty content
-				content = conversionResult.openaiContents
-			}
-
 			openaiMsg := chatMessage{
 				Role:    claudeMsg.Role,
-				Content: content,
+				Content: conversionResult.openaiContents,
 			}
 			openaiRequest.Messages = append(openaiRequest.Messages, openaiMsg)
 		}

@@ -133,11 +127,13 @@ func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAI(body []byte) ([]b
 	}
 
 	// Handle system message - Claude has separate system field
-	systemStr := claudeRequest.System.String()
-	if systemStr != "" {
-		systemMsg := chatMessage{
-			Role:    roleSystem,
-			Content: systemStr,
+	if claudeRequest.System != nil {
+		systemMsg := chatMessage{Role: roleSystem}
+		if !claudeRequest.System.IsArray {
+			systemMsg.Content = claudeRequest.System.StringValue
+		} else {
+			conversionResult := c.convertContentArray(claudeRequest.System.ArrayValue)
+			systemMsg.Content = conversionResult.openaiContents
 		}
 		// Insert system message at the beginning
 		openaiRequest.Messages = append([]chatMessage{systemMsg}, openaiRequest.Messages...)

@@ -231,6 +227,9 @@ func (c *ClaudeToOpenAIConverter) ConvertOpenAIResponseToClaude(ctx wrapper.Http
 			InputTokens:  openaiResponse.Usage.PromptTokens,
 			OutputTokens: openaiResponse.Usage.CompletionTokens,
 		}
+		if openaiResponse.Usage.PromptTokensDetails != nil {
+			claudeResponse.Usage.CacheReadInputTokens = openaiResponse.Usage.PromptTokensDetails.CachedTokens
+		}
 	}
 
 	// Convert the first choice content
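
The cache-token mapping here is direct: OpenAI reports cached prompt tokens under `usage.prompt_tokens_details.cached_tokens`, and Claude reports them as `usage.cache_read_input_tokens`. A trimmed sketch of the conversion (hypothetical payload):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed copies of the structs in this diff, for illustration only.
type promptTokensDetails struct {
	CachedTokens int `json:"cached_tokens,omitempty"`
}

type openaiUsage struct {
	PromptTokens        int                  `json:"prompt_tokens,omitempty"`
	CompletionTokens    int                  `json:"completion_tokens,omitempty"`
	PromptTokensDetails *promptTokensDetails `json:"prompt_tokens_details,omitempty"`
}

type claudeUsage struct {
	InputTokens          int `json:"input_tokens,omitempty"`
	OutputTokens         int `json:"output_tokens,omitempty"`
	CacheReadInputTokens int `json:"cache_read_input_tokens,omitempty"`
}

func main() {
	// Hypothetical OpenAI usage block with a prompt-cache hit.
	raw := `{"prompt_tokens":120,"completion_tokens":30,"prompt_tokens_details":{"cached_tokens":100}}`
	var in openaiUsage
	_ = json.Unmarshal([]byte(raw), &in)

	out := claudeUsage{InputTokens: in.PromptTokens, OutputTokens: in.CompletionTokens}
	if in.PromptTokensDetails != nil {
		out.CacheReadInputTokens = in.PromptTokensDetails.CachedTokens
	}
	b, _ := json.Marshal(out)
	fmt.Println(string(b)) // {"input_tokens":120,"output_tokens":30,"cache_read_input_tokens":100}
}
```
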
@@ -312,11 +311,6 @@ func (c *ClaudeToOpenAIConverter) ConvertOpenAIResponseToClaude(ctx wrapper.Http
 func (c *ClaudeToOpenAIConverter) ConvertOpenAIStreamResponseToClaude(ctx wrapper.HttpContext, chunk []byte) ([]byte, error) {
 	log.Debugf("[OpenAI->Claude] Original OpenAI streaming chunk: %s", string(chunk))
 
-	// Initialize tool call states if needed
-	if c.toolCallStates == nil {
-		c.toolCallStates = make(map[string]*toolCallState)
-	}
-
 	// For streaming responses, we need to handle the Server-Sent Events format
 	lines := strings.Split(string(chunk), "\n")
 	var result strings.Builder

@@ -350,15 +344,18 @@ func (c *ClaudeToOpenAIConverter) ConvertOpenAIStreamResponseToClaude(ctx wrappe
 				stopData, _ := json.Marshal(stopEvent)
 				result.WriteString(fmt.Sprintf("data: %s\n\n", stopData))
 			}
-			if c.toolBlockStarted && !c.toolBlockStopped {
-				c.toolBlockStopped = true
-				log.Debugf("[OpenAI->Claude] Sending final tool content_block_stop event at index %d", c.toolBlockIndex)
-				stopEvent := &claudeTextGenStreamResponse{
-					Type:  "content_block_stop",
-					Index: &c.toolBlockIndex,
-				}
-				stopData, _ := json.Marshal(stopEvent)
-				result.WriteString(fmt.Sprintf("data: %s\n\n", stopData))
+			// Send final content_block_stop events for any remaining unclosed tool calls
+			for index, toolCall := range c.toolCallStates {
+				if toolCall.contentBlockStarted && !toolCall.contentBlockStopped {
+					log.Debugf("[OpenAI->Claude] Sending final tool content_block_stop event for index %d at Claude index %d",
+						index, toolCall.claudeContentIndex)
+					stopEvent := &claudeTextGenStreamResponse{
+						Type:  "content_block_stop",
+						Index: &toolCall.claudeContentIndex,
+					}
+					stopData, _ := json.Marshal(stopEvent)
+					result.WriteString(fmt.Sprintf("data: %s\n\n", stopData))
+				}
 			}
 
 			// If we have a pending stop_reason but no usage, send message_delta with just stop_reason

@@ -401,7 +398,8 @@ func (c *ClaudeToOpenAIConverter) ConvertOpenAIStreamResponseToClaude(ctx wrappe
 			c.toolBlockIndex = -1
 			c.toolBlockStarted = false
 			c.toolBlockStopped = false
-			c.toolCallStates = make(map[string]*toolCallState)
+			c.toolCallStates = make(map[int]*toolCallInfo)
+			c.activeToolIndex = nil
 			log.Debugf("[OpenAI->Claude] Reset converter state for next request")
 
 			continue

@@ -424,7 +422,7 @@ func (c *ClaudeToOpenAIConverter) ConvertOpenAIStreamResponseToClaude(ctx wrappe
 					continue
 				}
 				log.Debugf("[OpenAI->Claude] Stream event [%d/%d]: %s", i+1, len(claudeStreamResponses), string(responseData))
-				result.WriteString(fmt.Sprintf("data: %s\n\n", responseData))
+				result.WriteString(fmt.Sprintf("event: %s\ndata: %s\n\n", claudeStreamResponse.Type, responseData))
 			}
 		}
 	}
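
This one-line change is the core of the SSE fix: Anthropic-style streams frame each event as an `event:` line naming the type, a `data:` line with the JSON payload, and a blank line, whereas OpenAI streams emit bare `data:` lines. A minimal sketch of the two framings (hypothetical payloads):

```go
package main

import "fmt"

// claudeSSE frames one Claude stream event: an "event:" line naming the
// type, a "data:" line with the JSON payload, then a blank line.
func claudeSSE(eventType, payload string) string {
	return fmt.Sprintf("event: %s\ndata: %s\n\n", eventType, payload)
}

// openaiSSE frames one OpenAI stream chunk: a bare "data:" line.
func openaiSSE(payload string) string {
	return fmt.Sprintf("data: %s\n\n", payload)
}

func main() {
	delta := `{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hi"}}`
	fmt.Print(claudeSSE("content_block_delta", delta))
	// event: content_block_delta
	// data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hi"}}

	fmt.Print(openaiSSE(`{"choices":[{"delta":{"content":"Hi"}}]}`))
	// data: {"choices":[{"delta":{"content":"Hi"}}]}
}
```
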
@@ -579,79 +577,60 @@ func (c *ClaudeToOpenAIConverter) buildClaudeStreamResponse(ctx wrapper.HttpCont
 
 	// Handle tool calls in streaming response
 	if choice.Delta != nil && len(choice.Delta.ToolCalls) > 0 {
+		// Initialize toolCallStates if needed
+		if c.toolCallStates == nil {
+			c.toolCallStates = make(map[int]*toolCallInfo)
+		}
+
 		for _, toolCall := range choice.Delta.ToolCalls {
 			if !toolCall.Function.IsEmpty() {
-				log.Debugf("[OpenAI->Claude] Processing tool call delta")
-
-				// Get or create tool call state
-				state := c.toolCallStates[toolCall.Id]
-				if state == nil {
-					state = &toolCallState{
-						id:              toolCall.Id,
-						name:            toolCall.Function.Name,
-						argumentsBuffer: "",
-						isComplete:      false,
-					}
-					c.toolCallStates[toolCall.Id] = state
-					log.Debugf("[OpenAI->Claude] Created new tool call state for id: %s, name: %s", toolCall.Id, toolCall.Function.Name)
-				}
-
-				// Accumulate arguments
-				if toolCall.Function.Arguments != "" {
-					state.argumentsBuffer += toolCall.Function.Arguments
-					log.Debugf("[OpenAI->Claude] Accumulated tool arguments: %s", state.argumentsBuffer)
-				}
-
-				// Try to parse accumulated arguments as JSON to check if complete
-				var input map[string]interface{}
-				if state.argumentsBuffer != "" {
-					if err := json.Unmarshal([]byte(state.argumentsBuffer), &input); err == nil {
-						// Successfully parsed - arguments are complete
-						if !state.isComplete {
-							state.isComplete = true
-							log.Debugf("[OpenAI->Claude] Tool call arguments complete for %s: %s", state.name, state.argumentsBuffer)
-
-							// Close thinking content block if it's still open
-							if c.thinkingBlockStarted && !c.thinkingBlockStopped {
-								c.thinkingBlockStopped = true
-								log.Debugf("[OpenAI->Claude] Closing thinking content block before tool use")
-								responses = append(responses, &claudeTextGenStreamResponse{
-									Type:  "content_block_stop",
-									Index: &c.thinkingBlockIndex,
-								})
-							}
-
-							// Close text content block if it's still open
-							if c.textBlockStarted && !c.textBlockStopped {
-								c.textBlockStopped = true
-								log.Debugf("[OpenAI->Claude] Closing text content block before tool use")
-								responses = append(responses, &claudeTextGenStreamResponse{
-									Type:  "content_block_stop",
-									Index: &c.textBlockIndex,
-								})
-							}
-
-							// Send content_block_start for tool_use only when we have complete arguments with dynamic index
-							if !c.toolBlockStarted {
-								c.toolBlockIndex = c.nextContentIndex
-								c.nextContentIndex++
-								c.toolBlockStarted = true
-								log.Debugf("[OpenAI->Claude] Generated content_block_start event for tool_use at index %d", c.toolBlockIndex)
-								responses = append(responses, &claudeTextGenStreamResponse{
-									Type:  "content_block_start",
-									Index: &c.toolBlockIndex,
-									ContentBlock: &claudeTextGenContent{
-										Type:  "tool_use",
-										Id:    toolCall.Id,
-										Name:  state.name,
-										Input: input,
-									},
-								})
-							}
-						}
-					} else {
-						// Still accumulating arguments
-						log.Debugf("[OpenAI->Claude] Tool arguments not yet complete, continuing to accumulate: %v", err)
-					}
-				}
+				log.Debugf("[OpenAI->Claude] Processing tool call delta: index=%d, id=%s, name=%s, args=%s",
+					toolCall.Index, toolCall.Id, toolCall.Function.Name, toolCall.Function.Arguments)
+
+				// Handle new tool call (has id and name)
+				if toolCall.Id != "" && toolCall.Function.Name != "" {
+					// Create or update tool call state
+					if _, exists := c.toolCallStates[toolCall.Index]; !exists {
+						c.toolCallStates[toolCall.Index] = &toolCallInfo{
+							id:                  toolCall.Id,
+							name:                toolCall.Function.Name,
+							contentBlockStarted: false,
+							contentBlockStopped: false,
+							cachedArguments:     "",
+						}
+					}
+
+					toolState := c.toolCallStates[toolCall.Index]
+
+					// Check if we can start this tool call (Claude requires serialization)
+					if c.activeToolIndex == nil {
+						// No active tool call, start this one
+						c.activeToolIndex = &toolCall.Index
+						toolCallResponses := c.startToolCall(toolState)
+						responses = append(responses, toolCallResponses...)
+					}
+					// If there's already an active tool call, we'll start this one when the current one finishes
+				}
+
+				// Handle arguments for any tool call - cache all arguments regardless of active state
+				if toolCall.Function.Arguments != "" {
+					if toolState, exists := c.toolCallStates[toolCall.Index]; exists {
+						// Always cache arguments for this tool call
+						toolState.cachedArguments += toolCall.Function.Arguments
+						log.Debugf("[OpenAI->Claude] Cached arguments for tool index %d: %s (total: %s)",
+							toolCall.Index, toolCall.Function.Arguments, toolState.cachedArguments)
+
+						// Send input_json_delta event only if this tool is currently active and content block started
+						if c.activeToolIndex != nil && *c.activeToolIndex == toolCall.Index && toolState.contentBlockStarted {
+							log.Debugf("[OpenAI->Claude] Generated input_json_delta event for active tool index %d: %s",
+								toolCall.Index, toolCall.Function.Arguments)
+							responses = append(responses, &claudeTextGenStreamResponse{
+								Type:  "content_block_delta",
+								Index: &toolState.claudeContentIndex,
+								Delta: &claudeTextGenDelta{
+									Type:        "input_json_delta",
+									PartialJson: toolCall.Function.Arguments,
+								},
+							})
+						}
+					}
+				}
 			}
 		}
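
The rewrite replaces the old parse-until-complete-JSON approach with an index-keyed state machine. Claude allows only one open content block at a time, while OpenAI may interleave argument deltas for several tool calls, so fragments for inactive tools are cached and replayed once their block is started (see `startToolCall` further down). A standalone sketch of that caching idea, with hypothetical tool names and fragments:

```go
package main

import "fmt"

// Trimmed re-creation of the serialization strategy: fragments for a tool
// whose block is not open yet are cached, then replayed when it starts.
type toolInfo struct {
	name    string
	cached  string
	started bool
}

func main() {
	tools := map[int]*toolInfo{}
	var activeIndex *int
	var events []string

	start := func(i int) {
		t := tools[i]
		t.started = true
		events = append(events, fmt.Sprintf("content_block_start(%s)", t.name))
		if t.cached != "" { // replay everything seen while the tool was inactive
			events = append(events, fmt.Sprintf("input_json_delta(%q)", t.cached))
		}
	}

	// Hypothetical interleaved OpenAI deltas: (index, name, argument fragment).
	deltas := []struct {
		index      int
		name, args string
	}{
		{0, "get_weather", `{"city":`}, {1, "get_time", `{"tz":`},
		{0, "", `"Paris"}`}, {1, "", `"UTC"}`},
	}

	for _, d := range deltas {
		if _, ok := tools[d.index]; !ok {
			tools[d.index] = &toolInfo{name: d.name}
			if activeIndex == nil { // serialize: open the first tool immediately
				i := d.index
				activeIndex = &i
				start(i)
			}
		}
		t := tools[d.index]
		t.cached += d.args
		if activeIndex != nil && *activeIndex == d.index && t.started {
			events = append(events, fmt.Sprintf("input_json_delta(%q)", d.args))
		}
	}

	// On finish_reason: start any tool that never became active, then stop all.
	for i := 0; i < len(tools); i++ {
		if !tools[i].started {
			start(i)
		}
		events = append(events, fmt.Sprintf("content_block_stop(#%d)", i))
	}

	for _, e := range events {
		fmt.Println(e)
	}
}
```
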
@@ -680,15 +659,52 @@ func (c *ClaudeToOpenAIConverter) buildClaudeStreamResponse(ctx wrapper.HttpCont
 				Index: &c.textBlockIndex,
 			})
 		}
-		if c.toolBlockStarted && !c.toolBlockStopped {
-			c.toolBlockStopped = true
-			log.Debugf("[OpenAI->Claude] Generated tool content_block_stop event at index %d", c.toolBlockIndex)
-			responses = append(responses, &claudeTextGenStreamResponse{
-				Type:  "content_block_stop",
-				Index: &c.toolBlockIndex,
-			})
-		}
+
+		// First, start any remaining unstarted tool calls (they may have no arguments)
+		// Process in order to maintain Claude's sequential requirement
+		var sortedIndices []int
+		for index := range c.toolCallStates {
+			sortedIndices = append(sortedIndices, index)
+		}
+
+		// Sort indices to process in order
+		for i := 0; i < len(sortedIndices)-1; i++ {
+			for j := i + 1; j < len(sortedIndices); j++ {
+				if sortedIndices[i] > sortedIndices[j] {
+					sortedIndices[i], sortedIndices[j] = sortedIndices[j], sortedIndices[i]
+				}
+			}
+		}
+
+		for _, index := range sortedIndices {
+			toolCall := c.toolCallStates[index]
+			if !toolCall.contentBlockStarted {
+				log.Debugf("[OpenAI->Claude] Starting remaining tool call at finish: index=%d, id=%s, name=%s",
+					index, toolCall.id, toolCall.name)
+				c.activeToolIndex = &index
+				toolCallResponses := c.startToolCall(toolCall)
+				responses = append(responses, toolCallResponses...)
+				c.activeToolIndex = nil // Clear immediately since tool is now fully started
+			}
+		}
+
+		// Then send content_block_stop for all started tool calls in order
+		for _, index := range sortedIndices {
+			toolCall := c.toolCallStates[index]
+			if toolCall.contentBlockStarted && !toolCall.contentBlockStopped {
+				log.Debugf("[OpenAI->Claude] Generated content_block_stop for tool at index %d, Claude index %d",
+					index, toolCall.claudeContentIndex)
+				responses = append(responses, &claudeTextGenStreamResponse{
+					Type:  "content_block_stop",
+					Index: &toolCall.claudeContentIndex,
+				})
+				toolCall.contentBlockStopped = true
+			}
+		}
+
+		// Clear active tool index
+		c.activeToolIndex = nil
 
 		// Cache stop_reason until we get usage info (Claude protocol requires them together)
 		c.pendingStopReason = &claudeFinishReason
 		log.Debugf("[OpenAI->Claude] Cached stop_reason: %s, waiting for usage", claudeFinishReason)
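
Minor style note on the nested-loop sort above: the standard library covers this, and an equivalent one-liner keeps the same behavior:

```go
package main

import (
	"fmt"
	"sort"
)

func main() {
	sortedIndices := []int{2, 0, 1}
	sort.Ints(sortedIndices)   // replaces the hand-rolled bubble sort
	fmt.Println(sortedIndices) // [0 1 2]
}
```
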
@@ -764,8 +780,9 @@ func (c *ClaudeToOpenAIConverter) convertContentArray(claudeContents []claudeCha
 		if claudeContent.Text != "" {
 			result.textParts = append(result.textParts, claudeContent.Text)
 			result.openaiContents = append(result.openaiContents, chatMessageContent{
-				Type: contentTypeText,
-				Text: claudeContent.Text,
+				Type:         contentTypeText,
+				Text:         claudeContent.Text,
+				CacheControl: claudeContent.CacheControl,
 			})
 		}
 	case "image":

@@ -822,3 +839,63 @@ func (c *ClaudeToOpenAIConverter) convertContentArray(claudeContents []claudeCha
 
 	return result
 }
+
+// startToolCall starts a new tool call content block
+func (c *ClaudeToOpenAIConverter) startToolCall(toolState *toolCallInfo) []*claudeTextGenStreamResponse {
+	var responses []*claudeTextGenStreamResponse
+
+	// Close thinking content block if it's still open
+	if c.thinkingBlockStarted && !c.thinkingBlockStopped {
+		c.thinkingBlockStopped = true
+		log.Debugf("[OpenAI->Claude] Closing thinking content block before tool use")
+		responses = append(responses, &claudeTextGenStreamResponse{
+			Type:  "content_block_stop",
+			Index: &c.thinkingBlockIndex,
+		})
+	}
+
+	// Close text content block if it's still open
+	if c.textBlockStarted && !c.textBlockStopped {
+		c.textBlockStopped = true
+		log.Debugf("[OpenAI->Claude] Closing text content block before tool use")
+		responses = append(responses, &claudeTextGenStreamResponse{
+			Type:  "content_block_stop",
+			Index: &c.textBlockIndex,
+		})
+	}
+
+	// Assign Claude content index
+	toolState.claudeContentIndex = c.nextContentIndex
+	c.nextContentIndex++
+	toolState.contentBlockStarted = true
+
+	log.Debugf("[OpenAI->Claude] Started tool call: Claude index=%d, id=%s, name=%s",
+		toolState.claudeContentIndex, toolState.id, toolState.name)
+
+	// Send content_block_start
+	responses = append(responses, &claudeTextGenStreamResponse{
+		Type:  "content_block_start",
+		Index: &toolState.claudeContentIndex,
+		ContentBlock: &claudeTextGenContent{
+			Type:  "tool_use",
+			Id:    toolState.id,
+			Name:  toolState.name,
+			Input: map[string]interface{}{}, // Empty input as per Claude spec
+		},
+	})
+
+	// Send any cached arguments as input_json_delta events
+	if toolState.cachedArguments != "" {
+		log.Debugf("[OpenAI->Claude] Outputting cached arguments for tool: %s", toolState.cachedArguments)
+		responses = append(responses, &claudeTextGenStreamResponse{
+			Type:  "content_block_delta",
+			Index: &toolState.claudeContentIndex,
+			Delta: &claudeTextGenDelta{
+				Type:        "input_json_delta",
+				PartialJson: toolState.cachedArguments,
+			},
+		})
+	}
+
+	return responses
+}

@@ -35,7 +35,8 @@ func TestClaudeToOpenAIConverter_ConvertClaudeRequestToOpenAI(t *testing.T) {
 	converter := &ClaudeToOpenAIConverter{}
 
 	t.Run("convert_multiple_text_content_blocks", func(t *testing.T) {
-		// Test case for the bug fix: multiple text content blocks should be merged into a single string
+		// Test case: multiple text content blocks should remain as separate array elements with cache control support
+		// Both system and user messages should handle array content format
		claudeRequest := `{
			"max_tokens": 32000,
			"messages": [{
@@ -98,15 +99,64 @@ func TestClaudeToOpenAIConverter_ConvertClaudeRequestToOpenAI(t *testing.T) {
 		// First message should be system message (converted from Claude's system field)
 		systemMsg := openaiRequest.Messages[0]
 		assert.Equal(t, roleSystem, systemMsg.Role)
-		assert.Equal(t, "xxx\nyyy", systemMsg.Content) // Claude system uses single \n
 
-		// Second message should be user message with merged text content
+		// System content should now also be an array for multiple text blocks
+		systemContentArray, ok := systemMsg.Content.([]interface{})
+		require.True(t, ok, "System content should be an array for multiple text blocks")
+		require.Len(t, systemContentArray, 2)
+
+		// First system text block
+		firstSystemElement, ok := systemContentArray[0].(map[string]interface{})
+		require.True(t, ok)
+		assert.Equal(t, contentTypeText, firstSystemElement["type"])
+		assert.Equal(t, "xxx", firstSystemElement["text"])
+		assert.NotNil(t, firstSystemElement["cache_control"]) // Has cache control
+		systemCacheControl1, ok := firstSystemElement["cache_control"].(map[string]interface{})
+		require.True(t, ok)
+		assert.Equal(t, "ephemeral", systemCacheControl1["type"])
+
+		// Second system text block
+		secondSystemElement, ok := systemContentArray[1].(map[string]interface{})
+		require.True(t, ok)
+		assert.Equal(t, contentTypeText, secondSystemElement["type"])
+		assert.Equal(t, "yyy", secondSystemElement["text"])
+		assert.NotNil(t, secondSystemElement["cache_control"]) // Has cache control
+		systemCacheControl2, ok := secondSystemElement["cache_control"].(map[string]interface{})
+		require.True(t, ok)
+		assert.Equal(t, "ephemeral", systemCacheControl2["type"])
+
+		// Second message should be user message with text content as array
 		userMsg := openaiRequest.Messages[1]
 		assert.Equal(t, "user", userMsg.Role)
 
-		// The key fix: multiple text blocks should be merged into a single string
-		expectedContent := "<system-reminder>\nThis is a reminder that your todo list is currently empty. DO NOT mention this to the user explicitly because they are already aware. If you are working on tasks that would benefit from a todo list please use the TodoWrite tool to create one. If not, please feel free to ignore. Again do not mention this message to the user.</system-reminder>\n\n<system-reminder>\nyyy</system-reminder>\n\n你是谁"
-		assert.Equal(t, expectedContent, userMsg.Content)
+		// The content should now be an array of separate text blocks, not merged
+		contentArray, ok := userMsg.Content.([]interface{})
+		require.True(t, ok, "Content should be an array for multiple text blocks")
+		require.Len(t, contentArray, 3)
+
+		// First text block
+		firstElement, ok := contentArray[0].(map[string]interface{})
+		require.True(t, ok)
+		assert.Equal(t, contentTypeText, firstElement["type"])
+		assert.Equal(t, "<system-reminder>\nThis is a reminder that your todo list is currently empty. DO NOT mention this to the user explicitly because they are already aware. If you are working on tasks that would benefit from a todo list please use the TodoWrite tool to create one. If not, please feel free to ignore. Again do not mention this message to the user.</system-reminder>", firstElement["text"])
+		assert.Nil(t, firstElement["cache_control"]) // No cache control for first block
+
+		// Second text block
+		secondElement, ok := contentArray[1].(map[string]interface{})
+		require.True(t, ok)
+		assert.Equal(t, contentTypeText, secondElement["type"])
+		assert.Equal(t, "<system-reminder>\nyyy</system-reminder>", secondElement["text"])
+		assert.Nil(t, secondElement["cache_control"]) // No cache control for second block
+
+		// Third text block with cache control
+		thirdElement, ok := contentArray[2].(map[string]interface{})
+		require.True(t, ok)
+		assert.Equal(t, contentTypeText, thirdElement["type"])
+		assert.Equal(t, "你是谁", thirdElement["text"])
+		assert.NotNil(t, thirdElement["cache_control"]) // Has cache control
+		cacheControl, ok := thirdElement["cache_control"].(map[string]interface{})
+		require.True(t, ok)
+		assert.Equal(t, "ephemeral", cacheControl["type"])
 	})
 
 	t.Run("convert_mixed_content_with_image", func(t *testing.T) {

@@ -159,10 +159,17 @@ type usage struct {
 	CompletionTokens        int                      `json:"completion_tokens,omitempty"`
 	TotalTokens             int                      `json:"total_tokens,omitempty"`
 	CompletionTokensDetails *completionTokensDetails `json:"completion_tokens_details,omitempty"`
+	PromptTokensDetails     *promptTokensDetails     `json:"prompt_tokens_details,omitempty"`
 }
 
+type promptTokensDetails struct {
+	AudioTokens  int `json:"audio_tokens,omitempty"`
+	CachedTokens int `json:"cached_tokens,omitempty"`
+}
+
 type completionTokensDetails struct {
 	ReasoningTokens          int `json:"reasoning_tokens,omitempty"`
 	AudioTokens              int `json:"audio_tokens,omitempty"`
 	AcceptedPredictionTokens int `json:"accepted_prediction_tokens,omitempty"`
 	RejectedPredictionTokens int `json:"rejected_prediction_tokens,omitempty"`
 }

@@ -240,11 +247,12 @@ func (m *chatMessage) handleStreamingReasoningContent(ctx wrapper.HttpContext, r
 }
 
 type chatMessageContent struct {
-	Type       string                      `json:"type,omitempty"`
-	Text       string                      `json:"text"`
-	ImageUrl   *chatMessageContentImageUrl `json:"image_url,omitempty"`
-	File       *chatMessageContentFile     `json:"file,omitempty"`
-	InputAudio *chatMessageContentAudio    `json:"input_audio,omitempty"`
+	CacheControl map[string]interface{}      `json:"cache_control,omitempty"`
+	Type         string                      `json:"type,omitempty"`
+	Text         string                      `json:"text"`
+	ImageUrl     *chatMessageContentImageUrl `json:"image_url,omitempty"`
+	File         *chatMessageContentFile     `json:"file,omitempty"`
+	InputAudio   *chatMessageContentAudio    `json:"input_audio,omitempty"`
 }
 
 type chatMessageContentAudio struct {

@@ -402,6 +410,7 @@ func (m *functionCall) IsEmpty() bool {
 }
 
 type StreamEvent struct {
+	RawEvent string `json:"-"`
 	Id       string `json:"id"`
 	Event    string `json:"event"`
 	Data     string `json:"data"`

@@ -813,6 +813,7 @@ func ExtractStreamingEvents(ctx wrapper.HttpContext, chunk []byte) []StreamEvent
 			value := string(body[valueStartIndex:i])
 			currentEvent.SetValue(currentKey, value)
 		} else {
+			currentEvent.RawEvent = string(body[eventStartIndex : i+1])
 			// Extra new line. The current event is complete.
 			events = append(events, *currentEvent)
 			// Reset event parsing state.
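
Together with the new `RawEvent` field on `StreamEvent`, this line captures the exact bytes of each SSE event, so `onStreamingResponseBody` can forward unconverted events verbatim instead of rebuilding them (which is where the `event:` line used to get lost). A much-simplified sketch of the idea, with a hypothetical parser and input:

```go
package main

import (
	"fmt"
	"strings"
)

// Simplified take on keeping raw SSE event bytes alongside the parsed
// fields, so unmodified events can be forwarded byte-for-byte.
type streamEvent struct {
	RawEvent string // original framing, including the "event:" line
	Event    string
	Data     string
}

func extractEvents(chunk string) []streamEvent {
	var events []streamEvent
	for _, block := range strings.Split(chunk, "\n\n") {
		if strings.TrimSpace(block) == "" {
			continue
		}
		ev := streamEvent{RawEvent: block + "\n\n"}
		for _, line := range strings.Split(block, "\n") {
			if v, ok := strings.CutPrefix(line, "event: "); ok {
				ev.Event = v
			} else if v, ok := strings.CutPrefix(line, "data: "); ok {
				ev.Data = v
			}
		}
		events = append(events, ev)
	}
	return events
}

func main() {
	chunk := "event: message_start\ndata: {\"type\":\"message_start\"}\n\n"
	for _, ev := range extractEvents(chunk) {
		// Forwarding RawEvent preserves the "event:" line exactly.
		fmt.Printf("type=%s raw=%q\n", ev.Event, ev.RawEvent)
	}
}
```
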
@@ -6,14 +6,15 @@ import (
 	"strings"
 
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
-	"github.com/higress-group/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+	"github.com/higress-group/wasm-go/pkg/wrapper"
 )
 
 const (
-	zhipuAiDomain             = "open.bigmodel.cn"
-	zhipuAiChatCompletionPath = "/api/paas/v4/chat/completions"
-	zhipuAiEmbeddingsPath     = "/api/paas/v4/embeddings"
+	zhipuAiDomain                = "open.bigmodel.cn"
+	zhipuAiChatCompletionPath    = "/api/paas/v4/chat/completions"
+	zhipuAiEmbeddingsPath        = "/api/paas/v4/embeddings"
+	zhipuAiAnthropicMessagesPath = "/api/anthropic/v1/messages"
 )
 
 type zhipuAiProviderInitializer struct{}

@@ -27,8 +28,9 @@ func (m *zhipuAiProviderInitializer) ValidateConfig(config *ProviderConfig) erro
 
 func (m *zhipuAiProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion): zhipuAiChatCompletionPath,
-		string(ApiNameEmbeddings):     zhipuAiEmbeddingsPath,
+		string(ApiNameChatCompletion):    zhipuAiChatCompletionPath,
+		string(ApiNameEmbeddings):        zhipuAiEmbeddingsPath,
+		string(ApiNameAnthropicMessages): zhipuAiAnthropicMessagesPath,
 	}
 }

@@ -75,5 +77,8 @@ func (m *zhipuAiProvider) GetApiName(path string) ApiName {
 	if strings.Contains(path, zhipuAiEmbeddingsPath) {
 		return ApiNameEmbeddings
 	}
+	if strings.Contains(path, zhipuAiAnthropicMessagesPath) {
+		return ApiNameAnthropicMessages
+	}
 	return ""
 }

@@ -83,6 +83,23 @@ const (
 	RuleFirst   = "first"
 	RuleReplace = "replace"
 	RuleAppend  = "append"
+
+	// Built-in attributes
+	BuiltinQuestionKey = "question"
+	BuiltinAnswerKey   = "answer"
+
+	// Built-in attribute paths
+	// Question paths (from request body)
+	QuestionPathOpenAI = "messages.@reverse.0.content"
+	QuestionPathClaude = "messages.@reverse.0.content" // Claude uses same format
+
+	// Answer paths (from response body - non-streaming)
+	AnswerPathOpenAINonStreaming = "choices.0.message.content"
+	AnswerPathClaudeNonStreaming = "content.0.text"
+
+	// Answer paths (from response streaming body)
+	AnswerPathOpenAIStreaming = "choices.0.delta.content"
+	AnswerPathClaudeStreaming = "delta.text"
 )
 
 // TracingSpan is the tracing span configuration.

@@ -325,12 +342,14 @@ func onHttpRequestBody(ctx wrapper.HttpContext, config AIStatisticsConfig, body
 
 	userPromptCount := 0
 	if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() {
+		// OpenAI and Claude/Anthropic format - both use "messages" array with "role" field
 		for _, msg := range messages.Array() {
 			if msg.Get("role").String() == "user" {
 				userPromptCount += 1
 			}
 		}
-	} else if contents := gjson.GetBytes(body, "contents"); contents.Exists() && contents.IsArray() { // Google Gemini GenerateContent
+	} else if contents := gjson.GetBytes(body, "contents"); contents.Exists() && contents.IsArray() {
+		// Google Gemini GenerateContent
 		for _, content := range contents.Array() {
 			if !content.Get("role").Exists() || content.Get("role").String() == "user" {
 				userPromptCount += 1
@@ -387,7 +406,7 @@ func onHttpStreamingBody(ctx wrapper.HttpContext, config AIStatisticsConfig, dat
 		"id",
 		"response.id",
 		"responseId", // Gemini generateContent
-		"message.id", // anthropic messages
+		"message.id", // anthropic/claude messages
 	}); chatID != nil {
 		ctx.SetUserAttribute(ChatID, chatID.String())
 	}
@@ -456,7 +475,7 @@ func onHttpResponseBody(ctx wrapper.HttpContext, config AIStatisticsConfig, body
 		"id",
 		"response.id",
 		"responseId", // Gemini generateContent
-		"message.id", // anthropic messages
+		"message.id", // anthropic/claude messages
 	}); chatID != nil {
 		ctx.SetUserAttribute(ChatID, chatID.String())
 	}
@@ -507,6 +526,15 @@ func setAttributeBySource(ctx wrapper.HttpContext, config AIStatisticsConfig, so
 		value = gjson.GetBytes(body, attribute.Value).Value()
 	default:
 	}
+
+	// Handle built-in attributes with Claude/OpenAI protocol fallback logic
+	if (value == nil || value == "") && isBuiltinAttribute(key) {
+		value = getBuiltinAttributeFallback(ctx, config, key, source, body, attribute.Rule)
+		if value != nil && value != "" {
+			log.Debugf("[attribute] Used protocol fallback for %s: %+v", key, value)
+		}
+	}
+
 	if (value == nil || value == "") && attribute.DefaultValue != "" {
 		value = attribute.DefaultValue
 	}
@@ -538,6 +566,45 @@ func setAttributeBySource(ctx wrapper.HttpContext, config AIStatisticsConfig, so
 	}
 }
 
+// isBuiltinAttribute checks if the given key is a built-in attribute
+func isBuiltinAttribute(key string) bool {
+	return key == BuiltinQuestionKey || key == BuiltinAnswerKey
+}
+
+// getBuiltinAttributeFallback provides protocol compatibility fallback for question/answer attributes
+func getBuiltinAttributeFallback(ctx wrapper.HttpContext, config AIStatisticsConfig, key, source string, body []byte, rule string) interface{} {
+	switch key {
+	case BuiltinQuestionKey:
+		if source == RequestBody {
+			// Try OpenAI/Claude format (both use same messages structure)
+			if value := gjson.GetBytes(body, QuestionPathOpenAI).Value(); value != nil && value != "" {
+				return value
+			}
+		}
+	case BuiltinAnswerKey:
+		if source == ResponseStreamingBody {
+			// Try OpenAI format first
+			if value := extractStreamingBodyByJsonPath(body, AnswerPathOpenAIStreaming, rule); value != nil && value != "" {
+				return value
+			}
+			// Try Claude format
+			if value := extractStreamingBodyByJsonPath(body, AnswerPathClaudeStreaming, rule); value != nil && value != "" {
+				return value
+			}
+		} else if source == ResponseBody {
+			// Try OpenAI format first
+			if value := gjson.GetBytes(body, AnswerPathOpenAINonStreaming).Value(); value != nil && value != "" {
+				return value
+			}
+			// Try Claude format
+			if value := gjson.GetBytes(body, AnswerPathClaudeNonStreaming).Value(); value != nil && value != "" {
+				return value
+			}
+		}
+	}
+	return nil
+}
+
 func extractStreamingBodyByJsonPath(data []byte, jsonPath string, rule string) interface{} {
 	chunks := bytes.Split(bytes.TrimSpace(wrapper.UnifySSEChunk(data)), []byte("\n\n"))
 	var value interface{}
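
The path constants above are gjson paths: `messages.@reverse.0.content` reads as "content of the last message" and works for both OpenAI and Claude request bodies, while the answer paths differ per protocol. A quick illustration with hypothetical payloads:

```go
package main

import (
	"fmt"

	"github.com/tidwall/gjson"
)

func main() {
	request := `{"messages":[{"role":"system","content":"be brief"},{"role":"user","content":"你是谁"}]}`
	// "@reverse.0" selects the last array element, i.e. the newest message.
	fmt.Println(gjson.Get(request, "messages.@reverse.0.content")) // 你是谁

	claudeResponse := `{"content":[{"type":"text","text":"I am Claude."}]}`
	fmt.Println(gjson.Get(claudeResponse, "content.0.text")) // I am Claude.

	openaiResponse := `{"choices":[{"message":{"content":"I am an assistant."}}]}`
	fmt.Println(gjson.Get(openaiResponse, "choices.0.message.content")) // I am an assistant.
}
```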