diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai.go b/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai.go
index 558371608..d9416270e 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai.go
@@ -788,16 +788,21 @@ func (c *ClaudeToOpenAIConverter) buildClaudeStreamResponse(ctx wrapper.HttpCont
 		log.Debugf("[OpenAI->Claude] Processing usage info - input: %d, output: %d",
 			openaiResponse.Usage.PromptTokens, openaiResponse.Usage.CompletionTokens)
 
+		usage := &claudeTextGenUsage{
+			InputTokens:  openaiResponse.Usage.PromptTokens,
+			OutputTokens: openaiResponse.Usage.CompletionTokens,
+		}
+		if openaiResponse.Usage.PromptTokensDetails != nil {
+			usage.CacheReadInputTokens = openaiResponse.Usage.PromptTokensDetails.CachedTokens
+		}
+
 		// Send message_delta with both stop_reason and usage (Claude protocol requirement)
 		messageDelta := &claudeTextGenStreamResponse{
 			Type: "message_delta",
 			Delta: &claudeTextGenDelta{
 				StopSequence: json.RawMessage("null"), // Explicit null per Claude spec
 			},
-			Usage: &claudeTextGenUsage{
-				InputTokens:  openaiResponse.Usage.PromptTokens,
-				OutputTokens: openaiResponse.Usage.CompletionTokens,
-			},
+			Usage: usage,
 		}
 
 		// Include cached stop_reason if available
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai_test.go b/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai_test.go
index 94751d770..b490a3591 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai_test.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/claude_to_openai_test.go
@@ -859,6 +859,22 @@ func TestClaudeToOpenAIConverter_ConvertReasoningResponseToClaude(t *testing.T)
 	}
 }
 
+func TestClaudeToOpenAIConverter_ConvertOpenAIStreamResponseToClaude_WithCachedTokens(t *testing.T) {
+	converter := &ClaudeToOpenAIConverter{}
+
+	streamChunk := "data: {\"id\":\"chatcmpl-test\",\"model\":\"gpt-4o\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\"}}]}\n\n" +
+		"data: {\"id\":\"chatcmpl-test\",\"model\":\"gpt-4o\",\"choices\":[{\"index\":0,\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":100,\"completion_tokens\":20,\"total_tokens\":120,\"prompt_tokens_details\":{\"cached_tokens\":60}}}\n\n"
+
+	result, err := converter.ConvertOpenAIStreamResponseToClaude(nil, []byte(streamChunk))
+	require.NoError(t, err)
+
+	resultStr := string(result)
+	assert.Contains(t, resultStr, "\"type\":\"message_delta\"")
+	assert.Contains(t, resultStr, "\"input_tokens\":100")
+	assert.Contains(t, resultStr, "\"output_tokens\":20")
+	assert.Contains(t, resultStr, "\"cache_read_input_tokens\":60")
+}
+
 func TestClaudeToOpenAIConverter_StripCchFromSystemMessage(t *testing.T) {
 	converter := &ClaudeToOpenAIConverter{}