feat/ai proxy vertex ai compatible (#3324)

This commit is contained in:
woody
2026-01-14 10:13:00 +08:00
committed by GitHub
parent e7010256fe
commit f1a5f18c78
7 changed files with 802 additions and 1 deletions

View File

@@ -1,6 +1,10 @@
package util
import "regexp"
import (
"regexp"
"strconv"
"strings"
)
func StripPrefix(s string, prefix string) string {
if len(prefix) != 0 && len(s) >= len(prefix) && s[0:len(prefix)] == prefix {
@@ -18,3 +22,43 @@ func MatchStatus(status string, patterns []string) bool {
}
return false
}
// unicodeEscapeRegex matches either a run of one or more consecutive \uXXXX
// escapes, or any other backslash followed by a single character.
//
// Consuming the "other" escapes is what keeps escaped backslashes intact: in
// `\\u4e2d` the first two bytes form an escaped backslash, so the following
// "u4e2d" is ordinary text and must not be decoded. Matching whole runs lets
// the decoder see both halves of a UTF-16 surrogate pair at once.
var unicodeEscapeRegex = regexp.MustCompile(`(?s)(?:\\u[0-9a-fA-F]{4})+|\\.`)

// unicodeEscapeLen is the byte length of one \uXXXX escape sequence.
const unicodeEscapeLen = 6

// DecodeUnicodeEscapes decodes Unicode escape sequences (\uXXXX) in input to
// UTF-8 characters and returns the result; input itself is not modified.
// This is useful when a JSON response contains ASCII-safe encoded non-ASCII
// characters.
//
// To keep a JSON payload valid after decoding, some sequences are deliberately
// left in their escaped form:
//   - a UTF-16 surrogate pair (\uD83D\uDE00) is combined into one code point,
//     while a lone surrogate is left untouched;
//   - escapes of characters that JSON requires to be escaped inside strings
//     (control characters below U+0020, `"` and `\`) are left untouched;
//   - a \u that is preceded by an escaped backslash (`\\u4e2d`) is plain text
//     and is left untouched;
//   - malformed sequences such as \u00GG are left untouched.
func DecodeUnicodeEscapes(input []byte) []byte {
	return unicodeEscapeRegex.ReplaceAllFunc(input, func(match []byte) []byte {
		// A run of \uXXXX escapes is a multiple of unicodeEscapeLen bytes long;
		// the other branch (backslash plus one rune) is at most 5 bytes.
		if len(match) < unicodeEscapeLen {
			return match
		}

		out := make([]byte, 0, len(match))
		for i := 0; i < len(match); i += unicodeEscapeLen {
			unit := match[i : i+unicodeEscapeLen]
			code, ok := parseUnicodeEscape(unit)
			if !ok {
				out = append(out, unit...)
				continue
			}

			// High surrogate: only meaningful together with the low surrogate
			// that immediately follows it.
			if code >= 0xD800 && code <= 0xDBFF && i+2*unicodeEscapeLen <= len(match) {
				low, ok := parseUnicodeEscape(match[i+unicodeEscapeLen : i+2*unicodeEscapeLen])
				if ok && low >= 0xDC00 && low <= 0xDFFF {
					combined := 0x10000 + (code-0xD800)<<10 + (low - 0xDC00)
					out = append(out, string(combined)...)
					i += unicodeEscapeLen // the low half has been consumed too
					continue
				}
			}

			if keepUnicodeEscape(code) {
				out = append(out, unit...)
				continue
			}
			out = append(out, string(code)...)
		}
		return out
	})
}

// parseUnicodeEscape returns the UTF-16 code unit encoded by a single
// 6-byte \uXXXX sequence, or false if the hex digits cannot be parsed.
func parseUnicodeEscape(esc []byte) (rune, bool) {
	v, err := strconv.ParseUint(string(esc[2:unicodeEscapeLen]), 16, 16)
	if err != nil {
		return 0, false
	}
	return rune(v), true
}

// keepUnicodeEscape reports whether code must stay in \uXXXX form: unpaired
// surrogates have no UTF-8 encoding, and control characters, `"` and `\`
// cannot appear unescaped inside a JSON string.
func keepUnicodeEscape(code rune) bool {
	switch {
	case code >= 0xD800 && code <= 0xDFFF:
		return true
	case code < 0x20, code == '"', code == '\\':
		return true
	default:
		return false
	}
}
// DecodeUnicodeEscapesInSSE decodes Unicode escape sequences in SSE formatted data.
//
// The input is split on "\n". Every line whose field name is "data" has its
// payload passed through DecodeUnicodeEscapes; all other lines (comments,
// "event:", "id:", blank separators, ...) are copied through unchanged. Line
// breaks, including a trailing one, are preserved exactly as in the input.
//
// The SSE format makes the single space after the colon optional, so both
// "data: {...}" and "data:{...}" lines are decoded.
func DecodeUnicodeEscapesInSSE(input []byte) []byte {
	const dataField = "data:"

	var out strings.Builder
	// Sizing hint only: the builder still grows on demand if needed.
	out.Grow(len(input))

	for i, line := range strings.Split(string(input), "\n") {
		if i > 0 {
			out.WriteByte('\n')
		}
		if !strings.HasPrefix(line, dataField) {
			out.WriteString(line)
			continue
		}
		// Everything after the colon is handed to the decoder, so an optional
		// leading space is carried through untouched.
		out.WriteString(dataField)
		out.Write(DecodeUnicodeEscapes([]byte(line[len(dataField):])))
	}
	return []byte(out.String())
}

View File

@@ -0,0 +1,108 @@
package util
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestDecodeUnicodeEscapes checks that DecodeUnicodeEscapes turns \uXXXX
// sequences into their UTF-8 characters and leaves everything else alone.
func TestDecodeUnicodeEscapes(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected string
	}{
		{
			name:     "Chinese characters",
			input:    `\u4e2d\u6587\u6d4b\u8bd5`,
			expected: `中文测试`,
		},
		{
			name:     "Mixed content",
			input:    `Hello \u4e16\u754c World`,
			expected: `Hello 世界 World`,
		},
		{
			name:     "No escape sequences",
			input:    `Hello World`,
			expected: `Hello World`,
		},
		{
			name:     "JSON with Unicode escapes",
			input:    `{"content":"\u76c8\u5229\u80fd\u529b"}`,
			expected: `{"content":"盈利能力"}`,
		},
		{
			// \uff08 and \uff09 are the full-width parentheses U+FF08/U+FF09,
			// not the ASCII ones, so the expectation must use them too.
			name:     "Full width parentheses",
			input:    `\uff08\u76c8\u5229\uff09`,
			expected: `（盈利）`,
		},
		{
			name:     "Empty string",
			input:    ``,
			expected: ``,
		},
		{
			name:     "Invalid escape sequence (not modified)",
			input:    `\u00GG`,
			expected: `\u00GG`,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := DecodeUnicodeEscapes([]byte(tt.input))
			assert.Equal(t, tt.expected, string(result))
		})
	}
}
// TestDecodeUnicodeEscapesInSSE checks that DecodeUnicodeEscapesInSSE decodes
// the payload of "data: " lines and passes every other line through unchanged.
//
// Trailing newlines are spelled out with "\n" so the fixtures do not depend on
// how raw string literals are laid out or indented in this file.
func TestDecodeUnicodeEscapesInSSE(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected string
	}{
		{
			name:     "SSE data with Unicode escapes",
			input:    `data: {"choices":[{"delta":{"content":"\u4e2d\u6587"}}]}` + "\n",
			expected: `data: {"choices":[{"delta":{"content":"中文"}}]}` + "\n",
		},
		{
			name: "Multiple SSE data lines",
			input: `data: {"content":"\u4e2d\u6587"}` + "\n" +
				`data: {"content":"\u82f1\u6587"}` + "\n" +
				`data: [DONE]` + "\n",
			expected: `data: {"content":"中文"}` + "\n" +
				`data: {"content":"英文"}` + "\n" +
				`data: [DONE]` + "\n",
		},
		{
			name:     "Non-data lines unchanged",
			input:    ": comment\nevent: message\ndata: test\n",
			expected: ": comment\nevent: message\ndata: test\n",
		},
		{
			// \uff08 and \uff09 decode to the full-width parentheses
			// U+FF08/U+FF09, so the expectation uses them as well.
			name:     "Real Vertex AI response format",
			input:    `data: {"choices":[{"delta":{"content":"\uff08\u76c8\u5229\u80fd\u529b\uff09","role":"assistant"},"index":0}],"created":1768307454,"id":"test","model":"gemini","object":"chat.completion.chunk"}` + "\n",
			expected: `data: {"choices":[{"delta":{"content":"（盈利能力）","role":"assistant"},"index":0}],"created":1768307454,"id":"test","model":"gemini","object":"chat.completion.chunk"}` + "\n",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := DecodeUnicodeEscapesInSSE([]byte(tt.input))
			assert.Equal(t, tt.expected, string(result))
		})
	}
}