mirror of
https://github.com/alibaba/higress.git
synced 2026-05-22 03:37:26 +08:00
feat/ai proxy vertex ai compatible (#3324)
This commit is contained in:
@@ -1,6 +1,10 @@
|
||||
package util
|
||||
|
||||
import "regexp"
|
||||
import (
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func StripPrefix(s string, prefix string) string {
|
||||
if len(prefix) != 0 && len(s) >= len(prefix) && s[0:len(prefix)] == prefix {
|
||||
@@ -18,3 +22,43 @@ func MatchStatus(status string, patterns []string) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// unicodeEscapeRegex matches the escape forms DecodeUnicodeEscapes has to look at.
// The alternatives are tried in this order at every position:
//  1. an escaped backslash (\\), consumed so that a "uXXXX" following it is
//     never mistaken for a Unicode escape;
//  2. a UTF-16 surrogate pair written as two consecutive escapes (\uD83D\uDE00);
//  3. a single \uXXXX escape.
var unicodeEscapeRegex = regexp.MustCompile(`\\\\|\\u[dD][89abAB][0-9a-fA-F]{2}\\u[dD][c-fC-F][0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}`)

// DecodeUnicodeEscapes decodes Unicode escape sequences (\uXXXX) in a string to UTF-8 characters.
// This is useful when a JSON response contains ASCII-safe encoded non-ASCII characters.
//
// The following sequences are returned unchanged so that a JSON document stays
// valid (and single-line) after decoding:
//   - escapes for control characters (below U+0020), the double quote (\u0022)
//     and the backslash (\u005c), which JSON requires to stay escaped;
//   - a "uXXXX" that follows an escaped backslash (\\uXXXX), which is literal text;
//   - lone surrogates that are not part of a valid high/low pair.
//
// A valid surrogate pair is decoded to the single code point it represents.
// The input slice is not modified; a new slice is returned.
func DecodeUnicodeEscapes(input []byte) []byte {
	return unicodeEscapeRegex.ReplaceAllFunc(input, decodeUnicodeEscapeMatch)
}

// decodeUnicodeEscapeMatch returns the replacement bytes for one match of
// unicodeEscapeRegex, or the match itself when it must be left as-is.
func decodeUnicodeEscapeMatch(match []byte) []byte {
	const (
		highSurrogateMin = 0xD800
		lowSurrogateMin  = 0xDC00
		surrogateMax     = 0xDFFF
		supplementaryMin = 0x10000
	)

	switch len(match) {
	case 6: // a single \uXXXX escape
		r, err := parseUnicodeEscapeUnit(match)
		if err != nil {
			return match // return original if parse fails
		}
		switch {
		case r < 0x20, r == '"', r == '\\':
			// These characters are only legal inside a JSON string when escaped.
			return match
		case r >= highSurrogateMin && r <= surrogateMax:
			// A lone surrogate has no UTF-8 encoding; keep the escape instead
			// of silently turning it into U+FFFD.
			return match
		}
		return []byte(string(r))
	case 12: // a high surrogate immediately followed by a low surrogate
		hi, errHi := parseUnicodeEscapeUnit(match[:6])
		lo, errLo := parseUnicodeEscapeUnit(match[6:])
		if errHi != nil || errLo != nil {
			return match
		}
		highBits := (hi - highSurrogateMin) << 10
		lowBits := lo - lowSurrogateMin
		r := supplementaryMin + highBits + lowBits
		return []byte(string(r))
	default: // an escaped backslash: keep it verbatim
		return match
	}
}

// parseUnicodeEscapeUnit parses the four hex digits of a 6-byte \uXXXX escape
// into the 16-bit code unit they denote.
func parseUnicodeEscapeUnit(escape []byte) (rune, error) {
	unit, err := strconv.ParseUint(string(escape[2:6]), 16, 16)
	if err != nil {
		return 0, err
	}
	return rune(unit), nil
}
|
||||
|
||||
// DecodeUnicodeEscapesInSSE decodes Unicode escape sequences in SSE formatted data.
|
||||
// It processes each line that starts with "data: " and decodes Unicode escapes in the JSON payload.
|
||||
func DecodeUnicodeEscapesInSSE(input []byte) []byte {
|
||||
lines := strings.Split(string(input), "\n")
|
||||
var result strings.Builder
|
||||
for i, line := range lines {
|
||||
if strings.HasPrefix(line, "data: ") {
|
||||
// Decode Unicode escapes in the JSON payload
|
||||
jsonData := line[6:]
|
||||
decodedData := DecodeUnicodeEscapes([]byte(jsonData))
|
||||
result.WriteString("data: ")
|
||||
result.Write(decodedData)
|
||||
} else {
|
||||
result.WriteString(line)
|
||||
}
|
||||
if i < len(lines)-1 {
|
||||
result.WriteString("\n")
|
||||
}
|
||||
}
|
||||
return []byte(result.String())
|
||||
}
|
||||
|
||||
108
plugins/wasm-go/extensions/ai-proxy/util/string_test.go
Normal file
108
plugins/wasm-go/extensions/ai-proxy/util/string_test.go
Normal file
@@ -0,0 +1,108 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestDecodeUnicodeEscapes(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "Chinese characters",
|
||||
input: `\u4e2d\u6587\u6d4b\u8bd5`,
|
||||
expected: `中文测试`,
|
||||
},
|
||||
{
|
||||
name: "Mixed content",
|
||||
input: `Hello \u4e16\u754c World`,
|
||||
expected: `Hello 世界 World`,
|
||||
},
|
||||
{
|
||||
name: "No escape sequences",
|
||||
input: `Hello World`,
|
||||
expected: `Hello World`,
|
||||
},
|
||||
{
|
||||
name: "JSON with Unicode escapes",
|
||||
input: `{"content":"\u76c8\u5229\u80fd\u529b"}`,
|
||||
expected: `{"content":"盈利能力"}`,
|
||||
},
|
||||
{
|
||||
name: "Full width parentheses",
|
||||
input: `\uff08\u76c8\u5229\uff09`,
|
||||
expected: `(盈利)`,
|
||||
},
|
||||
{
|
||||
name: "Empty string",
|
||||
input: ``,
|
||||
expected: ``,
|
||||
},
|
||||
{
|
||||
name: "Invalid escape sequence (not modified)",
|
||||
input: `\u00GG`,
|
||||
expected: `\u00GG`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := DecodeUnicodeEscapes([]byte(tt.input))
|
||||
assert.Equal(t, tt.expected, string(result))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeUnicodeEscapesInSSE(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "SSE data with Unicode escapes",
|
||||
input: `data: {"choices":[{"delta":{"content":"\u4e2d\u6587"}}]}
|
||||
|
||||
`,
|
||||
expected: `data: {"choices":[{"delta":{"content":"中文"}}]}
|
||||
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "Multiple SSE data lines",
|
||||
input: `data: {"content":"\u4e2d\u6587"}
|
||||
data: {"content":"\u82f1\u6587"}
|
||||
data: [DONE]
|
||||
`,
|
||||
expected: `data: {"content":"中文"}
|
||||
data: {"content":"英文"}
|
||||
data: [DONE]
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "Non-data lines unchanged",
|
||||
input: ": comment\nevent: message\ndata: test\n",
|
||||
expected: ": comment\nevent: message\ndata: test\n",
|
||||
},
|
||||
{
|
||||
name: "Real Vertex AI response format",
|
||||
input: `data: {"choices":[{"delta":{"content":"\uff08\u76c8\u5229\u80fd\u529b\uff09","role":"assistant"},"index":0}],"created":1768307454,"id":"test","model":"gemini","object":"chat.completion.chunk"}
|
||||
|
||||
`,
|
||||
expected: `data: {"choices":[{"delta":{"content":"(盈利能力)","role":"assistant"},"index":0}],"created":1768307454,"id":"test","model":"gemini","object":"chat.completion.chunk"}
|
||||
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := DecodeUnicodeEscapesInSSE([]byte(tt.input))
|
||||
assert.Equal(t, tt.expected, string(result))
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user