feat/ai proxy vertex ai compatible (#3324)

2026-05-22 03:37:26 +08:00 · 2026-01-14 10:13:00 +08:00
parent e7010256fe
commit f1a5f18c78
7 changed files with 802 additions and 1 deletions
--- a/plugins/wasm-go/extensions/ai-proxy/util/string.go
+++ b/plugins/wasm-go/extensions/ai-proxy/util/string.go
@@ -1,6 +1,10 @@
 package util

-import "regexp"
+import (
+	"regexp"
+	"strconv"
+	"strings"
+)

 func StripPrefix(s string, prefix string) string {
 	if len(prefix) != 0 && len(s) >= len(prefix) && s[0:len(prefix)] == prefix {
@@ -18,3 +22,43 @@ func MatchStatus(status string, patterns []string) bool {
 	}
 	return false
 }
+
+// unicodeEscapeRegex matches a Unicode escape sequence of the form \uXXXX: a backslash,
+// a lowercase "u" and exactly four hexadecimal digits (upper or lower case).
+// The four digits are captured as submatch 1.
+var unicodeEscapeRegex = regexp.MustCompile(`\\u([0-9a-fA-F]{4})`)
+
+// DecodeUnicodeEscapes decodes Unicode escape sequences (\uXXXX) in a string to UTF-8 characters.
+// This is useful when a JSON response contains ASCII-safe encoded non-ASCII characters.
+func DecodeUnicodeEscapes(input []byte) []byte {
+	result := unicodeEscapeRegex.ReplaceAllFunc(input, func(match []byte) []byte {
+		// match is like \uXXXX, extract the hex part (XXXX)
+		hexStr := string(match[2:6])
+		codePoint, err := strconv.ParseInt(hexStr, 16, 32)
+		if err != nil {
+			return match // return original if parse fails
+		}
+		return []byte(string(rune(codePoint)))
+	})
+	return result
+}
+
+// DecodeUnicodeEscapesInSSE decodes Unicode escape sequences in SSE formatted data.
+//
+// The input is split on "\n". For every line that starts with the "data:" field name the
+// field value is passed through DecodeUnicodeEscapes; every other line (comments, other
+// fields, blank separators) is copied unchanged, and the line breaks are preserved.
+// Both "data: value" and "data:value" are handled, because the space after the colon is
+// optional in the SSE format.
+func DecodeUnicodeEscapesInSSE(input []byte) []byte {
+	const dataField = "data:"
+
+	var result strings.Builder
+	// A decoded character is never longer than the \uXXXX text it replaces.
+	result.Grow(len(input))
+
+	for i, line := range strings.Split(string(input), "\n") {
+		if i > 0 {
+			result.WriteByte('\n')
+		}
+		if !strings.HasPrefix(line, dataField) {
+			result.WriteString(line)
+			continue
+		}
+		// The optional space after the colon is left in the value; it contains no
+		// escape, so it comes back from the decoder unchanged.
+		result.WriteString(dataField)
+		result.Write(DecodeUnicodeEscapes([]byte(line[len(dataField):])))
+	}
+	return []byte(result.String())
+}
--- a/plugins/wasm-go/extensions/ai-proxy/util/string_test.go
+++ b/plugins/wasm-go/extensions/ai-proxy/util/string_test.go
@@ -0,0 +1,108 @@
+package util
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestDecodeUnicodeEscapes(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "Chinese characters",
+			input:    `\u4e2d\u6587\u6d4b\u8bd5`,
+			expected: `中文测试`,
+		},
+		{
+			name:     "Mixed content",
+			input:    `Hello \u4e16\u754c World`,
+			expected: `Hello 世界 World`,
+		},
+		{
+			name:     "No escape sequences",
+			input:    `Hello World`,
+			expected: `Hello World`,
+		},
+		{
+			name:     "JSON with Unicode escapes",
+			input:    `{"content":"\u76c8\u5229\u80fd\u529b"}`,
+			expected: `{"content":"盈利能力"}`,
+		},
+		{
+			name:     "Full width parentheses",
+			input:    `\uff08\u76c8\u5229\uff09`,
+			expected: `（盈利）`,
+		},
+		{
+			name:     "Empty string",
+			input:    ``,
+			expected: ``,
+		},
+		{
+			name:     "Invalid escape sequence (not modified)",
+			input:    `\u00GG`,
+			expected: `\u00GG`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := DecodeUnicodeEscapes([]byte(tt.input))
+			assert.Equal(t, tt.expected, string(result))
+		})
+	}
+}
+
+func TestDecodeUnicodeEscapesInSSE(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name: "SSE data with Unicode escapes",
+			input: `data: {"choices":[{"delta":{"content":"\u4e2d\u6587"}}]}
+
+`,
+			expected: `data: {"choices":[{"delta":{"content":"中文"}}]}
+
+`,
+		},
+		{
+			name: "Multiple SSE data lines",
+			input: `data: {"content":"\u4e2d\u6587"}
+data: {"content":"\u82f1\u6587"}
+data: [DONE]
+`,
+			expected: `data: {"content":"中文"}
+data: {"content":"英文"}
+data: [DONE]
+`,
+		},
+		{
+			name:     "Non-data lines unchanged",
+			input:    ": comment\nevent: message\ndata: test\n",
+			expected: ": comment\nevent: message\ndata: test\n",
+		},
+		{
+			name: "Real Vertex AI response format",
+			input: `data: {"choices":[{"delta":{"content":"\uff08\u76c8\u5229\u80fd\u529b\uff09","role":"assistant"},"index":0}],"created":1768307454,"id":"test","model":"gemini","object":"chat.completion.chunk"}
+
+`,
+			expected: `data: {"choices":[{"delta":{"content":"（盈利能力）","role":"assistant"},"index":0}],"created":1768307454,"id":"test","model":"gemini","object":"chat.completion.chunk"}
+
+`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := DecodeUnicodeEscapesInSSE([]byte(tt.input))
+			assert.Equal(t, tt.expected, string(result))
+		})
+	}
+}