[ai-cache] Implement a WASM plugin for LLM result retrieval based on vector similarity (#1290)

This commit is contained in:
Yang Beining
2024-10-27 08:21:04 +00:00
committed by GitHub
parent d309bf2e25
commit acec48ed8b
27 changed files with 2025 additions and 346 deletions

View File

@@ -0,0 +1,76 @@
// Copyright (c) 2022 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tests
import (
"testing"
"github.com/alibaba/higress/test/e2e/conformance/utils/http"
"github.com/alibaba/higress/test/e2e/conformance/utils/suite"
)
func init() {
Register(WasmPluginsAiCache)
}
var WasmPluginsAiCache = suite.ConformanceTest{
ShortName: "WasmPluginAiCache",
Description: "The Ingress in the higress-conformance-infra namespace test the ai-cache WASM plugin.",
Features: []suite.SupportedFeature{suite.WASMGoConformanceFeature},
Manifests: []string{"tests/go-wasm-ai-cache.yaml"},
Test: func(t *testing.T, suite *suite.ConformanceTestSuite) {
testcases := []http.Assertion{
{
Meta: http.AssertionMeta{
TestCaseName: "case 1: basic",
TargetBackend: "infra-backend-v1",
TargetNamespace: "higress-conformance-infra",
},
Request: http.AssertionRequest{
ActualRequest: http.Request{
Host: "dashscope.aliyuncs.com",
Path: "/v1/chat/completions",
Method: "POST",
ContentType: http.ContentTypeApplicationJson,
Body: []byte(`{
"model": "qwen-long",
"messages": [{"role":"user","content":"hi"}]}`),
},
ExpectedRequest: &http.ExpectedRequest{
Request: http.Request{
Host: "dashscope.aliyuncs.com",
Path: "/compatible-mode/v1/chat/completions",
Method: "POST",
ContentType: http.ContentTypeApplicationJson,
Body: []byte(`{
"model": "qwen-long",
"messages": [{"role":"user","content":"hi"}]}`),
},
},
},
Response: http.AssertionResponse{
ExpectedResponse: http.Response{
StatusCode: 200,
},
},
},
}
t.Run("WasmPlugins ai-cache", func(t *testing.T) {
for _, testcase := range testcases {
http.MakeRequestAndExpectEventuallyConsistentResponse(t, suite.RoundTripper, suite.TimeoutConfig, suite.GatewayAddress, testcase)
}
})
},
}

View File

@@ -0,0 +1,103 @@
# Copyright (c) 2022 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
annotations:
name: wasmplugin-ai-cache-openai
namespace: higress-conformance-infra
spec:
ingressClassName: higress
rules:
- host: "dashscope.aliyuncs.com"
http:
paths:
- pathType: Prefix
path: "/"
backend:
service:
name: infra-backend-v1
port:
number: 8080
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
annotations:
name: wasmplugin-ai-cache-qwen
namespace: higress-conformance-infra
spec:
ingressClassName: higress
rules:
- host: "qwen.ai.com"
http:
paths:
- pathType: Prefix
path: "/"
backend:
service:
name: infra-backend-v1
port:
number: 8080
---
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
metadata:
name: ai-cache
namespace: higress-system
spec:
priority: 400
matchRules:
- config:
embedding:
type: "dashscope"
serviceName: "qwen"
apiKey: "{{secret.qwenApiKey}}"
timeout: 12000
vector:
type: "dashvector"
serviceName: "dashvector"
collectionID: "{{secret.collectionID}}"
serviceDomain: "{{secret.serviceDomain}}"
apiKey: "{{secret.apiKey}}"
timeout: 12000
cache:
ingress:
- higress-conformance-infra/wasmplugin-ai-cache-openai
- higress-conformance-infra/wasmplugin-ai-cache-qwen
# url: file:///opt/plugins/wasm-go/extensions/ai-cache/plugin.wasm
url: oci://registry.cn-shanghai.aliyuncs.com/suchunsv/higress_ai:1.18
---
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
metadata:
name: ai-proxy
namespace: higress-system
spec:
priority: 201
matchRules:
- config:
provider:
type: "qwen"
qwenEnableCompatible: true
apiTokens:
- "{{secret.qwenApiKey}}"
timeout: 1200000
modelMapping:
"*": "qwen-long"
ingress:
- higress-conformance-infra/wasmplugin-ai-cache-openai
- higress-conformance-infra/wasmplugin-ai-cache-qwen
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0