mirror of
https://github.com/alibaba/higress.git
synced 2026-05-24 20:57:31 +08:00
82 lines
2.7 KiB
Go
82 lines
2.7 KiB
Go
package least_busy
|
|
|
|
import (
|
|
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/least_busy/scheduling"
|
|
|
|
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
|
|
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
|
|
"github.com/higress-group/wasm-go/pkg/log"
|
|
"github.com/higress-group/wasm-go/pkg/wrapper"
|
|
"github.com/tidwall/gjson"
|
|
)
|
|
|
|
type LeastBusyLoadBalancer struct {
|
|
criticalModels map[string]struct{}
|
|
}
|
|
|
|
func NewLeastBusyLoadBalancer(json gjson.Result) (LeastBusyLoadBalancer, error) {
|
|
lb := LeastBusyLoadBalancer{}
|
|
lb.criticalModels = make(map[string]struct{})
|
|
for _, model := range json.Get("criticalModels").Array() {
|
|
lb.criticalModels[model.String()] = struct{}{}
|
|
}
|
|
return lb, nil
|
|
}
|
|
|
|
// Callbacks which are called in request path
|
|
func (lb LeastBusyLoadBalancer) HandleHttpRequestHeaders(ctx wrapper.HttpContext) types.Action {
|
|
// If return types.ActionContinue, SetUpstreamOverrideHost will not take effect
|
|
return types.HeaderStopIteration
|
|
}
|
|
|
|
func (lb LeastBusyLoadBalancer) HandleHttpRequestBody(ctx wrapper.HttpContext, body []byte) types.Action {
|
|
requestModel := gjson.GetBytes(body, "model")
|
|
if !requestModel.Exists() {
|
|
return types.ActionContinue
|
|
}
|
|
_, isCritical := lb.criticalModels[requestModel.String()]
|
|
llmReq := &scheduling.LLMRequest{
|
|
Model: requestModel.String(),
|
|
Critical: isCritical,
|
|
}
|
|
hostInfos, err := proxywasm.GetUpstreamHosts()
|
|
if err != nil {
|
|
return types.ActionContinue
|
|
}
|
|
hostMetrics := make(map[string]string)
|
|
for _, hostInfo := range hostInfos {
|
|
if gjson.Get(hostInfo[1], "health_status").String() == "Healthy" {
|
|
hostMetrics[hostInfo[0]] = gjson.Get(hostInfo[1], "metrics").String()
|
|
}
|
|
}
|
|
scheduler, err := scheduling.GetScheduler(hostMetrics)
|
|
if err != nil {
|
|
log.Debugf("initial scheduler failed: %v", err)
|
|
return types.ActionContinue
|
|
}
|
|
targetPod, err := scheduler.Schedule(llmReq)
|
|
log.Debugf("targetPod: %+v", targetPod.Address)
|
|
if err != nil {
|
|
log.Debugf("pod select failed: %v", err)
|
|
proxywasm.SendHttpResponseWithDetail(429, "limited resources", nil, []byte("limited resources"), 0)
|
|
} else {
|
|
proxywasm.SetUpstreamOverrideHost([]byte(targetPod.Address))
|
|
}
|
|
return types.ActionContinue
|
|
}
|
|
|
|
func (lb LeastBusyLoadBalancer) HandleHttpResponseHeaders(ctx wrapper.HttpContext) types.Action {
|
|
ctx.DontReadResponseBody()
|
|
return types.ActionContinue
|
|
}
|
|
|
|
func (lb LeastBusyLoadBalancer) HandleHttpStreamingResponseBody(ctx wrapper.HttpContext, data []byte, endOfStream bool) []byte {
|
|
return data
|
|
}
|
|
|
|
func (lb LeastBusyLoadBalancer) HandleHttpResponseBody(ctx wrapper.HttpContext, body []byte) types.Action {
|
|
return types.ActionContinue
|
|
}
|
|
|
|
func (lb LeastBusyLoadBalancer) HandleHttpStreamDone(ctx wrapper.HttpContext) {}
|