Go daemon listening on :8770 that fronts mvoice (8766), whisper-server
(8178), ollama (11434), comfyui (8188) behind a single /v1 façade.
What this MVP does:
- Loads config/consumers.yaml: routing table, per-consumer URL + health +
paths + vram_resident_mib + can_coexist_with + load/unload routes.
- Background health probe (5s) on every consumer; refuses fast with a
structured 503 if the last probe failed (no Felix-Banholzer-style
silent fallback).
- POST /v1/{tts,stt,llm,image} proxies the request body + Content-Type
to the routed consumer's path and streams the response back.
- GET /audio/* proxies to audio_proxy consumer (wa.sh fetches its WAV
this way).
- GET /v1/status exposes live GPU sample (nvidia-smi every 2s),
per-consumer health/loaded/gpu_resident_mib/active/total_requests,
scheduler stats.
- GET /healthz, GET / — broker liveness.
The Scheduler interface is in place but the implementation is
'Passthrough' — every job runs immediately, no lock, no queue. Schritt 4
replaces it with a serialising mutex; Schritt 5 adds VRAM-pressure
eviction. The interface boundary means server.go stays unchanged.
Out of scope here:
- Schritt 3: wa.sh migration (parallel work in mAi).
- Schritt 4: queue + global GPU lock.
- Schritt 5: nvidia-smi-driven LRU eviction.
Tests: config validation (good/bad), proxy forwards body, audio proxy
streams bytes, unhealthy consumer returns 503, /v1/status JSON shape.
Refs: m/mGPUmanager#1
179 lines
4.6 KiB
Go
179 lines
4.6 KiB
Go
// Package registry tracks the live state of every GPU consumer.
//
// At Schritt 2 (MVP) the registry only does health probing — periodic GET on
// each consumer's health route, last-success timestamp, last error. Schritt 4
// adds per-consumer in-flight counts and LastUsed for LRU eviction in
// Schritt 5.
|
|
package registry
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"net/http"
|
|
"sync"
|
|
"time"
|
|
|
|
"mgit.msbls.de/m/mGPUmanager/internal/config"
|
|
)
|
|
|
|
// State describes one consumer's live status at a single point in time.
// Registry.Get and Registry.Snapshot hand out copies of it, so callers may
// read the fields without holding any lock.
type State struct {
	// Name is the consumer's key in the config's Consumers map.
	Name string
	// Healthy is the outcome of the most recently recorded health probe.
	Healthy bool
	// LastProbe is the time the most recent probe outcome was recorded.
	LastProbe time.Time
	// LastError is the message of the most recent probe failure; it is
	// reset to the empty string by a successful probe.
	LastError string
	// GPUResidentMiB is refreshed from the health response body after a
	// successful probe that returned a body.
	GPUResidentMiB int
	// Loaded is refreshed alongside GPUResidentMiB from the health body.
	// NOTE(review): New leaves this false until a probe body is parsed;
	// confirm that matches the intended default for consumers that never
	// report it.
	Loaded bool
	// Active is the number of jobs marked via MarkActive and not yet
	// released.
	Active int
	// LastUsed is the time the most recent job was released.
	LastUsed time.Time
	// TotalRequests counts every MarkActive call for this consumer.
	TotalRequests int64
}
|
|
|
|
// Registry holds the live state of all consumers.
|
|
type Registry struct {
|
|
cfg *config.Config
|
|
client *http.Client
|
|
logger *slog.Logger
|
|
|
|
mu sync.RWMutex
|
|
states map[string]*State
|
|
}
|
|
|
|
// New builds a Registry from the loaded config.
|
|
func New(cfg *config.Config, logger *slog.Logger) *Registry {
|
|
r := &Registry{
|
|
cfg: cfg,
|
|
client: &http.Client{Timeout: 5 * time.Second},
|
|
logger: logger,
|
|
states: make(map[string]*State, len(cfg.Consumers)),
|
|
}
|
|
for name := range cfg.Consumers {
|
|
r.states[name] = &State{Name: name}
|
|
}
|
|
return r
|
|
}
|
|
|
|
// Run starts the background health-probe loop and blocks until ctx is done.
|
|
// Cadence is fixed at 5s for health (independent of GPU polling cadence).
|
|
func (r *Registry) Run(ctx context.Context) {
|
|
r.probeAll(ctx)
|
|
t := time.NewTicker(5 * time.Second)
|
|
defer t.Stop()
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-t.C:
|
|
r.probeAll(ctx)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (r *Registry) probeAll(ctx context.Context) {
|
|
var wg sync.WaitGroup
|
|
for name, cons := range r.cfg.Consumers {
|
|
wg.Add(1)
|
|
go func(name string, cons *config.Consumer) {
|
|
defer wg.Done()
|
|
r.probeOne(ctx, name, cons)
|
|
}(name, cons)
|
|
}
|
|
wg.Wait()
|
|
}
|
|
|
|
func (r *Registry) probeOne(ctx context.Context, name string, cons *config.Consumer) {
|
|
cctx, cancel := context.WithTimeout(ctx, 3*time.Second)
|
|
defer cancel()
|
|
|
|
req, err := http.NewRequestWithContext(cctx, cons.Health.Method, cons.URL+cons.Health.Path, nil)
|
|
if err != nil {
|
|
r.recordProbe(name, false, err.Error(), nil)
|
|
return
|
|
}
|
|
resp, err := r.client.Do(req)
|
|
if err != nil {
|
|
r.recordProbe(name, false, err.Error(), nil)
|
|
return
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 8192))
|
|
if resp.StatusCode >= 400 {
|
|
r.recordProbe(name, false, fmt.Sprintf("status %d", resp.StatusCode), nil)
|
|
return
|
|
}
|
|
r.recordProbe(name, true, "", body)
|
|
}
|
|
|
|
// recordProbe stores the outcome of one health check, optionally parsing
|
|
// gpu_resident_mib / loaded fields out of the response body.
|
|
func (r *Registry) recordProbe(name string, ok bool, errMsg string, body []byte) {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
s := r.states[name]
|
|
if s == nil {
|
|
return
|
|
}
|
|
s.LastProbe = time.Now()
|
|
s.Healthy = ok
|
|
s.LastError = errMsg
|
|
if ok && body != nil {
|
|
s.GPUResidentMiB, s.Loaded = parseGPUFields(body, s.Loaded)
|
|
}
|
|
if !ok && r.logger != nil {
|
|
r.logger.Debug("consumer probe failed", "consumer", name, "err", errMsg)
|
|
}
|
|
}
|
|
|
|
// RecordProbeForTest exposes the internal probe-recording path to tests
|
|
// in other packages without depending on the live 5s probe loop.
|
|
func (r *Registry) RecordProbeForTest(name string, ok bool, errMsg string, body []byte) {
|
|
r.recordProbe(name, ok, errMsg, body)
|
|
}
|
|
|
|
// Snapshot returns a copy of all consumer states, ordered by config-declared
|
|
// consumer name set (Go map iteration order is randomized — callers that need
|
|
// stable ordering should sort).
|
|
func (r *Registry) Snapshot() map[string]State {
|
|
r.mu.RLock()
|
|
defer r.mu.RUnlock()
|
|
out := make(map[string]State, len(r.states))
|
|
for k, v := range r.states {
|
|
out[k] = *v
|
|
}
|
|
return out
|
|
}
|
|
|
|
// Get returns a single consumer state (copy) or zero-value if unknown.
|
|
func (r *Registry) Get(name string) State {
|
|
r.mu.RLock()
|
|
defer r.mu.RUnlock()
|
|
if s, ok := r.states[name]; ok {
|
|
return *s
|
|
}
|
|
return State{}
|
|
}
|
|
|
|
// MarkActive increments the in-flight count and updates LastUsed.
|
|
// Returns a release func to call on job completion.
|
|
func (r *Registry) MarkActive(name string) func() {
|
|
r.mu.Lock()
|
|
if s, ok := r.states[name]; ok {
|
|
s.Active++
|
|
s.TotalRequests++
|
|
}
|
|
r.mu.Unlock()
|
|
return func() {
|
|
r.mu.Lock()
|
|
if s, ok := r.states[name]; ok {
|
|
if s.Active > 0 {
|
|
s.Active--
|
|
}
|
|
s.LastUsed = time.Now()
|
|
}
|
|
r.mu.Unlock()
|
|
}
|
|
}
|