Files
mGPUmanager/internal/registry/registry.go
mAi c81c145163 feat: Schritt 2 — mGPUmanager MVP routing + /v1/status
Go daemon listening on :8770 that fronts mvoice (8766), whisper-server
(8178), ollama (11434), comfyui (8188) behind a single /v1 façade.

What this MVP does:
- Loads config/consumers.yaml: routing table, per-consumer URL + health +
  paths + vram_resident_mib + can_coexist_with + load/unload routes.
- Background health probe (5s) on every consumer; refuses fast with a
  structured 503 if the last probe failed (no Felix-Banholzer-style
  silent fallback).
- POST /v1/{tts,stt,llm,image} proxies the request body + Content-Type
  to the routed consumer's path and streams the response back.
- GET /audio/* proxies to audio_proxy consumer (wa.sh fetches its WAV
  this way).
- GET /v1/status exposes live GPU sample (nvidia-smi every 2s),
  per-consumer health/loaded/gpu_resident_mib/active/total_requests,
  scheduler stats.
- GET /healthz, GET / — broker liveness.

The Scheduler interface is in place but the implementation is
'Passthrough' — every job runs immediately, no lock, no queue. Schritt 4
replaces it with a serialising mutex; Schritt 5 adds VRAM-pressure
eviction. The interface boundary means server.go stays unchanged.

Out of scope here:
- Schritt 3: wa.sh migration (parallel work in mAi).
- Schritt 4: queue + global GPU lock.
- Schritt 5: nvidia-smi-driven LRU eviction.

Tests: config validation (good/bad), proxy forwards body, audio proxy
streams bytes, unhealthy consumer returns 503, /v1/status JSON shape.

Refs: m/mGPUmanager#1
2026-05-11 13:30:17 +02:00

179 lines
4.6 KiB
Go

// Package registry tracks the live state of every GPU consumer.
//
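// At Schritt 2 (MVP) the registry's only background activity is health
// probing — a periodic request to each consumer's health route, recording the
// time of the last probe and its error, if any. Per-consumer in-flight counts
// and LastUsed are already maintained by MarkActive so that Schritt 4
// (scheduling) and Schritt 5 (LRU eviction) can build on them.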
package registry
import (
"context"
"fmt"
"io"
"log/slog"
"net/http"
"sync"
"time"
"mgit.msbls.de/m/mGPUmanager/internal/config"
)
// State is a point-in-time snapshot of a single consumer's live status.
// The Registry keeps one State per configured consumer and hands out copies
// through Snapshot and Get.
type State struct {
	// Name is the consumer's key in the config's Consumers map.
	Name string
	// Healthy reports whether the most recent health probe succeeded.
	Healthy bool
	// LastProbe is when the most recent probe outcome (success or failure)
	// was recorded.
	LastProbe time.Time
	// LastError is the failure message of the most recent probe; it is
	// reset to "" when a probe succeeds.
	LastError string
	// GPUResidentMiB is populated from the consumer's health response body
	// on successful probes, when present.
	GPUResidentMiB int
	// Loaded is populated from the health response on successful probes
	// (mvoice reports it).
	// NOTE(review): the original comment says other consumers "default to
	// true", but New leaves this false; confirm the default is applied by
	// parseGPUFields.
	Loaded bool
	// Active is the in-flight job count (Schritt 4), incremented by
	// MarkActive and decremented by the release func it returns.
	Active int
	// LastUsed is stamped when a MarkActive release func runs, i.e. at job
	// completion (Schritt 5). The registry does not check whether the job
	// succeeded.
	LastUsed time.Time
	// TotalRequests counts MarkActive calls for this consumer.
	TotalRequests int64
}
// Registry holds the live state of all consumers.
//
// The states map is filled once in New with one entry per configured
// consumer; every method in this file that reads or writes a State does so
// while holding mu.
type Registry struct {
	// cfg is the loaded configuration; its Consumers are iterated by the
	// probe loop.
	cfg *config.Config
	// client is the shared HTTP client used for health probes.
	client *http.Client
	// logger may be nil; when set, probe failures are logged at Debug level.
	logger *slog.Logger
	// mu guards states and the State values it points to.
	mu sync.RWMutex
	// states maps consumer name to its mutable live state.
	states map[string]*State
}
// New constructs a Registry for the consumers declared in cfg.
//
// Every configured consumer gets a State carrying only its name; all other
// fields stay at their zero values until the first probe or MarkActive call.
// The probe HTTP client is created here with a 5s overall timeout. logger
// is stored as given.
func New(cfg *config.Config, logger *slog.Logger) *Registry {
	states := make(map[string]*State, len(cfg.Consumers))
	for consumer := range cfg.Consumers {
		states[consumer] = &State{Name: consumer}
	}

	return &Registry{
		cfg:    cfg,
		client: &http.Client{Timeout: 5 * time.Second},
		logger: logger,
		states: states,
	}
}
// Run drives the health-probe loop and blocks until ctx is done.
//
// One full probe round runs immediately; after it finishes, further rounds
// are triggered by a 5s ticker. This cadence is fixed and independent of the
// GPU polling cadence.
func (r *Registry) Run(ctx context.Context) {
	// Probe once up front so state is populated before the first tick.
	r.probeAll(ctx)

	ticker := time.NewTicker(5 * time.Second)
	defer ticker.Stop()

	done := ctx.Done()
	for {
		select {
		case <-ticker.C:
			r.probeAll(ctx)
		case <-done:
			return
		}
	}
}
// probeAll runs one health probe per configured consumer, each in its own
// goroutine, and returns once every probe has finished.
func (r *Registry) probeAll(ctx context.Context) {
	var pending sync.WaitGroup
	pending.Add(len(r.cfg.Consumers))

	for name, cons := range r.cfg.Consumers {
		// Per-iteration copies so each goroutine sees its own pair even on
		// toolchains where the loop variables are shared.
		name, cons := name, cons
		go func() {
			defer pending.Done()
			r.probeOne(ctx, name, cons)
		}()
	}

	pending.Wait()
}
// probeOne performs a single health check against one consumer and records
// the outcome via recordProbe.
//
// The request uses the consumer's configured health method against
// URL+Health.Path, bounded by a 3s timeout derived from ctx. A request
// construction error, a transport error, or a status >= 400 is recorded as a
// failed probe. Otherwise up to 8192 bytes of the response body are passed
// along with the success.
func (r *Registry) probeOne(ctx context.Context, name string, cons *config.Consumer) {
	probeCtx, cancel := context.WithTimeout(ctx, 3*time.Second)
	defer cancel()

	// fail records an unsuccessful probe with the given message.
	fail := func(msg string) {
		r.recordProbe(name, false, msg, nil)
	}

	target := cons.URL + cons.Health.Path
	req, err := http.NewRequestWithContext(probeCtx, cons.Health.Method, target, nil)
	if err != nil {
		fail(err.Error())
		return
	}

	resp, err := r.client.Do(req)
	if err != nil {
		fail(err.Error())
		return
	}
	defer resp.Body.Close()

	// The body is read (capped) before the status check; a read error is
	// deliberately ignored and whatever was read is used as-is.
	payload, _ := io.ReadAll(io.LimitReader(resp.Body, 8192))

	if code := resp.StatusCode; code >= 400 {
		fail(fmt.Sprintf("status %d", code))
		return
	}
	r.recordProbe(name, true, "", payload)
}
// recordProbe stores the result of one health check for the named consumer.
//
// It always updates LastProbe, Healthy and LastError. On success with a
// non-nil body, GPUResidentMiB and Loaded are refreshed from
// parseGPUFields (which receives the current Loaded value). On failure the
// message is logged at Debug level when a logger is configured. Names that
// are not in the registry are ignored.
func (r *Registry) recordProbe(name string, ok bool, errMsg string, body []byte) {
	r.mu.Lock()
	defer r.mu.Unlock()

	st, known := r.states[name]
	if !known || st == nil {
		return
	}

	st.Healthy = ok
	st.LastError = errMsg
	st.LastProbe = time.Now()

	if !ok {
		if r.logger != nil {
			r.logger.Debug("consumer probe failed", "consumer", name, "err", errMsg)
		}
		return
	}

	if body != nil {
		st.GPUResidentMiB, st.Loaded = parseGPUFields(body, st.Loaded)
	}
}
// RecordProbeForTest exposes the internal probe-recording path to tests in
// other packages, so they can set a consumer's probe outcome directly
// without depending on the live 5s probe loop. It delegates to recordProbe
// with the arguments unchanged.
func (r *Registry) RecordProbeForTest(name string, ok bool, errMsg string, body []byte) {
	r.recordProbe(name, ok, errMsg, body)
}
// Snapshot returns a copy of every consumer's State, keyed by consumer name.
//
// The result is a map, so iteration order is unspecified; callers that need
// stable ordering should sort the keys. The returned map and its values are
// copies: mutating them does not affect the registry.
func (r *Registry) Snapshot() map[string]State {
	r.mu.RLock()
	defer r.mu.RUnlock()

	snap := make(map[string]State, len(r.states))
	for name, st := range r.states {
		snap[name] = *st
	}
	return snap
}
// Get returns a copy of the named consumer's State. For a name the registry
// does not know, it returns the zero-value State.
func (r *Registry) Get(name string) State {
	r.mu.RLock()
	defer r.mu.RUnlock()

	st, known := r.states[name]
	if !known {
		return State{}
	}
	return *st
}
// MarkActive registers the start of a job on the named consumer: it
// increments the in-flight count (Active) and the TotalRequests counter.
// Unknown consumer names are ignored.
//
// It returns a release func that must be called when the job completes.
// release decrements Active (never below zero) and stamps LastUsed with the
// completion time. release is idempotent: only the first call has an effect,
// so calling it from both a defer and an explicit path cannot consume
// another in-flight job's count.
func (r *Registry) MarkActive(name string) func() {
	r.mu.Lock()
	if s, ok := r.states[name]; ok {
		s.Active++
		s.TotalRequests++
	}
	r.mu.Unlock()

	// released is only read and written while holding r.mu.
	released := false
	return func() {
		r.mu.Lock()
		defer r.mu.Unlock()

		if released {
			return
		}
		released = true

		if s, ok := r.states[name]; ok {
			if s.Active > 0 {
				s.Active--
			}
			s.LastUsed = time.Now()
		}
	}
}