Files
mGPUmanager/internal/config/config.go
mAi c81c145163 feat: Schritt 2 — mGPUmanager MVP routing + /v1/status
Go daemon listening on :8770 that fronts mvoice (8766), whisper-server
(8178), ollama (11434), comfyui (8188) behind a single /v1 façade.

What this MVP does:
- Loads config/consumers.yaml: routing table, per-consumer URL + health +
  paths + vram_resident_mib + can_coexist_with + load/unload routes.
- Background health probe (5s) on every consumer; refuses fast with a
  structured 503 if the last probe failed (no Felix-Banholzer-style
  silent fallback).
- POST /v1/{tts,stt,llm,image} proxies the request body + Content-Type
  to the routed consumer's path and streams the response back.
- GET /audio/* proxies to audio_proxy consumer (wa.sh fetches its WAV
  this way).
- GET /v1/status exposes live GPU sample (nvidia-smi every 2s),
  per-consumer health/loaded/gpu_resident_mib/active/total_requests,
  scheduler stats.
- GET /healthz, GET / — broker liveness.

The Scheduler interface is in place but the implementation is
'Passthrough' — every job runs immediately, no lock, no queue. Schritt 4
replaces it with a serialising mutex; Schritt 5 adds VRAM-pressure
eviction. The interface boundary means server.go stays unchanged.

Out of scope here:
- Schritt 3: wa.sh migration (parallel work in mAi).
- Schritt 4: queue + global GPU lock.
- Schritt 5: nvidia-smi-driven LRU eviction.

Tests: config validation (good/bad), proxy forwards body, audio proxy
streams bytes, unhealthy consumer returns 503, /v1/status JSON shape.

Refs: m/mGPUmanager#1
2026-05-11 13:30:17 +02:00

166 lines
5.0 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Package config loads the mGPUmanager consumer registry from YAML.
//
// The consumers.yaml file declares every GPU consumer (mvoice, whisper-server,
// ollama, comfyui), how to route the four logical endpoint kinds (tts, stt,
// llm, image) to a consumer, how to probe its health, and how to load/unload
// it from VRAM. The scheduler (Schritt 4-5) reads vram_resident_mib +
// can_coexist_with to drive eviction.
package config
import (
"fmt"
"net/url"
"os"
"strings"
"time"
"gopkg.in/yaml.v3"
)
// EndpointKind names one of the logical broker endpoints served under /v1/*.
type EndpointKind string

// The four endpoint kinds the broker exposes.
const (
	KindTTS   EndpointKind = "tts"
	KindSTT   EndpointKind = "stt"
	KindLLM   EndpointKind = "llm"
	KindImage EndpointKind = "image"
)

// AllKinds holds every endpoint kind in a fixed order (tts, stt, llm, image);
// /v1/status and the tests rely on this canonical ordering.
var AllKinds = []EndpointKind{
	KindTTS,
	KindSTT,
	KindLLM,
	KindImage,
}
// Route is a single HTTP call target on a consumer: a method plus a path.
type Route struct {
	Method string `yaml:"method"`
	Path   string `yaml:"path"`

	// Body optionally carries a fixed request payload for admin operations;
	// for example ComfyUI's /api/free expects
	// {"unload_models":true,"free_memory":true}.
	Body string `yaml:"body,omitempty"`
}
// Consumer is the configuration of one GPU consumer fronted by the broker.
type Consumer struct {
	// URL is the consumer's base address; Health is the route probed to
	// decide whether the consumer is usable.
	URL    string `yaml:"url"`
	Health Route  `yaml:"health"`

	// Paths maps each endpoint kind this consumer serves to its route.
	Paths map[EndpointKind]Route `yaml:"paths"`

	// VRAM bookkeeping. VRAMManaged marks a consumer that runs its own LRU
	// (ollama).
	VRAMResidentMiB int  `yaml:"vram_resident_mib"`
	VRAMManaged     bool `yaml:"vram_managed"`

	// Optional load/unload routes. SystemdUnit is the fallback way to unload
	// (whisper-server).
	Load        *Route `yaml:"load,omitempty"`
	Unload      *Route `yaml:"unload,omitempty"`
	SystemdUnit string `yaml:"systemd_unit,omitempty"`

	CanCoexistWith []string `yaml:"can_coexist_with"`
	Priority       int      `yaml:"priority"`
	MaxConcurrency int      `yaml:"max_concurrency"`
}
// GPU holds the host GPU's memory envelope and sampling cadence.
type GPU struct {
	TotalMiB            int `yaml:"total_mib"`
	ReservedMiB         int `yaml:"reserved_mib"`
	PollIntervalSeconds int `yaml:"poll_interval_seconds"`
}

// PollInterval converts PollIntervalSeconds into a time.Duration. A zero or
// negative setting falls back to two seconds.
func (g GPU) PollInterval() time.Duration {
	secs := g.PollIntervalSeconds
	if secs <= 0 {
		secs = 2
	}
	return time.Duration(secs) * time.Second
}

// AvailableMiB reports TotalMiB minus ReservedMiB. The result is 0 when
// TotalMiB is not positive or when the reservation exceeds the total.
func (g GPU) AvailableMiB() int {
	if free := g.TotalMiB - g.ReservedMiB; g.TotalMiB > 0 && free > 0 {
		return free
	}
	return 0
}
// Config is the in-memory form of the mGPUmanager configuration file.
type Config struct {
	// Listen is the broker's listen address.
	Listen string `yaml:"listen"`
	GPU    GPU    `yaml:"gpu"`

	// Routing maps each endpoint kind to the name of the consumer serving it.
	Routing map[EndpointKind]string `yaml:"routing"`

	// AudioProxy optionally names a consumer; when set it must be a key of
	// Consumers.
	AudioProxy string `yaml:"audio_proxy"`

	// Consumers holds every declared consumer, keyed by name.
	Consumers map[string]*Consumer `yaml:"consumers"`
}
// Load parses the YAML configuration file at path and validates it.
//
// It returns the validated configuration, or an error wrapped with the stage
// that failed ("read", "parse" or "validate") and the file path.
func Load(path string) (*Config, error) {
	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return nil, fmt.Errorf("read %s: %w", path, readErr)
	}

	parsed := new(Config)
	if parseErr := yaml.Unmarshal(raw, parsed); parseErr != nil {
		return nil, fmt.Errorf("parse %s: %w", path, parseErr)
	}

	// validate also fills in defaults, so it must run before the config is
	// handed to the caller.
	if checkErr := parsed.validate(); checkErr != nil {
		return nil, fmt.Errorf("validate %s: %w", path, checkErr)
	}
	return parsed, nil
}
// validate checks the parsed configuration and fills in defaults in place.
//
// Defaults applied: Listen becomes "127.0.0.1:8770" when empty; a consumer's
// health method defaults to GET and each paths entry's method to POST (both
// are upper-cased); MaxConcurrency is raised to 1 when zero or negative.
//
// It returns an error when no consumers are declared; when a consumer entry
// is empty, has no URL, has a URL that does not parse or lacks a scheme or
// host, has no health path, or has a paths entry without a path; or when
// routing or audio_proxy names a consumer that is not declared.
func (c *Config) validate() error {
	if c.Listen == "" {
		c.Listen = "127.0.0.1:8770"
	}
	if len(c.Consumers) == 0 {
		return fmt.Errorf("no consumers declared")
	}

	for name, cons := range c.Consumers {
		// A key with a null value (e.g. "mvoice:" with nothing under it) can
		// decode to a nil pointer; reject it rather than dereference it.
		if cons == nil {
			return fmt.Errorf("consumer %q: definition is empty", name)
		}

		if cons.URL == "" {
			return fmt.Errorf("consumer %q: url is required", name)
		}
		u, err := url.Parse(cons.URL)
		if err != nil {
			return fmt.Errorf("consumer %q: invalid url %q: %w", name, cons.URL, err)
		}
		// url.Parse succeeds on relative references and on strings such as
		// "localhost:8766" (read as scheme "localhost" with no host), so a
		// successful parse alone does not mean the URL can be dialled.
		if u.Scheme == "" || u.Host == "" {
			return fmt.Errorf("consumer %q: invalid url %q: scheme and host are required", name, cons.URL)
		}

		if cons.Health.Path == "" {
			return fmt.Errorf("consumer %q: health.path is required", name)
		}
		if cons.Health.Method == "" {
			cons.Health.Method = "GET"
		}
		cons.Health.Method = strings.ToUpper(cons.Health.Method)

		for kind, route := range cons.Paths {
			if route.Path == "" {
				return fmt.Errorf("consumer %q: paths.%s.path is required", name, kind)
			}
			if route.Method == "" {
				route.Method = "POST"
			}
			route.Method = strings.ToUpper(route.Method)
			// Route is stored by value, so the normalised copy has to be
			// written back under the same key.
			cons.Paths[kind] = route
		}

		if cons.MaxConcurrency <= 0 {
			cons.MaxConcurrency = 1
		}
	}

	for kind, consName := range c.Routing {
		if _, ok := c.Consumers[consName]; !ok {
			return fmt.Errorf("routing.%s: unknown consumer %q", kind, consName)
		}
	}

	if c.AudioProxy != "" {
		if _, ok := c.Consumers[c.AudioProxy]; !ok {
			return fmt.Errorf("audio_proxy: unknown consumer %q", c.AudioProxy)
		}
	}
	return nil
}
// ConsumerForKind looks up which consumer is routed for kind. It returns the
// consumer's name together with its entry in Consumers, or "" and nil when
// Routing has no entry for kind.
func (c *Config) ConsumerForKind(kind EndpointKind) (string, *Consumer) {
	if routed, found := c.Routing[kind]; found {
		return routed, c.Consumers[routed]
	}
	return "", nil
}