Merge deploy-time fixes (systemd --user unit, initialLoaded heuristic)

This commit is contained in:
mAi
2026-05-15 16:56:47 +02:00
5 changed files with 73 additions and 29 deletions

View File

@@ -3,8 +3,8 @@
# `make build` — compile the Go binary into ./bin/mgpumanager.
# `make test` — go test ./...
# `make run` — run locally against ./config/consumers.yaml.
# `make deploy` — rsync binary + config + systemd unit to mRock,
# reload systemd, restart the service.
# `make deploy` — rsync binary + config + user-unit to mRock and
# (re)start it under `systemctl --user`.
BIN := bin/mgpumanager
PKG := ./cmd/mgpumanager
@@ -12,6 +12,7 @@ PKG := ./cmd/mgpumanager
GO ?= go
HOST ?= mrock
REMOTE_DIR ?= /home/m/dev/mGPUmanager
USER_UNIT_DIR ?= /home/m/.config/systemd/user
.PHONY: build test run deploy clean
@@ -25,11 +26,13 @@ test:
run: build
./$(BIN) --config config/consumers.yaml --log-level debug
# Deploys to mRock as a user unit (systemd --user). User lingering must
# be enabled on the target host (`sudo loginctl enable-linger m`) so the
# user manager starts at boot and keeps the unit running without an
# active login session.
deploy: build
rsync -a --mkpath $(BIN) $(HOST):$(REMOTE_DIR)/$(BIN)
rsync -a --mkpath config/consumers.yaml $(HOST):$(REMOTE_DIR)/config/consumers.yaml
rsync -a --mkpath systemd/mgpumanager.service $(HOST):$(REMOTE_DIR)/systemd/mgpumanager.service
ssh $(HOST) "sudo cp $(REMOTE_DIR)/systemd/mgpumanager.service /etc/systemd/system/mgpumanager.service && sudo systemctl daemon-reload && sudo systemctl enable mgpumanager.service && sudo systemctl restart mgpumanager.service && sudo systemctl status mgpumanager.service --no-pager -l"
rsync -a --mkpath systemd/mgpumanager.service $(HOST):$(USER_UNIT_DIR)/mgpumanager.service
ssh $(HOST) "systemctl --user daemon-reload && systemctl --user enable mgpumanager.service && systemctl --user restart mgpumanager.service && systemctl --user status mgpumanager.service --no-pager -l"
clean:
rm -rf bin

View File

@@ -1,4 +1,4 @@
listen: 127.0.0.1:8770
listen: 0.0.0.0:8770
gpu:
total_mib: 16376 # RTX 4070 Ti SUPER

View File

@@ -70,13 +70,38 @@ func NewEvicting(cfg *config.Config, reg *registry.Registry, gpuPoller *gpu.Poll
lastUsed: make(map[string]time.Time, len(cfg.Consumers)),
}
for name, cons := range cfg.Consumers {
// Self-managed VRAM consumers (ollama) are always 'loaded' from
// the scheduler's perspective — we never evict them via HTTP.
e.loaded[name] = !cons.VRAMManaged || true
e.loaded[name] = initialLoaded(cons)
}
return e
}
// initialLoaded reports whether a consumer is believed to be resident in
// VRAM at the moment the scheduler starts.
//
// Exactly one shape of consumer is treated as not loaded: one that is not
// VRAM-managed and exposes an unload route without a load route (comfyui).
// Such a consumer is lazy — FLUX isn't resident until the first /prompt —
// so its VRAM cost is not accounted for until that happens.
//
// Every other consumer is assumed loaded:
//
//   - VRAM-managed (ollama): we never track or evict it.
//   - Has a load route (mvoice has both load and unload): controllable,
//     and typically preloads on its own systemd-managed startup.
//   - Has a systemd_unit but no HTTP routes (whisper-server): always-on
//     services that load their model at process start.
//   - None of the above: fallback, assume it's there if the consumer is up.
//
// This matters for the eviction smoke test: were comfyui believed loaded
// at startup, ensureFits would short-circuit on the first /v1/image
// request and never trigger eviction. (m/mGPUmanager#1 live deploy.)
func initialLoaded(cons *config.Consumer) bool {
	// "Lazy" is the only false case: unload-only and not VRAM-managed.
	// Expressed positively, a consumer is loaded when any of these hold.
	return cons.VRAMManaged || cons.Load != nil || cons.Unload == nil
}
// Run is the public Scheduler interface: ensure room + load + serialise.
func (e *Evicting) Run(ctx context.Context, consumer string, fn Job) error {
if err := e.ensureFits(ctx, consumer); err != nil {

View File

@@ -118,6 +118,37 @@ func buildCfg(mvoiceURL, comfyURL string) *config.Config {
}
}
// TestInitialLoadedHeuristic is a table-driven check of initialLoaded. It
// locks in the rule that a consumer with an Unload route but no Load route
// (comfyui) starts out not loaded, while every other consumer shape is
// assumed resident at startup.
func TestInitialLoadedHeuristic(t *testing.T) {
	type scenario struct {
		label    string
		consumer *config.Consumer
		expected bool
	}

	scenarios := []scenario{
		{
			label:    "vram_managed (ollama)",
			consumer: &config.Consumer{VRAMManaged: true},
			expected: true,
		},
		{
			label: "load+unload (mvoice)",
			consumer: &config.Consumer{
				Load:   &config.Route{Path: "/load"},
				Unload: &config.Route{Path: "/unload"},
			},
			expected: true,
		},
		{
			label: "unload only — lazy (comfyui)",
			consumer: &config.Consumer{
				Unload: &config.Route{Path: "/api/free"},
			},
			expected: false,
		},
		{
			label: "systemd unit only (whisper-server)",
			consumer: &config.Consumer{
				SystemdUnit: "whisper-server.service",
			},
			expected: true,
		},
		{
			label:    "empty consumer",
			consumer: &config.Consumer{},
			expected: true,
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.label, func(t *testing.T) {
			got := initialLoaded(sc.consumer)
			if got == sc.expected {
				return
			}
			t.Errorf("initialLoaded = %v, want %v", got, sc.expected)
		})
	}
}
// TestEvictingSkipsWhenAlreadyResident verifies the no-op fast path: a job
// for an already-loaded consumer with plenty of free VRAM runs without any
// unload call.

View File

@@ -1,30 +1,15 @@
[Unit]
Description=mGPUmanager — GPU-Inference-Control-Plane for mRock
Description=mGPUmanager — GPU-Inference-Control-Plane
Documentation=https://mgit.msbls.de/m/mGPUmanager
After=network-online.target
Wants=network-online.target
After=network.target
[Service]
Type=simple
User=m
Group=m
WorkingDirectory=/home/m/dev/mGPUmanager
ExecStart=/home/m/dev/mGPUmanager/bin/mgpumanager \
--config /home/m/dev/mGPUmanager/config/consumers.yaml \
--log-level info
WorkingDirectory=%h/dev/mGPUmanager
ExecStart=%h/dev/mGPUmanager/bin/mgpumanager --config %h/dev/mGPUmanager/config/consumers.yaml --log-level info
Restart=on-failure
RestartSec=3
RestartSec=5
TimeoutStopSec=10
# Hardening — broker has no need for elevated capabilities.
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=read-only
ReadWritePaths=/home/m/dev/mGPUmanager
# The broker only proxies; nvidia-smi is the only GPU-touching call.
PrivateDevices=false
[Install]
WantedBy=multi-user.target
WantedBy=default.target