// File: mGPUmanager/internal/scheduler/scheduler.go

// Package scheduler controls who gets the GPU when.
//
// Three responsibilities, added in three phases:
//
//   - Step 2 ("Schritt 2", this file's first version): a passthrough — every
//     job runs immediately, no locking, no queueing. Only useful for proving
//     the HTTP façade end-to-end.
//   - Step 4 ("Schritt 4"): a global mutex (or capacity-1 channel) serialises
//     all GPU work. Per-consumer max_concurrency limits stay at 1 for now.
//   - Step 5 ("Schritt 5"): VRAM-pressure-aware eviction kicks in before
//     acquire when the requested consumer's resident cost would exceed the
//     available headroom.
//
// The interface deliberately hides which phase is active from callers
// (server.go) so the upgrade path is local to this package.
package scheduler

import (
	"context"
	"errors"
	"sync"
	"time"

	"mgit.msbls.de/m/mGPUmanager/internal/config"
	"mgit.msbls.de/m/mGPUmanager/internal/registry"
)

// ErrSchedulerStopped is the sentinel error for a Run call made after the
// scheduler has been closed. Compare against it with errors.Is.
//
// NOTE(review): nothing visible in this file returns it — Scheduler declares
// no Close method and Passthrough.Run only returns the job's own error — so
// it appears to be reserved for later scheduler implementations; confirm.
var ErrSchedulerStopped = errors.New("scheduler stopped")

// Job is the unit of GPU work a caller hands to Scheduler.Run. It receives
// the context that was passed to Run and reports failure through its error
// result.
//
// The intended contract is that a Job executes while the scheduler grants
// its consumer the GPU; the Passthrough implementation takes no lock and
// simply calls the Job directly.
type Job func(ctx context.Context) error

// Scheduler decides when GPU work runs. Implementations may queue, serialise,
// or evict other consumers before granting access; callers cannot tell which
// strategy is active.
type Scheduler interface {
	// Run executes fn while the caller holds the right to use the GPU for
	// the named consumer. It blocks until fn returns or ctx is cancelled.
	//
	// consumer identifies who the work is for (Passthrough forwards it to
	// the registry's MarkActive). The returned error is fn's error or, in
	// implementations that can give up waiting, a scheduler error.
	Run(ctx context.Context, consumer string, fn Job) error

	// Stats returns a snapshot of scheduler internals for /v1/status.
	Stats() Stats
}

// Stats is the scheduler snapshot that /v1/status reports. It is serialised
// with the JSON keys given in the field tags.
//
// Passthrough fills in only InFlight, TotalJobs and LastRunMS; every other
// field keeps its zero value there. The remaining fields are presumably meant
// for the queueing and evicting schedulers of the later phases — their exact
// semantics are not established by this file.
//
// The *MS fields are durations in milliseconds. OldestQueued carries the
// omitzero option so that a zero time is left out of the JSON output
// (encoding/json honours omitzero from Go 1.24 on).
type Stats struct {
	QueueDepth   int       `json:"queue_depth"`
	InFlight     int       `json:"in_flight"`
	TotalJobs    int64     `json:"total_jobs"`
	LastWaitMS   int64     `json:"last_wait_ms"`
	LastRunMS    int64     `json:"last_run_ms"`
	Evictions    int64     `json:"evictions"`
	OldestQueued time.Time `json:"oldest_queued,omitzero"`
}

// Passthrough is the step 2 ("Schritt 2") stand-in: no lock, no queue. Every
// job runs concurrently. It exists so the server package can be written
// against the final interface from day one.
//
// Besides running jobs it keeps three counters for Stats: inFlight (Run calls
// currently executing their job), total (Run calls started so far) and
// lastRunMS (wall-clock milliseconds of the job that finished most recently).
// A Passthrough contains a mutex and must not be copied after first use.
type Passthrough struct {
	reg *registry.Registry

	// mu guards inFlight, total and lastRunMS.
	mu        sync.Mutex
	inFlight  int
	total     int64
	lastRunMS int64
}

// NewPassthrough builds a Passthrough whose jobs start immediately. reg is
// the registry on which Run marks the consumer as active for the duration of
// each job. All statistics counters start at zero.
func NewPassthrough(reg *registry.Registry) *Passthrough {
	p := new(Passthrough)
	p.reg = reg
	return p
}

// Run calls fn right away with the caller's ctx and returns fn's error
// unchanged. It never waits for other jobs and does not inspect ctx itself.
//
// For the duration of the call the consumer is marked active in the registry.
// The job is added to the total and in-flight counters before fn starts; once
// fn returns, its wall-clock duration in milliseconds is stored as the last
// run time, and the in-flight counter is decremented as Run unwinds.
func (p *Passthrough) Run(ctx context.Context, consumer string, fn Job) error {
	done := p.reg.MarkActive(consumer)
	defer done()

	// locked applies one counter update under p.mu.
	locked := func(update func()) {
		p.mu.Lock()
		update()
		p.mu.Unlock()
	}

	locked(func() {
		p.inFlight++
		p.total++
	})
	// Registered after done, so the counter drops before the registry
	// release runs.
	defer locked(func() { p.inFlight-- })

	began := time.Now()
	jobErr := fn(ctx)
	tookMS := time.Since(began).Milliseconds()

	locked(func() { p.lastRunMS = tookMS })
	return jobErr
}

// Stats copies the passthrough counters into a Stats value while holding the
// mutex, so the three reported fields belong to one consistent moment. Only
// InFlight, TotalJobs and LastRunMS are set; all other fields stay zero.
func (p *Passthrough) Stats() Stats {
	var snap Stats

	p.mu.Lock()
	snap.InFlight = p.inFlight
	snap.TotalJobs = p.total
	snap.LastRunMS = p.lastRunMS
	p.mu.Unlock()

	return snap
}

// Compile-time check that *Passthrough implements Scheduler; the build fails
// here if a method is missing or its signature drifts.
var _ Scheduler = (*Passthrough)(nil)

// Reference the config package so its import stays in use (Go rejects unused
// imports). Later steps ("Schritte") are expected to read per-consumer
// max_concurrency and vram_resident_mib from it.
var _ = config.KindTTS