Replaces the golang-migrate single-counter tracker with a hand-rolled runner over embed.FS that tracks applied state as a set in paliad.applied_migrations (version PK, name, applied_at, checksum). Closes the parallel-merge skip-hole the 2026-05-20 mig-103 incident exposed (m/paliad#44): a migration whose version is missing from applied_migrations runs on the next deploy regardless of which higher versions are already applied. Gaps are first-class. Slice 1 of the design at docs/design-migration-runner-applied-set-2026-05-20.md. All eight design decisions m-picked = inventor recommendation. Runner contract: - Ensure paliad schema → pg_advisory_lock(hash('paliad.applied_migrations')) → CREATE TABLE IF NOT EXISTS applied_migrations. - bootstrapFromLegacyTracker: if applied_migrations is empty and the legacy paliad.paliad_schema_migrations row is present and clean, INSERT rows 1..N for every on-disk version with checksum=NULL via ON CONFLICT DO NOTHING. Hard-fail if legacy tracker is dirty (operator must recover). - scanEmbeddedMigrations: hard-fail on two .up.sql files sharing a version prefix — the failure mode the post-mortem exposed. - checkNameAgreement: hard-fail on rename-after-apply mismatch (disk name for an already-applied version != DB name). - applyOne: SQL body + INSERT(version, name, now(), sha256(file_bytes)) in one transaction. All-or-nothing per migration. Checksums populated on apply for future drift detection; rows backfilled from the legacy tracker carry NULL (we can't fabricate a hash for what golang-migrate applied historically). Verify-on-deploy intentionally deferred to a focused follow-up — single if-block flip when m wants it. Up-only runner. .down.sql files stay in embed.FS as reference; manual roll-back path is psql + DELETE FROM paliad.applied_migrations WHERE version=N. Zero call sites for migrate.Down in the codebase today. Drops github.com/golang-migrate/migrate/v4 from go.mod (no other importers; verified via grep). 
Tests: - internal/db/migrate_test.go: TestMigrations_DryRun walks pending = on_disk \ applied (read from paliad.applied_migrations, missing-table → empty set), runs each in BEGIN/ROLLBACK against the scratch DB. - cmd/server/main_smoke_test.go: TestBootSmoke asserts the applied set equals the on-disk set exactly (not just max-version-match) — catches the skip class the post-mortem documented. Dirty-flag check removed (rows are committed or absent, not 'dirty'). - All 45 service-test call sites of db.ApplyMigrations work unchanged (same signature, same fresh-DB behavior). Follow-up: mig 108_drop_legacy_trackers (DROP paliad.paliad_schema_migrations and public.paliad_schema_migrations) after one or two deploys of burn-in on this slice.
146 lines
5.0 KiB
Go
146 lines
5.0 KiB
Go
// Package db tests — migration dry-run gate.
|
|
//
|
|
// This is the test that catches mig-N crash-loops before they reach prod.
|
|
// The new runner tracks applied state as a set in paliad.applied_migrations
|
|
// (one row per migration; see migrate.go). A migration that compiles cleanly
|
|
// but fails on apply (typo, missing column, wrong CHECK shape) crashes the
|
|
// Dokploy container loop before paliad.de finishes binding :8080, and the
|
|
// only way to learn about it today is to watch the deploy log.
|
|
//
|
|
// TestMigrations_DryRun closes that gap: for every *.up.sql in this
|
|
// directory whose version is NOT present in paliad.applied_migrations on
|
|
// the scratch DB, it opens a transaction, runs the SQL, and ROLLBACKs.
|
|
// Any error fails the test with the file name + Postgres error. Always
|
|
// non-destructive — the ROLLBACK runs even on success, so the scratch DB
|
|
// stays at its starting set.
|
|
//
|
|
// "Pending" means: a version that's on disk but not in applied_migrations.
|
|
// In CI against a fresh scratch DB (where applied_migrations either
|
|
// doesn't exist or is empty), every migration is pending and gets
|
|
// verified. On a developer laptop whose scratch DB is already at HEAD,
|
|
// no migrations are pending and the test logs and passes — the protection
|
|
// only kicks in the moment a new *.up.sql lands in the tree before the
|
|
// developer runs `db.ApplyMigrations` against the same scratch DB.
|
|
//
|
|
// Requires TEST_DATABASE_URL (same pattern as the rest of the live-DB
|
|
// tests). Skipped without it.
|
|
//
|
|
// Design: docs/design-paliad-test-strategy-2026-05-19.md §5 Slice 1 and
|
|
// docs/design-migration-runner-applied-set-2026-05-20.md §6.
|
|
|
|
package db
|
|
|
|
import (
	"database/sql"
	"errors"
	"fmt"
	"os"
	"strings"
	"testing"

	_ "github.com/lib/pq"
)
|
|
|
|
// TestMigrations_DryRun walks every pending *.up.sql in numeric order,
|
|
// applies each inside its own BEGIN/ROLLBACK against the scratch DB, and
|
|
// fails the test on the first SQL error. Reports per-file as a sub-test so
|
|
// `go test -v` shows which migration failed.
|
|
func TestMigrations_DryRun(t *testing.T) {
|
|
url := os.Getenv("TEST_DATABASE_URL")
|
|
if url == "" {
|
|
t.Skip("TEST_DATABASE_URL not set — skipping migration dry-run")
|
|
}
|
|
|
|
conn, err := sql.Open("postgres", url)
|
|
if err != nil {
|
|
t.Fatalf("open: %v", err)
|
|
}
|
|
defer conn.Close()
|
|
if err := conn.Ping(); err != nil {
|
|
t.Fatalf("ping: %v", err)
|
|
}
|
|
|
|
// The paliad schema must exist before migration 001 runs against it,
|
|
// mirroring the bootstrap step in ApplyMigrations. Without this, a
|
|
// fresh scratch DB would fail migration 001's CREATE TABLE paliad.*
|
|
// statements inside the BEGIN/ROLLBACK probe with "schema paliad does
|
|
// not exist" — a false negative that distracts from real errors.
|
|
if _, err := conn.Exec(`CREATE SCHEMA IF NOT EXISTS paliad`); err != nil {
|
|
t.Fatalf("ensure paliad schema: %v", err)
|
|
}
|
|
|
|
applied, err := readAppliedVersions(conn)
|
|
if err != nil {
|
|
t.Fatalf("read applied_migrations: %v", err)
|
|
}
|
|
|
|
onDisk, err := scanEmbeddedMigrations()
|
|
if err != nil {
|
|
t.Fatalf("scan embedded migrations: %v", err)
|
|
}
|
|
|
|
var pending []migration
|
|
for _, m := range onDisk {
|
|
if !applied[m.version] {
|
|
pending = append(pending, m)
|
|
}
|
|
}
|
|
|
|
if len(pending) == 0 {
|
|
t.Logf("no pending migrations — scratch DB applied set covers every on-disk version (%d total)",
|
|
len(onDisk))
|
|
return
|
|
}
|
|
t.Logf("scratch DB has %d/%d on-disk migrations applied; walking %d pending",
|
|
len(applied), len(onDisk), len(pending))
|
|
|
|
for _, m := range pending {
|
|
t.Run(fmt.Sprintf("%03d_%s", m.version, m.name), func(t *testing.T) {
|
|
body, err := migrationFS.ReadFile("migrations/" + m.filename)
|
|
if err != nil {
|
|
t.Fatalf("read %s: %v", m.filename, err)
|
|
}
|
|
tx, err := conn.Begin()
|
|
if err != nil {
|
|
t.Fatalf("begin: %v", err)
|
|
}
|
|
// Always rollback; the dry-run must not leave the scratch
|
|
// DB at a different applied set than where it started.
|
|
// Rollback is safe after a failed Exec — Postgres aborts
|
|
// the transaction internally on the first error.
|
|
defer func() { _ = tx.Rollback() }()
|
|
|
|
if _, err := tx.Exec(string(body)); err != nil {
|
|
t.Fatalf("migration %s failed dry-run: %v", m.filename, err)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// readAppliedVersions returns the set of versions stored in
// paliad.applied_migrations, keyed by version number.
//
// A missing table (or missing schema) is reported as an empty set with a nil
// error: that is the expected state of a database the runner has never
// touched. The table is deliberately not created here, so this function only
// reads. Any other query, scan, or iteration failure is returned as an error
// together with a nil map.
func readAppliedVersions(conn *sql.DB) (map[int]bool, error) {
	rows, err := conn.Query(`SELECT version FROM paliad.applied_migrations`)
	if err != nil {
		if isMissingAppliedTable(err) {
			return map[int]bool{}, nil
		}
		return nil, err
	}
	defer rows.Close()

	out := map[int]bool{}
	for rows.Next() {
		var v int
		if err := rows.Scan(&v); err != nil {
			return nil, err
		}
		out[v] = true
	}
	// Surface iteration errors without handing back a half-filled set.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return out, nil
}

// isMissingAppliedTable reports whether err means the queried relation or
// its schema is absent, as opposed to any other failure.
//
// When the driver error exposes a non-empty SQLState() (assumed to hold for
// the lib/pq version in use — verify), the decision is made on the SQLSTATE
// alone: 42P01 (undefined_table) or 3F000 (invalid_schema_name). This keeps
// unrelated errors whose text also contains "does not exist" — an undefined
// column, for example — from being mistaken for a fresh database, and it
// does not depend on the server's message language. Errors without a
// SQLSTATE fall back to matching the English message text.
func isMissingAppliedTable(err error) bool {
	var withState interface{ SQLState() string }
	if errors.As(err, &withState) {
		if state := withState.SQLState(); state != "" {
			return state == "42P01" || state == "3F000"
		}
	}
	msg := err.Error()
	if !strings.Contains(msg, "does not exist") {
		return false
	}
	return strings.Contains(msg, "relation") || strings.Contains(msg, "schema")
}
|