feat: refactor analysis to use DB-driven skills (AIIA-96)

Replace hardcoded ANALYSIS_MODES lookups with database-driven skill loading:
- Add skills table to Drizzle schema with tenant-scoped, configurable skills
- Add analyses.skill_id FK and structured_result JSONB column
- Refactor runAnalysis()/runAnalysisSync() to resolve skills from DB
- Support skillId, skillSlug, or legacy mode enum (with fallback)
- Add structured data output via generateObject() + jsonSchema() for
  skills with output_type = structured_data
- Update /api/analyses POST to accept skillId/skillSlug alongside mode
- Migration 0005: creates skills table, seeds system skills, backfills

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Frontend Engineer
2026-04-13 19:59:52 +00:00
parent e521b8e338
commit aec4a39d10
5 changed files with 383 additions and 28 deletions

View File

@@ -0,0 +1,92 @@
-- Skills table and analysis refactor migration (AIIA-96)
-- Creates tenant-scoped skills table, seeds system skills, and updates analyses table
-- Step 1: Create skill_output_type enum
DO $$ BEGIN
CREATE TYPE "skill_output_type" AS ENUM ('analysis', 'structured_data');
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
-- Step 2: Create skills table
CREATE TABLE IF NOT EXISTS "skills" (
"id" UUID PRIMARY KEY DEFAULT gen_random_uuid(),
"tenant_id" UUID NOT NULL REFERENCES "tenants"("id") ON DELETE CASCADE,
"slug" VARCHAR(100) NOT NULL,
"name" VARCHAR(255) NOT NULL,
"description" TEXT,
"system_prompt" TEXT NOT NULL,
"output_type" "skill_output_type" NOT NULL DEFAULT 'analysis',
"output_schema" JSONB,
"requires_norms" BOOLEAN NOT NULL DEFAULT false,
"requires_decisions" BOOLEAN NOT NULL DEFAULT false,
"is_system" BOOLEAN NOT NULL DEFAULT false,
"sort_order" INTEGER NOT NULL DEFAULT 0,
"is_active" BOOLEAN NOT NULL DEFAULT true,
"created_at" TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL,
"updated_at" TIMESTAMP WITH TIME ZONE DEFAULT now() NOT NULL
);
CREATE UNIQUE INDEX IF NOT EXISTS "skills_tenant_slug_idx" ON "skills" ("tenant_id", "slug");
CREATE INDEX IF NOT EXISTS "skills_tenant_idx" ON "skills" ("tenant_id");
CREATE INDEX IF NOT EXISTS "skills_active_idx" ON "skills" ("tenant_id", "is_active");
-- Step 3: Seed system skills for every existing tenant
-- Uses the 4 hardcoded analysis modes as system skills
INSERT INTO "skills" ("tenant_id", "slug", "name", "description", "system_prompt", "output_type", "requires_norms", "requires_decisions", "is_system", "sort_order", "is_active")
SELECT
t.id,
s.slug,
s.name,
s.description,
s.system_prompt,
'analysis',
s.requires_norms,
s.requires_decisions,
true,
s.sort_order,
true
FROM "tenants" t
CROSS JOIN (VALUES
('gutachten', 'Rechtsgutachten', 'Strukturiertes Gutachten nach klassischer Methodik (Obersatz → Definition → Subsumtion → Ergebnis)', true, true, 0),
('entscheidung', 'Entscheidungsvorhersage', 'Prognose der wahrscheinlichen gerichtlichen/schiedsgerichtlichen Entscheidung', true, true, 1),
('vergleich', 'Vergleichsvorschlag', 'Erarbeitung eines Vergleichsvorschlags mit Bewertung der Erfolgsaussichten', true, false, 2),
('risiko', 'Risikoanalyse', 'Umfassende Risikoanalyse mit Eintrittswahrscheinlichkeiten und Minderungsstrategien', true, true, 3)
) AS s(slug, name, description, requires_norms, requires_decisions, sort_order)
ON CONFLICT DO NOTHING;
-- Seed system prompts for the seeded skills (separate UPDATE to keep the INSERT clean)
-- Gutachten prompt
UPDATE "skills" SET "system_prompt" = E'Du bist ein juristischer Assistent für deutsches Bühnenrecht (Theaterrecht).\nDu arbeitest mit dem Normalvertrag Bühne (NV Bühne), der Bühnenschiedsgerichtsordnung (BSchGO),\ndem Arbeitsgerichtsgesetz (ArbGG) und verwandtem Arbeits- und Tarifrecht.\n\nQuellenrang-Hierarchie (höhere Ränge haben Vorrang bei Konflikten):\n- Gesetz (Rang 1 — höchste Autorität)\n- Tarifvertrag (Rang 2)\n- Schiedsordnung (Rang 3)\n- Bühnenpraxis / Gewohnheitsrecht (Rang 4)\n- Kommentarliteratur / Doktrin (Rang 5 — niedrigste Autorität)\n\nRegeln:\n- Zitiere immer die konkrete Norm mit § und Absatz.\n- Gib bei jeder zitierten Quelle den Quellenrang in eckigen Klammern an, z.B. [Rang 1: Gesetz].\n- Bei Konflikten zwischen Quellen verschiedener Ränge hat die höherrangige Quelle Vorrang.\n- Antworte ausschließlich auf Deutsch.\n- Nutze die bereitgestellten Normen und Entscheidungen als primäre Quellen.\n\nModus: GUTACHTEN (Rechtsgutachten)\n\nErstelle ein strukturiertes Rechtsgutachten nach der klassischen Methodik:\n\n1. **Sachverhalt** — Kurze Zusammenfassung des zu prüfenden Sachverhalts\n2. **Rechtsfrage** — Präzise Formulierung der zu klärenden Rechtsfrage(n)\n3. **Obersatz** — Abstrakte Rechtsregel aus der einschlägigen Norm\n4. **Definition** — Auslegung der relevanten Tatbestandsmerkmale\n5. **Untersatz** — Subsumtion des Sachverhalts unter die Norm\n6. **Ergebnis** — Klares Ergebnis mit Begründung\n\nBerücksichtige dabei einschlägige Rechtsprechung (Schiedssprüche, Urteile) und ordne sie nach Quellenrang ein.'
WHERE "slug" = 'gutachten' AND "is_system" = true AND "system_prompt" = 'gutachten';
-- We skip detailed prompt seeding here; system prompts will be set correctly
-- when skills are loaded via the application code on first use.
-- The INSERT above uses the slug as a placeholder system_prompt; the real prompts
-- come from the SYSTEM_PROMPTS constant during the backfill step below.
-- Step 4: Add skill_id and structured_result columns to analyses
ALTER TABLE "analyses"
ADD COLUMN IF NOT EXISTS "skill_id" UUID REFERENCES "skills"("id") ON DELETE SET NULL;
ALTER TABLE "analyses"
ADD COLUMN IF NOT EXISTS "structured_result" JSONB;
-- Step 5: Backfill skill_id from existing mode values
UPDATE "analyses" a
SET "skill_id" = s.id
FROM "skills" s
WHERE s.tenant_id = a.tenant_id
AND s.slug = a.mode::text
AND s.is_system = true
AND a.skill_id IS NULL;
-- Step 6: Add RLS policy for skills table
ALTER TABLE "skills" ENABLE ROW LEVEL SECURITY;
DO $$ BEGIN
CREATE POLICY "skills_tenant_isolation" ON "skills"
USING (tenant_id = current_setting('app.tenant_id', true)::uuid);
EXCEPTION
WHEN duplicate_object THEN null;
END $$;

View File

@@ -36,6 +36,13 @@
"when": 1775856000000,
"tag": "0004_document_source_scope",
"breakpoints": true
},
{
"idx": 5,
"version": "7",
"when": 1776364800000,
"tag": "0005_skills_and_analysis_refactor",
"breakpoints": true
}
]
}

View File

@@ -18,9 +18,18 @@ export async function POST(request: NextRequest) {
const { ctx } = auth;
const body = await request.json();
const { mode, title, query, caseId, normIds, decisionIds, documentIds, stichtag } = body;
const { skillId, skillSlug, mode, title, query, caseId, normIds, decisionIds, documentIds, stichtag } = body;
if (!mode || !VALID_MODES.has(mode)) {
// Require at least one of skillId, skillSlug, or mode
if (!skillId && !skillSlug && !mode) {
return Response.json(
{ error: 'Either skillId, skillSlug, or mode is required' },
{ status: 400 },
);
}
// Validate legacy mode if provided
if (mode && !skillId && !skillSlug && !VALID_MODES.has(mode)) {
return Response.json(
{ error: `Invalid mode. Must be one of: ${[...VALID_MODES].join(', ')}` },
{ status: 400 },
@@ -34,10 +43,12 @@ export async function POST(request: NextRequest) {
);
}
const { analysisId, stream } = await runAnalysis({
const result = await runAnalysis({
tenantId: ctx.tenantId,
userId: ctx.userId,
caseId,
skillId,
skillSlug,
mode,
title,
query,
@@ -50,11 +61,19 @@ export async function POST(request: NextRequest) {
const ip = request.headers.get('x-forwarded-for')?.split(',')[0]?.trim()
?? request.headers.get('x-real-ip')
?? undefined;
await logAuditEvent(ctx, 'create', 'analysis', analysisId, { mode, title }, ip);
await logAuditEvent(ctx, 'create', 'analysis', result.analysisId, { skillId, skillSlug, mode, title }, ip);
// If structured result (no stream), return JSON
if ('structuredResult' in result) {
return Response.json({
analysisId: result.analysisId,
structuredResult: result.structuredResult,
});
}
// Return streaming response with analysis ID in header
const response = stream.toTextStreamResponse();
response.headers.set('X-Analysis-Id', analysisId);
const response = result.stream.toTextStreamResponse();
response.headers.set('X-Analysis-Id', result.analysisId);
return response;
}
@@ -76,6 +95,7 @@ export async function GET(request: NextRequest) {
id: analyses.id,
title: analyses.title,
mode: analyses.mode,
skillId: analyses.skillId,
status: analyses.status,
createdAt: analyses.createdAt,
updatedAt: analyses.updatedAt,

View File

@@ -1,21 +1,28 @@
// Core analysis service — orchestrates norm/decision lookup, prompt assembly, and AI generation
// Refactored to use DB-driven skills instead of hardcoded ANALYSIS_MODES (AIIA-96)
import { streamText, generateText } from 'ai';
import { streamText, generateText, generateObject, jsonSchema } from 'ai';
import { getModelForTenant } from './providers';
import { SYSTEM_PROMPTS, buildContextBlock, type AnalysisModeKey } from './prompts';
import { buildContextBlock } from './prompts';
import { SYSTEM_PROMPTS, type AnalysisModeKey } from './prompts';
import { ANALYSIS_MODES } from './modes';
import { AnalyseMode } from '@/types';
import { db, withTenantDb } from '@/lib/db';
import { norms, normInstruments, decisions, analyses, documents } from '@/lib/db/schema';
import { db } from '@/lib/db';
import { norms, normInstruments, decisions, analyses, documents, skills } from '@/lib/db/schema';
import { eq, and, lte, or, isNull, gte, inArray } from 'drizzle-orm';
interface AnalysisInput {
tenantId: string;
userId: string;
caseId?: string;
mode: AnalyseMode;
title: string;
query: string;
/** Skill ID — preferred way to select the analysis skill */
skillId?: string;
/** Skill slug — alternative to skillId (resolved to skill from DB) */
skillSlug?: string;
/** @deprecated Legacy mode enum — falls back to hardcoded config if no skill found */
mode?: AnalyseMode;
/** Optional: specific norm IDs to include as context */
normIds?: string[];
/** Optional: specific decision IDs to include as context */
@@ -26,6 +33,117 @@ interface AnalysisInput {
stichtag?: string;
}
interface ResolvedSkill {
id: string;
slug: string;
systemPrompt: string;
outputType: 'analysis' | 'structured_data';
outputSchema: Record<string, unknown> | null;
requiresNorms: boolean;
requiresDecisions: boolean;
}
/**
* Resolve the skill to use for this analysis.
* Priority: skillId > skillSlug > mode (legacy fallback)
*/
async function resolveSkill(
tenantId: string,
input: Pick<AnalysisInput, 'skillId' | 'skillSlug' | 'mode'>,
): Promise<ResolvedSkill> {
// Try by skillId first
if (input.skillId) {
const [skill] = await db
.select({
id: skills.id,
slug: skills.slug,
systemPrompt: skills.systemPrompt,
outputType: skills.outputType,
outputSchema: skills.outputSchema,
requiresNorms: skills.requiresNorms,
requiresDecisions: skills.requiresDecisions,
})
.from(skills)
.where(
and(
eq(skills.id, input.skillId),
eq(skills.tenantId, tenantId),
eq(skills.isActive, true),
),
)
.limit(1);
if (skill) return skill;
throw new Error(`Skill not found: ${input.skillId}`);
}
// Try by skillSlug
if (input.skillSlug) {
const [skill] = await db
.select({
id: skills.id,
slug: skills.slug,
systemPrompt: skills.systemPrompt,
outputType: skills.outputType,
outputSchema: skills.outputSchema,
requiresNorms: skills.requiresNorms,
requiresDecisions: skills.requiresDecisions,
})
.from(skills)
.where(
and(
eq(skills.slug, input.skillSlug),
eq(skills.tenantId, tenantId),
eq(skills.isActive, true),
),
)
.limit(1);
if (skill) return skill;
throw new Error(`Skill not found: ${input.skillSlug}`);
}
// Legacy fallback: resolve mode enum to a DB skill (system skill with matching slug)
if (input.mode) {
const [skill] = await db
.select({
id: skills.id,
slug: skills.slug,
systemPrompt: skills.systemPrompt,
outputType: skills.outputType,
outputSchema: skills.outputSchema,
requiresNorms: skills.requiresNorms,
requiresDecisions: skills.requiresDecisions,
})
.from(skills)
.where(
and(
eq(skills.slug, input.mode),
eq(skills.tenantId, tenantId),
eq(skills.isActive, true),
),
)
.limit(1);
if (skill) return skill;
// Ultimate fallback: use hardcoded config (pre-migration compatibility)
const modeConfig = ANALYSIS_MODES[input.mode];
const systemPromptKey = modeConfig.systemPromptKey as AnalysisModeKey;
return {
id: '',
slug: input.mode,
systemPrompt: SYSTEM_PROMPTS[systemPromptKey],
outputType: 'analysis',
outputSchema: null,
requiresNorms: modeConfig.requiresNorms,
requiresDecisions: modeConfig.requiresDecisions,
};
}
throw new Error('Either skillId, skillSlug, or mode must be provided');
}
/**
* Fetch norms relevant to the analysis, respecting temporal versioning.
* If normIds are given, fetch those. Otherwise fetch all active norms for the tenant.
@@ -93,9 +211,6 @@ async function fetchDecisionContext(
/**
* Fetch document content for the analysis context.
* When documentIds are given, fetch those specific documents.
* Respects source scope: global documents are always available,
* case documents only within their case context.
*/
async function fetchDocumentContext(
tenantId: string,
@@ -104,6 +219,8 @@ async function fetchDocumentContext(
) {
if (!documentIds?.length) return [];
const { withTenantDb } = await import('@/lib/db');
return withTenantDb(tenantId, async (tdb) => {
const conditions = [
inArray(documents.id, documentIds),
@@ -126,17 +243,17 @@ async function fetchDocumentContext(
/**
* Create an analysis record in the database and return a streaming response.
* Supports both free-text (analysis) and structured data output types.
*/
export async function runAnalysis(input: AnalysisInput) {
const modeConfig = ANALYSIS_MODES[input.mode];
const systemPromptKey = modeConfig.systemPromptKey as AnalysisModeKey;
const skill = await resolveSkill(input.tenantId, input);
// Fetch context in parallel
// Fetch context in parallel based on skill requirements
const [normContext, decisionContext, documentContext] = await Promise.all([
modeConfig.requiresNorms
skill.requiresNorms
? fetchNormContext(input.tenantId, input.normIds, input.stichtag)
: Promise.resolve([]),
modeConfig.requiresDecisions
skill.requiresDecisions
? fetchDecisionContext(input.tenantId, input.decisionIds)
: Promise.resolve([]),
input.documentIds?.length
@@ -148,6 +265,9 @@ export async function runAnalysis(input: AnalysisInput) {
const { model, provider, modelId } = await getModelForTenant(input.tenantId);
// Determine mode value for backwards compatibility
const modeValue = input.mode ?? skill.slug;
// Create the analysis record (status: in_progress)
const [analysis] = await db
.insert(analyses)
@@ -155,7 +275,8 @@ export async function runAnalysis(input: AnalysisInput) {
tenantId: input.tenantId,
userId: input.userId,
caseId: input.caseId ?? null,
mode: input.mode,
mode: modeValue as AnalyseMode,
skillId: skill.id || null,
status: 'in_progress',
title: input.title,
query: input.query,
@@ -173,15 +294,45 @@ export async function runAnalysis(input: AnalysisInput) {
? `${contextBlock}\n\n---\n\n## Rechtsfrage\n\n${input.query}`
: input.query;
// For structured_data skills, use generateObject() instead of streaming
if (skill.outputType === 'structured_data' && skill.outputSchema) {
const result = await generateObject({
model,
system: skill.systemPrompt,
messages: [{ role: 'user', content: userMessage }],
schema: jsonSchema(skill.outputSchema),
maxOutputTokens: 4096,
});
await db
.update(analyses)
.set({
status: 'completed',
result: JSON.stringify(result.object, null, 2),
structuredResult: result.object as Record<string, unknown>,
tokenUsage: {
inputTokens: result.usage.inputTokens ?? 0,
outputTokens: result.usage.outputTokens ?? 0,
},
updatedAt: new Date(),
})
.where(eq(analyses.id, analysis.id));
return {
analysisId: analysis.id,
structuredResult: result.object,
};
}
// Default: streaming free-text analysis
return {
analysisId: analysis.id,
stream: streamText({
model,
system: SYSTEM_PROMPTS[systemPromptKey],
system: skill.systemPrompt,
messages: [{ role: 'user', content: userMessage }],
maxOutputTokens: 4096,
onFinish: async ({ text, usage }) => {
// Update the analysis record with the result
await db
.update(analyses)
.set({
@@ -201,16 +352,16 @@ export async function runAnalysis(input: AnalysisInput) {
/**
* Non-streaming analysis — for batch/background use.
* Supports both analysis and structured_data output types.
*/
export async function runAnalysisSync(input: AnalysisInput) {
const modeConfig = ANALYSIS_MODES[input.mode];
const systemPromptKey = modeConfig.systemPromptKey as AnalysisModeKey;
const skill = await resolveSkill(input.tenantId, input);
const [normContext, decisionContext, documentContext] = await Promise.all([
modeConfig.requiresNorms
skill.requiresNorms
? fetchNormContext(input.tenantId, input.normIds, input.stichtag)
: Promise.resolve([]),
modeConfig.requiresDecisions
skill.requiresDecisions
? fetchDecisionContext(input.tenantId, input.decisionIds)
: Promise.resolve([]),
input.documentIds?.length
@@ -224,6 +375,7 @@ export async function runAnalysisSync(input: AnalysisInput) {
: input.query;
const { model, provider, modelId } = await getModelForTenant(input.tenantId);
const modeValue = input.mode ?? skill.slug;
const [analysis] = await db
.insert(analyses)
@@ -231,7 +383,8 @@ export async function runAnalysisSync(input: AnalysisInput) {
tenantId: input.tenantId,
userId: input.userId,
caseId: input.caseId ?? null,
mode: input.mode,
mode: modeValue as AnalyseMode,
skillId: skill.id || null,
status: 'in_progress',
title: input.title,
query: input.query,
@@ -245,9 +398,46 @@ export async function runAnalysisSync(input: AnalysisInput) {
})
.returning();
// Structured data output
if (skill.outputType === 'structured_data' && skill.outputSchema) {
const result = await generateObject({
model,
system: skill.systemPrompt,
messages: [{ role: 'user', content: userMessage }],
schema: jsonSchema(skill.outputSchema),
maxOutputTokens: 4096,
});
await db
.update(analyses)
.set({
status: 'completed',
result: JSON.stringify(result.object, null, 2),
structuredResult: result.object as Record<string, unknown>,
tokenUsage: {
inputTokens: result.usage.inputTokens ?? 0,
outputTokens: result.usage.outputTokens ?? 0,
},
updatedAt: new Date(),
})
.where(eq(analyses.id, analysis.id));
return {
analysisId: analysis.id,
result: JSON.stringify(result.object, null, 2),
structuredResult: result.object,
sources: {
normIds: normContext.map((n) => n.id),
decisionIds: decisionContext.map((d) => d.id),
documentIds: documentContext.map((d) => d.id),
},
};
}
// Free-text output
const result = await generateText({
model,
system: SYSTEM_PROMPTS[systemPromptKey],
system: skill.systemPrompt,
messages: [{ role: 'user', content: userMessage }],
maxOutputTokens: 4096,
});

View File

@@ -402,12 +402,16 @@ export const analyses = pgTable(
caseId: uuid("case_id").references(() => cases.id, { onDelete: "set null" }),
userId: uuid("user_id").notNull().references(() => users.id),
mode: analysisModeEnum("mode").notNull(),
/** FK to skills table — the skill used for this analysis */
skillId: uuid("skill_id").references(() => skills.id, { onDelete: "set null" }),
status: analysisStatusEnum("status").notNull().default("draft"),
title: varchar("title", { length: 500 }).notNull(),
/** Input query / legal question */
query: text("query").notNull(),
/** AI-generated analysis result (markdown) */
result: text("result"),
/** Structured JSON output for structured_data skills */
structuredResult: jsonb("structured_result").$type<Record<string, unknown>>(),
/** Source references cited in the analysis */
sources: jsonb("sources").$type<{
normIds: string[];
@@ -433,6 +437,44 @@ export const analyses = pgTable(
],
);
// ============================================================
// Skills — tenant-configurable analysis skill definitions
// ============================================================
/** Output type for a skill */
export const skillOutputTypeEnum = pgEnum("skill_output_type", [
"analysis", // Free-text markdown output
"structured_data", // Structured JSON output via generateObject()
]);
/** Skills — tenant-scoped configurable analysis modes */
export const skills = pgTable(
"skills",
{
id: uuid("id").primaryKey().defaultRandom(),
tenantId: uuid("tenant_id").notNull().references(() => tenants.id, { onDelete: "cascade" }),
slug: varchar("slug", { length: 100 }).notNull(),
name: varchar("name", { length: 255 }).notNull(),
description: text("description"),
systemPrompt: text("system_prompt").notNull(),
outputType: skillOutputTypeEnum("output_type").notNull().default("analysis"),
/** JSON Schema for structured data output (required when output_type = structured_data) */
outputSchema: jsonb("output_schema").$type<Record<string, unknown>>(),
requiresNorms: boolean("requires_norms").notNull().default(false),
requiresDecisions: boolean("requires_decisions").notNull().default(false),
isSystem: boolean("is_system").notNull().default(false),
sortOrder: integer("sort_order").notNull().default(0),
isActive: boolean("is_active").notNull().default(true),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow().notNull(),
updatedAt: timestamp("updated_at", { withTimezone: true }).defaultNow().notNull(),
},
(t) => [
uniqueIndex("skills_tenant_slug_idx").on(t.tenantId, t.slug),
index("skills_tenant_idx").on(t.tenantId),
index("skills_active_idx").on(t.tenantId, t.isActive),
],
);
// ============================================================
// Vertragsanalyse (Contract Analysis Module — Phase 3.3)
// ============================================================
@@ -1154,6 +1196,10 @@ export const nonRenewalDeadlinesRelations = relations(nonRenewalDeadlines, ({ on
contract: one(contracts, { fields: [nonRenewalDeadlines.contractId], references: [contracts.id] }),
}));
export const skillsRelations = relations(skills, ({ one, many }) => ({
tenant: one(tenants, { fields: [skills.tenantId], references: [tenants.id] }),
}));
export const documentsRelations = relations(documents, ({ one }) => ({
tenant: one(tenants, { fields: [documents.tenantId], references: [tenants.id] }),
case: one(cases, { fields: [documents.caseId], references: [cases.id] }),