diff --git a/app/stiftung/tasks.py b/app/stiftung/tasks.py index 4a9d2c6..fd0f9d6 100644 --- a/app/stiftung/tasks.py +++ b/app/stiftung/tasks.py @@ -28,6 +28,8 @@ import mimetypes from datetime import datetime, timezone as dt_timezone from email.header import decode_header, make_header +import time + import requests from celery import shared_task from django.conf import settings @@ -84,6 +86,56 @@ def _get_email_body(msg) -> str: return "\n".join(body_parts).strip() +def _poll_paperless_task(api_url: str, headers: dict, task_id: str, filename: str, max_wait: int = 120) -> int | None: + """ + Pollt den Paperless-ngx Task-Status bis das Dokument verarbeitet wurde. + Gibt die Dokument-ID zurück, oder None bei Fehler/Timeout. + """ + task_url = f"{api_url.rstrip('/')}/api/tasks/?task_id={task_id}" + waited = 0 + interval = 2 + while waited < max_wait: + try: + resp = requests.get(task_url, headers=headers, timeout=30) + resp.raise_for_status() + tasks = resp.json() + if tasks: + task = tasks[0] if isinstance(tasks, list) else tasks + status = task.get("status", "") + if status == "SUCCESS": + related = task.get("related_document") + if related: + # related_document can be a URL like "/api/documents/42/" + # or just an ID + if isinstance(related, str): + parts = related.rstrip("/").split("/") + try: + return int(parts[-1]) + except (ValueError, IndexError): + logger.warning("Konnte Dokument-ID nicht aus '%s' extrahieren.", related) + return None + return int(related) + # Some versions use result_id or document_id + for key in ("result_id", "document_id", "id"): + val = task.get(key) + if val is not None: + try: + return int(val) + except (ValueError, TypeError): + pass + logger.warning("Task %s erfolgreich aber keine Dokument-ID gefunden: %s", task_id, task) + return None + elif status == "FAILURE": + logger.error("Paperless-Task %s fehlgeschlagen für '%s': %s", task_id, filename, task.get("result")) + return None + except requests.RequestException as exc: + logger.warning("Fehler beim Abfragen von Paperless-Task %s: %s", task_id, exc) + time.sleep(interval) + waited += interval + logger.error("Timeout beim Warten auf Paperless-Task %s für '%s' (%ds).", task_id, filename, max_wait) + return None + + def _upload_to_paperless(content: bytes, filename: str, destinataer=None, betreff: str = "") -> int | None: """ Lädt einen Anhang in Paperless-NGX hoch. @@ -140,9 +192,18 @@ def _upload_to_paperless(content: bytes, filename: str, destinataer=None, betref timeout=300, # 5 Minuten für große Anhänge ) response.raise_for_status() - # Paperless gibt die neue Dokument-ID zurück (als Integer oder UUID-String) result = response.json() - doc_id = result if isinstance(result, int) else result.get("id") + + # Paperless-ngx post_document returns a task UUID string. + # We need to poll the task status to get the actual document ID. + if isinstance(result, str): + task_id = result + doc_id = _poll_paperless_task(api_url, headers, task_id, safe_filename) + elif isinstance(result, int): + doc_id = result + else: + doc_id = result.get("id") + logger.info("Anhang '%s' erfolgreich in Paperless hochgeladen (ID: %s).", safe_filename, doc_id) return doc_id except requests.RequestException as exc: