Add German OCR support to Paperless-ngx
- Created custom Dockerfile extending official paperless-ngx image
- Added tesseract-ocr-deu package for German language OCR
- Set PAPERLESS_OCR_LANGUAGE=deu+eng environment variable
- Updated CI/CD pipeline to build and push custom paperless image
- Modified deployment script to pull paperless image from GHCR
- Tested locally: German (deu) language pack now available alongside English
This commit is contained in:
19
.github/workflows/ci-cd.yml
vendored
19
.github/workflows/ci-cd.yml
vendored
@@ -165,7 +165,7 @@ jobs:
|
|||||||
type=ref,event=pr
|
type=ref,event=pr
|
||||||
type=sha,prefix={{branch}}-
|
type=sha,prefix={{branch}}-
|
||||||
|
|
||||||
- name: Build and push Docker image
|
- name: Build and push Docker images
|
||||||
uses: docker/build-push-action@v5
|
uses: docker/build-push-action@v5
|
||||||
with:
|
with:
|
||||||
context: ./app
|
context: ./app
|
||||||
@@ -173,6 +173,14 @@ jobs:
|
|||||||
tags: ${{ steps.meta.outputs.tags }}
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
labels: ${{ steps.meta.outputs.labels }}
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
|
|
||||||
|
- name: Build and push Paperless image
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: ./paperless
|
||||||
|
push: true
|
||||||
|
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-paperless:latest
|
||||||
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
|
|
||||||
deploy:
|
deploy:
|
||||||
needs: build
|
needs: build
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -221,8 +229,8 @@ jobs:
|
|||||||
echo "Attempting to pull images from GitHub Container Registry..."
|
echo "Attempting to pull images from GitHub Container Registry..."
|
||||||
if echo $DEPLOY_TOKEN | docker login ghcr.io -u remmerinio --password-stdin; then
|
if echo $DEPLOY_TOKEN | docker login ghcr.io -u remmerinio --password-stdin; then
|
||||||
echo "✅ Successfully logged into GHCR"
|
echo "✅ Successfully logged into GHCR"
|
||||||
if docker-compose -f compose.yml pull web worker beat; then
|
if docker-compose -f compose.yml pull web worker beat paperless; then
|
||||||
echo "✅ Successfully pulled web images from GHCR"
|
echo "✅ Successfully pulled web and paperless images from GHCR"
|
||||||
USE_REMOTE_IMAGES=true
|
USE_REMOTE_IMAGES=true
|
||||||
else
|
else
|
||||||
echo "⚠️ Failed to pull images from GHCR, will build locally"
|
echo "⚠️ Failed to pull images from GHCR, will build locally"
|
||||||
@@ -233,14 +241,15 @@ jobs:
|
|||||||
USE_REMOTE_IMAGES=false
|
USE_REMOTE_IMAGES=false
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Pull other standard images (paperless, redis, postgres, grampsweb)
|
# Pull other standard images (redis, postgres, grampsweb)
|
||||||
echo "Pulling standard Docker images..."
|
echo "Pulling standard Docker images..."
|
||||||
docker-compose -f compose.yml pull db redis paperless grampsweb || echo "Some standard images failed to pull, will use cached versions"
|
docker-compose -f compose.yml pull db redis grampsweb || echo "Some standard images failed to pull, will use cached versions"
|
||||||
|
|
||||||
# If we couldn't pull from GHCR, build locally
|
# If we couldn't pull from GHCR, build locally
|
||||||
if [ "$USE_REMOTE_IMAGES" = "false" ]; then
|
if [ "$USE_REMOTE_IMAGES" = "false" ]; then
|
||||||
echo "🔨 Building images locally from source code..."
|
echo "🔨 Building images locally from source code..."
|
||||||
docker build -t ghcr.io/remmerinio/stiftung-management-system:latest ./app
|
docker build -t ghcr.io/remmerinio/stiftung-management-system:latest ./app
|
||||||
|
docker build -t ghcr.io/remmerinio/stiftung-management-system-paperless:latest ./paperless
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Stop containers and clean up
|
# Stop containers and clean up
|
||||||
|
|||||||
@@ -54,7 +54,9 @@ services:
|
|||||||
command: ["python", "manage.py", "runserver", "0.0.0.0:8000"]
|
command: ["python", "manage.py", "runserver", "0.0.0.0:8000"]
|
||||||
|
|
||||||
paperless:
|
paperless:
|
||||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
build:
|
||||||
|
context: ./paperless
|
||||||
|
dockerfile: Dockerfile
|
||||||
ports:
|
ports:
|
||||||
- "8082:8000"
|
- "8082:8000"
|
||||||
environment:
|
environment:
|
||||||
|
|||||||
@@ -110,7 +110,7 @@ services:
|
|||||||
- redis
|
- redis
|
||||||
|
|
||||||
paperless:
|
paperless:
|
||||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
image: ghcr.io/remmerinio/stiftung-management-system-paperless:latest
|
||||||
ports:
|
ports:
|
||||||
- "8080:8000"
|
- "8080:8000"
|
||||||
environment:
|
environment:
|
||||||
|
|||||||
22
paperless/Dockerfile
Normal file
22
paperless/Dockerfile
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# Custom Paperless-ngx image with German OCR support.
#
# PAPERLESS_VERSION selects the upstream image tag. It defaults to "latest" so
# existing builds behave exactly as before; pass
#   --build-arg PAPERLESS_VERSION=<tag>
# to pin a specific upstream release for reproducible builds.
ARG PAPERLESS_VERSION=latest
FROM ghcr.io/paperless-ngx/paperless-ngx:${PAPERLESS_VERSION}

# Installing packages requires root.
USER root

# Install the German Tesseract language data. With apt the package is named
# tesseract-ocr-deu (not tesseract-data-deu).
# --no-install-recommends keeps packages that are merely "recommended" out of
# the image; the apt caches and lists are removed in the same layer so they
# never end up in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        tesseract-ocr-deu \
    && apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Fail the build if Tesseract does not list the German language pack.
# grep -x requires a whole-line match, so only an actual "deu" entry counts
# (not a header path or another language code that merely contains "deu").
# The error message goes to stderr and the step exits non-zero, so a missing
# language pack can no longer slip through as a successful build.
RUN tesseract --list-langs | grep -x deu || \
    { echo "German language pack not found" >&2; exit 1; }

# Run as the paperless user from here on.
# NOTE(review): this assumes the base image defines a "paperless" user and
# that its entrypoint works when started as that user - confirm against the
# upstream image version in use.
USER paperless

# Default OCR languages: German and English.
ENV PAPERLESS_OCR_LANGUAGE=deu+eng
|
||||||
Reference in New Issue
Block a user