Add German OCR support to Paperless-ngx
- Created custom Dockerfile extending official paperless-ngx image
- Added tesseract-ocr-deu package for German language OCR
- Set PAPERLESS_OCR_LANGUAGE=deu+eng environment variable
- Updated CI/CD pipeline to build and push custom paperless image
- Modified deployment script to pull paperless image from GHCR
- Tested locally: German (deu) language pack now available alongside English
This commit is contained in:
19
.github/workflows/ci-cd.yml
vendored
19
.github/workflows/ci-cd.yml
vendored
@@ -165,7 +165,7 @@ jobs:
|
||||
type=ref,event=pr
|
||||
type=sha,prefix={{branch}}-
|
||||
|
||||
- name: Build and push Docker image
|
||||
- name: Build and push Docker images
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./app
|
||||
@@ -173,6 +173,14 @@ jobs:
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
|
||||
- name: Build and push Paperless image
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./paperless
|
||||
push: true
|
||||
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-paperless:latest
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
|
||||
deploy:
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
@@ -221,8 +229,8 @@ jobs:
|
||||
echo "Attempting to pull images from GitHub Container Registry..."
|
||||
if echo $DEPLOY_TOKEN | docker login ghcr.io -u remmerinio --password-stdin; then
|
||||
echo "✅ Successfully logged into GHCR"
|
||||
if docker-compose -f compose.yml pull web worker beat; then
|
||||
echo "✅ Successfully pulled web images from GHCR"
|
||||
if docker-compose -f compose.yml pull web worker beat paperless; then
|
||||
echo "✅ Successfully pulled web and paperless images from GHCR"
|
||||
USE_REMOTE_IMAGES=true
|
||||
else
|
||||
echo "⚠️ Failed to pull images from GHCR, will build locally"
|
||||
@@ -233,14 +241,15 @@ jobs:
|
||||
USE_REMOTE_IMAGES=false
|
||||
fi
|
||||
|
||||
# Pull other standard images (paperless, redis, postgres, grampsweb)
|
||||
# Pull other standard images (redis, postgres, grampsweb)
|
||||
echo "Pulling standard Docker images..."
|
||||
docker-compose -f compose.yml pull db redis paperless grampsweb || echo "Some standard images failed to pull, will use cached versions"
|
||||
docker-compose -f compose.yml pull db redis grampsweb || echo "Some standard images failed to pull, will use cached versions"
|
||||
|
||||
# If we couldn't pull from GHCR, build locally
|
||||
if [ "$USE_REMOTE_IMAGES" = "false" ]; then
|
||||
echo "🔨 Building images locally from source code..."
|
||||
docker build -t ghcr.io/remmerinio/stiftung-management-system:latest ./app
|
||||
docker build -t ghcr.io/remmerinio/stiftung-management-system-paperless:latest ./paperless
|
||||
fi
|
||||
|
||||
# Stop containers and clean up
|
||||
|
||||
@@ -54,7 +54,9 @@ services:
|
||||
command: ["python", "manage.py", "runserver", "0.0.0.0:8000"]
|
||||
|
||||
paperless:
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
build:
|
||||
context: ./paperless
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "8082:8000"
|
||||
environment:
|
||||
|
||||
@@ -110,7 +110,7 @@ services:
|
||||
- redis
|
||||
|
||||
paperless:
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
image: ghcr.io/remmerinio/stiftung-management-system-paperless:latest
|
||||
ports:
|
||||
- "8080:8000"
|
||||
environment:
|
||||
|
||||
22
paperless/Dockerfile
Normal file
22
paperless/Dockerfile
Normal file
@@ -0,0 +1,22 @@
|
||||
# Custom Paperless-ngx image with the German Tesseract OCR language pack.
#
# NOTE(review): the base image is referenced by the mutable `latest` tag, so
# rebuilds are not reproducible. Consider pinning a release tag or digest once
# the target Paperless-ngx version is decided.
FROM ghcr.io/paperless-ngx/paperless-ngx:latest

# Package installation requires root.
USER root

# Install the German OCR language data.
# The correct package name is tesseract-ocr-deu (not tesseract-data-deu).
# --no-install-recommends keeps optional packages out of the image; the apt
# lists are removed in the same layer so they never end up in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        tesseract-ocr-deu \
    && apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Verify the German language pack is installed. grep exits non-zero when no
# line is exactly "deu", which fails the build instead of silently producing
# an image without German OCR support.
RUN tesseract --list-langs | grep -x deu

# Switch back to the paperless user.
# NOTE(review): confirm the base image's entrypoint works when the container
# starts as this user rather than root.
USER paperless

# Set the OCR language to include German and English.
ENV PAPERLESS_OCR_LANGUAGE=deu+eng
|
||||
Reference in New Issue
Block a user