2024-06-01 12:38:10 +01:00
|
|
|
# Build the application
|
2025-04-29 11:39:38 +01:00
|
|
|
# Build stage: Gradle 8.14 on JDK 21, named `build` so the runtime stage can copy the JAR out of it.
FROM gradle:8.14-jdk21 AS build
|
2025-03-10 20:17:45 +00:00
|
|
|
|
|
|
|
COPY build.gradle .
|
|
|
|
COPY settings.gradle .
|
|
|
|
COPY gradlew .
|
|
|
|
COPY gradle gradle/
|
2025-06-11 17:21:37 +01:00
|
|
|
COPY stirling-pdf/build.gradle stirling-pdf/.
|
2025-05-27 19:55:15 +01:00
|
|
|
COPY common/build.gradle common/.
|
|
|
|
COPY proprietary/build.gradle proprietary/.
|
2025-04-09 10:04:12 +00:00
|
|
|
# Best-effort build with only the Gradle scripts/wrapper copied so far; a failure
# here is deliberately ignored so the stage carries on to the full build.
# `|| true` is used instead of `|| return 0`: `return` outside a function is
# unspecified in POSIX sh and is an error under bash.
RUN ./gradlew build -x spotlessApply -x spotlessCheck -x test -x sonarqube || true
|
2024-06-01 12:38:10 +01:00
|
|
|
|
|
|
|
# Set the working directory
|
|
|
|
WORKDIR /app
|
|
|
|
|
|
|
|
# Copy the entire project to the working directory
|
|
|
|
COPY . .
|
|
|
|
|
2025-06-03 17:15:50 +01:00
|
|
|
# Build the application with DISABLE_ADDITIONAL_FEATURES=false and STIRLING_PDF_DESKTOP_UI=false, skipping the Spotless, test and SonarQube tasks
|
|
|
|
RUN DISABLE_ADDITIONAL_FEATURES=false \
|
2025-02-22 23:55:32 +01:00
|
|
|
STIRLING_PDF_DESKTOP_UI=false \
|
2025-03-10 20:17:45 +00:00
|
|
|
./gradlew clean build -x spotlessApply -x spotlessCheck -x test -x sonarqube
|
2024-06-01 12:38:10 +01:00
|
|
|
|
|
|
|
# Main stage
|
2025-06-02 19:51:52 +01:00
|
|
|
FROM alpine:3.22.0@sha256:8a1f59ffb675680d47db6337b49d22281a139e9d709335b492be023728e11715
|
2024-06-01 12:38:10 +01:00
|
|
|
|
|
|
|
# Copy necessary files
|
|
|
|
COPY scripts /scripts
|
|
|
|
COPY pipeline /pipeline
|
2025-06-09 12:51:41 +01:00
|
|
|
COPY stirling-pdf/src/main/resources/static/fonts/*.ttf /usr/share/fonts/opentype/noto/
|
2025-06-11 17:21:37 +01:00
|
|
|
COPY --from=build /app/stirling-pdf/build/libs/*.jar app.jar
|
2024-06-01 12:38:10 +01:00
|
|
|
|
|
|
|
# Build-time version tag (no default); the ENV instruction in this stage re-exports it as VERSION_TAG at runtime.
ARG VERSION_TAG
|
|
|
|
|
|
|
|
# Set Environment Variables
|
2025-06-03 17:15:50 +01:00
|
|
|
ENV DISABLE_ADDITIONAL_FEATURES=true \
|
2024-06-01 12:38:10 +01:00
|
|
|
VERSION_TAG=$VERSION_TAG \
|
2025-03-25 17:57:17 +00:00
|
|
|
JAVA_BASE_OPTS="-XX:+UnlockExperimentalVMOptions -XX:MaxRAMPercentage=75 -XX:InitiatingHeapOccupancyPercent=20 -XX:+G1PeriodicGCInvokesConcurrent -XX:G1PeriodicGCInterval=10000 -XX:+UseStringDeduplication -XX:G1PeriodicGCSystemLoadThreshold=70" \
|
|
|
|
JAVA_CUSTOM_OPTS="" \
|
2024-06-01 12:38:10 +01:00
|
|
|
HOME=/home/stirlingpdfuser \
|
|
|
|
PUID=1000 \
|
|
|
|
PGID=1000 \
|
|
|
|
UMASK=022 \
|
|
|
|
FAT_DOCKER=true \
|
2025-02-18 11:57:56 +00:00
|
|
|
INSTALL_BOOK_AND_ADVANCED_HTML_OPS=false \
|
|
|
|
PYTHONPATH=/usr/lib/libreoffice/program:/opt/venv/lib/python3.12/site-packages \
|
|
|
|
UNO_PATH=/usr/lib/libreoffice/program \
|
2025-03-25 17:57:17 +00:00
|
|
|
URE_BOOTSTRAP=file:///usr/lib/libreoffice/program/fundamentalrc \
|
2025-06-25 18:32:28 +01:00
|
|
|
PATH=$PATH:/opt/venv/bin \
|
|
|
|
STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \
|
|
|
|
TMPDIR=/tmp/stirling-pdf \
|
|
|
|
TEMP=/tmp/stirling-pdf \
|
|
|
|
TMP=/tmp/stirling-pdf
|
2024-08-20 17:17:54 +02:00
|
|
|
|
2024-06-01 12:38:10 +01:00
|
|
|
|
|
|
|
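# Add the Alpine edge repositories (tagged @main, @community, @testing), upgrade, then install the JRE and the runtime tools the app needs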
|
2025-02-18 11:57:56 +00:00
|
|
|
RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
|
|
|
|
echo "@community https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
|
2024-06-01 12:38:10 +01:00
|
|
|
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
|
|
|
|
apk upgrade --no-cache -a && \
|
|
|
|
apk add --no-cache \
|
2025-02-22 23:55:32 +01:00
|
|
|
ca-certificates \
|
|
|
|
tzdata \
|
|
|
|
tini \
|
|
|
|
bash \
|
|
|
|
curl \
|
|
|
|
shadow \
|
|
|
|
su-exec \
|
|
|
|
openssl \
|
|
|
|
openssl-dev \
|
|
|
|
openjdk21-jre \
|
|
|
|
# Doc conversion
|
|
|
|
gcompat \
|
|
|
|
libc6-compat \
|
|
|
|
libreoffice \
|
|
|
|
# pdftohtml
|
|
|
|
poppler-utils \
|
|
|
|
# OCRmyPDF (unpaper for deskew and other advanced features)
|
|
|
|
tesseract-ocr-data-eng \
|
2025-03-20 09:39:57 +00:00
|
|
|
tesseract-ocr-data-chi_sim \
|
|
|
|
tesseract-ocr-data-deu \
|
|
|
|
tesseract-ocr-data-fra \
|
|
|
|
tesseract-ocr-data-por \
|
restore OCRMyPDF and ghostscript compression (#3846)
# Description of Changes
This pull request introduces enhancements to tool dependencies, endpoint
configurations, and Dockerfile packages. The changes primarily focus on
adding support for new tools (Ghostscript and OCRmyPDF), improving
endpoint management, and refining dependency checks. Below is a summary
of the most important changes grouped by theme:
### Tool Integration and Dependency Management:
* Added support for Ghostscript and OCRmyPDF in the session limits and
timeout configurations (`ApplicationProperties.java`).
[[1]](diffhunk://#diff-0642cd5c54d57a80f1f9c26cb26677f58778dc889cb59ebfc07b7512e74a8886R548-R549)
[[2]](diffhunk://#diff-0642cd5c54d57a80f1f9c26cb26677f58778dc889cb59ebfc07b7512e74a8886R582-R589)
[[3]](diffhunk://#diff-0642cd5c54d57a80f1f9c26cb26677f58778dc889cb59ebfc07b7512e74a8886R602-R603)
[[4]](diffhunk://#diff-0642cd5c54d57a80f1f9c26cb26677f58778dc889cb59ebfc07b7512e74a8886R636-R643)
* Updated `Processes` enum and `ProcessExecutor` logic to include
Ghostscript and OCRmyPDF, enabling their session limits and timeout
handling.
[[1]](diffhunk://#diff-b0afb37bdac8b0f1a10aca87b0244b133ac9b05518088b6f4c6c2bf48f859fdaR87-R96)
[[2]](diffhunk://#diff-b0afb37bdac8b0f1a10aca87b0244b133ac9b05518088b6f4c6c2bf48f859fdaR141-R150)
[[3]](diffhunk://#diff-b0afb37bdac8b0f1a10aca87b0244b133ac9b05518088b6f4c6c2bf48f859fdaL281-R303)
* Integrated Ghostscript and OCRmyPDF into the external dependency check
mechanism (`ExternalAppDepConfig.java`).
[[1]](diffhunk://#diff-511713d04e0545670bfb44e70e84235288e91db0ef219e423628746f28e67cfcR37-R38)
[[2]](diffhunk://#diff-511713d04e0545670bfb44e70e84235288e91db0ef219e423628746f28e67cfcR114-R115)
### Endpoint Configuration Enhancements:
* Improved endpoint management by introducing functional group
overrides, tool group fallbacks, and alternative tool group handling
(`EndpointConfiguration.java`).
[[1]](diffhunk://#diff-d6f76bfe88b1a7f347c10cc6956f8e7874b66772c6ac0ac432bedd3c7e8c372dR24-R25)
[[2]](diffhunk://#diff-d6f76bfe88b1a7f347c10cc6956f8e7874b66772c6ac0ac432bedd3c7e8c372dR39-R178)
[[3]](diffhunk://#diff-d6f76bfe88b1a7f347c10cc6956f8e7874b66772c6ac0ac432bedd3c7e8c372dL264-R357)
[[4]](diffhunk://#diff-d6f76bfe88b1a7f347c10cc6956f8e7874b66772c6ac0ac432bedd3c7e8c372dR399-R437)
* Added new endpoint groups and alternatives for multi-tool endpoints
like "repair," "compress-pdf," and "ocr-pdf," supporting both
Ghostscript and OCRmyPDF.
[[1]](diffhunk://#diff-d6f76bfe88b1a7f347c10cc6956f8e7874b66772c6ac0ac432bedd3c7e8c372dL215-L216)
[[2]](diffhunk://#diff-d6f76bfe88b1a7f347c10cc6956f8e7874b66772c6ac0ac432bedd3c7e8c372dL264-R357)
### Dockerfile Updates:
* Removed `qpdf` and added `unpaper` and `ocrmypdf` to the package list
in `Dockerfile` and `Dockerfile.fat` to enable advanced OCR and PDF
manipulation features.
[[1]](diffhunk://#diff-dd2c0eb6ea5cfc6c4bd4eac30934e2d5746747af48fef6da689e85b752f39557L54)
[[2]](diffhunk://#diff-dd2c0eb6ea5cfc6c4bd4eac30934e2d5746747af48fef6da689e85b752f39557R71-R75)
[[3]](diffhunk://#diff-571631582b988e88c52c86960cc083b0b8fa63cf88f056f26e9e684195221c27L79-R89)
### Code Cleanup and Refactoring:
* Refactored `TempFileRegistry` to improve readability by simplifying
variable initialization.
[[1]](diffhunk://#diff-ed08845255ade38ea6e28a959ed967957611025fb7f8f5dcf4543b5f200a5fecL12)
[[2]](diffhunk://#diff-ed08845255ade38ea6e28a959ed967957611025fb7f8f5dcf4543b5f200a5fecL30-R29)
* Updated `CompressController` to dynamically check tool group
availability using the new endpoint configuration logic.
[[1]](diffhunk://#diff-fc8dc1845d34077a089d9265521ce8c8d6104a8b79137e1de5310e30ffb0348aR50)
[[2]](diffhunk://#diff-fc8dc1845d34077a089d9265521ce8c8d6104a8b79137e1de5310e30ffb0348aR65-R76)
These changes collectively enhance the application's capabilities for
PDF processing, improve dependency management, and streamline endpoint
handling.
---
## Checklist
### General
- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings
### Documentation
- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)
### UI Changes (if applicable)
- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)
### Testing (if applicable)
- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing)
for more details.
2025-07-01 14:50:56 +01:00
|
|
|
unpaper \
|
2025-02-22 23:55:32 +01:00
|
|
|
font-terminus font-dejavu font-noto font-noto-cjk font-awesome font-noto-extra font-liberation font-linux-libertine \
|
|
|
|
# CV
|
|
|
|
py3-opencv \
|
2025-02-18 11:57:56 +00:00
|
|
|
python3 \
|
restore OCRMyPDF and ghostscript compression (#3846)
# Description of Changes
This pull request introduces enhancements to tool dependencies, endpoint
configurations, and Dockerfile packages. The changes primarily focus on
adding support for new tools (Ghostscript and OCRmyPDF), improving
endpoint management, and refining dependency checks. Below is a summary
of the most important changes grouped by theme:
### Tool Integration and Dependency Management:
* Added support for Ghostscript and OCRmyPDF in the session limits and
timeout configurations (`ApplicationProperties.java`).
[[1]](diffhunk://#diff-0642cd5c54d57a80f1f9c26cb26677f58778dc889cb59ebfc07b7512e74a8886R548-R549)
[[2]](diffhunk://#diff-0642cd5c54d57a80f1f9c26cb26677f58778dc889cb59ebfc07b7512e74a8886R582-R589)
[[3]](diffhunk://#diff-0642cd5c54d57a80f1f9c26cb26677f58778dc889cb59ebfc07b7512e74a8886R602-R603)
[[4]](diffhunk://#diff-0642cd5c54d57a80f1f9c26cb26677f58778dc889cb59ebfc07b7512e74a8886R636-R643)
* Updated `Processes` enum and `ProcessExecutor` logic to include
Ghostscript and OCRmyPDF, enabling their session limits and timeout
handling.
[[1]](diffhunk://#diff-b0afb37bdac8b0f1a10aca87b0244b133ac9b05518088b6f4c6c2bf48f859fdaR87-R96)
[[2]](diffhunk://#diff-b0afb37bdac8b0f1a10aca87b0244b133ac9b05518088b6f4c6c2bf48f859fdaR141-R150)
[[3]](diffhunk://#diff-b0afb37bdac8b0f1a10aca87b0244b133ac9b05518088b6f4c6c2bf48f859fdaL281-R303)
* Integrated Ghostscript and OCRmyPDF into the external dependency check
mechanism (`ExternalAppDepConfig.java`).
[[1]](diffhunk://#diff-511713d04e0545670bfb44e70e84235288e91db0ef219e423628746f28e67cfcR37-R38)
[[2]](diffhunk://#diff-511713d04e0545670bfb44e70e84235288e91db0ef219e423628746f28e67cfcR114-R115)
### Endpoint Configuration Enhancements:
* Improved endpoint management by introducing functional group
overrides, tool group fallbacks, and alternative tool group handling
(`EndpointConfiguration.java`).
[[1]](diffhunk://#diff-d6f76bfe88b1a7f347c10cc6956f8e7874b66772c6ac0ac432bedd3c7e8c372dR24-R25)
[[2]](diffhunk://#diff-d6f76bfe88b1a7f347c10cc6956f8e7874b66772c6ac0ac432bedd3c7e8c372dR39-R178)
[[3]](diffhunk://#diff-d6f76bfe88b1a7f347c10cc6956f8e7874b66772c6ac0ac432bedd3c7e8c372dL264-R357)
[[4]](diffhunk://#diff-d6f76bfe88b1a7f347c10cc6956f8e7874b66772c6ac0ac432bedd3c7e8c372dR399-R437)
* Added new endpoint groups and alternatives for multi-tool endpoints
like "repair," "compress-pdf," and "ocr-pdf," supporting both
Ghostscript and OCRmyPDF.
[[1]](diffhunk://#diff-d6f76bfe88b1a7f347c10cc6956f8e7874b66772c6ac0ac432bedd3c7e8c372dL215-L216)
[[2]](diffhunk://#diff-d6f76bfe88b1a7f347c10cc6956f8e7874b66772c6ac0ac432bedd3c7e8c372dL264-R357)
### Dockerfile Updates:
* Removed `qpdf` and added `unpaper` and `ocrmypdf` to the package list
in `Dockerfile` and `Dockerfile.fat` to enable advanced OCR and PDF
manipulation features.
[[1]](diffhunk://#diff-dd2c0eb6ea5cfc6c4bd4eac30934e2d5746747af48fef6da689e85b752f39557L54)
[[2]](diffhunk://#diff-dd2c0eb6ea5cfc6c4bd4eac30934e2d5746747af48fef6da689e85b752f39557R71-R75)
[[3]](diffhunk://#diff-571631582b988e88c52c86960cc083b0b8fa63cf88f056f26e9e684195221c27L79-R89)
### Code Cleanup and Refactoring:
* Refactored `TempFileRegistry` to improve readability by simplifying
variable initialization.
[[1]](diffhunk://#diff-ed08845255ade38ea6e28a959ed967957611025fb7f8f5dcf4543b5f200a5fecL12)
[[2]](diffhunk://#diff-ed08845255ade38ea6e28a959ed967957611025fb7f8f5dcf4543b5f200a5fecL30-R29)
* Updated `CompressController` to dynamically check tool group
availability using the new endpoint configuration logic.
[[1]](diffhunk://#diff-fc8dc1845d34077a089d9265521ce8c8d6104a8b79137e1de5310e30ffb0348aR50)
[[2]](diffhunk://#diff-fc8dc1845d34077a089d9265521ce8c8d6104a8b79137e1de5310e30ffb0348aR65-R76)
These changes collectively enhance the application's capabilities for
PDF processing, improve dependency management, and streamline endpoint
handling.
---
## Checklist
### General
- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings
### Documentation
- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)
### UI Changes (if applicable)
- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)
### Testing (if applicable)
- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing)
for more details.
2025-07-01 14:50:56 +01:00
|
|
|
ocrmypdf \
|
2025-02-18 11:57:56 +00:00
|
|
|
py3-pip \
|
|
|
|
py3-pillow@testing \
|
|
|
|
py3-pdf2image@testing && \
|
|
|
|
python3 -m venv /opt/venv && \
|
2025-05-27 17:25:41 +01:00
|
|
|
# Upgrade pip and setuptools inside the venv; --no-cache-dir keeps pip's download cache out of the image layer
/opt/venv/bin/pip install --no-cache-dir --upgrade pip setuptools && \
|
2025-03-25 18:38:57 +00:00
|
|
|
/opt/venv/bin/pip install --no-cache-dir --upgrade unoserver weasyprint && \
|
2025-02-18 11:57:56 +00:00
|
|
|
ln -s /usr/lib/libreoffice/program/uno.py /opt/venv/lib/python3.12/site-packages/ && \
|
|
|
|
ln -s /usr/lib/libreoffice/program/unohelper.py /opt/venv/lib/python3.12/site-packages/ && \
|
|
|
|
ln -s /usr/lib/libreoffice/program /opt/venv/lib/python3.12/site-packages/LibreOffice && \
|
2024-06-01 12:38:10 +01:00
|
|
|
mv /usr/share/tessdata /usr/share/tessdata-original && \
|
2025-06-25 18:32:28 +01:00
|
|
|
mkdir -p $HOME /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders /tmp/stirling-pdf && \
|
2024-06-01 12:38:10 +01:00
|
|
|
fc-cache -f -v && \
|
|
|
|
chmod +x /scripts/* && \
|
|
|
|
chmod +x /scripts/init.sh && \
|
2025-02-22 23:55:32 +01:00
|
|
|
# User permissions
|
2024-06-01 12:38:10 +01:00
|
|
|
addgroup -S stirlingpdfgroup && adduser -S stirlingpdfuser -G stirlingpdfgroup && \
|
2025-06-25 18:32:28 +01:00
|
|
|
chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline /tmp/stirling-pdf && \
|
2024-12-22 12:00:52 +00:00
|
|
|
chown stirlingpdfuser:stirlingpdfgroup /app.jar
|
2024-06-01 12:38:10 +01:00
|
|
|
|
|
|
|
EXPOSE 8080/tcp
|
|
|
|
# Entrypoint and default command (tini runs /scripts/init.sh; no USER instruction is set here)
|
|
|
|
ENTRYPOINT ["tini", "--", "/scripts/init.sh"]
|
2025-06-25 18:32:28 +01:00
|
|
|
# Default command: start the application JAR in the background (temp dir /tmp/stirling-pdf)
# and run unoserver in the foreground, listening on 127.0.0.1 port 2003.
CMD ["sh", "-c", "java -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/tmp/stirling-pdf -jar /app.jar & /opt/venv/bin/unoserver --port 2003 --interface 127.0.0.1"]
|