PDFA fixes (#2896)

# Description of Changes

Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)

---

## Checklist

### General

- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing)
for more details.
This commit is contained in:
Anthony Stirling 2025-02-07 13:06:19 +00:00 committed by GitHub
parent 26720c5018
commit bf65c456d1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 11 additions and 10 deletions

View File

@ -56,13 +56,15 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et
openssl-dev \
openjdk21-jre \
# Doc conversion
gcompat \
libc6-compat \
libreoffice \
# pdftohtml
poppler-utils \
# OCR MY PDF (unpaper for descew and other advanced features)
tesseract-ocr-data-eng \
# CV
py3-opencv \
py3-opencv \
# python3/pip
python3 \
py3-pip && \

View File

@ -57,6 +57,8 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et
openssl-dev \
openjdk21-jre \
# Doc conversion
gcompat \
libc6-compat \
libreoffice \
# pdftohtml
poppler-utils \
@ -65,7 +67,7 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et
tesseract-ocr-data-eng \
font-terminus font-dejavu font-noto font-noto-cjk font-awesome font-noto-extra \
# CV
py3-opencv \
py3-opencv \
# python3/pip
python3 \
py3-pip && \

View File

@ -265,9 +265,6 @@ public class EndpointConfiguration {
// Pdftohtml dependent endpoints
addEndpointToGroup("Pdftohtml", "pdf-to-html");
addEndpointToGroup("Pdftohtml", "pdf-to-markdown");
// disabled for now while we resolve issues
disableEndpoint("pdf-to-pdfa");
}
private void processEnvironmentConfigs() {

View File

@ -73,8 +73,8 @@ public class ConvertPDFToPDFA {
// Determine PDF/A filter based on requested format
String pdfFilter =
"pdfa".equals(outputFormat)
? "writer_pdf_Export:{'SelectPdfVersion':{'Value':'2'}}:writer_pdf_Export"
: "writer_pdf_Export:{'SelectPdfVersion':{'Value':'1'}}:writer_pdf_Export";
? "pdf:writer_pdf_Export:{\"SelectPdfVersion\":{\"type\":\"long\",\"value\":\"2\"}}"
: "pdf:writer_pdf_Export:{\"SelectPdfVersion\":{\"type\":\"long\",\"value\":\"1\"}}";
// Prepare LibreOffice command
List<String> command =
@ -84,7 +84,7 @@ public class ConvertPDFToPDFA {
"--headless",
"--nologo",
"--convert-to",
"pdf:" + pdfFilter,
pdfFilter,
"--outdir",
tempOutputDir.toString(),
tempInputFile.toString()));

View File

@ -1185,7 +1185,7 @@ changeMetadata.submit=Change
#pdfToPDFA
pdfToPDFA.title=PDF To PDF/A
pdfToPDFA.header=PDF To PDF/A
pdfToPDFA.credit=This service uses qpdf for PDF/A conversion
pdfToPDFA.credit=This service uses libreoffice for PDF/A conversion
pdfToPDFA.submit=Convert
pdfToPDFA.tip=Currently does not work for multiple inputs at once
pdfToPDFA.outputFormat=Output format

View File

@ -1185,7 +1185,7 @@ changeMetadata.submit=Change
#pdfToPDFA
pdfToPDFA.title=PDF To PDF/A
pdfToPDFA.header=PDF To PDF/A
pdfToPDFA.credit=This service uses qpdf for PDF/A conversion
pdfToPDFA.credit=This service uses libreoffice for PDF/A conversion
pdfToPDFA.submit=Convert
pdfToPDFA.tip=Currently does not work for multiple inputs at once
pdfToPDFA.outputFormat=Output format