Exception cleanup, resource change and OCR Defaults (#3876)

# Description of Changes Please provide a summary of the changes, including: - What was changed - Why the change was made - Any challenges encountered Closes #(issue_number) --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: a <a>
2025-07-23 13:45:21 +00:00 · 2025-07-05 17:33:25 +01:00 · 2025-07-05 17:33:25 +01:00 · 14a4bdfb1b
commit 14a4bdfb1b
parent b4a7b5d520
16 changed files with 137 additions and 42 deletions
--- a/6
+++ b/6
@ -75,7 +75,9 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
    ocrmypdf \
    py3-pip \
    py3-pillow@testing \
-    py3-pdf2image@testing && \
+    py3-pdf2image@testing \
+    # URW Base 35 fonts for better PDF rendering
+    font-urw-base35 && \
    python3 -m venv /opt/venv && \
    /opt/venv/bin/pip install --upgrade pip setuptools && \
    /opt/venv/bin/pip install --no-cache-dir --upgrade unoserver weasyprint && \
@ -84,6 +86,8 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
    ln -s /usr/lib/libreoffice/program /opt/venv/lib/python3.12/site-packages/LibreOffice && \
    mv /usr/share/tessdata /usr/share/tessdata-original && \
    mkdir -p $HOME /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders /tmp/stirling-pdf && \
+    # Configure URW Base 35 fonts
+    ln -s /usr/share/fontconfig/conf.avail/69-urw-*.conf /etc/fonts/conf.d/ && \
    fc-cache -f -v && \
    chmod +x /scripts/* && \
    chmod +x /scripts/init.sh && \
--- a/Dockerfile.dev
+++ b/Dockerfile.dev
@ -19,7 +19,7 @@ RUN apt-get update && apt-get install -y \
 # settings.yml | tessdataDir: /usr/share/tesseract-ocr/5/tessdata
  tesseract-ocr \
  tesseract-ocr-eng \
-  fonts-terminus fonts-dejavu fonts-font-awesome fonts-noto fonts-noto-core fonts-noto-cjk fonts-noto-extra fonts-liberation fonts-linuxlibertine \
+  fonts-terminus fonts-dejavu fonts-font-awesome fonts-noto fonts-noto-core fonts-noto-cjk fonts-noto-extra fonts-liberation fonts-linuxlibertine fonts-urw-base35 \
  python3-uno \
  python3-venv \
 # ss -tln
@ -45,6 +45,7 @@ ENV PATH="/opt/venv/bin:$PATH"
 COPY . /workspace

 RUN mkdir -p /tmp/stirling-pdf \
+  && fc-cache -f -v \
  && adduser --disabled-password --gecos '' devuser \
  && chown -R devuser:devuser /home/devuser /workspace /tmp/stirling-pdf
 RUN echo "devuser ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/devuser \
--- a/Dockerfile.fat
+++ b/Dockerfile.fat
@ -82,7 +82,7 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
 	tesseract-ocr-data-fra \
 	tesseract-ocr-data-por \
    unpaper \
-    font-terminus font-dejavu font-noto font-noto-cjk font-awesome font-noto-extra font-liberation font-linux-libertine \
+    font-terminus font-dejavu font-noto font-noto-cjk font-awesome font-noto-extra font-liberation font-linux-libertine font-urw-base35 \
    # CV
    py3-opencv \
    python3 \
@ -98,6 +98,8 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
    ln -s /usr/lib/libreoffice/program /opt/venv/lib/python3.12/site-packages/LibreOffice && \
    mv /usr/share/tessdata /usr/share/tessdata-original && \
    mkdir -p $HOME /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders /tmp/stirling-pdf && \
+    # Configure URW Base 35 fonts
+    ln -s /usr/share/fontconfig/conf.avail/69-urw-*.conf /etc/fonts/conf.d/ && \
    fc-cache -f -v && \
    chmod +x /scripts/* && \
    chmod +x /scripts/init.sh && \
--- a/Dockerfile.ultra-lite
+++ b/Dockerfile.ultra-lite
@ -52,4 +52,4 @@ EXPOSE 8080/tcp

 # Run the application
 ENTRYPOINT ["tini", "--", "/scripts/init-without-ocr.sh"]
-CMD ["java", "-Dfile.encoding=UTF-8", "-Djava.io.tmpdir=/tmp/stirling-pdf", "-jar", "/app.jar"]
+CMD ["java", "-Dfile.encoding=UTF-8", "-Djava.io.tmpdir=/tmp/stirling-pdf", "-jar", "/app.jar"]
--- a/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java
+++ b/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java
@ -293,9 +293,32 @@ public class CustomPDFDocumentFactory {
        } else {
            throw new IllegalArgumentException("Unsupported source type: " + source.getClass());
        }
+
+        configureResourceCacheIfNeeded(document, contentSize);
+
        return document;
    }

+    /**
+     * Configure resource cache based on content size and memory constraints. Disables resource
+     * cache for large files or when memory is low to prevent OOM errors.
+     *
+     * <p>The cache is disabled by calling {@code setResourceCache(null)} when either {@code
+     * contentSize} exceeds {@code LARGE_FILE_THRESHOLD}, or the share of the JVM's maximum memory
+     * that is still unused falls below {@code MIN_FREE_MEMORY_PERCENTAGE}. In every other case
+     * the document's resource cache is left untouched.
+     *
+     * @param document the loaded document whose resource cache is configured
+     * @param contentSize size of the source content, compared against {@code
+     *     LARGE_FILE_THRESHOLD} (presumably in bytes; confirm against callers)
+     */
+    private void configureResourceCacheIfNeeded(PDDocument document, long contentSize) {
+        if (contentSize > LARGE_FILE_THRESHOLD) {
+            document.setResourceCache(null);
+        } else {
+            // Check current memory status for smaller files
+            long maxMemory = Runtime.getRuntime().maxMemory();
+            // Used memory = memory currently held by the JVM minus the unused part of it
+            long usedMemory =
+                    Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
+            // Measured against maxMemory(), so memory the JVM has not claimed yet counts as free
+            double freeMemoryPercent = (double) (maxMemory - usedMemory) / maxMemory * 100;
+
+            if (freeMemoryPercent < MIN_FREE_MEMORY_PERCENTAGE) {
+                document.setResourceCache(null);
+            }
+        }
+    }
+
    /** Load a PDF with password protection using adaptive loading strategies */
    private PDDocument loadAdaptivelyWithPassword(Object source, long contentSize, String password)
            throws IOException {
@ -314,6 +337,9 @@ public class CustomPDFDocumentFactory {
        } else {
            throw new IllegalArgumentException("Unsupported source type: " + source.getClass());
        }
+
+        configureResourceCacheIfNeeded(document, contentSize);
+
        return document;
    }

--- a/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java
+++ b/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java
@ -153,13 +153,15 @@ public class TempFileCleanupService {
        // Clean up unregistered temp files based on our cleanup strategy
        boolean containerMode = isContainerMode();
        int unregisteredDeletedCount = cleanupUnregisteredFiles(containerMode, true, maxAgeMillis);
-        
-        if(registeredDeletedCount >0 || unregisteredDeletedCount >0 || directoriesDeletedCount >0) {
-        log.info(
-                "Scheduled cleanup complete. Deleted {} registered files, {} unregistered files, {} directories",
-                registeredDeletedCount,
-                unregisteredDeletedCount,
-                directoriesDeletedCount);
+
+        if (registeredDeletedCount > 0
+                || unregisteredDeletedCount > 0
+                || directoriesDeletedCount > 0) {
+            log.info(
+                    "Scheduled cleanup complete. Deleted {} registered files, {} unregistered files, {} directories",
+                    registeredDeletedCount,
+                    unregisteredDeletedCount,
+                    directoriesDeletedCount);
        }
    }

--- a/common/src/main/java/stirling/software/common/util/ExceptionUtils.java
+++ b/common/src/main/java/stirling/software/common/util/ExceptionUtils.java
@ -300,7 +300,8 @@ public class ExceptionUtils {
    public static void logException(String operation, Exception e) {
        if (PdfErrorUtils.isCorruptedPdfError(e)) {
            log.warn("PDF corruption detected during {}: {}", operation, e.getMessage());
-        } else if (e instanceof IOException && (isEncryptionError((IOException) e) || isPasswordError((IOException) e))) {
+        } else if (e instanceof IOException
+                && (isEncryptionError((IOException) e) || isPasswordError((IOException) e))) {
            log.info("PDF security issue during {}: {}", operation, e.getMessage());
        } else {
            log.error("Unexpected error during {}", operation, e);
--- a/common/src/main/java/stirling/software/common/util/PdfErrorUtils.java
+++ b/common/src/main/java/stirling/software/common/util/PdfErrorUtils.java
@ -49,6 +49,7 @@ public class PdfErrorUtils {
                || message.contains("Invalid dictionary, found:")
                || message.contains("AES initialization vector not fully read")
                || message.contains("BadPaddingException")
-                || message.contains("Given final block not properly padded");
+                || message.contains("Given final block not properly padded")
+                || message.contains("End-of-File, expected line");
    }
 }
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/SplitPDFController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/SplitPDFController.java
@ -72,7 +72,7 @@ public class SplitPDFController {
                pageNumbers.add(totalPages - 1);
            }

-            log.info(
+            log.debug(
                    "Splitting PDF into pages: {}",
                    pageNumbers.stream().map(String::valueOf).collect(Collectors.joining(",")));

--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/ToSinglePageController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/ToSinglePageController.java
@ -70,17 +70,17 @@ public class ToSinglePageController {
        float yOffset = totalHeight;

        // For each page, copy its content to the new page at the correct offset
+        int pageIndex = 0;
        for (PDPage page : sourceDocument.getPages()) {
-            PDFormXObject form =
-                    layerUtility.importPageAsForm(
-                            sourceDocument, sourceDocument.getPages().indexOf(page));
+            PDFormXObject form = layerUtility.importPageAsForm(sourceDocument, pageIndex);
            AffineTransform af =
                    AffineTransform.getTranslateInstance(
                            0, yOffset - page.getMediaBox().getHeight());
            layerUtility.wrapInSaveRestore(newPage);
-            String defaultLayerName = "Layer" + sourceDocument.getPages().indexOf(page);
+            String defaultLayerName = "Layer" + pageIndex;
            layerUtility.appendFormAsLayer(newPage, form, af, defaultLayerName);
            yOffset -= page.getMediaBox().getHeight();
+            pageIndex++;
        }

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java
@ -95,9 +95,9 @@ public class ExtractImagesController {
            try {
                int pageCount = document.getPages().getCount();
                log.debug("Document reports {} pages", pageCount);
-                
+
                int consecutiveFailures = 0;
-                
+
                for (int pgNum = 0; pgNum < pageCount; pgNum++) {
                    try {
                        PDPage page = document.getPage(pgNum);
@ -118,7 +118,10 @@ public class ExtractImagesController {
                                                        allowDuplicates);
                                            } catch (Exception e) {
                                                // Log the error and continue processing other pages
-                                                ExceptionUtils.logException("image extraction from page " + currentPageNum, e);
+                                                ExceptionUtils.logException(
+                                                        "image extraction from page "
+                                                                + currentPageNum,
+                                                        e);
                                            }

                                            return null; // Callable requires a return type
@ -129,7 +132,7 @@ public class ExtractImagesController {
                    } catch (Exception e) {
                        consecutiveFailures++;
                        ExceptionUtils.logException("page access for page " + (pgNum + 1), e);
-                        
+
                        if (consecutiveFailures >= 3) {
                            log.warn("Stopping page iteration after 3 consecutive failures");
                            break;
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/FakeScanController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/FakeScanController.java
@ -47,6 +47,11 @@ public class FakeScanController {
    private final CustomPDFDocumentFactory pdfDocumentFactory;
    private static final Random RANDOM = new Random();

+    // Size limits to prevent OutOfMemoryError
+    // Per-side caps, in pixels, for a rendered page image.
+    private static final int MAX_IMAGE_WIDTH = 8192;
+    private static final int MAX_IMAGE_HEIGHT = 8192;
+    // Cap on total pixels per rendered page. An image at both per-side maxima (8192x8192)
+    // would exceed this, so the per-side caps only bind for strongly elongated pages.
+    private static final long MAX_IMAGE_PIXELS = 16_777_216; // 4096x4096
+
    @PostMapping(value = "/fake-scan", consumes = "multipart/form-data")
    @Operation(
            summary = "Convert PDF to look like a scanned document",
@ -82,8 +87,46 @@ public class FakeScanController {
            PDFRenderer pdfRenderer = new PDFRenderer(document);

            for (int i = 0; i < document.getNumberOfPages(); i++) {
-                // Render page to image with specified resolution
-                BufferedImage image = pdfRenderer.renderImageWithDPI(i, resolution);
+                // Get page dimensions to calculate safe resolution
+                PDRectangle pageSize = document.getPage(i).getMediaBox();
+                float pageWidthPts = pageSize.getWidth();
+                float pageHeightPts = pageSize.getHeight();
+
+                // Calculate what the image dimensions would be at the requested resolution
+                int projectedWidth = (int) Math.ceil(pageWidthPts * resolution / 72.0);
+                int projectedHeight = (int) Math.ceil(pageHeightPts * resolution / 72.0);
+                long projectedPixels = (long) projectedWidth * projectedHeight;
+
+                // Calculate safe resolution that stays within limits
+                int safeResolution = resolution;
+                if (projectedWidth > MAX_IMAGE_WIDTH
+                        || projectedHeight > MAX_IMAGE_HEIGHT
+                        || projectedPixels > MAX_IMAGE_PIXELS) {
+                    double widthScale = (double) MAX_IMAGE_WIDTH / projectedWidth;
+                    double heightScale = (double) MAX_IMAGE_HEIGHT / projectedHeight;
+                    double pixelScale = Math.sqrt((double) MAX_IMAGE_PIXELS / projectedPixels);
+                    double minScale = Math.min(Math.min(widthScale, heightScale), pixelScale);
+                    safeResolution = (int) Math.max(72, resolution * minScale);
+
+                    log.warn(
+                            "Page {} would be too large at {}dpi ({}x{} pixels). Reducing to {}dpi",
+                            i + 1,
+                            resolution,
+                            projectedWidth,
+                            projectedHeight,
+                            safeResolution);
+                }
+
+                // Render page to image with safe resolution
+                BufferedImage image = pdfRenderer.renderImageWithDPI(i, safeResolution);
+
+                log.debug(
+                        "Processing page {} with dimensions {}x{} ({} pixels) at {}dpi",
+                        i + 1,
+                        image.getWidth(),
+                        image.getHeight(),
+                        (long) image.getWidth() * image.getHeight(),
+                        safeResolution);

                // 1. Convert to grayscale or keep color
                BufferedImage processed;
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java
@ -153,15 +153,19 @@ public class PipelineProcessor {
                        String filename = file.getFilename();
                        String providedExtension = "no extension";
                        if (filename != null && filename.contains(".")) {
-                            providedExtension = filename.substring(filename.lastIndexOf(".")).toLowerCase();
+                            providedExtension =
+                                    filename.substring(filename.lastIndexOf(".")).toLowerCase();
                        }
-                        
+
                        logPrintStream.println(
                                "No files with extension "
                                        + String.join(", ", inputFileTypes)
                                        + " found for operation "
                                        + operation
-                                        + ". Provided file '" + filename + "' has extension: " + providedExtension);
+                                        + ". Provided file '"
+                                        + filename
+                                        + "' has extension: "
+                                        + providedExtension);
                        hasErrors = true;
                    }
                }
@ -211,17 +215,21 @@ public class PipelineProcessor {
                    }
                } else {
                    // Get details about what files were actually provided
-                    List<String> providedExtensions = outputFiles.stream()
-                            .map(file -> {
-                                String filename = file.getFilename();
-                                if (filename != null && filename.contains(".")) {
-                                    return filename.substring(filename.lastIndexOf(".")).toLowerCase();
-                                }
-                                return "no extension";
-                            })
-                            .distinct()
-                            .toList();
-                    
+                    List<String> providedExtensions =
+                            outputFiles.stream()
+                                    .map(
+                                            file -> {
+                                                String filename = file.getFilename();
+                                                if (filename != null && filename.contains(".")) {
+                                                    return filename.substring(
+                                                                    filename.lastIndexOf("."))
+                                                            .toLowerCase();
+                                                }
+                                                return "no extension";
+                                            })
+                                    .distinct()
+                                    .toList();
+
                    logPrintStream.println(
                            "No files with extension "
                                    + String.join(", ", inputFileTypes)
@ -229,7 +237,9 @@ public class PipelineProcessor {
                                    + operation
                                    + ". Provided files have extensions: "
                                    + String.join(", ", providedExtensions)
-                                    + " (total files: " + outputFiles.size() + ")");
+                                    + " (total files: "
+                                    + outputFiles.size()
+                                    + ")");
                    hasErrors = true;
                }
            }
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/CertSignController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/CertSignController.java
@ -65,6 +65,7 @@ import org.springframework.web.bind.annotation.RestController;
 import org.springframework.web.multipart.MultipartFile;

 import io.github.pixee.security.Filenames;
+import io.micrometer.common.util.StringUtils;
 import io.swagger.v3.oas.annotations.Operation;
 import io.swagger.v3.oas.annotations.tags.Tag;

@ -166,7 +167,7 @@ public class CertSignController {
        Integer pageNumber = request.getPageNumber() != null ? (request.getPageNumber() - 1) : null;
        Boolean showLogo = request.getShowLogo();

-        if (certType == null) {
+        if (StringUtils.isBlank(certType)) {
            throw ExceptionUtils.createIllegalArgumentException(
                    "error.optionsNotSpecified",
                    "{0} options are not specified",
--- a/stirling-pdf/src/main/resources/static/js/fileInput.js
+++ b/stirling-pdf/src/main/resources/static/js/fileInput.js
@ -226,7 +226,8 @@ function setupFileInput(chooser) {

        try {
          const { isEncrypted, requiresPassword } = await decryptFile.checkFileEncrypted(file);
-          if (file.type === 'application/pdf' && isEncrypted) {
+          if (file.type === 'application/pdf' && isEncrypted && 
+              !window.location.pathname.includes('remove-password')) {
            decryptedFile = await decryptFile.decryptFile(file, requiresPassword);
            if (!decryptedFile) throw new Error('File decryption failed.');
          }
--- a/stirling-pdf/src/main/resources/templates/misc/ocr-pdf.html
+++ b/stirling-pdf/src/main/resources/templates/misc/ocr-pdf.html
@ -65,7 +65,7 @@
                  <label th:text="#{ocr.selectText.10}"></label>
                  <select class="form-control" name="ocrType">
                    <option value="skip-text" th:text="#{ocr.selectText.6}"></option>
-                    <option value="force-ocr" th:text="#{ocr.selectText.7}"></option>
+                    <option selected value="force-ocr" th:text="#{ocr.selectText.7}"></option>
                    <option value="Normal" th:text="#{ocr.selectText.8}"></option>
                  </select>
                </div>