From d79b6e29e0968906b894528cd946c13ccff17448 Mon Sep 17 00:00:00 2001
From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
Date: Tue, 1 Jul 2025 21:17:45 +0100
Subject: [PATCH] error tuning

---
 .../service/CustomPDFDocumentFactory.java     | 20 ++++-
 .../software/common/util/PdfErrorUtils.java   | 59 +++++++++++++++
 .../software/common/util/PdfUtils.java        | 75 +++++++++++++++++--
 scripts/split_photos.py                       | 10 ++-
 .../SPDF/controller/api/MergeController.java  | 12 ++-
 .../api/RearrangePagesPDFController.java      |  2 +-
 .../api/misc/ExtractImagesController.java     | 10 ++-
 .../controller/api/security/GetInfoOnPDF.java | 62 ++++++++++-----
 .../api/security/PasswordController.java      | 28 +++++--
 9 files changed, 241 insertions(+), 37 deletions(-)
 create mode 100644 common/src/main/java/stirling/software/common/util/PdfErrorUtils.java

diff --git a/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java b/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java
index 51f52c34d..4da2412a8 100644
--- a/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java
+++ b/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java
@@ -24,6 +24,7 @@ import lombok.extern.slf4j.Slf4j;
 
 import stirling.software.common.model.api.PDFFile;
 import stirling.software.common.util.ApplicationContextProvider;
+import stirling.software.common.util.PdfErrorUtils;
 import stirling.software.common.util.TempFileManager;
 import stirling.software.common.util.TempFileRegistry;
 
@@ -354,7 +355,14 @@ public class CustomPDFDocumentFactory {
 
     private PDDocument loadFromFile(File file, long size, StreamCacheCreateFunction cache)
             throws IOException {
-        return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache);
+        try {
+            return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache);
+        } catch (IOException e) {
+            if (PdfErrorUtils.isCorruptedPdfError(e)) {
+                throw new IOException(PdfErrorUtils.getCorruptedPdfMessage(""), e);
+            }
+            throw e;
+        }
     }
 
     private PDDocument loadFromBytes(byte[] bytes, long size, StreamCacheCreateFunction cache)
@@ -366,7 +374,15 @@ public class CustomPDFDocumentFactory {
             Files.write(tempFile, bytes);
             return loadFromFile(tempFile.toFile(), size, cache);
         }
-        return Loader.loadPDF(bytes, "", null, null, cache);
+        
+        try {
+            return Loader.loadPDF(bytes, "", null, null, cache);
+        } catch (IOException e) {
+            if (PdfErrorUtils.isCorruptedPdfError(e)) {
+                throw new IOException(PdfErrorUtils.getCorruptedPdfMessage(""), e);
+            }
+            throw e;
+        }
     }
 
     public PDDocument createNewDocument(MemoryUsageSetting settings) throws IOException {
diff --git a/common/src/main/java/stirling/software/common/util/PdfErrorUtils.java b/common/src/main/java/stirling/software/common/util/PdfErrorUtils.java
new file mode 100644
index 000000000..31cc2c00a
--- /dev/null
+++ b/common/src/main/java/stirling/software/common/util/PdfErrorUtils.java
@@ -0,0 +1,59 @@
+package stirling.software.common.util;
+
+import java.io.IOException;
+
+/**
+ * Static helpers that recognise PDF-corruption failures from an exception's message text and build the user-facing messages reported for them.
+ */
+public class PdfErrorUtils {
+    
+    /**
+     * Checks whether the IOException's own message contains one of the known corruption markers (case-sensitive substring match; the cause chain is not inspected).
+     * <p>An exception whose message is {@code null} is reported as not corrupted.
+     * @param e the IOException to check; must not be {@code null}, because its message is read unconditionally
+     * @return true if the message contains any of the markers below, false otherwise (including when the message is null)
+     */
+    public static boolean isCorruptedPdfError(IOException e) {
+        String message = e.getMessage();
+        if (message == null) return false;
+        
+        // Marker substrings matched against the message; the last three look like decryption/padding failures, which are reported as corruption here as well
+        return message.contains("Missing root object specification") ||
+               message.contains("Header doesn't contain versioninfo") ||
+               message.contains("Expected trailer") ||
+               message.contains("Invalid PDF") ||
+               message.contains("Corrupted") ||
+               message.contains("damaged") ||
+               message.contains("Unknown dir object") ||
+               message.contains("Can't dereference COSObject") ||
+               message.contains("AES initialization vector not fully read") ||
+               message.contains("BadPaddingException") ||
+               message.contains("Given final block not properly padded");
+    }
+    
+    /**
+     * Builds the user-facing message for a single corrupted PDF, advising the user to run the 'Repair PDF' feature first.
+     * <p>A non-empty context is placed in front of the base message as {@code "Error " + context + ": "}.
+     * @param context where the error occurred (e.g., "during merge", "during processing"); null or empty yields the base message alone
+     * @return the base message, prefixed with the context when one is given
+     */
+    public static String getCorruptedPdfMessage(String context) {
+        String baseMessage = "PDF file appears to be corrupted or damaged. " +
+            "Please try using the 'Repair PDF' feature first to fix the file before proceeding with this operation.";
+            
+        if (context != null && !context.isEmpty()) {
+            return "Error " + context + ": " + baseMessage;
+        }
+        return baseMessage;
+    }
+    
+    /**
+     * Builds the user-facing message for operations over several PDF files; the wording refers to merging specifically.
+     * <p>The text is fixed: it takes no context and does not identify which file failed.
+     * @return a user-friendly error message for multiple file operations
+     */
+    public static String getCorruptedPdfMessageForMultipleFiles() {
+        return "One or more PDF files appear to be corrupted or damaged. " +
+            "Please try using the 'Repair PDF' feature on each file first before attempting to merge them.";
+    }
+}
\ No newline at end of file
diff --git a/common/src/main/java/stirling/software/common/util/PdfUtils.java b/common/src/main/java/stirling/software/common/util/PdfUtils.java
index 3986110e5..9ea65388c 100644
--- a/common/src/main/java/stirling/software/common/util/PdfUtils.java
+++ b/common/src/main/java/stirling/software/common/util/PdfUtils.java
@@ -135,6 +135,16 @@ public class PdfUtils {
             int DPI,
             String filename)
             throws IOException, Exception {
+        
+        // Validate and limit DPI to prevent excessive memory usage
+        final int MAX_SAFE_DPI = 300; // Maximum safe DPI to prevent memory issues
+        if (DPI > MAX_SAFE_DPI) {
+            throw new IllegalArgumentException(String.format(
+                "DPI value %d exceeds maximum safe limit of %d. " +
+                "High DPI values can cause memory issues and crashes. " +
+                "Please use a lower DPI value.", DPI, MAX_SAFE_DPI));
+        }
+        
         try (PDDocument document = pdfDocumentFactory.load(inputStream)) {
             PDFRenderer pdfRenderer = new PDFRenderer(document);
             pdfRenderer.setSubsamplingAllowed(true);
@@ -158,7 +168,18 @@ public class PdfUtils {
                         writer.prepareWriteSequence(null);
 
                         for (int i = 0; i < pageCount; ++i) {
-                            BufferedImage image = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
+                            BufferedImage image;
+                            try {
+                                image = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
+                            } catch (IllegalArgumentException e) {
+                                if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
+                                    throw new IllegalArgumentException(String.format(
+                                        "PDF page %d is too large to render at %d DPI. " +
+                                        "Please try a lower DPI value (recommended: 150 or less).", 
+                                        i + 1, DPI), e);
+                                }
+                                throw e;
+                            }
                             writer.writeToSequence(new IIOImage(image, null, null), param);
                         }
 
@@ -190,7 +211,18 @@ public class PdfUtils {
                         PdfImageDimensionValue dimension = pageSizes.get(settings);
                         if (dimension == null) {
                             // Render the image to get the dimensions
-                            pdfSizeImage = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
+                            try {
+                                pdfSizeImage = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
+                            } catch (IllegalArgumentException e) {
+                                if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
+                                    throw new IllegalArgumentException(String.format(
+                                        "PDF page %d is too large to render at %d DPI. " +
+                                        "The resulting image would exceed Java's maximum array size. " +
+                                        "Please try a lower DPI value (recommended: 150 or less).", 
+                                        i + 1, DPI), e);
+                                }
+                                throw e;
+                            }
                             pdfSizeImageIndex = i;
                             dimension =
                                     new PdfImageDimensionValue(
@@ -218,7 +250,17 @@ public class PdfUtils {
                         if (firstImageAlreadyRendered && i == 0) {
                             pageImage = pdfSizeImage;
                         } else {
-                            pageImage = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
+                            try {
+                                pageImage = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
+                            } catch (IllegalArgumentException e) {
+                                if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
+                                    throw new IllegalArgumentException(String.format(
+                                        "PDF page %d is too large to render at %d DPI. " +
+                                        "Please try a lower DPI value (recommended: 150 or less).", 
+                                        i + 1, DPI), e);
+                                }
+                                throw e;
+                            }
                         }
 
                         // Calculate the x-coordinate to center the image
@@ -238,7 +280,18 @@ public class PdfUtils {
                 // Zip the images and return as byte array
                 try (ZipOutputStream zos = new ZipOutputStream(baos)) {
                     for (int i = 0; i < pageCount; ++i) {
-                        BufferedImage image = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
+                        BufferedImage image;
+                        try {
+                            image = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
+                        } catch (IllegalArgumentException e) {
+                            if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
+                                throw new IllegalArgumentException(String.format(
+                                    "PDF page %d is too large to render at %d DPI. " +
+                                    "Please try a lower DPI value (recommended: 150 or less).", 
+                                    i + 1, DPI), e);
+                            }
+                            throw e;
+                        }
                         try (ByteArrayOutputStream baosImage = new ByteArrayOutputStream()) {
                             ImageIO.write(image, imageType, baosImage);
 
@@ -276,7 +329,19 @@ public class PdfUtils {
         PDFRenderer pdfRenderer = new PDFRenderer(document);
         pdfRenderer.setSubsamplingAllowed(true);
         for (int page = 0; page < document.getNumberOfPages(); ++page) {
-            BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
+            BufferedImage bim;
+            try {
+                bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
+            } catch (IllegalArgumentException e) {
+                if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
+                    throw new IllegalArgumentException(String.format(
+                        "PDF page %d is too large to render at 300 DPI. " +
+                        "The resulting image would exceed Java's maximum array size. " +
+                        "Please use a lower DPI value for PDF-to-image conversion.", 
+                        page + 1), e);
+                }
+                throw e;
+            }
             PDPage originalPage = document.getPage(page);
 
             float width = originalPage.getMediaBox().getWidth();
diff --git a/scripts/split_photos.py b/scripts/split_photos.py
index a007fdeb4..510ca8409 100644
--- a/scripts/split_photos.py
+++ b/scripts/split_photos.py
@@ -94,8 +94,14 @@ def split_photos(input_file, output_directory, tolerance=30, min_area=10000, min
         cropped_image = image[y:y+h, x:x+w]
         cropped_image = auto_rotate(cropped_image, angle_threshold)
 
-        # Remove the added border
-        cropped_image = cropped_image[border_size:-border_size, border_size:-border_size]
+        # Remove the added border, but ensure we don't create an empty image
+        if border_size > 0 and cropped_image.shape[0] > 2 * border_size and cropped_image.shape[1] > 2 * border_size:
+            cropped_image = cropped_image[border_size:-border_size, border_size:-border_size]
+
+        # Check if the cropped image is valid before saving
+        if cropped_image.size == 0 or cropped_image.shape[0] == 0 or cropped_image.shape[1] == 0:
+            print(f"Warning: Skipping empty image for region {idx+1}")
+            continue
 
         output_path = os.path.join(output_directory, f"{input_file_basename}_{idx+1}.png")
         cv2.imwrite(output_path, cropped_image)
diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/MergeController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/MergeController.java
index ddd988ef9..277828970 100644
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/MergeController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/MergeController.java
@@ -36,6 +36,7 @@ import lombok.extern.slf4j.Slf4j;
 import stirling.software.SPDF.model.api.general.MergePdfsRequest;
 import stirling.software.common.service.CustomPDFDocumentFactory;
 import stirling.software.common.util.GeneralUtils;
+import stirling.software.common.util.PdfErrorUtils;
 import stirling.software.common.util.WebResponseUtils;
 
 @RestController
@@ -189,8 +190,15 @@ public class MergeController {
             mergedTempFile = Files.createTempFile("merged-", ".pdf").toFile();
             mergerUtility.setDestinationFileName(mergedTempFile.getAbsolutePath());
 
-            mergerUtility.mergeDocuments(
-                    pdfDocumentFactory.getStreamCacheFunction(totalSize)); // Merge the documents
+            try {
+                mergerUtility.mergeDocuments(
+                        pdfDocumentFactory.getStreamCacheFunction(totalSize)); // Merge the documents
+            } catch (IOException e) {
+                if (PdfErrorUtils.isCorruptedPdfError(e)) {
+                    throw new IOException(PdfErrorUtils.getCorruptedPdfMessageForMultipleFiles(), e);
+                }
+                throw e;
+            }
 
             // Load the merged PDF document
             mergedDocument = pdfDocumentFactory.load(mergedTempFile);
diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java
index 3bf2ec802..1f2d3a7c3 100644
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java
@@ -289,7 +289,7 @@ public class RearrangePagesPDFController {
                             + "_rearranged.pdf");
         } catch (IOException e) {
             log.error("Failed rearranging documents", e);
-            return null;
+            throw e;
         }
     }
 }
diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java
index cb06b9f4d..5b21ab1a7 100644
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java
@@ -42,6 +42,7 @@ import lombok.extern.slf4j.Slf4j;
 import stirling.software.SPDF.model.api.PDFExtractImagesRequest;
 import stirling.software.common.service.CustomPDFDocumentFactory;
 import stirling.software.common.util.ImageProcessingUtils;
+import stirling.software.common.util.PdfErrorUtils;
 import stirling.software.common.util.WebResponseUtils;
 
 @RestController
@@ -180,7 +181,8 @@ public class ExtractImagesController {
         }
         int count = 1;
         for (COSName name : page.getResources().getXObjectNames()) {
-            if (page.getResources().isImageXObject(name)) {
+            try {
+                if (page.getResources().isImageXObject(name)) {
                 PDImageXObject image = (PDImageXObject) page.getResources().getXObject(name);
                 if (!allowDuplicates) {
                     byte[] data = ImageProcessingUtils.getImageData(image.getImage());
@@ -209,6 +211,12 @@ public class ExtractImagesController {
                     zos.closeEntry();
                 }
             }
+            } catch (IOException e) {
+                if (PdfErrorUtils.isCorruptedPdfError(e)) {
+                    throw new IOException(PdfErrorUtils.getCorruptedPdfMessage("during image extraction"), e);
+                }
+                throw e;
+            }
         }
     }
 
diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java
index c630106e4..d4ffbab89 100644
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java
@@ -150,21 +150,37 @@ public class GetInfoOnPDF {
             PDMetadata pdMetadata = document.getDocumentCatalog().getMetadata();
             if (pdMetadata != null) {
                 COSInputStream metaStream = pdMetadata.createInputStream();
-                DomXmpParser domXmpParser = new DomXmpParser();
-                XMPMetadata xmpMeta = domXmpParser.parse(metaStream);
-
-                ByteArrayOutputStream baos = new ByteArrayOutputStream();
-                new XmpSerializer().serialize(xmpMeta, baos, true);
-                String xmpString = new String(baos.toByteArray(), StandardCharsets.UTF_8);
-
-                if (xmpString.contains(standardKeyword)) {
+                
+                // First try to read raw metadata as string to check for standard keywords
+                byte[] metadataBytes = metaStream.readAllBytes();
+                String rawMetadata = new String(metadataBytes, StandardCharsets.UTF_8);
+                
+                if (rawMetadata.contains(standardKeyword)) {
                     return true;
                 }
+                
+                // If raw check doesn't find it, try parsing with XMP parser
+                // Reset stream for parsing
+                metaStream.close();
+                metaStream = pdMetadata.createInputStream();
+                
+                try {
+                    DomXmpParser domXmpParser = new DomXmpParser();
+                    XMPMetadata xmpMeta = domXmpParser.parse(metaStream);
+
+                    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+                    new XmpSerializer().serialize(xmpMeta, baos, true);
+                    String xmpString = new String(baos.toByteArray(), StandardCharsets.UTF_8);
+
+                    if (xmpString.contains(standardKeyword)) {
+                        return true;
+                    }
+                } catch (XmpParsingException e) {
+                    // XMP parsing failed, but we already checked raw metadata above
+                    log.debug("XMP parsing failed for standard check, but raw metadata was already checked: {}", e.getMessage());
+                }
             }
-        } catch (
-                Exception
-                        e) { // Catching general exception for brevity, ideally you'd catch specific
-            // exceptions.
+        } catch (Exception e) {
             log.error("exception", e);
         }
 
@@ -392,13 +408,23 @@ public class GetInfoOnPDF {
             if (pdMetadata != null) {
                 try {
                     COSInputStream is = pdMetadata.createInputStream();
-                    DomXmpParser domXmpParser = new DomXmpParser();
-                    XMPMetadata xmpMeta = domXmpParser.parse(is);
+                    
+                    try {
+                        DomXmpParser domXmpParser = new DomXmpParser();
+                        XMPMetadata xmpMeta = domXmpParser.parse(is);
 
-                    ByteArrayOutputStream os = new ByteArrayOutputStream();
-                    new XmpSerializer().serialize(xmpMeta, os, true);
-                    xmpString = new String(os.toByteArray(), StandardCharsets.UTF_8);
-                } catch (XmpParsingException | IOException e) {
+                        ByteArrayOutputStream os = new ByteArrayOutputStream();
+                        new XmpSerializer().serialize(xmpMeta, os, true);
+                        xmpString = new String(os.toByteArray(), StandardCharsets.UTF_8);
+                    } catch (XmpParsingException e) {
+                        // XMP parsing failed, try to read raw metadata instead
+                        log.debug("XMP parsing failed, reading raw metadata: {}", e.getMessage());
+                        is.close();
+                        is = pdMetadata.createInputStream();
+                        byte[] metadataBytes = is.readAllBytes();
+                        xmpString = new String(metadataBytes, StandardCharsets.UTF_8);
+                    }
+                } catch (IOException e) {
                     log.error("exception", e);
                 }
             }
diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/PasswordController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/PasswordController.java
index 4567fcb7e..61808b9d5 100644
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/PasswordController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/PasswordController.java
@@ -42,12 +42,28 @@ public class PasswordController {
         MultipartFile fileInput = request.getFileInput();
         String password = request.getPassword();
         PDDocument document = pdfDocumentFactory.load(fileInput, password);
-        document.setAllSecurityToBeRemoved(true);
-        return WebResponseUtils.pdfDocToWebResponse(
-                document,
-                Filenames.toSimpleFileName(fileInput.getOriginalFilename())
-                                .replaceFirst("[.][^.]+$", "")
-                        + "_password_removed.pdf");
+        
+        try {
+            document.setAllSecurityToBeRemoved(true);
+            return WebResponseUtils.pdfDocToWebResponse(
+                    document,
+                    Filenames.toSimpleFileName(fileInput.getOriginalFilename())
+                                    .replaceFirst("[.][^.]+$", "")
+                            + "_password_removed.pdf");
+        } catch (IOException e) {
+            // Check if this is an encryption/decryption error
+            if (e.getMessage() != null && 
+                (e.getMessage().contains("BadPaddingException") ||
+                 e.getMessage().contains("Given final block not properly padded") ||
+                 e.getMessage().contains("Failed to decrypt"))) {
+                
+                document.close();
+                throw new IOException("The PDF appears to have corrupted encryption data. " +
+                    "This can happen when the PDF was created with incompatible encryption methods. " +
+                    "Please try using the 'Repair PDF' feature first, or contact the document creator for a new copy.", e);
+            }
+            throw e;
+        }
     }
 
     @PostMapping(consumes = "multipart/form-data", value = "/add-password")