error tuning

2025-08-26 22:29:24 +00:00 · 2025-07-01 21:17:45 +01:00 · 2025-07-01 21:17:45 +01:00 · d79b6e29e0
commit d79b6e29e0
parent b2a0868f5b
9 changed files with 241 additions and 37 deletions
--- a/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java
+++ b/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java
@ -24,6 +24,7 @@ import lombok.extern.slf4j.Slf4j;

 import stirling.software.common.model.api.PDFFile;
 import stirling.software.common.util.ApplicationContextProvider;
+import stirling.software.common.util.PdfErrorUtils;
 import stirling.software.common.util.TempFileManager;
 import stirling.software.common.util.TempFileRegistry;

@ -354,7 +355,14 @@ public class CustomPDFDocumentFactory {

    private PDDocument loadFromFile(File file, long size, StreamCacheCreateFunction cache)
            throws IOException {
-        return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache);
+        try {
+            return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache);
+        } catch (IOException e) {
+            if (PdfErrorUtils.isCorruptedPdfError(e)) {
+                throw new IOException(PdfErrorUtils.getCorruptedPdfMessage(""), e);
+            }
+            throw e;
+        }
    }

    private PDDocument loadFromBytes(byte[] bytes, long size, StreamCacheCreateFunction cache)
@ -366,7 +374,15 @@ public class CustomPDFDocumentFactory {
            Files.write(tempFile, bytes);
            return loadFromFile(tempFile.toFile(), size, cache);
        }
-        return Loader.loadPDF(bytes, "", null, null, cache);
+        
+        try {
+            return Loader.loadPDF(bytes, "", null, null, cache);
+        } catch (IOException e) {
+            if (PdfErrorUtils.isCorruptedPdfError(e)) {
+                throw new IOException(PdfErrorUtils.getCorruptedPdfMessage(""), e);
+            }
+            throw e;
+        }
    }

    public PDDocument createNewDocument(MemoryUsageSetting settings) throws IOException {
--- a/common/src/main/java/stirling/software/common/util/PdfErrorUtils.java
+++ b/common/src/main/java/stirling/software/common/util/PdfErrorUtils.java
@ -0,0 +1,59 @@
+package stirling.software.common.util;
+
+import java.io.IOException;
+
+/**
+ * Utility class for detecting and handling PDF-related errors.
+ *
+ * <p>Detection is a case-sensitive substring match against the exception's own message only;
+ * the cause chain is not inspected. The messages produced by this class contain the word
+ * "damaged", so an exception that has already been wrapped with one of them is itself
+ * recognised by {@link #isCorruptedPdfError(IOException)}.
+ */
+public final class PdfErrorUtils {
+
+    /** Message fragments that are treated as indicators of a corrupted PDF. */
+    private static final String[] CORRUPTION_INDICATORS = {
+        "Missing root object specification",
+        "Header doesn't contain versioninfo",
+        "Expected trailer",
+        "Invalid PDF",
+        "Corrupted",
+        "damaged",
+        "Unknown dir object",
+        "Can't dereference COSObject",
+        "AES initialization vector not fully read",
+        "BadPaddingException",
+        "Given final block not properly padded"
+    };
+
+    private PdfErrorUtils() {
+        // Static helpers only; not meant to be instantiated.
+    }
+
+    /**
+     * Checks if an IOException indicates a corrupted PDF file.
+     *
+     * @param e the IOException to check; {@code null} is treated as "not a corruption error"
+     * @return true if the exception's message contains one of the known corruption indicators,
+     *     false otherwise (including when the exception or its message is {@code null})
+     */
+    public static boolean isCorruptedPdfError(IOException e) {
+        if (e == null) {
+            return false;
+        }
+        String message = e.getMessage();
+        if (message == null) {
+            return false;
+        }
+
+        for (String indicator : CORRUPTION_INDICATORS) {
+            if (message.contains(indicator)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Creates a user-friendly error message for corrupted PDF files.
+     *
+     * @param context additional context about where the error occurred (e.g., "during merge",
+     *     "during processing"); when {@code null} or empty, no "Error ...:" prefix is added
+     * @return a user-friendly error message
+     */
+    public static String getCorruptedPdfMessage(String context) {
+        String baseMessage = "PDF file appears to be corrupted or damaged. " +
+            "Please try using the 'Repair PDF' feature first to fix the file before proceeding with this operation.";
+
+        if (context != null && !context.isEmpty()) {
+            return "Error " + context + ": " + baseMessage;
+        }
+        return baseMessage;
+    }
+
+    /**
+     * Creates a user-friendly error message for multiple corrupted PDF files (e.g., during merge).
+     *
+     * @return a user-friendly error message for multiple file operations
+     */
+    public static String getCorruptedPdfMessageForMultipleFiles() {
+        return "One or more PDF files appear to be corrupted or damaged. " +
+            "Please try using the 'Repair PDF' feature on each file first before attempting to merge them.";
+    }
+}
--- a/common/src/main/java/stirling/software/common/util/PdfUtils.java
+++ b/common/src/main/java/stirling/software/common/util/PdfUtils.java
@ -135,6 +135,16 @@ public class PdfUtils {
            int DPI,
            String filename)
            throws IOException, Exception {
+        
+        // Validate and limit DPI to prevent excessive memory usage
+        final int MAX_SAFE_DPI = 300; // Maximum safe DPI to prevent memory issues
+        if (DPI > MAX_SAFE_DPI) {
+            throw new IllegalArgumentException(String.format(
+                "DPI value %d exceeds maximum safe limit of %d. " +
+                "High DPI values can cause memory issues and crashes. " +
+                "Please use a lower DPI value.", DPI, MAX_SAFE_DPI));
+        }
+        
        try (PDDocument document = pdfDocumentFactory.load(inputStream)) {
            PDFRenderer pdfRenderer = new PDFRenderer(document);
            pdfRenderer.setSubsamplingAllowed(true);
@ -158,7 +168,18 @@ public class PdfUtils {
                        writer.prepareWriteSequence(null);

                        for (int i = 0; i < pageCount; ++i) {
-                            BufferedImage image = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
+                            BufferedImage image;
+                            try {
+                                image = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
+                            } catch (IllegalArgumentException e) {
+                                if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
+                                    throw new IllegalArgumentException(String.format(
+                                        "PDF page %d is too large to render at %d DPI. " +
+                                        "Please try a lower DPI value (recommended: 150 or less).", 
+                                        i + 1, DPI), e);
+                                }
+                                throw e;
+                            }
                            writer.writeToSequence(new IIOImage(image, null, null), param);
                        }

@ -190,7 +211,18 @@ public class PdfUtils {
                        PdfImageDimensionValue dimension = pageSizes.get(settings);
                        if (dimension == null) {
                            // Render the image to get the dimensions
-                            pdfSizeImage = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
+                            try {
+                                pdfSizeImage = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
+                            } catch (IllegalArgumentException e) {
+                                if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
+                                    throw new IllegalArgumentException(String.format(
+                                        "PDF page %d is too large to render at %d DPI. " +
+                                        "The resulting image would exceed Java's maximum array size. " +
+                                        "Please try a lower DPI value (recommended: 150 or less).", 
+                                        i + 1, DPI), e);
+                                }
+                                throw e;
+                            }
                            pdfSizeImageIndex = i;
                            dimension =
                                    new PdfImageDimensionValue(
@ -218,7 +250,17 @@ public class PdfUtils {
                        if (firstImageAlreadyRendered && i == 0) {
                            pageImage = pdfSizeImage;
                        } else {
-                            pageImage = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
+                            try {
+                                pageImage = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
+                            } catch (IllegalArgumentException e) {
+                                if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
+                                    throw new IllegalArgumentException(String.format(
+                                        "PDF page %d is too large to render at %d DPI. " +
+                                        "Please try a lower DPI value (recommended: 150 or less).", 
+                                        i + 1, DPI), e);
+                                }
+                                throw e;
+                            }
                        }

                        // Calculate the x-coordinate to center the image
@ -238,7 +280,18 @@ public class PdfUtils {
                // Zip the images and return as byte array
                try (ZipOutputStream zos = new ZipOutputStream(baos)) {
                    for (int i = 0; i < pageCount; ++i) {
-                        BufferedImage image = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
+                        BufferedImage image;
+                        try {
+                            image = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
+                        } catch (IllegalArgumentException e) {
+                            if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
+                                throw new IllegalArgumentException(String.format(
+                                    "PDF page %d is too large to render at %d DPI. " +
+                                    "Please try a lower DPI value (recommended: 150 or less).", 
+                                    i + 1, DPI), e);
+                            }
+                            throw e;
+                        }
                        try (ByteArrayOutputStream baosImage = new ByteArrayOutputStream()) {
                            ImageIO.write(image, imageType, baosImage);

@ -276,7 +329,19 @@ public class PdfUtils {
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        pdfRenderer.setSubsamplingAllowed(true);
        for (int page = 0; page < document.getNumberOfPages(); ++page) {
-            BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
+            BufferedImage bim;
+            try {
+                bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
+            } catch (IllegalArgumentException e) {
+                if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
+                    throw new IllegalArgumentException(String.format(
+                        "PDF page %d is too large to render at 300 DPI. " +
+                        "The resulting image would exceed Java's maximum array size. " +
+                        "Please use a lower DPI value for PDF-to-image conversion.", 
+                        page + 1), e);
+                }
+                throw e;
+            }
            PDPage originalPage = document.getPage(page);

            float width = originalPage.getMediaBox().getWidth();
--- a/scripts/split_photos.py
+++ b/scripts/split_photos.py
@ -94,8 +94,14 @@ def split_photos(input_file, output_directory, tolerance=30, min_area=10000, min
        cropped_image = image[y:y+h, x:x+w]
        cropped_image = auto_rotate(cropped_image, angle_threshold)

-        # Remove the added border
-        cropped_image = cropped_image[border_size:-border_size, border_size:-border_size]
+        # Remove the added border, but ensure we don't create an empty image
+        if border_size > 0 and cropped_image.shape[0] > 2 * border_size and cropped_image.shape[1] > 2 * border_size:
+            cropped_image = cropped_image[border_size:-border_size, border_size:-border_size]
+
+        # Check if the cropped image is valid before saving
+        if cropped_image.size == 0 or cropped_image.shape[0] == 0 or cropped_image.shape[1] == 0:
+            print(f"Warning: Skipping empty image for region {idx+1}")
+            continue

        output_path = os.path.join(output_directory, f"{input_file_basename}_{idx+1}.png")
        cv2.imwrite(output_path, cropped_image)
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/MergeController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/MergeController.java
@ -36,6 +36,7 @@ import lombok.extern.slf4j.Slf4j;
 import stirling.software.SPDF.model.api.general.MergePdfsRequest;
 import stirling.software.common.service.CustomPDFDocumentFactory;
 import stirling.software.common.util.GeneralUtils;
+import stirling.software.common.util.PdfErrorUtils;
 import stirling.software.common.util.WebResponseUtils;

@RestController
@ -189,8 +190,15 @@ public class MergeController {
            mergedTempFile = Files.createTempFile("merged-", ".pdf").toFile();
            mergerUtility.setDestinationFileName(mergedTempFile.getAbsolutePath());

-            mergerUtility.mergeDocuments(
-                    pdfDocumentFactory.getStreamCacheFunction(totalSize)); // Merge the documents
+            try {
+                mergerUtility.mergeDocuments(
+                        pdfDocumentFactory.getStreamCacheFunction(totalSize)); // Merge the documents
+            } catch (IOException e) {
+                if (PdfErrorUtils.isCorruptedPdfError(e)) {
+                    throw new IOException(PdfErrorUtils.getCorruptedPdfMessageForMultipleFiles(), e);
+                }
+                throw e;
+            }

            // Load the merged PDF document
            mergedDocument = pdfDocumentFactory.load(mergedTempFile);
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java
@ -289,7 +289,7 @@ public class RearrangePagesPDFController {
                            + "_rearranged.pdf");
        } catch (IOException e) {
            log.error("Failed rearranging documents", e);
-            return null;
+            throw e;
        }
    }
 }
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java
@ -42,6 +42,7 @@ import lombok.extern.slf4j.Slf4j;
 import stirling.software.SPDF.model.api.PDFExtractImagesRequest;
 import stirling.software.common.service.CustomPDFDocumentFactory;
 import stirling.software.common.util.ImageProcessingUtils;
+import stirling.software.common.util.PdfErrorUtils;
 import stirling.software.common.util.WebResponseUtils;

@RestController
@ -180,7 +181,8 @@ public class ExtractImagesController {
        }
        int count = 1;
        for (COSName name : page.getResources().getXObjectNames()) {
-            if (page.getResources().isImageXObject(name)) {
+            try {
+                if (page.getResources().isImageXObject(name)) {
                PDImageXObject image = (PDImageXObject) page.getResources().getXObject(name);
                if (!allowDuplicates) {
                    byte[] data = ImageProcessingUtils.getImageData(image.getImage());
@ -209,6 +211,12 @@ public class ExtractImagesController {
                    zos.closeEntry();
                }
            }
+            } catch (IOException e) {
+                if (PdfErrorUtils.isCorruptedPdfError(e)) {
+                    throw new IOException(PdfErrorUtils.getCorruptedPdfMessage("during image extraction"), e);
+                }
+                throw e;
+            }
        }
    }

--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java
@ -150,21 +150,37 @@ public class GetInfoOnPDF {
            PDMetadata pdMetadata = document.getDocumentCatalog().getMetadata();
            if (pdMetadata != null) {
                COSInputStream metaStream = pdMetadata.createInputStream();
-                DomXmpParser domXmpParser = new DomXmpParser();
-                XMPMetadata xmpMeta = domXmpParser.parse(metaStream);
-
-                ByteArrayOutputStream baos = new ByteArrayOutputStream();
-                new XmpSerializer().serialize(xmpMeta, baos, true);
-                String xmpString = new String(baos.toByteArray(), StandardCharsets.UTF_8);
-
-                if (xmpString.contains(standardKeyword)) {
+                
+                // First try to read raw metadata as string to check for standard keywords
+                byte[] metadataBytes = metaStream.readAllBytes();
+                String rawMetadata = new String(metadataBytes, StandardCharsets.UTF_8);
+                
+                if (rawMetadata.contains(standardKeyword)) {
                    return true;
                }
+                
+                // If raw check doesn't find it, try parsing with XMP parser
+                // Reset stream for parsing
+                metaStream.close();
+                metaStream = pdMetadata.createInputStream();
+                
+                try {
+                    DomXmpParser domXmpParser = new DomXmpParser();
+                    XMPMetadata xmpMeta = domXmpParser.parse(metaStream);
+
+                    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+                    new XmpSerializer().serialize(xmpMeta, baos, true);
+                    String xmpString = new String(baos.toByteArray(), StandardCharsets.UTF_8);
+
+                    if (xmpString.contains(standardKeyword)) {
+                        return true;
+                    }
+                } catch (XmpParsingException e) {
+                    // XMP parsing failed, but we already checked raw metadata above
+                    log.debug("XMP parsing failed for standard check, but raw metadata was already checked: {}", e.getMessage());
+                }
            }
-        } catch (
-                Exception
-                        e) { // Catching general exception for brevity, ideally you'd catch specific
-            // exceptions.
+        } catch (Exception e) {
            log.error("exception", e);
        }

@ -392,13 +408,23 @@ public class GetInfoOnPDF {
            if (pdMetadata != null) {
                try {
                    COSInputStream is = pdMetadata.createInputStream();
-                    DomXmpParser domXmpParser = new DomXmpParser();
-                    XMPMetadata xmpMeta = domXmpParser.parse(is);
+                    
+                    try {
+                        DomXmpParser domXmpParser = new DomXmpParser();
+                        XMPMetadata xmpMeta = domXmpParser.parse(is);

-                    ByteArrayOutputStream os = new ByteArrayOutputStream();
-                    new XmpSerializer().serialize(xmpMeta, os, true);
-                    xmpString = new String(os.toByteArray(), StandardCharsets.UTF_8);
-                } catch (XmpParsingException | IOException e) {
+                        ByteArrayOutputStream os = new ByteArrayOutputStream();
+                        new XmpSerializer().serialize(xmpMeta, os, true);
+                        xmpString = new String(os.toByteArray(), StandardCharsets.UTF_8);
+                    } catch (XmpParsingException e) {
+                        // XMP parsing failed, try to read raw metadata instead
+                        log.debug("XMP parsing failed, reading raw metadata: {}", e.getMessage());
+                        is.close();
+                        is = pdMetadata.createInputStream();
+                        byte[] metadataBytes = is.readAllBytes();
+                        xmpString = new String(metadataBytes, StandardCharsets.UTF_8);
+                    }
+                } catch (IOException e) {
                    log.error("exception", e);
                }
            }
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/PasswordController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/PasswordController.java
@ -42,12 +42,28 @@ public class PasswordController {
        MultipartFile fileInput = request.getFileInput();
        String password = request.getPassword();
        PDDocument document = pdfDocumentFactory.load(fileInput, password);
-        document.setAllSecurityToBeRemoved(true);
-        return WebResponseUtils.pdfDocToWebResponse(
-                document,
-                Filenames.toSimpleFileName(fileInput.getOriginalFilename())
-                                .replaceFirst("[.][^.]+$", "")
-                        + "_password_removed.pdf");
+        
+        try {
+            document.setAllSecurityToBeRemoved(true);
+            return WebResponseUtils.pdfDocToWebResponse(
+                    document,
+                    Filenames.toSimpleFileName(fileInput.getOriginalFilename())
+                                    .replaceFirst("[.][^.]+$", "")
+                            + "_password_removed.pdf");
+        } catch (IOException e) {
+            // Check if this is an encryption/decryption error
+            if (e.getMessage() != null && 
+                (e.getMessage().contains("BadPaddingException") ||
+                 e.getMessage().contains("Given final block not properly padded") ||
+                 e.getMessage().contains("Failed to decrypt"))) {
+                
+                document.close();
+                throw new IOException("The PDF appears to have corrupted encryption data. " +
+                    "This can happen when the PDF was created with incompatible encryption methods. " +
+                    "Please try using the 'Repair PDF' feature first, or contact the document creator for a new copy.", e);
+            }
+            throw e;
+        }
    }

    @PostMapping(consumes = "multipart/form-data", value = "/add-password")