exceptions

2025-08-27 06:39:24 +00:00 · 2025-07-02 23:51:03 +01:00 · 2025-07-02 23:51:03 +01:00 · c7b8b6d1e1
commit c7b8b6d1e1
parent a6e70b3df1
4 changed files with 76 additions and 35 deletions
--- a/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java
+++ b/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java
@ -154,11 +154,13 @@ public class TempFileCleanupService {
        boolean containerMode = isContainerMode();
        int unregisteredDeletedCount = cleanupUnregisteredFiles(containerMode, true, maxAgeMillis);
        if(registeredDeletedCount >0 || unregisteredDeletedCount >0 || directoriesDeletedCount >0) {
        log.info(
                "Scheduled cleanup complete. Deleted {} registered files, {} unregistered files, {} directories",
                registeredDeletedCount,
                unregisteredDeletedCount,
                directoriesDeletedCount);
        }
    }
    /**
@ -166,7 +168,6 @@ public class TempFileCleanupService {
     * important in Docker environments where temp files persist between container restarts.
     */
    private void runStartupCleanup() {
        log.info("Running startup temporary file cleanup");
        boolean containerMode = isContainerMode();
        log.info(
@ -178,7 +179,6 @@ public class TempFileCleanupService {
        long maxAgeMillis = containerMode ? 0 : 24 * 60 * 60 * 1000; // 0 or 24 hours
        int totalDeletedCount = cleanupUnregisteredFiles(containerMode, false, maxAgeMillis);
        log.info(
                "Startup cleanup complete. Deleted {} temporary files/directories",
                totalDeletedCount);
@ -225,7 +225,7 @@ public class TempFileCleanupService {
                            tempDir -> {
                                try {
                                    String phase = isScheduled ? "scheduled" : "startup";
-                                    log.info(
+                                    log.debug(
                                            "Scanning directory for {} cleanup: {}",
                                            phase,
                                            tempDir);
--- a/common/src/main/java/stirling/software/common/util/ExceptionUtils.java
+++ b/common/src/main/java/stirling/software/common/util/ExceptionUtils.java
@ -298,9 +298,9 @@ public class ExceptionUtils {
     * @param e the exception that occurred
     */
    public static void logException(String operation, Exception e) {
-        if (e instanceof IOException && PdfErrorUtils.isCorruptedPdfError((IOException) e)) {
+        if (PdfErrorUtils.isCorruptedPdfError(e)) {
            log.warn("PDF corruption detected during {}: {}", operation, e.getMessage());
-        } else if (isEncryptionError((IOException) e) || isPasswordError((IOException) e)) {
+        } else if (e instanceof IOException && (isEncryptionError((IOException) e) || isPasswordError((IOException) e))) {
            log.info("PDF security issue during {}: {}", operation, e.getMessage());
        } else {
            log.error("Unexpected error during {}", operation, e);
--- a/common/src/main/java/stirling/software/common/util/PdfErrorUtils.java
+++ b/common/src/main/java/stirling/software/common/util/PdfErrorUtils.java
@ -12,7 +12,26 @@ public class PdfErrorUtils {
     * @return true if the error indicates PDF corruption, false otherwise
     */
    public static boolean isCorruptedPdfError(IOException e) {
-        String message = e.getMessage();
+        return isCorruptedPdfError(e.getMessage());
    }
    /**
     * Checks if any Exception indicates a corrupted PDF file.
     *
     * <p>Only the exception's own message ({@code e.getMessage()}) is inspected: it is passed to
     * the message-based overload of this method, which returns {@code false} for a {@code null}
     * message. The exception's type and its cause chain are not examined here.
     *
     * @param e the Exception to check; must not be {@code null}, because its message is read
     *     directly
     * @return true if the error indicates PDF corruption, false otherwise
     */
    public static boolean isCorruptedPdfError(Exception e) {
        // Delegates to the String overload so every exception type shares one set of indicators.
        return isCorruptedPdfError(e.getMessage());
    }
    /**
     * Checks if an error message indicates a corrupted PDF file.
     *
     * @param message the error message to check
     * @return true if the message indicates PDF corruption, false otherwise
     */
    private static boolean isCorruptedPdfError(String message) {
        if (message == null) return false;
        // Check for common corruption indicators
@ -24,6 +43,10 @@ public class PdfErrorUtils {
                || message.contains("damaged")
                || message.contains("Unknown dir object")
                || message.contains("Can't dereference COSObject")
                || message.contains("parseCOSString string should start with")
                || message.contains("ICCBased colorspace array must have a stream")
                || message.contains("1-based index not found")
                || message.contains("Invalid dictionary, found:")
                || message.contains("AES initialization vector not fully read")
                || message.contains("BadPaddingException")
                || message.contains("Given final block not properly padded");
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java
@ -91,36 +91,54 @@ public class ExtractImagesController {
                    Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
            Set<Future<Void>> futures = new HashSet<>();
-            // Iterate over each page
+            // Safely iterate over each page, handling corrupt PDFs where page count might be wrong
-            for (int pgNum = 0; pgNum < document.getPages().getCount(); pgNum++) {
+            try {
-                PDPage page = document.getPage(pgNum);
+                int pageCount = document.getPages().getCount();
-                Future<Void> future =
+                log.debug("Document reports {} pages", pageCount);
                        executor.submit(
                                () -> {
                                    // Use the page number directly from the iterator, so no need to
                                    // calculate manually
                                    int pageNum = document.getPages().indexOf(page) + 1;
-                                    try {
+                int consecutiveFailures = 0;
                                        // Call the image extraction method for each page
                                        extractImagesFromPage(
                                                page,
                                                format,
                                                filename,
                                                pageNum,
                                                processedImages,
                                                zos,
                                                allowDuplicates);
                                    } catch (IOException e) {
                                        // Log the error and continue processing other pages
                                        ExceptionUtils.logException("image extraction from page " + pageNum, e);
                                    }
-                                    return null; // Callable requires a return type
+                for (int pgNum = 0; pgNum < pageCount; pgNum++) {
-                                });
+                    try {
                        PDPage page = document.getPage(pgNum);
                        consecutiveFailures = 0; // Reset on success
                        final int currentPageNum = pgNum + 1; // Convert to 1-based page numbering
                        Future<Void> future =
                                executor.submit(
                                        () -> {
                                            try {
                                                // Call the image extraction method for each page
                                                extractImagesFromPage(
                                                        page,
                                                        format,
                                                        filename,
                                                        currentPageNum,
                                                        processedImages,
                                                        zos,
                                                        allowDuplicates);
                                            } catch (Exception e) {
                                                // Log the error and continue processing other pages
                                                ExceptionUtils.logException("image extraction from page " + currentPageNum, e);
                                            }
-                // Add the Future object to the list to track completion
+                                            return null; // Callable requires a return type
-                futures.add(future);
+                                        });
                        // Add the Future object to the list to track completion
                        futures.add(future);
                    } catch (Exception e) {
                        consecutiveFailures++;
                        ExceptionUtils.logException("page access for page " + (pgNum + 1), e);
                        if (consecutiveFailures >= 3) {
                            log.warn("Stopping page iteration after 3 consecutive failures");
                            break;
                        }
                    }
                }
            } catch (Exception e) {
                ExceptionUtils.logException("page count determination", e);
                throw e;
            }
            // Wait for all tasks to complete