exceptions

2025-08-26 22:29:24 +00:00 · 2025-07-02 23:51:03 +01:00 · 2025-07-02 23:51:03 +01:00 · c7b8b6d1e1
commit c7b8b6d1e1
parent a6e70b3df1
4 changed files with 76 additions and 35 deletions
--- a/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java
+++ b/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java
@ -154,11 +154,13 @@ public class TempFileCleanupService {
        boolean containerMode = isContainerMode();
        int unregisteredDeletedCount = cleanupUnregisteredFiles(containerMode, true, maxAgeMillis);
        
+        if(registeredDeletedCount >0 || unregisteredDeletedCount >0 || directoriesDeletedCount >0) {
        log.info(
                "Scheduled cleanup complete. Deleted {} registered files, {} unregistered files, {} directories",
                registeredDeletedCount,
                unregisteredDeletedCount,
                directoriesDeletedCount);
+        }
    }

    /**
@ -166,7 +168,6 @@ public class TempFileCleanupService {
     * important in Docker environments where temp files persist between container restarts.
     */
    private void runStartupCleanup() {
-        log.info("Running startup temporary file cleanup");
        boolean containerMode = isContainerMode();

        log.info(
@ -178,7 +179,6 @@ public class TempFileCleanupService {
        long maxAgeMillis = containerMode ? 0 : 24 * 60 * 60 * 1000; // 0 or 24 hours

        int totalDeletedCount = cleanupUnregisteredFiles(containerMode, false, maxAgeMillis);
-
        log.info(
                "Startup cleanup complete. Deleted {} temporary files/directories",
                totalDeletedCount);
@ -225,7 +225,7 @@ public class TempFileCleanupService {
                            tempDir -> {
                                try {
                                    String phase = isScheduled ? "scheduled" : "startup";
-                                    log.info(
+                                    log.debug(
                                            "Scanning directory for {} cleanup: {}",
                                            phase,
                                            tempDir);
--- a/common/src/main/java/stirling/software/common/util/ExceptionUtils.java
+++ b/common/src/main/java/stirling/software/common/util/ExceptionUtils.java
@ -298,9 +298,9 @@ public class ExceptionUtils {
     * @param e the exception that occurred
     */
    public static void logException(String operation, Exception e) {
-        if (e instanceof IOException && PdfErrorUtils.isCorruptedPdfError((IOException) e)) {
+        if (PdfErrorUtils.isCorruptedPdfError(e)) {
            log.warn("PDF corruption detected during {}: {}", operation, e.getMessage());
-        } else if (isEncryptionError((IOException) e) || isPasswordError((IOException) e)) {
+        } else if (e instanceof IOException && (isEncryptionError((IOException) e) || isPasswordError((IOException) e))) {
            log.info("PDF security issue during {}: {}", operation, e.getMessage());
        } else {
            log.error("Unexpected error during {}", operation, e);
--- a/common/src/main/java/stirling/software/common/util/PdfErrorUtils.java
+++ b/common/src/main/java/stirling/software/common/util/PdfErrorUtils.java
@ -12,7 +12,26 @@ public class PdfErrorUtils {
     * @return true if the error indicates PDF corruption, false otherwise
     */
    public static boolean isCorruptedPdfError(IOException e) {
-        String message = e.getMessage();
+        return isCorruptedPdfError(e.getMessage());
+    }
+
+    /**
+     * Checks if any Exception indicates a corrupted PDF file.
+     *
+     * <p>Only the exception's own message ({@code e.getMessage()}) is inspected; the cause chain
+     * is not examined. An exception whose message is {@code null} is reported as not corrupted.
+     *
+     * @param e the Exception to check; must not be {@code null} because it is dereferenced
+     * @return true if the error indicates PDF corruption, false otherwise
+     */
+    public static boolean isCorruptedPdfError(Exception e) {
+        // Delegate to the message-based overload so every overload applies the same indicators.
+        return isCorruptedPdfError(e.getMessage());
+    }
+
+    /**
+     * Checks if an error message indicates a corrupted PDF file.
+     *
+     * @param message the error message to check
+     * @return true if the message indicates PDF corruption, false otherwise
+     */
+    private static boolean isCorruptedPdfError(String message) {
        if (message == null) return false;

        // Check for common corruption indicators
@ -24,6 +43,10 @@ public class PdfErrorUtils {
                || message.contains("damaged")
                || message.contains("Unknown dir object")
                || message.contains("Can't dereference COSObject")
+                || message.contains("parseCOSString string should start with")
+                || message.contains("ICCBased colorspace array must have a stream")
+                || message.contains("1-based index not found")
+                || message.contains("Invalid dictionary, found:")
                || message.contains("AES initialization vector not fully read")
                || message.contains("BadPaddingException")
                || message.contains("Given final block not properly padded");
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java
@ -91,36 +91,54 @@ public class ExtractImagesController {
                    Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
            Set<Future<Void>> futures = new HashSet<>();

-            // Iterate over each page
-            for (int pgNum = 0; pgNum < document.getPages().getCount(); pgNum++) {
-                PDPage page = document.getPage(pgNum);
-                Future<Void> future =
-                        executor.submit(
-                                () -> {
-                                    // Use the page number directly from the iterator, so no need to
-                                    // calculate manually
-                                    int pageNum = document.getPages().indexOf(page) + 1;
+            // Safely iterate over each page, handling corrupt PDFs where page count might be wrong
+            try {
+                int pageCount = document.getPages().getCount();
+                log.debug("Document reports {} pages", pageCount);
                
-                                    try {
-                                        // Call the image extraction method for each page
-                                        extractImagesFromPage(
-                                                page,
-                                                format,
-                                                filename,
-                                                pageNum,
-                                                processedImages,
-                                                zos,
-                                                allowDuplicates);
-                                    } catch (IOException e) {
-                                        // Log the error and continue processing other pages
-                                        ExceptionUtils.logException("image extraction from page " + pageNum, e);
-                                    }
+                int consecutiveFailures = 0;
                
-                                    return null; // Callable requires a return type
-                                });
+                for (int pgNum = 0; pgNum < pageCount; pgNum++) {
+                    try {
+                        PDPage page = document.getPage(pgNum);
+                        consecutiveFailures = 0; // Reset on success
+                        final int currentPageNum = pgNum + 1; // Convert to 1-based page numbering
+                        Future<Void> future =
+                                executor.submit(
+                                        () -> {
+                                            try {
+                                                // Call the image extraction method for each page
+                                                extractImagesFromPage(
+                                                        page,
+                                                        format,
+                                                        filename,
+                                                        currentPageNum,
+                                                        processedImages,
+                                                        zos,
+                                                        allowDuplicates);
+                                            } catch (Exception e) {
+                                                // Log the error and continue processing other pages
+                                                ExceptionUtils.logException("image extraction from page " + currentPageNum, e);
+                                            }

-                // Add the Future object to the list to track completion
-                futures.add(future);
+                                            return null; // Callable requires a return type
+                                        });
+
+                        // Add the Future object to the list to track completion
+                        futures.add(future);
+                    } catch (Exception e) {
+                        consecutiveFailures++;
+                        ExceptionUtils.logException("page access for page " + (pgNum + 1), e);
+                        
+                        if (consecutiveFailures >= 3) {
+                            log.warn("Stopping page iteration after 3 consecutive failures");
+                            break;
+                        }
+                    }
+                }
+            } catch (Exception e) {
+                ExceptionUtils.logException("page count determination", e);
+                throw e;
            }

            // Wait for all tasks to complete