diff --git a/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java b/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java index 25b3d9501..d5993346e 100644 --- a/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java +++ b/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java @@ -411,7 +411,7 @@ public class CustomPDFDocumentFactory { try { document.setAllSecurityToBeRemoved(true); } catch (Exception e) { - log.error("Decryption failed", e); + ExceptionUtils.logException("PDF decryption", e); throw new IOException("PDF decryption failed", e); } } diff --git a/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java b/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java index 895aa70de..53d7920b8 100644 --- a/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java +++ b/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java @@ -72,12 +72,12 @@ public class TempFileCleanupService { fileName -> fileName.contains("jetty") || fileName.startsWith("jetty-") - || fileName.equals("proc") - || fileName.equals("sys") - || fileName.equals("dev") - || fileName.equals("hsperfdata_stirlingpdfuser") + || "proc".equals(fileName) + || "sys".equals(fileName) + || "dev".equals(fileName) + || "hsperfdata_stirlingpdfuser".equals(fileName) || fileName.startsWith("hsperfdata_") - || fileName.equals(".pdfbox.cache"); + || ".pdfbox.cache".equals(fileName); @PostConstruct public void init() { @@ -153,12 +153,14 @@ public class TempFileCleanupService { // Clean up unregistered temp files based on our cleanup strategy boolean containerMode = isContainerMode(); int unregisteredDeletedCount = cleanupUnregisteredFiles(containerMode, true, maxAgeMillis); - + + if(registeredDeletedCount >0 || unregisteredDeletedCount >0 || directoriesDeletedCount >0) { log.info( "Scheduled cleanup complete. Deleted {} registered files, {} unregistered files, {} directories", registeredDeletedCount, unregisteredDeletedCount, directoriesDeletedCount); + } } /** @@ -166,7 +168,6 @@ public class TempFileCleanupService { * important in Docker environments where temp files persist between container restarts. */ private void runStartupCleanup() { - log.info("Running startup temporary file cleanup"); boolean containerMode = isContainerMode(); log.info( @@ -178,7 +179,6 @@ public class TempFileCleanupService { long maxAgeMillis = containerMode ? 0 : 24 * 60 * 60 * 1000; // 0 or 24 hours int totalDeletedCount = cleanupUnregisteredFiles(containerMode, false, maxAgeMillis); - log.info( "Startup cleanup complete. Deleted {} temporary files/directories", totalDeletedCount); @@ -225,7 +225,7 @@ public class TempFileCleanupService { tempDir -> { try { String phase = isScheduled ? "scheduled" : "startup"; - log.info( + log.debug( "Scanning directory for {} cleanup: {}", phase, tempDir); diff --git a/common/src/main/java/stirling/software/common/util/ExceptionUtils.java b/common/src/main/java/stirling/software/common/util/ExceptionUtils.java index fb6d501d9..061cf1450 100644 --- a/common/src/main/java/stirling/software/common/util/ExceptionUtils.java +++ b/common/src/main/java/stirling/software/common/util/ExceptionUtils.java @@ -298,9 +298,9 @@ public class ExceptionUtils { * @param e the exception that occurred */ public static void logException(String operation, Exception e) { - if (e instanceof IOException && PdfErrorUtils.isCorruptedPdfError((IOException) e)) { + if (PdfErrorUtils.isCorruptedPdfError(e)) { log.warn("PDF corruption detected during {}: {}", operation, e.getMessage()); - } else if (isEncryptionError((IOException) e) || isPasswordError((IOException) e)) { + } else if (e instanceof IOException && (isEncryptionError((IOException) e) || isPasswordError((IOException) e))) { log.info("PDF security issue during {}: {}", operation, e.getMessage()); } else { log.error("Unexpected error during {}", operation, e); diff --git a/common/src/main/java/stirling/software/common/util/PdfErrorUtils.java b/common/src/main/java/stirling/software/common/util/PdfErrorUtils.java index e18922c10..c67e2a4ec 100644 --- a/common/src/main/java/stirling/software/common/util/PdfErrorUtils.java +++ b/common/src/main/java/stirling/software/common/util/PdfErrorUtils.java @@ -12,7 +12,26 @@ public class PdfErrorUtils { * @return true if the error indicates PDF corruption, false otherwise */ public static boolean isCorruptedPdfError(IOException e) { - String message = e.getMessage(); + return isCorruptedPdfError(e.getMessage()); + } + + /** + * Checks if any Exception indicates a corrupted PDF file. + * + * @param e the Exception to check + * @return true if the error indicates PDF corruption, false otherwise + */ + public static boolean isCorruptedPdfError(Exception e) { + return isCorruptedPdfError(e.getMessage()); + } + + /** + * Checks if an error message indicates a corrupted PDF file. + * + * @param message the error message to check + * @return true if the message indicates PDF corruption, false otherwise + */ + private static boolean isCorruptedPdfError(String message) { if (message == null) return false; // Check for common corruption indicators @@ -24,6 +43,10 @@ public class PdfErrorUtils { || message.contains("damaged") || message.contains("Unknown dir object") || message.contains("Can't dereference COSObject") + || message.contains("parseCOSString string should start with") + || message.contains("ICCBased colorspace array must have a stream") + || message.contains("1-based index not found") + || message.contains("Invalid dictionary, found:") || message.contains("AES initialization vector not fully read") || message.contains("BadPaddingException") || message.contains("Given final block not properly padded"); diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/MergeController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/MergeController.java index 585ce79bd..4e05392c8 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/MergeController.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/MergeController.java @@ -148,7 +148,7 @@ public class MergeController { try (PDDocument doc = pdfDocumentFactory.load(file)) { pageIndex += doc.getNumberOfPages(); } catch (IOException e) { - log.error("Error loading document for TOC generation", e); + ExceptionUtils.logException("document loading for TOC generation", e); pageIndex++; // Increment by at least one if we can't determine page count } } @@ -240,7 +240,11 @@ public class MergeController { baos, mergedFileName); // Return the modified PDF } catch (Exception ex) { - log.error("Error in merge pdf process", ex); + if (ex instanceof IOException && PdfErrorUtils.isCorruptedPdfError((IOException) ex)) { + log.warn("Corrupted PDF detected in merge pdf process: {}", ex.getMessage()); + } else { + log.error("Error in merge pdf process", ex); + } throw ex; } finally { if (mergedDocument != null) { diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java index 1f2d3a7c3..717c85016 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java @@ -25,6 +25,7 @@ import stirling.software.SPDF.model.SortTypes; import stirling.software.SPDF.model.api.PDFWithPageNums; import stirling.software.SPDF.model.api.general.RearrangePagesRequest; import stirling.software.common.service.CustomPDFDocumentFactory; +import stirling.software.common.util.ExceptionUtils; import stirling.software.common.util.GeneralUtils; import stirling.software.common.util.WebResponseUtils; @@ -288,7 +289,7 @@ public class RearrangePagesPDFController { .replaceFirst("[.][^.]+$", "") + "_rearranged.pdf"); } catch (IOException e) { - log.error("Failed rearranging documents", e); + ExceptionUtils.logException("document rearrangement", e); throw e; } } diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/SplitPDFController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/SplitPDFController.java index fba99cae3..3438a8789 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/SplitPDFController.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/SplitPDFController.java @@ -29,6 +29,7 @@ import lombok.extern.slf4j.Slf4j; import stirling.software.SPDF.model.api.PDFWithPageNums; import stirling.software.common.service.CustomPDFDocumentFactory; +import stirling.software.common.util.ExceptionUtils; import stirling.software.common.util.WebResponseUtils; @RestController @@ -96,7 +97,7 @@ public class SplitPDFController { splitDocumentsBoas.add(baos); } catch (Exception e) { - log.error("Failed splitting documents and saving them", e); + ExceptionUtils.logException("document splitting and saving", e); throw e; } } diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/SplitPdfByChaptersController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/SplitPdfByChaptersController.java index c90af2c67..f0f9fb012 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/SplitPdfByChaptersController.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/SplitPdfByChaptersController.java @@ -159,7 +159,7 @@ public class SplitPdfByChaptersController { Bookmark lastBookmark = bookmarks.get(bookmarks.size() - 1); } catch (Exception e) { - log.error("Unable to extract outline items", e); + ExceptionUtils.logException("outline extraction", e); return ResponseEntity.internalServerError() .body("Unable to extract outline items".getBytes()); } @@ -294,7 +294,7 @@ public class SplitPdfByChaptersController { splitDocumentsBoas.add(baos); } catch (Exception e) { - log.error("Failed splitting documents and saving them", e); + ExceptionUtils.logException("document splitting and saving", e); throw e; } } diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/SplitPdfBySizeController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/SplitPdfBySizeController.java index e152a5354..0dbbd933c 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/SplitPdfBySizeController.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/SplitPdfBySizeController.java @@ -105,7 +105,7 @@ public class SplitPdfBySizeController { log.debug("PDF splitting completed successfully"); } catch (Exception e) { - log.error("Error loading or processing PDF document", e); + ExceptionUtils.logException("PDF document loading or processing", e); throw e; } } catch (IOException e) { @@ -114,7 +114,7 @@ public class SplitPdfBySizeController { } } catch (Exception e) { - log.error("Exception during PDF splitting process", e); + ExceptionUtils.logException("PDF splitting process", e); throw e; // Re-throw to ensure proper error response } finally { try { @@ -278,7 +278,7 @@ public class SplitPdfBySizeController { currentDoc = pdfDocumentFactory.createNewDocumentBasedOnOldDocument(sourceDocument); log.debug("Successfully created initial output document"); } catch (Exception e) { - log.error("Error creating initial output document", e); + ExceptionUtils.logException("initial output document creation", e); throw ExceptionUtils.createFileProcessingException("split", e); } diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java index ac6eed57a..ab8e5b3f8 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java @@ -606,7 +606,7 @@ public class CompressController { return "empty-stream"; } } catch (Exception e) { - log.error("Error generating image hash", e); + ExceptionUtils.logException("image hash generation", e); return "fallback-" + System.identityHashCode(image); } } diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/DecompressPdfController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/DecompressPdfController.java index cfbc88e8b..5c432ce57 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/DecompressPdfController.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/DecompressPdfController.java @@ -26,6 +26,7 @@ import lombok.extern.slf4j.Slf4j; import stirling.software.common.model.api.PDFFile; import stirling.software.common.service.CustomPDFDocumentFactory; +import stirling.software.common.util.ExceptionUtils; import stirling.software.common.util.WebResponseUtils; @RestController @@ -134,7 +135,7 @@ public class DecompressPdfController { stream.setInt(COSName.LENGTH, decompressedBytes.length); } } catch (IOException e) { - log.error("Error decompressing stream", e); + ExceptionUtils.logException("stream decompression", e); // Continue processing other streams even if this one fails } } diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java index 940978bbb..249e9263c 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java @@ -91,39 +91,54 @@ public class ExtractImagesController { Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); Set> futures = new HashSet<>(); - // Iterate over each page - for (int pgNum = 0; pgNum < document.getPages().getCount(); pgNum++) { - PDPage page = document.getPage(pgNum); - Future future = - executor.submit( - () -> { - // Use the page number directly from the iterator, so no need to - // calculate manually - int pageNum = document.getPages().indexOf(page) + 1; + // Safely iterate over each page, handling corrupt PDFs where page count might be wrong + try { + int pageCount = document.getPages().getCount(); + log.debug("Document reports {} pages", pageCount); + + int consecutiveFailures = 0; + + for (int pgNum = 0; pgNum < pageCount; pgNum++) { + try { + PDPage page = document.getPage(pgNum); + consecutiveFailures = 0; // Reset on success + final int currentPageNum = pgNum + 1; // Convert to 1-based page numbering + Future future = + executor.submit( + () -> { + try { + // Call the image extraction method for each page + extractImagesFromPage( + page, + format, + filename, + currentPageNum, + processedImages, + zos, + allowDuplicates); + } catch (Exception e) { + // Log the error and continue processing other pages + ExceptionUtils.logException("image extraction from page " + currentPageNum, e); + } - try { - // Call the image extraction method for each page - extractImagesFromPage( - page, - format, - filename, - pageNum, - processedImages, - zos, - allowDuplicates); - } catch (IOException e) { - // Log the error and continue processing other pages - log.error( - "Error extracting images from page {}: {}", - pageNum, - e.getMessage()); - } + return null; // Callable requires a return type + }); - return null; // Callable requires a return type - }); - - // Add the Future object to the list to track completion - futures.add(future); + // Add the Future object to the list to track completion + futures.add(future); + } catch (Exception e) { + consecutiveFailures++; + ExceptionUtils.logException("page access for page " + (pgNum + 1), e); + + if (consecutiveFailures >= 3) { + log.warn("Stopping page iteration after 3 consecutive failures"); + break; + } + } + } + } catch (Exception e) { + ExceptionUtils.logException("page count determination", e); + throw e; } // Wait for all tasks to complete diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java index 222ecd1c6..74177c23c 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java @@ -150,11 +150,18 @@ public class PipelineProcessor { } } if (!hasInputFileType) { + String filename = file.getFilename(); + String providedExtension = "no extension"; + if (filename != null && filename.contains(".")) { + providedExtension = filename.substring(filename.lastIndexOf(".")).toLowerCase(); + } + logPrintStream.println( "No files with extension " + String.join(", ", inputFileTypes) + " found for operation " - + operation); + + operation + + ". Provided file '" + filename + "' has extension: " + providedExtension); hasErrors = true; } } @@ -203,11 +210,26 @@ public class PipelineProcessor { hasErrors = true; } } else { + // Get details about what files were actually provided + List providedExtensions = outputFiles.stream() + .map(file -> { + String filename = file.getFilename(); + if (filename != null && filename.contains(".")) { + return filename.substring(filename.lastIndexOf(".")).toLowerCase(); + } + return "no extension"; + }) + .distinct() + .toList(); + logPrintStream.println( "No files with extension " + String.join(", ", inputFileTypes) + " found for multi-input operation " - + operation); + + operation + + ". Provided files have extensions: " + + String.join(", ", providedExtensions) + + " (total files: " + outputFiles.size() + ")"); hasErrors = true; } } diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/CertSignController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/CertSignController.java index 2d7358681..9a621390b 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/CertSignController.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/CertSignController.java @@ -133,7 +133,7 @@ public class CertSignController { } doc.saveIncremental(output); } catch (Exception e) { - log.error("exception", e); + ExceptionUtils.logException("PDF signing", e); } } diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java index f0acd7e03..f3c0a5e29 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java @@ -63,6 +63,7 @@ import lombok.extern.slf4j.Slf4j; import stirling.software.common.model.api.PDFFile; import stirling.software.common.service.CustomPDFDocumentFactory; +import stirling.software.common.util.ExceptionUtils; import stirling.software.common.util.WebResponseUtils; @RestController @@ -181,7 +182,7 @@ public class GetInfoOnPDF { } } } catch (Exception e) { - log.error("exception", e); + ExceptionUtils.logException("PDF standard checking", e); } return false; diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/service/MetricsAggregatorService.java b/stirling-pdf/src/main/java/stirling/software/SPDF/service/MetricsAggregatorService.java index acd0669c0..181757a04 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/service/MetricsAggregatorService.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/service/MetricsAggregatorService.java @@ -77,7 +77,7 @@ public class MetricsAggregatorService { double lastCount = lastSentMetrics.getOrDefault(key, 0.0); double difference = currentCount - lastCount; if (difference > 0) { - logger.info("{}, {}", key, difference); + logger.debug("{}, {}", key, difference); metrics.put(key, difference); lastSentMetrics.put(key, currentCount); }