exceptions

This commit is contained in:
Anthony Stirling 2025-07-02 23:51:03 +01:00
parent a6e70b3df1
commit c7b8b6d1e1
4 changed files with 76 additions and 35 deletions

View File

@ -154,19 +154,20 @@ public class TempFileCleanupService {
boolean containerMode = isContainerMode(); boolean containerMode = isContainerMode();
int unregisteredDeletedCount = cleanupUnregisteredFiles(containerMode, true, maxAgeMillis); int unregisteredDeletedCount = cleanupUnregisteredFiles(containerMode, true, maxAgeMillis);
if(registeredDeletedCount >0 || unregisteredDeletedCount >0 || directoriesDeletedCount >0) {
log.info( log.info(
"Scheduled cleanup complete. Deleted {} registered files, {} unregistered files, {} directories", "Scheduled cleanup complete. Deleted {} registered files, {} unregistered files, {} directories",
registeredDeletedCount, registeredDeletedCount,
unregisteredDeletedCount, unregisteredDeletedCount,
directoriesDeletedCount); directoriesDeletedCount);
} }
}
/** /**
* Perform startup cleanup of stale temporary files from previous runs. This is especially * Perform startup cleanup of stale temporary files from previous runs. This is especially
* important in Docker environments where temp files persist between container restarts. * important in Docker environments where temp files persist between container restarts.
*/ */
private void runStartupCleanup() { private void runStartupCleanup() {
log.info("Running startup temporary file cleanup");
boolean containerMode = isContainerMode(); boolean containerMode = isContainerMode();
log.info( log.info(
@ -178,7 +179,6 @@ public class TempFileCleanupService {
long maxAgeMillis = containerMode ? 0 : 24 * 60 * 60 * 1000; // 0 or 24 hours long maxAgeMillis = containerMode ? 0 : 24 * 60 * 60 * 1000; // 0 or 24 hours
int totalDeletedCount = cleanupUnregisteredFiles(containerMode, false, maxAgeMillis); int totalDeletedCount = cleanupUnregisteredFiles(containerMode, false, maxAgeMillis);
log.info( log.info(
"Startup cleanup complete. Deleted {} temporary files/directories", "Startup cleanup complete. Deleted {} temporary files/directories",
totalDeletedCount); totalDeletedCount);
@ -225,7 +225,7 @@ public class TempFileCleanupService {
tempDir -> { tempDir -> {
try { try {
String phase = isScheduled ? "scheduled" : "startup"; String phase = isScheduled ? "scheduled" : "startup";
log.info( log.debug(
"Scanning directory for {} cleanup: {}", "Scanning directory for {} cleanup: {}",
phase, phase,
tempDir); tempDir);

View File

@ -298,9 +298,9 @@ public class ExceptionUtils {
* @param e the exception that occurred * @param e the exception that occurred
*/ */
public static void logException(String operation, Exception e) { public static void logException(String operation, Exception e) {
if (e instanceof IOException && PdfErrorUtils.isCorruptedPdfError((IOException) e)) { if (PdfErrorUtils.isCorruptedPdfError(e)) {
log.warn("PDF corruption detected during {}: {}", operation, e.getMessage()); log.warn("PDF corruption detected during {}: {}", operation, e.getMessage());
} else if (isEncryptionError((IOException) e) || isPasswordError((IOException) e)) { } else if (e instanceof IOException && (isEncryptionError((IOException) e) || isPasswordError((IOException) e))) {
log.info("PDF security issue during {}: {}", operation, e.getMessage()); log.info("PDF security issue during {}: {}", operation, e.getMessage());
} else { } else {
log.error("Unexpected error during {}", operation, e); log.error("Unexpected error during {}", operation, e);

View File

@ -12,7 +12,26 @@ public class PdfErrorUtils {
* @return true if the error indicates PDF corruption, false otherwise * @return true if the error indicates PDF corruption, false otherwise
*/ */
public static boolean isCorruptedPdfError(IOException e) { public static boolean isCorruptedPdfError(IOException e) {
String message = e.getMessage(); return isCorruptedPdfError(e.getMessage());
}
/**
 * Checks if any Exception indicates a corrupted PDF file.
 *
 * <p>Only the exception's own message (as returned by {@code getMessage()}) is inspected; the
 * decision itself is delegated to the message-based overload of this method.
 *
 * @param e the Exception to check; may be {@code null}
 * @return true if the error indicates PDF corruption, false otherwise (including when
 *     {@code e} is {@code null})
 */
public static boolean isCorruptedPdfError(Exception e) {
    // A missing exception carries no corruption evidence; guard so getMessage() cannot NPE.
    if (e == null) {
        return false;
    }
    return isCorruptedPdfError(e.getMessage());
}
/**
* Checks if an error message indicates a corrupted PDF file.
*
* @param message the error message to check
* @return true if the message indicates PDF corruption, false otherwise
*/
private static boolean isCorruptedPdfError(String message) {
if (message == null) return false; if (message == null) return false;
// Check for common corruption indicators // Check for common corruption indicators
@ -24,6 +43,10 @@ public class PdfErrorUtils {
|| message.contains("damaged") || message.contains("damaged")
|| message.contains("Unknown dir object") || message.contains("Unknown dir object")
|| message.contains("Can't dereference COSObject") || message.contains("Can't dereference COSObject")
|| message.contains("parseCOSString string should start with")
|| message.contains("ICCBased colorspace array must have a stream")
|| message.contains("1-based index not found")
|| message.contains("Invalid dictionary, found:")
|| message.contains("AES initialization vector not fully read") || message.contains("AES initialization vector not fully read")
|| message.contains("BadPaddingException") || message.contains("BadPaddingException")
|| message.contains("Given final block not properly padded"); || message.contains("Given final block not properly padded");

View File

@ -91,29 +91,34 @@ public class ExtractImagesController {
Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
Set<Future<Void>> futures = new HashSet<>(); Set<Future<Void>> futures = new HashSet<>();
// Iterate over each page // Safely iterate over each page, handling corrupt PDFs where page count might be wrong
for (int pgNum = 0; pgNum < document.getPages().getCount(); pgNum++) { try {
int pageCount = document.getPages().getCount();
log.debug("Document reports {} pages", pageCount);
int consecutiveFailures = 0;
for (int pgNum = 0; pgNum < pageCount; pgNum++) {
try {
PDPage page = document.getPage(pgNum); PDPage page = document.getPage(pgNum);
consecutiveFailures = 0; // Reset on success
final int currentPageNum = pgNum + 1; // Convert to 1-based page numbering
Future<Void> future = Future<Void> future =
executor.submit( executor.submit(
() -> { () -> {
// Use the page number directly from the iterator, so no need to
// calculate manually
int pageNum = document.getPages().indexOf(page) + 1;
try { try {
// Call the image extraction method for each page // Call the image extraction method for each page
extractImagesFromPage( extractImagesFromPage(
page, page,
format, format,
filename, filename,
pageNum, currentPageNum,
processedImages, processedImages,
zos, zos,
allowDuplicates); allowDuplicates);
} catch (IOException e) { } catch (Exception e) {
// Log the error and continue processing other pages // Log the error and continue processing other pages
ExceptionUtils.logException("image extraction from page " + pageNum, e); ExceptionUtils.logException("image extraction from page " + currentPageNum, e);
} }
return null; // Callable requires a return type return null; // Callable requires a return type
@ -121,6 +126,19 @@ public class ExtractImagesController {
// Add the Future object to the list to track completion // Add the Future object to the list to track completion
futures.add(future); futures.add(future);
} catch (Exception e) {
consecutiveFailures++;
ExceptionUtils.logException("page access for page " + (pgNum + 1), e);
if (consecutiveFailures >= 3) {
log.warn("Stopping page iteration after 3 consecutive failures");
break;
}
}
}
} catch (Exception e) {
ExceptionUtils.logException("page count determination", e);
throw e;
} }
// Wait for all tasks to complete // Wait for all tasks to complete