error tuning

This commit is contained in:
Anthony Stirling 2025-07-01 21:17:45 +01:00
parent b2a0868f5b
commit d79b6e29e0
9 changed files with 241 additions and 37 deletions

View File

@ -24,6 +24,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.common.model.api.PDFFile;
import stirling.software.common.util.ApplicationContextProvider;
import stirling.software.common.util.PdfErrorUtils;
import stirling.software.common.util.TempFileManager;
import stirling.software.common.util.TempFileRegistry;
@ -354,7 +355,14 @@ public class CustomPDFDocumentFactory {
/**
 * Loads a PDF document from {@code file} by calling {@code Loader.loadPDF} on a
 * {@code DeletingRandomAccessFile} wrapping it, passing an empty string, two {@code null}
 * arguments and the supplied stream cache function.
 *
 * <p>If loading fails with an {@link IOException} that
 * {@code PdfErrorUtils.isCorruptedPdfError} accepts, a new {@code IOException} is thrown whose
 * message comes from {@code PdfErrorUtils.getCorruptedPdfMessage("")} and whose cause is the
 * original exception. Any other {@code IOException} is rethrown unchanged.
 *
 * @param file the file to load the document from
 * @param size not read inside this method
 * @param cache stream cache function handed to {@code Loader.loadPDF}
 * @return the document returned by {@code Loader.loadPDF}
 * @throws IOException if loading fails, with a rewritten message for recognised corruption errors
 */
private PDDocument loadFromFile(File file, long size, StreamCacheCreateFunction cache)
throws IOException {
// NOTE(review): this unguarded call looks like the pre-change line kept by the diff view;
// the try block below appears to be its replacement - confirm against the actual file.
return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache);
try {
return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache);
} catch (IOException e) {
// Swap the low-level parser message for the user-facing one, keeping the original as cause.
if (PdfErrorUtils.isCorruptedPdfError(e)) {
throw new IOException(PdfErrorUtils.getCorruptedPdfMessage(""), e);
}
throw e;
}
}
private PDDocument loadFromBytes(byte[] bytes, long size, StreamCacheCreateFunction cache)
@ -366,7 +374,15 @@ public class CustomPDFDocumentFactory {
Files.write(tempFile, bytes);
return loadFromFile(tempFile.toFile(), size, cache);
}
return Loader.loadPDF(bytes, "", null, null, cache);
try {
return Loader.loadPDF(bytes, "", null, null, cache);
} catch (IOException e) {
if (PdfErrorUtils.isCorruptedPdfError(e)) {
throw new IOException(PdfErrorUtils.getCorruptedPdfMessage(""), e);
}
throw e;
}
}
public PDDocument createNewDocument(MemoryUsageSetting settings) throws IOException {

View File

@ -0,0 +1,59 @@
package stirling.software.common.util;
import java.io.IOException;
/**
* Utility class for detecting and handling PDF-related errors.
*/
public class PdfErrorUtils {
/**
* Checks if an IOException indicates a corrupted PDF file.
*
* @param e the IOException to check
* @return true if the error indicates PDF corruption, false otherwise
*/
public static boolean isCorruptedPdfError(IOException e) {
String message = e.getMessage();
if (message == null) return false;
// Check for common corruption indicators
return message.contains("Missing root object specification") ||
message.contains("Header doesn't contain versioninfo") ||
message.contains("Expected trailer") ||
message.contains("Invalid PDF") ||
message.contains("Corrupted") ||
message.contains("damaged") ||
message.contains("Unknown dir object") ||
message.contains("Can't dereference COSObject") ||
message.contains("AES initialization vector not fully read") ||
message.contains("BadPaddingException") ||
message.contains("Given final block not properly padded");
}
/**
* Creates a user-friendly error message for corrupted PDF files.
*
* @param context additional context about where the error occurred (e.g., "during merge", "during processing")
* @return a user-friendly error message
*/
public static String getCorruptedPdfMessage(String context) {
String baseMessage = "PDF file appears to be corrupted or damaged. " +
"Please try using the 'Repair PDF' feature first to fix the file before proceeding with this operation.";
if (context != null && !context.isEmpty()) {
return "Error " + context + ": " + baseMessage;
}
return baseMessage;
}
/**
* Creates a user-friendly error message for multiple corrupted PDF files (e.g., during merge).
*
* @return a user-friendly error message for multiple file operations
*/
public static String getCorruptedPdfMessageForMultipleFiles() {
return "One or more PDF files appear to be corrupted or damaged. " +
"Please try using the 'Repair PDF' feature on each file first before attempting to merge them.";
}
}

View File

@ -135,6 +135,16 @@ public class PdfUtils {
int DPI,
String filename)
throws IOException, Exception {
// Validate and limit DPI to prevent excessive memory usage
final int MAX_SAFE_DPI = 300; // Maximum safe DPI to prevent memory issues
if (DPI > MAX_SAFE_DPI) {
throw new IllegalArgumentException(String.format(
"DPI value %d exceeds maximum safe limit of %d. " +
"High DPI values can cause memory issues and crashes. " +
"Please use a lower DPI value.", DPI, MAX_SAFE_DPI));
}
try (PDDocument document = pdfDocumentFactory.load(inputStream)) {
PDFRenderer pdfRenderer = new PDFRenderer(document);
pdfRenderer.setSubsamplingAllowed(true);
@ -158,7 +168,18 @@ public class PdfUtils {
writer.prepareWriteSequence(null);
for (int i = 0; i < pageCount; ++i) {
BufferedImage image = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
BufferedImage image;
try {
image = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
} catch (IllegalArgumentException e) {
if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
throw new IllegalArgumentException(String.format(
"PDF page %d is too large to render at %d DPI. " +
"Please try a lower DPI value (recommended: 150 or less).",
i + 1, DPI), e);
}
throw e;
}
writer.writeToSequence(new IIOImage(image, null, null), param);
}
@ -190,7 +211,18 @@ public class PdfUtils {
PdfImageDimensionValue dimension = pageSizes.get(settings);
if (dimension == null) {
// Render the image to get the dimensions
pdfSizeImage = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
try {
pdfSizeImage = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
} catch (IllegalArgumentException e) {
if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
throw new IllegalArgumentException(String.format(
"PDF page %d is too large to render at %d DPI. " +
"The resulting image would exceed Java's maximum array size. " +
"Please try a lower DPI value (recommended: 150 or less).",
i + 1, DPI), e);
}
throw e;
}
pdfSizeImageIndex = i;
dimension =
new PdfImageDimensionValue(
@ -218,7 +250,17 @@ public class PdfUtils {
if (firstImageAlreadyRendered && i == 0) {
pageImage = pdfSizeImage;
} else {
pageImage = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
try {
pageImage = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
} catch (IllegalArgumentException e) {
if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
throw new IllegalArgumentException(String.format(
"PDF page %d is too large to render at %d DPI. " +
"Please try a lower DPI value (recommended: 150 or less).",
i + 1, DPI), e);
}
throw e;
}
}
// Calculate the x-coordinate to center the image
@ -238,7 +280,18 @@ public class PdfUtils {
// Zip the images and return as byte array
try (ZipOutputStream zos = new ZipOutputStream(baos)) {
for (int i = 0; i < pageCount; ++i) {
BufferedImage image = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
BufferedImage image;
try {
image = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
} catch (IllegalArgumentException e) {
if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
throw new IllegalArgumentException(String.format(
"PDF page %d is too large to render at %d DPI. " +
"Please try a lower DPI value (recommended: 150 or less).",
i + 1, DPI), e);
}
throw e;
}
try (ByteArrayOutputStream baosImage = new ByteArrayOutputStream()) {
ImageIO.write(image, imageType, baosImage);
@ -276,7 +329,19 @@ public class PdfUtils {
PDFRenderer pdfRenderer = new PDFRenderer(document);
pdfRenderer.setSubsamplingAllowed(true);
for (int page = 0; page < document.getNumberOfPages(); ++page) {
BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
BufferedImage bim;
try {
bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
} catch (IllegalArgumentException e) {
if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
throw new IllegalArgumentException(String.format(
"PDF page %d is too large to render at 300 DPI. " +
"The resulting image would exceed Java's maximum array size. " +
"Please use a lower DPI value for PDF-to-image conversion.",
page + 1), e);
}
throw e;
}
PDPage originalPage = document.getPage(page);
float width = originalPage.getMediaBox().getWidth();

View File

@ -94,8 +94,14 @@ def split_photos(input_file, output_directory, tolerance=30, min_area=10000, min
cropped_image = image[y:y+h, x:x+w]
cropped_image = auto_rotate(cropped_image, angle_threshold)
# Remove the added border
cropped_image = cropped_image[border_size:-border_size, border_size:-border_size]
# Remove the added border, but ensure we don't create an empty image
if border_size > 0 and cropped_image.shape[0] > 2 * border_size and cropped_image.shape[1] > 2 * border_size:
cropped_image = cropped_image[border_size:-border_size, border_size:-border_size]
# Check if the cropped image is valid before saving
if cropped_image.size == 0 or cropped_image.shape[0] == 0 or cropped_image.shape[1] == 0:
print(f"Warning: Skipping empty image for region {idx+1}")
continue
output_path = os.path.join(output_directory, f"{input_file_basename}_{idx+1}.png")
cv2.imwrite(output_path, cropped_image)

View File

@ -36,6 +36,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.general.MergePdfsRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.GeneralUtils;
import stirling.software.common.util.PdfErrorUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -189,8 +190,15 @@ public class MergeController {
mergedTempFile = Files.createTempFile("merged-", ".pdf").toFile();
mergerUtility.setDestinationFileName(mergedTempFile.getAbsolutePath());
mergerUtility.mergeDocuments(
pdfDocumentFactory.getStreamCacheFunction(totalSize)); // Merge the documents
try {
mergerUtility.mergeDocuments(
pdfDocumentFactory.getStreamCacheFunction(totalSize)); // Merge the documents
} catch (IOException e) {
if (PdfErrorUtils.isCorruptedPdfError(e)) {
throw new IOException(PdfErrorUtils.getCorruptedPdfMessageForMultipleFiles(), e);
}
throw e;
}
// Load the merged PDF document
mergedDocument = pdfDocumentFactory.load(mergedTempFile);

View File

@ -289,7 +289,7 @@ public class RearrangePagesPDFController {
+ "_rearranged.pdf");
} catch (IOException e) {
log.error("Failed rearranging documents", e);
return null;
throw e;
}
}
}

View File

@ -42,6 +42,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.PDFExtractImagesRequest;
import stirling.software.common.service.CustomPDFDocumentFactory;
import stirling.software.common.util.ImageProcessingUtils;
import stirling.software.common.util.PdfErrorUtils;
import stirling.software.common.util.WebResponseUtils;
@RestController
@ -180,7 +181,8 @@ public class ExtractImagesController {
}
int count = 1;
for (COSName name : page.getResources().getXObjectNames()) {
if (page.getResources().isImageXObject(name)) {
try {
if (page.getResources().isImageXObject(name)) {
PDImageXObject image = (PDImageXObject) page.getResources().getXObject(name);
if (!allowDuplicates) {
byte[] data = ImageProcessingUtils.getImageData(image.getImage());
@ -209,6 +211,12 @@ public class ExtractImagesController {
zos.closeEntry();
}
}
} catch (IOException e) {
if (PdfErrorUtils.isCorruptedPdfError(e)) {
throw new IOException(PdfErrorUtils.getCorruptedPdfMessage("during image extraction"), e);
}
throw e;
}
}
}

View File

@ -150,21 +150,37 @@ public class GetInfoOnPDF {
PDMetadata pdMetadata = document.getDocumentCatalog().getMetadata();
if (pdMetadata != null) {
COSInputStream metaStream = pdMetadata.createInputStream();
DomXmpParser domXmpParser = new DomXmpParser();
XMPMetadata xmpMeta = domXmpParser.parse(metaStream);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
new XmpSerializer().serialize(xmpMeta, baos, true);
String xmpString = new String(baos.toByteArray(), StandardCharsets.UTF_8);
if (xmpString.contains(standardKeyword)) {
// First try to read raw metadata as string to check for standard keywords
byte[] metadataBytes = metaStream.readAllBytes();
String rawMetadata = new String(metadataBytes, StandardCharsets.UTF_8);
if (rawMetadata.contains(standardKeyword)) {
return true;
}
// If raw check doesn't find it, try parsing with XMP parser
// Reset stream for parsing
metaStream.close();
metaStream = pdMetadata.createInputStream();
try {
DomXmpParser domXmpParser = new DomXmpParser();
XMPMetadata xmpMeta = domXmpParser.parse(metaStream);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
new XmpSerializer().serialize(xmpMeta, baos, true);
String xmpString = new String(baos.toByteArray(), StandardCharsets.UTF_8);
if (xmpString.contains(standardKeyword)) {
return true;
}
} catch (XmpParsingException e) {
// XMP parsing failed, but we already checked raw metadata above
log.debug("XMP parsing failed for standard check, but raw metadata was already checked: {}", e.getMessage());
}
}
} catch (
Exception
e) { // Catching general exception for brevity, ideally you'd catch specific
// exceptions.
} catch (Exception e) {
log.error("exception", e);
}
@ -392,13 +408,23 @@ public class GetInfoOnPDF {
if (pdMetadata != null) {
try {
COSInputStream is = pdMetadata.createInputStream();
DomXmpParser domXmpParser = new DomXmpParser();
XMPMetadata xmpMeta = domXmpParser.parse(is);
try {
DomXmpParser domXmpParser = new DomXmpParser();
XMPMetadata xmpMeta = domXmpParser.parse(is);
ByteArrayOutputStream os = new ByteArrayOutputStream();
new XmpSerializer().serialize(xmpMeta, os, true);
xmpString = new String(os.toByteArray(), StandardCharsets.UTF_8);
} catch (XmpParsingException | IOException e) {
ByteArrayOutputStream os = new ByteArrayOutputStream();
new XmpSerializer().serialize(xmpMeta, os, true);
xmpString = new String(os.toByteArray(), StandardCharsets.UTF_8);
} catch (XmpParsingException e) {
// XMP parsing failed, try to read raw metadata instead
log.debug("XMP parsing failed, reading raw metadata: {}", e.getMessage());
is.close();
is = pdMetadata.createInputStream();
byte[] metadataBytes = is.readAllBytes();
xmpString = new String(metadataBytes, StandardCharsets.UTF_8);
}
} catch (IOException e) {
log.error("exception", e);
}
}

View File

@ -42,12 +42,28 @@ public class PasswordController {
MultipartFile fileInput = request.getFileInput();
String password = request.getPassword();
PDDocument document = pdfDocumentFactory.load(fileInput, password);
document.setAllSecurityToBeRemoved(true);
return WebResponseUtils.pdfDocToWebResponse(
document,
Filenames.toSimpleFileName(fileInput.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_password_removed.pdf");
try {
document.setAllSecurityToBeRemoved(true);
return WebResponseUtils.pdfDocToWebResponse(
document,
Filenames.toSimpleFileName(fileInput.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_password_removed.pdf");
} catch (IOException e) {
// Check if this is an encryption/decryption error
if (e.getMessage() != null &&
(e.getMessage().contains("BadPaddingException") ||
e.getMessage().contains("Given final block not properly padded") ||
e.getMessage().contains("Failed to decrypt"))) {
document.close();
throw new IOException("The PDF appears to have corrupted encryption data. " +
"This can happen when the PDF was created with incompatible encryption methods. " +
"Please try using the 'Repair PDF' feature first, or contact the document creator for a new copy.", e);
}
throw e;
}
}
@PostMapping(consumes = "multipart/form-data", value = "/add-password")