mirror of
https://github.com/Stirling-Tools/Stirling-PDF.git
synced 2025-08-26 22:29:24 +00:00
error tuning
This commit is contained in:
parent
b2a0868f5b
commit
d79b6e29e0
@ -24,6 +24,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.common.model.api.PDFFile;
|
||||
import stirling.software.common.util.ApplicationContextProvider;
|
||||
import stirling.software.common.util.PdfErrorUtils;
|
||||
import stirling.software.common.util.TempFileManager;
|
||||
import stirling.software.common.util.TempFileRegistry;
|
||||
|
||||
@ -354,7 +355,14 @@ public class CustomPDFDocumentFactory {
|
||||
|
||||
private PDDocument loadFromFile(File file, long size, StreamCacheCreateFunction cache)
|
||||
throws IOException {
|
||||
return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache);
|
||||
try {
|
||||
return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache);
|
||||
} catch (IOException e) {
|
||||
if (PdfErrorUtils.isCorruptedPdfError(e)) {
|
||||
throw new IOException(PdfErrorUtils.getCorruptedPdfMessage(""), e);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
private PDDocument loadFromBytes(byte[] bytes, long size, StreamCacheCreateFunction cache)
|
||||
@ -366,7 +374,15 @@ public class CustomPDFDocumentFactory {
|
||||
Files.write(tempFile, bytes);
|
||||
return loadFromFile(tempFile.toFile(), size, cache);
|
||||
}
|
||||
return Loader.loadPDF(bytes, "", null, null, cache);
|
||||
|
||||
try {
|
||||
return Loader.loadPDF(bytes, "", null, null, cache);
|
||||
} catch (IOException e) {
|
||||
if (PdfErrorUtils.isCorruptedPdfError(e)) {
|
||||
throw new IOException(PdfErrorUtils.getCorruptedPdfMessage(""), e);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
public PDDocument createNewDocument(MemoryUsageSetting settings) throws IOException {
|
||||
|
@ -0,0 +1,59 @@
|
||||
package stirling.software.common.util;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
 * Utility class for detecting and handling PDF-related errors.
 *
 * <p>All members are static; the class cannot be instantiated or subclassed.
 */
public final class PdfErrorUtils {

    /**
     * Message fragments treated as signs of a corrupted PDF. Each fragment is matched as a
     * case-sensitive substring of the exception message.
     */
    private static final String[] CORRUPTION_INDICATORS = {
        "Missing root object specification",
        "Header doesn't contain versioninfo",
        "Expected trailer",
        "Invalid PDF",
        "Corrupted",
        "damaged",
        "Unknown dir object",
        "Can't dereference COSObject",
        "AES initialization vector not fully read",
        "BadPaddingException",
        "Given final block not properly padded"
    };

    /** Base user-facing message for a single corrupted file. */
    private static final String CORRUPTED_PDF_MESSAGE =
            "PDF file appears to be corrupted or damaged. "
                    + "Please try using the 'Repair PDF' feature first to fix the file before proceeding with this operation.";

    /** Base user-facing message for operations that take several files. */
    private static final String CORRUPTED_PDF_MESSAGE_MULTIPLE_FILES =
            "One or more PDF files appear to be corrupted or damaged. "
                    + "Please try using the 'Repair PDF' feature on each file first before attempting to merge them.";

    private PdfErrorUtils() {
        // Static utility class; not instantiable.
    }

    /**
     * Checks if an IOException indicates a corrupted PDF file.
     *
     * <p>Only the exception's own message is inspected; the cause chain is not walked.
     *
     * @param e the IOException to check; may be {@code null}
     * @return true if the message contains one of the known corruption indicators, false
     *     otherwise (including when {@code e} or its message is {@code null})
     */
    public static boolean isCorruptedPdfError(IOException e) {
        if (e == null) {
            return false;
        }
        String message = e.getMessage();
        if (message == null) {
            return false;
        }

        for (String indicator : CORRUPTION_INDICATORS) {
            if (message.contains(indicator)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Creates a user-friendly error message for corrupted PDF files.
     *
     * @param context additional context about where the error occurred (e.g., "during
     *     merge", "during processing"); when {@code null} or empty, no prefix is added
     * @return the base message, prefixed with {@code "Error <context>: "} when a context is
     *     supplied
     */
    public static String getCorruptedPdfMessage(String context) {
        if (context != null && !context.isEmpty()) {
            return "Error " + context + ": " + CORRUPTED_PDF_MESSAGE;
        }
        return CORRUPTED_PDF_MESSAGE;
    }

    /**
     * Creates a user-friendly error message for multiple corrupted PDF files (e.g., during
     * merge).
     *
     * @return a user-friendly error message for multiple file operations
     */
    public static String getCorruptedPdfMessageForMultipleFiles() {
        return CORRUPTED_PDF_MESSAGE_MULTIPLE_FILES;
    }
}
|
@ -135,6 +135,16 @@ public class PdfUtils {
|
||||
int DPI,
|
||||
String filename)
|
||||
throws IOException, Exception {
|
||||
|
||||
// Validate and limit DPI to prevent excessive memory usage
|
||||
final int MAX_SAFE_DPI = 300; // Maximum safe DPI to prevent memory issues
|
||||
if (DPI > MAX_SAFE_DPI) {
|
||||
throw new IllegalArgumentException(String.format(
|
||||
"DPI value %d exceeds maximum safe limit of %d. " +
|
||||
"High DPI values can cause memory issues and crashes. " +
|
||||
"Please use a lower DPI value.", DPI, MAX_SAFE_DPI));
|
||||
}
|
||||
|
||||
try (PDDocument document = pdfDocumentFactory.load(inputStream)) {
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
pdfRenderer.setSubsamplingAllowed(true);
|
||||
@ -158,7 +168,18 @@ public class PdfUtils {
|
||||
writer.prepareWriteSequence(null);
|
||||
|
||||
for (int i = 0; i < pageCount; ++i) {
|
||||
BufferedImage image = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
|
||||
BufferedImage image;
|
||||
try {
|
||||
image = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
|
||||
} catch (IllegalArgumentException e) {
|
||||
if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
|
||||
throw new IllegalArgumentException(String.format(
|
||||
"PDF page %d is too large to render at %d DPI. " +
|
||||
"Please try a lower DPI value (recommended: 150 or less).",
|
||||
i + 1, DPI), e);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
writer.writeToSequence(new IIOImage(image, null, null), param);
|
||||
}
|
||||
|
||||
@ -190,7 +211,18 @@ public class PdfUtils {
|
||||
PdfImageDimensionValue dimension = pageSizes.get(settings);
|
||||
if (dimension == null) {
|
||||
// Render the image to get the dimensions
|
||||
pdfSizeImage = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
|
||||
try {
|
||||
pdfSizeImage = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
|
||||
} catch (IllegalArgumentException e) {
|
||||
if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
|
||||
throw new IllegalArgumentException(String.format(
|
||||
"PDF page %d is too large to render at %d DPI. " +
|
||||
"The resulting image would exceed Java's maximum array size. " +
|
||||
"Please try a lower DPI value (recommended: 150 or less).",
|
||||
i + 1, DPI), e);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
pdfSizeImageIndex = i;
|
||||
dimension =
|
||||
new PdfImageDimensionValue(
|
||||
@ -218,7 +250,17 @@ public class PdfUtils {
|
||||
if (firstImageAlreadyRendered && i == 0) {
|
||||
pageImage = pdfSizeImage;
|
||||
} else {
|
||||
pageImage = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
|
||||
try {
|
||||
pageImage = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
|
||||
} catch (IllegalArgumentException e) {
|
||||
if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
|
||||
throw new IllegalArgumentException(String.format(
|
||||
"PDF page %d is too large to render at %d DPI. " +
|
||||
"Please try a lower DPI value (recommended: 150 or less).",
|
||||
i + 1, DPI), e);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate the x-coordinate to center the image
|
||||
@ -238,7 +280,18 @@ public class PdfUtils {
|
||||
// Zip the images and return as byte array
|
||||
try (ZipOutputStream zos = new ZipOutputStream(baos)) {
|
||||
for (int i = 0; i < pageCount; ++i) {
|
||||
BufferedImage image = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
|
||||
BufferedImage image;
|
||||
try {
|
||||
image = pdfRenderer.renderImageWithDPI(i, DPI, colorType);
|
||||
} catch (IllegalArgumentException e) {
|
||||
if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
|
||||
throw new IllegalArgumentException(String.format(
|
||||
"PDF page %d is too large to render at %d DPI. " +
|
||||
"Please try a lower DPI value (recommended: 150 or less).",
|
||||
i + 1, DPI), e);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
try (ByteArrayOutputStream baosImage = new ByteArrayOutputStream()) {
|
||||
ImageIO.write(image, imageType, baosImage);
|
||||
|
||||
@ -276,7 +329,19 @@ public class PdfUtils {
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
pdfRenderer.setSubsamplingAllowed(true);
|
||||
for (int page = 0; page < document.getNumberOfPages(); ++page) {
|
||||
BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
|
||||
BufferedImage bim;
|
||||
try {
|
||||
bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
|
||||
} catch (IllegalArgumentException e) {
|
||||
if (e.getMessage() != null && e.getMessage().contains("Maximum size of image exceeded")) {
|
||||
throw new IllegalArgumentException(String.format(
|
||||
"PDF page %d is too large to render at 300 DPI. " +
|
||||
"The resulting image would exceed Java's maximum array size. " +
|
||||
"Please use a lower DPI value for PDF-to-image conversion.",
|
||||
page + 1), e);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
PDPage originalPage = document.getPage(page);
|
||||
|
||||
float width = originalPage.getMediaBox().getWidth();
|
||||
|
@ -94,8 +94,14 @@ def split_photos(input_file, output_directory, tolerance=30, min_area=10000, min
|
||||
cropped_image = image[y:y+h, x:x+w]
|
||||
cropped_image = auto_rotate(cropped_image, angle_threshold)
|
||||
|
||||
# Remove the added border
|
||||
cropped_image = cropped_image[border_size:-border_size, border_size:-border_size]
|
||||
# Remove the added border, but ensure we don't create an empty image
|
||||
if border_size > 0 and cropped_image.shape[0] > 2 * border_size and cropped_image.shape[1] > 2 * border_size:
|
||||
cropped_image = cropped_image[border_size:-border_size, border_size:-border_size]
|
||||
|
||||
# Check if the cropped image is valid before saving
|
||||
if cropped_image.size == 0 or cropped_image.shape[0] == 0 or cropped_image.shape[1] == 0:
|
||||
print(f"Warning: Skipping empty image for region {idx+1}")
|
||||
continue
|
||||
|
||||
output_path = os.path.join(output_directory, f"{input_file_basename}_{idx+1}.png")
|
||||
cv2.imwrite(output_path, cropped_image)
|
||||
|
@ -36,6 +36,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
import stirling.software.SPDF.model.api.general.MergePdfsRequest;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
import stirling.software.common.util.GeneralUtils;
|
||||
import stirling.software.common.util.PdfErrorUtils;
|
||||
import stirling.software.common.util.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@ -189,8 +190,15 @@ public class MergeController {
|
||||
mergedTempFile = Files.createTempFile("merged-", ".pdf").toFile();
|
||||
mergerUtility.setDestinationFileName(mergedTempFile.getAbsolutePath());
|
||||
|
||||
mergerUtility.mergeDocuments(
|
||||
pdfDocumentFactory.getStreamCacheFunction(totalSize)); // Merge the documents
|
||||
try {
|
||||
mergerUtility.mergeDocuments(
|
||||
pdfDocumentFactory.getStreamCacheFunction(totalSize)); // Merge the documents
|
||||
} catch (IOException e) {
|
||||
if (PdfErrorUtils.isCorruptedPdfError(e)) {
|
||||
throw new IOException(PdfErrorUtils.getCorruptedPdfMessageForMultipleFiles(), e);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
|
||||
// Load the merged PDF document
|
||||
mergedDocument = pdfDocumentFactory.load(mergedTempFile);
|
||||
|
@ -289,7 +289,7 @@ public class RearrangePagesPDFController {
|
||||
+ "_rearranged.pdf");
|
||||
} catch (IOException e) {
|
||||
log.error("Failed rearranging documents", e);
|
||||
return null;
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -42,6 +42,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
import stirling.software.SPDF.model.api.PDFExtractImagesRequest;
|
||||
import stirling.software.common.service.CustomPDFDocumentFactory;
|
||||
import stirling.software.common.util.ImageProcessingUtils;
|
||||
import stirling.software.common.util.PdfErrorUtils;
|
||||
import stirling.software.common.util.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@ -180,7 +181,8 @@ public class ExtractImagesController {
|
||||
}
|
||||
int count = 1;
|
||||
for (COSName name : page.getResources().getXObjectNames()) {
|
||||
if (page.getResources().isImageXObject(name)) {
|
||||
try {
|
||||
if (page.getResources().isImageXObject(name)) {
|
||||
PDImageXObject image = (PDImageXObject) page.getResources().getXObject(name);
|
||||
if (!allowDuplicates) {
|
||||
byte[] data = ImageProcessingUtils.getImageData(image.getImage());
|
||||
@ -209,6 +211,12 @@ public class ExtractImagesController {
|
||||
zos.closeEntry();
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
if (PdfErrorUtils.isCorruptedPdfError(e)) {
|
||||
throw new IOException(PdfErrorUtils.getCorruptedPdfMessage("during image extraction"), e);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -150,21 +150,37 @@ public class GetInfoOnPDF {
|
||||
PDMetadata pdMetadata = document.getDocumentCatalog().getMetadata();
|
||||
if (pdMetadata != null) {
|
||||
COSInputStream metaStream = pdMetadata.createInputStream();
|
||||
DomXmpParser domXmpParser = new DomXmpParser();
|
||||
XMPMetadata xmpMeta = domXmpParser.parse(metaStream);
|
||||
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
new XmpSerializer().serialize(xmpMeta, baos, true);
|
||||
String xmpString = new String(baos.toByteArray(), StandardCharsets.UTF_8);
|
||||
|
||||
if (xmpString.contains(standardKeyword)) {
|
||||
|
||||
// First try to read raw metadata as string to check for standard keywords
|
||||
byte[] metadataBytes = metaStream.readAllBytes();
|
||||
String rawMetadata = new String(metadataBytes, StandardCharsets.UTF_8);
|
||||
|
||||
if (rawMetadata.contains(standardKeyword)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// If raw check doesn't find it, try parsing with XMP parser
|
||||
// Reset stream for parsing
|
||||
metaStream.close();
|
||||
metaStream = pdMetadata.createInputStream();
|
||||
|
||||
try {
|
||||
DomXmpParser domXmpParser = new DomXmpParser();
|
||||
XMPMetadata xmpMeta = domXmpParser.parse(metaStream);
|
||||
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
new XmpSerializer().serialize(xmpMeta, baos, true);
|
||||
String xmpString = new String(baos.toByteArray(), StandardCharsets.UTF_8);
|
||||
|
||||
if (xmpString.contains(standardKeyword)) {
|
||||
return true;
|
||||
}
|
||||
} catch (XmpParsingException e) {
|
||||
// XMP parsing failed, but we already checked raw metadata above
|
||||
log.debug("XMP parsing failed for standard check, but raw metadata was already checked: {}", e.getMessage());
|
||||
}
|
||||
}
|
||||
} catch (
|
||||
Exception
|
||||
e) { // Catching general exception for brevity, ideally you'd catch specific
|
||||
// exceptions.
|
||||
} catch (Exception e) {
|
||||
log.error("exception", e);
|
||||
}
|
||||
|
||||
@ -392,13 +408,23 @@ public class GetInfoOnPDF {
|
||||
if (pdMetadata != null) {
|
||||
try {
|
||||
COSInputStream is = pdMetadata.createInputStream();
|
||||
DomXmpParser domXmpParser = new DomXmpParser();
|
||||
XMPMetadata xmpMeta = domXmpParser.parse(is);
|
||||
|
||||
try {
|
||||
DomXmpParser domXmpParser = new DomXmpParser();
|
||||
XMPMetadata xmpMeta = domXmpParser.parse(is);
|
||||
|
||||
ByteArrayOutputStream os = new ByteArrayOutputStream();
|
||||
new XmpSerializer().serialize(xmpMeta, os, true);
|
||||
xmpString = new String(os.toByteArray(), StandardCharsets.UTF_8);
|
||||
} catch (XmpParsingException | IOException e) {
|
||||
ByteArrayOutputStream os = new ByteArrayOutputStream();
|
||||
new XmpSerializer().serialize(xmpMeta, os, true);
|
||||
xmpString = new String(os.toByteArray(), StandardCharsets.UTF_8);
|
||||
} catch (XmpParsingException e) {
|
||||
// XMP parsing failed, try to read raw metadata instead
|
||||
log.debug("XMP parsing failed, reading raw metadata: {}", e.getMessage());
|
||||
is.close();
|
||||
is = pdMetadata.createInputStream();
|
||||
byte[] metadataBytes = is.readAllBytes();
|
||||
xmpString = new String(metadataBytes, StandardCharsets.UTF_8);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error("exception", e);
|
||||
}
|
||||
}
|
||||
|
@ -42,12 +42,28 @@ public class PasswordController {
|
||||
MultipartFile fileInput = request.getFileInput();
|
||||
String password = request.getPassword();
|
||||
PDDocument document = pdfDocumentFactory.load(fileInput, password);
|
||||
document.setAllSecurityToBeRemoved(true);
|
||||
return WebResponseUtils.pdfDocToWebResponse(
|
||||
document,
|
||||
Filenames.toSimpleFileName(fileInput.getOriginalFilename())
|
||||
.replaceFirst("[.][^.]+$", "")
|
||||
+ "_password_removed.pdf");
|
||||
|
||||
try {
|
||||
document.setAllSecurityToBeRemoved(true);
|
||||
return WebResponseUtils.pdfDocToWebResponse(
|
||||
document,
|
||||
Filenames.toSimpleFileName(fileInput.getOriginalFilename())
|
||||
.replaceFirst("[.][^.]+$", "")
|
||||
+ "_password_removed.pdf");
|
||||
} catch (IOException e) {
|
||||
// Check if this is an encryption/decryption error
|
||||
if (e.getMessage() != null &&
|
||||
(e.getMessage().contains("BadPaddingException") ||
|
||||
e.getMessage().contains("Given final block not properly padded") ||
|
||||
e.getMessage().contains("Failed to decrypt"))) {
|
||||
|
||||
document.close();
|
||||
throw new IOException("The PDF appears to have corrupted encryption data. " +
|
||||
"This can happen when the PDF was created with incompatible encryption methods. " +
|
||||
"Please try using the 'Repair PDF' feature first, or contact the document creator for a new copy.", e);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/add-password")
|
||||
|
Loading…
x
Reference in New Issue
Block a user