Mirror of https://github.com/Stirling-Tools/Stirling-PDF.git, synced 2025-06-05 00:32:02 +00:00
Further compression fixes (#3177)
# Description of Changes

Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)

---

## Checklist

### General

- [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable)
- [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed)
- [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details.

---------

Co-authored-by: a <a>
This commit is contained in:
parent 2848ccd12e
commit c7a8b9f011
@@ -25,7 +25,7 @@ ext {
}

group = "stirling.software"
version = "0.44.1"
version = "0.44.2"

java {
    // 17 is lowest but we support and recommend 21
@@ -3,13 +3,19 @@ package stirling.software.SPDF.controller.api.misc;

import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.Map;
import java.util.Map.Entry;

import javax.imageio.IIOImage;
import javax.imageio.ImageIO;

@@ -36,11 +42,15 @@ import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;

import stirling.software.SPDF.model.api.misc.OptimizePdfRequest;
import stirling.software.SPDF.service.CustomPDFDocumentFactory;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.ImageProcessingUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@@ -58,303 +68,366 @@ public class CompressController {
        this.pdfDocumentFactory = pdfDocumentFactory;
    }

    private void compressImagesInPDF(Path pdfFile, double scaleFactor, float jpegQuality)
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    private static class ImageReference {
        int pageNum; // Page number where the image appears
        COSName name; // The name used to reference this image
    }

    public Path compressImagesInPDF(
            Path pdfFile, double scaleFactor, float jpegQuality, boolean convertToGrayscale)
            throws Exception {
        byte[] fileBytes = Files.readAllBytes(pdfFile);
        long originalFileSize = fileBytes.length;
        Path newCompressedPDF = Files.createTempFile("compressedPDF", ".pdf");
        long originalFileSize = Files.size(pdfFile);
        log.info(
                "Starting image compression with scale factor: {} and JPEG quality: {} on file"
                        + " size: {}",
                "Starting image compression with scale factor: {}, JPEG quality: {}, grayscale: {} on file size: {}",
                scaleFactor,
                jpegQuality,
                convertToGrayscale,
                GeneralUtils.formatBytes(originalFileSize));

        // Track processed images to avoid recompression
        Set<String> processedImages = new HashSet<>();
        try (PDDocument doc = pdfDocumentFactory.load(pdfFile)) {

        // Collect all unique images by content hash
        Map<String, List<ImageReference>> uniqueImages = new HashMap<>();
        Map<String, PDImageXObject> compressedVersions = new HashMap<>();

        try (PDDocument doc = pdfDocumentFactory.load(fileBytes)) {
            int totalImages = 0;

            for (int pageNum = 0; pageNum < doc.getNumberOfPages(); pageNum++) {
                PDPage page = doc.getPage(pageNum);
                PDResources res = page.getResources();
                if (res == null || res.getXObjectNames() == null) continue;

                for (COSName name : res.getXObjectNames()) {
                    PDXObject xobj = res.getXObject(name);
                    if (!(xobj instanceof PDImageXObject)) continue;

                    totalImages++;
                    PDImageXObject image = (PDImageXObject) xobj;
                    String imageHash = generateImageHash(image);

                    // Store only page number and name reference
                    ImageReference ref = new ImageReference();
                    ref.pageNum = pageNum;
                    ref.name = name;

                    uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(ref);
                }
            }

            int uniqueImagesCount = uniqueImages.size();
            int duplicatedImages = totalImages - uniqueImagesCount;
            log.info(
                    "Found {} unique images and {} duplicated instances across {} pages",
                    uniqueImagesCount,
                    duplicatedImages,
                    doc.getNumberOfPages());

            // SECOND PASS: Process each unique image exactly once
            int compressedImages = 0;
            int skippedImages = 0;
            long totalOriginalBytes = 0;
            long totalCompressedBytes = 0;

            // Minimum dimensions to preserve reasonable quality
            int MIN_WIDTH = 400; // Higher minimum
            int MIN_HEIGHT = 400; // Higher minimum
            for (Entry<String, List<ImageReference>> entry : uniqueImages.entrySet()) {
                String imageHash = entry.getKey();
                List<ImageReference> references = entry.getValue();

            log.info("PDF has {} pages", doc.getNumberOfPages());
                if (references.isEmpty()) continue;

            for (int pageNum = 0; pageNum < doc.getNumberOfPages(); pageNum++) {
                PDPage page = doc.getPage(pageNum);
                PDResources res = page.getResources();
                // Get the first instance of this image
                ImageReference firstRef = references.get(0);
                PDPage firstPage = doc.getPage(firstRef.pageNum);
                PDResources firstPageResources = firstPage.getResources();
                PDImageXObject originalImage =
                        (PDImageXObject) firstPageResources.getXObject(firstRef.name);

                if (res == null || res.getXObjectNames() == null) {
                    continue;
                }
                // Track original size
                int originalSize = (int) originalImage.getCOSObject().getLength();
                totalOriginalBytes += originalSize;

                int pageImages = 0;
                // Process this unique image once
                BufferedImage processedImage =
                        processAndCompressImage(
                                originalImage, scaleFactor, jpegQuality, convertToGrayscale);

                for (COSName name : res.getXObjectNames()) {
                    String imageName = name.getName();
                if (processedImage != null) {
                    // Convert to bytes for storage
                    byte[] compressedData = convertToBytes(processedImage, jpegQuality);

                    // Skip already processed images
                    if (processedImages.contains(imageName)) {
                        skippedImages++;
                        continue;
                    }
                    // Check if compression is beneficial
                    if (compressedData.length < originalSize || convertToGrayscale) {
                        // Create a single compressed version
                        PDImageXObject compressedImage =
                                PDImageXObject.createFromByteArray(
                                        doc,
                                        compressedData,
                                        originalImage.getCOSObject().toString());

                    PDXObject xobj = res.getXObject(name);
                    if (!(xobj instanceof PDImageXObject)) {
                        continue;
                    }
                        // Store the compressed version only once in our map
                        compressedVersions.put(imageHash, compressedImage);

                    totalImages++;
                    pageImages++;
                    PDImageXObject image = (PDImageXObject) xobj;
                    BufferedImage bufferedImage = image.getImage();

                    int originalWidth = bufferedImage.getWidth();
                    int originalHeight = bufferedImage.getHeight();

                    log.info(
                            "Page {}, Image {}: Original dimensions: {}x{}",
                            pageNum + 1,
                            imageName,
                            originalWidth,
                            originalHeight);

                    // Skip if already small enough
                    if (originalWidth <= MIN_WIDTH || originalHeight <= MIN_HEIGHT) {
                        // Report compression stats
                        double reductionPercentage =
                                100.0 - ((compressedData.length * 100.0) / originalSize);
                        log.info(
                                "Page {}, Image {}: Skipping - below minimum dimensions threshold",
                                pageNum + 1,
                                imageName);
                        skippedImages++;
                        processedImages.add(imageName);
                        continue;
                    }
                                "Image hash {}: Compressed from {} to {} (reduced by {}%)",
                                imageHash,
                                GeneralUtils.formatBytes(originalSize),
                                GeneralUtils.formatBytes(compressedData.length),
                                String.format("%.1f", reductionPercentage));

                    // Adjust scale factor for very large or very small images
                    double adjustedScaleFactor = scaleFactor;
                    if (originalWidth > 3000 || originalHeight > 3000) {
                        // More aggressive for very large images
                        adjustedScaleFactor = Math.min(scaleFactor, 0.75);
                        log.info(
                                "Page {}, Image {}: Very large image, using more aggressive scale:"
                                        + " {}",
                                pageNum + 1,
                                imageName,
                                adjustedScaleFactor);
                    } else if (originalWidth < 1000 || originalHeight < 1000) {
                        // More conservative for smaller images
                        adjustedScaleFactor = Math.max(scaleFactor, 0.9);
                        log.info(
                                "Page {}, Image {}: Smaller image, using conservative scale: {}",
                                pageNum + 1,
                                imageName,
                                adjustedScaleFactor);
                    }
                        // Replace ALL instances with the compressed version
                        for (ImageReference ref : references) {
                            // Get the page and resources when needed
                            PDPage page = doc.getPage(ref.pageNum);
                            PDResources resources = page.getResources();
                            resources.put(ref.name, compressedImage);

                    int newWidth = (int) (originalWidth * adjustedScaleFactor);
                    int newHeight = (int) (originalHeight * adjustedScaleFactor);

                    // Ensure minimum dimensions
                    newWidth = Math.max(newWidth, MIN_WIDTH);
                    newHeight = Math.max(newHeight, MIN_HEIGHT);

                    // Skip if change is negligible
                    if ((double) newWidth / originalWidth > 0.95
                            && (double) newHeight / originalHeight > 0.95) {
                        log.info(
                                "Page {}, Image {}: Change too small, skipping compression",
                                pageNum + 1,
                                imageName);
                        skippedImages++;
                        processedImages.add(imageName);
                        continue;
                    }

                    log.info(
                            "Page {}, Image {}: Resizing to {}x{} ({}% of original)",
                            pageNum + 1,
                            imageName,
                            newWidth,
                            newHeight,
                            Math.round((newWidth * 100.0) / originalWidth));

                    // Use high quality scaling
                    BufferedImage scaledImage =
                            new BufferedImage(
                                    newWidth,
                                    newHeight,
                                    bufferedImage.getColorModel().hasAlpha()
                                            ? BufferedImage.TYPE_INT_ARGB
                                            : BufferedImage.TYPE_INT_RGB);

                    Graphics2D g2d = scaledImage.createGraphics();
                    g2d.setRenderingHint(
                            RenderingHints.KEY_INTERPOLATION,
                            RenderingHints.VALUE_INTERPOLATION_BICUBIC);
                    g2d.setRenderingHint(
                            RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
                    g2d.setRenderingHint(
                            RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
                    g2d.drawImage(bufferedImage, 0, 0, newWidth, newHeight, null);
                    g2d.dispose();

                    // Choose appropriate format and compression
                    String format = bufferedImage.getColorModel().hasAlpha() ? "png" : "jpeg";

                    // First get the actual size of the original image by encoding it to the chosen
                    // format
                    ByteArrayOutputStream originalImageStream = new ByteArrayOutputStream();
                    if ("jpeg".equals(format)) {
                        // Get the best available JPEG writer (prioritizes TwelveMonkeys if
                        // available)
                        Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("jpeg");
                        ImageWriter writer = null;

                        // Prefer TwelveMonkeys writer if available
                        while (writers.hasNext()) {
                            ImageWriter candidate = writers.next();
                            if (candidate.getClass().getName().contains("twelvemonkeys")) {
                                writer = candidate;
                                break;
                            }
                        }
                        if (writer == null) {
                            writer = ImageIO.getImageWritersByFormatName("jpeg").next();
                            log.info(
                                    "Replaced image on page {} with compressed version",
                                    ref.pageNum + 1);
                        }

                        JPEGImageWriteParam param =
                                (JPEGImageWriteParam) writer.getDefaultWriteParam();

                        // Set advanced compression parameters
                        param.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
                        param.setCompressionQuality(jpegQuality);
                        param.setOptimizeHuffmanTables(true); // Better compression
                        param.setProgressiveMode(
                                ImageWriteParam.MODE_DEFAULT); // Progressive scanning

                        // Write compressed image
                        try (ImageOutputStream ios =
                                ImageIO.createImageOutputStream(originalImageStream)) {
                            writer.setOutput(ios);
                            writer.write(null, new IIOImage(scaledImage, null, null), param);
                        }
                        writer.dispose();
                        totalCompressedBytes += compressedData.length * references.size();
                        compressedImages++;
                    } else {
                        ImageIO.write(bufferedImage, format, originalImageStream);
                    }
                    int originalEncodedSize = (int) image.getCOSObject().getLength();
                    originalImageStream.close();

                    // Now compress the scaled image
                    ByteArrayOutputStream compressedImageStream = new ByteArrayOutputStream();
                    if ("jpeg".equals(format)) {
                        Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName(format);
                        if (writers.hasNext()) {
                            ImageWriter writer = writers.next();
                            ImageWriteParam param = writer.getDefaultWriteParam();

                            if (param.canWriteCompressed()) {
                                param.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
                                param.setCompressionQuality(jpegQuality);

                                ImageOutputStream imageOut =
                                        ImageIO.createImageOutputStream(compressedImageStream);
                                writer.setOutput(imageOut);
                                writer.write(null, new IIOImage(scaledImage, null, null), param);
                                writer.dispose();
                                imageOut.close();
                            } else {
                                ImageIO.write(scaledImage, format, compressedImageStream);
                            }
                        } else {
                            ImageIO.write(scaledImage, format, compressedImageStream);
                        }
                    } else {
                        ImageIO.write(scaledImage, format, compressedImageStream);
                    }
                    byte[] imageBytes = compressedImageStream.toByteArray();
                    compressedImageStream.close();

                    // Format sizes using our utility method
                    String originalSizeStr = GeneralUtils.formatBytes(originalEncodedSize);
                    String compressedSizeStr = GeneralUtils.formatBytes(imageBytes.length);

                    // Calculate reduction percentage (how much smaller the new file is)
                    double reductionPercentage =
                            100.0 - ((imageBytes.length * 100.0) / originalEncodedSize);

                    if (imageBytes.length >= originalEncodedSize) {
                        log.info(
                                "Page {}, Image {}: Compressed size {} not smaller than original"
                                        + " {}, skipping replacement",
                                pageNum + 1,
                                imageName,
                                GeneralUtils.formatBytes(imageBytes.length),
                                GeneralUtils.formatBytes(originalEncodedSize));

                        // Accumulate original size for both counters (no change)
                        totalOriginalBytes += originalEncodedSize;
                        totalCompressedBytes += originalEncodedSize;
                        log.info("Image hash {}: Compression not beneficial, skipping", imageHash);
                        totalCompressedBytes += originalSize * references.size();
                        skippedImages++;
                        processedImages.add(imageName);
                        continue;
                    }
                    log.info(
                            "Page {}, Image {}: Compressed from {} to {} (reduced by {}%)",
                            pageNum + 1,
                            imageName,
                            originalSizeStr,
                            compressedSizeStr,
                            String.format("%.1f", reductionPercentage));

                    // Only replace if compressed size is smaller
                    PDImageXObject compressedImage =
                            PDImageXObject.createFromByteArray(
                                    doc, imageBytes, image.getCOSObject().toString());
                    res.put(name, compressedImage);

                    // Update counters with compressed size
                    totalOriginalBytes += originalEncodedSize;
                    totalCompressedBytes += imageBytes.length;
                    compressedImages++;
                    processedImages.add(imageName);
                } else {
                    log.info("Image hash {}: Not suitable for compression, skipping", imageHash);
                    totalCompressedBytes += originalSize * references.size();
                    skippedImages++;
                }
            }

            // Log overall image compression statistics
            // Log compression statistics
            double overallImageReduction =
                    totalOriginalBytes > 0
                            ? 100.0 - ((totalCompressedBytes * 100.0) / totalOriginalBytes)
                            : 0;

            log.info(
                    "Image compression summary - Total: {}, Compressed: {}, Skipped: {}",
                    totalImages,
                    "Image compression summary - Total unique: {}, Compressed: {}, Skipped: {}, Duplicates: {}",
                    uniqueImagesCount,
                    compressedImages,
                    skippedImages);
                    skippedImages,
                    duplicatedImages);
            log.info(
                    "Total original image size: {}, compressed: {} (reduced by {}%)",
                    GeneralUtils.formatBytes(totalOriginalBytes),
                    GeneralUtils.formatBytes(totalCompressedBytes),
                    String.format("%.1f", overallImageReduction));

            // Free memory before saving
            compressedVersions.clear();
            uniqueImages.clear();

            // Save the document
            log.info("Saving compressed PDF to {}", pdfFile.toString());
            doc.save(pdfFile.toString());
            log.info("Saving compressed PDF to {}", newCompressedPDF.toString());
            doc.save(newCompressedPDF.toString());

            // Log overall file size reduction
            long compressedFileSize = Files.size(pdfFile);
            long compressedFileSize = Files.size(newCompressedPDF);
            double overallReduction = 100.0 - ((compressedFileSize * 100.0) / originalFileSize);
            log.info(
                    "Overall PDF compression: {} → {} (reduced by {}%)",
                    GeneralUtils.formatBytes(originalFileSize),
                    GeneralUtils.formatBytes(compressedFileSize),
                    String.format("%.1f", overallReduction));
            return newCompressedPDF;
        }
    }
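The rewritten compressImagesInPDF no longer overwrites its input file; it writes the result to a fresh temp file and returns that Path. A minimal, self-contained sketch of the new calling pattern (the stub body and class name are illustrative, not from this commit):

```java
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

class CompressionFlowSketch {
    // Stand-in with the same shape as CompressController#compressImagesInPDF.
    static Path compressImagesInPDF(Path in, double scale, float quality, boolean gray)
            throws Exception {
        return Files.createTempFile("compressedPDF", ".pdf"); // placeholder body
    }

    public static void main(String[] args) throws Exception {
        Path original = Files.createTempFile("input_", ".pdf");
        List<Path> tempFiles = new ArrayList<>();
        tempFiles.add(original);
        try {
            // Each step yields a new file; the old one stays on the cleanup list.
            Path current = compressImagesInPDF(original, 0.85, 0.8f, false);
            tempFiles.add(current);
        } finally {
            for (Path p : tempFiles) Files.deleteIfExists(p);
        }
    }
}
```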
    private BufferedImage convertToGrayscale(BufferedImage image) {
        BufferedImage grayImage =
                new BufferedImage(
                        image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY);

        Graphics2D g = grayImage.createGraphics();
        g.drawImage(image, 0, 0, null);
        g.dispose();

        return grayImage;
    }

    /**
     * Processes and compresses an image if beneficial. Returns the processed image if compression
     * is worthwhile, null otherwise.
     */
    private BufferedImage processAndCompressImage(
            PDImageXObject image, double scaleFactor, float jpegQuality, boolean convertToGrayscale)
            throws IOException {
        BufferedImage bufferedImage = image.getImage();
        int originalWidth = bufferedImage.getWidth();
        int originalHeight = bufferedImage.getHeight();

        // Minimum dimensions to preserve reasonable quality
        int MIN_WIDTH = 400;
        int MIN_HEIGHT = 400;

        log.info("Original dimensions: {}x{}", originalWidth, originalHeight);

        // Skip if already small enough
        if ((originalWidth <= MIN_WIDTH || originalHeight <= MIN_HEIGHT) && !convertToGrayscale) {
            log.info("Skipping - below minimum dimensions threshold");
            return null;
        }

        // Convert to grayscale first if requested (before resizing for better quality)
        if (convertToGrayscale) {
            bufferedImage = convertToGrayscale(bufferedImage);
            log.info("Converted image to grayscale");
        }

        // Adjust scale factor for very large or very small images
        double adjustedScaleFactor = scaleFactor;
        if (originalWidth > 3000 || originalHeight > 3000) {
            // More aggressive for very large images
            adjustedScaleFactor = Math.min(scaleFactor, 0.75);
            log.info("Very large image, using more aggressive scale: {}", adjustedScaleFactor);
        } else if (originalWidth < 1000 || originalHeight < 1000) {
            // More conservative for smaller images
            adjustedScaleFactor = Math.max(scaleFactor, 0.9);
            log.info("Smaller image, using conservative scale: {}", adjustedScaleFactor);
        }

        int newWidth = (int) (originalWidth * adjustedScaleFactor);
        int newHeight = (int) (originalHeight * adjustedScaleFactor);

        // Ensure minimum dimensions
        newWidth = Math.max(newWidth, MIN_WIDTH);
        newHeight = Math.max(newHeight, MIN_HEIGHT);

        // Skip if change is negligible
        if ((double) newWidth / originalWidth > 0.95
                && (double) newHeight / originalHeight > 0.95
                && !convertToGrayscale) {
            log.info("Change too small, skipping compression");
            return null;
        }

        log.info(
                "Resizing to {}x{} ({}% of original)",
                newWidth, newHeight, Math.round((newWidth * 100.0) / originalWidth));

        BufferedImage scaledImage;
        if (convertToGrayscale) {
            // If already grayscale, maintain the grayscale format
            scaledImage = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_BYTE_GRAY);
        } else {
            // Otherwise use original color model
            scaledImage =
                    new BufferedImage(
                            newWidth,
                            newHeight,
                            bufferedImage.getColorModel().hasAlpha()
                                    ? BufferedImage.TYPE_INT_ARGB
                                    : BufferedImage.TYPE_INT_RGB);
        }
        Graphics2D g2d = scaledImage.createGraphics();
        g2d.setRenderingHint(
                RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
        g2d.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
        g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
        g2d.drawImage(bufferedImage, 0, 0, newWidth, newHeight, null);
        g2d.dispose();

        return scaledImage;
    }
    /**
     * Converts a BufferedImage to a byte array with specified JPEG quality. Checks if compression
     * is beneficial compared to original.
     */
    private byte[] convertToBytes(BufferedImage scaledImage, float jpegQuality) throws IOException {
        String format = scaledImage.getColorModel().hasAlpha() ? "png" : "jpeg";
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

        if ("jpeg".equals(format)) {
            // Get the best available JPEG writer
            Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("jpeg");
            ImageWriter writer = writers.next();

            JPEGImageWriteParam param = (JPEGImageWriteParam) writer.getDefaultWriteParam();

            // Set compression parameters
            param.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
            param.setCompressionQuality(jpegQuality);
            param.setOptimizeHuffmanTables(true); // Better compression
            param.setProgressiveMode(ImageWriteParam.MODE_DEFAULT); // Progressive scanning

            // Write compressed image
            try (ImageOutputStream ios = ImageIO.createImageOutputStream(outputStream)) {
                writer.setOutput(ios);
                writer.write(null, new IIOImage(scaledImage, null, null), param);
            }
            writer.dispose();
        } else {
            ImageIO.write(scaledImage, format, outputStream);
        }

        return outputStream.toByteArray();
    }

    /** Modified hash function to consistently identify identical image content */
    private String generateImageHash(PDImageXObject image) {
        try {
            // Create a stream for the raw stream data
            try (InputStream stream = image.getCOSObject().createRawInputStream()) {
                // Read up to first 8KB of data for the hash
                byte[] buffer = new byte[8192];
                int bytesRead = stream.read(buffer);
                if (bytesRead > 0) {
                    byte[] dataToHash =
                            bytesRead == buffer.length ? buffer : Arrays.copyOf(buffer, bytesRead);
                    return bytesToHexString(generatMD5(dataToHash));
                }
                return "empty-stream";
            }
        } catch (Exception e) {
            log.error("Error generating image hash", e);
            return "fallback-" + System.identityHashCode(image);
        }
    }

    private String bytesToHexString(byte[] bytes) {
        StringBuilder sb = new StringBuilder();
        for (byte b : bytes) {
            sb.append(String.format("%02x", b));
        }
        return sb.toString();
    }

    private byte[] generatMD5(byte[] data) throws IOException {
        try {
            MessageDigest md = MessageDigest.getInstance("MD5");
            return md.digest(data); // Get the MD5 hash of the image bytes
        } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException("MD5 algorithm not available", e);
        }
    }

    private byte[] generateImageMD5(PDImageXObject image) throws IOException {
        return generatMD5(ImageProcessingUtils.getImageData(image.getImage()));
    }

    /** Generates a hash string from a byte array */
    private String generateHashFromBytes(byte[] data) {
        try {
            // Use the existing method to generate MD5 hash
            byte[] hash = generatMD5(data);
            return bytesToHexString(hash);
        } catch (Exception e) {
            log.error("Error generating hash from bytes", e);
            // Return a unique string as fallback
            return "fallback-" + System.identityHashCode(data);
        }
    }
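generateImageHash above keys the deduplication map on an MD5 of just the first 8 KB of the image's raw stream, so byte-identical embedded images collapse to one entry. A standalone illustration of that keying idea (class and method names are hypothetical):

```java
import java.security.MessageDigest;
import java.util.Arrays;

class ImageHashSketch {
    static String hashKey(byte[] rawStream) throws Exception {
        // Hash at most the first 8 KB, mirroring the buffer size used above.
        byte[] head = Arrays.copyOf(rawStream, Math.min(rawStream.length, 8192));
        StringBuilder sb = new StringBuilder();
        for (byte b : MessageDigest.getInstance("MD5").digest(head)) {
            sb.append(String.format("%02x", b));
        }
        return sb.toString();
    }

    public static void main(String[] args) throws Exception {
        byte[] a = "same image bytes".getBytes();
        byte[] b = "same image bytes".getBytes();
        // Identical content -> identical key -> the image is compressed once
        // and every page reference is pointed at the shared compressed copy.
        System.out.println(hashKey(a).equals(hashKey(b))); // prints: true
    }
}
```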
@@ -392,7 +465,7 @@ public class CompressController {
        MultipartFile inputFile = request.getFileInput();
        Integer optimizeLevel = request.getOptimizeLevel();
        String expectedOutputSizeString = request.getExpectedOutputSize();

        Boolean convertToGrayscale = request.getGrayscale();
        if (expectedOutputSizeString == null && optimizeLevel == null) {
            throw new Exception("Both expected output size and optimize level are not specified");
        }
@@ -404,48 +477,61 @@ public class CompressController {
            autoMode = true;
        }

        Path tempInputFile = Files.createTempFile("input_", ".pdf");
        inputFile.transferTo(tempInputFile.toFile());

        long inputFileSize = Files.size(tempInputFile);

        Path tempOutputFile = null;
        byte[] pdfBytes;
        // Create initial input file
        Path originalFile = Files.createTempFile("input_", ".pdf");
        inputFile.transferTo(originalFile.toFile());
        long inputFileSize = Files.size(originalFile);

        // Start with original as current working file
        Path currentFile = originalFile;

        // Keep track of all temporary files for cleanup
        List<Path> tempFiles = new ArrayList<>();
        tempFiles.add(originalFile);

        try {
            tempOutputFile = Files.createTempFile("output_", ".pdf");

            if (autoMode) {
                double sizeReductionRatio = expectedOutputSize / (double) inputFileSize;
                optimizeLevel = determineOptimizeLevel(sizeReductionRatio);
            }

            boolean sizeMet = false;
            boolean imageCompressionApplied = false; // Track if we've already compressed images
            boolean imageCompressionApplied = false;
            boolean qpdfCompressionApplied = false;

            while (!sizeMet && optimizeLevel <= 9) {
                // Apply appropriate compression based on level

                // Levels 4-9: Apply image compression
                if (optimizeLevel >= 4 && !imageCompressionApplied) {
                // Apply image compression for levels 4-9
                if ((optimizeLevel >= 4 || Boolean.TRUE.equals(convertToGrayscale))
                        && !imageCompressionApplied) {
                    double scaleFactor = getScaleFactorForLevel(optimizeLevel);
                    float jpegQuality = getJpegQualityForLevel(optimizeLevel);
                    compressImagesInPDF(tempInputFile, scaleFactor, jpegQuality);
                    imageCompressionApplied = true; // Mark that we've compressed images

                    // Use the returned path from compressImagesInPDF
                    Path compressedImageFile = compressImagesInPDF(
                            currentFile,
                            scaleFactor,
                            jpegQuality,
                            Boolean.TRUE.equals(convertToGrayscale));

                    // Add to temp files list and update current file
                    tempFiles.add(compressedImageFile);
                    currentFile = compressedImageFile;
                    imageCompressionApplied = true;
                }

                // All levels (1-9): Apply QPDF compression
                // Apply QPDF compression for all levels
                if (!qpdfCompressionApplied) {
                    long preQpdfSize = Files.size(tempInputFile);
                    long preQpdfSize = Files.size(currentFile);
                    log.info("Pre-QPDF file size: {}", GeneralUtils.formatBytes(preQpdfSize));

                    // For levels 1-3, map to qpdf compression levels 1-9
                    int qpdfCompressionLevel = optimizeLevel;
                    if (optimizeLevel <= 3) {
                        qpdfCompressionLevel = optimizeLevel * 3; // Level 1->3, 2->6, 3->9
                    } else {
                        qpdfCompressionLevel = 9; // Max QPDF compression for levels 4-9
                    }
                    // Map optimization levels to QPDF compression levels
                    int qpdfCompressionLevel = optimizeLevel <= 3
                            ? optimizeLevel * 3 // Level 1->3, 2->6, 3->9
                            : 9; // Max compression for levels 4-9

                    // Create output file for QPDF
                    Path qpdfOutputFile = Files.createTempFile("qpdf_output_", ".pdf");
                    tempFiles.add(qpdfOutputFile);

                    // Run QPDF optimization
                    List<String> command = new ArrayList<>();
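The conditional above collapses the old if/else into a single mapping from the user-facing optimize level to qpdf's compression level. Restated as a tiny helper with its sample outputs (helper and class names assumed):

```java
class QpdfLevelSketch {
    static int qpdfCompressionLevel(int optimizeLevel) {
        // Levels 1-3 scale to 3/6/9; levels 4-9 always use qpdf's maximum.
        return optimizeLevel <= 3 ? optimizeLevel * 3 : 9;
    }

    public static void main(String[] args) {
        for (int level = 1; level <= 9; level++) {
            System.out.println(level + " -> " + qpdfCompressionLevel(level));
        }
        // 1 -> 3, 2 -> 6, 3 -> 9, then 9 for every level from 4 upward
    }
}
```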
@@ -460,49 +546,50 @@ public class CompressController {
                    command.add("--compression-level=" + qpdfCompressionLevel);
                    command.add("--compress-streams=y");
                    command.add("--object-streams=generate");
                    command.add(tempInputFile.toString());
                    command.add(tempOutputFile.toString());
                    command.add(currentFile.toString());
                    command.add(qpdfOutputFile.toString());

                    ProcessExecutorResult returnCode = null;
                    try {
                        returnCode =
                                ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
                                        .runCommandWithOutputHandling(command);
                        returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
                                .runCommandWithOutputHandling(command);
                        qpdfCompressionApplied = true;

                        // Update current file to the QPDF output
                        currentFile = qpdfOutputFile;

                        long postQpdfSize = Files.size(currentFile);
                        double qpdfReduction = 100.0 - ((postQpdfSize * 100.0) / preQpdfSize);
                        log.info(
                                "Post-QPDF file size: {} (reduced by {}%)",
                                GeneralUtils.formatBytes(postQpdfSize),
                                String.format("%.1f", qpdfReduction));

                    } catch (Exception e) {
                        if (returnCode != null && returnCode.getRc() != 3) {
                            throw e;
                        }
                        // If QPDF fails, keep using the current file
                        log.warn("QPDF compression failed, continuing with current file");
                    }
                    long postQpdfSize = Files.size(tempOutputFile);
                    double qpdfReduction = 100.0 - ((postQpdfSize * 100.0) / preQpdfSize);
                    log.info(
                            "Post-QPDF file size: {} (reduced by {}%)",
                            GeneralUtils.formatBytes(postQpdfSize), String.format("%.1f", qpdfReduction));

                } else {
                    tempOutputFile = tempInputFile;
                }

                // Check if file size is within expected size or not auto mode
                long outputFileSize = Files.size(tempOutputFile);
                long outputFileSize = Files.size(currentFile);
                if (outputFileSize <= expectedOutputSize || !autoMode) {
                    sizeMet = true;
                } else {
                    int newOptimizeLevel =
                            incrementOptimizeLevel(
                                    optimizeLevel, outputFileSize, expectedOutputSize);
                    int newOptimizeLevel = incrementOptimizeLevel(
                            optimizeLevel, outputFileSize, expectedOutputSize);

                    // Check if we can't increase the level further
                    if (newOptimizeLevel == optimizeLevel) {
                        if (autoMode) {
                            log.info(
                                    "Maximum optimization level reached without meeting target"
                                            + " size.");
                            log.info("Maximum optimization level reached without meeting target size.");
                            sizeMet = true;
                        }
                    } else {
                        // Reset image compression if moving to a new level
                        // Reset flags for next iteration with higher optimization level
                        imageCompressionApplied = false;
                        qpdfCompressionApplied = false;
                        optimizeLevel = newOptimizeLevel;
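For context, the qpdf flags assembled above amount to the following plain ProcessBuilder invocation (a sketch assuming a qpdf binary on PATH; the project itself routes the command through its ProcessExecutor wrapper):

```java
import java.nio.file.Path;
import java.util.List;

class QpdfInvokeSketch {
    public static void main(String[] args) throws Exception {
        Path in = Path.of("in.pdf");
        Path out = Path.of("out.pdf");
        List<String> command = List.of(
                "qpdf",
                "--compression-level=9",
                "--compress-streams=y",
                "--object-streams=generate",
                in.toString(),
                out.toString());
        Process p = new ProcessBuilder(command).inheritIO().start();
        // qpdf exits with 3 when it succeeded with warnings, which is why the
        // controller above tolerates getRc() == 3 instead of rethrowing.
        System.out.println("qpdf exit code: " + p.waitFor());
    }
}
```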
@@ -510,27 +597,30 @@ public class CompressController {
                    }
                }
            }

            // Read the optimized PDF file
            pdfBytes = Files.readAllBytes(tempOutputFile);
            Path finalFile = tempOutputFile;

            // Check if optimized file is larger than the original
            if (pdfBytes.length > inputFileSize) {
                log.warn(
                        "Optimized file is larger than the original. Returning the original file"
                                + " instead.");
                finalFile = tempInputFile;
            long finalFileSize = Files.size(currentFile);
            if (finalFileSize > inputFileSize) {
                log.warn("Optimized file is larger than the original. Using the original file instead.");
                // Use the stored reference to the original file
                currentFile = originalFile;
            }

            String outputFilename =
                    Filenames.toSimpleFileName(inputFile.getOriginalFilename())
            String outputFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename())
                    .replaceFirst("[.][^.]+$", "")
                    + "_Optimized.pdf";

            return WebResponseUtils.pdfDocToWebResponse(
                    pdfDocumentFactory.load(finalFile.toFile()), outputFilename);
                    pdfDocumentFactory.load(currentFile.toFile()), outputFilename);

        } finally {
            Files.deleteIfExists(tempOutputFile);
            // Clean up all temporary files
            for (Path tempFile : tempFiles) {
                try {
                    Files.deleteIfExists(tempFile);
                } catch (IOException e) {
                    log.warn("Failed to delete temporary file: " + tempFile, e);
                }
            }
        }
    }
@@ -82,6 +82,21 @@ public class CustomPDFDocumentFactory {
        return loadAdaptively(file, fileSize);
    }

    /**
     * Main entry point for loading a PDF document from a Path. Automatically selects the most
     * appropriate loading strategy.
     */
    public PDDocument load(Path path) throws IOException {
        if (path == null) {
            throw new IllegalArgumentException("File cannot be null");
        }

        long fileSize = Files.size(path);
        log.info("Loading PDF from file, size: {}MB", fileSize / (1024 * 1024));

        return loadAdaptively(path.toFile(), fileSize);
    }

    /** Load a PDF from byte array with automatic optimization. */
    public PDDocument load(byte[] input) throws IOException {
        if (input == null) {

@@ -246,6 +261,7 @@ public class CustomPDFDocumentFactory {
            removePassword(doc);
    }

    private PDDocument loadFromFile(File file, long size, StreamCacheCreateFunction cache)
            throws IOException {
        return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache);
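A usage sketch for the load(Path) overload added here (hypothetical helper method; the real caller is the compression flow above, which hands temp-file Paths straight to the factory):

```java
import java.nio.file.Path;

import org.apache.pdfbox.pdmodel.PDDocument;

import stirling.software.SPDF.service.CustomPDFDocumentFactory;

class LoadPathSketch {
    static void printPageCount(CustomPDFDocumentFactory factory, Path pdf) throws Exception {
        // The overload measures Files.size(path) itself and picks the adaptive
        // loading strategy, matching the existing load(File) behavior.
        try (PDDocument doc = factory.load(pdf)) {
            System.out.println("Pages: " + doc.getNumberOfPages());
        }
    }
}
```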
@@ -34,11 +34,15 @@
    <!-- Bootstrap -->
    <script th:src="@{'/js/thirdParty/popper.min.js'}"></script>
    <script th:src="@{'/js/thirdParty/bootstrap.min.js'}"></script>

    <link rel="stylesheet" th:href="@{'/css/bootstrap.min.css'}">

    <!-- Bootstrap Icons -->
    <link rel="stylesheet" th:href="@{'/css/bootstrap-icons.min.css'}">

    <!-- Pixel, doesn't collect any PII-->
    <img referrerpolicy="no-referrer-when-downgrade" src="https://pixel.stirlingpdf.com/a.png?x-pxid=4f5fa02f-a065-4efb-bb2c-24509a4b6b92" style="position: absolute; visibility: hidden;"/>

    <!-- Custom -->
    <link rel="stylesheet" th:href="@{'/css/general.css'}">
    <link rel="stylesheet" th:href="@{'/css/theme/theme.css'}">
@@ -1,24 +1,27 @@
package stirling.software.SPDF.controller.api;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import java.io.IOException;
import org.apache.pdfbox.pdmodel.PDPageTree;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.io.IOException;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.http.ResponseEntity;
import org.springframework.mock.web.MockMultipartFile;
import stirling.software.SPDF.service.CustomPDFDocumentFactory;

import stirling.software.SPDF.model.api.general.RotatePDFRequest;
import stirling.software.SPDF.service.CustomPDFDocumentFactory;

@ExtendWith(MockitoExtension.class)
public class RotationControllerTest {