Further compression fixes (#3177)

# Description of Changes

Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)

---

## Checklist

### General

- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing)
for more details.

---------

Co-authored-by: a <a>
This commit is contained in:
Anthony Stirling 2025-03-14 21:00:06 +00:00 committed by GitHub
parent 2848ccd12e
commit c7a8b9f011
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 427 additions and 314 deletions

View File

@ -25,7 +25,7 @@ ext {
} }
group = "stirling.software" group = "stirling.software"
version = "0.44.1" version = "0.44.2"
java { java {
// 17 is lowest but we support and recommend 21 // 17 is lowest but we support and recommend 21

View File

@ -3,13 +3,19 @@ package stirling.software.SPDF.controller.api.misc;
import java.awt.*; import java.awt.*;
import java.awt.image.BufferedImage; import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashSet; import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Map;
import java.util.Map.Entry;
import javax.imageio.IIOImage; import javax.imageio.IIOImage;
import javax.imageio.ImageIO; import javax.imageio.ImageIO;
@ -36,11 +42,15 @@ import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag; import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.misc.OptimizePdfRequest; import stirling.software.SPDF.model.api.misc.OptimizePdfRequest;
import stirling.software.SPDF.service.CustomPDFDocumentFactory; import stirling.software.SPDF.service.CustomPDFDocumentFactory;
import stirling.software.SPDF.utils.GeneralUtils; import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.ImageProcessingUtils;
import stirling.software.SPDF.utils.ProcessExecutor; import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult; import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils; import stirling.software.SPDF.utils.WebResponseUtils;
@ -58,81 +68,222 @@ public class CompressController {
this.pdfDocumentFactory = pdfDocumentFactory; this.pdfDocumentFactory = pdfDocumentFactory;
} }
private void compressImagesInPDF(Path pdfFile, double scaleFactor, float jpegQuality) @Data
@AllArgsConstructor
@NoArgsConstructor
private static class ImageReference {
int pageNum; // Page number where the image appears
COSName name; // The name used to reference this image
}
public Path compressImagesInPDF(
Path pdfFile, double scaleFactor, float jpegQuality, boolean convertToGrayscale)
throws Exception { throws Exception {
byte[] fileBytes = Files.readAllBytes(pdfFile); Path newCompressedPDF = Files.createTempFile("compressedPDF", ".pdf");
long originalFileSize = fileBytes.length; long originalFileSize = Files.size(pdfFile);
log.info( log.info(
"Starting image compression with scale factor: {} and JPEG quality: {} on file" "Starting image compression with scale factor: {}, JPEG quality: {}, grayscale: {} on file size: {}",
+ " size: {}",
scaleFactor, scaleFactor,
jpegQuality, jpegQuality,
convertToGrayscale,
GeneralUtils.formatBytes(originalFileSize)); GeneralUtils.formatBytes(originalFileSize));
// Track processed images to avoid recompression try (PDDocument doc = pdfDocumentFactory.load(pdfFile)) {
Set<String> processedImages = new HashSet<>();
// Collect all unique images by content hash
Map<String, List<ImageReference>> uniqueImages = new HashMap<>();
Map<String, PDImageXObject> compressedVersions = new HashMap<>();
try (PDDocument doc = pdfDocumentFactory.load(fileBytes)) {
int totalImages = 0; int totalImages = 0;
for (int pageNum = 0; pageNum < doc.getNumberOfPages(); pageNum++) {
PDPage page = doc.getPage(pageNum);
PDResources res = page.getResources();
if (res == null || res.getXObjectNames() == null) continue;
for (COSName name : res.getXObjectNames()) {
PDXObject xobj = res.getXObject(name);
if (!(xobj instanceof PDImageXObject)) continue;
totalImages++;
PDImageXObject image = (PDImageXObject) xobj;
String imageHash = generateImageHash(image);
// Store only page number and name reference
ImageReference ref = new ImageReference();
ref.pageNum = pageNum;
ref.name = name;
uniqueImages.computeIfAbsent(imageHash, k -> new ArrayList<>()).add(ref);
}
}
int uniqueImagesCount = uniqueImages.size();
int duplicatedImages = totalImages - uniqueImagesCount;
log.info(
"Found {} unique images and {} duplicated instances across {} pages",
uniqueImagesCount,
duplicatedImages,
doc.getNumberOfPages());
// SECOND PASS: Process each unique image exactly once
int compressedImages = 0; int compressedImages = 0;
int skippedImages = 0; int skippedImages = 0;
long totalOriginalBytes = 0; long totalOriginalBytes = 0;
long totalCompressedBytes = 0; long totalCompressedBytes = 0;
// Minimum dimensions to preserve reasonable quality for (Entry<String, List<ImageReference>> entry : uniqueImages.entrySet()) {
int MIN_WIDTH = 400; // Higher minimum String imageHash = entry.getKey();
int MIN_HEIGHT = 400; // Higher minimum List<ImageReference> references = entry.getValue();
log.info("PDF has {} pages", doc.getNumberOfPages()); if (references.isEmpty()) continue;
for (int pageNum = 0; pageNum < doc.getNumberOfPages(); pageNum++) { // Get the first instance of this image
PDPage page = doc.getPage(pageNum); ImageReference firstRef = references.get(0);
PDResources res = page.getResources(); PDPage firstPage = doc.getPage(firstRef.pageNum);
PDResources firstPageResources = firstPage.getResources();
PDImageXObject originalImage =
(PDImageXObject) firstPageResources.getXObject(firstRef.name);
if (res == null || res.getXObjectNames() == null) { // Track original size
continue; int originalSize = (int) originalImage.getCOSObject().getLength();
totalOriginalBytes += originalSize;
// Process this unique image once
BufferedImage processedImage =
processAndCompressImage(
originalImage, scaleFactor, jpegQuality, convertToGrayscale);
if (processedImage != null) {
// Convert to bytes for storage
byte[] compressedData = convertToBytes(processedImage, jpegQuality);
// Check if compression is beneficial
if (compressedData.length < originalSize || convertToGrayscale) {
// Create a single compressed version
PDImageXObject compressedImage =
PDImageXObject.createFromByteArray(
doc,
compressedData,
originalImage.getCOSObject().toString());
// Store the compressed version only once in our map
compressedVersions.put(imageHash, compressedImage);
// Report compression stats
double reductionPercentage =
100.0 - ((compressedData.length * 100.0) / originalSize);
log.info(
"Image hash {}: Compressed from {} to {} (reduced by {}%)",
imageHash,
GeneralUtils.formatBytes(originalSize),
GeneralUtils.formatBytes(compressedData.length),
String.format("%.1f", reductionPercentage));
// Replace ALL instances with the compressed version
for (ImageReference ref : references) {
// Get the page and resources when needed
PDPage page = doc.getPage(ref.pageNum);
PDResources resources = page.getResources();
resources.put(ref.name, compressedImage);
log.info(
"Replaced image on page {} with compressed version",
ref.pageNum + 1);
} }
int pageImages = 0; totalCompressedBytes += compressedData.length * references.size();
compressedImages++;
for (COSName name : res.getXObjectNames()) { } else {
String imageName = name.getName(); log.info("Image hash {}: Compression not beneficial, skipping", imageHash);
totalCompressedBytes += originalSize * references.size();
// Skip already processed images
if (processedImages.contains(imageName)) {
skippedImages++; skippedImages++;
continue; }
} else {
log.info("Image hash {}: Not suitable for compression, skipping", imageHash);
totalCompressedBytes += originalSize * references.size();
skippedImages++;
}
} }
PDXObject xobj = res.getXObject(name); // Log compression statistics
if (!(xobj instanceof PDImageXObject)) { double overallImageReduction =
continue; totalOriginalBytes > 0
? 100.0 - ((totalCompressedBytes * 100.0) / totalOriginalBytes)
: 0;
log.info(
"Image compression summary - Total unique: {}, Compressed: {}, Skipped: {}, Duplicates: {}",
uniqueImagesCount,
compressedImages,
skippedImages,
duplicatedImages);
log.info(
"Total original image size: {}, compressed: {} (reduced by {}%)",
GeneralUtils.formatBytes(totalOriginalBytes),
GeneralUtils.formatBytes(totalCompressedBytes),
String.format("%.1f", overallImageReduction));
// Free memory before saving
compressedVersions.clear();
uniqueImages.clear();
// Save the document
log.info("Saving compressed PDF to {}", newCompressedPDF.toString());
doc.save(newCompressedPDF.toString());
// Log overall file size reduction
long compressedFileSize = Files.size(newCompressedPDF);
double overallReduction = 100.0 - ((compressedFileSize * 100.0) / originalFileSize);
log.info(
"Overall PDF compression: {} → {} (reduced by {}%)",
GeneralUtils.formatBytes(originalFileSize),
GeneralUtils.formatBytes(compressedFileSize),
String.format("%.1f", overallReduction));
return newCompressedPDF;
} }
totalImages++; }
pageImages++;
PDImageXObject image = (PDImageXObject) xobj; private BufferedImage convertToGrayscale(BufferedImage image) {
BufferedImage grayImage =
new BufferedImage(
image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
Graphics2D g = grayImage.createGraphics();
g.drawImage(image, 0, 0, null);
g.dispose();
return grayImage;
}
/**
* Processes and compresses an image if beneficial. Returns the processed image if compression
* is worthwhile, null otherwise.
*/
private BufferedImage processAndCompressImage(
PDImageXObject image, double scaleFactor, float jpegQuality, boolean convertToGrayscale)
throws IOException {
BufferedImage bufferedImage = image.getImage(); BufferedImage bufferedImage = image.getImage();
int originalWidth = bufferedImage.getWidth(); int originalWidth = bufferedImage.getWidth();
int originalHeight = bufferedImage.getHeight(); int originalHeight = bufferedImage.getHeight();
log.info( // Minimum dimensions to preserve reasonable quality
"Page {}, Image {}: Original dimensions: {}x{}", int MIN_WIDTH = 400;
pageNum + 1, int MIN_HEIGHT = 400;
imageName,
originalWidth, log.info("Original dimensions: {}x{}", originalWidth, originalHeight);
originalHeight);
// Skip if already small enough // Skip if already small enough
if (originalWidth <= MIN_WIDTH || originalHeight <= MIN_HEIGHT) { if ((originalWidth <= MIN_WIDTH || originalHeight <= MIN_HEIGHT) && !convertToGrayscale) {
log.info( log.info("Skipping - below minimum dimensions threshold");
"Page {}, Image {}: Skipping - below minimum dimensions threshold", return null;
pageNum + 1, }
imageName);
skippedImages++; // Convert to grayscale first if requested (before resizing for better quality)
processedImages.add(imageName); if (convertToGrayscale) {
continue; bufferedImage = convertToGrayscale(bufferedImage);
log.info("Converted image to grayscale");
} }
// Adjust scale factor for very large or very small images // Adjust scale factor for very large or very small images
@ -140,20 +291,11 @@ public class CompressController {
if (originalWidth > 3000 || originalHeight > 3000) { if (originalWidth > 3000 || originalHeight > 3000) {
// More aggressive for very large images // More aggressive for very large images
adjustedScaleFactor = Math.min(scaleFactor, 0.75); adjustedScaleFactor = Math.min(scaleFactor, 0.75);
log.info( log.info("Very large image, using more aggressive scale: {}", adjustedScaleFactor);
"Page {}, Image {}: Very large image, using more aggressive scale:"
+ " {}",
pageNum + 1,
imageName,
adjustedScaleFactor);
} else if (originalWidth < 1000 || originalHeight < 1000) { } else if (originalWidth < 1000 || originalHeight < 1000) {
// More conservative for smaller images // More conservative for smaller images
adjustedScaleFactor = Math.max(scaleFactor, 0.9); adjustedScaleFactor = Math.max(scaleFactor, 0.9);
log.info( log.info("Smaller image, using conservative scale: {}", adjustedScaleFactor);
"Page {}, Image {}: Smaller image, using conservative scale: {}",
pageNum + 1,
imageName,
adjustedScaleFactor);
} }
int newWidth = (int) (originalWidth * adjustedScaleFactor); int newWidth = (int) (originalWidth * adjustedScaleFactor);
@ -165,196 +307,127 @@ public class CompressController {
// Skip if change is negligible // Skip if change is negligible
if ((double) newWidth / originalWidth > 0.95 if ((double) newWidth / originalWidth > 0.95
&& (double) newHeight / originalHeight > 0.95) { && (double) newHeight / originalHeight > 0.95
log.info( && !convertToGrayscale) {
"Page {}, Image {}: Change too small, skipping compression", log.info("Change too small, skipping compression");
pageNum + 1, return null;
imageName);
skippedImages++;
processedImages.add(imageName);
continue;
} }
log.info( log.info(
"Page {}, Image {}: Resizing to {}x{} ({}% of original)", "Resizing to {}x{} ({}% of original)",
pageNum + 1, newWidth, newHeight, Math.round((newWidth * 100.0) / originalWidth));
imageName,
newWidth,
newHeight,
Math.round((newWidth * 100.0) / originalWidth));
// Use high quality scaling BufferedImage scaledImage;
BufferedImage scaledImage = if (convertToGrayscale) {
// If already grayscale, maintain the grayscale format
scaledImage = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_BYTE_GRAY);
} else {
// Otherwise use original color model
scaledImage =
new BufferedImage( new BufferedImage(
newWidth, newWidth,
newHeight, newHeight,
bufferedImage.getColorModel().hasAlpha() bufferedImage.getColorModel().hasAlpha()
? BufferedImage.TYPE_INT_ARGB ? BufferedImage.TYPE_INT_ARGB
: BufferedImage.TYPE_INT_RGB); : BufferedImage.TYPE_INT_RGB);
}
Graphics2D g2d = scaledImage.createGraphics(); Graphics2D g2d = scaledImage.createGraphics();
g2d.setRenderingHint( g2d.setRenderingHint(
RenderingHints.KEY_INTERPOLATION, RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
RenderingHints.VALUE_INTERPOLATION_BICUBIC); g2d.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
g2d.setRenderingHint( g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
g2d.setRenderingHint(
RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
g2d.drawImage(bufferedImage, 0, 0, newWidth, newHeight, null); g2d.drawImage(bufferedImage, 0, 0, newWidth, newHeight, null);
g2d.dispose(); g2d.dispose();
// Choose appropriate format and compression return scaledImage;
String format = bufferedImage.getColorModel().hasAlpha() ? "png" : "jpeg"; }
/**
* Converts a BufferedImage to a byte array with specified JPEG quality. Checks if compression
* is beneficial compared to original.
*/
private byte[] convertToBytes(BufferedImage scaledImage, float jpegQuality) throws IOException {
String format = scaledImage.getColorModel().hasAlpha() ? "png" : "jpeg";
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
// First get the actual size of the original image by encoding it to the chosen
// format
ByteArrayOutputStream originalImageStream = new ByteArrayOutputStream();
if ("jpeg".equals(format)) { if ("jpeg".equals(format)) {
// Get the best available JPEG writer (prioritizes TwelveMonkeys if // Get the best available JPEG writer
// available)
Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("jpeg"); Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("jpeg");
ImageWriter writer = null; ImageWriter writer = writers.next();
// Prefer TwelveMonkeys writer if available JPEGImageWriteParam param = (JPEGImageWriteParam) writer.getDefaultWriteParam();
while (writers.hasNext()) {
ImageWriter candidate = writers.next();
if (candidate.getClass().getName().contains("twelvemonkeys")) {
writer = candidate;
break;
}
}
if (writer == null) {
writer = ImageIO.getImageWritersByFormatName("jpeg").next();
}
JPEGImageWriteParam param = // Set compression parameters
(JPEGImageWriteParam) writer.getDefaultWriteParam();
// Set advanced compression parameters
param.setCompressionMode(ImageWriteParam.MODE_EXPLICIT); param.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
param.setCompressionQuality(jpegQuality); param.setCompressionQuality(jpegQuality);
param.setOptimizeHuffmanTables(true); // Better compression param.setOptimizeHuffmanTables(true); // Better compression
param.setProgressiveMode( param.setProgressiveMode(ImageWriteParam.MODE_DEFAULT); // Progressive scanning
ImageWriteParam.MODE_DEFAULT); // Progressive scanning
// Write compressed image // Write compressed image
try (ImageOutputStream ios = try (ImageOutputStream ios = ImageIO.createImageOutputStream(outputStream)) {
ImageIO.createImageOutputStream(originalImageStream)) {
writer.setOutput(ios); writer.setOutput(ios);
writer.write(null, new IIOImage(scaledImage, null, null), param); writer.write(null, new IIOImage(scaledImage, null, null), param);
} }
writer.dispose(); writer.dispose();
} else { } else {
ImageIO.write(bufferedImage, format, originalImageStream); ImageIO.write(scaledImage, format, outputStream);
} }
int originalEncodedSize = (int) image.getCOSObject().getLength();
originalImageStream.close();
// Now compress the scaled image return outputStream.toByteArray();
ByteArrayOutputStream compressedImageStream = new ByteArrayOutputStream();
if ("jpeg".equals(format)) {
Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName(format);
if (writers.hasNext()) {
ImageWriter writer = writers.next();
ImageWriteParam param = writer.getDefaultWriteParam();
if (param.canWriteCompressed()) {
param.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
param.setCompressionQuality(jpegQuality);
ImageOutputStream imageOut =
ImageIO.createImageOutputStream(compressedImageStream);
writer.setOutput(imageOut);
writer.write(null, new IIOImage(scaledImage, null, null), param);
writer.dispose();
imageOut.close();
} else {
ImageIO.write(scaledImage, format, compressedImageStream);
} }
} else {
ImageIO.write(scaledImage, format, compressedImageStream); /** Modified hash function to consistently identify identical image content */
private String generateImageHash(PDImageXObject image) {
try {
// Create a stream for the raw stream data
try (InputStream stream = image.getCOSObject().createRawInputStream()) {
// Read up to first 8KB of data for the hash
byte[] buffer = new byte[8192];
int bytesRead = stream.read(buffer);
if (bytesRead > 0) {
byte[] dataToHash =
bytesRead == buffer.length ? buffer : Arrays.copyOf(buffer, bytesRead);
return bytesToHexString(generatMD5(dataToHash));
} }
} else { return "empty-stream";
ImageIO.write(scaledImage, format, compressedImageStream);
} }
byte[] imageBytes = compressedImageStream.toByteArray(); } catch (Exception e) {
compressedImageStream.close(); log.error("Error generating image hash", e);
return "fallback-" + System.identityHashCode(image);
// Format sizes using our utility method
String originalSizeStr = GeneralUtils.formatBytes(originalEncodedSize);
String compressedSizeStr = GeneralUtils.formatBytes(imageBytes.length);
// Calculate reduction percentage (how much smaller the new file is)
double reductionPercentage =
100.0 - ((imageBytes.length * 100.0) / originalEncodedSize);
if (imageBytes.length >= originalEncodedSize) {
log.info(
"Page {}, Image {}: Compressed size {} not smaller than original"
+ " {}, skipping replacement",
pageNum + 1,
imageName,
GeneralUtils.formatBytes(imageBytes.length),
GeneralUtils.formatBytes(originalEncodedSize));
// Accumulate original size for both counters (no change)
totalOriginalBytes += originalEncodedSize;
totalCompressedBytes += originalEncodedSize;
skippedImages++;
processedImages.add(imageName);
continue;
}
log.info(
"Page {}, Image {}: Compressed from {} to {} (reduced by {}%)",
pageNum + 1,
imageName,
originalSizeStr,
compressedSizeStr,
String.format("%.1f", reductionPercentage));
// Only replace if compressed size is smaller
PDImageXObject compressedImage =
PDImageXObject.createFromByteArray(
doc, imageBytes, image.getCOSObject().toString());
res.put(name, compressedImage);
// Update counters with compressed size
totalOriginalBytes += originalEncodedSize;
totalCompressedBytes += imageBytes.length;
compressedImages++;
processedImages.add(imageName);
} }
} }
// Log overall image compression statistics private String bytesToHexString(byte[] bytes) {
double overallImageReduction = StringBuilder sb = new StringBuilder();
totalOriginalBytes > 0 for (byte b : bytes) {
? 100.0 - ((totalCompressedBytes * 100.0) / totalOriginalBytes) sb.append(String.format("%02x", b));
: 0; }
return sb.toString();
}
log.info( private byte[] generatMD5(byte[] data) throws IOException {
"Image compression summary - Total: {}, Compressed: {}, Skipped: {}", try {
totalImages, MessageDigest md = MessageDigest.getInstance("MD5");
compressedImages, return md.digest(data); // Get the MD5 hash of the image bytes
skippedImages); } catch (NoSuchAlgorithmException e) {
log.info( throw new RuntimeException("MD5 algorithm not available", e);
"Total original image size: {}, compressed: {} (reduced by {}%)", }
GeneralUtils.formatBytes(totalOriginalBytes), }
GeneralUtils.formatBytes(totalCompressedBytes),
String.format("%.1f", overallImageReduction));
// Save the document private byte[] generateImageMD5(PDImageXObject image) throws IOException {
log.info("Saving compressed PDF to {}", pdfFile.toString()); return generatMD5(ImageProcessingUtils.getImageData(image.getImage()));
doc.save(pdfFile.toString()); }
// Log overall file size reduction /** Generates a hash string from a byte array */
long compressedFileSize = Files.size(pdfFile); private String generateHashFromBytes(byte[] data) {
double overallReduction = 100.0 - ((compressedFileSize * 100.0) / originalFileSize); try {
log.info( // Use the existing method to generate MD5 hash
"Overall PDF compression: {} → {} (reduced by {}%)", byte[] hash = generatMD5(data);
GeneralUtils.formatBytes(originalFileSize), return bytesToHexString(hash);
GeneralUtils.formatBytes(compressedFileSize), } catch (Exception e) {
String.format("%.1f", overallReduction)); log.error("Error generating hash from bytes", e);
// Return a unique string as fallback
return "fallback-" + System.identityHashCode(data);
} }
} }
@ -392,7 +465,7 @@ public class CompressController {
MultipartFile inputFile = request.getFileInput(); MultipartFile inputFile = request.getFileInput();
Integer optimizeLevel = request.getOptimizeLevel(); Integer optimizeLevel = request.getOptimizeLevel();
String expectedOutputSizeString = request.getExpectedOutputSize(); String expectedOutputSizeString = request.getExpectedOutputSize();
Boolean convertToGrayscale = request.getGrayscale();
if (expectedOutputSizeString == null && optimizeLevel == null) { if (expectedOutputSizeString == null && optimizeLevel == null) {
throw new Exception("Both expected output size and optimize level are not specified"); throw new Exception("Both expected output size and optimize level are not specified");
} }
@ -404,48 +477,61 @@ public class CompressController {
autoMode = true; autoMode = true;
} }
Path tempInputFile = Files.createTempFile("input_", ".pdf"); // Create initial input file
inputFile.transferTo(tempInputFile.toFile()); Path originalFile = Files.createTempFile("input_", ".pdf");
inputFile.transferTo(originalFile.toFile());
long inputFileSize = Files.size(originalFile);
long inputFileSize = Files.size(tempInputFile); // Start with original as current working file
Path currentFile = originalFile;
// Keep track of all temporary files for cleanup
List<Path> tempFiles = new ArrayList<>();
tempFiles.add(originalFile);
Path tempOutputFile = null;
byte[] pdfBytes;
try { try {
tempOutputFile = Files.createTempFile("output_", ".pdf");
if (autoMode) { if (autoMode) {
double sizeReductionRatio = expectedOutputSize / (double) inputFileSize; double sizeReductionRatio = expectedOutputSize / (double) inputFileSize;
optimizeLevel = determineOptimizeLevel(sizeReductionRatio); optimizeLevel = determineOptimizeLevel(sizeReductionRatio);
} }
boolean sizeMet = false; boolean sizeMet = false;
boolean imageCompressionApplied = false; // Track if we've already compressed images boolean imageCompressionApplied = false;
boolean qpdfCompressionApplied = false; boolean qpdfCompressionApplied = false;
while (!sizeMet && optimizeLevel <= 9) { while (!sizeMet && optimizeLevel <= 9) {
// Apply appropriate compression based on level // Apply image compression for levels 4-9
if ((optimizeLevel >= 4 || Boolean.TRUE.equals(convertToGrayscale))
// Levels 4-9: Apply image compression && !imageCompressionApplied) {
if (optimizeLevel >= 4 && !imageCompressionApplied) {
double scaleFactor = getScaleFactorForLevel(optimizeLevel); double scaleFactor = getScaleFactorForLevel(optimizeLevel);
float jpegQuality = getJpegQualityForLevel(optimizeLevel); float jpegQuality = getJpegQualityForLevel(optimizeLevel);
compressImagesInPDF(tempInputFile, scaleFactor, jpegQuality);
imageCompressionApplied = true; // Mark that we've compressed images // Use the returned path from compressImagesInPDF
Path compressedImageFile = compressImagesInPDF(
currentFile,
scaleFactor,
jpegQuality,
Boolean.TRUE.equals(convertToGrayscale));
// Add to temp files list and update current file
tempFiles.add(compressedImageFile);
currentFile = compressedImageFile;
imageCompressionApplied = true;
} }
// All levels (1-9): Apply QPDF compression // Apply QPDF compression for all levels
if (!qpdfCompressionApplied) { if (!qpdfCompressionApplied) {
long preQpdfSize = Files.size(tempInputFile); long preQpdfSize = Files.size(currentFile);
log.info("Pre-QPDF file size: {}", GeneralUtils.formatBytes(preQpdfSize)); log.info("Pre-QPDF file size: {}", GeneralUtils.formatBytes(preQpdfSize));
// For levels 1-3, map to qpdf compression levels 1-9 // Map optimization levels to QPDF compression levels
int qpdfCompressionLevel = optimizeLevel; int qpdfCompressionLevel = optimizeLevel <= 3
if (optimizeLevel <= 3) { ? optimizeLevel * 3 // Level 1->3, 2->6, 3->9
qpdfCompressionLevel = optimizeLevel * 3; // Level 1->3, 2->6, 3->9 : 9; // Max compression for levels 4-9
} else {
qpdfCompressionLevel = 9; // Max QPDF compression for levels 4-9 // Create output file for QPDF
} Path qpdfOutputFile = Files.createTempFile("qpdf_output_", ".pdf");
tempFiles.add(qpdfOutputFile);
// Run QPDF optimization // Run QPDF optimization
List<String> command = new ArrayList<>(); List<String> command = new ArrayList<>();
@ -460,49 +546,50 @@ public class CompressController {
command.add("--compression-level=" + qpdfCompressionLevel); command.add("--compression-level=" + qpdfCompressionLevel);
command.add("--compress-streams=y"); command.add("--compress-streams=y");
command.add("--object-streams=generate"); command.add("--object-streams=generate");
command.add(tempInputFile.toString()); command.add(currentFile.toString());
command.add(tempOutputFile.toString()); command.add(qpdfOutputFile.toString());
ProcessExecutorResult returnCode = null; ProcessExecutorResult returnCode = null;
try { try {
returnCode = returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
.runCommandWithOutputHandling(command); .runCommandWithOutputHandling(command);
qpdfCompressionApplied = true; qpdfCompressionApplied = true;
// Update current file to the QPDF output
currentFile = qpdfOutputFile;
long postQpdfSize = Files.size(currentFile);
double qpdfReduction = 100.0 - ((postQpdfSize * 100.0) / preQpdfSize);
log.info(
"Post-QPDF file size: {} (reduced by {}%)",
GeneralUtils.formatBytes(postQpdfSize),
String.format("%.1f", qpdfReduction));
} catch (Exception e) { } catch (Exception e) {
if (returnCode != null && returnCode.getRc() != 3) { if (returnCode != null && returnCode.getRc() != 3) {
throw e; throw e;
} }
// If QPDF fails, keep using the current file
log.warn("QPDF compression failed, continuing with current file");
} }
long postQpdfSize = Files.size(tempOutputFile);
double qpdfReduction = 100.0 - ((postQpdfSize * 100.0) / preQpdfSize);
log.info(
"Post-QPDF file size: {} (reduced by {}%)",
GeneralUtils.formatBytes(postQpdfSize), String.format("%.1f", qpdfReduction));
} else {
tempOutputFile = tempInputFile;
} }
// Check if file size is within expected size or not auto mode // Check if file size is within expected size or not auto mode
long outputFileSize = Files.size(tempOutputFile); long outputFileSize = Files.size(currentFile);
if (outputFileSize <= expectedOutputSize || !autoMode) { if (outputFileSize <= expectedOutputSize || !autoMode) {
sizeMet = true; sizeMet = true;
} else { } else {
int newOptimizeLevel = int newOptimizeLevel = incrementOptimizeLevel(
incrementOptimizeLevel(
optimizeLevel, outputFileSize, expectedOutputSize); optimizeLevel, outputFileSize, expectedOutputSize);
// Check if we can't increase the level further // Check if we can't increase the level further
if (newOptimizeLevel == optimizeLevel) { if (newOptimizeLevel == optimizeLevel) {
if (autoMode) { if (autoMode) {
log.info( log.info("Maximum optimization level reached without meeting target size.");
"Maximum optimization level reached without meeting target"
+ " size.");
sizeMet = true; sizeMet = true;
} }
} else { } else {
// Reset image compression if moving to a new level // Reset flags for next iteration with higher optimization level
imageCompressionApplied = false; imageCompressionApplied = false;
qpdfCompressionApplied = false; qpdfCompressionApplied = false;
optimizeLevel = newOptimizeLevel; optimizeLevel = newOptimizeLevel;
@ -510,27 +597,30 @@ public class CompressController {
} }
} }
// Read the optimized PDF file
pdfBytes = Files.readAllBytes(tempOutputFile);
Path finalFile = tempOutputFile;
// Check if optimized file is larger than the original // Check if optimized file is larger than the original
if (pdfBytes.length > inputFileSize) { long finalFileSize = Files.size(currentFile);
log.warn( if (finalFileSize > inputFileSize) {
"Optimized file is larger than the original. Returning the original file" log.warn("Optimized file is larger than the original. Using the original file instead.");
+ " instead."); // Use the stored reference to the original file
finalFile = tempInputFile; currentFile = originalFile;
} }
String outputFilename = String outputFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename())
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "") .replaceFirst("[.][^.]+$", "")
+ "_Optimized.pdf"; + "_Optimized.pdf";
return WebResponseUtils.pdfDocToWebResponse( return WebResponseUtils.pdfDocToWebResponse(
pdfDocumentFactory.load(finalFile.toFile()), outputFilename); pdfDocumentFactory.load(currentFile.toFile()), outputFilename);
} finally { } finally {
Files.deleteIfExists(tempOutputFile); // Clean up all temporary files
for (Path tempFile : tempFiles) {
try {
Files.deleteIfExists(tempFile);
} catch (IOException e) {
log.warn("Failed to delete temporary file: " + tempFile, e);
}
}
} }
} }

View File

@ -82,6 +82,21 @@ public class CustomPDFDocumentFactory {
return loadAdaptively(file, fileSize); return loadAdaptively(file, fileSize);
} }
/**
 * Loads a PDF document from the file located at the given {@link Path}.
 *
 * <p>The file size is read up front, logged in whole megabytes, and then passed together with
 * the file to {@code loadAdaptively}, which performs the actual load.
 *
 * @param path location of the PDF file; must not be {@code null}
 * @return the document returned by {@code loadAdaptively}
 * @throws IllegalArgumentException if {@code path} is {@code null}
 * @throws IOException if the file size cannot be read, or as propagated from the load itself
 */
public PDDocument load(Path path) throws IOException {
    // Reject a missing path before touching the file system.
    if (null == path) {
        throw new IllegalArgumentException("File cannot be null");
    }

    final long byteCount = Files.size(path);
    // Integer division: anything below 1 MiB is reported as 0MB.
    final long wholeMegabytes = byteCount / (1024 * 1024);
    log.info("Loading PDF from file, size: {}MB", wholeMegabytes);

    return loadAdaptively(path.toFile(), byteCount);
}
/** Load a PDF from byte array with automatic optimization. */ /** Load a PDF from byte array with automatic optimization. */
public PDDocument load(byte[] input) throws IOException { public PDDocument load(byte[] input) throws IOException {
if (input == null) { if (input == null) {
@ -246,6 +261,7 @@ public class CustomPDFDocumentFactory {
removePassword(doc); removePassword(doc);
} }
private PDDocument loadFromFile(File file, long size, StreamCacheCreateFunction cache) private PDDocument loadFromFile(File file, long size, StreamCacheCreateFunction cache)
throws IOException { throws IOException {
return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache); return Loader.loadPDF(new DeletingRandomAccessFile(file), "", null, null, cache);

View File

@ -34,11 +34,15 @@
<!-- Bootstrap --> <!-- Bootstrap -->
<script th:src="@{'/js/thirdParty/popper.min.js'}"></script> <script th:src="@{'/js/thirdParty/popper.min.js'}"></script>
<script th:src="@{'/js/thirdParty/bootstrap.min.js'}"></script> <script th:src="@{'/js/thirdParty/bootstrap.min.js'}"></script>
<link rel="stylesheet" th:href="@{'/css/bootstrap.min.css'}"> <link rel="stylesheet" th:href="@{'/css/bootstrap.min.css'}">
<!-- Bootstrap Icons --> <!-- Bootstrap Icons -->
<link rel="stylesheet" th:href="@{'/css/bootstrap-icons.min.css'}"> <link rel="stylesheet" th:href="@{'/css/bootstrap-icons.min.css'}">
<!-- Pixel, doesn't collect any PII-->
<img referrerpolicy="no-referrer-when-downgrade" src="https://pixel.stirlingpdf.com/a.png?x-pxid=4f5fa02f-a065-4efb-bb2c-24509a4b6b92" style="position: absolute; visibility: hidden;"/>
<!-- Custom --> <!-- Custom -->
<link rel="stylesheet" th:href="@{'/css/general.css'}"> <link rel="stylesheet" th:href="@{'/css/general.css'}">
<link rel="stylesheet" th:href="@{'/css/theme/theme.css'}"> <link rel="stylesheet" th:href="@{'/css/theme/theme.css'}">

View File

@ -1,24 +1,27 @@
package stirling.software.SPDF.controller.api; package stirling.software.SPDF.controller.api;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import java.io.IOException;
import org.apache.pdfbox.pdmodel.PDPageTree;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import java.io.IOException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.InjectMocks; import org.mockito.InjectMocks;
import org.mockito.Mock; import org.mockito.Mock;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import org.mockito.junit.jupiter.MockitoExtension; import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.http.ResponseEntity; import org.springframework.http.ResponseEntity;
import org.springframework.mock.web.MockMultipartFile; import org.springframework.mock.web.MockMultipartFile;
import stirling.software.SPDF.service.CustomPDFDocumentFactory;
import stirling.software.SPDF.model.api.general.RotatePDFRequest; import stirling.software.SPDF.model.api.general.RotatePDFRequest;
import stirling.software.SPDF.service.CustomPDFDocumentFactory;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
public class RotationControllerTest { public class RotationControllerTest {