Stirling-PDF/src/main/java/stirling/software/SPDF/controller/api/SplitPdfBySectionsController.java

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

165 lines
6.9 KiB
Java
Raw Normal View History

2023-12-16 19:29:43 +00:00
package stirling.software.SPDF.controller.api;
2023-12-30 19:11:27 +00:00
2023-12-16 19:29:43 +00:00
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
2023-12-16 19:30:47 +00:00
2024-01-12 23:15:27 +00:00
import org.apache.pdfbox.Loader;
2023-12-16 19:29:43 +00:00
import org.apache.pdfbox.multipdf.LayerUtility;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
2023-12-16 19:30:47 +00:00
import org.apache.pdfbox.pdmodel.PDPageContentStream;
2024-01-12 23:15:27 +00:00
import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode;
2023-12-16 19:29:43 +00:00
import org.apache.pdfbox.pdmodel.common.PDRectangle;
2023-12-16 19:30:47 +00:00
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
2023-12-16 19:29:43 +00:00
import org.apache.pdfbox.util.Matrix;
import org.springframework.beans.factory.annotation.Autowired;
2023-12-16 19:29:43 +00:00
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames;
2023-12-16 19:29:43 +00:00
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
2023-12-30 19:11:27 +00:00
2023-12-16 19:29:43 +00:00
import stirling.software.SPDF.model.api.SplitPdfBySectionsRequest;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
2023-12-16 19:29:43 +00:00
import stirling.software.SPDF.utils.WebResponseUtils;
2023-12-30 19:11:27 +00:00
2023-12-16 19:29:43 +00:00
@RestController
@RequestMapping("/api/v1/general")
2023-12-20 19:29:13 +00:00
@Tag(name = "General", description = "General APIs")
2023-12-16 19:29:43 +00:00
public class SplitPdfBySectionsController {
private final CustomPDDocumentFactory pdfDocumentFactory;
@Autowired
public SplitPdfBySectionsController(CustomPDDocumentFactory pdfDocumentFactory) {
this.pdfDocumentFactory = pdfDocumentFactory;
}
2023-12-16 19:29:43 +00:00
@PostMapping(value = "/split-pdf-by-sections", consumes = "multipart/form-data")
2023-12-20 19:29:13 +00:00
@Operation(
summary = "Split PDF pages into smaller sections",
description =
"Split each page of a PDF into smaller sections based on the user's choice (halves, thirds, quarters, etc.), both vertically and horizontally. Input:PDF Output:ZIP-PDF Type:SISO")
2023-12-16 19:29:43 +00:00
public ResponseEntity<byte[]> splitPdf(@ModelAttribute SplitPdfBySectionsRequest request)
throws Exception {
List<ByteArrayOutputStream> splitDocumentsBoas = new ArrayList<>();
MultipartFile file = request.getFileInput();
2024-01-12 23:15:27 +00:00
PDDocument sourceDocument = Loader.loadPDF(file.getBytes());
2023-12-16 19:29:43 +00:00
// Process the PDF based on split parameters
2023-12-17 12:23:11 +00:00
int horiz = request.getHorizontalDivisions() + 1;
int verti = request.getVerticalDivisions() + 1;
boolean merge = request.isMerge();
2023-12-17 12:23:11 +00:00
List<PDDocument> splitDocuments = splitPdfPages(sourceDocument, verti, horiz);
String filename =
Filenames.toSimpleFileName(file.getOriginalFilename())
.replaceFirst("[.][^.]+$", "");
if (merge) {
MergeController mergeController = new MergeController(pdfDocumentFactory);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
mergeController.mergeDocuments(splitDocuments).save(baos);
2024-05-27 16:31:00 +01:00
return WebResponseUtils.bytesToWebResponse(baos.toByteArray(), filename + "_split.pdf");
}
2023-12-16 19:29:43 +00:00
for (PDDocument doc : splitDocuments) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
doc.save(baos);
doc.close();
splitDocumentsBoas.add(baos);
}
sourceDocument.close();
Path zipFile = Files.createTempFile("split_documents", ".zip");
byte[] data;
try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(zipFile))) {
int pageNum = 1;
for (int i = 0; i < splitDocumentsBoas.size(); i++) {
ByteArrayOutputStream baos = splitDocumentsBoas.get(i);
int sectionNum = (i % (horiz * verti)) + 1;
2024-01-01 14:46:19 +00:00
String fileName = filename + "_" + pageNum + "_" + sectionNum + ".pdf";
2023-12-16 19:29:43 +00:00
byte[] pdf = baos.toByteArray();
ZipEntry pdfEntry = new ZipEntry(fileName);
zipOut.putNextEntry(pdfEntry);
zipOut.write(pdf);
zipOut.closeEntry();
if (sectionNum == horiz * verti) pageNum++;
}
Fix issue #2511: Fix broken ZIP issue by adding zipOut.finish() (#2890) --- # Description of Changes ### What was changed - Added `zipOut.finish()` to ensure the ZIP file is properly finalized after writing all entries. - This ensures the central directory metadata is written, fixing the issue where the ZIP file was incomplete or broken. ### Why the change was made - The issue (#2511) reported that splitting a PDF resulted in a broken ZIP file. The root cause was the missing central directory due to improper stream finalization. - Adding `zipOut.finish()` explicitly ensures the ZIP file is correctly structured and can be extracted without errors. ### Challenges encountered Closes #2511 --- ## Checklist ### General - [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [x] I have performed a self-review of my own code - [x] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [x] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details. - Tested with various PDFs to ensure the ZIP file is created correctly. - Verified ZIP integrity using `unzip -t` and manual extraction. ---
2025-02-07 19:06:33 +05:30
zipOut.finish();
2023-12-16 19:29:43 +00:00
data = Files.readAllBytes(zipFile);
2024-12-09 20:40:59 +00:00
return WebResponseUtils.bytesToWebResponse(
data, filename + "_split.zip", MediaType.APPLICATION_OCTET_STREAM);
} finally {
2024-05-27 16:31:00 +01:00
Files.deleteIfExists(zipFile);
2023-12-16 19:29:43 +00:00
}
}
2023-12-30 19:11:27 +00:00
2023-12-16 19:29:43 +00:00
public List<PDDocument> splitPdfPages(
PDDocument document, int horizontalDivisions, int verticalDivisions)
throws IOException {
List<PDDocument> splitDocuments = new ArrayList<>();
for (PDPage originalPage : document.getPages()) {
PDRectangle originalMediaBox = originalPage.getMediaBox();
float width = originalMediaBox.getWidth();
float height = originalMediaBox.getHeight();
float subPageWidth = width / horizontalDivisions;
float subPageHeight = height / verticalDivisions;
LayerUtility layerUtility = new LayerUtility(document);
for (int i = 0; i < horizontalDivisions; i++) {
for (int j = 0; j < verticalDivisions; j++) {
PDDocument subDoc = new PDDocument();
PDPage subPage = new PDPage(new PDRectangle(subPageWidth, subPageHeight));
subDoc.addPage(subPage);
PDFormXObject form =
layerUtility.importPageAsForm(
document, document.getPages().indexOf(originalPage));
try (PDPageContentStream contentStream =
2024-01-12 23:15:27 +00:00
new PDPageContentStream(
2024-01-13 01:05:43 +00:00
subDoc, subPage, AppendMode.APPEND, true, true)) {
2023-12-16 19:29:43 +00:00
// Set clipping area and position
float translateX = -subPageWidth * i;
// float translateY = height - subPageHeight * (verticalDivisions - j);
2024-01-13 01:05:43 +00:00
float translateY = -subPageHeight * (verticalDivisions - 1 - j);
2024-01-03 17:59:04 +00:00
2023-12-16 19:29:43 +00:00
contentStream.saveGraphicsState();
contentStream.addRect(0, 0, subPageWidth, subPageHeight);
contentStream.clip();
contentStream.transform(new Matrix(1, 0, 0, 1, translateX, translateY));
// Draw the form
contentStream.drawForm(form);
contentStream.restoreGraphicsState();
}
splitDocuments.add(subDoc);
}
}
}
return splitDocuments;
}
}