mirror of
https://github.com/Stirling-Tools/Stirling-PDF.git
synced 2025-05-24 02:42:02 +00:00

# Description

This pull request includes several changes aimed at improving the code structure and removing redundant code. The most significant changes involve reordering methods, removing unnecessary annotations, and refactoring constructors to use dependency injection. Dependencies are now injected via the constructor (which does not need the `@Autowired` annotation, as constructor injection is done by default for configuration classes).

## Checklist

- [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have performed a self-review of my own code
- [ ] I have attached images of the change if it is UI based
- [ ] I have commented my code, particularly in hard-to-understand areas
- [ ] If my code has heavily changed functionality I have updated relevant docs on [Stirling-PDFs doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
- [ ] My changes generate no new warnings
- [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)
312 lines
12 KiB
Java
312 lines
12 KiB
Java
package stirling.software.SPDF.controller.api;
|
|
|
|
import java.io.ByteArrayOutputStream;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Path;
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.zip.ZipEntry;
|
|
import java.util.zip.ZipOutputStream;
|
|
|
|
import org.apache.pdfbox.Loader;
|
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
|
import org.apache.pdfbox.pdmodel.PDPage;
|
|
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
|
|
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
import org.springframework.http.MediaType;
|
|
import org.springframework.http.ResponseEntity;
|
|
import org.springframework.web.bind.annotation.ModelAttribute;
|
|
import org.springframework.web.bind.annotation.PostMapping;
|
|
import org.springframework.web.bind.annotation.RequestMapping;
|
|
import org.springframework.web.bind.annotation.RestController;
|
|
import org.springframework.web.multipart.MultipartFile;
|
|
|
|
import io.github.pixee.security.Filenames;
|
|
import io.swagger.v3.oas.annotations.Operation;
|
|
import io.swagger.v3.oas.annotations.tags.Tag;
|
|
|
|
import lombok.AllArgsConstructor;
|
|
import lombok.Data;
|
|
import lombok.EqualsAndHashCode;
|
|
import lombok.NoArgsConstructor;
|
|
import lombok.extern.slf4j.Slf4j;
|
|
import stirling.software.SPDF.model.PdfMetadata;
|
|
import stirling.software.SPDF.model.api.SplitPdfByChaptersRequest;
|
|
import stirling.software.SPDF.service.PdfMetadataService;
|
|
import stirling.software.SPDF.utils.WebResponseUtils;
|
|
|
|
@RestController
|
|
@RequestMapping("/api/v1/general")
|
|
@Slf4j
|
|
@Tag(name = "General", description = "General APIs")
|
|
public class SplitPdfByChaptersController {
|
|
|
|
private final PdfMetadataService pdfMetadataService;
|
|
|
|
/**
 * Creates the controller with the metadata service it uses to read metadata from the source
 * document and apply it to each split document.
 *
 * @param pdfMetadataService service used to extract and set PDF metadata
 */
@Autowired
public SplitPdfByChaptersController(final PdfMetadataService pdfMetadataService) {
    this.pdfMetadataService = pdfMetadataService;
}
|
|
|
|
private static List<Bookmark> extractOutlineItems(
|
|
PDDocument sourceDocument,
|
|
PDOutlineItem current,
|
|
List<Bookmark> bookmarks,
|
|
PDOutlineItem nextParent,
|
|
int level,
|
|
int maxLevel)
|
|
throws Exception {
|
|
|
|
while (current != null) {
|
|
|
|
String currentTitle = current.getTitle().replace("/", "");
|
|
int firstPage =
|
|
sourceDocument.getPages().indexOf(current.findDestinationPage(sourceDocument));
|
|
PDOutlineItem child = current.getFirstChild();
|
|
PDOutlineItem nextSibling = current.getNextSibling();
|
|
int endPage;
|
|
if (child != null && level < maxLevel) {
|
|
endPage =
|
|
sourceDocument
|
|
.getPages()
|
|
.indexOf(child.findDestinationPage(sourceDocument));
|
|
} else if (nextSibling != null) {
|
|
endPage =
|
|
sourceDocument
|
|
.getPages()
|
|
.indexOf(nextSibling.findDestinationPage(sourceDocument));
|
|
} else if (nextParent != null) {
|
|
|
|
endPage =
|
|
sourceDocument
|
|
.getPages()
|
|
.indexOf(nextParent.findDestinationPage(sourceDocument));
|
|
} else {
|
|
endPage = -2;
|
|
/*
|
|
happens when we have something like this:
|
|
Outline Item 2
|
|
Outline Item 2.1
|
|
Outline Item 2.1.1
|
|
Outline Item 2.2
|
|
Outline 2.2.1
|
|
Outline 2.2.2 <--- this item neither has an immediate next parent nor an immediate next sibling
|
|
Outline Item 3
|
|
*/
|
|
}
|
|
if (!bookmarks.isEmpty()
|
|
&& bookmarks.get(bookmarks.size() - 1).getEndPage() == -2
|
|
&& firstPage
|
|
>= bookmarks
|
|
.get(bookmarks.size() - 1)
|
|
.getStartPage()) { // for handling the above-mentioned case
|
|
Bookmark previousBookmark = bookmarks.get(bookmarks.size() - 1);
|
|
previousBookmark.setEndPage(firstPage);
|
|
}
|
|
bookmarks.add(new Bookmark(currentTitle, firstPage, endPage));
|
|
|
|
// Recursively process children
|
|
if (child != null && level < maxLevel) {
|
|
extractOutlineItems(
|
|
sourceDocument, child, bookmarks, nextSibling, level + 1, maxLevel);
|
|
}
|
|
|
|
current = nextSibling;
|
|
}
|
|
return bookmarks;
|
|
}
|
|
|
|
@PostMapping(value = "/split-pdf-by-chapters", consumes = "multipart/form-data")
|
|
@Operation(
|
|
summary = "Split PDFs by Chapters",
|
|
description = "Splits a PDF into chapters and returns a ZIP file.")
|
|
public ResponseEntity<byte[]> splitPdf(@ModelAttribute SplitPdfByChaptersRequest request)
|
|
throws Exception {
|
|
MultipartFile file = request.getFileInput();
|
|
PDDocument sourceDocument = null;
|
|
Path zipFile = null;
|
|
|
|
try {
|
|
boolean includeMetadata = request.getIncludeMetadata();
|
|
Integer bookmarkLevel =
|
|
request.getBookmarkLevel(); // levels start from 0 (top most bookmarks)
|
|
if (bookmarkLevel < 0) {
|
|
return ResponseEntity.badRequest().body("Invalid bookmark level".getBytes());
|
|
}
|
|
sourceDocument = Loader.loadPDF(file.getBytes());
|
|
|
|
PDDocumentOutline outline = sourceDocument.getDocumentCatalog().getDocumentOutline();
|
|
|
|
if (outline == null) {
|
|
log.warn("No outline found for {}", file.getOriginalFilename());
|
|
return ResponseEntity.badRequest().body("No outline found".getBytes());
|
|
}
|
|
List<Bookmark> bookmarks = new ArrayList<>();
|
|
try {
|
|
bookmarks =
|
|
extractOutlineItems(
|
|
sourceDocument,
|
|
outline.getFirstChild(),
|
|
bookmarks,
|
|
outline.getFirstChild().getNextSibling(),
|
|
0,
|
|
bookmarkLevel);
|
|
// to handle last page edge case
|
|
bookmarks.get(bookmarks.size() - 1).setEndPage(sourceDocument.getNumberOfPages());
|
|
Bookmark lastBookmark = bookmarks.get(bookmarks.size() - 1);
|
|
|
|
} catch (Exception e) {
|
|
log.error("Unable to extract outline items", e);
|
|
return ResponseEntity.internalServerError()
|
|
.body("Unable to extract outline items".getBytes());
|
|
}
|
|
|
|
boolean allowDuplicates = request.getAllowDuplicates();
|
|
if (!allowDuplicates) {
|
|
/*
|
|
duplicates are generated when multiple bookmarks correspond to the same page,
|
|
if the user doesn't want duplicates mergeBookmarksThatCorrespondToSamePage() method will merge the titles of all
|
|
the bookmarks that correspond to the same page, and treat them as a single bookmark
|
|
*/
|
|
bookmarks = mergeBookmarksThatCorrespondToSamePage(bookmarks);
|
|
}
|
|
for (Bookmark bookmark : bookmarks) {
|
|
log.info(
|
|
"{}::::{} to {}",
|
|
bookmark.getTitle(),
|
|
bookmark.getStartPage(),
|
|
bookmark.getEndPage());
|
|
}
|
|
List<ByteArrayOutputStream> splitDocumentsBoas =
|
|
getSplitDocumentsBoas(sourceDocument, bookmarks, includeMetadata);
|
|
|
|
zipFile = createZipFile(bookmarks, splitDocumentsBoas);
|
|
|
|
byte[] data = Files.readAllBytes(zipFile);
|
|
Files.deleteIfExists(zipFile);
|
|
|
|
String filename =
|
|
Filenames.toSimpleFileName(file.getOriginalFilename())
|
|
.replaceFirst("[.][^.]+$", "");
|
|
sourceDocument.close();
|
|
return WebResponseUtils.bytesToWebResponse(
|
|
data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
|
|
} finally {
|
|
try {
|
|
if (sourceDocument != null) {
|
|
sourceDocument.close();
|
|
}
|
|
if (zipFile != null) {
|
|
Files.deleteIfExists(zipFile);
|
|
}
|
|
} catch (Exception e) {
|
|
log.error("Error while cleaning up resources", e);
|
|
}
|
|
}
|
|
}
|
|
|
|
private List<Bookmark> mergeBookmarksThatCorrespondToSamePage(List<Bookmark> bookmarks) {
|
|
String mergedTitle = "";
|
|
List<Bookmark> chaptersToBeRemoved = new ArrayList<>();
|
|
for (Bookmark bookmark : bookmarks) {
|
|
if (bookmark.getStartPage() == bookmark.getEndPage()) {
|
|
mergedTitle = mergedTitle.concat(bookmark.getTitle().concat(" "));
|
|
chaptersToBeRemoved.add(bookmark);
|
|
} else {
|
|
if (!mergedTitle.isEmpty()) {
|
|
if (mergedTitle.length() > 255) {
|
|
mergedTitle = mergedTitle.substring(0, 253) + "...";
|
|
}
|
|
|
|
bookmarks.set(
|
|
bookmarks.indexOf(bookmark),
|
|
new Bookmark(
|
|
mergedTitle, bookmark.getStartPage(), bookmark.getEndPage()));
|
|
}
|
|
mergedTitle = "";
|
|
}
|
|
}
|
|
bookmarks.removeAll(chaptersToBeRemoved);
|
|
return bookmarks;
|
|
}
|
|
|
|
private Path createZipFile(
|
|
List<Bookmark> bookmarks, List<ByteArrayOutputStream> splitDocumentsBoas)
|
|
throws Exception {
|
|
Path zipFile = Files.createTempFile("split_documents", ".zip");
|
|
String fileNumberFormatter = "%0" + (Integer.toString(bookmarks.size()).length()) + "d ";
|
|
try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(zipFile))) {
|
|
for (int i = 0; i < splitDocumentsBoas.size(); i++) {
|
|
|
|
// split files will be named as "[FILE_NUMBER] [BOOKMARK_TITLE].pdf"
|
|
|
|
String fileName =
|
|
String.format(fileNumberFormatter, i)
|
|
+ bookmarks.get(i).getTitle()
|
|
+ ".pdf";
|
|
ByteArrayOutputStream baos = splitDocumentsBoas.get(i);
|
|
byte[] pdf = baos.toByteArray();
|
|
|
|
ZipEntry pdfEntry = new ZipEntry(fileName);
|
|
zipOut.putNextEntry(pdfEntry);
|
|
zipOut.write(pdf);
|
|
zipOut.closeEntry();
|
|
|
|
log.info("Wrote split document {} to zip file", fileName);
|
|
}
|
|
} catch (Exception e) {
|
|
log.error("Failed writing to zip", e);
|
|
throw e;
|
|
}
|
|
|
|
log.info("Successfully created zip file with split documents: {}", zipFile);
|
|
return zipFile;
|
|
}
|
|
|
|
public List<ByteArrayOutputStream> getSplitDocumentsBoas(
|
|
PDDocument sourceDocument, List<Bookmark> bookmarks, boolean includeMetadata)
|
|
throws Exception {
|
|
List<ByteArrayOutputStream> splitDocumentsBoas = new ArrayList<>();
|
|
PdfMetadata metadata = null;
|
|
if (includeMetadata) {
|
|
metadata = pdfMetadataService.extractMetadataFromPdf(sourceDocument);
|
|
}
|
|
for (Bookmark bookmark : bookmarks) {
|
|
try (PDDocument splitDocument = new PDDocument()) {
|
|
boolean isSinglePage = (bookmark.getStartPage() == bookmark.getEndPage());
|
|
|
|
for (int i = bookmark.getStartPage();
|
|
i < bookmark.getEndPage() + (isSinglePage ? 1 : 0);
|
|
i++) {
|
|
PDPage page = sourceDocument.getPage(i);
|
|
splitDocument.addPage(page);
|
|
log.info("Adding page {} to split document", i);
|
|
}
|
|
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
|
if (includeMetadata) {
|
|
pdfMetadataService.setMetadataToPdf(splitDocument, metadata);
|
|
}
|
|
|
|
splitDocument.save(baos);
|
|
|
|
splitDocumentsBoas.add(baos);
|
|
} catch (Exception e) {
|
|
log.error("Failed splitting documents and saving them", e);
|
|
throw e;
|
|
}
|
|
}
|
|
return splitDocumentsBoas;
|
|
}
|
|
}
|
|
|
|
@Data
|
|
@EqualsAndHashCode
|
|
@NoArgsConstructor
|
|
@AllArgsConstructor
|
|
class Bookmark {
|
|
private String title;
|
|
private int startPage;
|
|
private int endPage;
|
|
}
|