2024-09-05 00:51:35 +05:30
|
|
|
package stirling.software.SPDF.controller.api;
|
|
|
|
|
|
|
|
import java.io.ByteArrayOutputStream;
|
|
|
|
import java.nio.file.Files;
|
|
|
|
import java.nio.file.Path;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.zip.ZipEntry;
|
|
|
|
import java.util.zip.ZipOutputStream;
|
|
|
|
|
|
|
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
|
|
|
import org.apache.pdfbox.pdmodel.PDPage;
|
|
|
|
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
|
|
|
|
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
|
|
|
|
import org.springframework.http.MediaType;
|
|
|
|
import org.springframework.http.ResponseEntity;
|
|
|
|
import org.springframework.web.bind.annotation.ModelAttribute;
|
|
|
|
import org.springframework.web.bind.annotation.PostMapping;
|
|
|
|
import org.springframework.web.bind.annotation.RequestMapping;
|
|
|
|
import org.springframework.web.bind.annotation.RestController;
|
|
|
|
import org.springframework.web.multipart.MultipartFile;
|
|
|
|
|
|
|
|
import io.github.pixee.security.Filenames;
|
|
|
|
import io.swagger.v3.oas.annotations.Operation;
|
|
|
|
import io.swagger.v3.oas.annotations.tags.Tag;
|
|
|
|
|
|
|
|
import lombok.AllArgsConstructor;
|
|
|
|
import lombok.Data;
|
|
|
|
import lombok.EqualsAndHashCode;
|
|
|
|
import lombok.NoArgsConstructor;
|
2025-04-25 15:35:12 +02:00
|
|
|
import lombok.RequiredArgsConstructor;
|
2024-12-17 10:26:18 +01:00
|
|
|
import lombok.extern.slf4j.Slf4j;
|
2025-02-23 13:36:21 +00:00
|
|
|
|
2024-09-05 00:51:35 +05:30
|
|
|
import stirling.software.SPDF.model.api.SplitPdfByChaptersRequest;
|
2025-05-27 13:01:52 +01:00
|
|
|
import stirling.software.common.model.PdfMetadata;
|
|
|
|
import stirling.software.common.service.CustomPDFDocumentFactory;
|
|
|
|
import stirling.software.common.service.PdfMetadataService;
|
|
|
|
import stirling.software.common.util.WebResponseUtils;
|
2024-09-05 00:51:35 +05:30
|
|
|
|
|
|
|
@RestController
|
|
|
|
@RequestMapping("/api/v1/general")
|
2024-12-17 10:26:18 +01:00
|
|
|
@Slf4j
|
2024-09-05 00:51:35 +05:30
|
|
|
@Tag(name = "General", description = "General APIs")
|
2025-04-25 15:35:12 +02:00
|
|
|
@RequiredArgsConstructor
|
2024-09-05 00:51:35 +05:30
|
|
|
public class SplitPdfByChaptersController {
|
|
|
|
|
2024-09-14 16:29:39 +01:00
|
|
|
/** Extracts metadata from the source PDF and applies it to each split document on request. */
private final PdfMetadataService pdfMetadataService;
|
|
|
|
|
2025-03-12 13:13:44 +01:00
|
|
|
/** Loads the uploaded multipart file as a {@link PDDocument}. */
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
2025-03-08 00:03:27 +00:00
|
|
|
|
2024-12-24 09:52:53 +00:00
|
|
|
private static List<Bookmark> extractOutlineItems(
|
|
|
|
PDDocument sourceDocument,
|
|
|
|
PDOutlineItem current,
|
|
|
|
List<Bookmark> bookmarks,
|
|
|
|
PDOutlineItem nextParent,
|
|
|
|
int level,
|
|
|
|
int maxLevel)
|
|
|
|
throws Exception {
|
|
|
|
|
|
|
|
while (current != null) {
|
|
|
|
|
|
|
|
String currentTitle = current.getTitle().replace("/", "");
|
|
|
|
int firstPage =
|
|
|
|
sourceDocument.getPages().indexOf(current.findDestinationPage(sourceDocument));
|
|
|
|
PDOutlineItem child = current.getFirstChild();
|
|
|
|
PDOutlineItem nextSibling = current.getNextSibling();
|
|
|
|
int endPage;
|
|
|
|
if (child != null && level < maxLevel) {
|
|
|
|
endPage =
|
|
|
|
sourceDocument
|
|
|
|
.getPages()
|
|
|
|
.indexOf(child.findDestinationPage(sourceDocument));
|
|
|
|
} else if (nextSibling != null) {
|
|
|
|
endPage =
|
|
|
|
sourceDocument
|
|
|
|
.getPages()
|
|
|
|
.indexOf(nextSibling.findDestinationPage(sourceDocument));
|
|
|
|
} else if (nextParent != null) {
|
|
|
|
|
|
|
|
endPage =
|
|
|
|
sourceDocument
|
|
|
|
.getPages()
|
|
|
|
.indexOf(nextParent.findDestinationPage(sourceDocument));
|
|
|
|
} else {
|
|
|
|
endPage = -2;
|
|
|
|
/*
|
|
|
|
happens when we have something like this:
|
|
|
|
Outline Item 2
|
|
|
|
Outline Item 2.1
|
|
|
|
Outline Item 2.1.1
|
|
|
|
Outline Item 2.2
|
|
|
|
Outline 2.2.1
|
|
|
|
Outline 2.2.2 <--- this item neither has an immediate next parent nor an immediate next sibling
|
|
|
|
Outline Item 3
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
if (!bookmarks.isEmpty()
|
|
|
|
&& bookmarks.get(bookmarks.size() - 1).getEndPage() == -2
|
|
|
|
&& firstPage
|
|
|
|
>= bookmarks
|
|
|
|
.get(bookmarks.size() - 1)
|
|
|
|
.getStartPage()) { // for handling the above-mentioned case
|
|
|
|
Bookmark previousBookmark = bookmarks.get(bookmarks.size() - 1);
|
|
|
|
previousBookmark.setEndPage(firstPage);
|
|
|
|
}
|
|
|
|
bookmarks.add(new Bookmark(currentTitle, firstPage, endPage));
|
|
|
|
|
|
|
|
// Recursively process children
|
|
|
|
if (child != null && level < maxLevel) {
|
|
|
|
extractOutlineItems(
|
|
|
|
sourceDocument, child, bookmarks, nextSibling, level + 1, maxLevel);
|
|
|
|
}
|
|
|
|
|
|
|
|
current = nextSibling;
|
|
|
|
}
|
|
|
|
return bookmarks;
|
|
|
|
}
|
|
|
|
|
2024-09-05 00:51:35 +05:30
|
|
|
@PostMapping(value = "/split-pdf-by-chapters", consumes = "multipart/form-data")
|
|
|
|
@Operation(
|
|
|
|
summary = "Split PDFs by Chapters",
|
|
|
|
description = "Splits a PDF into chapters and returns a ZIP file.")
|
|
|
|
public ResponseEntity<byte[]> splitPdf(@ModelAttribute SplitPdfByChaptersRequest request)
|
|
|
|
throws Exception {
|
|
|
|
MultipartFile file = request.getFileInput();
|
2024-12-09 20:40:59 +00:00
|
|
|
PDDocument sourceDocument = null;
|
|
|
|
Path zipFile = null;
|
2024-09-05 00:51:35 +05:30
|
|
|
|
|
|
|
try {
|
Improve Type Safety and OpenAPI Schema for PDF API Controllers and Models (#3470)
# Description of Changes
- **What was changed**
- Updated controller methods to use strongly‐typed primitives (`int`,
`long`, `boolean`) instead of `String` for numeric and boolean
parameters, eliminating calls to `Integer.parseInt`/`Long.parseLong` and
improving null‐safety (`Boolean.TRUE.equals(...)`).
- Enhanced all API request model classes with richer Swagger/OpenAPI
annotations: added `requiredMode`, `defaultValue`, `allowableValues`,
`format`, `pattern`, and tightened schema descriptions for all fields.
- Refactored HTML form templates for “Remove Blank Pages” to include
`min`, `max`, and `step` attributes on numeric inputs, matching the
updated validation rules.
- **Why the change was made**
- **Type safety & robustness**: Shifting from `String` to native types
prevents runtime parsing errors, simplifies controller logic, and makes
default values explicit.
- **Better API documentation & validation**: Enriching the Swagger
annotations ensures generated docs accurately reflect required fields,
default values, and permitted ranges, which improves client code
generation and developer experience.
- **Consistency across codebase**: Aligning all request models and
controllers enforces a uniform coding style and reduces bugs.
#3406
---
## Checklist
### General
- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings
### Documentation
- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)
### UI Changes (if applicable)
- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)
### Testing (if applicable)
- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing)
for more details.
2025-05-16 13:23:01 +02:00
|
|
|
boolean includeMetadata = Boolean.TRUE.equals(request.getIncludeMetadata());
|
2024-12-09 20:40:59 +00:00
|
|
|
Integer bookmarkLevel =
|
|
|
|
request.getBookmarkLevel(); // levels start from 0 (top most bookmarks)
|
|
|
|
if (bookmarkLevel < 0) {
|
2025-06-03 17:44:35 +01:00
|
|
|
throw new IllegalArgumentException("Invalid bookmark level");
|
2024-12-09 20:40:59 +00:00
|
|
|
}
|
2025-03-10 20:17:45 +00:00
|
|
|
sourceDocument = pdfDocumentFactory.load(file);
|
2024-12-09 20:40:59 +00:00
|
|
|
|
|
|
|
PDDocumentOutline outline = sourceDocument.getDocumentCatalog().getDocumentOutline();
|
|
|
|
|
|
|
|
if (outline == null) {
|
2024-12-17 10:26:18 +01:00
|
|
|
log.warn("No outline found for {}", file.getOriginalFilename());
|
2025-06-03 17:44:35 +01:00
|
|
|
throw new IllegalArgumentException("No outline found");
|
2024-12-09 20:40:59 +00:00
|
|
|
}
|
|
|
|
List<Bookmark> bookmarks = new ArrayList<>();
|
|
|
|
try {
|
|
|
|
bookmarks =
|
|
|
|
extractOutlineItems(
|
|
|
|
sourceDocument,
|
|
|
|
outline.getFirstChild(),
|
|
|
|
bookmarks,
|
|
|
|
outline.getFirstChild().getNextSibling(),
|
|
|
|
0,
|
|
|
|
bookmarkLevel);
|
|
|
|
// to handle last page edge case
|
|
|
|
bookmarks.get(bookmarks.size() - 1).setEndPage(sourceDocument.getNumberOfPages());
|
|
|
|
Bookmark lastBookmark = bookmarks.get(bookmarks.size() - 1);
|
|
|
|
|
|
|
|
} catch (Exception e) {
|
2024-12-17 10:26:18 +01:00
|
|
|
log.error("Unable to extract outline items", e);
|
2024-12-09 20:40:59 +00:00
|
|
|
return ResponseEntity.internalServerError()
|
|
|
|
.body("Unable to extract outline items".getBytes());
|
|
|
|
}
|
|
|
|
|
Improve Type Safety and OpenAPI Schema for PDF API Controllers and Models (#3470)
# Description of Changes
- **What was changed**
- Updated controller methods to use strongly‐typed primitives (`int`,
`long`, `boolean`) instead of `String` for numeric and boolean
parameters, eliminating calls to `Integer.parseInt`/`Long.parseLong` and
improving null‐safety (`Boolean.TRUE.equals(...)`).
- Enhanced all API request model classes with richer Swagger/OpenAPI
annotations: added `requiredMode`, `defaultValue`, `allowableValues`,
`format`, `pattern`, and tightened schema descriptions for all fields.
- Refactored HTML form templates for “Remove Blank Pages” to include
`min`, `max`, and `step` attributes on numeric inputs, matching the
updated validation rules.
- **Why the change was made**
- **Type safety & robustness**: Shifting from `String` to native types
prevents runtime parsing errors, simplifies controller logic, and makes
default values explicit.
- **Better API documentation & validation**: Enriching the Swagger
annotations ensures generated docs accurately reflect required fields,
default values, and permitted ranges, which improves client code
generation and developer experience.
- **Consistency across codebase**: Aligning all request models and
controllers enforces a uniform coding style and reduces bugs.
#3406
---
## Checklist
### General
- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings
### Documentation
- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)
### UI Changes (if applicable)
- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)
### Testing (if applicable)
- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing)
for more details.
2025-05-16 13:23:01 +02:00
|
|
|
boolean allowDuplicates = Boolean.TRUE.equals(request.getAllowDuplicates());
|
2024-12-09 20:40:59 +00:00
|
|
|
if (!allowDuplicates) {
|
|
|
|
/*
|
|
|
|
duplicates are generated when multiple bookmarks correspond to the same page,
|
|
|
|
if the user doesn't want duplicates mergeBookmarksThatCorrespondToSamePage() method will merge the titles of all
|
|
|
|
the bookmarks that correspond to the same page, and treat them as a single bookmark
|
|
|
|
*/
|
|
|
|
bookmarks = mergeBookmarksThatCorrespondToSamePage(bookmarks);
|
|
|
|
}
|
|
|
|
for (Bookmark bookmark : bookmarks) {
|
2024-12-17 10:26:18 +01:00
|
|
|
log.info(
|
2024-12-09 20:40:59 +00:00
|
|
|
"{}::::{} to {}",
|
|
|
|
bookmark.getTitle(),
|
|
|
|
bookmark.getStartPage(),
|
|
|
|
bookmark.getEndPage());
|
|
|
|
}
|
|
|
|
List<ByteArrayOutputStream> splitDocumentsBoas =
|
|
|
|
getSplitDocumentsBoas(sourceDocument, bookmarks, includeMetadata);
|
|
|
|
|
|
|
|
zipFile = createZipFile(bookmarks, splitDocumentsBoas);
|
|
|
|
|
|
|
|
byte[] data = Files.readAllBytes(zipFile);
|
|
|
|
Files.deleteIfExists(zipFile);
|
|
|
|
|
|
|
|
String filename =
|
|
|
|
Filenames.toSimpleFileName(file.getOriginalFilename())
|
|
|
|
.replaceFirst("[.][^.]+$", "");
|
|
|
|
sourceDocument.close();
|
|
|
|
return WebResponseUtils.bytesToWebResponse(
|
|
|
|
data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
|
|
|
|
} finally {
|
|
|
|
try {
|
|
|
|
if (sourceDocument != null) {
|
|
|
|
sourceDocument.close();
|
|
|
|
}
|
|
|
|
if (zipFile != null) {
|
|
|
|
Files.deleteIfExists(zipFile);
|
|
|
|
}
|
|
|
|
} catch (Exception e) {
|
2024-12-17 10:26:18 +01:00
|
|
|
log.error("Error while cleaning up resources", e);
|
2024-12-09 20:40:59 +00:00
|
|
|
}
|
2024-09-05 00:51:35 +05:30
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private List<Bookmark> mergeBookmarksThatCorrespondToSamePage(List<Bookmark> bookmarks) {
|
|
|
|
String mergedTitle = "";
|
|
|
|
List<Bookmark> chaptersToBeRemoved = new ArrayList<>();
|
|
|
|
for (Bookmark bookmark : bookmarks) {
|
|
|
|
if (bookmark.getStartPage() == bookmark.getEndPage()) {
|
|
|
|
mergedTitle = mergedTitle.concat(bookmark.getTitle().concat(" "));
|
|
|
|
chaptersToBeRemoved.add(bookmark);
|
|
|
|
} else {
|
|
|
|
if (!mergedTitle.isEmpty()) {
|
|
|
|
if (mergedTitle.length() > 255) {
|
|
|
|
mergedTitle = mergedTitle.substring(0, 253) + "...";
|
|
|
|
}
|
|
|
|
|
|
|
|
bookmarks.set(
|
|
|
|
bookmarks.indexOf(bookmark),
|
|
|
|
new Bookmark(
|
|
|
|
mergedTitle, bookmark.getStartPage(), bookmark.getEndPage()));
|
|
|
|
}
|
|
|
|
mergedTitle = "";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
bookmarks.removeAll(chaptersToBeRemoved);
|
|
|
|
return bookmarks;
|
|
|
|
}
|
|
|
|
|
|
|
|
private Path createZipFile(
|
|
|
|
List<Bookmark> bookmarks, List<ByteArrayOutputStream> splitDocumentsBoas)
|
|
|
|
throws Exception {
|
|
|
|
Path zipFile = Files.createTempFile("split_documents", ".zip");
|
|
|
|
String fileNumberFormatter = "%0" + (Integer.toString(bookmarks.size()).length()) + "d ";
|
|
|
|
try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(zipFile))) {
|
|
|
|
for (int i = 0; i < splitDocumentsBoas.size(); i++) {
|
|
|
|
|
|
|
|
// split files will be named as "[FILE_NUMBER] [BOOKMARK_TITLE].pdf"
|
|
|
|
|
|
|
|
String fileName =
|
|
|
|
String.format(fileNumberFormatter, i)
|
|
|
|
+ bookmarks.get(i).getTitle()
|
|
|
|
+ ".pdf";
|
|
|
|
ByteArrayOutputStream baos = splitDocumentsBoas.get(i);
|
|
|
|
byte[] pdf = baos.toByteArray();
|
|
|
|
|
|
|
|
ZipEntry pdfEntry = new ZipEntry(fileName);
|
|
|
|
zipOut.putNextEntry(pdfEntry);
|
|
|
|
zipOut.write(pdf);
|
|
|
|
zipOut.closeEntry();
|
|
|
|
|
2024-12-17 10:26:18 +01:00
|
|
|
log.info("Wrote split document {} to zip file", fileName);
|
2024-09-05 00:51:35 +05:30
|
|
|
}
|
|
|
|
} catch (Exception e) {
|
2024-12-17 10:26:18 +01:00
|
|
|
log.error("Failed writing to zip", e);
|
2024-09-05 00:51:35 +05:30
|
|
|
throw e;
|
|
|
|
}
|
|
|
|
|
2024-12-17 10:26:18 +01:00
|
|
|
log.info("Successfully created zip file with split documents: {}", zipFile);
|
2024-09-05 00:51:35 +05:30
|
|
|
return zipFile;
|
|
|
|
}
|
|
|
|
|
|
|
|
public List<ByteArrayOutputStream> getSplitDocumentsBoas(
|
|
|
|
PDDocument sourceDocument, List<Bookmark> bookmarks, boolean includeMetadata)
|
|
|
|
throws Exception {
|
|
|
|
List<ByteArrayOutputStream> splitDocumentsBoas = new ArrayList<>();
|
|
|
|
PdfMetadata metadata = null;
|
|
|
|
if (includeMetadata) {
|
2024-09-14 16:29:39 +01:00
|
|
|
metadata = pdfMetadataService.extractMetadataFromPdf(sourceDocument);
|
2024-09-05 00:51:35 +05:30
|
|
|
}
|
|
|
|
for (Bookmark bookmark : bookmarks) {
|
|
|
|
try (PDDocument splitDocument = new PDDocument()) {
|
|
|
|
boolean isSinglePage = (bookmark.getStartPage() == bookmark.getEndPage());
|
|
|
|
|
|
|
|
for (int i = bookmark.getStartPage();
|
|
|
|
i < bookmark.getEndPage() + (isSinglePage ? 1 : 0);
|
|
|
|
i++) {
|
|
|
|
PDPage page = sourceDocument.getPage(i);
|
|
|
|
splitDocument.addPage(page);
|
2024-12-17 10:26:18 +01:00
|
|
|
log.info("Adding page {} to split document", i);
|
2024-09-05 00:51:35 +05:30
|
|
|
}
|
|
|
|
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
|
|
|
if (includeMetadata) {
|
2024-09-14 16:29:39 +01:00
|
|
|
pdfMetadataService.setMetadataToPdf(splitDocument, metadata);
|
2024-09-05 00:51:35 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
splitDocument.save(baos);
|
|
|
|
|
|
|
|
splitDocumentsBoas.add(baos);
|
|
|
|
} catch (Exception e) {
|
2024-12-17 10:26:18 +01:00
|
|
|
log.error("Failed splitting documents and saving them", e);
|
2024-09-05 00:51:35 +05:30
|
|
|
throw e;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return splitDocumentsBoas;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Data
|
|
|
|
@EqualsAndHashCode
|
|
|
|
@NoArgsConstructor
|
|
|
|
@AllArgsConstructor
|
|
|
|
class Bookmark {
|
|
|
|
private String title;
|
|
|
|
private int startPage;
|
|
|
|
private int endPage;
|
|
|
|
}
|