Stirling-PDF/src/main/java/stirling/software/SPDF/controller/api/SplitPDFController.java

package stirling.software.SPDF.controller.api;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;

import stirling.software.SPDF.model.api.PDFWithPageNums;
import stirling.software.SPDF.utils.WebResponseUtils;

/**
 * REST controller exposing the "split pages" operation under {@code /api/v1/general}.
 *
 * <p>The uploaded PDF is cut into consecutive page ranges and the resulting documents are
 * returned to the caller bundled in a single zip archive.
 */
@RestController
@RequestMapping("/api/v1/general")
@Tag(name = "General", description = "General APIs")
public class SplitPDFController {

    private static final Logger logger = LoggerFactory.getLogger(SplitPDFController.class);

    /**
     * Splits the uploaded PDF at the requested split points and returns the parts as a zip.
     *
     * <p>Each split point is treated as the zero-based index of the last page of a part; the
     * next part starts on the following page. The last page of the document is always appended
     * as a final split point when it is not already present, so trailing pages are never
     * dropped.
     *
     * @param request form model carrying the uploaded file and the page selection
     * @return a response whose body is a zip archive named after the uploaded file, containing
     *     one {@code <name>_<n>.pdf} entry per part (n starting at 1)
     * @throws IOException if the PDF cannot be loaded, a part cannot be saved, or the zip
     *     archive cannot be written or read back
     */
    @PostMapping(consumes = "multipart/form-data", value = "/split-pages")
    @Operation(
            summary = "Split a PDF file into separate documents",
            description =
                    "This endpoint splits a given PDF file into separate documents based on the specified page numbers or ranges. Users can specify pages using individual numbers, ranges, or 'all' for every page. Input:PDF Output:PDF Type:SIMO")
    public ResponseEntity<byte[]> splitPdf(@ModelAttribute PDFWithPageNums request)
            throws IOException {
        MultipartFile file = request.getFileInput();

        List<ByteArrayOutputStream> splitDocumentsBoas;
        // try-with-resources guarantees the source document is released even when a part
        // fails to build or save.
        try (PDDocument document = Loader.loadPDF(file.getBytes())) {
            int lastPageIndex = document.getNumberOfPages() - 1;

            // Work on a private copy: the list handed back by the request may not support add().
            List<Integer> pageNumbers = new ArrayList<>(request.getPageNumbersList(document));
            if (!pageNumbers.contains(lastPageIndex)) {
                pageNumbers.add(lastPageIndex);
            }
            logger.info(
                    "Splitting PDF into pages: {}",
                    pageNumbers.stream().map(String::valueOf).collect(Collectors.joining(",")));

            splitDocumentsBoas = splitAtPoints(document, pageNumbers);
        }

        String filename = stripPathAndExtension(file.getOriginalFilename());
        byte[] data = zipDocuments(splitDocumentsBoas, filename);

        // return the Resource in the response
        return WebResponseUtils.bytesToWebResponse(
                data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
    }

    /**
     * Builds one in-memory PDF per split point, each holding the pages from the end of the
     * previous part up to and including the split point.
     *
     * <p>NOTE(review): assumes the split points are ascending, in-range, zero-based page
     * indices; a split point at or below the previous one produces an empty part - confirm
     * against {@code PDFWithPageNums#getPageNumbersList}.
     */
    private static List<ByteArrayOutputStream> splitAtPoints(
            PDDocument document, List<Integer> pageNumbers) throws IOException {
        List<ByteArrayOutputStream> parts = new ArrayList<>();
        int previousPageNumber = 0;
        for (int splitPoint : pageNumbers) {
            try (PDDocument splitDocument = new PDDocument()) {
                for (int i = previousPageNumber; i <= splitPoint; i++) {
                    PDPage page = document.getPage(i);
                    splitDocument.addPage(page);
                    logger.debug("Adding page {} to split document", i);
                }
                previousPageNumber = splitPoint + 1;

                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                splitDocument.save(baos);
                parts.add(baos);
            } catch (IOException | RuntimeException e) {
                logger.error("Failed splitting documents and saving them", e);
                throw e;
            }
        }
        return parts;
    }

    /**
     * Writes the parts to a temporary zip file as {@code <filename>_<n>.pdf} entries and
     * returns the archive bytes. The temporary file is removed on success and on failure.
     */
    private static byte[] zipDocuments(List<ByteArrayOutputStream> parts, String filename)
            throws IOException {
        Path zipFile = Files.createTempFile("split_documents", ".zip");
        try {
            try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(zipFile))) {
                // loop through the split documents and write them to the zip file
                for (int i = 0; i < parts.size(); i++) {
                    String fileName = filename + "_" + (i + 1) + ".pdf";

                    // Add PDF file to the zip
                    zipOut.putNextEntry(new ZipEntry(fileName));
                    zipOut.write(parts.get(i).toByteArray());
                    zipOut.closeEntry();

                    logger.info("Wrote split document {} to zip file", fileName);
                }
            } catch (IOException | RuntimeException e) {
                logger.error("Failed writing to zip", e);
                throw e;
            }

            logger.info("Successfully created zip file with split documents: {}", zipFile);
            return Files.readAllBytes(zipFile);
        } finally {
            // Never leave the temp archive behind, whichever way we exit.
            Files.deleteIfExists(zipFile);
        }
    }

    /**
     * Reduces a client-supplied file name to a bare base name: any directory portion
     * ({@code /} or {@code \}) and the final extension are removed. A {@code null} name is
     * treated as empty, since the upload's original filename may be unavailable.
     */
    private static String stripPathAndExtension(String originalName) {
        if (originalName == null) {
            return "";
        }
        // The name comes from the client; keep directory components out of zip entry names.
        int lastSeparator =
                Math.max(originalName.lastIndexOf('/'), originalName.lastIndexOf('\\'));
        return originalName.substring(lastSeparator + 1).replaceFirst("[.][^.]+$", "");
    }
}
rework for API, folder changes, easter eggs and fun 2023-04-28 23:18:10 +01:00			`package stirling.software.SPDF.controller.api;`

			`import java.io.ByteArrayOutputStream;`
			`import java.io.IOException;`
			`import java.nio.file.Files;`
			`import java.nio.file.Path;`
			`import java.util.ArrayList;`
			`import java.util.List;`
			`import java.util.stream.Collectors;`
			`import java.util.zip.ZipEntry;`
			`import java.util.zip.ZipOutputStream;`

pdfbox3 upgrade and fix 2024-01-12 23:15:27 +00:00			`import org.apache.pdfbox.Loader;`
rework for API, folder changes, easter eggs and fun 2023-04-28 23:18:10 +01:00			`import org.apache.pdfbox.pdmodel.PDDocument;`
			`import org.apache.pdfbox.pdmodel.PDPage;`
			`import org.slf4j.Logger;`
			`import org.slf4j.LoggerFactory;`
			`import org.springframework.http.MediaType;`
			`import org.springframework.http.ResponseEntity;`
init 2023-09-09 00:25:27 +01:00			`import org.springframework.web.bind.annotation.ModelAttribute;`
rework for API, folder changes, easter eggs and fun 2023-04-28 23:18:10 +01:00			`import org.springframework.web.bind.annotation.PostMapping;`
api /api/v1/ 2023-09-11 23:19:50 +01:00			`import org.springframework.web.bind.annotation.RequestMapping;`
rework for API, folder changes, easter eggs and fun 2023-04-28 23:18:10 +01:00			`import org.springframework.web.bind.annotation.RestController;`
			`import org.springframework.web.multipart.MultipartFile;`

more docs 2023-05-08 15:20:04 +01:00			`import io.swagger.v3.oas.annotations.Operation;`
api tag 2023-06-25 09:16:32 +01:00			`import io.swagger.v3.oas.annotations.tags.Tag;`
formatting 2023-12-30 19:11:27 +00:00
init 2023-09-09 00:25:27 +01:00			`import stirling.software.SPDF.model.api.PDFWithPageNums;`
fix naming issues in split and made it allign with others 2023-06-07 14:01:37 +01:00			`import stirling.software.SPDF.utils.WebResponseUtils;`
fixes for #438 and #423 2023-10-28 10:40:26 +01:00
rework for API, folder changes, easter eggs and fun 2023-04-28 23:18:10 +01:00			`@RestController`
api /api/v1/ 2023-09-11 23:19:50 +01:00			`@RequestMapping("/api/v1/general")`
api tag 2023-06-25 09:16:32 +01:00			`@Tag(name = "General", description = "General APIs")`
rework for API, folder changes, easter eggs and fun 2023-04-28 23:18:10 +01:00			`public class SplitPDFController {`

			`private static final Logger logger = LoggerFactory.getLogger(SplitPDFController.class);`

			`@PostMapping(consumes = "multipart/form-data", value = "/split-pages")`
more docs 2023-05-08 15:20:04 +01:00			`@Operation(`
			`summary = "Split a PDF file into separate documents",`
init many new shit 2023-06-23 23:29:53 +01:00			`description =`
			`"This endpoint splits a given PDF file into separate documents based on the specified page numbers or ranges. Users can specify pages using individual numbers, ranges, or 'all' for every page. Input:PDF Output:PDF Type:SIMO")`
init 2023-09-09 00:25:27 +01:00			`public ResponseEntity<byte[]> splitPdf(@ModelAttribute PDFWithPageNums request)`
			`throws IOException {`
			`MultipartFile file = request.getFileInput();`
			`String pages = request.getPageNumbers();`
rework for API, folder changes, easter eggs and fun 2023-04-28 23:18:10 +01:00			`// open the pdf document`
pdfbox3 upgrade and fix 2024-01-12 23:15:27 +00:00
			`PDDocument document = Loader.loadPDF(file.getBytes());`
rework for API, folder changes, easter eggs and fun 2023-04-28 23:18:10 +01:00
init 2023-09-09 00:25:27 +01:00			`List<Integer> pageNumbers = request.getPageNumbersList(document);`
slow rework testing, still on multipage 2023-09-12 00:42:22 +01:00			`if (!pageNumbers.contains(document.getNumberOfPages() - 1))`
			`pageNumbers.add(document.getNumberOfPages() - 1);`
rework for API, folder changes, easter eggs and fun 2023-04-28 23:18:10 +01:00			`logger.info(`
			`"Splitting PDF into pages: {}",`
			`pageNumbers.stream().map(String::valueOf).collect(Collectors.joining(",")));`

fixes for #438 and #423 2023-10-28 10:40:26 +01:00			`// split the document`
rework for API, folder changes, easter eggs and fun 2023-04-28 23:18:10 +01:00			`List<ByteArrayOutputStream> splitDocumentsBoas = new ArrayList<>();`
fixes for #438 and #423 2023-10-28 10:40:26 +01:00			`int previousPageNumber = 0;`
Split pages support n function and other stuff 2023-06-03 22:56:15 +01:00			`for (int splitPoint : pageNumbers) {`
fixes for #438 and #423 2023-10-28 10:40:26 +01:00			`try (PDDocument splitDocument = new PDDocument()) {`
			`for (int i = previousPageNumber; i <= splitPoint; i++) {`
			`PDPage page = document.getPage(i);`
			`splitDocument.addPage(page);`
			`logger.debug("Adding page {} to split document", i);`
			`}`
			`previousPageNumber = splitPoint + 1;`
rework for API, folder changes, easter eggs and fun 2023-04-28 23:18:10 +01:00
			`ByteArrayOutputStream baos = new ByteArrayOutputStream();`
fixes for #438 and #423 2023-10-28 10:40:26 +01:00			`splitDocument.save(baos);`

rework for API, folder changes, easter eggs and fun 2023-04-28 23:18:10 +01:00			`splitDocumentsBoas.add(baos);`
fixes for #438 and #423 2023-10-28 10:40:26 +01:00			`} catch (Exception e) {`
			`logger.error("Failed splitting documents and saving them", e);`
			`throw e;`
rework for API, folder changes, easter eggs and fun 2023-04-28 23:18:10 +01:00			`}`
			`}`

			`// closing the original document`
			`document.close();`

			`Path zipFile = Files.createTempFile("split_documents", ".zip");`

fix naming issues in split and made it allign with others 2023-06-07 14:01:37 +01:00			`String filename = file.getOriginalFilename().replaceFirst("[.][^.]+$", "");`
rework for API, folder changes, easter eggs and fun 2023-04-28 23:18:10 +01:00			`try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(zipFile))) {`
			`// loop through the split documents and write them to the zip file`
			`for (int i = 0; i < splitDocumentsBoas.size(); i++) {`
fix naming issues in split and made it allign with others 2023-06-07 14:01:37 +01:00			`String fileName = filename + "_" + (i + 1) + ".pdf";`
rework for API, folder changes, easter eggs and fun 2023-04-28 23:18:10 +01:00			`ByteArrayOutputStream baos = splitDocumentsBoas.get(i);`
			`byte[] pdf = baos.toByteArray();`

			`// Add PDF file to the zip`
			`ZipEntry pdfEntry = new ZipEntry(fileName);`
			`zipOut.putNextEntry(pdfEntry);`
			`zipOut.write(pdf);`
			`zipOut.closeEntry();`

			`logger.info("Wrote split document {} to zip file", fileName);`
			`}`
			`} catch (Exception e) {`
			`logger.error("Failed writing to zip", e);`
			`throw e;`
			`}`

			`logger.info("Successfully created zip file with split documents: {}", zipFile.toString());`
			`byte[] data = Files.readAllBytes(zipFile);`
			`Files.delete(zipFile);`

			`// return the Resource in the response`
fix naming issues in split and made it allign with others 2023-06-07 14:01:37 +01:00			`return WebResponseUtils.bytesToWebResponse(`
			`data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);`
rework for API, folder changes, easter eggs and fun 2023-04-28 23:18:10 +01:00			`}`
fixes for #438 and #423 2023-10-28 10:40:26 +01:00			`}`