2023-04-28 23:18:10 +01:00
package stirling.software.SPDF.controller.api ;
2024-02-01 23:48:27 +00:00
import io.github.pixee.security.Filenames ;
2023-04-28 23:18:10 +01:00
import java.io.ByteArrayOutputStream ;
import java.io.IOException ;
import java.nio.file.Files ;
import java.nio.file.Path ;
import java.util.ArrayList ;
import java.util.List ;
import java.util.stream.Collectors ;
import java.util.zip.ZipEntry ;
import java.util.zip.ZipOutputStream ;
2024-01-12 23:15:27 +00:00
import org.apache.pdfbox.Loader ;
2023-04-28 23:18:10 +01:00
import org.apache.pdfbox.pdmodel.PDDocument ;
import org.apache.pdfbox.pdmodel.PDPage ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
import org.springframework.http.MediaType ;
import org.springframework.http.ResponseEntity ;
2023-09-09 00:25:27 +01:00
import org.springframework.web.bind.annotation.ModelAttribute ;
2023-04-28 23:18:10 +01:00
import org.springframework.web.bind.annotation.PostMapping ;
2023-09-11 23:19:50 +01:00
import org.springframework.web.bind.annotation.RequestMapping ;
2023-04-28 23:18:10 +01:00
import org.springframework.web.bind.annotation.RestController ;
import org.springframework.web.multipart.MultipartFile ;
2023-05-08 15:20:04 +01:00
import io.swagger.v3.oas.annotations.Operation ;
2023-06-25 09:16:32 +01:00
import io.swagger.v3.oas.annotations.tags.Tag ;
2023-12-30 19:11:27 +00:00
2023-09-09 00:25:27 +01:00
import stirling.software.SPDF.model.api.PDFWithPageNums ;
2023-06-07 14:01:37 +01:00
import stirling.software.SPDF.utils.WebResponseUtils ;
2023-10-28 10:40:26 +01:00
2023-04-28 23:18:10 +01:00
@RestController
2023-09-11 23:19:50 +01:00
@RequestMapping ( " /api/v1/general " )
2023-06-25 09:16:32 +01:00
@Tag ( name = " General " , description = " General APIs " )
2023-04-28 23:18:10 +01:00
public class SplitPDFController {
private static final Logger logger = LoggerFactory . getLogger ( SplitPDFController . class ) ;
@PostMapping ( consumes = " multipart/form-data " , value = " /split-pages " )
2023-05-08 15:20:04 +01:00
@Operation (
summary = " Split a PDF file into separate documents " ,
2023-06-23 23:29:53 +01:00
description =
" This endpoint splits a given PDF file into separate documents based on the specified page numbers or ranges. Users can specify pages using individual numbers, ranges, or 'all' for every page. Input:PDF Output:PDF Type:SIMO " )
2023-09-09 00:25:27 +01:00
public ResponseEntity < byte [ ] > splitPdf ( @ModelAttribute PDFWithPageNums request )
throws IOException {
MultipartFile file = request . getFileInput ( ) ;
String pages = request . getPageNumbers ( ) ;
2023-04-28 23:18:10 +01:00
// open the pdf document
2024-01-12 23:15:27 +00:00
PDDocument document = Loader . loadPDF ( file . getBytes ( ) ) ;
2023-04-28 23:18:10 +01:00
2023-09-09 00:25:27 +01:00
List < Integer > pageNumbers = request . getPageNumbersList ( document ) ;
2023-09-12 00:42:22 +01:00
if ( ! pageNumbers . contains ( document . getNumberOfPages ( ) - 1 ) )
pageNumbers . add ( document . getNumberOfPages ( ) - 1 ) ;
2023-04-28 23:18:10 +01:00
logger . info (
" Splitting PDF into pages: {} " ,
pageNumbers . stream ( ) . map ( String : : valueOf ) . collect ( Collectors . joining ( " , " ) ) ) ;
2023-10-28 10:40:26 +01:00
// split the document
2023-04-28 23:18:10 +01:00
List < ByteArrayOutputStream > splitDocumentsBoas = new ArrayList < > ( ) ;
2023-10-28 10:40:26 +01:00
int previousPageNumber = 0 ;
2023-06-03 22:56:15 +01:00
for ( int splitPoint : pageNumbers ) {
2023-10-28 10:40:26 +01:00
try ( PDDocument splitDocument = new PDDocument ( ) ) {
for ( int i = previousPageNumber ; i < = splitPoint ; i + + ) {
PDPage page = document . getPage ( i ) ;
splitDocument . addPage ( page ) ;
logger . debug ( " Adding page {} to split document " , i ) ;
}
previousPageNumber = splitPoint + 1 ;
2023-04-28 23:18:10 +01:00
ByteArrayOutputStream baos = new ByteArrayOutputStream ( ) ;
2023-10-28 10:40:26 +01:00
splitDocument . save ( baos ) ;
2023-04-28 23:18:10 +01:00
splitDocumentsBoas . add ( baos ) ;
2023-10-28 10:40:26 +01:00
} catch ( Exception e ) {
logger . error ( " Failed splitting documents and saving them " , e ) ;
throw e ;
2023-04-28 23:18:10 +01:00
}
}
// closing the original document
document . close ( ) ;
Path zipFile = Files . createTempFile ( " split_documents " , " .zip " ) ;
2024-02-01 23:48:27 +00:00
String filename = Filenames . toSimpleFileName ( file . getOriginalFilename ( ) ) . replaceFirst ( " [.][^.]+$ " , " " ) ;
2023-04-28 23:18:10 +01:00
try ( ZipOutputStream zipOut = new ZipOutputStream ( Files . newOutputStream ( zipFile ) ) ) {
// loop through the split documents and write them to the zip file
for ( int i = 0 ; i < splitDocumentsBoas . size ( ) ; i + + ) {
2023-06-07 14:01:37 +01:00
String fileName = filename + " _ " + ( i + 1 ) + " .pdf " ;
2023-04-28 23:18:10 +01:00
ByteArrayOutputStream baos = splitDocumentsBoas . get ( i ) ;
byte [ ] pdf = baos . toByteArray ( ) ;
// Add PDF file to the zip
ZipEntry pdfEntry = new ZipEntry ( fileName ) ;
zipOut . putNextEntry ( pdfEntry ) ;
zipOut . write ( pdf ) ;
zipOut . closeEntry ( ) ;
logger . info ( " Wrote split document {} to zip file " , fileName ) ;
}
} catch ( Exception e ) {
logger . error ( " Failed writing to zip " , e ) ;
throw e ;
}
logger . info ( " Successfully created zip file with split documents: {} " , zipFile . toString ( ) ) ;
byte [ ] data = Files . readAllBytes ( zipFile ) ;
Files . delete ( zipFile ) ;
// return the Resource in the response
2023-06-07 14:01:37 +01:00
return WebResponseUtils . bytesToWebResponse (
data , filename + " .zip " , MediaType . APPLICATION_OCTET_STREAM ) ;
2023-04-28 23:18:10 +01:00
}
2023-10-28 10:40:26 +01:00
}