diff --git a/build.gradle b/build.gradle index 2ae9ba33..4f0d2d57 100644 --- a/build.gradle +++ b/build.gradle @@ -26,7 +26,7 @@ ext { } group = "stirling.software" -version = "0.40.2" +version = "0.41.0" java { // 17 is lowest but we support and recommend 21 diff --git a/docs/stirling-pdf.png b/docs/stirling-pdf.png deleted file mode 100644 index 9df29860..00000000 Binary files a/docs/stirling-pdf.png and /dev/null differ diff --git a/docs/stirling-transparent.svg b/docs/stirling-transparent.svg deleted file mode 100644 index a8511519..00000000 --- a/docs/stirling-transparent.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/AutoSplitPdfController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/AutoSplitPdfController.java index 9c6cbf9b..d2f850ce 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/AutoSplitPdfController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/AutoSplitPdfController.java @@ -8,7 +8,9 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; @@ -41,8 +43,12 @@ import stirling.software.SPDF.utils.WebResponseUtils; @Tag(name = "Misc", description = "Miscellaneous APIs") public class AutoSplitPdfController { - private static final String QR_CONTENT = "https://github.com/Stirling-Tools/Stirling-PDF"; - private static final String QR_CONTENT_OLD = "https://github.com/Frooodle/Stirling-PDF"; + private static final Set VALID_QR_CONTENTS = + new HashSet<>( + Set.of( + "https://github.com/Stirling-Tools/Stirling-PDF", + "https://github.com/Frooodle/Stirling-PDF", + "https://stirlingpdf.com")); private final CustomPDDocumentFactory pdfDocumentFactory; @@ -120,13 +126,14 @@ public class AutoSplitPdfController { for (int page 
= 0; page < document.getNumberOfPages(); ++page) { BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 150); String result = decodeQRCode(bim); - if ((QR_CONTENT.equals(result) || QR_CONTENT_OLD.equals(result)) && page != 0) { + + boolean isValidQrCode = VALID_QR_CONTENTS.contains(result); + log.debug("detected qr code {}, code is valid={}", result, isValidQrCode); + if (isValidQrCode && page != 0) { splitDocuments.add(new PDDocument()); } - if (!splitDocuments.isEmpty() - && !QR_CONTENT.equals(result) - && !QR_CONTENT_OLD.equals(result)) { + if (!splitDocuments.isEmpty() && !isValidQrCode) { splitDocuments.get(splitDocuments.size() - 1).addPage(document.getPage(page)); } else if (page == 0) { PDDocument firstDocument = new PDDocument(); @@ -135,7 +142,7 @@ public class AutoSplitPdfController { } // If duplexMode is true and current page is a divider, then skip next page - if (duplexMode && (QR_CONTENT.equals(result) || QR_CONTENT_OLD.equals(result))) { + if (duplexMode && isValidQrCode) { page++; } } @@ -168,6 +175,9 @@ public class AutoSplitPdfController { return WebResponseUtils.bytesToWebResponse( data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM); + } catch (Exception e) { + log.error("Error in auto split", e); + throw e; } finally { // Clean up resources if (document != null) { diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java index 5dfeedb7..1036cda6 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java @@ -52,7 +52,7 @@ public class ExtractImagesController { @Operation( summary = "Extract images from a PDF file", description = - "This endpoint extracts images from a given PDF file and returns them in a zip file. Users can specify the output image format. 
Input: PDF Output: IMAGE/ZIP Type: SIMO") + "This endpoint extracts images from a given PDF file and returns them in a zip file. Users can specify the output image format. Input:PDF Output:IMAGE/ZIP Type:SIMO") public ResponseEntity extractImages(@ModelAttribute PDFExtractImagesRequest request) throws IOException, InterruptedException, ExecutionException { MultipartFile file = request.getFileInput(); diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/FlattenController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/FlattenController.java index 10acbeea..c1e205ea 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/FlattenController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/FlattenController.java @@ -46,7 +46,7 @@ public class FlattenController { @Operation( summary = "Flatten PDF form fields or full page", description = - "Flattening just PDF form fields or converting each page to images to make text unselectable. Input: PDF, Output: PDF. Type: SISO") + "Flattening just PDF form fields or converting each page to images to make text unselectable. Input:PDF, Output:PDF. 
Type:SISO") public ResponseEntity flatten(@ModelAttribute FlattenRequest request) throws Exception { MultipartFile file = request.getFileInput(); diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java index 8dda1fc4..c8ffe9de 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java @@ -8,7 +8,7 @@ import java.util.*; import java.util.stream.Collectors; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; -import io.swagger.v3.oas.annotations.Operation; + import javax.imageio.ImageIO; import org.apache.pdfbox.multipdf.PDFMergerUtility; @@ -26,6 +26,7 @@ import org.springframework.web.multipart.MultipartFile; import io.github.pixee.security.BoundedLineReader; import io.github.pixee.security.Filenames; +import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; import lombok.extern.slf4j.Slf4j; @@ -65,9 +66,10 @@ public class OCRController { } @PostMapping(consumes = "multipart/form-data", value = "/ocr-pdf") - @Operation( - summary = "Process PDF files with OCR using Tesseract", - description = "Takes a PDF file as input, performs OCR using specified languages and OCR type (skip-text/force-ocr), and returns the processed PDF. Input:PDF Output:PDF Type:SISO") + @Operation( + summary = "Process PDF files with OCR using Tesseract", + description = + "Takes a PDF file as input, performs OCR using specified languages and OCR type (skip-text/force-ocr), and returns the processed PDF. 
Input:PDF Output:PDF Type:SISO") public ResponseEntity processPdfWithOCR( @ModelAttribute ProcessPdfWithOcrRequest request) throws IOException, InterruptedException { diff --git a/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineController.java b/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineController.java index dcef0376..2d6dd7b3 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineController.java @@ -25,6 +25,7 @@ import io.swagger.v3.oas.annotations.tags.Tag; import lombok.extern.slf4j.Slf4j; import stirling.software.SPDF.model.PipelineConfig; +import stirling.software.SPDF.model.PipelineResult; import stirling.software.SPDF.model.api.HandleDataRequest; import stirling.software.SPDF.utils.WebResponseUtils; @@ -58,7 +59,8 @@ public class PipelineController { if (inputFiles == null || inputFiles.size() == 0) { return null; } - List outputFiles = processor.runPipelineAgainstFiles(inputFiles, config); + PipelineResult result = processor.runPipelineAgainstFiles(inputFiles, config); + List outputFiles = result.getOutputFiles(); if (outputFiles != null && outputFiles.size() == 1) { // If there is only one file, return it directly Resource singleFile = outputFiles.get(0); diff --git a/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineDirectoryProcessor.java b/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineDirectoryProcessor.java index 20330204..38550c5c 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineDirectoryProcessor.java +++ b/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineDirectoryProcessor.java @@ -27,6 +27,7 @@ import lombok.extern.slf4j.Slf4j; import stirling.software.SPDF.config.InstallationPathConfig; import stirling.software.SPDF.model.PipelineConfig; import 
stirling.software.SPDF.model.PipelineOperation; +import stirling.software.SPDF.model.PipelineResult; import stirling.software.SPDF.utils.FileMonitor; @Service @@ -143,19 +144,64 @@ public class PipelineDirectoryProcessor { private File[] collectFilesForProcessing(Path dir, Path jsonFile, PipelineOperation operation) throws IOException { + + List inputExtensions = + apiDocService.getExtensionTypes(false, operation.getOperation()); + log.info( + "Allowed extensions for operation {}: {}", + operation.getOperation(), + inputExtensions); + + boolean allowAllFiles = inputExtensions.contains("ALL"); + try (Stream paths = Files.list(dir)) { - if ("automated".equals(operation.getParameters().get("fileInput"))) { - return paths.filter( - path -> - !Files.isDirectory(path) - && !path.equals(jsonFile) - && fileMonitor.isFileReadyForProcessing(path)) - .map(Path::toFile) - .toArray(File[]::new); - } else { - String fileInput = (String) operation.getParameters().get("fileInput"); - return new File[] {new File(fileInput)}; - } + File[] files = + paths.filter( + path -> { + if (Files.isDirectory(path)) { + return false; + } + if (path.equals(jsonFile)) { + return false; + } + + // Get file extension + String filename = path.getFileName().toString(); + String extension = + filename.contains(".") + ? 
filename.substring( + filename.lastIndexOf(".") + + 1) + .toLowerCase() + : ""; + + // Check against allowed extensions + boolean isAllowed = + allowAllFiles + || inputExtensions.contains(extension); + if (!isAllowed) { + log.info( + "Skipping file with unsupported extension: {} ({})", + filename, + extension); + } + return isAllowed; + }) + .filter( + path -> { + boolean isReady = + fileMonitor.isFileReadyForProcessing(path); + if (!isReady) { + log.info( + "File not ready for processing (locked/created last 5s): {}", + path); + } + return isReady; + }) + .map(Path::toFile) + .toArray(File[]::new); + log.info("Collected {} files for processing", files.length); + return files; } } @@ -198,19 +244,37 @@ public class PipelineDirectoryProcessor { try { List inputFiles = processor.generateInputFiles(filesToProcess.toArray(new File[0])); - if (inputFiles == null || inputFiles.size() == 0) { + if (inputFiles == null || inputFiles.isEmpty()) { return; } - List outputFiles = processor.runPipelineAgainstFiles(inputFiles, config); - if (outputFiles == null) return; - moveAndRenameFiles(outputFiles, config, dir); - deleteOriginalFiles(filesToProcess, processingDir); + PipelineResult result = processor.runPipelineAgainstFiles(inputFiles, config); + + if (result.isHasErrors()) { + log.error("Errors occurred during processing, retaining original files"); + moveToErrorDirectory(filesToProcess, dir); + } else { + moveAndRenameFiles(result.getOutputFiles(), config, dir); + deleteOriginalFiles(filesToProcess, processingDir); + } } catch (Exception e) { - log.error("error during processing", e); + log.error("Error during processing", e); moveFilesBack(filesToProcess, processingDir); } } + private void moveToErrorDirectory(List files, Path originalDir) throws IOException { + Path errorDir = originalDir.resolve("error"); + if (!Files.exists(errorDir)) { + Files.createDirectories(errorDir); + } + + for (File file : files) { + Path target = errorDir.resolve(file.getName()); + 
Files.move(file.toPath(), target); + log.info("Moved failed file to error directory for investigation: {}", target); + } + } + private void moveAndRenameFiles(List resources, PipelineConfig config, Path dir) throws IOException { for (Resource resource : resources) { diff --git a/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java b/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java index 58ffe43b..80171f20 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java +++ b/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java @@ -33,6 +33,7 @@ import lombok.extern.slf4j.Slf4j; import stirling.software.SPDF.SPDFApplication; import stirling.software.SPDF.model.PipelineConfig; import stirling.software.SPDF.model.PipelineOperation; +import stirling.software.SPDF.model.PipelineResult; import stirling.software.SPDF.model.Role; @Service @@ -84,8 +85,10 @@ public class PipelineProcessor { return "http://localhost:" + port + contextPath + "/"; } - List runPipelineAgainstFiles(List outputFiles, PipelineConfig config) + PipelineResult runPipelineAgainstFiles(List outputFiles, PipelineConfig config) throws Exception { + PipelineResult result = new PipelineResult(); + ByteArrayOutputStream logStream = new ByteArrayOutputStream(); PrintStream logPrintStream = new PrintStream(logStream); boolean hasErrors = false; @@ -130,7 +133,8 @@ public class PipelineProcessor { if (operation.startsWith("filter-") && (response.getBody() == null || response.getBody().length == 0)) { - log.info("Skipping file due to failing {}", operation); + result.setFiltersApplied(true); + log.info("Skipping file due to filtering {}", operation); continue; } if (!response.getStatusCode().equals(HttpStatus.OK)) { @@ -208,7 +212,10 @@ public class PipelineProcessor { if (hasErrors) { log.error("Errors occurred during processing. 
Log: {}", logStream.toString()); } - return outputFiles; + result.setHasErrors(hasErrors); + // filtersApplied is set in the operation loop when a filter-* step drops a file; do not overwrite it with hasErrors here. + result.setOutputFiles(outputFiles); + return result; } private ResponseEntity sendWebRequest(String url, MultiValueMap body) { diff --git a/src/main/java/stirling/software/SPDF/controller/api/security/RemoveCertSignController.java b/src/main/java/stirling/software/SPDF/controller/api/security/RemoveCertSignController.java index 9d1c78e9..88ed9b13 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/security/RemoveCertSignController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/security/RemoveCertSignController.java @@ -40,8 +40,7 @@ public class RemoveCertSignController { @Operation( summary = "Remove digital signature from PDF", description = - "This endpoint accepts a PDF file and returns the PDF file without the digital signature." - + " Input: PDF, Output: PDF") + "This endpoint accepts a PDF file and returns the PDF file without the digital signature. 
Input:PDF, Output:PDF Type:SISO") public ResponseEntity removeCertSignPDF(@ModelAttribute PDFFile request) throws Exception { MultipartFile pdf = request.getFileInput(); diff --git a/src/main/java/stirling/software/SPDF/model/PipelineResult.java b/src/main/java/stirling/software/SPDF/model/PipelineResult.java new file mode 100644 index 00000000..8ecf0d97 --- /dev/null +++ b/src/main/java/stirling/software/SPDF/model/PipelineResult.java @@ -0,0 +1,14 @@ +package stirling.software.SPDF.model; + +import java.util.List; + +import org.springframework.core.io.Resource; + +import lombok.Data; + +@Data +public class PipelineResult { + private List outputFiles; + private boolean hasErrors; + private boolean filtersApplied; +} diff --git a/src/main/resources/messages_en_GB.properties b/src/main/resources/messages_en_GB.properties index 9be009db..c0855bbb 100644 --- a/src/main/resources/messages_en_GB.properties +++ b/src/main/resources/messages_en_GB.properties @@ -768,7 +768,6 @@ autoSplitPDF.selectText.3=Upload the single large scanned PDF file and let Stirl autoSplitPDF.selectText.4=Divider pages are automatically detected and removed, guaranteeing a neat final document. 
autoSplitPDF.formPrompt=Submit PDF containing Stirling-PDF Page dividers: autoSplitPDF.duplexMode=Duplex Mode (Front and back scanning) -autoSplitPDF.dividerDownload1=Download 'Auto Splitter Divider (minimal).pdf' autoSplitPDF.dividerDownload2=Download 'Auto Splitter Divider (with instructions).pdf' autoSplitPDF.submit=Submit diff --git a/src/main/resources/static/files/Auto Splitter Divider (minimal).pdf b/src/main/resources/static/files/Auto Splitter Divider (minimal).pdf deleted file mode 100644 index 0859af0f..00000000 Binary files a/src/main/resources/static/files/Auto Splitter Divider (minimal).pdf and /dev/null differ diff --git a/src/main/resources/static/files/Auto Splitter Divider (with instructions).pdf b/src/main/resources/static/files/Auto Splitter Divider (with instructions).pdf index 6aed127e..370e5f51 100644 Binary files a/src/main/resources/static/files/Auto Splitter Divider (with instructions).pdf and b/src/main/resources/static/files/Auto Splitter Divider (with instructions).pdf differ diff --git a/src/main/resources/templates/auto-split-pdf.html b/src/main/resources/templates/auto-split-pdf.html index c9cd2b28..25584918 100644 --- a/src/main/resources/templates/auto-split-pdf.html +++ b/src/main/resources/templates/auto-split-pdf.html @@ -45,8 +45,6 @@
  • -