diff --git a/build.gradle b/build.gradle
index 2ae9ba33..4f0d2d57 100644
--- a/build.gradle
+++ b/build.gradle
@@ -26,7 +26,7 @@ ext {
}
group = "stirling.software"
-version = "0.40.2"
+version = "0.41.0"
java {
// 17 is lowest but we support and recommend 21
diff --git a/docs/stirling-pdf.png b/docs/stirling-pdf.png
deleted file mode 100644
index 9df29860..00000000
Binary files a/docs/stirling-pdf.png and /dev/null differ
diff --git a/docs/stirling-transparent.svg b/docs/stirling-transparent.svg
deleted file mode 100644
index a8511519..00000000
--- a/docs/stirling-transparent.svg
+++ /dev/null
@@ -1 +0,0 @@
-
\ No newline at end of file
diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/AutoSplitPdfController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/AutoSplitPdfController.java
index 9c6cbf9b..d2f850ce 100644
--- a/src/main/java/stirling/software/SPDF/controller/api/misc/AutoSplitPdfController.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/misc/AutoSplitPdfController.java
@@ -8,7 +8,9 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
@@ -41,8 +43,12 @@ import stirling.software.SPDF.utils.WebResponseUtils;
@Tag(name = "Misc", description = "Miscellaneous APIs")
public class AutoSplitPdfController {
- private static final String QR_CONTENT = "https://github.com/Stirling-Tools/Stirling-PDF";
- private static final String QR_CONTENT_OLD = "https://github.com/Frooodle/Stirling-PDF";
+ private static final Set VALID_QR_CONTENTS = // QR payloads that mark a scanned page as a divider page
+ new HashSet<>( // HashSet copy: Set.of(...).contains(null) would throw NPE; presumably decodeQRCode may return null - TODO confirm
+ Set.of(
+ "https://github.com/Stirling-Tools/Stirling-PDF",
+ "https://github.com/Frooodle/Stirling-PDF",
+ "https://stirlingpdf.com"));
private final CustomPDDocumentFactory pdfDocumentFactory;
@@ -120,13 +126,14 @@ public class AutoSplitPdfController {
for (int page = 0; page < document.getNumberOfPages(); ++page) {
BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 150);
String result = decodeQRCode(bim);
- if ((QR_CONTENT.equals(result) || QR_CONTENT_OLD.equals(result)) && page != 0) {
+
+ boolean isValidQrCode = VALID_QR_CONTENTS.contains(result); // true when this page's QR payload is a known divider URL
+ log.debug("detected qr code {}, code is valid={}", result, isValidQrCode);
+ if (isValidQrCode && page != 0) {
splitDocuments.add(new PDDocument());
}
- if (!splitDocuments.isEmpty()
- && !QR_CONTENT.equals(result)
- && !QR_CONTENT_OLD.equals(result)) {
+ if (!splitDocuments.isEmpty() && !isValidQrCode) {
splitDocuments.get(splitDocuments.size() - 1).addPage(document.getPage(page));
} else if (page == 0) {
PDDocument firstDocument = new PDDocument();
@@ -135,7 +142,7 @@ public class AutoSplitPdfController {
}
// If duplexMode is true and current page is a divider, then skip next page
- if (duplexMode && (QR_CONTENT.equals(result) || QR_CONTENT_OLD.equals(result))) {
+ if (duplexMode && isValidQrCode) {
page++;
}
}
@@ -168,6 +175,9 @@ public class AutoSplitPdfController {
return WebResponseUtils.bytesToWebResponse(
data, filename + ".zip", MediaType.APPLICATION_OCTET_STREAM);
+ } catch (Exception e) {
+ log.error("Error in auto split", e);
+ throw e;
} finally {
// Clean up resources
if (document != null) {
diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java
index 5dfeedb7..1036cda6 100644
--- a/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java
@@ -52,7 +52,7 @@ public class ExtractImagesController {
@Operation(
summary = "Extract images from a PDF file",
description =
- "This endpoint extracts images from a given PDF file and returns them in a zip file. Users can specify the output image format. Input: PDF Output: IMAGE/ZIP Type: SIMO")
+ "This endpoint extracts images from a given PDF file and returns them in a zip file. Users can specify the output image format. Input:PDF Output:IMAGE/ZIP Type:SIMO")
public ResponseEntity extractImages(@ModelAttribute PDFExtractImagesRequest request)
throws IOException, InterruptedException, ExecutionException {
MultipartFile file = request.getFileInput();
diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/FlattenController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/FlattenController.java
index 10acbeea..c1e205ea 100644
--- a/src/main/java/stirling/software/SPDF/controller/api/misc/FlattenController.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/misc/FlattenController.java
@@ -46,7 +46,7 @@ public class FlattenController {
@Operation(
summary = "Flatten PDF form fields or full page",
description =
- "Flattening just PDF form fields or converting each page to images to make text unselectable. Input: PDF, Output: PDF. Type: SISO")
+ "Flattening just PDF form fields or converting each page to images to make text unselectable. Input:PDF, Output:PDF. Type:SISO")
public ResponseEntity flatten(@ModelAttribute FlattenRequest request) throws Exception {
MultipartFile file = request.getFileInput();
diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java
index 8dda1fc4..c8ffe9de 100644
--- a/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java
@@ -8,7 +8,7 @@ import java.util.*;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
-import io.swagger.v3.oas.annotations.Operation;
+
import javax.imageio.ImageIO;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
@@ -26,6 +26,7 @@ import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.BoundedLineReader;
import io.github.pixee.security.Filenames;
+import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.extern.slf4j.Slf4j;
@@ -65,9 +66,10 @@ public class OCRController {
}
@PostMapping(consumes = "multipart/form-data", value = "/ocr-pdf")
- @Operation(
- summary = "Process PDF files with OCR using Tesseract",
- description = "Takes a PDF file as input, performs OCR using specified languages and OCR type (skip-text/force-ocr), and returns the processed PDF. Input:PDF Output:PDF Type:SISO")
+ @Operation(
+ summary = "Process PDF files with OCR using Tesseract",
+ description =
+ "Takes a PDF file as input, performs OCR using specified languages and OCR type (skip-text/force-ocr), and returns the processed PDF. Input:PDF Output:PDF Type:SISO")
public ResponseEntity processPdfWithOCR(
@ModelAttribute ProcessPdfWithOcrRequest request)
throws IOException, InterruptedException {
diff --git a/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineController.java b/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineController.java
index dcef0376..2d6dd7b3 100644
--- a/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineController.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineController.java
@@ -25,6 +25,7 @@ import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.PipelineConfig;
+import stirling.software.SPDF.model.PipelineResult;
import stirling.software.SPDF.model.api.HandleDataRequest;
import stirling.software.SPDF.utils.WebResponseUtils;
@@ -58,7 +59,8 @@ public class PipelineController {
if (inputFiles == null || inputFiles.size() == 0) {
return null;
}
- List outputFiles = processor.runPipelineAgainstFiles(inputFiles, config);
+ PipelineResult result = processor.runPipelineAgainstFiles(inputFiles, config);
+ List outputFiles = result.getOutputFiles();
if (outputFiles != null && outputFiles.size() == 1) {
// If there is only one file, return it directly
Resource singleFile = outputFiles.get(0);
diff --git a/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineDirectoryProcessor.java b/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineDirectoryProcessor.java
index 20330204..38550c5c 100644
--- a/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineDirectoryProcessor.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineDirectoryProcessor.java
@@ -27,6 +27,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.config.InstallationPathConfig;
import stirling.software.SPDF.model.PipelineConfig;
import stirling.software.SPDF.model.PipelineOperation;
+import stirling.software.SPDF.model.PipelineResult;
import stirling.software.SPDF.utils.FileMonitor;
@Service
@@ -143,19 +144,64 @@ public class PipelineDirectoryProcessor {
private File[] collectFilesForProcessing(Path dir, Path jsonFile, PipelineOperation operation)
throws IOException {
+
+ List inputExtensions =
+ apiDocService.getExtensionTypes(false, operation.getOperation());
+ log.info(
+ "Allowed extensions for operation {}: {}",
+ operation.getOperation(),
+ inputExtensions);
+
+ boolean allowAllFiles = inputExtensions.contains("ALL");
+
try (Stream paths = Files.list(dir)) {
- if ("automated".equals(operation.getParameters().get("fileInput"))) {
- return paths.filter(
- path ->
- !Files.isDirectory(path)
- && !path.equals(jsonFile)
- && fileMonitor.isFileReadyForProcessing(path))
- .map(Path::toFile)
- .toArray(File[]::new);
- } else {
- String fileInput = (String) operation.getParameters().get("fileInput");
- return new File[] {new File(fileInput)};
- }
+ File[] files =
+ paths.filter(
+ path -> {
+ if (Files.isDirectory(path)) {
+ return false;
+ }
+ if (path.equals(jsonFile)) {
+ return false;
+ }
+
+ // Get file extension
+ String filename = path.getFileName().toString();
+ // Lower-case with Locale.ROOT so matching is locale-independent (under a
+ // Turkish default locale "TIFF" would otherwise lower-case with a dotless i).
+ int dotIndex = filename.lastIndexOf('.');
+ String extension =
+ dotIndex >= 0
+ ? filename.substring(dotIndex + 1).toLowerCase(java.util.Locale.ROOT)
+ : "";
+
+ // Check against allowed extensions
+ boolean isAllowed =
+ allowAllFiles
+ || inputExtensions.contains(extension);
+ if (!isAllowed) {
+ log.info(
+ "Skipping file with unsupported extension: {} ({})",
+ filename,
+ extension);
+ }
+ return isAllowed;
+ })
+ .filter(
+ path -> {
+ boolean isReady =
+ fileMonitor.isFileReadyForProcessing(path);
+ if (!isReady) {
+ log.info(
+ "File not ready for processing (locked/created last 5s): {}",
+ path);
+ }
+ return isReady;
+ })
+ .map(Path::toFile)
+ .toArray(File[]::new);
+ log.info("Collected {} files for processing", files.length);
+ return files;
}
}
@@ -198,19 +244,37 @@ public class PipelineDirectoryProcessor {
try {
List inputFiles =
processor.generateInputFiles(filesToProcess.toArray(new File[0]));
- if (inputFiles == null || inputFiles.size() == 0) {
+ if (inputFiles == null || inputFiles.isEmpty()) {
return;
}
- List outputFiles = processor.runPipelineAgainstFiles(inputFiles, config);
- if (outputFiles == null) return;
- moveAndRenameFiles(outputFiles, config, dir);
- deleteOriginalFiles(filesToProcess, processingDir);
+ PipelineResult result = processor.runPipelineAgainstFiles(inputFiles, config);
+
+ if (result.isHasErrors()) {
+ log.error("Errors occurred during processing, retaining original files");
+ moveToErrorDirectory(filesToProcess, dir);
+ } else {
+ moveAndRenameFiles(result.getOutputFiles(), config, dir);
+ deleteOriginalFiles(filesToProcess, processingDir);
+ }
} catch (Exception e) {
- log.error("error during processing", e);
+ log.error("Error during processing", e);
moveFilesBack(filesToProcess, processingDir);
}
}
+ /** Moves every given file into the "error" sub-directory of originalDir; throws IOException if a move fails. */
+ private void moveToErrorDirectory(List files, Path originalDir) throws IOException {
+ // createDirectories is a no-op when the directory already exists, so no exists() check is needed
+ Path errorDir = Files.createDirectories(originalDir.resolve("error"));
+ for (File file : files) {
+ Path target = errorDir.resolve(file.getName());
+ // REPLACE_EXISTING: a same-named file left by an earlier failed run must not abort the loop half-way
+ Files.move(
+ file.toPath(), target, java.nio.file.StandardCopyOption.REPLACE_EXISTING);
+ log.info("Moved failed file to error directory for investigation: {}", target);
+ }
+ }
+
private void moveAndRenameFiles(List resources, PipelineConfig config, Path dir)
throws IOException {
for (Resource resource : resources) {
diff --git a/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java b/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java
index 58ffe43b..80171f20 100644
--- a/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/pipeline/PipelineProcessor.java
@@ -33,6 +33,7 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.SPDFApplication;
import stirling.software.SPDF.model.PipelineConfig;
import stirling.software.SPDF.model.PipelineOperation;
+import stirling.software.SPDF.model.PipelineResult;
import stirling.software.SPDF.model.Role;
@Service
@@ -84,8 +85,10 @@ public class PipelineProcessor {
return "http://localhost:" + port + contextPath + "/";
}
- List runPipelineAgainstFiles(List outputFiles, PipelineConfig config)
+ PipelineResult runPipelineAgainstFiles(List outputFiles, PipelineConfig config)
throws Exception {
+ PipelineResult result = new PipelineResult();
+
ByteArrayOutputStream logStream = new ByteArrayOutputStream();
PrintStream logPrintStream = new PrintStream(logStream);
boolean hasErrors = false;
@@ -130,7 +133,8 @@ public class PipelineProcessor {
if (operation.startsWith("filter-")
&& (response.getBody() == null
|| response.getBody().length == 0)) {
- log.info("Skipping file due to failing {}", operation);
+ result.setFiltersApplied(true);
+ log.info("Skipping file due to filtering {}", operation);
continue;
}
if (!response.getStatusCode().equals(HttpStatus.OK)) {
@@ -208,7 +212,10 @@ public class PipelineProcessor {
if (hasErrors) {
log.error("Errors occurred during processing. Log: {}", logStream.toString());
}
- return outputFiles;
+ result.setHasErrors(hasErrors);
+ // filtersApplied is set to true earlier when a filter-* operation returns no body; it must not be overwritten with hasErrors
+ result.setOutputFiles(outputFiles);
+ return result;
}
private ResponseEntity sendWebRequest(String url, MultiValueMap body) {
diff --git a/src/main/java/stirling/software/SPDF/controller/api/security/RemoveCertSignController.java b/src/main/java/stirling/software/SPDF/controller/api/security/RemoveCertSignController.java
index 9d1c78e9..88ed9b13 100644
--- a/src/main/java/stirling/software/SPDF/controller/api/security/RemoveCertSignController.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/security/RemoveCertSignController.java
@@ -40,8 +40,7 @@ public class RemoveCertSignController {
@Operation(
summary = "Remove digital signature from PDF",
description =
- "This endpoint accepts a PDF file and returns the PDF file without the digital signature."
- + " Input: PDF, Output: PDF")
+ "This endpoint accepts a PDF file and returns the PDF file without the digital signature. Input:PDF, Output:PDF Type:SISO")
public ResponseEntity removeCertSignPDF(@ModelAttribute PDFFile request)
throws Exception {
MultipartFile pdf = request.getFileInput();
diff --git a/src/main/java/stirling/software/SPDF/model/PipelineResult.java b/src/main/java/stirling/software/SPDF/model/PipelineResult.java
new file mode 100644
index 00000000..8ecf0d97
--- /dev/null
+++ b/src/main/java/stirling/software/SPDF/model/PipelineResult.java
@@ -0,0 +1,14 @@
+package stirling.software.SPDF.model;
+
+import java.util.List;
+
+import org.springframework.core.io.Resource;
+
+import lombok.Data;
+
+@Data // Lombok generates the accessors callers use (e.g. isHasErrors(), getOutputFiles(), setFiltersApplied(...))
+public class PipelineResult { // outcome of one PipelineProcessor.runPipelineAgainstFiles call
+ private List outputFiles; // resources returned by the pipeline run
+ private boolean hasErrors; // copy of the processor's hasErrors flag for the run
+ private boolean filtersApplied; // set when a filter-* operation returned an empty body and the file was skipped
+}
diff --git a/src/main/resources/messages_en_GB.properties b/src/main/resources/messages_en_GB.properties
index 9be009db..c0855bbb 100644
--- a/src/main/resources/messages_en_GB.properties
+++ b/src/main/resources/messages_en_GB.properties
@@ -768,7 +768,6 @@ autoSplitPDF.selectText.3=Upload the single large scanned PDF file and let Stirl
autoSplitPDF.selectText.4=Divider pages are automatically detected and removed, guaranteeing a neat final document.
autoSplitPDF.formPrompt=Submit PDF containing Stirling-PDF Page dividers:
autoSplitPDF.duplexMode=Duplex Mode (Front and back scanning)
-autoSplitPDF.dividerDownload1=Download 'Auto Splitter Divider (minimal).pdf'
autoSplitPDF.dividerDownload2=Download 'Auto Splitter Divider (with instructions).pdf'
autoSplitPDF.submit=Submit
diff --git a/src/main/resources/static/files/Auto Splitter Divider (minimal).pdf b/src/main/resources/static/files/Auto Splitter Divider (minimal).pdf
deleted file mode 100644
index 0859af0f..00000000
Binary files a/src/main/resources/static/files/Auto Splitter Divider (minimal).pdf and /dev/null differ
diff --git a/src/main/resources/static/files/Auto Splitter Divider (with instructions).pdf b/src/main/resources/static/files/Auto Splitter Divider (with instructions).pdf
index 6aed127e..370e5f51 100644
Binary files a/src/main/resources/static/files/Auto Splitter Divider (with instructions).pdf and b/src/main/resources/static/files/Auto Splitter Divider (with instructions).pdf differ
diff --git a/src/main/resources/templates/auto-split-pdf.html b/src/main/resources/templates/auto-split-pdf.html
index c9cd2b28..25584918 100644
--- a/src/main/resources/templates/auto-split-pdf.html
+++ b/src/main/resources/templates/auto-split-pdf.html
@@ -45,8 +45,6 @@
-