From 782c30f934d06128aecb0530e2278324173237dd Mon Sep 17 00:00:00 2001
From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com.>
Date: Mon, 30 Jun 2025 22:27:45 +0100
Subject: [PATCH] restore OCRMyPDF and ghostscript compression

---
 Dockerfile                                    |   3 +-
 Dockerfile.fat                                |   3 +-
 .../common/model/ApplicationProperties.java   |  20 +
 .../service/TempFileCleanupService.java       |   4 +-
 .../software/common/util/ProcessExecutor.java |  24 +-
 .../common/util/TempFileRegistry.java         |  10 +-
 .../SPDF/config/EndpointConfiguration.java    |  62 ++-
 .../SPDF/config/ExternalAppDepConfig.java     |   4 +
 .../api/misc/CompressController.java          | 174 +++++++--
 .../controller/api/misc/OCRController.java    | 353 ++++++++++++++----
 .../controller/api/misc/RepairController.java |  93 ++++-
 .../api/misc/ProcessPdfWithOcrRequest.java    |  15 +
 .../resources/templates/misc/ocr-pdf.html     |  24 ++
 13 files changed, 646 insertions(+), 143 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index fd02b29f7..1edf05841 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -51,7 +51,6 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
     tini \
     bash \
     curl \
-    qpdf \
     shadow \
     su-exec \
     openssl \
@@ -69,9 +68,11 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
 	tesseract-ocr-data-deu \
 	tesseract-ocr-data-fra \
 	tesseract-ocr-data-por \
+    unpaper \
     # CV
     py3-opencv \
     python3 \
+    ocrmypdf \
     py3-pip \
     py3-pillow@testing \
     py3-pdf2image@testing && \
diff --git a/Dockerfile.fat b/Dockerfile.fat
index 666ba98be..976c1ee17 100644
--- a/Dockerfile.fat
+++ b/Dockerfile.fat
@@ -76,16 +76,17 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a
     # pdftohtml
     poppler-utils \
     # OCR MY PDF (unpaper for descew and other advanced featues)
-    qpdf \
     tesseract-ocr-data-eng \
     tesseract-ocr-data-chi_sim \
 	tesseract-ocr-data-deu \
 	tesseract-ocr-data-fra \
 	tesseract-ocr-data-por \
+    unpaper \
     font-terminus font-dejavu font-noto font-noto-cjk font-awesome font-noto-extra font-liberation font-linux-libertine \
     # CV
     py3-opencv \
     python3 \
+    ocrmypdf \
     py3-pip \
     py3-pillow@testing \
     py3-pdf2image@testing && \
diff --git a/common/src/main/java/stirling/software/common/model/ApplicationProperties.java b/common/src/main/java/stirling/software/common/model/ApplicationProperties.java
index 0017fa34a..e4edf2baa 100644
--- a/common/src/main/java/stirling/software/common/model/ApplicationProperties.java
+++ b/common/src/main/java/stirling/software/common/model/ApplicationProperties.java
@@ -545,6 +545,8 @@ public class ApplicationProperties {
             private int calibreSessionLimit;
             private int qpdfSessionLimit;
             private int tesseractSessionLimit;
+            private int ghostscriptSessionLimit;
+            private int ocrMyPdfSessionLimit;
 
             public int getQpdfSessionLimit() {
                 return qpdfSessionLimit > 0 ? qpdfSessionLimit : 2;
@@ -577,6 +579,14 @@ public class ApplicationProperties {
             public int getCalibreSessionLimit() {
                 return calibreSessionLimit > 0 ? calibreSessionLimit : 1;
             }
+
+            public int getGhostscriptSessionLimit() {
+                return ghostscriptSessionLimit > 0 ? ghostscriptSessionLimit : 8;
+            }
+
+            public int getOcrMyPdfSessionLimit() {
+                return ocrMyPdfSessionLimit > 0 ? ocrMyPdfSessionLimit : 2;
+            }
         }
 
         @Data
@@ -589,6 +599,8 @@ public class ApplicationProperties {
             private long calibreTimeoutMinutes;
             private long tesseractTimeoutMinutes;
             private long qpdfTimeoutMinutes;
+            private long ghostscriptTimeoutMinutes;
+            private long ocrMyPdfTimeoutMinutes;
 
             public long getTesseractTimeoutMinutes() {
                 return tesseractTimeoutMinutes > 0 ? tesseractTimeoutMinutes : 30;
@@ -621,6 +633,14 @@ public class ApplicationProperties {
             public long getCalibreTimeoutMinutes() {
                 return calibreTimeoutMinutes > 0 ? calibreTimeoutMinutes : 30;
             }
+
+            public long getGhostscriptTimeoutMinutes() {
+                return ghostscriptTimeoutMinutes > 0 ? ghostscriptTimeoutMinutes : 30;
+            }
+
+            public long getOcrMyPdfTimeoutMinutes() {
+                return ocrMyPdfTimeoutMinutes > 0 ? ocrMyPdfTimeoutMinutes : 30;
+            }
         }
     }
 }
diff --git a/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java b/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java
index d53c4ea84..895aa70de 100644
--- a/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java
+++ b/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java
@@ -308,7 +308,7 @@ public class TempFileCleanupService {
         }
 
         java.util.List<Path> subdirectories = new java.util.ArrayList<>();
-        
+
         try (Stream<Path> pathStream = Files.list(directory)) {
             pathStream.forEach(
                     path -> {
@@ -347,7 +347,7 @@ public class TempFileCleanupService {
                         }
                     });
         }
-        
+
         for (Path subdirectory : subdirectories) {
             try {
                 cleanupDirectoryStreaming(
diff --git a/common/src/main/java/stirling/software/common/util/ProcessExecutor.java b/common/src/main/java/stirling/software/common/util/ProcessExecutor.java
index 09c5ff675..ee7297153 100644
--- a/common/src/main/java/stirling/software/common/util/ProcessExecutor.java
+++ b/common/src/main/java/stirling/software/common/util/ProcessExecutor.java
@@ -84,6 +84,16 @@ public class ProcessExecutor {
                                                 .getProcessExecutor()
                                                 .getSessionLimit()
                                                 .getCalibreSessionLimit();
+                                case GHOSTSCRIPT ->
+                                        applicationProperties
+                                                .getProcessExecutor()
+                                                .getSessionLimit()
+                                                .getGhostscriptSessionLimit();
+                                case OCR_MY_PDF ->
+                                        applicationProperties
+                                                .getProcessExecutor()
+                                                .getSessionLimit()
+                                                .getOcrMyPdfSessionLimit();
                             };
 
                     long timeoutMinutes =
@@ -128,6 +138,16 @@ public class ProcessExecutor {
                                                 .getProcessExecutor()
                                                 .getTimeoutMinutes()
                                                 .getCalibreTimeoutMinutes();
+                                case GHOSTSCRIPT ->
+                                        applicationProperties
+                                                .getProcessExecutor()
+                                                .getTimeoutMinutes()
+                                                .getGhostscriptTimeoutMinutes();
+                                case OCR_MY_PDF ->
+                                        applicationProperties
+                                                .getProcessExecutor()
+                                                .getTimeoutMinutes()
+                                                .getOcrMyPdfTimeoutMinutes();
                             };
                     return new ProcessExecutor(semaphoreLimit, liveUpdates, timeoutMinutes);
                 });
@@ -278,7 +298,9 @@ public class ProcessExecutor {
         INSTALL_APP,
         CALIBRE,
         TESSERACT,
-        QPDF
+        QPDF,
+        GHOSTSCRIPT,
+        OCR_MY_PDF
     }
 
     public class ProcessExecutorResult {
diff --git a/common/src/main/java/stirling/software/common/util/TempFileRegistry.java b/common/src/main/java/stirling/software/common/util/TempFileRegistry.java
index 1e55c6b15..323b3bff3 100644
--- a/common/src/main/java/stirling/software/common/util/TempFileRegistry.java
+++ b/common/src/main/java/stirling/software/common/util/TempFileRegistry.java
@@ -9,7 +9,6 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
-import java.util.concurrent.ConcurrentSkipListSet;
 import java.util.stream.Collectors;
 
 import org.springframework.stereotype.Component;
@@ -24,11 +23,10 @@ import lombok.extern.slf4j.Slf4j;
 @Component
 public class TempFileRegistry {
 
-	 private final ConcurrentMap<Path, Instant> registeredFiles = new ConcurrentHashMap<>();
-	     private final Set<Path> thirdPartyTempFiles =
-	             Collections.newSetFromMap(new ConcurrentHashMap<>());
-	     private final Set<Path> tempDirectories =
-	            Collections.newSetFromMap(new ConcurrentHashMap<>());
+    private final ConcurrentMap<Path, Instant> registeredFiles = new ConcurrentHashMap<>();
+    private final Set<Path> thirdPartyTempFiles =
+            Collections.newSetFromMap(new ConcurrentHashMap<>());
+    private final Set<Path> tempDirectories = Collections.newSetFromMap(new ConcurrentHashMap<>());
 
     /**
      * Register a temporary file with the registry.
diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java b/stirling-pdf/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java
index 361eeace3..25115acb1 100644
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java
@@ -21,6 +21,8 @@ public class EndpointConfiguration {
     private final ApplicationProperties applicationProperties;
     private Map<String, Boolean> endpointStatuses = new ConcurrentHashMap<>();
     private Map<String, Set<String>> endpointGroups = new ConcurrentHashMap<>();
+    private Set<String> disabledGroups = ConcurrentHashMap.newKeySet();
+    private Map<String, Set<String>> endpointAlternatives = new ConcurrentHashMap<>();
     private final boolean runningProOrHigher;
 
     public EndpointConfiguration(
@@ -51,16 +53,36 @@ public class EndpointConfiguration {
         if (endpoint.startsWith("/")) {
             endpoint = endpoint.substring(1);
         }
+        
+        // Check if endpoint has alternatives (multiple tools can handle it)
+        Set<String> alternatives = endpointAlternatives.get(endpoint);
+        if (alternatives != null && !alternatives.isEmpty()) {
+            // Endpoint is enabled if ANY of its alternative tools are enabled
+            for (String toolGroup : alternatives) {
+                if (isGroupEnabled(toolGroup)) {
+                    return true;
+                }
+            }
+            return false; // All alternative tools are disabled
+        }
+        
+        // Fallback to standard endpoint status check
         return endpointStatuses.getOrDefault(endpoint, true);
     }
 
     public boolean isGroupEnabled(String group) {
+        // Check if group is explicitly disabled first
+        if (disabledGroups.contains(group)) {
+            return false;
+        }
+
         Set<String> endpoints = endpointGroups.get(group);
         if (endpoints == null || endpoints.isEmpty()) {
             log.debug("Group '{}' does not exist or has no endpoints", group);
             return false;
         }
 
+        // Additional check: if any endpoint in the group is disabled, consider group disabled
         for (String endpoint : endpoints) {
             if (!isEndpointEnabled(endpoint)) {
                 return false;
@@ -73,8 +95,23 @@ public class EndpointConfiguration {
     public void addEndpointToGroup(String group, String endpoint) {
         endpointGroups.computeIfAbsent(group, k -> new HashSet<>()).add(endpoint);
     }
+    
+    public void addEndpointAlternative(String endpoint, String toolGroup) {
+        endpointAlternatives.computeIfAbsent(endpoint, k -> new HashSet<>()).add(toolGroup);
+    }
+
+    public void disableGroup(String group) {
+        disabledGroups.add(group);
+        Set<String> endpoints = endpointGroups.get(group);
+        if (endpoints != null) {
+            for (String endpoint : endpoints) {
+                disableEndpoint(endpoint);
+            }
+        }
+    }
 
     public void enableGroup(String group) {
+        disabledGroups.remove(group);
         Set<String> endpoints = endpointGroups.get(group);
         if (endpoints != null) {
             for (String endpoint : endpoints) {
@@ -83,13 +120,8 @@ public class EndpointConfiguration {
         }
     }
 
-    public void disableGroup(String group) {
-        Set<String> endpoints = endpointGroups.get(group);
-        if (endpoints != null) {
-            for (String endpoint : endpoints) {
-                disableEndpoint(endpoint);
-            }
-        }
+    public Set<String> getDisabledGroups() {
+        return new HashSet<>(disabledGroups);
     }
 
     public void logDisabledEndpointsSummary() {
@@ -101,6 +133,12 @@ public class EndpointConfiguration {
                         .sorted()
                         .toList();
 
+        if (!disabledGroups.isEmpty()) {
+            log.info(
+                    "Disabled groups: {}",
+                    String.join(", ", disabledGroups.stream().sorted().toList()));
+        }
+
         if (!disabledList.isEmpty()) {
             log.info(
                     "Total disabled endpoints: {}. Disabled endpoints: {}",
@@ -212,7 +250,6 @@ public class EndpointConfiguration {
         // Unoconvert
         addEndpointToGroup("Unoconvert", "file-to-pdf");
 
-        addEndpointToGroup("tesseract", "ocr-pdf");
 
         // Java
         addEndpointToGroup("Java", "merge-pdfs");
@@ -261,8 +298,13 @@ public class EndpointConfiguration {
         addEndpointToGroup("Javascript", "compare");
         addEndpointToGroup("Javascript", "adjust-contrast");
 
-        // qpdf dependent endpoints
-        addEndpointToGroup("qpdf", "repair");
+        // Multi-tool endpoints - endpoints that can be handled by multiple tools
+        addEndpointAlternative("repair", "qpdf");
+        addEndpointAlternative("repair", "Ghostscript");
+        addEndpointAlternative("compress-pdf", "qpdf");
+        addEndpointAlternative("compress-pdf", "Ghostscript");
+        addEndpointAlternative("ocr-pdf", "tesseract");
+        addEndpointAlternative("ocr-pdf", "OCRmyPDF");
 
         // Weasyprint dependent endpoints
         addEndpointToGroup("Weasyprint", "html-to-pdf");
diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/config/ExternalAppDepConfig.java b/stirling-pdf/src/main/java/stirling/software/SPDF/config/ExternalAppDepConfig.java
index 7dd6d2b3b..6d857c679 100644
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/config/ExternalAppDepConfig.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/config/ExternalAppDepConfig.java
@@ -34,6 +34,8 @@ public class ExternalAppDepConfig {
                 new HashMap<>() {
 
                     {
+                        put("gs", List.of("Ghostscript"));
+                        put("ocrmypdf", List.of("OCRmyPDF"));
                         put("soffice", List.of("LibreOffice"));
                         put(weasyprintPath, List.of("Weasyprint"));
                         put("pdftohtml", List.of("Pdftohtml"));
@@ -109,6 +111,8 @@ public class ExternalAppDepConfig {
     @PostConstruct
     public void checkDependencies() {
         // Check core dependencies
+        checkDependencyAndDisableGroup("gs");
+        checkDependencyAndDisableGroup("ocrmypdf");
         checkDependencyAndDisableGroup("tesseract");
         checkDependencyAndDisableGroup("soffice");
         checkDependencyAndDisableGroup("qpdf");
diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java
index 8509f5056..13828d88f 100644
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java
@@ -65,12 +65,14 @@ public class CompressController {
 
     private final CustomPDFDocumentFactory pdfDocumentFactory;
     private final boolean qpdfEnabled;
+    private final boolean ghostscriptEnabled;
 
     public CompressController(
             CustomPDFDocumentFactory pdfDocumentFactory,
             EndpointConfiguration endpointConfiguration) {
         this.pdfDocumentFactory = pdfDocumentFactory;
         this.qpdfEnabled = endpointConfiguration.isGroupEnabled("qpdf");
+        this.ghostscriptEnabled = endpointConfiguration.isGroupEnabled("Ghostscript");
     }
 
     @Data
@@ -697,25 +699,69 @@ public class CompressController {
 
             boolean sizeMet = false;
             boolean imageCompressionApplied = false;
-            boolean qpdfCompressionApplied = false;
-
-            if (qpdfEnabled && optimizeLevel <= 3) {
-                optimizeLevel = 4;
-            }
+            boolean externalCompressionApplied = false;
 
             while (!sizeMet && optimizeLevel <= 9) {
-                // Apply image compression for levels 4-9
-                if ((optimizeLevel >= 3 || Boolean.TRUE.equals(convertToGrayscale))
-                        && !imageCompressionApplied) {
-                    double scaleFactor = getScaleFactorForLevel(optimizeLevel);
-                    float jpegQuality = getJpegQualityForLevel(optimizeLevel);
+                // Apply external compression first
+                if (!externalCompressionApplied) {
+                    boolean ghostscriptSuccess = false;
 
-                    // Compress images
+                    // Try Ghostscript first if available - for ANY compression level
+                    if (ghostscriptEnabled) {
+                        try {
+                            applyGhostscriptCompression(
+                                    request, optimizeLevel, currentFile, tempFiles);
+                            log.info("Ghostscript compression applied successfully");
+                            ghostscriptSuccess = true;
+                        } catch (IOException e) {
+                            log.warn("Ghostscript compression failed, trying fallback methods");
+                        }
+                    }
+
+                    // Fallback to QPDF if Ghostscript failed or not available (levels 1-3 only)
+                    if (!ghostscriptSuccess && qpdfEnabled && optimizeLevel <= 3) {
+                        try {
+                            applyQpdfCompression(request, optimizeLevel, currentFile, tempFiles);
+                            log.info("QPDF compression applied successfully");
+                        } catch (IOException e) {
+                            log.warn("QPDF compression also failed");
+                        }
+                    }
+
+                    if (!ghostscriptSuccess && !qpdfEnabled) {
+                        log.info(
+                                "No external compression tools available, using image compression only");
+                    }
+
+                    externalCompressionApplied = true;
+
+                    // Skip image compression if Ghostscript succeeded
+                    if (ghostscriptSuccess) {
+                        imageCompressionApplied = true;
+                    }
+                }
+
+                // Apply image compression for levels 4+ only if Ghostscript didn't run
+                if ((optimizeLevel >= 4 || Boolean.TRUE.equals(convertToGrayscale))
+                        && !imageCompressionApplied) {
+                    // Use different scale factors based on level
+                    double scaleFactor =
+                            switch (optimizeLevel) {
+                                case 4 -> 0.95; // 95% of original size
+                                case 5 -> 0.9; // 90% of original size
+                                case 6 -> 0.8; // 80% of original size
+                                case 7 -> 0.7; // 70% of original size
+                                case 8 -> 0.65; // 65% of original size
+                                case 9 -> 0.5; // 50% of original size
+                                default -> 1.0;
+                            };
+
+                    log.info("Applying image compression with scale factor: {}", scaleFactor);
                     Path compressedImageFile =
                             compressImagesInPDF(
                                     currentFile,
                                     scaleFactor,
-                                    jpegQuality,
+                                    0.7f, // Default JPEG quality
                                     Boolean.TRUE.equals(convertToGrayscale));
 
                     tempFiles.add(compressedImageFile);
@@ -723,18 +769,6 @@ public class CompressController {
                     imageCompressionApplied = true;
                 }
 
-                // Apply QPDF compression for all levels
-                if (!qpdfCompressionApplied && qpdfEnabled) {
-                    applyQpdfCompression(request, optimizeLevel, currentFile, tempFiles);
-                    qpdfCompressionApplied = true;
-                } else if (!qpdfCompressionApplied) {
-                    // If QPDF is disabled, mark as applied and log
-                    if (!qpdfEnabled) {
-                        log.info("Skipping QPDF compression as QPDF group is disabled");
-                    }
-                    qpdfCompressionApplied = true;
-                }
-
                 // Check if target size reached or not in auto mode
                 long outputFileSize = Files.size(currentFile);
                 if (outputFileSize <= expectedOutputSize || !autoMode) {
@@ -754,7 +788,7 @@ public class CompressController {
                     } else {
                         // Reset flags for next iteration with higher optimization level
                         imageCompressionApplied = false;
-                        qpdfCompressionApplied = false;
+                        externalCompressionApplied = false;
                         optimizeLevel = newOptimizeLevel;
                     }
                 }
@@ -788,6 +822,96 @@ public class CompressController {
         }
     }
 
+    /**
+     * Recompresses {@code currentFile} in place with the Ghostscript {@code pdfwrite} device.
+     *
+     * <p>Output goes to a temp file (recorded in {@code tempFiles}) and replaces {@code
+     * currentFile} only after Ghostscript exits with code 0.
+     *
+     * @param request the optimize request; not read by this method
+     * @param optimizeLevel level used to pick the PDFSETTINGS preset and image resolutions
+     * @param currentFile PDF to compress; overwritten on success
+     * @param tempFiles list that receives the temp output file created here
+     * @throws IOException if Ghostscript cannot be run or exits with a non-zero code
+     */
+    private void applyGhostscriptCompression(
+            OptimizePdfRequest request, int optimizeLevel, Path currentFile, List<Path> tempFiles)
+            throws IOException {
+
+        long preGsSize = Files.size(currentFile);
+        log.info("Pre-Ghostscript file size: {}", GeneralUtils.formatBytes(preGsSize));
+
+        // Create output file for Ghostscript
+        Path gsOutputFile = Files.createTempFile("gs_output_", ".pdf");
+        tempFiles.add(gsOutputFile);
+
+        // Build Ghostscript command based on optimization level
+        List<String> command = new ArrayList<>();
+        command.add("gs");
+        command.add("-sDEVICE=pdfwrite");
+        command.add("-dCompatibilityLevel=1.5");
+        command.add("-dNOPAUSE");
+        command.add("-dQUIET");
+        command.add("-dBATCH");
+
+        // Map optimization levels to a Ghostscript quality preset
+        String preset =
+                switch (optimizeLevel) {
+                    case 1 -> "/prepress";
+                    case 2 -> "/printer";
+                    case 3 -> "/ebook";
+                    default -> "/screen";
+                };
+        command.add("-dPDFSETTINGS=" + preset);
+
+        // Higher levels additionally cap image resolution: {color and gray DPI, mono DPI}
+        int[] imageDpi =
+                switch (optimizeLevel) {
+                    case 6, 7 -> new int[] {150, 300};
+                    case 8, 9 -> new int[] {100, 200};
+                    case 10 -> new int[] {72, 150};
+                    default -> null;
+                };
+        if (imageDpi != null) {
+            command.add("-dColorImageResolution=" + imageDpi[0]);
+            command.add("-dGrayImageResolution=" + imageDpi[0]);
+            command.add("-dMonoImageResolution=" + imageDpi[1]);
+        }
+
+        command.add("-sOutputFile=" + gsOutputFile.toString());
+        command.add(currentFile.toString());
+
+        // Run Ghostscript; any launch or execution failure is reported as an IOException
+        ProcessExecutorResult result;
+        try {
+            result =
+                    ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
+                            .runCommandWithOutputHandling(command);
+        } catch (Exception e) {
+            if (e instanceof InterruptedException) {
+                // Restore the interrupt status cleared when InterruptedException was thrown
+                Thread.currentThread().interrupt();
+            }
+            log.warn("Ghostscript compression failed, will fallback to other methods", e);
+            throw new IOException("Ghostscript compression failed", e);
+        }
+
+        // Checked outside the try so this failure is logged and wrapped only once
+        if (result.getRc() != 0) {
+            log.warn("Ghostscript compression failed with return code: {}", result.getRc());
+            throw new IOException("Ghostscript compression failed");
+        }
+
+        // Update current file to the Ghostscript output
+        Files.copy(gsOutputFile, currentFile, StandardCopyOption.REPLACE_EXISTING);
+
+        long postGsSize = Files.size(currentFile);
+        double gsReduction = 100.0 - ((postGsSize * 100.0) / preGsSize);
+        log.info(
+                "Post-Ghostscript file size: {} (reduced by {}%)",
+                GeneralUtils.formatBytes(postGsSize), String.format("%.1f", gsReduction));
+    }
+
     // Run QPDF compression
     private void applyQpdfCompression(
             OptimizePdfRequest request, int optimizeLevel, Path currentFile, List<Path> tempFiles)
diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java
index 93061b570..4721d86b9 100644
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java
@@ -2,6 +2,7 @@ package stirling.software.SPDF.controller.api.misc;
 
 import java.awt.image.BufferedImage;
 import java.io.*;
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.*;
 import java.util.zip.ZipEntry;
@@ -26,26 +27,42 @@ import io.github.pixee.security.Filenames;
 import io.swagger.v3.oas.annotations.Operation;
 import io.swagger.v3.oas.annotations.tags.Tag;
 
-import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 
+import stirling.software.SPDF.config.EndpointConfiguration;
 import stirling.software.SPDF.model.api.misc.ProcessPdfWithOcrRequest;
 import stirling.software.common.model.ApplicationProperties;
 import stirling.software.common.service.CustomPDFDocumentFactory;
 import stirling.software.common.util.ProcessExecutor;
 import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult;
+import stirling.software.common.util.TempDirectory;
+import stirling.software.common.util.TempFile;
 import stirling.software.common.util.TempFileManager;
+import stirling.software.common.util.WebResponseUtils;
 
 @RestController
 @RequestMapping("/api/v1/misc")
 @Tag(name = "Misc", description = "Miscellaneous APIs")
 @Slf4j
-@RequiredArgsConstructor
 public class OCRController {
 
     private final ApplicationProperties applicationProperties;
     private final CustomPDFDocumentFactory pdfDocumentFactory;
     private final TempFileManager tempFileManager;
+    private final boolean ocrMyPdfEnabled;
+    private final boolean tesseractEnabled;
+
+    public OCRController(
+            ApplicationProperties applicationProperties,
+            CustomPDFDocumentFactory pdfDocumentFactory,
+            TempFileManager tempFileManager,
+            EndpointConfiguration endpointConfiguration) {
+        this.applicationProperties = applicationProperties;
+        this.pdfDocumentFactory = pdfDocumentFactory;
+        this.tempFileManager = tempFileManager;
+        this.ocrMyPdfEnabled = endpointConfiguration.isGroupEnabled("OCRmyPDF");
+        this.tesseractEnabled = endpointConfiguration.isGroupEnabled("tesseract");
+    }
 
     /** Gets the list of available Tesseract languages from the tessdata directory */
     public List<String> getAvailableTesseractLanguages() {
@@ -63,39 +80,261 @@ public class OCRController {
 
     @PostMapping(consumes = "multipart/form-data", value = "/ocr-pdf")
     @Operation(
-            summary = "Process PDF files with OCR using Tesseract",
+            summary = "Process a PDF file with OCR",
             description =
-                    "Takes a PDF file as input, performs OCR using specified languages and OCR type"
-                            + " (skip-text/force-ocr), and returns the processed PDF. Input:PDF"
-                            + " Output:PDF Type:SISO")
+                    "This endpoint processes a PDF file using OCR (Optical Character Recognition). "
+                            + "Users can specify languages, sidecar, deskew, clean, cleanFinal, ocrType, ocrRenderType, and removeImagesAfter options. "
+                            + "Uses OCRmyPDF if available, falls back to Tesseract. Input:PDF Output:PDF Type:SI-Conditional")
     public ResponseEntity<byte[]> processPdfWithOCR(
             @ModelAttribute ProcessPdfWithOcrRequest request)
             throws IOException, InterruptedException {
         MultipartFile inputFile = request.getFileInput();
-        List<String> languages = request.getLanguages();
+        List<String> selectedLanguages = request.getLanguages();
+        Boolean sidecar = request.isSidecar();
+        Boolean deskew = request.isDeskew();
+        Boolean clean = request.isClean();
+        Boolean cleanFinal = request.isCleanFinal();
         String ocrType = request.getOcrType();
+        String ocrRenderType = request.getOcrRenderType();
+        Boolean removeImagesAfter = request.isRemoveImagesAfter();
 
-        // Create a temp directory using TempFileManager directly
-        Path tempDirPath = tempFileManager.createTempDirectory();
-        File tempDir = tempDirPath.toFile();
+        if (selectedLanguages == null || selectedLanguages.isEmpty()) {
+            throw new IOException("Please select at least one language.");
+        }
 
-        try {
-            File tempInputFile = new File(tempDir, "input.pdf");
-            File tempOutputDir = new File(tempDir, "output");
-            File tempImagesDir = new File(tempDir, "images");
-            File finalOutputFile = new File(tempDir, "final_output.pdf");
+        if (!"hocr".equals(ocrRenderType) && !"sandwich".equals(ocrRenderType)) {
+            throw new IOException("ocrRenderType wrong");
+        }
+
+        // Get available Tesseract languages
+        List<String> availableLanguages = getAvailableTesseractLanguages();
+
+        // Validate selected languages
+        selectedLanguages =
+                selectedLanguages.stream().filter(availableLanguages::contains).toList();
+
+        if (selectedLanguages.isEmpty()) {
+            throw new IOException("None of the selected languages are valid.");
+        }
+
+        // Use try-with-resources for proper temp file management
+        try (TempFile tempInputFile = new TempFile(tempFileManager, ".pdf");
+                TempFile tempOutputFile = new TempFile(tempFileManager, ".pdf")) {
+
+            inputFile.transferTo(tempInputFile.getFile());
+
+            TempFile sidecarTextFile = null;
+
+            try {
+                // Use OCRmyPDF if available (no fallback - error if it fails)
+                if (ocrMyPdfEnabled) {
+                    if (sidecar != null && sidecar) {
+                        sidecarTextFile = new TempFile(tempFileManager, ".txt");
+                    }
+
+                    processWithOcrMyPdf(
+                            selectedLanguages,
+                            sidecar,
+                            deskew,
+                            clean,
+                            cleanFinal,
+                            ocrType,
+                            ocrRenderType,
+                            removeImagesAfter,
+                            tempInputFile.getPath(),
+                            tempOutputFile.getPath(),
+                            sidecarTextFile != null ? sidecarTextFile.getPath() : null);
+                    log.info("OCRmyPDF processing completed successfully");
+                }
+                // Use Tesseract only if OCRmyPDF is not available
+                else if (tesseractEnabled) {
+                    processWithTesseract(
+                            selectedLanguages,
+                            ocrType,
+                            tempInputFile.getPath(),
+                            tempOutputFile.getPath());
+                    log.info("Tesseract processing completed successfully");
+                } else {
+                    throw new IOException("No OCR tools are available");
+                }
+
+                // Read the processed PDF file
+                byte[] pdfBytes = Files.readAllBytes(tempOutputFile.getPath());
+
+                // Return the OCR processed PDF as a response
+                String outputFilename =
+                        Filenames.toSimpleFileName(inputFile.getOriginalFilename())
+                                        .replaceFirst("[.][^.]+$", "")
+                                + "_OCR.pdf";
+
+                if (sidecar != null && sidecar && sidecarTextFile != null) {
+                    // Create a zip file containing both the PDF and the text file
+                    String outputZipFilename =
+                            Filenames.toSimpleFileName(inputFile.getOriginalFilename())
+                                            .replaceFirst("[.][^.]+$", "")
+                                    + "_OCR.zip";
+
+                    try (TempFile tempZipFile = new TempFile(tempFileManager, ".zip");
+                            ZipOutputStream zipOut =
+                                    new ZipOutputStream(
+                                            Files.newOutputStream(tempZipFile.getPath()))) {
+
+                        // Add PDF file to the zip
+                        ZipEntry pdfEntry = new ZipEntry(outputFilename);
+                        zipOut.putNextEntry(pdfEntry);
+                        zipOut.write(pdfBytes);
+                        zipOut.closeEntry();
+
+                        // Add text file to the zip
+                        ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
+                        zipOut.putNextEntry(txtEntry);
+                        Files.copy(sidecarTextFile.getPath(), zipOut);
+                        zipOut.closeEntry();
+
+                        zipOut.finish();
+
+                        byte[] zipBytes = Files.readAllBytes(tempZipFile.getPath());
+
+                        // Return the zip file containing both the PDF and the text file
+                        return WebResponseUtils.bytesToWebResponse(
+                                zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
+                    }
+                } else {
+                    // Return the OCR processed PDF as a response
+                    return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
+                }
+
+            } finally {
+                // Clean up sidecar temp file if created
+                if (sidecarTextFile != null) {
+                    try {
+                        sidecarTextFile.close();
+                    } catch (Exception e) {
+                        log.warn("Failed to close sidecar temp file", e);
+                    }
+                }
+            }
+        }
+    }
+
+    private void processWithOcrMyPdf(
+            List<String> selectedLanguages,
+            Boolean sidecar,
+            Boolean deskew,
+            Boolean clean,
+            Boolean cleanFinal,
+            String ocrType,
+            String ocrRenderType,
+            Boolean removeImagesAfter,
+            Path tempInputFile,
+            Path tempOutputFile,
+            Path sidecarTextPath)
+            throws IOException, InterruptedException {
+
+        // Build OCRmyPDF command
+        String languageOption = String.join("+", selectedLanguages);
+
+        List<String> command =
+                new ArrayList<>(
+                        Arrays.asList(
+                                "ocrmypdf",
+                                "--verbose",
+                                "2",
+                                "--output-type",
+                                "pdf",
+                                "--pdf-renderer",
+                                ocrRenderType));
+
+        if (sidecar != null && sidecar && sidecarTextPath != null) {
+            command.add("--sidecar");
+            command.add(sidecarTextPath.toString());
+        }
+
+        if (deskew != null && deskew) {
+            command.add("--deskew");
+        }
+        if (clean != null && clean) {
+            command.add("--clean");
+        }
+        if (cleanFinal != null && cleanFinal) {
+            command.add("--clean-final");
+        }
+        if (ocrType != null && !"".equals(ocrType)) {
+            if ("skip-text".equals(ocrType)) {
+                command.add("--skip-text");
+            } else if ("force-ocr".equals(ocrType)) {
+                command.add("--force-ocr");
+            }
+        }
+
+        command.addAll(
+                Arrays.asList(
+                        "--language",
+                        languageOption,
+                        tempInputFile.toString(),
+                        tempOutputFile.toString()));
+
+        // Run CLI command
+        ProcessExecutorResult result =
+                ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
+                        .runCommandWithOutputHandling(command);
+
+        if (result.getRc() != 0
+                && result.getMessages().contains("multiprocessing/synchronize.py")
+                && result.getMessages().contains("OSError: [Errno 38] Function not implemented")) {
+            command.add("--jobs");
+            command.add("1");
+            result =
+                    ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
+                            .runCommandWithOutputHandling(command);
+        }
+
+        if (result.getRc() != 0) {
+            throw new IOException("OCRmyPDF failed with return code: " + result.getRc());
+        }
+
+        // Remove images from the OCR processed PDF if the flag is set to true
+        if (removeImagesAfter != null && removeImagesAfter) {
+            try (TempFile tempPdfWithoutImages = new TempFile(tempFileManager, "_no_images.pdf")) {
+                List<String> gsCommand =
+                        Arrays.asList(
+                                "gs",
+                                "-sDEVICE=pdfwrite",
+                                "-dFILTERIMAGE",
+                                "-o",
+                                tempPdfWithoutImages.getPath().toString(),
+                                tempOutputFile.toString());
+
+                ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
+                        .runCommandWithOutputHandling(gsCommand);
+
+                // Replace output file with version without images
+                Files.copy(
+                        tempPdfWithoutImages.getPath(),
+                        tempOutputFile,
+                        java.nio.file.StandardCopyOption.REPLACE_EXISTING);
+            }
+        }
+    }
+
+    private void processWithTesseract(
+            List<String> selectedLanguages, String ocrType, Path tempInputFile, Path tempOutputFile)
+            throws IOException, InterruptedException {
+
+        // Create temp directory for Tesseract processing
+        try (TempDirectory tempDir = new TempDirectory(tempFileManager)) {
+            File tempOutputDir = new File(tempDir.getPath().toFile(), "output");
+            File tempImagesDir = new File(tempDir.getPath().toFile(), "images");
+            File finalOutputFile = new File(tempDir.getPath().toFile(), "final_output.pdf");
 
             // Create directories
             tempOutputDir.mkdirs();
             tempImagesDir.mkdirs();
 
-            // Save input file
-            inputFile.transferTo(tempInputFile);
-
             PDFMergerUtility merger = new PDFMergerUtility();
             merger.setDestinationFileName(finalOutputFile.toString());
 
-            try (PDDocument document = pdfDocumentFactory.load(tempInputFile)) {
+            try (PDDocument document = pdfDocumentFactory.load(tempInputFile.toFile())) {
                 PDFRenderer pdfRenderer = new PDFRenderer(document);
                 int pageCount = document.getNumberOfPages();
 
@@ -135,35 +374,20 @@ public class OCRController {
                                 new File(tempOutputDir, String.format("page_%d", pageNum))
                                         .toString());
                         command.add("-l");
-                        command.add(String.join("+", languages));
-                        // Always output PDF
-                        command.add("pdf");
+                        command.add(String.join("+", selectedLanguages));
+                        command.add("pdf"); // Always output PDF
 
-                        // Use ProcessExecutor to run tesseract command
-                        try {
-                            ProcessExecutorResult result =
-                                    ProcessExecutor.getInstance(ProcessExecutor.Processes.TESSERACT)
-                                            .runCommandWithOutputHandling(command);
+                        ProcessExecutorResult result =
+                                ProcessExecutor.getInstance(ProcessExecutor.Processes.TESSERACT)
+                                        .runCommandWithOutputHandling(command);
 
-                            log.debug(
-                                    "Tesseract OCR completed for page {} with exit code {}",
-                                    pageNum,
-                                    result.getRc());
-
-                            // Add OCR'd PDF to merger
-                            merger.addSource(pageOutputPath);
-                        } catch (IOException | InterruptedException e) {
-                            log.error(
-                                    "Error processing page {} with tesseract: {}",
-                                    pageNum,
-                                    e.getMessage());
-                            // If OCR fails, fall back to the original page
-                            try (PDDocument pageDoc = new PDDocument()) {
-                                pageDoc.addPage(page);
-                                pageDoc.save(pageOutputPath);
-                                merger.addSource(pageOutputPath);
-                            }
+                        if (result.getRc() != 0) {
+                            throw new RuntimeException(
+                                    "Tesseract failed with exit code: " + result.getRc());
                         }
+
+                        // Add OCR'd PDF to merger
+                        merger.addSource(pageOutputPath);
                     } else {
                         // Save original page without OCR
                         try (PDDocument pageDoc = new PDDocument()) {
@@ -178,40 +402,11 @@ public class OCRController {
             // Merge all pages into final PDF
             merger.mergeDocuments(null);
 
-            // Read the final PDF file
-            byte[] pdfContent = java.nio.file.Files.readAllBytes(finalOutputFile.toPath());
-            String outputFilename =
-                    Filenames.toSimpleFileName(inputFile.getOriginalFilename())
-                                    .replaceFirst("[.][^.]+$", "")
-                            + "_OCR.pdf";
-
-            return ResponseEntity.ok()
-                    .header(
-                            "Content-Disposition",
-                            "attachment; filename=\"" + outputFilename + "\"")
-                    .contentType(MediaType.APPLICATION_PDF)
-                    .body(pdfContent);
-        } finally {
-            // Clean up the temp directory and all its contents
-            tempFileManager.deleteTempDirectory(tempDirPath);
-        }
-    }
-
-    private void addFileToZip(File file, String filename, ZipOutputStream zipOut)
-            throws IOException {
-        if (!file.exists()) {
-            log.warn("File {} does not exist, skipping", file);
-            return;
-        }
-        try (FileInputStream fis = new FileInputStream(file)) {
-            ZipEntry zipEntry = new ZipEntry(filename);
-            zipOut.putNextEntry(zipEntry);
-            byte[] buffer = new byte[1024];
-            int length;
-            while ((length = fis.read(buffer)) >= 0) {
-                zipOut.write(buffer, 0, length);
-            }
-            zipOut.closeEntry();
+            // Copy final output to the expected location
+            Files.copy(
+                    finalOutputFile.toPath(),
+                    tempOutputFile,
+                    java.nio.file.StandardCopyOption.REPLACE_EXISTING);
         }
     }
 }
diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java
index b8c347ef1..1a72c22de 100644
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java
@@ -15,8 +15,7 @@ import io.github.pixee.security.Filenames;
 import io.swagger.v3.oas.annotations.Operation;
 import io.swagger.v3.oas.annotations.tags.Tag;
 
-import lombok.RequiredArgsConstructor;
-
+import stirling.software.SPDF.config.EndpointConfiguration;
 import stirling.software.common.model.api.PDFFile;
 import stirling.software.common.service.CustomPDFDocumentFactory;
 import stirling.software.common.util.ProcessExecutor;
@@ -28,17 +27,28 @@ import stirling.software.common.util.WebResponseUtils;
 @RestController
 @RequestMapping("/api/v1/misc")
 @Tag(name = "Misc", description = "Miscellaneous APIs")
-@RequiredArgsConstructor
 public class RepairController {
 
     private final CustomPDFDocumentFactory pdfDocumentFactory;
     private final TempFileManager tempFileManager;
+    private final boolean ghostscriptEnabled;
+    private final boolean qpdfEnabled;
+
+    public RepairController(
+            CustomPDFDocumentFactory pdfDocumentFactory,
+            TempFileManager tempFileManager,
+            EndpointConfiguration endpointConfiguration) {
+        this.pdfDocumentFactory = pdfDocumentFactory;
+        this.tempFileManager = tempFileManager;
+        this.ghostscriptEnabled = endpointConfiguration.isGroupEnabled("Ghostscript");
+        this.qpdfEnabled = endpointConfiguration.isGroupEnabled("qpdf");
+    }
 
     @PostMapping(consumes = "multipart/form-data", value = "/repair")
     @Operation(
             summary = "Repair a PDF file",
             description =
-                    "This endpoint repairs a given PDF file by running qpdf command. The PDF is"
+                    "This endpoint repairs a given PDF file by running Ghostscript (primary), qpdf (fallback), or PDFBox (if no external tools available). The PDF is"
                             + " first saved to a temporary location, repaired, read back, and then"
                             + " returned as a response. Input:PDF Output:PDF Type:SISO")
     public ResponseEntity<byte[]> repairPdf(@ModelAttribute PDFFile file)
@@ -46,25 +56,72 @@ public class RepairController {
         MultipartFile inputFile = file.getFileInput();
 
         // Use TempFile with try-with-resources for automatic cleanup
-        try (TempFile tempFile = new TempFile(tempFileManager, ".pdf")) {
+        try (TempFile tempInputFile = new TempFile(tempFileManager, ".pdf");
+                TempFile tempOutputFile = new TempFile(tempFileManager, ".pdf")) {
+
             // Save the uploaded file to the temporary location
-            inputFile.transferTo(tempFile.getFile());
+            inputFile.transferTo(tempInputFile.getFile());
 
-            List<String> command = new ArrayList<>();
-            command.add("qpdf");
-            command.add("--replace-input"); // Automatically fixes problems it can
-            command.add("--qdf"); // Linearizes and normalizes PDF structure
-            command.add("--object-streams=disable"); // Can help with some corruptions
-            command.add(tempFile.getFile().getAbsolutePath());
+            boolean repairSuccess = false;
 
-            ProcessExecutorResult returnCode =
-                    ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
-                            .runCommandWithOutputHandling(command);
+            // Try Ghostscript first if available
+            if (ghostscriptEnabled) {
+                try {
+                    List<String> gsCommand = new ArrayList<>();
+                    gsCommand.add("gs");
+                    gsCommand.add("-o");
+                    gsCommand.add(tempOutputFile.getPath().toString());
+                    gsCommand.add("-sDEVICE=pdfwrite");
+                    gsCommand.add(tempInputFile.getPath().toString());
 
-            // Read the optimized PDF file
-            byte[] pdfBytes = pdfDocumentFactory.loadToBytes(tempFile.getFile());
+                    ProcessExecutorResult gsResult =
+                            ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
+                                    .runCommandWithOutputHandling(gsCommand);
 
-            // Return the optimized PDF as a response
+                    if (gsResult.getRc() == 0) {
+                        repairSuccess = true;
+                    }
+                } catch (Exception e) {
+                    // Log and continue to QPDF fallback
+                    System.out.println(
+                            "Ghostscript repair failed, trying QPDF fallback: " + e.getMessage());
+                }
+            }
+
+            // Fallback to QPDF if Ghostscript failed or not available
+            if (!repairSuccess && qpdfEnabled) {
+                List<String> qpdfCommand = new ArrayList<>();
+                qpdfCommand.add("qpdf");
+                qpdfCommand.add("--replace-input"); // NOTE(review): invalid with an output file
+                qpdfCommand.add("--qdf"); // QDF mode: normalized, editable output (not linearized)
+                qpdfCommand.add("--object-streams=disable"); // Can help with some corruptions
+                qpdfCommand.add(tempInputFile.getPath().toString());
+                qpdfCommand.add(tempOutputFile.getPath().toString());
+
+                ProcessExecutorResult qpdfResult =
+                        ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF)
+                                .runCommandWithOutputHandling(qpdfCommand);
+
+                repairSuccess = true;
+            }
+
+            // Use PDFBox as last resort if no external tools are available
+            if (!repairSuccess) {
+                if (!ghostscriptEnabled && !qpdfEnabled) {
+                    // Basic PDFBox repair - load and save to fix structural issues
+                    try (var document = pdfDocumentFactory.load(tempInputFile.getFile())) {
+                        document.save(tempOutputFile.getFile());
+                        repairSuccess = true;
+                    }
+                } else {
+                    throw new IOException("PDF repair failed with available tools");
+                }
+            }
+
+            // Read the repaired PDF file
+            byte[] pdfBytes = pdfDocumentFactory.loadToBytes(tempOutputFile.getFile());
+
+            // Return the repaired PDF as a response
             String outputFilename =
                     Filenames.toSimpleFileName(inputFile.getOriginalFilename())
                                     .replaceFirst("[.][^.]+$", "")
diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/model/api/misc/ProcessPdfWithOcrRequest.java b/stirling-pdf/src/main/java/stirling/software/SPDF/model/api/misc/ProcessPdfWithOcrRequest.java
index 00279eb96..2955d7160 100644
--- a/stirling-pdf/src/main/java/stirling/software/SPDF/model/api/misc/ProcessPdfWithOcrRequest.java
+++ b/stirling-pdf/src/main/java/stirling/software/SPDF/model/api/misc/ProcessPdfWithOcrRequest.java
@@ -19,6 +19,18 @@ public class ProcessPdfWithOcrRequest extends PDFFile {
             defaultValue = "[\"eng\"]")
     private List<String> languages;
 
+    @Schema(description = "Include OCR text in a sidecar text file if set to true")
+    private boolean sidecar;
+
+    @Schema(description = "Deskew the input file if set to true")
+    private boolean deskew;
+
+    @Schema(description = "Clean the input file if set to true")
+    private boolean clean;
+
+    @Schema(description = "Clean the final output if set to true")
+    private boolean cleanFinal;
+
     @Schema(
             description = "Specify the OCR type, e.g., 'skip-text', 'force-ocr', or 'Normal'",
             requiredMode = Schema.RequiredMode.REQUIRED,
@@ -31,4 +43,7 @@ public class ProcessPdfWithOcrRequest extends PDFFile {
             allowableValues = {"hocr", "sandwich"},
             defaultValue = "hocr")
     private String ocrRenderType = "hocr";
+
+    @Schema(description = "Remove images from the output PDF if set to true")
+    private boolean removeImagesAfter;
 }
diff --git a/stirling-pdf/src/main/resources/templates/misc/ocr-pdf.html b/stirling-pdf/src/main/resources/templates/misc/ocr-pdf.html
index f98c20d29..441542d57 100644
--- a/stirling-pdf/src/main/resources/templates/misc/ocr-pdf.html
+++ b/stirling-pdf/src/main/resources/templates/misc/ocr-pdf.html
@@ -79,6 +79,30 @@
                   </select>
                 </div>
                 <br>
+                <div class="mb-3">
+                  <label class="form-label">OCR Options</label>
+                  <div class="form-check">
+                    <input type="checkbox" class="form-check-input" id="sidecar" name="sidecar" value="true">
+                    <label class="form-check-label" for="sidecar">Include OCR text in sidecar text file</label>
+                  </div>
+                  <div class="form-check">
+                    <input type="checkbox" class="form-check-input" id="deskew" name="deskew" value="true">
+                    <label class="form-check-label" for="deskew">Deskew input file</label>
+                  </div>
+                  <div class="form-check">
+                    <input type="checkbox" class="form-check-input" id="clean" name="clean" value="true">
+                    <label class="form-check-label" for="clean">Clean input file</label>
+                  </div>
+                  <div class="form-check">
+                    <input type="checkbox" class="form-check-input" id="cleanFinal" name="cleanFinal" value="true">
+                    <label class="form-check-label" for="cleanFinal">Clean final output</label>
+                  </div>
+                  <div class="form-check">
+                    <input type="checkbox" class="form-check-input" id="removeImagesAfter" name="removeImagesAfter" value="true">
+                    <label class="form-check-label" for="removeImagesAfter">Remove images from output PDF</label>
+                  </div>
+                </div>
+                <br>
                 <button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{ocr.submit}"></button>
               </form>
               <script th:inline="javascript">