Add tooltips to advanced section in OCR and add missing translations (#4295)

# Description of Changes

Add tooltips to the advanced section in OCR and add missing translations.
2025-08-26 14:19:24 +00:00 · 2025-08-26 12:38:47 +01:00 · 2025-08-26 12:38:47 +01:00 · 6d7f76353e
commit 6d7f76353e
parent 16e028a8ef
3 changed files with 120 additions and 4 deletions
--- a/frontend/public/locales/en-GB/translation.json
+++ b/frontend/public/locales/en-GB/translation.json
@@ -996,7 +996,6 @@
    },
    "submit": "Change"
  },
-  
  "removePages": {
    "tags": "Remove pages,delete pages",
    "title": "Remove Pages",
@@ -1113,7 +1112,88 @@
    },
    "help": "Please read this documentation on how to use this for other languages and/or use not in docker",
    "credit": "This service uses qpdf and Tesseract for OCR.",
-    "submit": "Process PDF with OCR"
+    "submit": "Process PDF with OCR",
+    "operation": {
+      "submit": "Process OCR and Review"
+    },
+    "results": {
+      "title": "OCR Results"
+    },
+    "languagePicker": {
+      "additionalLanguages": "Looking for additional languages?",
+      "viewSetupGuide": "View setup guide →"
+    },
+    "settings": {
+      "title": "Settings",
+      "ocrMode": {
+        "label": "OCR Mode",
+        "auto": "Auto (skip text layers)",
+        "force": "Force (re-OCR all, replace text)",
+        "strict": "Strict (abort if text found)"
+      },
+      "languages": {
+        "label": "Languages",
+        "placeholder": "Select languages"
+      },
+      "compatibilityMode": {
+        "label": "Compatibility Mode"
+      },
+      "advancedOptions": {
+        "label": "Processing Options",
+        "sidecar": "Create a text file",
+        "deskew": "Deskew pages",
+        "clean": "Clean input file",
+        "cleanFinal": "Clean final output"
+      }
+    },
+    "tooltip": {
+      "header": {
+        "title": "OCR Settings Overview"
+      },
+      "mode": {
+        "title": "OCR Mode",
+        "text": "Optical Character Recognition (OCR) helps you turn scanned or screenshotted pages into text you can search, copy, or highlight.",
+        "bullet1": "Auto skips pages that already contain text layers.",
+        "bullet2": "Force re-OCRs every page and replaces all the text.",
+        "bullet3": "Strict halts if any selectable text is found."
+      },
+      "languages": {
+        "title": "Languages",
+        "text": "Improve OCR accuracy by specifying the expected languages. Choose one or more languages to guide detection."
+      },
+      "output": {
+        "title": "Output",
+        "text": "Decide how you want the text output formatted:",
+        "bullet1": "Searchable PDF embeds text behind the original image.",
+        "bullet2": "HOCR XML returns a structured machine-readable file.",
+        "bullet3": "Plain-text sidecar creates a separate .txt file with raw content."
+      },
+      "advanced": {
+        "header": {
+          "title": "Advanced OCR Processing"
+        },
+        "compatibility": {
+          "title": "Compatibility Mode",
+          "text": "Uses OCR 'sandwich PDF' mode: results in larger files, but more reliable with certain languages and older PDF software. By default we use hOCR for smaller, modern PDFs."
+        },
+        "sidecar": {
+          "title": "Create Text File",
+          "text": "Generates a separate .txt file alongside the PDF containing all extracted text content for easy access and processing."
+        },
+        "deskew": {
+          "title": "Deskew Pages",
+          "text": "Automatically corrects skewed or tilted pages to improve OCR accuracy. Useful for scanned documents that weren't perfectly aligned."
+        },
+        "clean": {
+          "title": "Clean Input File",
+          "text": "Preprocesses the input by removing noise, enhancing contrast, and optimising the image for better OCR recognition before processing."
+        },
+        "cleanFinal": {
+          "title": "Clean Final Output",
+          "text": "Post-processes the final PDF by removing OCR artefacts and optimising the text layer for better readability and smaller file size."
+        }
+      }
+    }
  },
  "extractImages": {
    "tags": "picture,photo,save,archive,zip,capture,grab",
--- a/frontend/src/components/tooltips/useAdvancedOCRTips.ts
+++ b/frontend/src/components/tooltips/useAdvancedOCRTips.ts
@@ -0,0 +1,34 @@
+import { useTranslation } from 'react-i18next';
+import { TooltipContent } from '../../types/tips';
+
+export const useAdvancedOCRTips = (): TooltipContent => {
+  const { t } = useTranslation();
+
+  return {
+    header: {
+      title: t("ocr.tooltip.advanced.header.title", "Advanced OCR Processing"),
+    },
+    tips: [
+      {
+        title: t("ocr.tooltip.advanced.compatibility.title", "Compatibility Mode"),
+        description: t("ocr.tooltip.advanced.compatibility.text", "Uses OCR 'sandwich PDF' mode: results in larger files, but more reliable with certain languages and older PDF software. By default we use hOCR for smaller, modern PDFs.")
+      },
+      {
+        title: t("ocr.tooltip.advanced.sidecar.title", "Create Text File"),
+        description: t("ocr.tooltip.advanced.sidecar.text", "Generates a separate .txt file alongside the PDF containing all extracted text content for easy access and processing.")
+      },
+      {
+        title: t("ocr.tooltip.advanced.deskew.title", "Deskew Pages"),
+        description: t("ocr.tooltip.advanced.deskew.text", "Automatically corrects skewed or tilted pages to improve OCR accuracy. Useful for scanned documents that weren't perfectly aligned.")
+      },
+      {
+        title: t("ocr.tooltip.advanced.clean.title", "Clean Input File"),
+        description: t("ocr.tooltip.advanced.clean.text", "Preprocesses the input by removing noise, enhancing contrast, and optimising the image for better OCR recognition before processing.")
+      },
+      {
+        title: t("ocr.tooltip.advanced.cleanFinal.title", "Clean Final Output"),
+        description: t("ocr.tooltip.advanced.cleanFinal.text", "Post-processes the final PDF by removing OCR artefacts and optimising the text layer for better readability and smaller file size.")
+      }
+    ]
+  };
+};
--- a/frontend/src/tools/OCR.tsx
+++ b/frontend/src/tools/OCR.tsx
@@ -13,15 +13,16 @@ import { useOCRParameters } from "../hooks/tools/ocr/useOCRParameters";
 import { useOCROperation } from "../hooks/tools/ocr/useOCROperation";
 import { BaseToolProps, ToolComponent } from "../types/tool";
 import { useOCRTips } from "../components/tooltips/useOCRTips";
+import { useAdvancedOCRTips } from "../components/tooltips/useAdvancedOCRTips";

 const OCR = ({ onPreviewFile, onComplete, onError }: BaseToolProps) => {
  const { t } = useTranslation();
-  const { actions } = useNavigationActions();
  const { selectedFiles } = useFileSelection();

  const ocrParams = useOCRParameters();
  const ocrOperation = useOCROperation();
  const ocrTips = useOCRTips();
+  const advancedOCRTips = useAdvancedOCRTips();

  // Step expansion state management
  const [expandedStep, setExpandedStep] = useState<"files" | "settings" | "advanced" | null>("files");
@@ -82,7 +83,7 @@ const OCR = ({ onPreviewFile, onComplete, onError }: BaseToolProps) => {
    },
    steps: [
      {
-        title: "Settings",
+        title: t("ocr.settings.title", "Settings"),
        isCollapsed: !hasFiles || settingsCollapsed,
        onCollapsedClick: hasResults
          ? handleSettingsReset
@@ -108,6 +109,7 @@ const OCR = ({ onPreviewFile, onComplete, onError }: BaseToolProps) => {
              if (!hasFiles) return; // Only allow if files are selected
              setExpandedStep(expandedStep === "advanced" ? null : "advanced");
            },
+        tooltip: advancedOCRTips,
        content: (
          <AdvancedOCRSettings
            advancedOptions={ocrParams.parameters.additionalOptions}