diff --git a/frontend/public/locales/en-GB/translation.json b/frontend/public/locales/en-GB/translation.json index 261b8dbc7..e4fcb2884 100644 --- a/frontend/public/locales/en-GB/translation.json +++ b/frontend/public/locales/en-GB/translation.json @@ -996,7 +996,6 @@ }, "submit": "Change" }, - "removePages": { "tags": "Remove pages,delete pages", "title": "Remove Pages", @@ -1113,7 +1112,88 @@ }, "help": "Please read this documentation on how to use this for other languages and/or use not in docker", "credit": "This service uses qpdf and Tesseract for OCR.", - "submit": "Process PDF with OCR" + "submit": "Process PDF with OCR", + "operation": { + "submit": "Process OCR and Review" + }, + "results": { + "title": "OCR Results" + }, + "languagePicker": { + "additionalLanguages": "Looking for additional languages?", + "viewSetupGuide": "View setup guide →" + }, + "settings": { + "title": "Settings", + "ocrMode": { + "label": "OCR Mode", + "auto": "Auto (skip text layers)", + "force": "Force (re-OCR all, replace text)", + "strict": "Strict (abort if text found)" + }, + "languages": { + "label": "Languages", + "placeholder": "Select languages" + }, + "compatibilityMode": { + "label": "Compatibility Mode" + }, + "advancedOptions": { + "label": "Processing Options", + "sidecar": "Create a text file", + "deskew": "Deskew pages", + "clean": "Clean input file", + "cleanFinal": "Clean final output" + } + }, + "tooltip": { + "header": { + "title": "OCR Settings Overview" + }, + "mode": { + "title": "OCR Mode", + "text": "Optical Character Recognition (OCR) helps you turn scanned or screenshotted pages into text you can search, copy, or highlight.", + "bullet1": "Auto skips pages that already contain text layers.", + "bullet2": "Force re-OCRs every page and replaces all the text.", + "bullet3": "Strict halts if any selectable text is found." + }, + "languages": { + "title": "Languages", + "text": "Improve OCR accuracy by specifying the expected languages. 
Choose one or more languages to guide detection." + }, + "output": { + "title": "Output", + "text": "Decide how you want the text output formatted:", + "bullet1": "Searchable PDF embeds text behind the original image.", + "bullet2": "HOCR XML returns a structured machine-readable file.", + "bullet3": "Plain-text sidecar creates a separate .txt file with raw content." + }, + "advanced": { + "header": { + "title": "Advanced OCR Processing" + }, + "compatibility": { + "title": "Compatibility Mode", + "text": "Uses OCR 'sandwich PDF' mode: results in larger files, but more reliable with certain languages and older PDF software. By default we use hOCR for smaller, modern PDFs." + }, + "sidecar": { + "title": "Create Text File", + "text": "Generates a separate .txt file alongside the PDF containing all extracted text content for easy access and processing." + }, + "deskew": { + "title": "Deskew Pages", + "text": "Automatically corrects skewed or tilted pages to improve OCR accuracy. Useful for scanned documents that weren't perfectly aligned." + }, + "clean": { + "title": "Clean Input File", + "text": "Preprocesses the input by removing noise, enhancing contrast, and optimising the image for better OCR recognition before processing." + }, + "cleanFinal": { + "title": "Clean Final Output", + "text": "Post-processes the final PDF by removing OCR artefacts and optimising the text layer for better readability and smaller file size." 
/**
 * Hook that assembles the tooltip content for the advanced OCR processing step.
 *
 * Each string is looked up with the `t` function from `useTranslation`, using
 * keys under `ocr.tooltip.advanced.*` and passing the English text as the
 * second argument.
 *
 * @returns Tooltip content with a header title and five tips, in this order:
 * compatibility mode, sidecar text file, deskew, clean input, clean final output.
 */
export const useAdvancedOCRTips = (): TooltipContent => {
  const { t } = useTranslation();

  // Resolved first so the lookup order matches the shape of the returned object.
  const headerTitle = t("ocr.tooltip.advanced.header.title", "Advanced OCR Processing");

  // OCR 'sandwich PDF' mode versus the default hOCR output.
  const compatibilityTip = {
    title: t("ocr.tooltip.advanced.compatibility.title", "Compatibility Mode"),
    description: t(
      "ocr.tooltip.advanced.compatibility.text",
      "Uses OCR 'sandwich PDF' mode: results in larger files, but more reliable with certain languages and older PDF software. By default we use hOCR for smaller, modern PDFs."
    ),
  };

  // Separate .txt file containing the extracted text.
  const sidecarTip = {
    title: t("ocr.tooltip.advanced.sidecar.title", "Create Text File"),
    description: t(
      "ocr.tooltip.advanced.sidecar.text",
      "Generates a separate .txt file alongside the PDF containing all extracted text content for easy access and processing."
    ),
  };

  // Straightening of skewed or tilted pages.
  const deskewTip = {
    title: t("ocr.tooltip.advanced.deskew.title", "Deskew Pages"),
    description: t(
      "ocr.tooltip.advanced.deskew.text",
      "Automatically corrects skewed or tilted pages to improve OCR accuracy. Useful for scanned documents that weren't perfectly aligned."
    ),
  };

  // Clean-up of the input before recognition.
  const cleanTip = {
    title: t("ocr.tooltip.advanced.clean.title", "Clean Input File"),
    description: t(
      "ocr.tooltip.advanced.clean.text",
      "Preprocesses the input by removing noise, enhancing contrast, and optimising the image for better OCR recognition before processing."
    ),
  };

  // Clean-up of the final PDF after recognition.
  const cleanFinalTip = {
    title: t("ocr.tooltip.advanced.cleanFinal.title", "Clean Final Output"),
    description: t(
      "ocr.tooltip.advanced.cleanFinal.text",
      "Post-processes the final PDF by removing OCR artefacts and optimising the text layer for better readability and smaller file size."
    ),
  };

  return {
    header: { title: headerTitle },
    tips: [compatibilityTip, sidecarTip, deskewTip, cleanTip, cleanFinalTip],
  };
};
handleSettingsReset @@ -108,6 +109,7 @@ const OCR = ({ onPreviewFile, onComplete, onError }: BaseToolProps) => { if (!hasFiles) return; // Only allow if files are selected setExpandedStep(expandedStep === "advanced" ? null : "advanced"); }, + tooltip: advancedOCRTips, content: (