diff --git a/frontend/src/components/shared/DropdownListWithFooter.tsx b/frontend/src/components/shared/DropdownListWithFooter.tsx index fa99c44e8..368b2255e 100644 --- a/frontend/src/components/shared/DropdownListWithFooter.tsx +++ b/frontend/src/components/shared/DropdownListWithFooter.tsx @@ -1,5 +1,5 @@ import React, { ReactNode, useState, useMemo } from 'react'; -import { Stack, Text, Popover, Box, Checkbox, Group, TextInput, useMantineColorScheme } from '@mantine/core'; +import { Stack, Text, Popover, Box, Checkbox, Group, TextInput } from '@mantine/core'; import UnfoldMoreIcon from '@mui/icons-material/UnfoldMore'; import SearchIcon from '@mui/icons-material/Search'; @@ -60,7 +60,6 @@ const DropdownListWithFooter: React.FC = ({ }) => { const [searchTerm, setSearchTerm] = useState(''); - const { colorScheme } = useMantineColorScheme(); const isMultiValue = Array.isArray(value); const selectedValues = isMultiValue ? value : (value ? [value] : []); @@ -119,14 +118,10 @@ const DropdownListWithFooter: React.FC = ({ = ({ @@ -151,9 +144,7 @@ const DropdownListWithFooter: React.FC = ({ {header && ( {header} @@ -162,9 +153,7 @@ const DropdownListWithFooter: React.FC = ({ {searchable && ( = ({ }} onMouseEnter={(e) => { if (!item.disabled) { - e.currentTarget.style.backgroundColor = colorScheme === 'dark' - ? 
'var(--mantine-color-dark-5)' - : 'var(--mantine-color-gray-0)'; + e.currentTarget.style.backgroundColor = 'light-dark(var(--mantine-color-gray-0), var(--mantine-color-dark-5))'; } }} onMouseLeave={(e) => { @@ -234,9 +221,7 @@ const DropdownListWithFooter: React.FC = ({ {footer && ( {footer} diff --git a/frontend/src/components/shared/LanguageSelector.tsx b/frontend/src/components/shared/LanguageSelector.tsx index 9da9d75c1..bd6269b8e 100644 --- a/frontend/src/components/shared/LanguageSelector.tsx +++ b/frontend/src/components/shared/LanguageSelector.tsx @@ -1,5 +1,5 @@ import React, { useState, useEffect } from 'react'; -import { Menu, Button, ScrollArea, useMantineTheme, useMantineColorScheme } from '@mantine/core'; +import { Menu, Button, ScrollArea } from '@mantine/core'; import { useTranslation } from 'react-i18next'; import { supportedLanguages } from '../../i18n'; import LanguageIcon from '@mui/icons-material/Language'; @@ -7,8 +7,6 @@ import styles from './LanguageSelector.module.css'; const LanguageSelector = () => { const { i18n } = useTranslation(); - const theme = useMantineTheme(); - const { colorScheme } = useMantineColorScheme(); const [opened, setOpened] = useState(false); const [animationTriggered, setAnimationTriggered] = useState(false); const [isChanging, setIsChanging] = useState(false); @@ -102,10 +100,10 @@ const LanguageSelector = () => { styles={{ root: { border: 'none', - color: colorScheme === 'dark' ? theme.colors.gray[1] : theme.colors.gray[7], + color: 'light-dark(var(--mantine-color-gray-7), var(--mantine-color-gray-1))', transition: 'background-color 0.2s cubic-bezier(0.25, 0.46, 0.45, 0.94)', '&:hover': { - backgroundColor: colorScheme === 'dark' ? 
theme.colors.dark[5] : theme.colors.gray[1], + backgroundColor: 'light-dark(var(--mantine-color-gray-1), var(--mantine-color-dark-5))', } }, label: { @@ -125,8 +123,8 @@ const LanguageSelector = () => { padding: '12px', borderRadius: '8px', boxShadow: '0 4px 12px rgba(0, 0, 0, 0.1)', - backgroundColor: colorScheme === 'dark' ? theme.colors.dark[6] : theme.white, - border: colorScheme === 'dark' ? `1px solid ${theme.colors.dark[4]}` : `1px solid ${theme.colors.gray[3]}`, + backgroundColor: 'light-dark(var(--mantine-color-white), var(--mantine-color-dark-6))', + border: 'light-dark(1px solid var(--mantine-color-gray-3), 1px solid var(--mantine-color-dark-4))', }} > @@ -146,6 +144,7 @@ const LanguageSelector = () => { size="sm" fullWidth onClick={(event) => handleLanguageChange(option.value, event)} + data-selected={option.value === i18n.language} styles={{ root: { borderRadius: '4px', @@ -154,21 +153,17 @@ const LanguageSelector = () => { justifyContent: 'flex-start', position: 'relative', overflow: 'hidden', - backgroundColor: option.value === i18n.language ? ( - colorScheme === 'dark' ? theme.colors.blue[8] : theme.colors.blue[1] - ) : 'transparent', - color: option.value === i18n.language ? ( - colorScheme === 'dark' ? theme.white : theme.colors.blue[9] - ) : ( - colorScheme === 'dark' ? theme.white : theme.colors.gray[7] - ), + backgroundColor: option.value === i18n.language + ? 'light-dark(var(--mantine-color-blue-1), var(--mantine-color-blue-8))' + : 'transparent', + color: option.value === i18n.language + ? 'light-dark(var(--mantine-color-blue-9), var(--mantine-color-white))' + : 'light-dark(var(--mantine-color-gray-7), var(--mantine-color-white))', transition: 'all 0.2s cubic-bezier(0.25, 0.46, 0.45, 0.94)', '&:hover': { - backgroundColor: option.value === i18n.language ? ( - colorScheme === 'dark' ? theme.colors.blue[7] : theme.colors.blue[2] - ) : ( - colorScheme === 'dark' ? 
theme.colors.dark[5] : theme.colors.gray[1] - ), + backgroundColor: option.value === i18n.language + ? 'light-dark(var(--mantine-color-blue-2), var(--mantine-color-blue-7))' + : 'light-dark(var(--mantine-color-gray-1), var(--mantine-color-dark-5))', transform: 'translateY(-1px)', boxShadow: '0 2px 8px rgba(0, 0, 0, 0.1)', } @@ -198,7 +193,7 @@ const LanguageSelector = () => { width: 0, height: 0, borderRadius: '50%', - backgroundColor: theme.colors.blue[4], + backgroundColor: 'var(--mantine-color-blue-4)', opacity: 0.6, transform: 'translate(-50%, -50%)', animation: 'ripple-expand 0.6s cubic-bezier(0.25, 0.46, 0.45, 0.94)', diff --git a/frontend/src/components/tools/ocr/AdvancedOCRSettings.tsx b/frontend/src/components/tools/ocr/AdvancedOCRSettings.tsx index db8f931b0..3bd8c1569 100644 --- a/frontend/src/components/tools/ocr/AdvancedOCRSettings.tsx +++ b/frontend/src/components/tools/ocr/AdvancedOCRSettings.tsx @@ -1,79 +1,80 @@ import React from 'react'; -import { Stack, Text, Divider, Switch, Group, Checkbox } from '@mantine/core'; +import { Stack, Text, Checkbox } from '@mantine/core'; import { useTranslation } from 'react-i18next'; import { OCRParameters } from './OCRSettings'; export interface AdvancedOCRParameters { - ocrRenderType: string; advancedOptions: string[]; } +interface AdvancedOption { + value: string; + label: string; + isSpecial: boolean; +} + interface AdvancedOCRSettingsProps { - ocrRenderType: string; advancedOptions: string[]; + ocrRenderType?: string; onParameterChange: (key: keyof OCRParameters, value: any) => void; disabled?: boolean; } const AdvancedOCRSettings: React.FC = ({ - ocrRenderType, advancedOptions, + ocrRenderType = 'hocr', onParameterChange, disabled = false }) => { const { t } = useTranslation(); // Define the advanced options available - const advancedOptionsData = [ - { value: 'sidecar', label: t('ocr.settings.advancedOptions.sidecar', 'Create a text file') }, - { value: 'deskew', label: 
t('ocr.settings.advancedOptions.deskew', 'Deskew pages') }, - { value: 'clean', label: t('ocr.settings.advancedOptions.clean', 'Clean input file') }, - { value: 'cleanFinal', label: t('ocr.settings.advancedOptions.cleanFinal', 'Clean final output') }, + const advancedOptionsData: AdvancedOption[] = [ + { value: 'compatibilityMode', label: t('ocr.settings.compatibilityMode.label', 'Compatibility Mode'), isSpecial: true }, + { value: 'sidecar', label: t('ocr.settings.advancedOptions.sidecar', 'Create a text file'), isSpecial: false }, + { value: 'deskew', label: t('ocr.settings.advancedOptions.deskew', 'Deskew pages'), isSpecial: false }, + { value: 'clean', label: t('ocr.settings.advancedOptions.clean', 'Clean input file'), isSpecial: false }, + { value: 'cleanFinal', label: t('ocr.settings.advancedOptions.cleanFinal', 'Clean final output'), isSpecial: false }, ]; // Handle individual checkbox changes const handleCheckboxChange = (optionValue: string, checked: boolean) => { - const newOptions = checked - ? [...advancedOptions, optionValue] - : advancedOptions.filter(option => option !== optionValue); - onParameterChange('additionalOptions', newOptions); + const option = advancedOptionsData.find(opt => opt.value === optionValue); + + if (option?.isSpecial) { + // Handle special options (like compatibility mode) differently + if (optionValue === 'compatibilityMode') { + onParameterChange('ocrRenderType', checked ? 'sandwich' : 'hocr'); + } + } else { + // Handle regular advanced options + const newOptions = checked + ? [...advancedOptions, optionValue] + : advancedOptions.filter(option => option !== optionValue); + onParameterChange('additionalOptions', newOptions); + } + }; + + // Check if a special option is selected + const isSpecialOptionSelected = (optionValue: string) => { + if (optionValue === 'compatibilityMode') { + return ocrRenderType === 'sandwich'; + } + return false; }; return ( - -
- - {t('ocr.settings.output.label', 'Output Render Type ')} - - - - {t('ocr.settings.output.hocr', 'HOCR (Auto)')} - - onParameterChange('ocrRenderType', event.currentTarget.checked ? 'sandwich' : 'hocr')} - disabled={disabled} - size="sm" - style={{ flexShrink: 0 }} - /> - - {t('ocr.settings.output.sandwich', 'Searchable PDF')} - - -
- - -
{t('ocr.settings.advancedOptions.label', 'Processing Options')} + {advancedOptionsData.map((option) => ( handleCheckboxChange(option.value, event.currentTarget.checked)} label={option.label} disabled={disabled} diff --git a/frontend/src/components/tools/ocr/LanguagePicker.tsx b/frontend/src/components/tools/ocr/LanguagePicker.tsx index a68e33202..8d425d1ed 100644 --- a/frontend/src/components/tools/ocr/LanguagePicker.tsx +++ b/frontend/src/components/tools/ocr/LanguagePicker.tsx @@ -1,7 +1,8 @@ import React, { useState, useEffect } from 'react'; -import { Text, Loader, useMantineColorScheme } from '@mantine/core'; +import { Text, Loader } from '@mantine/core'; import { useTranslation } from 'react-i18next'; import { tempOcrLanguages } from '../../../utils/tempOcrLanguages'; +import { getAutoOcrLanguage } from '../../../utils/languageMapping'; import DropdownListWithFooter, { DropdownItem } from '../../shared/DropdownListWithFooter'; export interface LanguageOption { @@ -16,6 +17,7 @@ export interface LanguagePickerProps { disabled?: boolean; label?: string; languagesEndpoint?: string; + autoFillFromBrowserLanguage?: boolean; } const LanguagePicker: React.FC = ({ @@ -24,12 +26,13 @@ const LanguagePicker: React.FC = ({ placeholder = 'Select languages', disabled = false, label, - languagesEndpoint = '/api/v1/ui-data/ocr-pdf' + languagesEndpoint = '/api/v1/ui-data/ocr-pdf', + autoFillFromBrowserLanguage = true, }) => { - const { t } = useTranslation(); - const { colorScheme } = useMantineColorScheme(); + const { t, i18n } = useTranslation(); const [availableLanguages, setAvailableLanguages] = useState([]); const [isLoadingLanguages, setIsLoadingLanguages] = useState(true); + const [hasAutoFilled, setHasAutoFilled] = useState(false); useEffect(() => { // Fetch available languages from backend @@ -76,6 +79,29 @@ const LanguagePicker: React.FC = ({ fetchLanguages(); }, [languagesEndpoint]); + // Auto-fill OCR language based on browser language when languages are loaded + 
useEffect(() => { + const shouldAutoFillLanguage = autoFillFromBrowserLanguage && !isLoadingLanguages && availableLanguages.length > 0 && !hasAutoFilled && value.length === 0; + + if (shouldAutoFillLanguage) { + // Use the comprehensive language mapping from languageMapping.ts + const suggestedOcrLanguages = getAutoOcrLanguage(i18n.language); + + if (suggestedOcrLanguages.length > 0) { + // Find the first suggested language that's available in the backend + const matchingLanguage = availableLanguages.find(lang => + suggestedOcrLanguages.includes(lang.value) + ); + + if (matchingLanguage) { + onChange([matchingLanguage.value]); + } + } + + setHasAutoFilled(true); + } + }, [autoFillFromBrowserLanguage, isLoadingLanguages, availableLanguages, hasAutoFilled, value.length, i18n.language, onChange]); + if (isLoadingLanguages) { return (
@@ -87,22 +113,23 @@ const LanguagePicker: React.FC = ({ const footer = ( <> - +
+ {t('ocr.languagePicker.additionalLanguages', 'Looking for additional languages?')} window.open('https://docs.stirlingpdf.com/Advanced%20Configuration/OCR', '_blank')} > {t('ocr.languagePicker.viewSetupGuide', 'View setup guide →')} +
); @@ -117,6 +144,7 @@ const LanguagePicker: React.FC = ({ footer={footer} multiSelect={true} maxHeight={300} + searchable={true} /> ); }; diff --git a/frontend/src/hooks/tools/ocr/useOCRParameters.ts b/frontend/src/hooks/tools/ocr/useOCRParameters.ts index 5ac0f61c3..23702cdce 100644 --- a/frontend/src/hooks/tools/ocr/useOCRParameters.ts +++ b/frontend/src/hooks/tools/ocr/useOCRParameters.ts @@ -9,7 +9,7 @@ export interface OCRParametersHook { } const defaultParameters: OCRParameters = { - languages: ['eng'], + languages: [], ocrType: 'skip-text', ocrRenderType: 'hocr', additionalOptions: [], diff --git a/frontend/src/tools/OCR.tsx b/frontend/src/tools/OCR.tsx index 35e40aad0..3e6fb0bdf 100644 --- a/frontend/src/tools/OCR.tsx +++ b/frontend/src/tools/OCR.tsx @@ -150,8 +150,8 @@ const OCR = ({ onPreviewFile, onComplete, onError }: BaseToolProps) => { completedMessage={hasFiles && hasResults && expandedStep !== 'advanced' ? "OCR processing completed" : undefined} > diff --git a/frontend/src/utils/languageMapping.ts b/frontend/src/utils/languageMapping.ts new file mode 100644 index 000000000..c716c8843 --- /dev/null +++ b/frontend/src/utils/languageMapping.ts @@ -0,0 +1,383 @@ +// Mapping from browser language codes to OCR language codes +// Handles exact matches and similar language fallbacks + +interface LanguageMapping { + [browserCode: string]: string; +} + +// Primary mapping from browser language codes to OCR language codes +const browserToOcrMapping: LanguageMapping = { + // English variants + 'en': 'eng', + 'en-US': 'eng', + 'en-GB': 'eng', + 'en-AU': 'eng', + 'en-CA': 'eng', + 'en-IE': 'eng', + 'en-NZ': 'eng', + 'en-ZA': 'eng', + + // Spanish variants + 'es': 'spa', + 'es-ES': 'spa', + 'es-MX': 'spa', + 'es-AR': 'spa', + 'es-CO': 'spa', + 'es-CL': 'spa', + 'es-PE': 'spa', + 'es-VE': 'spa', + + // French variants + 'fr': 'fra', + 'fr-FR': 'fra', + 'fr-CA': 'fra', + 'fr-BE': 'fra', + 'fr-CH': 'fra', + + // German variants + 'de': 'deu', + 'de-DE': 'deu', + 
'de-AT': 'deu', + 'de-CH': 'deu', + + // Portuguese variants + 'pt': 'por', + 'pt-PT': 'por', + 'pt-BR': 'por', + + // Italian variants + 'it': 'ita', + 'it-IT': 'ita', + 'it-CH': 'ita', + + // Chinese variants + 'zh': 'chi_sim', + 'zh-CN': 'chi_sim', + 'zh-Hans': 'chi_sim', + 'zh-TW': 'chi_tra', + 'zh-HK': 'chi_tra', + 'zh-Hant': 'chi_tra', + 'zh-BO': 'bod', + + // Japanese + 'ja': 'jpn', + 'ja-JP': 'jpn', + + // Korean + 'ko': 'kor', + 'ko-KR': 'kor', + + // Russian variants + 'ru': 'rus', + 'ru-RU': 'rus', + + // Arabic variants + 'ar': 'ara', + 'ar-SA': 'ara', + 'ar-EG': 'ara', + 'ar-AE': 'ara', + 'ar-MA': 'ara', + + // Dutch variants + 'nl': 'nld', + 'nl-NL': 'nld', + 'nl-BE': 'nld', + + // Polish + 'pl': 'pol', + 'pl-PL': 'pol', + + // Czech + 'cs': 'ces', + 'cs-CZ': 'ces', + + // Slovak + 'sk': 'slk', + 'sk-SK': 'slk', + + // Hungarian + 'hu': 'hun', + 'hu-HU': 'hun', + + // Romanian + 'ro': 'ron', + 'ro-RO': 'ron', + + // Bulgarian + 'bg': 'bul', + 'bg-BG': 'bul', + + // Croatian + 'hr': 'hrv', + 'hr-HR': 'hrv', + + // Serbian + 'sr': 'srp', + 'sr-RS': 'srp', + 'sr-Latn': 'srp_latn', + + // Slovenian + 'sl': 'slv', + 'sl-SI': 'slv', + + // Estonian + 'et': 'est', + 'et-EE': 'est', + + // Latvian + 'lv': 'lav', + 'lv-LV': 'lav', + + // Lithuanian + 'lt': 'lit', + 'lt-LT': 'lit', + + // Finnish + 'fi': 'fin', + 'fi-FI': 'fin', + + // Swedish + 'sv': 'swe', + 'sv-SE': 'swe', + + // Norwegian + 'no': 'nor', + 'nb': 'nor', + 'nn': 'nor', + 'no-NO': 'nor', + 'nb-NO': 'nor', + 'nn-NO': 'nor', + + // Danish + 'da': 'dan', + 'da-DK': 'dan', + + // Icelandic + 'is': 'isl', + 'is-IS': 'isl', + + // Greek + 'el': 'ell', + 'el-GR': 'ell', + + // Turkish + 'tr': 'tur', + 'tr-TR': 'tur', + + // Hebrew + 'he': 'heb', + 'he-IL': 'heb', + + // Hindi + 'hi': 'hin', + 'hi-IN': 'hin', + + // Thai + 'th': 'tha', + 'th-TH': 'tha', + + // Vietnamese + 'vi': 'vie', + 'vi-VN': 'vie', + + // Indonesian + 'id': 'ind', + 'id-ID': 'ind', + + // Malay + 'ms': 'msa', + 'ms-MY': 'msa', + + 
// Filipino/Tagalog + 'fil': 'fil', + 'tl': 'tgl', + + // Ukrainian + 'uk': 'ukr', + 'uk-UA': 'ukr', + + // Belarusian + 'be': 'bel', + 'be-BY': 'bel', + + // Kazakh + 'kk': 'kaz', + 'kk-KZ': 'kaz', + + // Uzbek + 'uz': 'uzb', + 'uz-UZ': 'uzb', + + // Georgian + 'ka': 'kat', + 'ka-GE': 'kat', + + // Armenian + 'hy': 'hye', + 'hy-AM': 'hye', + + // Azerbaijani + 'az': 'aze', + 'az-AZ': 'aze', + + // Persian/Farsi + 'fa': 'fas', + 'fa-IR': 'fas', + + // Urdu + 'ur': 'urd', + 'ur-PK': 'urd', + + // Bengali + 'bn': 'ben', + 'bn-BD': 'ben', + 'bn-IN': 'ben', + + // Tamil + 'ta': 'tam', + 'ta-IN': 'tam', + 'ta-LK': 'tam', + + // Telugu + 'te': 'tel', + 'te-IN': 'tel', + + // Kannada + 'kn': 'kan', + 'kn-IN': 'kan', + + // Malayalam + 'ml': 'mal', + 'ml-IN': 'mal', + + // Gujarati + 'gu': 'guj', + 'gu-IN': 'guj', + + // Marathi + 'mr': 'mar', + 'mr-IN': 'mar', + + // Punjabi + 'pa': 'pan', + 'pa-IN': 'pan', + + // Nepali + 'ne': 'nep', + 'ne-NP': 'nep', + + // Sinhala + 'si': 'sin', + 'si-LK': 'sin', + + // Burmese + 'my': 'mya', + 'my-MM': 'mya', + + // Khmer + 'km': 'khm', + 'km-KH': 'khm', + + // Lao + 'lo': 'lao', + 'lo-LA': 'lao', + + // Mongolian + 'mn': 'mon', + 'mn-MN': 'mon', + + // Welsh + 'cy': 'cym', + 'cy-GB': 'cym', + + // Irish + 'ga': 'gle', + 'ga-IE': 'gle', + + // Scottish Gaelic + 'gd': 'gla', + 'gd-GB': 'gla', + + // Basque + 'eu': 'eus', + 'eu-ES': 'eus', + + // Catalan + 'ca': 'cat', + 'ca-ES': 'cat', + + // Galician + 'gl': 'glg', + 'gl-ES': 'glg', + + // Macedonian + 'mk': 'mkd', + 'mk-MK': 'mkd', + + // Albanian + 'sq': 'sqi', + 'sq-AL': 'sqi', + + // Maltese + 'mt': 'mlt', + 'mt-MT': 'mlt', + + // Afrikaans + 'af': 'afr', + 'af-ZA': 'afr', + + // Swahili + 'sw': 'swa', + 'sw-KE': 'swa', + 'sw-TZ': 'swa', +}; + +/** + * Maps a browser language code to an OCR language code + * Handles exact matches and similar language fallbacks + * + * @param browserLanguage - The browser language code (e.g., 'en-GB', 'fr-FR') + * @returns OCR language code if 
found, null if no match
+ */
+export function mapBrowserLanguageToOcr(browserLanguage: string): string | null {
+  if (!browserLanguage) return null;
+
+  // Language tags are case-insensitive and some sources emit '_' separators
+  // (e.g. 'zh_TW'), so compare on a trimmed, hyphenated, lower-cased form.
+  const normalizedTag = browserLanguage.trim().replace(/_/g, '-').toLowerCase();
+  if (!normalizedTag) return null;
+
+  // The table keys are mixed-case ('zh-TW', 'sr-Latn'), so they must be matched
+  // case-insensitively; a direct lookup with the lower-cased tag would miss
+  // every regional/script entry and wrongly fall through to the base language.
+  // Object.entries only visits own keys, which also keeps inputs such as
+  // 'constructor' from resolving to inherited Object members.
+  const findOcrCode = (tag: string): string | null => {
+    for (const [browserCode, ocrCode] of Object.entries(browserToOcrMapping)) {
+      if (browserCode.toLowerCase() === tag) return ocrCode;
+    }
+    return null;
+  };
+
+  // Try the full tag first, then drop trailing subtags one at a time
+  // ('zh-hant-tw' -> 'zh-hant' -> 'zh') so the most specific entry wins
+  // before falling back to the base language code.
+  const subtags = normalizedTag.split('-');
+  for (let length = subtags.length; length > 0; length--) {
+    const ocrCode = findOcrCode(subtags.slice(0, length).join('-'));
+    if (ocrCode) return ocrCode;
+  }
+
+  // No match found
+  return null;
+}
+
+/**
+ * Gets the OCR language selection for the current UI language.
+ *
+ * Wraps mapBrowserLanguageToOcr so callers always receive an array.
+ *
+ * @param currentLanguage - Current i18n language
+ * @returns Array holding the single matching OCR language code, or an empty
+ *          array when the language has no mapping
+ */
+export function getAutoOcrLanguage(currentLanguage: string): string[] {
+  const ocrLanguage = mapBrowserLanguageToOcr(currentLanguage);
+  return ocrLanguage ? [ocrLanguage] : [];
+}
\ No newline at end of file