Added OCR; working on the translation files for displaying the different Tesseract languages. Going to commit this before I do that.

This commit is contained in:
EthanHealy01 2025-07-29 15:49:51 +01:00
parent 1d6e988e41
commit b739a6d286
11 changed files with 1063 additions and 25 deletions

View File

@ -7,6 +7,8 @@ logging.level.org.eclipse.jetty=WARN
#logging.level.org.opensaml=DEBUG
#logging.level.stirling.software.SPDF.config.security: DEBUG
logging.level.com.zaxxer.hikari=WARN
# Enable OCR controller logging for debugging
logging.level.stirling.software.SPDF.controller.api.misc.OCRController=INFO
spring.jpa.open-in-view=false
server.forward-headers-strategy=NATIVE
server.error.path=/error

View File

@ -1399,7 +1399,7 @@
"success": "File decrypted successfully."
},
"multiTool-advert": {
"message": "هذه الميزة متوفرة في <a href=\"{0}\">صفحة الأدوات المتعددة</a> لدينا. اطلع عليها للحصول على واجهة مستخدم محسّنة لكل صفحة وميزات إضافية!"
"message": "هذه الميزة متوفرة في <a href=\"{0}\">صفحة الأدوات المتعددة</a> لدينا. اطلع عليها للحصول على واجهة مستخدم محسّنة لكل صفحة وميزات إضافية!"
},
"pageRemover": {
"title": "مزيل الصفحة",
@ -1521,6 +1521,137 @@
},
"note": "Release notes are only available in English"
},
"lang": {
"afr": "Afrikaans",
"amh": "Amharic",
"ara": "Arabic",
"asm": "Assamese",
"aze": "Azerbaijani",
"aze_cyrl": "Azerbaijani (Cyrillic)",
"bel": "Belarusian",
"ben": "Bengali",
"bod": "Tibetan",
"bos": "Bosnian",
"bre": "Breton",
"bul": "Bulgarian",
"cat": "Catalan",
"ceb": "Cebuano",
"ces": "Czech",
"chi_sim": "Chinese (Simplified)",
"chi_sim_vert": "Chinese (Simplified, Vertical)",
"chi_tra": "Chinese (Traditional)",
"chi_tra_vert": "Chinese (Traditional, Vertical)",
"chr": "Cherokee",
"cos": "Corsican",
"cym": "Welsh",
"dan": "Danish",
"dan_frak": "Danish (Fraktur)",
"deu": "German",
"deu_frak": "German (Fraktur)",
"div": "Divehi",
"dzo": "Dzongkha",
"ell": "Greek",
"eng": "English",
"enm": "English, Middle (1100-1500)",
"epo": "Esperanto",
"equ": "Math / equation detection module",
"est": "Estonian",
"eus": "Basque",
"fao": "Faroese",
"fas": "Persian",
"fil": "Filipino",
"fin": "Finnish",
"fra": "French",
"frk": "Frankish",
"frm": "French, Middle (ca.1400-1600)",
"fry": "Western Frisian",
"gla": "Scottish Gaelic",
"gle": "Irish",
"glg": "Galician",
"grc": "Ancient Greek",
"guj": "Gujarati",
"hat": "Haitian, Haitian Creole",
"heb": "Hebrew",
"hin": "Hindi",
"hrv": "Croatian",
"hun": "Hungarian",
"hye": "Armenian",
"iku": "Inuktitut",
"ind": "Indonesian",
"isl": "Icelandic",
"ita": "Italian",
"ita_old": "Italian (Old)",
"jav": "Javanese",
"jpn": "Japanese",
"jpn_vert": "Japanese (Vertical)",
"kan": "Kannada",
"kat": "Georgian",
"kat_old": "Georgian (Old)",
"kaz": "Kazakh",
"khm": "Central Khmer",
"kir": "Kirghiz, Kyrgyz",
"kmr": "Northern Kurdish",
"kor": "Korean",
"kor_vert": "Korean (Vertical)",
"lao": "Lao",
"lat": "Latin",
"lav": "Latvian",
"lit": "Lithuanian",
"ltz": "Luxembourgish",
"mal": "Malayalam",
"mar": "Marathi",
"mkd": "Macedonian",
"mlt": "Maltese",
"mon": "Mongolian",
"mri": "Maori",
"msa": "Malay",
"mya": "Burmese",
"nep": "Nepali",
"nld": "Dutch; Flemish",
"nor": "Norwegian",
"oci": "Occitan (post 1500)",
"ori": "Oriya",
"osd": "Orientation and script detection module",
"pan": "Panjabi, Punjabi",
"pol": "Polish",
"por": "Portuguese",
"pus": "Pushto, Pashto",
"que": "Quechua",
"ron": "Romanian, Moldavian, Moldovan",
"rus": "Russian",
"san": "Sanskrit",
"sin": "Sinhala, Sinhalese",
"slk": "Slovak",
"slk_frak": "Slovak (Fraktur)",
"slv": "Slovenian",
"snd": "Sindhi",
"spa": "Spanish",
"spa_old": "Spanish (Old)",
"sqi": "Albanian",
"srp": "Serbian",
"srp_latn": "Serbian (Latin)",
"sun": "Sundanese",
"swa": "Swahili",
"swe": "Swedish",
"syr": "Syriac",
"tam": "Tamil",
"tat": "Tatar",
"tel": "Telugu",
"tgk": "Tajik",
"tgl": "Tagalog",
"tha": "Thai",
"tir": "Tigrinya",
"ton": "Tonga (Tonga Islands)",
"tur": "Turkish",
"uig": "Uighur, Uyghur",
"ukr": "Ukrainian",
"urd": "Urdu",
"uzb": "Uzbek",
"uzb_cyrl": "Uzbek (Cyrillic)",
"vie": "Vietnamese",
"yid": "Yiddish",
"yor": "Yoruba"
},
"cookieBanner": {
"popUp": {
"title": "How we use Cookies",

View File

@ -1521,6 +1521,115 @@
},
"note": "Versionshinweise sind nur auf Englisch verfügbar"
},
"lang": {
"eng": "Englisch",
"fra": "Französisch",
"deu": "Deutsch",
"spa": "Spanisch",
"ita": "Italienisch",
"por": "Portugiesisch",
"rus": "Russisch",
"chi_sim": "Chinesisch (vereinfacht)",
"chi_sim_vert": "Chinesisch (vereinfacht, vertikal)",
"chi_tra": "Chinesisch (traditionell)",
"chi_tra_vert": "Chinesisch (traditionell, vertikal)",
"jpn": "Japanisch",
"jpn_vert": "Japanisch (vertikal)",
"kor": "Koreanisch",
"kor_vert": "Koreanisch (vertikal)",
"ara": "Arabisch",
"hin": "Hindi",
"nld": "Niederländisch",
"ces": "Tschechisch",
"pol": "Polnisch",
"tur": "Türkisch",
"ukr": "Ukrainisch",
"vie": "Vietnamesisch",
"swe": "Schwedisch",
"nor": "Norwegisch",
"fin": "Finnisch",
"dan": "Dänisch",
"ell": "Griechisch",
"heb": "Hebräisch",
"hun": "Ungarisch",
"bul": "Bulgarisch",
"ron": "Rumänisch",
"hrv": "Kroatisch",
"slk": "Slowakisch",
"ind": "Indonesisch",
"tha": "Thailändisch",
"slv": "Slowenisch",
"lav": "Lettisch",
"lit": "Litauisch",
"est": "Estnisch",
"cat": "Katalanisch",
"eus": "Baskisch",
"glg": "Galicisch",
"oci": "Okzitanisch",
"afr": "Afrikaans",
"amh": "Amharisch",
"asm": "Assamesisch",
"aze": "Aserbaidschanisch",
"aze_cyrl": "Aserbaidschanisch (kyrillisch)",
"bel": "Weißrussisch",
"ben": "Bengalisch",
"bod": "Tibetisch",
"bos": "Bosnisch",
"bre": "Bretonisch",
"ceb": "Cebuano",
"chr": "Cherokee",
"cym": "Walisisch",
"dzo": "Dzongkha",
"epo": "Esperanto",
"equ": "Mathematik / Gleichungserkennung",
"fas": "Persisch",
"fil": "Filipino",
"fry": "Westfriesisch",
"gle": "Irisch",
"guj": "Gujarati",
"hat": "Haitianisches Kreolisch",
"iku": "Inuktitut",
"jav": "Javanisch",
"kan": "Kannada",
"kaz": "Kasachisch",
"kaz_cyrl": "Kasachisch (kyrillisch)",
"khm": "Khmer",
"kir": "Kirgisisch",
"kur": "Kurdisch",
"lao": "Laotisch",
"lat": "Latein",
"mar": "Marathi",
"mlt": "Maltesisch",
"mon": "Mongolisch",
"mri": "Maori",
"msa": "Malaiisch",
"mya": "Birmanisch",
"nep": "Nepalesisch",
"nno": "Norwegisch Nynorsk",
"ori": "Oriya",
"pan": "Punjabi",
"que": "Quechua",
"sin": "Sinhala",
"snd": "Sindhi",
"sqi": "Albanisch",
"srp": "Serbisch",
"srp_latn": "Serbisch (lateinisch)",
"sun": "Sundanesisch",
"swa": "Swahili",
"syr": "Syrisch",
"tam": "Tamil",
"tel": "Telugu",
"tgk": "Tadschikisch",
"tgl": "Tagalog",
"tir": "Tigrinya",
"ton": "Tonga",
"uig": "Uigurisch",
"urd": "Urdu",
"uzb": "Usbekisch",
"uzb_cyrl": "Usbekisch (kyrillisch)",
"yid": "Jiddisch",
"yor": "Yoruba"
},
"cookieBanner": {
"popUp": {
"title": "Wie wir Cookies verwenden",

View File

@ -1531,6 +1531,115 @@
"desc": "View and test the Stirling PDF API endpoints",
"tags": "api,documentation,swagger,endpoints,development"
},
"lang": {
"eng": "English",
"fra": "French",
"deu": "German",
"spa": "Spanish",
"ita": "Italian",
"por": "Portuguese",
"rus": "Russian",
"chi_sim": "Chinese (Simplified)",
"chi_sim_vert": "Chinese (Simplified, Vertical)",
"chi_tra": "Chinese (Traditional)",
"chi_tra_vert": "Chinese (Traditional, Vertical)",
"jpn": "Japanese",
"jpn_vert": "Japanese (Vertical)",
"kor": "Korean",
"kor_vert": "Korean (Vertical)",
"ara": "Arabic",
"hin": "Hindi",
"nld": "Dutch",
"ces": "Czech",
"pol": "Polish",
"tur": "Turkish",
"ukr": "Ukrainian",
"vie": "Vietnamese",
"swe": "Swedish",
"nor": "Norwegian",
"fin": "Finnish",
"dan": "Danish",
"ell": "Greek",
"heb": "Hebrew",
"hun": "Hungarian",
"bul": "Bulgarian",
"ron": "Romanian",
"hrv": "Croatian",
"slk": "Slovak",
"ind": "Indonesian",
"tha": "Thai",
"slv": "Slovenian",
"lav": "Latvian",
"lit": "Lithuanian",
"est": "Estonian",
"cat": "Catalan",
"eus": "Basque",
"glg": "Galician",
"oci": "Occitan",
"afr": "Afrikaans",
"amh": "Amharic",
"asm": "Assamese",
"aze": "Azerbaijani",
"aze_cyrl": "Azerbaijani (Cyrillic)",
"bel": "Belarusian",
"ben": "Bengali",
"bod": "Tibetan",
"bos": "Bosnian",
"bre": "Breton",
"ceb": "Cebuano",
"chr": "Cherokee",
"cym": "Welsh",
"dzo": "Dzongkha",
"epo": "Esperanto",
"equ": "Math / equation detection",
"fas": "Persian",
"fil": "Filipino",
"fry": "Western Frisian",
"gle": "Irish",
"guj": "Gujarati",
"hat": "Haitian Creole",
"iku": "Inuktitut",
"jav": "Javanese",
"kan": "Kannada",
"kaz": "Kazakh",
"kaz_cyrl": "Kazakh (Cyrillic)",
"khm": "Khmer",
"kir": "Kyrgyz",
"kur": "Kurdish",
"lao": "Lao",
"lat": "Latin",
"mar": "Marathi",
"mlt": "Maltese",
"mon": "Mongolian",
"mri": "Maori",
"msa": "Malay",
"mya": "Burmese",
"nep": "Nepali",
"nno": "Norwegian Nynorsk",
"ori": "Oriya",
"pan": "Punjabi",
"que": "Quechua",
"sin": "Sinhala",
"snd": "Sindhi",
"sqi": "Albanian",
"srp": "Serbian",
"srp_latn": "Serbian (Latin)",
"sun": "Sundanese",
"swa": "Swahili",
"syr": "Syriac",
"tam": "Tamil",
"tel": "Telugu",
"tgk": "Tajik",
"tgl": "Tagalog",
"tir": "Tigrinya",
"ton": "Tonga",
"uig": "Uyghur",
"urd": "Urdu",
"uzb": "Uzbek",
"uzb_cyrl": "Uzbek (Cyrillic)",
"yid": "Yiddish",
"yor": "Yoruba"
},
"cookieBanner": {
"popUp": {
"title": "How we use Cookies",

View File

@ -0,0 +1,161 @@
import React, { useState, useEffect } from 'react';
import { Stack, Select, MultiSelect, Text, Loader } from '@mantine/core';
import { useTranslation } from 'react-i18next';
/**
 * User-selected options for a single OCR run.
 */
export interface OCRParameters {
/** OCR language codes (e.g. 'eng'). The settings form in this file stores at most one entry. */
languages: string[];
/** OCR mode. The settings form in this file offers 'skip-text', 'force-ocr' and 'Normal'. */
ocrType: string;
/** Output render type. The settings form in this file offers 'hocr' and 'sandwich'. */
ocrRenderType: string;
/** Keys of the selected extra options: 'sidecar', 'deskew', 'clean', 'cleanFinal', 'removeImagesAfter'. */
additionalOptions: string[];
}
/**
 * Props accepted by the OCRSettings form component.
 */
interface OCRSettingsProps {
/** Current parameter values; the form is fully controlled by this object. */
parameters: OCRParameters;
/** Called with the parameter key and its new value whenever the user changes a field. */
onParameterChange: (key: keyof OCRParameters, value: any) => void;
/** When true, the form inputs are rendered disabled. Defaults to false in the component. */
disabled?: boolean;
}
/**
 * Controlled settings form for the OCR tool.
 *
 * On mount (and whenever the translation function changes) it fetches the
 * list of OCR language codes from `/api/v1/ui-data/ocr-pdf` and renders:
 * a single-language selector, the OCR mode, the output render type and a
 * multi-select of additional options. Every change is reported through
 * `onParameterChange`; the component keeps no parameter state of its own.
 *
 * @param parameters        Current OCR parameter values.
 * @param onParameterChange Callback invoked with `(key, newValue)` on each change.
 * @param disabled          Disables all inputs when true (default: false).
 */
const OCRSettings: React.FC<OCRSettingsProps> = ({
  parameters,
  onParameterChange,
  disabled = false
}) => {
  const { t } = useTranslation();
  const [availableLanguages, setAvailableLanguages] = useState<{value: string, label: string}[]>([]);
  const [isLoadingLanguages, setIsLoadingLanguages] = useState(true);

  // Define the additional options available
  const additionalOptionsData = [
    { value: 'sidecar', label: 'Create sidecar text file' },
    { value: 'deskew', label: 'Deskew pages' },
    { value: 'clean', label: 'Clean input file' },
    { value: 'cleanFinal', label: 'Clean final output' },
    { value: 'removeImagesAfter', label: 'Remove images after OCR' },
  ];

  useEffect(() => {
    // Lets the cleanup cancel an in-flight request so a superseded or
    // unmounted instance never writes state.
    const controller = new AbortController();

    // Fetch available languages from backend
    const fetchLanguages = async () => {
      const url = '/api/v1/ui-data/ocr-pdf';
      try {
        const response = await fetch(url, { signal: controller.signal });
        if (!response.ok) {
          console.error('[OCR Languages] Response not OK:', response.status, response.statusText);
          return;
        }

        const data: { languages?: unknown } | null = await response.json();
        if (controller.signal.aborted) {
          return;
        }

        const languages = data?.languages;
        if (!Array.isArray(languages)) {
          // Guard against an unexpected payload instead of crashing in .map().
          console.error('[OCR Languages] Unexpected response shape, expected { languages: string[] }:', data);
          return;
        }

        const languageOptions = languages
          .filter((lang): lang is string => typeof lang === 'string')
          .map(lang => ({
            value: lang,
            // Without a default value, t() returns the raw key ("lang.xyz")
            // for untranslated codes; fall back to the language code itself.
            label: t(`lang.${lang}`, lang)
          }));
        setAvailableLanguages(languageOptions);
      } catch (error) {
        if (controller.signal.aborted) {
          // Request was cancelled by the effect cleanup; nothing to report.
          return;
        }
        console.error('[OCR Languages] Fetch failed with error:', error);
      } finally {
        if (!controller.signal.aborted) {
          setIsLoadingLanguages(false);
        }
      }
    };

    fetchLanguages();

    return () => {
      controller.abort();
    };
  }, [t]); // t is used to build the option labels, so re-run when it changes

  return (
    <Stack gap="md">
      <Text size="sm" fw={500}>OCR Configuration</Text>

      {isLoadingLanguages ? (
        <div style={{ display: 'flex', alignItems: 'center', gap: '8px' }}>
          <Loader size="xs" />
          <Text size="sm">Loading available languages...</Text>
        </div>
      ) : (
        <Select
          label="Languages"
          placeholder="Select primary language for OCR"
          // null (not '') is the "nothing selected" value for a controlled Select
          value={parameters.languages[0] ?? null}
          onChange={(value) => onParameterChange('languages', value ? [value] : [])}
          data={availableLanguages}
          disabled={disabled}
          clearable
        />
      )}

      <Select
        label="OCR Mode"
        value={parameters.ocrType}
        onChange={(value) => onParameterChange('ocrType', value || 'skip-text')}
        data={[
          { value: 'skip-text', label: 'Auto (skip text layers)' },
          { value: 'force-ocr', label: 'Force OCR - Process all pages' },
          { value: 'Normal', label: 'Normal - Error if text exists' },
        ]}
        disabled={disabled}
      />

      <Select
        label="Output"
        value={parameters.ocrRenderType}
        onChange={(value) => onParameterChange('ocrRenderType', value || 'hocr')}
        data={[
          { value: 'hocr', label: 'Searchable PDF (sandwich)' },
          { value: 'sandwich', label: 'Sandwich' },
        ]}
        disabled={disabled}
      />

      <MultiSelect
        label="Additional Options"
        placeholder="Select additional options"
        value={parameters.additionalOptions}
        onChange={(value) => onParameterChange('additionalOptions', value)}
        data={additionalOptionsData}
        disabled={disabled}
        clearable
        styles={{
          input: {
            backgroundColor: 'var(--mantine-color-gray-1)',
            borderColor: 'var(--mantine-color-gray-3)',
          },
          dropdown: {
            backgroundColor: 'var(--mantine-color-gray-1)',
          }
        }}
      />
    </Stack>
  );
};
export default OCRSettings;

View File

@ -0,0 +1,245 @@
import { useState, useCallback } from 'react';
import axios from 'axios';
import { useTranslation } from 'react-i18next';
import { useFileContext } from '../../../contexts/FileContext';
import { FileOperation } from '../../../types/fileContext';
import { OCRParameters } from '../../../components/tools/ocr/OCRSettings';
/**
 * Value returned by the useOCROperation hook: the result state of the most
 * recent OCR run plus the functions that start, reset and acknowledge it.
 */
export interface OCROperationHook {
/** Files produced by the last OCR run; empty before a run and after a reset. */
files: File[];
/** Thumbnail URLs for `files`. NOTE(review): the hook in this file only ever resets this to [] — confirm where thumbnails are meant to be produced. */
thumbnails: string[];
/** Object URL for downloading a result, or null when there is nothing to download. */
downloadUrl: string | null;
/** Suggested filename for the download link. */
downloadFilename: string | null;
/** True while an OCR request is in progress. */
isLoading: boolean;
/** True while results are being registered after processing. */
isGeneratingThumbnails: boolean;
/** Human-readable progress or result message; empty string when idle. */
status: string;
/** Message of the last failure, or null when there is none. */
errorMessage: string | null;
/** Runs OCR on the given files with the given parameters. */
executeOperation: (parameters: OCRParameters, selectedFiles: File[]) => Promise<void>;
/** Clears all result state and revokes tracked object URLs. */
resetResults: () => void;
/** Clears only the error message. */
clearError: () => void;
}
/**
 * React hook that runs OCR on a set of files and exposes the result state.
 *
 * Each non-empty input file is posted on its own to `/api/v1/misc/ocr-pdf`.
 * Successful responses are collected as `ocr_<original name>` files, handed
 * to the file context via `addFiles`, and the first one is exposed as a
 * downloadable object URL. The run is recorded in the file context
 * (`recordOperation`) and marked applied or failed when it finishes.
 *
 * @returns Result state (`files`, `status`, `errorMessage`, download info,
 *          loading flags) together with `executeOperation`, `resetResults`
 *          and `clearError`.
 */
export const useOCROperation = (): OCROperationHook => {
  const { t } = useTranslation();
  const {
    recordOperation,
    markOperationApplied,
    markOperationFailed,
    addFiles
  } = useFileContext();

  // Internal state management
  const [files, setFiles] = useState<File[]>([]);
  const [thumbnails, setThumbnails] = useState<string[]>([]);
  const [isGeneratingThumbnails, setIsGeneratingThumbnails] = useState(false);
  const [downloadUrl, setDownloadUrl] = useState<string | null>(null);
  const [downloadFilename, setDownloadFilename] = useState<string>('');
  const [status, setStatus] = useState('');
  const [errorMessage, setErrorMessage] = useState<string | null>(null);
  const [isLoading, setIsLoading] = useState(false);

  // Track blob URLs for cleanup
  const [blobUrls, setBlobUrls] = useState<string[]>([]);

  /** Revokes every tracked object URL and clears the tracking list. */
  const cleanupBlobUrls = useCallback(() => {
    blobUrls.forEach(url => {
      try {
        URL.revokeObjectURL(url);
      } catch (error) {
        console.warn('Failed to revoke blob URL:', error);
      }
    });
    setBlobUrls([]);
  }, [blobUrls]);

  /** Builds the multipart request body and endpoint for one input file. */
  const buildFormData = useCallback((
    parameters: OCRParameters,
    file: File
  ) => {
    const formData = new FormData();

    // Add the file
    formData.append('fileInput', file);

    // Add languages as multiple parameters with same name (like checkboxes)
    parameters.languages.forEach(lang => {
      formData.append('languages', lang);
    });

    // Add other parameters
    formData.append('ocrType', parameters.ocrType);
    formData.append('ocrRenderType', parameters.ocrRenderType);

    // Handle additional options - convert array to individual boolean parameters
    formData.append('sidecar', parameters.additionalOptions.includes('sidecar').toString());
    formData.append('deskew', parameters.additionalOptions.includes('deskew').toString());
    formData.append('clean', parameters.additionalOptions.includes('clean').toString());
    formData.append('cleanFinal', parameters.additionalOptions.includes('cleanFinal').toString());
    formData.append('removeImagesAfter', parameters.additionalOptions.includes('removeImagesAfter').toString());

    const endpoint = '/api/v1/misc/ocr-pdf';
    return { formData, endpoint };
  }, []);

  /** Creates the pending operation record that is stored in the file context. */
  const createOperation = useCallback((
    parameters: OCRParameters,
    selectedFiles: File[]
  ): { operation: FileOperation; operationId: string; fileId: string } => {
    // slice(2, 11) yields the same 9 characters as the deprecated substr(2, 9)
    const operationId = `ocr-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`;
    const fileId = selectedFiles.map(f => f.name).join(',');

    const operation: FileOperation = {
      id: operationId,
      type: 'ocr',
      timestamp: Date.now(),
      fileIds: selectedFiles.map(f => f.name),
      status: 'pending',
      metadata: {
        originalFileName: selectedFiles[0]?.name,
        parameters: {
          languages: parameters.languages,
          ocrType: parameters.ocrType,
          ocrRenderType: parameters.ocrRenderType,
          additionalOptions: parameters.additionalOptions,
        },
        fileSize: selectedFiles.reduce((sum, f) => sum + f.size, 0)
      }
    };

    return { operation, operationId, fileId };
  }, []);

  /**
   * Runs OCR for every non-empty file in `selectedFiles`.
   *
   * Never rejects: validation problems and request failures are reported
   * through `errorMessage` / `status` instead.
   */
  const executeOperation = useCallback(async (
    parameters: OCRParameters,
    selectedFiles: File[]
  ) => {
    if (selectedFiles.length === 0) {
      // t() returns the key itself for a missing translation, so `t(k) || fallback`
      // never reaches the fallback; pass it as the default value instead.
      setStatus(t("noFileSelected", "No file selected"));
      return;
    }

    if (parameters.languages.length === 0) {
      setErrorMessage('Please select at least one language for OCR processing.');
      return;
    }

    const validFiles = selectedFiles.filter(file => file.size > 0);
    if (validFiles.length === 0) {
      setErrorMessage('No valid files to process. All selected files are empty.');
      return;
    }

    if (validFiles.length < selectedFiles.length) {
      console.warn(`Skipping ${selectedFiles.length - validFiles.length} empty files`);
    }

    const { operation, operationId, fileId } = createOperation(parameters, selectedFiles);

    recordOperation(fileId, operation);

    setStatus(t("loading", "Loading..."));
    setIsLoading(true);
    setErrorMessage(null);
    setFiles([]);
    setThumbnails([]);

    try {
      const processedFiles: File[] = [];
      const failedFiles: string[] = [];

      // OCR typically processes one file at a time
      for (let i = 0; i < validFiles.length; i++) {
        const file = validFiles[i];
        setStatus(`Processing OCR for ${file.name} (${i + 1}/${validFiles.length})`);

        try {
          const { formData, endpoint } = buildFormData(parameters, file);
          const response = await axios.post(endpoint, formData, { responseType: "blob" });

          const contentType = response.headers['content-type'] || 'application/pdf';
          const blob = new Blob([response.data], { type: contentType });
          const processedFile = new File([blob], `ocr_${file.name}`, { type: contentType });

          processedFiles.push(processedFile);
        } catch (fileError) {
          // One failing file must not abort the rest of the batch.
          console.error(`Failed to process OCR for ${file.name}:`, fileError);
          failedFiles.push(file.name);
        }
      }

      if (failedFiles.length > 0 && processedFiles.length === 0) {
        throw new Error(`Failed to process OCR for all files: ${failedFiles.join(', ')}`);
      }

      if (failedFiles.length > 0) {
        setStatus(`Processed ${processedFiles.length}/${validFiles.length} files. Failed: ${failedFiles.join(', ')}`);
      } else {
        setStatus(`OCR completed successfully for ${processedFiles.length} file(s)`);
      }

      setFiles(processedFiles);
      setIsGeneratingThumbnails(true);

      await addFiles(processedFiles);

      // Cleanup old blob URLs
      cleanupBlobUrls();

      // Create download URL. Only the first processed file is offered for
      // download for now (no zip for multi-file runs), so name the download
      // after the file it actually contains. Using selectedFiles[0] here would
      // be wrong whenever the first selected file was empty or failed.
      const firstResult = processedFiles[0];
      const url = window.URL.createObjectURL(firstResult);
      setDownloadUrl(url);
      setBlobUrls([url]);
      setDownloadFilename(firstResult.name);

      markOperationApplied(fileId, operationId);
    } catch (error) {
      console.error('OCR operation error:', error);
      const errorMessage = error instanceof Error ? error.message : 'OCR operation failed';
      setErrorMessage(errorMessage);
      setStatus('');
      markOperationFailed(fileId, operationId, errorMessage);
    } finally {
      setIsLoading(false);
      // Reset here so the flag cannot stay stuck at true when addFiles()
      // (or anything after it) throws.
      setIsGeneratingThumbnails(false);
    }
  }, [buildFormData, createOperation, recordOperation, addFiles, cleanupBlobUrls, markOperationApplied, markOperationFailed, t]);

  /** Clears all result state and revokes tracked object URLs. */
  const resetResults = useCallback(() => {
    setFiles([]);
    setThumbnails([]);
    setDownloadUrl(null);
    setDownloadFilename('');
    setStatus('');
    setErrorMessage(null);
    setIsLoading(false);
    setIsGeneratingThumbnails(false);
    cleanupBlobUrls();
  }, [cleanupBlobUrls]);

  /** Clears only the error message, leaving results untouched. */
  const clearError = useCallback(() => {
    setErrorMessage(null);
  }, []);

  return {
    files,
    thumbnails,
    downloadUrl,
    downloadFilename,
    isLoading,
    isGeneratingThumbnails,
    status,
    errorMessage,
    executeOperation,
    resetResults,
    clearError,
  };
};

View File

@ -0,0 +1,43 @@
import { useState } from 'react';
import { OCRParameters } from '../../../components/tools/ocr/OCRSettings';
/**
 * Value returned by the useOCRParameters hook.
 */
export interface OCRParametersHook {
/** Current OCR parameter values. */
parameters: OCRParameters;
/** Replaces the value stored under `key`, leaving the other parameters unchanged. */
updateParameter: (key: keyof OCRParameters, value: any) => void;
/** Restores every parameter to its default value. */
resetParameters: () => void;
/** Returns true when the current parameters are usable, i.e. at least one language is selected. */
validateParameters: () => boolean;
}
// Initial and reset values for the OCR form: English only, the 'skip-text'
// OCR mode, 'hocr' output and no additional options.
const defaultParameters: OCRParameters = {
languages: ['eng'],
ocrType: 'skip-text',
ocrRenderType: 'hocr',
additionalOptions: [],
};
/**
 * React hook holding the OCR form parameters.
 *
 * State starts at `defaultParameters`. The returned helpers replace a single
 * field, restore the defaults, and report whether the current selection is
 * valid (at least one language chosen).
 */
export const useOCRParameters = (): OCRParametersHook => {
  const [parameters, setParameters] = useState<OCRParameters>(defaultParameters);

  // Shallow-copies the previous state and overrides just the given key.
  function updateParameter(key: keyof OCRParameters, value: any): void {
    setParameters(current => {
      return { ...current, [key]: value };
    });
  }

  function resetParameters(): void {
    setParameters(defaultParameters);
  }

  // At minimum, one language must be selected.
  function validateParameters(): boolean {
    const selectedLanguageCount = parameters.languages.length;
    return selectedLanguageCount > 0;
  }

  return { parameters, updateParameter, resetParameters, validateParameters };
};

View File

@ -15,33 +15,55 @@ export function useEndpointEnabled(endpoint: string): {
const fetchEndpointStatus = async () => {
if (!endpoint) {
console.log('[Endpoint Validation] No endpoint provided, setting to null');
setEnabled(null);
setLoading(false);
return;
}
console.log(`[Endpoint Validation] Starting validation for endpoint: ${endpoint}`);
try {
setLoading(true);
setError(null);
const response = await fetch(`/api/v1/config/endpoint-enabled?endpoint=${encodeURIComponent(endpoint)}`);
const url = `/api/v1/config/endpoint-enabled?endpoint=${encodeURIComponent(endpoint)}`;
console.log(`[Endpoint Validation] Fetching from URL: ${url}`);
const response = await fetch(url);
console.log(`[Endpoint Validation] Response received for ${endpoint}:`, {
status: response.status,
statusText: response.statusText,
ok: response.ok,
headers: Object.fromEntries(response.headers.entries())
});
if (!response.ok) {
throw new Error(`Failed to check endpoint: ${response.status} ${response.statusText}`);
const errorMessage = `Failed to check endpoint: ${response.status} ${response.statusText}`;
console.error(`[Endpoint Validation] Error response for ${endpoint}:`, errorMessage);
throw new Error(errorMessage);
}
const isEnabled: boolean = await response.json();
console.log(`[Endpoint Validation] Endpoint ${endpoint} status:`, isEnabled);
setEnabled(isEnabled);
} catch (err) {
const errorMessage = err instanceof Error ? err.message : 'Unknown error occurred';
console.error(`[Endpoint Validation] Failed to check endpoint ${endpoint}:`, err);
console.error(`[Endpoint Validation] Error details:`, {
name: err instanceof Error ? err.name : 'Unknown',
message: errorMessage,
stack: err instanceof Error ? err.stack : undefined
});
setError(errorMessage);
console.error(`Failed to check endpoint ${endpoint}:`, err);
} finally {
setLoading(false);
console.log(`[Endpoint Validation] Completed validation for ${endpoint}, loading: false`);
}
};
useEffect(() => {
console.log(`[Endpoint Validation] useEffect triggered for endpoint: ${endpoint}`);
fetchEndpointStatus();
}, [endpoint]);
@ -69,42 +91,66 @@ export function useMultipleEndpointsEnabled(endpoints: string[]): {
const fetchAllEndpointStatuses = async () => {
if (!endpoints || endpoints.length === 0) {
console.log('[Endpoint Validation] No endpoints provided for batch validation');
setEndpointStatus({});
setLoading(false);
return;
}
console.log(`[Endpoint Validation] Starting batch validation for ${endpoints.length} endpoints:`, endpoints);
try {
setLoading(true);
setError(null);
// Use batch API for efficiency
const endpointsParam = endpoints.join(',');
const response = await fetch(`/api/v1/config/endpoints-enabled?endpoints=${encodeURIComponent(endpointsParam)}`);
const url = `/api/v1/config/endpoints-enabled?endpoints=${encodeURIComponent(endpointsParam)}`;
console.log(`[Endpoint Validation] Batch fetch URL: ${url}`);
const response = await fetch(url);
console.log(`[Endpoint Validation] Batch response received:`, {
status: response.status,
statusText: response.statusText,
ok: response.ok,
headers: Object.fromEntries(response.headers.entries())
});
if (!response.ok) {
throw new Error(`Failed to check endpoints: ${response.status} ${response.statusText}`);
const errorMessage = `Failed to check endpoints: ${response.status} ${response.statusText}`;
console.error(`[Endpoint Validation] Batch error response:`, errorMessage);
throw new Error(errorMessage);
}
const statusMap: Record<string, boolean> = await response.json();
console.log(`[Endpoint Validation] Batch endpoint statuses:`, statusMap);
setEndpointStatus(statusMap);
} catch (err) {
const errorMessage = err instanceof Error ? err.message : 'Unknown error occurred';
console.error('[Endpoint Validation] Failed to check multiple endpoints:', err);
console.error('[Endpoint Validation] Batch error details:', {
name: err instanceof Error ? err.name : 'Unknown',
message: errorMessage,
stack: err instanceof Error ? err.stack : undefined
});
setError(errorMessage);
console.error('Failed to check multiple endpoints:', err);
// Fallback: assume all endpoints are disabled on error
const fallbackStatus = endpoints.reduce((acc, endpoint) => {
acc[endpoint] = false;
return acc;
}, {} as Record<string, boolean>);
console.log('[Endpoint Validation] Using fallback status (all disabled):', fallbackStatus);
setEndpointStatus(fallbackStatus);
} finally {
setLoading(false);
console.log(`[Endpoint Validation] Completed batch validation for ${endpoints.length} endpoints, loading: false`);
}
};
useEffect(() => {
const endpointsKey = endpoints.join(',');
console.log(`[Endpoint Validation] Batch useEffect triggered for endpoints: ${endpointsKey}`);
fetchAllEndpointStatuses();
}, [endpoints.join(',')]); // Re-run when endpoints array changes

View File

@ -36,6 +36,17 @@ const toolDefinitions: Record<string, ToolDefinition> = {
description: "Open API documentation",
endpoints: ["swagger-ui"]
},
ocr: {
id: "ocr",
icon: <span className="material-symbols-rounded font-size-20">
quick_reference_all
</span>,
component: React.lazy(() => import("../tools/OCR")),
maxFiles: -1,
category: "utility",
description: "Extract text from images using OCR",
endpoints: ["ocr-pdf"]
},
};
@ -60,12 +71,30 @@ export const useToolManagement = (): ToolManagementResult => {
Object.values(toolDefinitions).flatMap(tool => tool.endpoints || [])
));
const { endpointStatus, loading: endpointsLoading } = useMultipleEndpointsEnabled(allEndpoints);
console.log("[Tool Management] Endpoint validation results:", {
totalEndpoints: allEndpoints.length,
endpoints: allEndpoints,
status: endpointStatus,
loading: endpointsLoading
});
const isToolAvailable = useCallback((toolKey: string): boolean => {
if (endpointsLoading) return true;
if (endpointsLoading) {
console.log(`[Tool Management] Tool ${toolKey} availability check - endpoints still loading`);
return true;
}
const tool = toolDefinitions[toolKey];
if (!tool?.endpoints) return true;
return tool.endpoints.some(endpoint => endpointStatus[endpoint] === true);
if (!tool?.endpoints) {
console.log(`[Tool Management] Tool ${toolKey} has no endpoints defined, assuming available`);
return true;
}
const isAvailable = tool.endpoints.some(endpoint => endpointStatus[endpoint] === true);
console.log(`[Tool Management] Tool ${toolKey} availability:`, {
toolEndpoints: tool.endpoints,
endpointStatuses: tool.endpoints.map(ep => ({ endpoint: ep, enabled: endpointStatus[ep] })),
isAvailable
});
return isAvailable;
}, [endpointsLoading, endpointStatus]);
const toolRegistry: ToolRegistry = useMemo(() => {

175
frontend/src/tools/OCR.tsx Normal file
View File

@ -0,0 +1,175 @@
import React, { useEffect, useMemo } from "react";
import { Button, Stack, Text } from "@mantine/core";
import { useTranslation } from "react-i18next";
import DownloadIcon from "@mui/icons-material/Download";
import { useEndpointEnabled } from "../hooks/useEndpointConfig";
import { useFileContext } from "../contexts/FileContext";
import { useToolFileSelection } from "../contexts/FileSelectionContext";
import ToolStep, { ToolStepContainer } from "../components/tools/shared/ToolStep";
import OperationButton from "../components/tools/shared/OperationButton";
import ErrorNotification from "../components/tools/shared/ErrorNotification";
import FileStatusIndicator from "../components/tools/shared/FileStatusIndicator";
import ResultsPreview from "../components/tools/shared/ResultsPreview";
import OCRSettings from "../components/tools/ocr/OCRSettings";
import { useOCRParameters } from "../hooks/tools/ocr/useOCRParameters";
import { useOCROperation } from "../hooks/tools/ocr/useOCROperation";
import { BaseToolProps } from "../types/tool";
/**
 * OCR tool panel.
 *
 * Renders three steps — selected files, OCR settings with the run button, and
 * results (status, error, download link, previews) — and wires them to the
 * `useOCRParameters` and `useOCROperation` hooks.
 *
 * @param onPreviewFile Called with a result file to preview, or null to clear the preview.
 * @param onComplete    Called with the processed files once a run has produced results.
 * @param onError       Called with a message when a run reports an error.
 */
const OCR = ({ onPreviewFile, onComplete, onError }: BaseToolProps) => {
  const { t } = useTranslation();
  const { setCurrentMode } = useFileContext();
  const { selectedFiles } = useToolFileSelection();

  const ocrParams = useOCRParameters();
  const ocrOperation = useOCROperation();

  // Endpoint validation
  const { enabled: endpointEnabled, loading: endpointLoading } = useEndpointEnabled("ocr-pdf");

  useEffect(() => {
    console.log('[OCR Tool] Endpoint validation status:', {
      endpoint: 'ocr-pdf',
      enabled: endpointEnabled,
      loading: endpointLoading
    });
  }, [endpointEnabled, endpointLoading]);

  // Any change to the parameters or the file selection invalidates old results.
  useEffect(() => {
    ocrOperation.resetResults();
    onPreviewFile?.(null);
  }, [ocrParams.parameters, selectedFiles]);

  // Report results from an effect: inside handleOCR, `ocrOperation.files` is
  // the snapshot from the render in which the click happened, so reading it
  // after the await would always hand the pre-run (empty) list to onComplete.
  useEffect(() => {
    if (ocrOperation.files.length > 0) {
      onComplete?.(ocrOperation.files);
    }
  }, [ocrOperation.files]);

  // executeOperation() does not reject; failures are surfaced through
  // `errorMessage`, so forward them to onError from here.
  useEffect(() => {
    if (ocrOperation.errorMessage) {
      onError?.(ocrOperation.errorMessage);
    }
  }, [ocrOperation.errorMessage]);

  const handleOCR = async () => {
    try {
      await ocrOperation.executeOperation(
        ocrParams.parameters,
        selectedFiles
      );
    } catch (error) {
      // Defensive only: covers an unexpected throw from the hook itself.
      if (onError) {
        onError(error instanceof Error ? error.message : 'OCR operation failed');
      }
    }
  };

  const handleThumbnailClick = (file: File) => {
    onPreviewFile?.(file);
    // Remember where to return to when the viewer is closed.
    sessionStorage.setItem('previousMode', 'ocr');
    setCurrentMode('viewer');
  };

  const handleSettingsReset = () => {
    ocrOperation.resetResults();
    onPreviewFile?.(null);
    setCurrentMode('ocr');
  };

  const hasFiles = selectedFiles.length > 0;
  const hasResults = ocrOperation.files.length > 0 || ocrOperation.downloadUrl !== null;
  const filesCollapsed = hasFiles;
  const settingsCollapsed = hasResults;

  const previewResults = useMemo(() =>
    ocrOperation.files?.map((file: File, index: number) => ({
      file,
      thumbnail: ocrOperation.thumbnails[index]
    })) || [],
    [ocrOperation.files, ocrOperation.thumbnails]
  );

  return (
    <ToolStepContainer>
      <Stack gap="sm" h="100%" p="sm" style={{ overflow: 'auto' }}>
        {/* Files Step */}
        <ToolStep
          title="Files"
          isVisible={true}
          isCollapsed={filesCollapsed}
          isCompleted={filesCollapsed}
          completedMessage={hasFiles ?
            selectedFiles.length === 1
              ? `Selected: ${selectedFiles[0].name}`
              : `Selected: ${selectedFiles.length} files`
            : undefined}
        >
          <FileStatusIndicator
            selectedFiles={selectedFiles}
            placeholder="Select a PDF file in the main view to get started"
          />
        </ToolStep>

        {/* Settings Step */}
        <ToolStep
          title="Settings"
          isVisible={hasFiles}
          isCollapsed={settingsCollapsed}
          isCompleted={settingsCollapsed}
          onCollapsedClick={settingsCollapsed ? handleSettingsReset : undefined}
          completedMessage={settingsCollapsed ? "OCR processing completed" : undefined}
        >
          <Stack gap="sm">
            <OCRSettings
              parameters={ocrParams.parameters}
              onParameterChange={ocrParams.updateParameter}
              disabled={endpointLoading}
            />

            <OperationButton
              onClick={handleOCR}
              isLoading={ocrOperation.isLoading}
              disabled={!ocrParams.validateParameters() || !hasFiles || !endpointEnabled}
              loadingText={t("loading")}
              submitText="Process OCR and Review"
            />
          </Stack>
        </ToolStep>

        {/* Results Step */}
        <ToolStep
          title="Results"
          isVisible={hasResults}
        >
          <Stack gap="sm">
            {ocrOperation.status && (
              <Text size="sm" c="dimmed">{ocrOperation.status}</Text>
            )}

            <ErrorNotification
              error={ocrOperation.errorMessage}
              onClose={ocrOperation.clearError}
            />

            {ocrOperation.downloadUrl && (
              <Button
                component="a"
                href={ocrOperation.downloadUrl}
                download={ocrOperation.downloadFilename}
                leftSection={<DownloadIcon />}
                color="green"
                fullWidth
                mb="md"
              >
                {t("download", "Download")}
              </Button>
            )}

            <ResultsPreview
              files={previewResults}
              onFileClick={handleThumbnailClick}
              isGeneratingThumbnails={ocrOperation.isGeneratingThumbnails}
              title="OCR Results"
            />
          </Stack>
        </ToolStep>
      </Stack>
    </ToolStepContainer>
  );
}
export default OCR;

View File

@ -5,15 +5,13 @@
import { ProcessedFile } from './processing';
import { PDFDocument, PDFPage, PageOperation } from './pageEditor';
export type ModeType = 'viewer' | 'pageEditor' | 'fileEditor' | 'merge' | 'split' | 'compress';
export type ModeType = 'viewer' | 'pageEditor' | 'fileEditor' | 'merge' | 'split' | 'compress' | 'ocr';
// Legacy types for backward compatibility during transition
export type ViewType = 'viewer' | 'pageEditor' | 'fileEditor';
export type ToolType = 'merge' | 'split' | 'compress' | null;
export type OperationType = 'merge' | 'split' | 'compress' | 'add' | 'remove' | 'replace' | 'convert' | 'upload' | 'ocr';
export interface FileOperation {
id: string;
type: 'merge' | 'split' | 'compress' | 'add' | 'remove' | 'replace' | 'convert' | 'upload';
type: OperationType;
timestamp: number;
fileIds: string[];
status: 'pending' | 'applied' | 'failed';
@ -56,9 +54,6 @@ export interface FileContextState {
// Current navigation state
currentMode: ModeType;
// Legacy fields for backward compatibility
currentView: ViewType;
currentTool: ToolType;
// Edit history and state
fileEditHistory: Map<string, FileEditHistory>;
@ -97,10 +92,6 @@ export interface FileContextActions {
// Navigation
setCurrentMode: (mode: ModeType) => void;
// Legacy navigation functions for backward compatibility
setCurrentView: (view: ViewType) => void;
setCurrentTool: (tool: ToolType) => void;
// Selection management
setSelectedFiles: (fileIds: string[]) => void;
setSelectedPages: (pageNumbers: number[]) => void;
@ -168,9 +159,6 @@ export interface WithFileContext {
// URL parameter types for deep linking
export interface FileContextUrlParams {
mode?: ModeType;
// Legacy parameters for backward compatibility
view?: ViewType;
tool?: ToolType;
fileIds?: string[];
pageIds?: string[];
zoom?: number;