From 1770d7fa3bd0d05c9546c5d5a9435221bec9dd20 Mon Sep 17 00:00:00 2001 From: Reece Browne Date: Thu, 7 Aug 2025 17:15:24 +0100 Subject: [PATCH] Clean up zip use; updated OCR --- .../src/hooks/tools/ocr/useOCROperation.ts | 66 +++++-------------- .../hooks/tools/shared/useToolOperation.ts | 6 +- .../hooks/tools/shared/useToolResources.ts | 23 +++---- frontend/src/utils/toolResponseProcessor.ts | 8 +-- 4 files changed, 35 insertions(+), 68 deletions(-) diff --git a/frontend/src/hooks/tools/ocr/useOCROperation.ts b/frontend/src/hooks/tools/ocr/useOCROperation.ts index ecfe32fd7..ad0c51c67 100644 --- a/frontend/src/hooks/tools/ocr/useOCROperation.ts +++ b/frontend/src/hooks/tools/ocr/useOCROperation.ts @@ -4,42 +4,7 @@ import { useTranslation } from 'react-i18next'; import { OCRParameters } from '../../../components/tools/ocr/OCRSettings'; import { useToolOperation, ToolOperationConfig } from '../shared/useToolOperation'; import { createStandardErrorHandler } from '../../../utils/toolErrorHandler'; - -//Extract files from a ZIP blob -async function extractZipFile(zipBlob: Blob): Promise { - const JSZip = await import('jszip'); - const zip = new JSZip.default(); - - const arrayBuffer = await zipBlob.arrayBuffer(); - const zipContent = await zip.loadAsync(arrayBuffer); - - const extractedFiles: File[] = []; - - for (const [filename, file] of Object.entries(zipContent.files)) { - if (!file.dir) { - const content = await file.async('blob'); - const extractedFile = new File([content], filename, { type: getMimeType(filename) }); - extractedFiles.push(extractedFile); - } - } - - return extractedFiles; -} - -//Get MIME type based on file extension -function getMimeType(filename: string): string { - const ext = filename.toLowerCase().split('.').pop(); - switch (ext) { - case 'pdf': - return 'application/pdf'; - case 'txt': - return 'text/plain'; - case 'zip': - return 'application/zip'; - default: - return 'application/octet-stream'; - } -} +import { useToolResources } from 
'../shared/useToolResources'; const buildFormData = (parameters: OCRParameters, file: File): FormData => { const formData = new FormData(); @@ -68,15 +33,12 @@ const buildFormData = (parameters: OCRParameters, file: File): FormData => { export const useOCROperation = () => { const { t } = useTranslation(); + const { extractZipFiles } = useToolResources(); const customOCRProcessor = useCallback(async ( parameters: OCRParameters, selectedFiles: File[] ): Promise => { - if (parameters.languages.length === 0) { - throw new Error(t('ocr.validation.languageRequired', 'Please select at least one language for OCR processing.')); - } - const processedFiles: File[] = []; const failedFiles: string[] = []; @@ -85,7 +47,7 @@ export const useOCROperation = () => { const file = selectedFiles[i]; try { - const formData = buildFormData(file, parameters); + const formData = buildFormData(parameters, file); const response = await axios.post('/api/v1/misc/ocr-pdf', formData, { responseType: "blob" }); @@ -111,16 +73,22 @@ export const useOCROperation = () => { // Check if it's a ZIP file (OCR service returns ZIP when sidecar is enabled or for multi-file results) if (header.startsWith('PK')) { try { - // Extract ZIP file contents - const zipFiles = await extractZipFile(response.data); + // Extract ZIP file contents using tool resources + const zipBlob = new Blob([arrayBuffer]); + const extractedFiles = await extractZipFiles(zipBlob); - // Add extracted files to processed files - processedFiles.push(...zipFiles); + if (extractedFiles.length > 0) { + // Add extracted files to processed files + processedFiles.push(...extractedFiles); + } else { + // Fallback to treating as single ZIP file if extraction failed + const zipFile = new File([arrayBuffer], `ocr_${file.name}.zip`, { type: 'application/zip' }); + processedFiles.push(zipFile); + } } catch (extractError) { // Fallback to treating as single ZIP file - const blob = new Blob([response.data], { type: 'application/zip' }); - const 
processedFile = new File([blob], `ocr_${file.name}.zip`, { type: 'application/zip' }); - processedFiles.push(processedFile); + const zipFile = new File([arrayBuffer], `ocr_${file.name}.zip`, { type: 'application/zip' }); + processedFiles.push(zipFile); } continue; // Skip the PDF validation for ZIP files } @@ -150,7 +118,7 @@ export const useOCROperation = () => { throw new Error(`Response is not a valid PDF file. Header: "${header}"`); } - const blob = new Blob([response.data], { type: contentType }); + const blob = new Blob([arrayBuffer], { type: contentType }); const processedFile = new File([blob], `ocr_${file.name}`, { type: contentType }); processedFiles.push(processedFile); diff --git a/frontend/src/hooks/tools/shared/useToolOperation.ts b/frontend/src/hooks/tools/shared/useToolOperation.ts index 55252f05a..d193a2d55 100644 --- a/frontend/src/hooks/tools/shared/useToolOperation.ts +++ b/frontend/src/hooks/tools/shared/useToolOperation.ts @@ -171,10 +171,8 @@ export const useToolOperation = ( // Handle response based on responseHandler if (config.responseHandler?.type === 'zip' && config.responseHandler?.useZipExtractor) { - const zipFile = new File([response.data], 'results.zip', { type: 'application/zip' }); - const { zipFileService } = await import('../../../services/zipFileService'); - const extractionResult = await zipFileService.extractPdfFiles(zipFile); - processedFiles = extractionResult.success ? 
extractionResult.extractedFiles : []; + // Use tool resources for ZIP extraction + processedFiles = await toolResources.extractZipFiles(response.data); } else { // Single file response const filename = validFiles.length === 1 diff --git a/frontend/src/hooks/tools/shared/useToolResources.ts b/frontend/src/hooks/tools/shared/useToolResources.ts index c368405e9..d603212cd 100644 --- a/frontend/src/hooks/tools/shared/useToolResources.ts +++ b/frontend/src/hooks/tools/shared/useToolResources.ts @@ -1,5 +1,6 @@ import { useState, useCallback, useEffect } from 'react'; import { generateThumbnailForFile } from '../../../utils/thumbnailUtils'; +import { zipFileService } from '../../../services/zipFileService'; export const useToolResources = () => { const [blobUrls, setBlobUrls] = useState([]); @@ -48,6 +49,12 @@ export const useToolResources = () => { return thumbnails; }, []); + const extractZipFiles = useCallback(async (zipBlob: Blob): Promise => { + const zipFile = new File([zipBlob], 'temp.zip', { type: 'application/zip' }); + const extractionResult = await zipFileService.extractPdfFiles(zipFile); + return extractionResult.success ? 
extractionResult.extractedFiles : []; + }, []); + const createDownloadInfo = useCallback(async ( files: File[], operationType: string @@ -58,24 +65,18 @@ export const useToolResources = () => { return { url, filename: files[0].name }; } - // Multiple files - create zip - const JSZip = (await import('jszip')).default; - const zip = new JSZip(); - - files.forEach(file => { - zip.file(file.name, file); - }); - - const zipBlob = await zip.generateAsync({ type: 'blob' }); - const url = URL.createObjectURL(zipBlob); + // Multiple files - create zip using shared service + const { zipFile } = await zipFileService.createZipFromFiles(files, `${operationType}_results.zip`); + const url = URL.createObjectURL(zipFile); addBlobUrl(url); - return { url, filename: `${operationType}_results.zip` }; + return { url, filename: zipFile.name }; }, [addBlobUrl]); return { generateThumbnails, createDownloadInfo, + extractZipFiles, cleanupBlobUrls, }; }; \ No newline at end of file diff --git a/frontend/src/utils/toolResponseProcessor.ts b/frontend/src/utils/toolResponseProcessor.ts index 15069d056..8433d1e50 100644 --- a/frontend/src/utils/toolResponseProcessor.ts +++ b/frontend/src/utils/toolResponseProcessor.ts @@ -1,4 +1,4 @@ -import { zipFileService } from '../services/zipFileService'; +// Note: This utility should be used with useToolResources for ZIP operations export interface ResponseHandler { type: 'single' | 'zip' | 'custom'; @@ -12,6 +12,7 @@ const defaultResponseHandler: ResponseHandler = { /** * Processes API response blob based on handler configuration + * Note: For ZIP extraction, use useToolResources.extractZipFiles instead */ export const processResponse = async ( blob: Blob, @@ -24,9 +25,8 @@ export const processResponse = async ( switch (handler.type) { case 'zip': if (handler.useZipExtractor) { - const zipFile = new File([blob], 'result.zip', { type: 'application/zip' }); - const extractionResult = await zipFileService.extractPdfFiles(zipFile); - return 
extractionResult.success ? extractionResult.extractedFiles : []; + // This path should be avoided - use useToolResources.extractZipFiles instead + throw new Error('ZIP extraction should use useToolResources.extractZipFiles'); } // Fall through to custom if no zip extractor case 'custom':