From 3a5402b55af0814d090a228af7129f4af977fc59 Mon Sep 17 00:00:00 2001 From: Reece Browne Date: Wed, 20 Aug 2025 14:10:40 +0100 Subject: [PATCH] Thumbnail generation improvements. PDF worker management improvements --- .../components/pageEditor/PageThumbnail.tsx | 6 --- frontend/src/components/viewer/Viewer.tsx | 10 +++-- frontend/src/contexts/IndexedDBContext.tsx | 11 +++-- frontend/src/hooks/usePDFProcessor.ts | 42 ++++++++++++++----- .../src/hooks/usePdfSignatureDetection.ts | 8 ++-- frontend/src/services/fileAnalyzer.ts | 11 +++-- frontend/src/services/pdfProcessingService.ts | 9 ++-- frontend/src/services/pdfWorkerManager.ts | 37 ++++++++++++---- .../services/thumbnailGenerationService.ts | 16 ++++--- frontend/src/utils/thumbnailUtils.ts | 17 ++++---- 10 files changed, 104 insertions(+), 63 deletions(-) diff --git a/frontend/src/components/pageEditor/PageThumbnail.tsx b/frontend/src/components/pageEditor/PageThumbnail.tsx index eea7c682d..f1590978a 100644 --- a/frontend/src/components/pageEditor/PageThumbnail.tsx +++ b/frontend/src/components/pageEditor/PageThumbnail.tsx @@ -13,12 +13,6 @@ import { Command } from '../../hooks/useUndoRedo'; import { useFileState } from '../../contexts/FileContext'; import { useThumbnailGeneration } from '../../hooks/useThumbnailGeneration'; import styles from './PageEditor.module.css'; -import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist'; - -// Ensure PDF.js worker is available -if (!GlobalWorkerOptions.workerSrc) { - GlobalWorkerOptions.workerSrc = '/pdf.worker.js'; -} interface PageThumbnailProps { page: PDFPage; diff --git a/frontend/src/components/viewer/Viewer.tsx b/frontend/src/components/viewer/Viewer.tsx index 17aaca490..85ba8c976 100644 --- a/frontend/src/components/viewer/Viewer.tsx +++ b/frontend/src/components/viewer/Viewer.tsx @@ -1,7 +1,7 @@ import React, { useEffect, useState, useRef, useCallback } from "react"; import { Paper, Stack, Text, ScrollArea, Loader, Center, Button, Group, NumberInput, 
useMantineTheme, ActionIcon, Box, Tabs } from "@mantine/core"; -import { getDocument, GlobalWorkerOptions } from "pdfjs-dist"; import { useTranslation } from "react-i18next"; +import { pdfWorkerManager } from "../../services/pdfWorkerManager"; import ArrowBackIosNewIcon from "@mui/icons-material/ArrowBackIosNew"; import ArrowForwardIosIcon from "@mui/icons-material/ArrowForwardIos"; import FirstPageIcon from "@mui/icons-material/FirstPage"; @@ -16,7 +16,6 @@ import SkeletonLoader from '../shared/SkeletonLoader'; import { useFileState, useFileActions, useCurrentFile, useProcessedFiles } from "../../contexts/FileContext"; import { useFileWithUrl } from "../../hooks/useFileWithUrl"; -GlobalWorkerOptions.workerSrc = "/pdf.worker.js"; // Lazy loading page image component interface LazyPageImageProps { @@ -399,7 +398,7 @@ const Viewer = ({ throw new Error('No valid PDF source available'); } - const pdf = await getDocument(pdfData).promise; + const pdf = await pdfWorkerManager.createDocument(pdfData); pdfDocRef.current = pdf; setNumPages(pdf.numPages); if (!cancelled) { @@ -420,6 +419,11 @@ const Viewer = ({ cancelled = true; // Stop any ongoing preloading preloadingRef.current = false; + // Cleanup PDF document using worker manager + if (pdfDocRef.current) { + pdfWorkerManager.destroyDocument(pdfDocRef.current); + pdfDocRef.current = null; + } // Cleanup ArrayBuffer reference to help garbage collection currentArrayBufferRef.current = null; }; diff --git a/frontend/src/contexts/IndexedDBContext.tsx b/frontend/src/contexts/IndexedDBContext.tsx index 727ce2c3e..6b528fc06 100644 --- a/frontend/src/contexts/IndexedDBContext.tsx +++ b/frontend/src/contexts/IndexedDBContext.tsx @@ -10,6 +10,7 @@ import { fileStorage, StoredFile } from '../services/fileStorage'; import { FileId } from '../types/fileContext'; import { FileMetadata } from '../types/file'; import { generateThumbnailForFile } from '../utils/thumbnailUtils'; +import { pdfWorkerManager } from 
'../services/pdfWorkerManager'; interface IndexedDBContextValue { // Core CRUD operations @@ -82,16 +83,15 @@ export function IndexedDBProvider({ children }: IndexedDBProviderProps) { // DEBUG: Check original file before saving if (DEBUG && file.type === 'application/pdf') { try { - const { getDocument } = await import('pdfjs-dist'); const arrayBuffer = await file.arrayBuffer(); - const pdf = await getDocument({ data: arrayBuffer }).promise; + const pdf = await pdfWorkerManager.createDocument(arrayBuffer); console.log(`🔍 BEFORE SAVE - Original file:`, { name: file.name, size: file.size, arrayBufferSize: arrayBuffer.byteLength, pages: pdf.numPages }); - pdf.destroy(); + pdfWorkerManager.destroyDocument(pdf); } catch (error) { console.error(`🔍 Error validating file before save:`, error); } @@ -152,11 +152,10 @@ export function IndexedDBProvider({ children }: IndexedDBProviderProps) { // Quick PDF validation try { - const { getDocument } = await import('pdfjs-dist'); const arrayBuffer = await file.arrayBuffer(); - const pdf = await getDocument({ data: arrayBuffer }).promise; + const pdf = await pdfWorkerManager.createDocument(arrayBuffer); console.log(`🔍 AFTER LOAD - PDF validation: ${pdf.numPages} pages in reconstructed file`); - pdf.destroy(); + pdfWorkerManager.destroyDocument(pdf); } catch (error) { console.error(`🔍 AFTER LOAD - PDF reconstruction error:`, error); } diff --git a/frontend/src/hooks/usePDFProcessor.ts b/frontend/src/hooks/usePDFProcessor.ts index 0a717a3a9..a35b777ca 100644 --- a/frontend/src/hooks/usePDFProcessor.ts +++ b/frontend/src/hooks/usePDFProcessor.ts @@ -1,6 +1,6 @@ import { useState, useCallback } from 'react'; -import { getDocument } from 'pdfjs-dist'; import { PDFDocument, PDFPage } from '../types/pageEditor'; +import { pdfWorkerManager } from '../services/pdfWorkerManager'; export function usePDFProcessor() { const [loading, setLoading] = useState(false); @@ -13,7 +13,7 @@ export function usePDFProcessor() { ): Promise => { try { const 
arrayBuffer = await file.arrayBuffer(); - const pdf = await getDocument({ data: arrayBuffer }).promise; + const pdf = await pdfWorkerManager.createDocument(arrayBuffer); const page = await pdf.getPage(pageNumber); const viewport = page.getViewport({ scale }); @@ -29,8 +29,8 @@ export function usePDFProcessor() { await page.render({ canvasContext: context, viewport }).promise; const thumbnail = canvas.toDataURL(); - // Clean up - pdf.destroy(); + // Clean up using worker manager + pdfWorkerManager.destroyDocument(pdf); return thumbnail; } catch (error) { @@ -39,13 +39,35 @@ export function usePDFProcessor() { } }, []); + // Internal function to generate thumbnail from already-opened PDF + const generateThumbnailFromPDF = useCallback(async ( + pdf: any, + pageNumber: number, + scale: number = 0.5 + ): Promise => { + const page = await pdf.getPage(pageNumber); + + const viewport = page.getViewport({ scale }); + const canvas = document.createElement('canvas'); + canvas.width = viewport.width; + canvas.height = viewport.height; + + const context = canvas.getContext('2d'); + if (!context) { + throw new Error('Could not get canvas context'); + } + + await page.render({ canvasContext: context, viewport }).promise; + return canvas.toDataURL(); + }, []); + const processPDFFile = useCallback(async (file: File): Promise => { setLoading(true); setError(null); try { const arrayBuffer = await file.arrayBuffer(); - const pdf = await getDocument({ data: arrayBuffer }).promise; + const pdf = await pdfWorkerManager.createDocument(arrayBuffer); const totalPages = pdf.numPages; const pages: PDFPage[] = []; @@ -61,19 +83,19 @@ export function usePDFProcessor() { }); } - // Generate thumbnails for first 10 pages immediately for better UX + // Generate thumbnails for first 10 pages immediately using the same PDF instance const priorityPages = Math.min(10, totalPages); for (let i = 1; i <= priorityPages; i++) { try { - const thumbnail = await generatePageThumbnail(file, i); + const 
thumbnail = await generateThumbnailFromPDF(pdf, i); pages[i - 1].thumbnail = thumbnail; } catch (error) { console.warn(`Failed to generate thumbnail for page ${i}:`, error); } } - // Clean up - pdf.destroy(); + // Clean up using worker manager + pdfWorkerManager.destroyDocument(pdf); const document: PDFDocument = { id: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`, @@ -91,7 +113,7 @@ export function usePDFProcessor() { } finally { setLoading(false); } - }, [generatePageThumbnail]); + }, [generateThumbnailFromPDF]); return { processPDFFile, diff --git a/frontend/src/hooks/usePdfSignatureDetection.ts b/frontend/src/hooks/usePdfSignatureDetection.ts index ea7d0bdf0..17f90f2d9 100644 --- a/frontend/src/hooks/usePdfSignatureDetection.ts +++ b/frontend/src/hooks/usePdfSignatureDetection.ts @@ -1,5 +1,6 @@ import { useState, useEffect } from 'react'; import * as pdfjsLib from 'pdfjs-dist'; +import { pdfWorkerManager } from '../services/pdfWorkerManager'; export interface PdfSignatureDetectionResult { hasDigitalSignatures: boolean; @@ -21,14 +22,12 @@ export const usePdfSignatureDetection = (files: File[]): PdfSignatureDetectionRe let foundSignature = false; try { - // Set up PDF.js worker - pdfjsLib.GlobalWorkerOptions.workerSrc = '/pdfjs-legacy/pdf.worker.mjs'; for (const file of files) { const arrayBuffer = await file.arrayBuffer(); try { - const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise; + const pdf = await pdfWorkerManager.createDocument(arrayBuffer); for (let i = 1; i <= pdf.numPages; i++) { const page = await pdf.getPage(i); @@ -42,6 +41,9 @@ export const usePdfSignatureDetection = (files: File[]): PdfSignatureDetectionRe if (foundSignature) break; } + + // Clean up PDF document using worker manager + pdfWorkerManager.destroyDocument(pdf); } catch (error) { console.warn('Error analyzing PDF for signatures:', error); } diff --git a/frontend/src/services/fileAnalyzer.ts b/frontend/src/services/fileAnalyzer.ts index 
6c5f3ec1f..537692600 100644 --- a/frontend/src/services/fileAnalyzer.ts +++ b/frontend/src/services/fileAnalyzer.ts @@ -1,5 +1,5 @@ -import { getDocument } from 'pdfjs-dist'; import { FileAnalysis, ProcessingStrategy } from '../types/processing'; +import { pdfWorkerManager } from './pdfWorkerManager'; export class FileAnalyzer { private static readonly SIZE_THRESHOLDS = { @@ -66,17 +66,16 @@ export class FileAnalyzer { // For large files, try the whole file first (PDF.js needs the complete structure) const arrayBuffer = await file.arrayBuffer(); - const pdf = await getDocument({ - data: arrayBuffer, + const pdf = await pdfWorkerManager.createDocument(arrayBuffer, { stopAtErrors: false, // Don't stop at minor errors verbosity: 0 // Suppress PDF.js warnings - }).promise; + }); const pageCount = pdf.numPages; const isEncrypted = (pdf as any).isEncrypted; - // Clean up - pdf.destroy(); + // Clean up using worker manager + pdfWorkerManager.destroyDocument(pdf); return { pageCount, diff --git a/frontend/src/services/pdfProcessingService.ts b/frontend/src/services/pdfProcessingService.ts index 2b0dae0f0..065f53210 100644 --- a/frontend/src/services/pdfProcessingService.ts +++ b/frontend/src/services/pdfProcessingService.ts @@ -1,9 +1,6 @@ -import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist'; import { ProcessedFile, ProcessingState, PDFPage } from '../types/processing'; import { ProcessingCache } from './processingCache'; - -// Set up PDF.js worker -GlobalWorkerOptions.workerSrc = '/pdf.worker.js'; +import { pdfWorkerManager } from './pdfWorkerManager'; export class PDFProcessingService { private static instance: PDFProcessingService; @@ -96,7 +93,7 @@ export class PDFProcessingService { onProgress: (progress: number) => void ): Promise { const arrayBuffer = await file.arrayBuffer(); - const pdf = await getDocument({ data: arrayBuffer }).promise; + const pdf = await pdfWorkerManager.createDocument(arrayBuffer); const totalPages = pdf.numPages; onProgress(10); // 
PDF loaded @@ -129,7 +126,7 @@ export class PDFProcessingService { onProgress(progress); } - pdf.destroy(); + pdfWorkerManager.destroyDocument(pdf); onProgress(100); return { diff --git a/frontend/src/services/pdfWorkerManager.ts b/frontend/src/services/pdfWorkerManager.ts index bad382109..c31bc5f88 100644 --- a/frontend/src/services/pdfWorkerManager.ts +++ b/frontend/src/services/pdfWorkerManager.ts @@ -39,9 +39,10 @@ class PDFWorkerManager { /** * Create a PDF document with proper lifecycle management + * Supports ArrayBuffer, Uint8Array, URL string, or {data: ArrayBuffer} object */ async createDocument( - data: ArrayBuffer | Uint8Array, + data: ArrayBuffer | Uint8Array | string | { data: ArrayBuffer }, options: { disableAutoFetch?: boolean; disableStream?: boolean; @@ -55,13 +56,33 @@ class PDFWorkerManager { await this.waitForAvailableWorker(); } - const loadingTask = getDocument({ - data, - disableAutoFetch: options.disableAutoFetch ?? true, - disableStream: options.disableStream ?? true, - stopAtErrors: options.stopAtErrors ?? false, - verbosity: options.verbosity ?? 0 - }); + // Normalize input data to PDF.js format + let pdfData: any; + if (data instanceof ArrayBuffer || data instanceof Uint8Array) { + pdfData = { data }; + } else if (typeof data === 'string') { + pdfData = data; // URL string + } else if (data && typeof data === 'object' && 'data' in data) { + pdfData = data; // Already in {data: ArrayBuffer} format + } else { + pdfData = data; // Pass through as-is + } + + const loadingTask = getDocument( + typeof pdfData === 'string' ? { + url: pdfData, + disableAutoFetch: options.disableAutoFetch ?? true, + disableStream: options.disableStream ?? true, + stopAtErrors: options.stopAtErrors ?? false, + verbosity: options.verbosity ?? 0 + } : { + ...pdfData, + disableAutoFetch: options.disableAutoFetch ?? true, + disableStream: options.disableStream ?? true, + stopAtErrors: options.stopAtErrors ?? false, + verbosity: options.verbosity ?? 
0 + } + ); try { const pdf = await loadingTask.promise; diff --git a/frontend/src/services/thumbnailGenerationService.ts b/frontend/src/services/thumbnailGenerationService.ts index a8f130dd9..d59a2341e 100644 --- a/frontend/src/services/thumbnailGenerationService.ts +++ b/frontend/src/services/thumbnailGenerationService.ts @@ -60,8 +60,12 @@ export class ThumbnailGenerationService { this.evictLeastRecentlyUsedPDF(); } - const { getDocument } = await import('pdfjs-dist'); - const pdf = await getDocument({ data: pdfArrayBuffer }).promise; + // Use centralized worker manager instead of direct getDocument + const pdf = await pdfWorkerManager.createDocument(pdfArrayBuffer, { + disableAutoFetch: true, + disableStream: true, + stopAtErrors: false + }); this.pdfDocumentCache.set(fileId, { pdf, @@ -98,7 +102,7 @@ export class ThumbnailGenerationService { } if (oldestEntry) { - oldestEntry[1].pdf.destroy(); // Clean up PDF worker + pdfWorkerManager.destroyDocument(oldestEntry[1].pdf); // Use worker manager for cleanup this.pdfDocumentCache.delete(oldestEntry[0]); } } @@ -257,9 +261,9 @@ export class ThumbnailGenerationService { } clearPDFCache(): void { - // Destroy all cached PDF documents + // Destroy all cached PDF documents using worker manager for (const [, cached] of this.pdfDocumentCache) { - cached.pdf.destroy(); + pdfWorkerManager.destroyDocument(cached.pdf); } this.pdfDocumentCache.clear(); } @@ -267,7 +271,7 @@ export class ThumbnailGenerationService { clearPDFCacheForFile(fileId: string): void { const cached = this.pdfDocumentCache.get(fileId); if (cached) { - cached.pdf.destroy(); + pdfWorkerManager.destroyDocument(cached.pdf); this.pdfDocumentCache.delete(fileId); } } diff --git a/frontend/src/utils/thumbnailUtils.ts b/frontend/src/utils/thumbnailUtils.ts index 544ea91fc..c1a130b83 100644 --- a/frontend/src/utils/thumbnailUtils.ts +++ b/frontend/src/utils/thumbnailUtils.ts @@ -1,4 +1,4 @@ -import { getDocument } from "pdfjs-dist"; +import { pdfWorkerManager } 
from '../services/pdfWorkerManager'; export interface ThumbnailWithMetadata { thumbnail: string | undefined; @@ -303,16 +303,15 @@ function formatFileSize(bytes: number): string { async function generatePDFThumbnail(arrayBuffer: ArrayBuffer, file: File, scale: number): Promise { try { - const pdf = await getDocument({ - data: arrayBuffer, + const pdf = await pdfWorkerManager.createDocument(arrayBuffer, { disableAutoFetch: true, disableStream: true - }).promise; + }); const thumbnail = await generateStandardPDFThumbnail(pdf, scale); - // Immediately clean up memory after thumbnail generation - pdf.destroy(); + // Immediately clean up memory after thumbnail generation using worker manager + pdfWorkerManager.destroyDocument(pdf); return thumbnail; } catch (error) { if (error instanceof Error) { @@ -385,7 +384,7 @@ export async function generateThumbnailWithMetadata(file: File): Promise