Thumbnail generation improvements. PDF worker management improvements

This commit is contained in:
Reece Browne 2025-08-20 14:10:40 +01:00
parent ddb79a1662
commit 3a5402b55a
10 changed files with 104 additions and 63 deletions

View File

@ -13,12 +13,6 @@ import { Command } from '../../hooks/useUndoRedo';
import { useFileState } from '../../contexts/FileContext'; import { useFileState } from '../../contexts/FileContext';
import { useThumbnailGeneration } from '../../hooks/useThumbnailGeneration'; import { useThumbnailGeneration } from '../../hooks/useThumbnailGeneration';
import styles from './PageEditor.module.css'; import styles from './PageEditor.module.css';
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
// Ensure PDF.js worker is available
if (!GlobalWorkerOptions.workerSrc) {
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
}
interface PageThumbnailProps { interface PageThumbnailProps {
page: PDFPage; page: PDFPage;

View File

@ -1,7 +1,7 @@
import React, { useEffect, useState, useRef, useCallback } from "react"; import React, { useEffect, useState, useRef, useCallback } from "react";
import { Paper, Stack, Text, ScrollArea, Loader, Center, Button, Group, NumberInput, useMantineTheme, ActionIcon, Box, Tabs } from "@mantine/core"; import { Paper, Stack, Text, ScrollArea, Loader, Center, Button, Group, NumberInput, useMantineTheme, ActionIcon, Box, Tabs } from "@mantine/core";
import { getDocument, GlobalWorkerOptions } from "pdfjs-dist";
import { useTranslation } from "react-i18next"; import { useTranslation } from "react-i18next";
import { pdfWorkerManager } from "../../services/pdfWorkerManager";
import ArrowBackIosNewIcon from "@mui/icons-material/ArrowBackIosNew"; import ArrowBackIosNewIcon from "@mui/icons-material/ArrowBackIosNew";
import ArrowForwardIosIcon from "@mui/icons-material/ArrowForwardIos"; import ArrowForwardIosIcon from "@mui/icons-material/ArrowForwardIos";
import FirstPageIcon from "@mui/icons-material/FirstPage"; import FirstPageIcon from "@mui/icons-material/FirstPage";
@ -16,7 +16,6 @@ import SkeletonLoader from '../shared/SkeletonLoader';
import { useFileState, useFileActions, useCurrentFile, useProcessedFiles } from "../../contexts/FileContext"; import { useFileState, useFileActions, useCurrentFile, useProcessedFiles } from "../../contexts/FileContext";
import { useFileWithUrl } from "../../hooks/useFileWithUrl"; import { useFileWithUrl } from "../../hooks/useFileWithUrl";
GlobalWorkerOptions.workerSrc = "/pdf.worker.js";
// Lazy loading page image component // Lazy loading page image component
interface LazyPageImageProps { interface LazyPageImageProps {
@ -399,7 +398,7 @@ const Viewer = ({
throw new Error('No valid PDF source available'); throw new Error('No valid PDF source available');
} }
const pdf = await getDocument(pdfData).promise; const pdf = await pdfWorkerManager.createDocument(pdfData);
pdfDocRef.current = pdf; pdfDocRef.current = pdf;
setNumPages(pdf.numPages); setNumPages(pdf.numPages);
if (!cancelled) { if (!cancelled) {
@ -420,6 +419,11 @@ const Viewer = ({
cancelled = true; cancelled = true;
// Stop any ongoing preloading // Stop any ongoing preloading
preloadingRef.current = false; preloadingRef.current = false;
// Cleanup PDF document using worker manager
if (pdfDocRef.current) {
pdfWorkerManager.destroyDocument(pdfDocRef.current);
pdfDocRef.current = null;
}
// Cleanup ArrayBuffer reference to help garbage collection // Cleanup ArrayBuffer reference to help garbage collection
currentArrayBufferRef.current = null; currentArrayBufferRef.current = null;
}; };

View File

@ -10,6 +10,7 @@ import { fileStorage, StoredFile } from '../services/fileStorage';
import { FileId } from '../types/fileContext'; import { FileId } from '../types/fileContext';
import { FileMetadata } from '../types/file'; import { FileMetadata } from '../types/file';
import { generateThumbnailForFile } from '../utils/thumbnailUtils'; import { generateThumbnailForFile } from '../utils/thumbnailUtils';
import { pdfWorkerManager } from '../services/pdfWorkerManager';
interface IndexedDBContextValue { interface IndexedDBContextValue {
// Core CRUD operations // Core CRUD operations
@ -82,16 +83,15 @@ export function IndexedDBProvider({ children }: IndexedDBProviderProps) {
// DEBUG: Check original file before saving // DEBUG: Check original file before saving
if (DEBUG && file.type === 'application/pdf') { if (DEBUG && file.type === 'application/pdf') {
try { try {
const { getDocument } = await import('pdfjs-dist');
const arrayBuffer = await file.arrayBuffer(); const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise; const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
console.log(`🔍 BEFORE SAVE - Original file:`, { console.log(`🔍 BEFORE SAVE - Original file:`, {
name: file.name, name: file.name,
size: file.size, size: file.size,
arrayBufferSize: arrayBuffer.byteLength, arrayBufferSize: arrayBuffer.byteLength,
pages: pdf.numPages pages: pdf.numPages
}); });
pdf.destroy(); pdfWorkerManager.destroyDocument(pdf);
} catch (error) { } catch (error) {
console.error(`🔍 Error validating file before save:`, error); console.error(`🔍 Error validating file before save:`, error);
} }
@ -152,11 +152,10 @@ export function IndexedDBProvider({ children }: IndexedDBProviderProps) {
// Quick PDF validation // Quick PDF validation
try { try {
const { getDocument } = await import('pdfjs-dist');
const arrayBuffer = await file.arrayBuffer(); const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise; const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
console.log(`🔍 AFTER LOAD - PDF validation: ${pdf.numPages} pages in reconstructed file`); console.log(`🔍 AFTER LOAD - PDF validation: ${pdf.numPages} pages in reconstructed file`);
pdf.destroy(); pdfWorkerManager.destroyDocument(pdf);
} catch (error) { } catch (error) {
console.error(`🔍 AFTER LOAD - PDF reconstruction error:`, error); console.error(`🔍 AFTER LOAD - PDF reconstruction error:`, error);
} }

View File

@ -1,6 +1,6 @@
import { useState, useCallback } from 'react'; import { useState, useCallback } from 'react';
import { getDocument } from 'pdfjs-dist';
import { PDFDocument, PDFPage } from '../types/pageEditor'; import { PDFDocument, PDFPage } from '../types/pageEditor';
import { pdfWorkerManager } from '../services/pdfWorkerManager';
export function usePDFProcessor() { export function usePDFProcessor() {
const [loading, setLoading] = useState(false); const [loading, setLoading] = useState(false);
@ -13,7 +13,7 @@ export function usePDFProcessor() {
): Promise<string> => { ): Promise<string> => {
try { try {
const arrayBuffer = await file.arrayBuffer(); const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise; const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
const page = await pdf.getPage(pageNumber); const page = await pdf.getPage(pageNumber);
const viewport = page.getViewport({ scale }); const viewport = page.getViewport({ scale });
@ -29,8 +29,8 @@ export function usePDFProcessor() {
await page.render({ canvasContext: context, viewport }).promise; await page.render({ canvasContext: context, viewport }).promise;
const thumbnail = canvas.toDataURL(); const thumbnail = canvas.toDataURL();
// Clean up // Clean up using worker manager
pdf.destroy(); pdfWorkerManager.destroyDocument(pdf);
return thumbnail; return thumbnail;
} catch (error) { } catch (error) {
@ -39,13 +39,35 @@ export function usePDFProcessor() {
} }
}, []); }, []);
// Internal function to generate thumbnail from already-opened PDF
/**
 * Renders a single page of an already-loaded PDF document onto a freshly
 * created canvas element and returns the canvas contents as a data URL.
 *
 * This helper does not open or destroy the document: it only calls
 * `getPage` on the `pdf` it is given, so the caller keeps responsibility
 * for the document's lifecycle.
 *
 * @param pdf - Loaded PDF document object; only `getPage(pageNumber)` is
 *   used here. Typed `any` — presumably a pdf.js document proxy; confirm
 *   against the worker manager's return type.
 * @param pageNumber - Page number forwarded unchanged to `pdf.getPage`.
 * @param scale - Viewport scale passed to `page.getViewport`; defaults to 0.5.
 * @returns The string produced by `canvas.toDataURL()` after rendering.
 * @throws Error when a 2D canvas context cannot be obtained.
 */
const generateThumbnailFromPDF = useCallback(async (
pdf: any,
pageNumber: number,
scale: number = 0.5
): Promise<string> => {
const page = await pdf.getPage(pageNumber);
const viewport = page.getViewport({ scale });
// Size the canvas to the scaled viewport so the rendered page fills it.
const canvas = document.createElement('canvas');
canvas.width = viewport.width;
canvas.height = viewport.height;
const context = canvas.getContext('2d');
// getContext may return null; fail loudly rather than rendering into nothing.
if (!context) {
throw new Error('Could not get canvas context');
}
// Wait for the render task to finish before reading pixels off the canvas.
await page.render({ canvasContext: context, viewport }).promise;
return canvas.toDataURL();
// Empty dependency list: the callback reads no props or state.
}, []);
const processPDFFile = useCallback(async (file: File): Promise<PDFDocument> => { const processPDFFile = useCallback(async (file: File): Promise<PDFDocument> => {
setLoading(true); setLoading(true);
setError(null); setError(null);
try { try {
const arrayBuffer = await file.arrayBuffer(); const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise; const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
const totalPages = pdf.numPages; const totalPages = pdf.numPages;
const pages: PDFPage[] = []; const pages: PDFPage[] = [];
@ -61,19 +83,19 @@ export function usePDFProcessor() {
}); });
} }
// Generate thumbnails for first 10 pages immediately for better UX // Generate thumbnails for first 10 pages immediately using the same PDF instance
const priorityPages = Math.min(10, totalPages); const priorityPages = Math.min(10, totalPages);
for (let i = 1; i <= priorityPages; i++) { for (let i = 1; i <= priorityPages; i++) {
try { try {
const thumbnail = await generatePageThumbnail(file, i); const thumbnail = await generateThumbnailFromPDF(pdf, i);
pages[i - 1].thumbnail = thumbnail; pages[i - 1].thumbnail = thumbnail;
} catch (error) { } catch (error) {
console.warn(`Failed to generate thumbnail for page ${i}:`, error); console.warn(`Failed to generate thumbnail for page ${i}:`, error);
} }
} }
// Clean up // Clean up using worker manager
pdf.destroy(); pdfWorkerManager.destroyDocument(pdf);
const document: PDFDocument = { const document: PDFDocument = {
id: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`, id: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
@ -91,7 +113,7 @@ export function usePDFProcessor() {
} finally { } finally {
setLoading(false); setLoading(false);
} }
}, [generatePageThumbnail]); }, [generateThumbnailFromPDF]);
return { return {
processPDFFile, processPDFFile,

View File

@ -1,5 +1,6 @@
import { useState, useEffect } from 'react'; import { useState, useEffect } from 'react';
import * as pdfjsLib from 'pdfjs-dist'; import * as pdfjsLib from 'pdfjs-dist';
import { pdfWorkerManager } from '../services/pdfWorkerManager';
export interface PdfSignatureDetectionResult { export interface PdfSignatureDetectionResult {
hasDigitalSignatures: boolean; hasDigitalSignatures: boolean;
@ -21,14 +22,12 @@ export const usePdfSignatureDetection = (files: File[]): PdfSignatureDetectionRe
let foundSignature = false; let foundSignature = false;
try { try {
// Set up PDF.js worker
pdfjsLib.GlobalWorkerOptions.workerSrc = '/pdfjs-legacy/pdf.worker.mjs';
for (const file of files) { for (const file of files) {
const arrayBuffer = await file.arrayBuffer(); const arrayBuffer = await file.arrayBuffer();
try { try {
const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise; const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
for (let i = 1; i <= pdf.numPages; i++) { for (let i = 1; i <= pdf.numPages; i++) {
const page = await pdf.getPage(i); const page = await pdf.getPage(i);
@ -42,6 +41,9 @@ export const usePdfSignatureDetection = (files: File[]): PdfSignatureDetectionRe
if (foundSignature) break; if (foundSignature) break;
} }
// Clean up PDF document using worker manager
pdfWorkerManager.destroyDocument(pdf);
} catch (error) { } catch (error) {
console.warn('Error analyzing PDF for signatures:', error); console.warn('Error analyzing PDF for signatures:', error);
} }

View File

@ -1,5 +1,5 @@
import { getDocument } from 'pdfjs-dist';
import { FileAnalysis, ProcessingStrategy } from '../types/processing'; import { FileAnalysis, ProcessingStrategy } from '../types/processing';
import { pdfWorkerManager } from './pdfWorkerManager';
export class FileAnalyzer { export class FileAnalyzer {
private static readonly SIZE_THRESHOLDS = { private static readonly SIZE_THRESHOLDS = {
@ -66,17 +66,16 @@ export class FileAnalyzer {
// For large files, try the whole file first (PDF.js needs the complete structure) // For large files, try the whole file first (PDF.js needs the complete structure)
const arrayBuffer = await file.arrayBuffer(); const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ const pdf = await pdfWorkerManager.createDocument(arrayBuffer, {
data: arrayBuffer,
stopAtErrors: false, // Don't stop at minor errors stopAtErrors: false, // Don't stop at minor errors
verbosity: 0 // Suppress PDF.js warnings verbosity: 0 // Suppress PDF.js warnings
}).promise; });
const pageCount = pdf.numPages; const pageCount = pdf.numPages;
const isEncrypted = (pdf as any).isEncrypted; const isEncrypted = (pdf as any).isEncrypted;
// Clean up // Clean up using worker manager
pdf.destroy(); pdfWorkerManager.destroyDocument(pdf);
return { return {
pageCount, pageCount,

View File

@ -1,9 +1,6 @@
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
import { ProcessedFile, ProcessingState, PDFPage } from '../types/processing'; import { ProcessedFile, ProcessingState, PDFPage } from '../types/processing';
import { ProcessingCache } from './processingCache'; import { ProcessingCache } from './processingCache';
import { pdfWorkerManager } from './pdfWorkerManager';
// Set up PDF.js worker
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
export class PDFProcessingService { export class PDFProcessingService {
private static instance: PDFProcessingService; private static instance: PDFProcessingService;
@ -96,7 +93,7 @@ export class PDFProcessingService {
onProgress: (progress: number) => void onProgress: (progress: number) => void
): Promise<ProcessedFile> { ): Promise<ProcessedFile> {
const arrayBuffer = await file.arrayBuffer(); const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise; const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
const totalPages = pdf.numPages; const totalPages = pdf.numPages;
onProgress(10); // PDF loaded onProgress(10); // PDF loaded
@ -129,7 +126,7 @@ export class PDFProcessingService {
onProgress(progress); onProgress(progress);
} }
pdf.destroy(); pdfWorkerManager.destroyDocument(pdf);
onProgress(100); onProgress(100);
return { return {

View File

@ -39,9 +39,10 @@ class PDFWorkerManager {
/** /**
* Create a PDF document with proper lifecycle management * Create a PDF document with proper lifecycle management
* Supports ArrayBuffer, Uint8Array, URL string, or {data: ArrayBuffer} object
*/ */
async createDocument( async createDocument(
data: ArrayBuffer | Uint8Array, data: ArrayBuffer | Uint8Array | string | { data: ArrayBuffer },
options: { options: {
disableAutoFetch?: boolean; disableAutoFetch?: boolean;
disableStream?: boolean; disableStream?: boolean;
@ -55,13 +56,33 @@ class PDFWorkerManager {
await this.waitForAvailableWorker(); await this.waitForAvailableWorker();
} }
const loadingTask = getDocument({ // Normalize input data to PDF.js format
data, let pdfData: any;
disableAutoFetch: options.disableAutoFetch ?? true, if (data instanceof ArrayBuffer || data instanceof Uint8Array) {
disableStream: options.disableStream ?? true, pdfData = { data };
stopAtErrors: options.stopAtErrors ?? false, } else if (typeof data === 'string') {
verbosity: options.verbosity ?? 0 pdfData = data; // URL string
}); } else if (data && typeof data === 'object' && 'data' in data) {
pdfData = data; // Already in {data: ArrayBuffer} format
} else {
pdfData = data; // Pass through as-is
}
const loadingTask = getDocument(
typeof pdfData === 'string' ? {
url: pdfData,
disableAutoFetch: options.disableAutoFetch ?? true,
disableStream: options.disableStream ?? true,
stopAtErrors: options.stopAtErrors ?? false,
verbosity: options.verbosity ?? 0
} : {
...pdfData,
disableAutoFetch: options.disableAutoFetch ?? true,
disableStream: options.disableStream ?? true,
stopAtErrors: options.stopAtErrors ?? false,
verbosity: options.verbosity ?? 0
}
);
try { try {
const pdf = await loadingTask.promise; const pdf = await loadingTask.promise;

View File

@ -60,8 +60,12 @@ export class ThumbnailGenerationService {
this.evictLeastRecentlyUsedPDF(); this.evictLeastRecentlyUsedPDF();
} }
const { getDocument } = await import('pdfjs-dist'); // Use centralized worker manager instead of direct getDocument
const pdf = await getDocument({ data: pdfArrayBuffer }).promise; const pdf = await pdfWorkerManager.createDocument(pdfArrayBuffer, {
disableAutoFetch: true,
disableStream: true,
stopAtErrors: false
});
this.pdfDocumentCache.set(fileId, { this.pdfDocumentCache.set(fileId, {
pdf, pdf,
@ -98,7 +102,7 @@ export class ThumbnailGenerationService {
} }
if (oldestEntry) { if (oldestEntry) {
oldestEntry[1].pdf.destroy(); // Clean up PDF worker pdfWorkerManager.destroyDocument(oldestEntry[1].pdf); // Use worker manager for cleanup
this.pdfDocumentCache.delete(oldestEntry[0]); this.pdfDocumentCache.delete(oldestEntry[0]);
} }
} }
@ -257,9 +261,9 @@ export class ThumbnailGenerationService {
} }
clearPDFCache(): void { clearPDFCache(): void {
// Destroy all cached PDF documents // Destroy all cached PDF documents using worker manager
for (const [, cached] of this.pdfDocumentCache) { for (const [, cached] of this.pdfDocumentCache) {
cached.pdf.destroy(); pdfWorkerManager.destroyDocument(cached.pdf);
} }
this.pdfDocumentCache.clear(); this.pdfDocumentCache.clear();
} }
@ -267,7 +271,7 @@ export class ThumbnailGenerationService {
clearPDFCacheForFile(fileId: string): void { clearPDFCacheForFile(fileId: string): void {
const cached = this.pdfDocumentCache.get(fileId); const cached = this.pdfDocumentCache.get(fileId);
if (cached) { if (cached) {
cached.pdf.destroy(); pdfWorkerManager.destroyDocument(cached.pdf);
this.pdfDocumentCache.delete(fileId); this.pdfDocumentCache.delete(fileId);
} }
} }

View File

@ -1,4 +1,4 @@
import { getDocument } from "pdfjs-dist"; import { pdfWorkerManager } from '../services/pdfWorkerManager';
export interface ThumbnailWithMetadata { export interface ThumbnailWithMetadata {
thumbnail: string | undefined; thumbnail: string | undefined;
@ -303,16 +303,15 @@ function formatFileSize(bytes: number): string {
async function generatePDFThumbnail(arrayBuffer: ArrayBuffer, file: File, scale: number): Promise<string> { async function generatePDFThumbnail(arrayBuffer: ArrayBuffer, file: File, scale: number): Promise<string> {
try { try {
const pdf = await getDocument({ const pdf = await pdfWorkerManager.createDocument(arrayBuffer, {
data: arrayBuffer,
disableAutoFetch: true, disableAutoFetch: true,
disableStream: true disableStream: true
}).promise; });
const thumbnail = await generateStandardPDFThumbnail(pdf, scale); const thumbnail = await generateStandardPDFThumbnail(pdf, scale);
// Immediately clean up memory after thumbnail generation // Immediately clean up memory after thumbnail generation using worker manager
pdf.destroy(); pdfWorkerManager.destroyDocument(pdf);
return thumbnail; return thumbnail;
} catch (error) { } catch (error) {
if (error instanceof Error) { if (error instanceof Error) {
@ -385,7 +384,7 @@ export async function generateThumbnailWithMetadata(file: File): Promise<Thumbna
try { try {
const arrayBuffer = await file.arrayBuffer(); const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise; const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
const pageCount = pdf.numPages; const pageCount = pdf.numPages;
const page = await pdf.getPage(1); const page = await pdf.getPage(1);
@ -396,14 +395,14 @@ export async function generateThumbnailWithMetadata(file: File): Promise<Thumbna
const context = canvas.getContext("2d"); const context = canvas.getContext("2d");
if (!context) { if (!context) {
pdf.destroy(); pdfWorkerManager.destroyDocument(pdf);
throw new Error('Could not get canvas context'); throw new Error('Could not get canvas context');
} }
await page.render({ canvasContext: context, viewport }).promise; await page.render({ canvasContext: context, viewport }).promise;
const thumbnail = canvas.toDataURL(); const thumbnail = canvas.toDataURL();
pdf.destroy(); pdfWorkerManager.destroyDocument(pdf);
return { thumbnail, pageCount }; return { thumbnail, pageCount };
} catch (error) { } catch (error) {