Thumbnail generation improvements. PDF worker management improvements

This commit is contained in:
Reece Browne 2025-08-20 14:10:40 +01:00
parent ddb79a1662
commit 3a5402b55a
10 changed files with 104 additions and 63 deletions

View File

@ -13,12 +13,6 @@ import { Command } from '../../hooks/useUndoRedo';
import { useFileState } from '../../contexts/FileContext';
import { useThumbnailGeneration } from '../../hooks/useThumbnailGeneration';
import styles from './PageEditor.module.css';
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
// Ensure PDF.js worker is available
if (!GlobalWorkerOptions.workerSrc) {
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
}
interface PageThumbnailProps {
page: PDFPage;

View File

@ -1,7 +1,7 @@
import React, { useEffect, useState, useRef, useCallback } from "react";
import { Paper, Stack, Text, ScrollArea, Loader, Center, Button, Group, NumberInput, useMantineTheme, ActionIcon, Box, Tabs } from "@mantine/core";
import { getDocument, GlobalWorkerOptions } from "pdfjs-dist";
import { useTranslation } from "react-i18next";
import { pdfWorkerManager } from "../../services/pdfWorkerManager";
import ArrowBackIosNewIcon from "@mui/icons-material/ArrowBackIosNew";
import ArrowForwardIosIcon from "@mui/icons-material/ArrowForwardIos";
import FirstPageIcon from "@mui/icons-material/FirstPage";
@ -16,7 +16,6 @@ import SkeletonLoader from '../shared/SkeletonLoader';
import { useFileState, useFileActions, useCurrentFile, useProcessedFiles } from "../../contexts/FileContext";
import { useFileWithUrl } from "../../hooks/useFileWithUrl";
GlobalWorkerOptions.workerSrc = "/pdf.worker.js";
// Lazy loading page image component
interface LazyPageImageProps {
@ -399,7 +398,7 @@ const Viewer = ({
throw new Error('No valid PDF source available');
}
const pdf = await getDocument(pdfData).promise;
const pdf = await pdfWorkerManager.createDocument(pdfData);
pdfDocRef.current = pdf;
setNumPages(pdf.numPages);
if (!cancelled) {
@ -420,6 +419,11 @@ const Viewer = ({
cancelled = true;
// Stop any ongoing preloading
preloadingRef.current = false;
// Cleanup PDF document using worker manager
if (pdfDocRef.current) {
pdfWorkerManager.destroyDocument(pdfDocRef.current);
pdfDocRef.current = null;
}
// Cleanup ArrayBuffer reference to help garbage collection
currentArrayBufferRef.current = null;
};

View File

@ -10,6 +10,7 @@ import { fileStorage, StoredFile } from '../services/fileStorage';
import { FileId } from '../types/fileContext';
import { FileMetadata } from '../types/file';
import { generateThumbnailForFile } from '../utils/thumbnailUtils';
import { pdfWorkerManager } from '../services/pdfWorkerManager';
interface IndexedDBContextValue {
// Core CRUD operations
@ -82,16 +83,15 @@ export function IndexedDBProvider({ children }: IndexedDBProviderProps) {
// DEBUG: Check original file before saving
if (DEBUG && file.type === 'application/pdf') {
try {
const { getDocument } = await import('pdfjs-dist');
const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise;
const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
console.log(`🔍 BEFORE SAVE - Original file:`, {
name: file.name,
size: file.size,
arrayBufferSize: arrayBuffer.byteLength,
pages: pdf.numPages
});
pdf.destroy();
pdfWorkerManager.destroyDocument(pdf);
} catch (error) {
console.error(`🔍 Error validating file before save:`, error);
}
@ -152,11 +152,10 @@ export function IndexedDBProvider({ children }: IndexedDBProviderProps) {
// Quick PDF validation
try {
const { getDocument } = await import('pdfjs-dist');
const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise;
const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
console.log(`🔍 AFTER LOAD - PDF validation: ${pdf.numPages} pages in reconstructed file`);
pdf.destroy();
pdfWorkerManager.destroyDocument(pdf);
} catch (error) {
console.error(`🔍 AFTER LOAD - PDF reconstruction error:`, error);
}

View File

@ -1,6 +1,6 @@
import { useState, useCallback } from 'react';
import { getDocument } from 'pdfjs-dist';
import { PDFDocument, PDFPage } from '../types/pageEditor';
import { pdfWorkerManager } from '../services/pdfWorkerManager';
export function usePDFProcessor() {
const [loading, setLoading] = useState(false);
@ -13,7 +13,7 @@ export function usePDFProcessor() {
): Promise<string> => {
try {
const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise;
const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
const page = await pdf.getPage(pageNumber);
const viewport = page.getViewport({ scale });
@ -29,8 +29,8 @@ export function usePDFProcessor() {
await page.render({ canvasContext: context, viewport }).promise;
const thumbnail = canvas.toDataURL();
// Clean up
pdf.destroy();
// Clean up using worker manager
pdfWorkerManager.destroyDocument(pdf);
return thumbnail;
} catch (error) {
@ -39,13 +39,35 @@ export function usePDFProcessor() {
}
}, []);
/**
 * Internal helper: renders a single page of an already-opened PDF document
 * onto an off-screen canvas and returns the result as a data URL.
 *
 * This function neither opens nor destroys the document it is given.
 *
 * @param pdf - An opened PDF document; it must provide `getPage(pageNumber)`.
 * @param pageNumber - Page to render, passed unchanged to `pdf.getPage`.
 * @param scale - Viewport scale factor; defaults to 0.5.
 * @returns The string produced by `canvas.toDataURL()` after rendering.
 * @throws Error if a 2D rendering context cannot be obtained from the canvas.
 */
const generateThumbnailFromPDF = useCallback(async (
  pdf: any,
  pageNumber: number,
  scale: number = 0.5
): Promise<string> => {
  const pdfPage = await pdf.getPage(pageNumber);
  const pageViewport = pdfPage.getViewport({ scale });

  // Size the off-screen canvas to the scaled page dimensions.
  const thumbnailCanvas = document.createElement('canvas');
  thumbnailCanvas.width = pageViewport.width;
  thumbnailCanvas.height = pageViewport.height;

  const ctx = thumbnailCanvas.getContext('2d');
  if (!ctx) {
    throw new Error('Could not get canvas context');
  }

  // Wait for the render task to finish before reading the pixels back.
  const renderTask = pdfPage.render({ canvasContext: ctx, viewport: pageViewport });
  await renderTask.promise;

  return thumbnailCanvas.toDataURL();
}, []);
const processPDFFile = useCallback(async (file: File): Promise<PDFDocument> => {
setLoading(true);
setError(null);
try {
const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise;
const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
const totalPages = pdf.numPages;
const pages: PDFPage[] = [];
@ -61,19 +83,19 @@ export function usePDFProcessor() {
});
}
// Generate thumbnails for first 10 pages immediately for better UX
// Generate thumbnails for first 10 pages immediately using the same PDF instance
const priorityPages = Math.min(10, totalPages);
for (let i = 1; i <= priorityPages; i++) {
try {
const thumbnail = await generatePageThumbnail(file, i);
const thumbnail = await generateThumbnailFromPDF(pdf, i);
pages[i - 1].thumbnail = thumbnail;
} catch (error) {
console.warn(`Failed to generate thumbnail for page ${i}:`, error);
}
}
// Clean up
pdf.destroy();
// Clean up using worker manager
pdfWorkerManager.destroyDocument(pdf);
const document: PDFDocument = {
id: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
@ -91,7 +113,7 @@ export function usePDFProcessor() {
} finally {
setLoading(false);
}
}, [generatePageThumbnail]);
}, [generateThumbnailFromPDF]);
return {
processPDFFile,

View File

@ -1,5 +1,6 @@
import { useState, useEffect } from 'react';
import * as pdfjsLib from 'pdfjs-dist';
import { pdfWorkerManager } from '../services/pdfWorkerManager';
export interface PdfSignatureDetectionResult {
hasDigitalSignatures: boolean;
@ -21,14 +22,12 @@ export const usePdfSignatureDetection = (files: File[]): PdfSignatureDetectionRe
let foundSignature = false;
try {
// Set up PDF.js worker
pdfjsLib.GlobalWorkerOptions.workerSrc = '/pdfjs-legacy/pdf.worker.mjs';
for (const file of files) {
const arrayBuffer = await file.arrayBuffer();
try {
const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise;
const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
for (let i = 1; i <= pdf.numPages; i++) {
const page = await pdf.getPage(i);
@ -42,6 +41,9 @@ export const usePdfSignatureDetection = (files: File[]): PdfSignatureDetectionRe
if (foundSignature) break;
}
// Clean up PDF document using worker manager
pdfWorkerManager.destroyDocument(pdf);
} catch (error) {
console.warn('Error analyzing PDF for signatures:', error);
}

View File

@ -1,5 +1,5 @@
import { getDocument } from 'pdfjs-dist';
import { FileAnalysis, ProcessingStrategy } from '../types/processing';
import { pdfWorkerManager } from './pdfWorkerManager';
export class FileAnalyzer {
private static readonly SIZE_THRESHOLDS = {
@ -66,17 +66,16 @@ export class FileAnalyzer {
// For large files, try the whole file first (PDF.js needs the complete structure)
const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({
data: arrayBuffer,
const pdf = await pdfWorkerManager.createDocument(arrayBuffer, {
stopAtErrors: false, // Don't stop at minor errors
verbosity: 0 // Suppress PDF.js warnings
}).promise;
});
const pageCount = pdf.numPages;
const isEncrypted = (pdf as any).isEncrypted;
// Clean up
pdf.destroy();
// Clean up using worker manager
pdfWorkerManager.destroyDocument(pdf);
return {
pageCount,

View File

@ -1,9 +1,6 @@
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
import { ProcessedFile, ProcessingState, PDFPage } from '../types/processing';
import { ProcessingCache } from './processingCache';
// Set up PDF.js worker
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
import { pdfWorkerManager } from './pdfWorkerManager';
export class PDFProcessingService {
private static instance: PDFProcessingService;
@ -96,7 +93,7 @@ export class PDFProcessingService {
onProgress: (progress: number) => void
): Promise<ProcessedFile> {
const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise;
const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
const totalPages = pdf.numPages;
onProgress(10); // PDF loaded
@ -129,7 +126,7 @@ export class PDFProcessingService {
onProgress(progress);
}
pdf.destroy();
pdfWorkerManager.destroyDocument(pdf);
onProgress(100);
return {

View File

@ -39,9 +39,10 @@ class PDFWorkerManager {
/**
* Create a PDF document with proper lifecycle management
* Supports ArrayBuffer, Uint8Array, URL string, or {data: ArrayBuffer} object
*/
async createDocument(
data: ArrayBuffer | Uint8Array,
data: ArrayBuffer | Uint8Array | string | { data: ArrayBuffer },
options: {
disableAutoFetch?: boolean;
disableStream?: boolean;
@ -55,13 +56,33 @@ class PDFWorkerManager {
await this.waitForAvailableWorker();
}
const loadingTask = getDocument({
data,
disableAutoFetch: options.disableAutoFetch ?? true,
disableStream: options.disableStream ?? true,
stopAtErrors: options.stopAtErrors ?? false,
verbosity: options.verbosity ?? 0
});
// Normalize input data to PDF.js format
let pdfData: any;
if (data instanceof ArrayBuffer || data instanceof Uint8Array) {
pdfData = { data };
} else if (typeof data === 'string') {
pdfData = data; // URL string
} else if (data && typeof data === 'object' && 'data' in data) {
pdfData = data; // Already in {data: ArrayBuffer} format
} else {
pdfData = data; // Pass through as-is
}
const loadingTask = getDocument(
typeof pdfData === 'string' ? {
url: pdfData,
disableAutoFetch: options.disableAutoFetch ?? true,
disableStream: options.disableStream ?? true,
stopAtErrors: options.stopAtErrors ?? false,
verbosity: options.verbosity ?? 0
} : {
...pdfData,
disableAutoFetch: options.disableAutoFetch ?? true,
disableStream: options.disableStream ?? true,
stopAtErrors: options.stopAtErrors ?? false,
verbosity: options.verbosity ?? 0
}
);
try {
const pdf = await loadingTask.promise;

View File

@ -60,8 +60,12 @@ export class ThumbnailGenerationService {
this.evictLeastRecentlyUsedPDF();
}
const { getDocument } = await import('pdfjs-dist');
const pdf = await getDocument({ data: pdfArrayBuffer }).promise;
// Use centralized worker manager instead of direct getDocument
const pdf = await pdfWorkerManager.createDocument(pdfArrayBuffer, {
disableAutoFetch: true,
disableStream: true,
stopAtErrors: false
});
this.pdfDocumentCache.set(fileId, {
pdf,
@ -98,7 +102,7 @@ export class ThumbnailGenerationService {
}
if (oldestEntry) {
oldestEntry[1].pdf.destroy(); // Clean up PDF worker
pdfWorkerManager.destroyDocument(oldestEntry[1].pdf); // Use worker manager for cleanup
this.pdfDocumentCache.delete(oldestEntry[0]);
}
}
@ -257,9 +261,9 @@ export class ThumbnailGenerationService {
}
clearPDFCache(): void {
// Destroy all cached PDF documents
// Destroy all cached PDF documents using worker manager
for (const [, cached] of this.pdfDocumentCache) {
cached.pdf.destroy();
pdfWorkerManager.destroyDocument(cached.pdf);
}
this.pdfDocumentCache.clear();
}
@ -267,7 +271,7 @@ export class ThumbnailGenerationService {
clearPDFCacheForFile(fileId: string): void {
const cached = this.pdfDocumentCache.get(fileId);
if (cached) {
cached.pdf.destroy();
pdfWorkerManager.destroyDocument(cached.pdf);
this.pdfDocumentCache.delete(fileId);
}
}

View File

@ -1,4 +1,4 @@
import { getDocument } from "pdfjs-dist";
import { pdfWorkerManager } from '../services/pdfWorkerManager';
export interface ThumbnailWithMetadata {
thumbnail: string | undefined;
@ -303,16 +303,15 @@ function formatFileSize(bytes: number): string {
async function generatePDFThumbnail(arrayBuffer: ArrayBuffer, file: File, scale: number): Promise<string> {
try {
const pdf = await getDocument({
data: arrayBuffer,
const pdf = await pdfWorkerManager.createDocument(arrayBuffer, {
disableAutoFetch: true,
disableStream: true
}).promise;
});
const thumbnail = await generateStandardPDFThumbnail(pdf, scale);
// Immediately clean up memory after thumbnail generation
pdf.destroy();
// Immediately clean up memory after thumbnail generation using worker manager
pdfWorkerManager.destroyDocument(pdf);
return thumbnail;
} catch (error) {
if (error instanceof Error) {
@ -385,7 +384,7 @@ export async function generateThumbnailWithMetadata(file: File): Promise<Thumbna
try {
const arrayBuffer = await file.arrayBuffer();
const pdf = await getDocument({ data: arrayBuffer }).promise;
const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
const pageCount = pdf.numPages;
const page = await pdf.getPage(1);
@ -396,14 +395,14 @@ export async function generateThumbnailWithMetadata(file: File): Promise<Thumbna
const context = canvas.getContext("2d");
if (!context) {
pdf.destroy();
pdfWorkerManager.destroyDocument(pdf);
throw new Error('Could not get canvas context');
}
await page.render({ canvasContext: context, viewport }).promise;
const thumbnail = canvas.toDataURL();
pdf.destroy();
pdfWorkerManager.destroyDocument(pdf);
return { thumbnail, pageCount };
} catch (error) {