Stop creating a new PDF worker for every batch

This commit is contained in:
Reece Browne 2025-08-20 13:34:13 +01:00
parent 1eb89a22c2
commit ddb79a1662
3 changed files with 108 additions and 6 deletions

View File

@ -354,6 +354,7 @@ const PageEditor = ({
// Start parallel thumbnail generation WITHOUT blocking the main thread // Start parallel thumbnail generation WITHOUT blocking the main thread
await generateThumbnails( await generateThumbnails(
fileId, // Add fileId as first parameter
arrayBuffer, arrayBuffer,
pageNumbers, pageNumbers,
{ {
@ -363,10 +364,10 @@ const PageEditor = ({
parallelBatches: 3 // Use 3 Web Workers in parallel parallelBatches: 3 // Use 3 Web Workers in parallel
}, },
// Progress callback for thumbnail updates // Progress callback for thumbnail updates
(progress) => { (progress: { completed: number; total: number; thumbnails: Array<{ pageNumber: number; thumbnail: string }> }) => {
// Batch process thumbnails to reduce main thread work // Batch process thumbnails to reduce main thread work
requestAnimationFrame(() => { requestAnimationFrame(() => {
progress.thumbnails.forEach(({ pageNumber, thumbnail }) => { progress.thumbnails.forEach(({ pageNumber, thumbnail }: { pageNumber: number; thumbnail: string }) => {
// Use stable fileId for cache key // Use stable fileId for cache key
const pageId = `${fileId}-page-${pageNumber}`; const pageId = `${fileId}-page-${pageNumber}`;
addThumbnailToCache(pageId, thumbnail); addThumbnailToCache(pageId, thumbnail);

View File

@ -70,7 +70,11 @@ async function processRequestQueue() {
console.log(`📸 Batch generating ${requests.length} thumbnails for pages: ${pageNumbers.slice(0, 5).join(', ')}${pageNumbers.length > 5 ? '...' : ''}`); console.log(`📸 Batch generating ${requests.length} thumbnails for pages: ${pageNumbers.slice(0, 5).join(', ')}${pageNumbers.length > 5 ? '...' : ''}`);
// Build the fileId from the file's name, size, and last-modified time; it is the key for PDF document caching
const fileId = file.name + '_' + file.size + '_' + file.lastModified;
const results = await thumbnailGenerationService.generateThumbnails( const results = await thumbnailGenerationService.generateThumbnails(
fileId,
arrayBuffer, arrayBuffer,
pageNumbers, pageNumbers,
{ scale: 1.0, quality: 0.8, batchSize: BATCH_SIZE }, { scale: 1.0, quality: 0.8, batchSize: BATCH_SIZE },
@ -111,6 +115,7 @@ async function processRequestQueue() {
*/ */
export function useThumbnailGeneration() { export function useThumbnailGeneration() {
const generateThumbnails = useCallback(async ( const generateThumbnails = useCallback(async (
fileId: string,
pdfArrayBuffer: ArrayBuffer, pdfArrayBuffer: ArrayBuffer,
pageNumbers: number[], pageNumbers: number[],
options: { options: {
@ -122,6 +127,7 @@ export function useThumbnailGeneration() {
onProgress?: (progress: { completed: number; total: number; thumbnails: any[] }) => void onProgress?: (progress: { completed: number; total: number; thumbnails: any[] }) => void
) => { ) => {
return thumbnailGenerationService.generateThumbnails( return thumbnailGenerationService.generateThumbnails(
fileId,
pdfArrayBuffer, pdfArrayBuffer,
pageNumbers, pageNumbers,
options, options,
@ -160,6 +166,10 @@ export function useThumbnailGeneration() {
thumbnailGenerationService.destroy(); thumbnailGenerationService.destroy();
}, []); }, []);
// Forwards to the service's per-file PDF cache clearing; memoized with an
// empty dependency list.
const clearPDFCacheForFile = useCallback(
  (fileId: string): void => thumbnailGenerationService.clearPDFCacheForFile(fileId),
  []
);
const requestThumbnail = useCallback(async ( const requestThumbnail = useCallback(async (
pageId: string, pageId: string,
file: File, file: File,
@ -223,6 +233,7 @@ export function useThumbnailGeneration() {
getCacheStats, getCacheStats,
stopGeneration, stopGeneration,
destroyThumbnails, destroyThumbnails,
clearPDFCacheForFile,
requestThumbnail requestThumbnail
}; };
} }

View File

@ -2,6 +2,8 @@
* High-performance thumbnail generation service using main thread processing * High-performance thumbnail generation service using main thread processing
*/ */
import { pdfWorkerManager } from './pdfWorkerManager';
interface ThumbnailResult { interface ThumbnailResult {
pageNumber: number; pageNumber: number;
thumbnail: string; thumbnail: string;
@ -22,20 +24,90 @@ interface CachedThumbnail {
sizeBytes: number; sizeBytes: number;
} }
/**
 * One entry of the PDF document cache.
 */
interface CachedPDFDocument {
  /** Number of callers currently holding this document (incremented on acquire, decremented on release). */
  refCount: number;
  /** Timestamp (from `Date.now()`) of the most recent acquire; used to pick eviction candidates. */
  lastUsed: number;
  /** The loaded document — a PDFDocumentProxy from pdfjs-dist. */
  pdf: any;
}
export class ThumbnailGenerationService { export class ThumbnailGenerationService {
// Session-based thumbnail cache // Session-based thumbnail cache
private thumbnailCache = new Map<string, CachedThumbnail>(); private thumbnailCache = new Map<string, CachedThumbnail>();
private maxCacheSizeBytes = 1024 * 1024 * 1024; // 1GB cache limit private maxCacheSizeBytes = 1024 * 1024 * 1024; // 1GB cache limit
private currentCacheSize = 0; private currentCacheSize = 0;
// PDF document cache to reuse PDF instances and avoid creating multiple workers
private pdfDocumentCache = new Map<string, CachedPDFDocument>();
private maxPdfCacheSize = 10; // Keep up to 10 PDF documents cached
constructor(private maxWorkers: number = 3) { constructor(private maxWorkers: number = 3) {
// PDF rendering requires DOM access, so we use optimized main thread processing // PDF rendering requires DOM access, so we use optimized main thread processing
} }
/**
 * Get or create a cached PDF document.
 *
 * On a cache hit the entry's `lastUsed` timestamp is refreshed and its
 * `refCount` is incremented. On a miss the document is loaded with
 * pdfjs-dist's `getDocument` and stored with a `refCount` of 1.
 *
 * If the cache is full and no entry can be evicted (all are in use), the
 * new document is still cached, so the cache may temporarily exceed
 * `maxPdfCacheSize`.
 *
 * @param fileId Cache key identifying the file.
 * @param pdfArrayBuffer Raw PDF bytes, used only when the document is not cached yet.
 * @returns The cached (or newly loaded) PDF document.
 */
private async getCachedPDFDocument(fileId: string, pdfArrayBuffer: ArrayBuffer): Promise<any> {
  const cached = this.pdfDocumentCache.get(fileId);
  if (cached) {
    cached.lastUsed = Date.now();
    cached.refCount++;
    return cached.pdf;
  }

  // Evict old PDFs if cache is full. Stop as soon as an eviction pass frees
  // nothing (every entry is still referenced); otherwise this loop would
  // never terminate.
  while (this.pdfDocumentCache.size >= this.maxPdfCacheSize) {
    const sizeBefore = this.pdfDocumentCache.size;
    this.evictLeastRecentlyUsedPDF();
    if (this.pdfDocumentCache.size >= sizeBefore) {
      break;
    }
  }

  const { getDocument } = await import('pdfjs-dist');
  const pdf = await getDocument({ data: pdfArrayBuffer }).promise;

  // A concurrent call for the same fileId may have populated the cache while
  // this one was awaiting. Reuse that entry and discard the duplicate instead
  // of overwriting (and thereby leaking) the document that is already cached.
  const loadedMeanwhile = this.pdfDocumentCache.get(fileId);
  if (loadedMeanwhile) {
    void Promise.resolve(pdf.destroy()).catch((error: unknown) => {
      console.warn('Failed to destroy duplicate PDF document:', error);
    });
    loadedMeanwhile.lastUsed = Date.now();
    loadedMeanwhile.refCount++;
    return loadedMeanwhile.pdf;
  }

  this.pdfDocumentCache.set(fileId, {
    pdf,
    lastUsed: Date.now(),
    refCount: 1
  });
  return pdf;
}
/**
 * Release a reference to a cached PDF document.
 *
 * The document is not destroyed here; it stays in the cache for potential
 * reuse. Releasing an unknown fileId, or an entry whose count is already
 * zero, is a no-op.
 *
 * @param fileId Cache key of the document being released.
 */
private releasePDFDocument(fileId: string): void {
  const cached = this.pdfDocumentCache.get(fileId);
  // Never let the count go negative: an unbalanced release (e.g. the entry was
  // cleared and re-created between acquire and release) would otherwise leave
  // a count that never equals zero again.
  if (cached && cached.refCount > 0) {
    cached.refCount--;
  }
}
/**
 * Evict the least recently used PDF document that is not currently in use.
 *
 * Only entries with no outstanding references are candidates. When every
 * entry is still referenced, nothing is evicted.
 */
private evictLeastRecentlyUsedPDF(): void {
  let oldestEntry: [string, CachedPDFDocument] | null = null;
  // Seed with Infinity rather than Date.now(): an idle entry whose lastUsed
  // equals the current millisecond must still be eligible for eviction.
  let oldestTime = Infinity;

  for (const [key, value] of this.pdfDocumentCache.entries()) {
    // `<= 0` also treats a non-positive count as "not in use".
    if (value.refCount <= 0 && value.lastUsed < oldestTime) {
      oldestTime = value.lastUsed;
      oldestEntry = [key, value];
    }
  }

  if (oldestEntry) {
    oldestEntry[1].pdf.destroy(); // Clean up PDF worker
    this.pdfDocumentCache.delete(oldestEntry[0]);
  }
}
/** /**
* Generate thumbnails for multiple pages using main thread processing * Generate thumbnails for multiple pages using main thread processing
*/ */
async generateThumbnails( async generateThumbnails(
fileId: string,
pdfArrayBuffer: ArrayBuffer, pdfArrayBuffer: ArrayBuffer,
pageNumbers: number[], pageNumbers: number[],
options: ThumbnailGenerationOptions = {}, options: ThumbnailGenerationOptions = {},
@ -46,21 +118,21 @@ export class ThumbnailGenerationService {
quality = 0.8 quality = 0.8
} = options; } = options;
return await this.generateThumbnailsMainThread(pdfArrayBuffer, pageNumbers, scale, quality, onProgress); return await this.generateThumbnailsMainThread(fileId, pdfArrayBuffer, pageNumbers, scale, quality, onProgress);
} }
/** /**
* Main thread thumbnail generation with batching for UI responsiveness * Main thread thumbnail generation with batching for UI responsiveness
*/ */
private async generateThumbnailsMainThread( private async generateThumbnailsMainThread(
fileId: string,
pdfArrayBuffer: ArrayBuffer, pdfArrayBuffer: ArrayBuffer,
pageNumbers: number[], pageNumbers: number[],
scale: number, scale: number,
quality: number, quality: number,
onProgress?: (progress: { completed: number; total: number; thumbnails: ThumbnailResult[] }) => void onProgress?: (progress: { completed: number; total: number; thumbnails: ThumbnailResult[] }) => void
): Promise<ThumbnailResult[]> { ): Promise<ThumbnailResult[]> {
const { getDocument } = await import('pdfjs-dist'); const pdf = await this.getCachedPDFDocument(fileId, pdfArrayBuffer);
const pdf = await getDocument({ data: pdfArrayBuffer }).promise;
const allResults: ThumbnailResult[] = []; const allResults: ThumbnailResult[] = [];
let completed = 0; let completed = 0;
@ -116,7 +188,8 @@ export class ThumbnailGenerationService {
await new Promise(resolve => setTimeout(resolve, 1)); await new Promise(resolve => setTimeout(resolve, 1));
} }
await pdf.destroy(); // Release reference to PDF document (don't destroy - keep in cache)
this.releasePDFDocument(fileId);
return allResults; return allResults;
} }
@ -183,8 +256,25 @@ export class ThumbnailGenerationService {
this.currentCacheSize = 0; this.currentCacheSize = 0;
} }
/**
 * Destroy every cached PDF document, then empty the PDF document cache.
 */
clearPDFCache(): void {
  this.pdfDocumentCache.forEach((entry) => {
    entry.pdf.destroy();
  });
  this.pdfDocumentCache.clear();
}
/**
 * Destroy and remove the cached PDF document for a single file.
 * Does nothing when no document is cached under `fileId`.
 */
clearPDFCacheForFile(fileId: string): void {
  const entry = this.pdfDocumentCache.get(fileId);
  if (!entry) {
    return;
  }
  entry.pdf.destroy();
  this.pdfDocumentCache.delete(fileId);
}
/**
 * Tear down the service: calls clearCache() and then clearPDFCache(), which
 * destroys every cached PDF document and empties the PDF document cache.
 */
destroy(): void { destroy(): void {
this.clearCache(); this.clearCache();
this.clearPDFCache();
} }
} }