mirror of
https://github.com/Stirling-Tools/Stirling-PDF.git
synced 2025-08-26 06:09:23 +00:00
Thumbnail generation improvements. PDF worker management improvements
This commit is contained in:
parent
ddb79a1662
commit
3a5402b55a
@ -13,12 +13,6 @@ import { Command } from '../../hooks/useUndoRedo';
|
||||
import { useFileState } from '../../contexts/FileContext';
|
||||
import { useThumbnailGeneration } from '../../hooks/useThumbnailGeneration';
|
||||
import styles from './PageEditor.module.css';
|
||||
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
|
||||
|
||||
// Ensure PDF.js worker is available
|
||||
if (!GlobalWorkerOptions.workerSrc) {
|
||||
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
|
||||
}
|
||||
|
||||
interface PageThumbnailProps {
|
||||
page: PDFPage;
|
||||
|
@ -1,7 +1,7 @@
|
||||
import React, { useEffect, useState, useRef, useCallback } from "react";
|
||||
import { Paper, Stack, Text, ScrollArea, Loader, Center, Button, Group, NumberInput, useMantineTheme, ActionIcon, Box, Tabs } from "@mantine/core";
|
||||
import { getDocument, GlobalWorkerOptions } from "pdfjs-dist";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { pdfWorkerManager } from "../../services/pdfWorkerManager";
|
||||
import ArrowBackIosNewIcon from "@mui/icons-material/ArrowBackIosNew";
|
||||
import ArrowForwardIosIcon from "@mui/icons-material/ArrowForwardIos";
|
||||
import FirstPageIcon from "@mui/icons-material/FirstPage";
|
||||
@ -16,7 +16,6 @@ import SkeletonLoader from '../shared/SkeletonLoader';
|
||||
import { useFileState, useFileActions, useCurrentFile, useProcessedFiles } from "../../contexts/FileContext";
|
||||
import { useFileWithUrl } from "../../hooks/useFileWithUrl";
|
||||
|
||||
GlobalWorkerOptions.workerSrc = "/pdf.worker.js";
|
||||
|
||||
// Lazy loading page image component
|
||||
interface LazyPageImageProps {
|
||||
@ -399,7 +398,7 @@ const Viewer = ({
|
||||
throw new Error('No valid PDF source available');
|
||||
}
|
||||
|
||||
const pdf = await getDocument(pdfData).promise;
|
||||
const pdf = await pdfWorkerManager.createDocument(pdfData);
|
||||
pdfDocRef.current = pdf;
|
||||
setNumPages(pdf.numPages);
|
||||
if (!cancelled) {
|
||||
@ -420,6 +419,11 @@ const Viewer = ({
|
||||
cancelled = true;
|
||||
// Stop any ongoing preloading
|
||||
preloadingRef.current = false;
|
||||
// Cleanup PDF document using worker manager
|
||||
if (pdfDocRef.current) {
|
||||
pdfWorkerManager.destroyDocument(pdfDocRef.current);
|
||||
pdfDocRef.current = null;
|
||||
}
|
||||
// Cleanup ArrayBuffer reference to help garbage collection
|
||||
currentArrayBufferRef.current = null;
|
||||
};
|
||||
|
@ -10,6 +10,7 @@ import { fileStorage, StoredFile } from '../services/fileStorage';
|
||||
import { FileId } from '../types/fileContext';
|
||||
import { FileMetadata } from '../types/file';
|
||||
import { generateThumbnailForFile } from '../utils/thumbnailUtils';
|
||||
import { pdfWorkerManager } from '../services/pdfWorkerManager';
|
||||
|
||||
interface IndexedDBContextValue {
|
||||
// Core CRUD operations
|
||||
@ -82,16 +83,15 @@ export function IndexedDBProvider({ children }: IndexedDBProviderProps) {
|
||||
// DEBUG: Check original file before saving
|
||||
if (DEBUG && file.type === 'application/pdf') {
|
||||
try {
|
||||
const { getDocument } = await import('pdfjs-dist');
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const pdf = await getDocument({ data: arrayBuffer }).promise;
|
||||
const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
|
||||
console.log(`🔍 BEFORE SAVE - Original file:`, {
|
||||
name: file.name,
|
||||
size: file.size,
|
||||
arrayBufferSize: arrayBuffer.byteLength,
|
||||
pages: pdf.numPages
|
||||
});
|
||||
pdf.destroy();
|
||||
pdfWorkerManager.destroyDocument(pdf);
|
||||
} catch (error) {
|
||||
console.error(`🔍 Error validating file before save:`, error);
|
||||
}
|
||||
@ -152,11 +152,10 @@ export function IndexedDBProvider({ children }: IndexedDBProviderProps) {
|
||||
|
||||
// Quick PDF validation
|
||||
try {
|
||||
const { getDocument } = await import('pdfjs-dist');
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const pdf = await getDocument({ data: arrayBuffer }).promise;
|
||||
const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
|
||||
console.log(`🔍 AFTER LOAD - PDF validation: ${pdf.numPages} pages in reconstructed file`);
|
||||
pdf.destroy();
|
||||
pdfWorkerManager.destroyDocument(pdf);
|
||||
} catch (error) {
|
||||
console.error(`🔍 AFTER LOAD - PDF reconstruction error:`, error);
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
import { useState, useCallback } from 'react';
|
||||
import { getDocument } from 'pdfjs-dist';
|
||||
import { PDFDocument, PDFPage } from '../types/pageEditor';
|
||||
import { pdfWorkerManager } from '../services/pdfWorkerManager';
|
||||
|
||||
export function usePDFProcessor() {
|
||||
const [loading, setLoading] = useState(false);
|
||||
@ -13,7 +13,7 @@ export function usePDFProcessor() {
|
||||
): Promise<string> => {
|
||||
try {
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const pdf = await getDocument({ data: arrayBuffer }).promise;
|
||||
const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
|
||||
const page = await pdf.getPage(pageNumber);
|
||||
|
||||
const viewport = page.getViewport({ scale });
|
||||
@ -29,8 +29,8 @@ export function usePDFProcessor() {
|
||||
await page.render({ canvasContext: context, viewport }).promise;
|
||||
const thumbnail = canvas.toDataURL();
|
||||
|
||||
// Clean up
|
||||
pdf.destroy();
|
||||
// Clean up using worker manager
|
||||
pdfWorkerManager.destroyDocument(pdf);
|
||||
|
||||
return thumbnail;
|
||||
} catch (error) {
|
||||
@ -39,13 +39,35 @@ export function usePDFProcessor() {
|
||||
}
|
||||
}, []);
|
||||
|
||||
// Internal function to generate thumbnail from already-opened PDF
|
||||
const generateThumbnailFromPDF = useCallback(async (
|
||||
pdf: any,
|
||||
pageNumber: number,
|
||||
scale: number = 0.5
|
||||
): Promise<string> => {
|
||||
const page = await pdf.getPage(pageNumber);
|
||||
|
||||
const viewport = page.getViewport({ scale });
|
||||
const canvas = document.createElement('canvas');
|
||||
canvas.width = viewport.width;
|
||||
canvas.height = viewport.height;
|
||||
|
||||
const context = canvas.getContext('2d');
|
||||
if (!context) {
|
||||
throw new Error('Could not get canvas context');
|
||||
}
|
||||
|
||||
await page.render({ canvasContext: context, viewport }).promise;
|
||||
return canvas.toDataURL();
|
||||
}, []);
|
||||
|
||||
const processPDFFile = useCallback(async (file: File): Promise<PDFDocument> => {
|
||||
setLoading(true);
|
||||
setError(null);
|
||||
|
||||
try {
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const pdf = await getDocument({ data: arrayBuffer }).promise;
|
||||
const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
|
||||
const totalPages = pdf.numPages;
|
||||
|
||||
const pages: PDFPage[] = [];
|
||||
@ -61,19 +83,19 @@ export function usePDFProcessor() {
|
||||
});
|
||||
}
|
||||
|
||||
// Generate thumbnails for first 10 pages immediately for better UX
|
||||
// Generate thumbnails for first 10 pages immediately using the same PDF instance
|
||||
const priorityPages = Math.min(10, totalPages);
|
||||
for (let i = 1; i <= priorityPages; i++) {
|
||||
try {
|
||||
const thumbnail = await generatePageThumbnail(file, i);
|
||||
const thumbnail = await generateThumbnailFromPDF(pdf, i);
|
||||
pages[i - 1].thumbnail = thumbnail;
|
||||
} catch (error) {
|
||||
console.warn(`Failed to generate thumbnail for page ${i}:`, error);
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up
|
||||
pdf.destroy();
|
||||
// Clean up using worker manager
|
||||
pdfWorkerManager.destroyDocument(pdf);
|
||||
|
||||
const document: PDFDocument = {
|
||||
id: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
|
||||
@ -91,7 +113,7 @@ export function usePDFProcessor() {
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
}, [generatePageThumbnail]);
|
||||
}, [generateThumbnailFromPDF]);
|
||||
|
||||
return {
|
||||
processPDFFile,
|
||||
|
@ -1,5 +1,6 @@
|
||||
import { useState, useEffect } from 'react';
|
||||
import * as pdfjsLib from 'pdfjs-dist';
|
||||
import { pdfWorkerManager } from '../services/pdfWorkerManager';
|
||||
|
||||
export interface PdfSignatureDetectionResult {
|
||||
hasDigitalSignatures: boolean;
|
||||
@ -21,14 +22,12 @@ export const usePdfSignatureDetection = (files: File[]): PdfSignatureDetectionRe
|
||||
let foundSignature = false;
|
||||
|
||||
try {
|
||||
// Set up PDF.js worker
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = '/pdfjs-legacy/pdf.worker.mjs';
|
||||
|
||||
for (const file of files) {
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
|
||||
try {
|
||||
const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise;
|
||||
const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
|
||||
|
||||
for (let i = 1; i <= pdf.numPages; i++) {
|
||||
const page = await pdf.getPage(i);
|
||||
@ -42,6 +41,9 @@ export const usePdfSignatureDetection = (files: File[]): PdfSignatureDetectionRe
|
||||
|
||||
if (foundSignature) break;
|
||||
}
|
||||
|
||||
// Clean up PDF document using worker manager
|
||||
pdfWorkerManager.destroyDocument(pdf);
|
||||
} catch (error) {
|
||||
console.warn('Error analyzing PDF for signatures:', error);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
import { getDocument } from 'pdfjs-dist';
|
||||
import { FileAnalysis, ProcessingStrategy } from '../types/processing';
|
||||
import { pdfWorkerManager } from './pdfWorkerManager';
|
||||
|
||||
export class FileAnalyzer {
|
||||
private static readonly SIZE_THRESHOLDS = {
|
||||
@ -66,17 +66,16 @@ export class FileAnalyzer {
|
||||
// For large files, try the whole file first (PDF.js needs the complete structure)
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
|
||||
const pdf = await getDocument({
|
||||
data: arrayBuffer,
|
||||
const pdf = await pdfWorkerManager.createDocument(arrayBuffer, {
|
||||
stopAtErrors: false, // Don't stop at minor errors
|
||||
verbosity: 0 // Suppress PDF.js warnings
|
||||
}).promise;
|
||||
});
|
||||
|
||||
const pageCount = pdf.numPages;
|
||||
const isEncrypted = (pdf as any).isEncrypted;
|
||||
|
||||
// Clean up
|
||||
pdf.destroy();
|
||||
// Clean up using worker manager
|
||||
pdfWorkerManager.destroyDocument(pdf);
|
||||
|
||||
return {
|
||||
pageCount,
|
||||
|
@ -1,9 +1,6 @@
|
||||
import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
|
||||
import { ProcessedFile, ProcessingState, PDFPage } from '../types/processing';
|
||||
import { ProcessingCache } from './processingCache';
|
||||
|
||||
// Set up PDF.js worker
|
||||
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';
|
||||
import { pdfWorkerManager } from './pdfWorkerManager';
|
||||
|
||||
export class PDFProcessingService {
|
||||
private static instance: PDFProcessingService;
|
||||
@ -96,7 +93,7 @@ export class PDFProcessingService {
|
||||
onProgress: (progress: number) => void
|
||||
): Promise<ProcessedFile> {
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const pdf = await getDocument({ data: arrayBuffer }).promise;
|
||||
const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
|
||||
const totalPages = pdf.numPages;
|
||||
|
||||
onProgress(10); // PDF loaded
|
||||
@ -129,7 +126,7 @@ export class PDFProcessingService {
|
||||
onProgress(progress);
|
||||
}
|
||||
|
||||
pdf.destroy();
|
||||
pdfWorkerManager.destroyDocument(pdf);
|
||||
onProgress(100);
|
||||
|
||||
return {
|
||||
|
@ -39,9 +39,10 @@ class PDFWorkerManager {
|
||||
|
||||
/**
|
||||
* Create a PDF document with proper lifecycle management
|
||||
* Supports ArrayBuffer, Uint8Array, URL string, or {data: ArrayBuffer} object
|
||||
*/
|
||||
async createDocument(
|
||||
data: ArrayBuffer | Uint8Array,
|
||||
data: ArrayBuffer | Uint8Array | string | { data: ArrayBuffer },
|
||||
options: {
|
||||
disableAutoFetch?: boolean;
|
||||
disableStream?: boolean;
|
||||
@ -55,13 +56,33 @@ class PDFWorkerManager {
|
||||
await this.waitForAvailableWorker();
|
||||
}
|
||||
|
||||
const loadingTask = getDocument({
|
||||
data,
|
||||
disableAutoFetch: options.disableAutoFetch ?? true,
|
||||
disableStream: options.disableStream ?? true,
|
||||
stopAtErrors: options.stopAtErrors ?? false,
|
||||
verbosity: options.verbosity ?? 0
|
||||
});
|
||||
// Normalize input data to PDF.js format
|
||||
let pdfData: any;
|
||||
if (data instanceof ArrayBuffer || data instanceof Uint8Array) {
|
||||
pdfData = { data };
|
||||
} else if (typeof data === 'string') {
|
||||
pdfData = data; // URL string
|
||||
} else if (data && typeof data === 'object' && 'data' in data) {
|
||||
pdfData = data; // Already in {data: ArrayBuffer} format
|
||||
} else {
|
||||
pdfData = data; // Pass through as-is
|
||||
}
|
||||
|
||||
const loadingTask = getDocument(
|
||||
typeof pdfData === 'string' ? {
|
||||
url: pdfData,
|
||||
disableAutoFetch: options.disableAutoFetch ?? true,
|
||||
disableStream: options.disableStream ?? true,
|
||||
stopAtErrors: options.stopAtErrors ?? false,
|
||||
verbosity: options.verbosity ?? 0
|
||||
} : {
|
||||
...pdfData,
|
||||
disableAutoFetch: options.disableAutoFetch ?? true,
|
||||
disableStream: options.disableStream ?? true,
|
||||
stopAtErrors: options.stopAtErrors ?? false,
|
||||
verbosity: options.verbosity ?? 0
|
||||
}
|
||||
);
|
||||
|
||||
try {
|
||||
const pdf = await loadingTask.promise;
|
||||
|
@ -60,8 +60,12 @@ export class ThumbnailGenerationService {
|
||||
this.evictLeastRecentlyUsedPDF();
|
||||
}
|
||||
|
||||
const { getDocument } = await import('pdfjs-dist');
|
||||
const pdf = await getDocument({ data: pdfArrayBuffer }).promise;
|
||||
// Use centralized worker manager instead of direct getDocument
|
||||
const pdf = await pdfWorkerManager.createDocument(pdfArrayBuffer, {
|
||||
disableAutoFetch: true,
|
||||
disableStream: true,
|
||||
stopAtErrors: false
|
||||
});
|
||||
|
||||
this.pdfDocumentCache.set(fileId, {
|
||||
pdf,
|
||||
@ -98,7 +102,7 @@ export class ThumbnailGenerationService {
|
||||
}
|
||||
|
||||
if (oldestEntry) {
|
||||
oldestEntry[1].pdf.destroy(); // Clean up PDF worker
|
||||
pdfWorkerManager.destroyDocument(oldestEntry[1].pdf); // Use worker manager for cleanup
|
||||
this.pdfDocumentCache.delete(oldestEntry[0]);
|
||||
}
|
||||
}
|
||||
@ -257,9 +261,9 @@ export class ThumbnailGenerationService {
|
||||
}
|
||||
|
||||
clearPDFCache(): void {
|
||||
// Destroy all cached PDF documents
|
||||
// Destroy all cached PDF documents using worker manager
|
||||
for (const [, cached] of this.pdfDocumentCache) {
|
||||
cached.pdf.destroy();
|
||||
pdfWorkerManager.destroyDocument(cached.pdf);
|
||||
}
|
||||
this.pdfDocumentCache.clear();
|
||||
}
|
||||
@ -267,7 +271,7 @@ export class ThumbnailGenerationService {
|
||||
clearPDFCacheForFile(fileId: string): void {
|
||||
const cached = this.pdfDocumentCache.get(fileId);
|
||||
if (cached) {
|
||||
cached.pdf.destroy();
|
||||
pdfWorkerManager.destroyDocument(cached.pdf);
|
||||
this.pdfDocumentCache.delete(fileId);
|
||||
}
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
import { getDocument } from "pdfjs-dist";
|
||||
import { pdfWorkerManager } from '../services/pdfWorkerManager';
|
||||
|
||||
export interface ThumbnailWithMetadata {
|
||||
thumbnail: string | undefined;
|
||||
@ -303,16 +303,15 @@ function formatFileSize(bytes: number): string {
|
||||
|
||||
async function generatePDFThumbnail(arrayBuffer: ArrayBuffer, file: File, scale: number): Promise<string> {
|
||||
try {
|
||||
const pdf = await getDocument({
|
||||
data: arrayBuffer,
|
||||
const pdf = await pdfWorkerManager.createDocument(arrayBuffer, {
|
||||
disableAutoFetch: true,
|
||||
disableStream: true
|
||||
}).promise;
|
||||
});
|
||||
|
||||
const thumbnail = await generateStandardPDFThumbnail(pdf, scale);
|
||||
|
||||
// Immediately clean up memory after thumbnail generation
|
||||
pdf.destroy();
|
||||
// Immediately clean up memory after thumbnail generation using worker manager
|
||||
pdfWorkerManager.destroyDocument(pdf);
|
||||
return thumbnail;
|
||||
} catch (error) {
|
||||
if (error instanceof Error) {
|
||||
@ -385,7 +384,7 @@ export async function generateThumbnailWithMetadata(file: File): Promise<Thumbna
|
||||
|
||||
try {
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const pdf = await getDocument({ data: arrayBuffer }).promise;
|
||||
const pdf = await pdfWorkerManager.createDocument(arrayBuffer);
|
||||
|
||||
const pageCount = pdf.numPages;
|
||||
const page = await pdf.getPage(1);
|
||||
@ -396,14 +395,14 @@ export async function generateThumbnailWithMetadata(file: File): Promise<Thumbna
|
||||
const context = canvas.getContext("2d");
|
||||
|
||||
if (!context) {
|
||||
pdf.destroy();
|
||||
pdfWorkerManager.destroyDocument(pdf);
|
||||
throw new Error('Could not get canvas context');
|
||||
}
|
||||
|
||||
await page.render({ canvasContext: context, viewport }).promise;
|
||||
const thumbnail = canvas.toDataURL();
|
||||
|
||||
pdf.destroy();
|
||||
pdfWorkerManager.destroyDocument(pdf);
|
||||
return { thumbnail, pageCount };
|
||||
|
||||
} catch (error) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user