Refactor file processing architecture to centralize metadata extraction, page counting, and thumbnail generation. Introduce FileProcessingService for improved file handling and performance. Update FileContext to utilize the new service for processing files upon addition, ensuring consistent metadata updates and thumbnail management.

This commit is contained in:
Reece Browne 2025-08-14 19:46:02 +01:00
parent 4a0c577312
commit 29a4e03784
5 changed files with 641 additions and 363 deletions

View File

@ -10,7 +10,6 @@ import { useToolFileSelection, useProcessedFiles, useFileState, useFileManagemen
import { FileOperation, createStableFileId } from '../../types/fileContext';
import { fileStorage } from '../../services/fileStorage';
import { generateThumbnailForFile } from '../../utils/thumbnailUtils';
import { useThumbnailGeneration } from '../../hooks/useThumbnailGeneration';
import { zipFileService } from '../../services/zipFileService';
import { detectFileExtension } from '../../utils/fileUtils';
import styles from '../pageEditor/PageEditor.module.css';
@ -48,9 +47,6 @@ const FileEditor = ({
}: FileEditorProps) => {
const { t } = useTranslation();
// Thumbnail cache for sharing with PageEditor
const { getThumbnailFromCache, addThumbnailToCache } = useThumbnailGeneration();
// Utility function to check if a file extension is supported
const isFileSupported = useCallback((fileName: string): boolean => {
const extension = detectFileExtension(fileName);
@ -157,33 +153,19 @@ const FileEditor = ({
// Convert shared files to FileEditor format
const convertToFileItem = useCallback(async (sharedFile: any): Promise<FileItem> => {
let thumbnail = sharedFile.thumbnail;
if (!thumbnail) {
// Check cache first using the file ID
const fileId = sharedFile.id || `file-${Date.now()}-${Math.random()}`;
const page1CacheKey = `${fileId}-page-1`;
thumbnail = getThumbnailFromCache(page1CacheKey);
if (!thumbnail) {
// Generate and cache thumbnail
thumbnail = await generateThumbnailForFile(sharedFile.file || sharedFile);
if (thumbnail) {
addThumbnailToCache(page1CacheKey, thumbnail);
console.log(`📸 FileEditor: Cached page-1 thumbnail for legacy file (key: ${page1CacheKey})`);
}
}
}
// Use processed data if available, otherwise fallback to legacy approach
const thumbnail = sharedFile.thumbnail || sharedFile.thumbnailUrl ||
(await generateThumbnailForFile(sharedFile.file || sharedFile));
return {
id: sharedFile.id || `file-${Date.now()}-${Math.random()}`,
name: (sharedFile.file?.name || sharedFile.name || 'unknown'),
pageCount: sharedFile.pageCount || 1, // Default to 1 page if unknown
pageCount: sharedFile.processedFile?.totalPages || sharedFile.pageCount || 1,
thumbnail: thumbnail || '',
size: sharedFile.file?.size || sharedFile.size || 0,
file: sharedFile.file || sharedFile,
};
}, [getThumbnailFromCache, addThumbnailToCache]);
}, []);
// Convert activeFiles to FileItem format using context (async to avoid blocking)
useEffect(() => {
@ -216,47 +198,11 @@ const FileEditor = ({
if (!file) continue; // Skip if file not found
// Use record's thumbnail if available, otherwise check cache, then generate
let thumbnail: string | undefined = record.thumbnailUrl;
if (!thumbnail) {
// Check if PageEditor has already cached a page-1 thumbnail for this file
const page1CacheKey = `${record.id}-page-1`;
thumbnail = getThumbnailFromCache(page1CacheKey) || undefined;
if (!thumbnail) {
try {
thumbnail = await generateThumbnailForFile(file);
// Store in cache for PageEditor to reuse
if (thumbnail) {
addThumbnailToCache(page1CacheKey, thumbnail);
console.log(`📸 FileEditor: Cached page-1 thumbnail for ${file.name} (key: ${page1CacheKey})`);
}
} catch (error) {
console.warn(`Failed to generate thumbnail for ${file.name}:`, error);
thumbnail = undefined; // Use placeholder
}
} else {
console.log(`📸 FileEditor: Reused cached page-1 thumbnail for ${file.name} (key: ${page1CacheKey})`);
}
}
// Page count estimation for display purposes only
let pageCount = 1; // Default for non-PDFs and display in FileEditor
// Use processed data from centralized file processing service
const thumbnail = record.thumbnailUrl; // Already processed by FileProcessingService
const pageCount = record.processedFile?.totalPages || 1; // Use processed page count
if (file.type === 'application/pdf') {
// Quick page count estimation for FileEditor display only
// PageEditor will do its own more thorough page detection
try {
const arrayBuffer = await file.arrayBuffer();
const text = new TextDecoder('latin1').decode(arrayBuffer);
const pageMatches = text.match(/\/Type\s*\/Page[^s]/g);
pageCount = pageMatches ? pageMatches.length : 1;
console.log(`📄 FileEditor estimated page count for ${file.name}: ${pageCount} pages (display only)`);
} catch (error) {
console.warn(`Failed to estimate page count for ${file.name}:`, error);
pageCount = 1; // Safe fallback
}
}
console.log(`📄 FileEditor: Using processed data for ${file.name}: ${pageCount} pages, thumbnail: ${!!thumbnail}`);
const convertedFile = {
id: record.id, // Use the record's UUID from FileContext

View File

@ -93,15 +93,16 @@ const PageEditor = ({
destroyThumbnails
} = useThumbnailGeneration();
// State for discovered page document
const [discoveredDocument, setDiscoveredDocument] = useState<PDFDocument | null>(null);
const [isDiscoveringPages, setIsDiscoveringPages] = useState(false);
// Get primary file record outside useMemo to track processedFile changes
const primaryFileRecord = primaryFileId ? selectors.getFileRecord(primaryFileId) : null;
const processedFilePages = primaryFileRecord?.processedFile?.pages;
const processedFileTotalPages = primaryFileRecord?.processedFile?.totalPages;
// Compute merged document with stable signature (prevents infinite loops)
const mergedPdfDocument = useMemo((): PDFDocument | null => {
if (activeFileIds.length === 0) return null;
const primaryFileRecord = primaryFileId ? selectors.getFileRecord(primaryFileId) : null;
const primaryFile = primaryFileId ? selectors.getFile(primaryFileId) : null;
// If we have file IDs but no file record, something is wrong - return null to show loading
@ -124,60 +125,54 @@ const PageEditor = ({
console.log(`🎬 PageEditor: Building document for ${name}`);
console.log(`🎬 ProcessedFile exists:`, !!processedFile);
console.log(`🎬 ProcessedFile pages:`, processedFile?.pages?.length || 0);
console.log(`🎬 ProcessedFile totalPages:`, processedFile?.totalPages || 'unknown');
if (processedFile?.pages) {
console.log(`🎬 Pages structure:`, processedFile.pages.map(p => ({ pageNumber: p.pageNumber || 'unknown', hasThumbnail: !!p.thumbnail })));
}
console.log(`🎬 Will use ${(processedFile?.pages?.length || 0) > 0 ? 'PROCESSED' : 'FALLBACK'} pages`);
// Convert processed pages to PageEditor format, or discover pages if not processed yet
let pages: PDFPage[];
if (processedFile?.pages && processedFile.pages.length > 0) {
// Use existing processed data
pages = processedFile.pages.map((page, index) => {
const pageId = `${primaryFileId}-page-${index + 1}`;
// Try multiple sources for thumbnails in order of preference:
// 1. Processed data thumbnail
// 2. Cached thumbnail from previous generation
// 3. For page 1: FileEditor's thumbnailUrl (sharing optimization)
let thumbnail = page.thumbnail || null;
if (!thumbnail) {
thumbnail = getThumbnailFromCache(pageId) || null;
}
if (!thumbnail && index === 0) {
// For page 1, also check if FileEditor has already generated a thumbnail
thumbnail = primaryFileRecord.thumbnailUrl || null;
// If we found a FileEditor thumbnail, cache it for consistency
if (thumbnail) {
addThumbnailToCache(pageId, thumbnail);
console.log(`📸 PageEditor: Reused FileEditor thumbnail for page 1 (${pageId})`);
// Convert processed pages to PageEditor format
// All processing is now handled by FileProcessingService when files are added
const pages: PDFPage[] = processedFile?.pages && processedFile.pages.length > 0
? processedFile.pages.map((page, index) => {
const pageId = `${primaryFileId}-page-${index + 1}`;
// Try multiple sources for thumbnails in order of preference:
// 1. Processed data thumbnail
// 2. Cached thumbnail from previous generation
// 3. For page 1: FileRecord's thumbnailUrl (from FileProcessingService)
let thumbnail = page.thumbnail || null;
const cachedThumbnail = getThumbnailFromCache(pageId);
if (!thumbnail && cachedThumbnail) {
thumbnail = cachedThumbnail;
console.log(`📸 PageEditor: Using cached thumbnail for page ${index + 1} (${pageId})`);
}
}
return {
id: pageId,
pageNumber: index + 1,
thumbnail,
rotation: page.rotation || 0,
if (!thumbnail && index === 0) {
// For page 1, use the thumbnail from FileProcessingService
thumbnail = primaryFileRecord.thumbnailUrl || null;
if (thumbnail) {
addThumbnailToCache(pageId, thumbnail);
console.log(`📸 PageEditor: Using FileProcessingService thumbnail for page 1 (${pageId})`);
}
}
return {
id: pageId,
pageNumber: index + 1,
thumbnail,
rotation: page.rotation || 0,
selected: false,
splitBefore: page.splitBefore || false,
};
})
: [{ // Fallback while FileProcessingService is working
id: `${primaryFileId}-page-1`,
pageNumber: 1,
thumbnail: getThumbnailFromCache(`${primaryFileId}-page-1`) || primaryFileRecord.thumbnailUrl || null,
rotation: 0,
selected: false,
splitBefore: page.splitBefore || false,
};
});
} else if (discoveredDocument && discoveredDocument.id === (primaryFileId ?? 'unknown')) {
// Use discovered document if available and matches current file
pages = discoveredDocument.pages;
} else {
// No processed data and no discovered data yet - show placeholder while discovering
console.log(`🎬 PageEditor: No processedFile data, showing placeholder while discovering pages for ${name}`);
pages = [{
id: `${primaryFileId}-page-1`,
pageNumber: 1,
thumbnail: getThumbnailFromCache(`${primaryFileId}-page-1`) || primaryFileRecord.thumbnailUrl || null,
rotation: 0,
selected: false,
splitBefore: false,
}];
}
splitBefore: false,
}];
// Create document with determined pages
@ -189,123 +184,8 @@ const PageEditor = ({
totalPages: pages.length,
destroy: () => {} // Optional cleanup function
};
}, [filesSignature, activeFileIds, primaryFileId, selectors, getThumbnailFromCache, addThumbnailToCache, discoveredDocument]);
}, [filesSignature, activeFileIds, primaryFileId, primaryFileRecord, processedFilePages, processedFileTotalPages, selectors, getThumbnailFromCache, addThumbnailToCache]);
// Async page discovery effect
useEffect(() => {
const discoverPages = async () => {
if (!primaryFileId) return;
const record = selectors.getFileRecord(primaryFileId);
const primaryFile = selectors.getFile(primaryFileId);
if (!record || !primaryFile) return;
// Skip if we already have processed data or are currently discovering
if (record.processedFile?.pages || isDiscoveringPages) return;
// Only discover for PDF files
if (primaryFile.type !== 'application/pdf') return;
console.log(`🎬 PageEditor: Starting async page discovery for ${primaryFile.name}`);
setIsDiscoveringPages(true);
try {
let discoveredPageCount = 1;
// Try PDF.js first (more accurate)
try {
const arrayBuffer = await primaryFile.arrayBuffer();
const pdfDoc = await import('pdfjs-dist').then(pdfjs => pdfjs.getDocument({
data: arrayBuffer,
disableAutoFetch: true,
disableStream: true
}).promise);
discoveredPageCount = pdfDoc.numPages;
console.log(`🎬 PageEditor: Discovered ${discoveredPageCount} pages using PDF.js`);
// Clean up PDF document immediately
pdfDoc.destroy();
} catch (pdfError) {
console.warn(`🎬 PageEditor: PDF.js failed, trying text analysis:`, pdfError);
// Fallback to text analysis
try {
const arrayBuffer = await primaryFile.arrayBuffer();
const text = new TextDecoder('latin1').decode(arrayBuffer);
const pageMatches = text.match(/\/Type\s*\/Page[^s]/g);
discoveredPageCount = pageMatches ? pageMatches.length : 1;
console.log(`🎬 PageEditor: Discovered ${discoveredPageCount} pages using text analysis`);
} catch (textError) {
console.warn(`🎬 PageEditor: Text analysis also failed:`, textError);
discoveredPageCount = 1;
}
}
// Create page structure
const pages = Array.from({ length: discoveredPageCount }, (_, index) => {
const pageId = `${primaryFileId}-page-${index + 1}`;
let thumbnail = getThumbnailFromCache(pageId) || null;
// For page 1, also check FileEditor's thumbnail
if (!thumbnail && index === 0) {
thumbnail = record.thumbnailUrl || null;
if (thumbnail) {
addThumbnailToCache(pageId, thumbnail);
console.log(`📸 PageEditor: Reused FileEditor thumbnail for page 1 (${pageId})`);
}
}
return {
id: pageId,
pageNumber: index + 1,
thumbnail,
rotation: 0,
selected: false,
splitBefore: false,
};
});
// Create discovered document
const discoveredDoc: PDFDocument = {
id: primaryFileId,
name: primaryFile.name,
file: primaryFile,
pages,
totalPages: pages.length,
destroy: () => {}
};
// Save to state for immediate UI update
setDiscoveredDocument(discoveredDoc);
// Save to FileContext for persistence
const processedFileData = {
pages: pages.map(page => ({
pageNumber: page.pageNumber,
thumbnail: page.thumbnail || undefined,
rotation: page.rotation,
splitBefore: page.splitBefore
})),
totalPages: discoveredPageCount,
lastProcessed: Date.now()
};
actions.updateFileRecord(primaryFileId, {
processedFile: processedFileData
});
console.log(`🎬 PageEditor: Page discovery complete - ${discoveredPageCount} pages saved to FileContext`);
} catch (error) {
console.error(`🎬 PageEditor: Page discovery failed:`, error);
} finally {
setIsDiscoveringPages(false);
}
};
discoverPages();
}, [primaryFileId, selectors, isDiscoveringPages, getThumbnailFromCache, addThumbnailToCache, actions]);
// Display document: Use edited version if exists, otherwise original
const displayDocument = editedDocument || mergedPdfDocument;
@ -372,150 +252,107 @@ const PageEditor = ({
// PageEditor no longer handles cleanup - it's centralized in FileContext
/**
* Using ref instead of state prevents infinite loops.
* State changes would trigger re-renders and effect re-runs.
*/
const thumbnailGenerationStarted = useRef(false);
// Simple cache-first thumbnail generation (no complex detection needed)
// Start thumbnail generation process (guards against re-entry) - stable version
const startThumbnailGeneration = useCallback(() => {
// Access current values directly - avoid stale closures
const currentDocument = mergedPdfDocument;
const currentActiveFileIds = activeFileIds;
const currentPrimaryFileId = primaryFileId;
if (!currentDocument || currentActiveFileIds.length !== 1 || !currentPrimaryFileId || thumbnailGenerationStarted.current) {
// Simple thumbnail generation - generate pages 2+ that aren't cached
const generateMissingThumbnails = useCallback(async () => {
if (!mergedPdfDocument || !primaryFileId || activeFileIds.length !== 1) {
return;
}
const file = selectors.getFile(currentPrimaryFileId);
const file = selectors.getFile(primaryFileId);
if (!file) return;
const totalPages = currentDocument.totalPages || currentDocument.pages.length || 0;
if (totalPages <= 0) return; // nothing to generate yet
thumbnailGenerationStarted.current = true;
// Run everything asynchronously to avoid blocking the main thread
setTimeout(async () => {
try {
// Load PDF array buffer for Web Workers
const arrayBuffer = await file.arrayBuffer();
// Generate page numbers for pages that don't have thumbnails yet
const pageNumbers = Array.from({ length: totalPages }, (_, i) => i + 1)
.filter(pageNum => {
const page = currentDocument.pages.find(p => p.pageNumber === pageNum);
return !page?.thumbnail; // Only generate for pages without thumbnails
});
// If no pages need thumbnails, we're done
if (pageNumbers.length === 0) {
return;
}
// Calculate quality scale based on file size
const scale = currentActiveFileIds.length === 1 && currentPrimaryFileId ?
calculateScaleFromFileSize(selectors.getFileRecord(currentPrimaryFileId)?.size || 0) : 0.2;
// Start parallel thumbnail generation WITHOUT blocking the main thread
const generationPromise = generateThumbnails(
arrayBuffer,
pageNumbers,
{
scale, // Dynamic quality based on file size
quality: 0.8,
batchSize: 15, // Smaller batches per worker for smoother UI
parallelBatches: 3 // Use 3 Web Workers in parallel
},
// Progress callback for thumbnail updates
(progress) => {
// Batch process thumbnails to reduce main thread work
requestAnimationFrame(() => {
progress.thumbnails.forEach(({ pageNumber, thumbnail }) => {
// Use stable fileId for cache key
const pageId = `${currentPrimaryFileId}-page-${pageNumber}`;
const cached = getThumbnailFromCache(pageId);
if (!cached) {
addThumbnailToCache(pageId, thumbnail);
// Persist thumbnail to FileContext for durability
const fileRecord = selectors.getFileRecord(currentPrimaryFileId);
if (fileRecord) {
const updatedProcessedFile = {
...fileRecord.processedFile,
pages: fileRecord.processedFile?.pages?.map((page, index) =>
index + 1 === pageNumber
? { ...page, thumbnail }
: page
) || [{ thumbnail }] // Create pages array if it doesn't exist
};
// For page 1, also update the file record's thumbnailUrl so FileEditor can use it directly
const updates: any = { processedFile: updatedProcessedFile };
if (pageNumber === 1) {
updates.thumbnailUrl = thumbnail;
console.log(`📸 PageEditor: Set thumbnailUrl for FileEditor reuse (${currentPrimaryFileId})`);
}
actions.updateFileRecord(currentPrimaryFileId, updates);
}
window.dispatchEvent(new CustomEvent('thumbnailReady', {
detail: { pageNumber, thumbnail, pageId }
}));
}
});
});
}
);
// Handle completion
generationPromise
.then(() => {
// Keep thumbnailGenerationStarted as true to prevent restarts
})
.catch(error => {
console.error('PageEditor: Thumbnail generation failed:', error);
thumbnailGenerationStarted.current = false;
});
} catch (error) {
console.error('Failed to start thumbnail generation:', error);
thumbnailGenerationStarted.current = false;
const totalPages = mergedPdfDocument.totalPages;
if (totalPages <= 1) return; // Only page 1, nothing to generate
// Check which pages 2+ need thumbnails (not in cache)
const pageNumbersToGenerate = [];
for (let pageNum = 2; pageNum <= totalPages; pageNum++) {
const pageId = `${primaryFileId}-page-${pageNum}`;
if (!getThumbnailFromCache(pageId)) {
pageNumbersToGenerate.push(pageNum);
}
}, 0); // setTimeout with 0ms to defer to next tick
}, [generateThumbnails, getThumbnailFromCache, addThumbnailToCache, selectors, actions]); // Only stable function dependencies
// Start thumbnail generation when files change (stable signature prevents loops)
useEffect(() => {
if (mergedPdfDocument && !thumbnailGenerationStarted.current) {
// Check if ALL pages already have thumbnails
const totalPages = mergedPdfDocument.totalPages || mergedPdfDocument.pages.length || 0;
const pagesWithThumbnails = mergedPdfDocument.pages.filter(page => page.thumbnail).length;
const hasAllThumbnails = pagesWithThumbnails === totalPages;
if (hasAllThumbnails) {
return; // Skip generation if thumbnails exist
}
// Small delay to let document render, then start thumbnail generation
const timer = setTimeout(startThumbnailGeneration, 500);
return () => clearTimeout(timer);
}
}, [filesSignature, startThumbnailGeneration]);
if (pageNumbersToGenerate.length === 0) {
console.log(`📸 PageEditor: All pages 2+ already cached, skipping generation`);
return;
}
console.log(`📸 PageEditor: Generating thumbnails for pages: [${pageNumbersToGenerate.join(', ')}]`);
try {
// Load PDF array buffer for Web Workers
const arrayBuffer = await file.arrayBuffer();
// Calculate quality scale based on file size
const scale = calculateScaleFromFileSize(selectors.getFileRecord(primaryFileId)?.size || 0);
// Start parallel thumbnail generation WITHOUT blocking the main thread
await generateThumbnails(
arrayBuffer,
pageNumbersToGenerate,
{
scale, // Dynamic quality based on file size
quality: 0.8,
batchSize: 15, // Smaller batches per worker for smoother UI
parallelBatches: 3 // Use 3 Web Workers in parallel
},
// Progress callback for thumbnail updates
(progress) => {
// Batch process thumbnails to reduce main thread work
requestAnimationFrame(() => {
progress.thumbnails.forEach(({ pageNumber, thumbnail }) => {
// Use stable fileId for cache key
const pageId = `${primaryFileId}-page-${pageNumber}`;
addThumbnailToCache(pageId, thumbnail);
// Also update the processedFile so document rebuilds include the thumbnail
const fileRecord = selectors.getFileRecord(primaryFileId);
if (fileRecord?.processedFile?.pages) {
const updatedProcessedFile = {
...fileRecord.processedFile,
pages: fileRecord.processedFile.pages.map((page, index) =>
index + 1 === pageNumber
? { ...page, thumbnail }
: page
)
};
actions.updateFileRecord(primaryFileId, { processedFile: updatedProcessedFile });
}
window.dispatchEvent(new CustomEvent('thumbnailReady', {
detail: { pageNumber, thumbnail, pageId }
}));
});
});
}
);
console.log(`📸 PageEditor: Thumbnail generation completed for pages [${pageNumbersToGenerate.join(', ')}]`);
} catch (error) {
console.error('PageEditor: Thumbnail generation failed:', error);
}
}, [mergedPdfDocument, primaryFileId, activeFileIds, generateThumbnails, getThumbnailFromCache, addThumbnailToCache, selectors, actions]);
// Simple useEffect - just generate missing thumbnails when document is ready
useEffect(() => {
if (mergedPdfDocument && mergedPdfDocument.totalPages > 1) {
console.log(`📸 PageEditor: Document ready with ${mergedPdfDocument.totalPages} pages, checking for missing thumbnails`);
generateMissingThumbnails();
}
}, [mergedPdfDocument, generateMissingThumbnails]);
// Cleanup thumbnail generation when component unmounts
useEffect(() => {
return () => {
thumbnailGenerationStarted.current = false;
// Stop any ongoing thumbnail generation
if (stopGeneration) {
stopGeneration();
}
};
}, [stopGeneration]); // Only depend on the stopGeneration function
}, [stopGeneration]);
// Clear selections when files change - use stable signature
useEffect(() => {
@ -747,6 +584,11 @@ const PageEditor = ({
request.onsuccess = () => {
const db = request.result;
// Check if the object store exists before trying to access it
if (!db.objectStoreNames.contains('drafts')) {
console.warn('drafts object store does not exist, skipping auto-save');
return;
}
const transaction = db.transaction('drafts', 'readwrite');
const store = transaction.objectStore('drafts');
store.put(draftData, draftKey);
@ -822,8 +664,20 @@ const PageEditor = ({
try {
const request = indexedDB.open('stirling-pdf-drafts', 1);
request.onupgradeneeded = () => {
const db = request.result;
if (!db.objectStoreNames.contains('drafts')) {
db.createObjectStore('drafts');
}
};
request.onsuccess = () => {
const db = request.result;
// Check if the object store exists before trying to access it
if (!db.objectStoreNames.contains('drafts')) {
console.warn('drafts object store does not exist, skipping cleanup');
return;
}
const transaction = db.transaction('drafts', 'readwrite');
const store = transaction.objectStore('drafts');
store.delete(draftKey);
@ -838,6 +692,16 @@ const PageEditor = ({
resolve(); // Don't fail the whole operation if cleanup fails
};
dbRequest.onupgradeneeded = (event) => {
const db = (event.target as IDBOpenDBRequest).result;
// Create object store if it doesn't exist
if (!db.objectStoreNames.contains('drafts')) {
db.createObjectStore('drafts');
console.log('Created drafts object store during cleanup fallback');
}
};
dbRequest.onsuccess = () => {
const db = dbRequest.result;

View File

@ -44,6 +44,7 @@ import {
import { EnhancedPDFProcessingService } from '../services/enhancedPDFProcessingService';
import { thumbnailGenerationService } from '../services/thumbnailGenerationService';
import { fileStorage } from '../services/fileStorage';
import { fileProcessingService } from '../services/fileProcessingService';
// Get service instances
const enhancedPDFProcessingService = EnhancedPDFProcessingService.getInstance();
@ -430,13 +431,37 @@ export function FileContextProvider({
fileRecords.push(record);
addedFiles.push(file);
// Start centralized file processing (async, non-blocking)
fileProcessingService.processFile(file, fileId).then(result => {
// Only update if file still exists in context
if (filesRef.current.has(fileId)) {
if (result.success && result.metadata) {
// Update with processed metadata using dispatch directly
dispatch({
type: 'UPDATE_FILE_RECORD',
payload: {
id: fileId,
updates: {
processedFile: result.metadata,
thumbnailUrl: result.metadata.thumbnailUrl
}
}
});
console.log(`✅ File processing complete for ${file.name}: ${result.metadata.totalPages} pages`);
} else {
console.warn(`❌ File processing failed for ${file.name}:`, result.error);
}
}
}).catch(error => {
console.error(`❌ File processing error for ${file.name}:`, error);
});
// Optional: Persist to IndexedDB if enabled
if (enablePersistence) {
try {
// Generate thumbnail and store in IndexedDB with our UUID
import('../utils/thumbnailUtils').then(({ generateThumbnailForFile }) => {
return generateThumbnailForFile(file);
}).then(thumbnail => {
// Use the thumbnail from processing service if available
fileProcessingService.processFile(file, fileId).then(result => {
const thumbnail = result.metadata?.thumbnailUrl;
return fileStorage.storeFile(file, fileId, thumbnail);
}).then(() => {
console.log('File persisted to IndexedDB:', fileId);
@ -472,7 +497,7 @@ export function FileContextProvider({
// Return only the newly added files
return addedFiles;
}, [enablePersistence]); // Include enablePersistence for persistence logic
}, [enablePersistence]); // Remove updateFileRecord dependency
const removeFiles = useCallback((fileIds: FileId[], deleteFromStorage: boolean = true) => {
// Clean up Files from ref map first

View File

@ -0,0 +1,151 @@
/**
* Centralized file processing service
* Handles metadata discovery, page counting, and thumbnail generation
* Called when files are added to FileContext, before any view sees them
*/
import { getDocument } from 'pdfjs-dist';
import { generateThumbnailForFile } from '../utils/thumbnailUtils';
/**
 * Metadata that FileProcessingService produces for a single file:
 * page count, a per-page structure, and the page-1 thumbnail.
 */
export interface ProcessedFileMetadata {
// Number of pages. The service uses 1 for non-PDF files and when page counting fails.
totalPages: number;
// One entry per page; the service creates `totalPages` entries numbered from 1.
pages: Array<{
pageNumber: number;
// Thumbnail string for the page; the service initially fills this for page 1 only.
thumbnail?: string;
// Initialized to 0 by the service. NOTE(review): units (presumably degrees) are not visible here — confirm.
rotation: number;
// Initialized to false by the service.
splitBefore: boolean;
}>;
thumbnailUrl?: string; // Page 1 thumbnail for FileEditor
// Date.now() value captured when the metadata object was built.
lastProcessed: number;
}
/**
 * Outcome of processing one file.
 * FileProcessingService resolves with `success: true` plus `metadata`, or with
 * `success: false` plus an `error` message; it reports failure through this
 * object rather than by rejecting.
 */
export interface FileProcessingResult {
success: boolean;
// Present when `success` is true.
metadata?: ProcessedFileMetadata;
// Human-readable failure message, set when `success` is false.
error?: string;
}
/**
 * Centralized per-file processing: page counting, page-1 thumbnail generation
 * and construction of the initial page structure.
 *
 * Concurrent requests for the same file ID share a single in-flight promise.
 * Once that promise settles its entry is removed, so a later call processes
 * the file again.
 */
class FileProcessingService {
  /** In-flight processing promises keyed by file ID. */
  private processingCache = new Map<string, Promise<FileProcessingResult>>();

  /**
   * Process a file to extract metadata, page count, and generate thumbnails.
   * This is the single source of truth for file processing.
   *
   * @param file - The file to process.
   * @param fileId - Key used to de-duplicate concurrent requests for the same file.
   * @returns The processing result; failures are reported via `success: false`
   *          rather than by rejecting.
   */
  async processFile(file: File, fileId: string): Promise<FileProcessingResult> {
    // Reuse the in-flight promise if this file is already being processed
    const existingPromise = this.processingCache.get(fileId);
    if (existingPromise) {
      console.log(`📁 FileProcessingService: Using cached processing for ${file.name}`);
      return existingPromise;
    }

    const processingPromise = this.performProcessing(file, fileId);
    this.processingCache.set(fileId, processingPromise);

    // Remove the entry once processing settles, but only if the map still holds
    // THIS promise. After clearCache() a newer run may have been registered under
    // the same ID, and a stale run must not evict it.
    const release = (): void => {
      if (this.processingCache.get(fileId) === processingPromise) {
        this.processingCache.delete(fileId);
      }
    };
    // then(release, release) instead of finally(): the derived promise can never
    // reject, so this bookkeeping chain cannot cause an unhandled rejection.
    void processingPromise.then(release, release);

    return processingPromise;
  }

  /**
   * Runs the actual processing steps and converts any unexpected error into a
   * `success: false` result.
   */
  private async performProcessing(file: File, fileId: string): Promise<FileProcessingResult> {
    console.log(`📁 FileProcessingService: Starting processing for ${file.name} (${fileId})`);

    try {
      // Non-PDF files are treated as a single page
      const totalPages = file.type === 'application/pdf' ? await this.countPdfPages(file) : 1;

      // Generate page 1 thumbnail; a failure here is non-fatal and leaves it undefined
      let thumbnailUrl: string | undefined;
      try {
        thumbnailUrl = await generateThumbnailForFile(file);
        console.log(`📁 FileProcessingService: Generated thumbnail for ${file.name}`);
      } catch (thumbError) {
        console.warn(`📁 FileProcessingService: Thumbnail generation failed for ${file.name}:`, thumbError);
      }

      // Create page structure
      const pages = Array.from({ length: totalPages }, (_, index) => ({
        pageNumber: index + 1,
        thumbnail: index === 0 ? thumbnailUrl : undefined, // Only page 1 gets thumbnail initially
        rotation: 0,
        splitBefore: false
      }));

      const metadata: ProcessedFileMetadata = {
        totalPages,
        pages,
        thumbnailUrl, // For FileEditor display
        lastProcessed: Date.now()
      };

      console.log(`📁 FileProcessingService: Processing complete for ${file.name} - ${totalPages} pages`);

      return {
        success: true,
        metadata
      };
    } catch (error) {
      console.error(`📁 FileProcessingService: Processing failed for ${file.name}:`, error);
      return {
        success: false,
        error: error instanceof Error ? error.message : 'Unknown processing error'
      };
    }
  }

  /**
   * Counts the pages of a PDF with PDF.js, falling back to a raw text scan when
   * PDF.js cannot open the file. Never throws; the final fallback is 1.
   */
  private async countPdfPages(file: File): Promise<number> {
    try {
      const arrayBuffer = await file.arrayBuffer();
      const loadingTask = getDocument({
        data: arrayBuffer,
        disableAutoFetch: true,
        disableStream: true
      });

      try {
        const pdfDoc = await loadingTask.promise;
        const totalPages = pdfDoc.numPages;
        console.log(`📁 FileProcessingService: PDF.js discovered ${totalPages} pages for ${file.name}`);
        return totalPages;
      } finally {
        // Destroy the loading task on success AND on failure so its resources are
        // released even when the document could not be opened. A cleanup failure is
        // logged only, so it cannot discard a page count that was already obtained.
        await loadingTask.destroy().catch((cleanupError: unknown) => {
          console.warn(`📁 FileProcessingService: PDF.js cleanup failed for ${file.name}:`, cleanupError);
        });
      }
    } catch (pdfError) {
      console.warn(`📁 FileProcessingService: PDF.js failed for ${file.name}, trying fallback:`, pdfError);
      return this.countPdfPagesByTextScan(file);
    }
  }

  /**
   * Fallback page count: decodes the file as latin1 and counts `/Type /Page`
   * markers. Returns 1 when nothing matches or the file cannot be read.
   */
  private async countPdfPagesByTextScan(file: File): Promise<number> {
    try {
      // The file is read again rather than reusing the buffer given to PDF.js.
      // NOTE(review): PDF.js may transfer/detach the buffer it receives — confirm
      // before sharing one buffer between both strategies.
      const arrayBuffer = await file.arrayBuffer();
      const text = new TextDecoder('latin1').decode(arrayBuffer);
      // [^s] keeps "/Type /Pages" entries from being counted as pages
      const pageMatches = text.match(/\/Type\s*\/Page[^s]/g);
      const totalPages = pageMatches ? pageMatches.length : 1;
      console.log(`📁 FileProcessingService: Text analysis discovered ${totalPages} pages for ${file.name}`);
      return totalPages;
    } catch (textError) {
      console.warn(`📁 FileProcessingService: Text analysis also failed for ${file.name}:`, textError);
      return 1;
    }
  }

  /**
   * Clear all processing caches.
   * Runs that are still in flight keep going; they are simply no longer shared
   * with new callers.
   */
  clearCache(): void {
    this.processingCache.clear();
  }

  /**
   * Check if a file is currently being processed.
   *
   * @param fileId - The ID passed to `processFile`.
   * @returns `true` while a processing promise for this ID is registered.
   */
  isProcessing(fileId: string): boolean {
    return this.processingCache.has(fileId);
  }
}
// Export singleton instance.
// The in-flight processing map is an instance field, so importers that use this
// shared instance also share its de-duplication of concurrent requests.
export const fileProcessingService = new FileProcessingService();

View File

@ -0,0 +1,292 @@
/**
* Typed operation model with discriminated unions
* Centralizes all PDF operations with proper type safety
*/
import { FileId } from './fileRecord';
/** Identifier of an operation; a plain string alias. */
export type OperationId = string;

/** Lifecycle states an operation can be in. */
export type OperationStatus =
  | 'idle'
  | 'preparing'
  | 'uploading'
  | 'processing'
  | 'completed'
  | 'failed'
  | 'canceled';

// Base operation interface
/**
 * Fields shared by every operation. Concrete operation interfaces extend this
 * and narrow `type` to a string literal that acts as the union discriminator.
 */
export interface BaseOperation {
  id: OperationId;
  /** Operation kind; narrowed to a literal by each concrete operation. */
  type: string;
  status: OperationStatus;
  /** NOTE(review): the scale (0–1 vs 0–100) is not visible in this file — confirm. */
  progress: number;
  /** Failure message; `null` or absent when there is no error. */
  error?: string | null;
  /** Creation timestamp; `createBaseOperation` fills it with `Date.now()`. */
  createdAt: number;
  startedAt?: number;
  completedAt?: number;
  /** Controller that callers can use to cancel the operation. */
  abortController?: AbortController;
}
// Split operations
/**
 * Names of the supported split strategies. Each value is the `mode`
 * discriminator of one of the `Split*Params` interfaces in this file.
 */
export type SplitMode =
| 'pages'
| 'size'
| 'duplicates'
| 'bookmarks'
| 'sections';
/** Parameters for a split with `mode: 'pages'`. */
export interface SplitPagesParams {
mode: 'pages';
// NOTE(review): whether these are 0- or 1-based, and whether they are split
// points or pages to extract, is not visible here — confirm against the consumer.
pages: number[];
}
/** Parameters for a split with `mode: 'size'`. */
export interface SplitSizeParams {
mode: 'size';
// Size limit; bytes according to the field name.
maxSizeBytes: number;
}
/** Parameters for a split with `mode: 'duplicates'`. */
export interface SplitDuplicatesParams {
mode: 'duplicates';
// Optional. NOTE(review): scale and default of the tolerance are not defined in this file — confirm.
tolerance?: number;
}
/** Parameters for a split with `mode: 'bookmarks'`. */
export interface SplitBookmarksParams {
mode: 'bookmarks';
// Optional. NOTE(review): presumably the bookmark nesting level to split at — confirm, including the default.
level?: number;
}
/** Parameters for a split with `mode: 'sections'`. */
export interface SplitSectionsParams {
mode: 'sections';
// Required number of sections.
sectionCount: number;
}
/**
 * Any split parameter object. This is a discriminated union: checking `mode`
 * narrows the value to the matching `Split*Params` interface.
 */
export type SplitParams =
| SplitPagesParams
| SplitSizeParams
| SplitDuplicatesParams
| SplitBookmarksParams
| SplitSectionsParams;
/** Operation with `type: 'split'`: one input file, mode-specific parameters, optional list of output files. */
export interface SplitOperation extends BaseOperation {
type: 'split';
inputFileId: FileId;
params: SplitParams;
// Optional; NOTE(review): presumably filled in once the split has produced files — confirm.
outputFileIds?: FileId[];
}
// Merge operations
/** Operation with `type: 'merge'`: several input files, optional single output file. */
export interface MergeOperation extends BaseOperation {
type: 'merge';
inputFileIds: FileId[];
params: {
// Optional ordering criterion for the inputs.
sortBy?: 'name' | 'size' | 'date' | 'custom';
// Optional explicit order. NOTE(review): presumably only used with sortBy 'custom' — confirm.
customOrder?: FileId[];
// Optional flag. NOTE(review): exact effect on bookmarks is not defined in this file — confirm.
bookmarks?: boolean;
};
outputFileId?: FileId;
}
// Compress operations
/** Operation with `type: 'compress'`: one input file, compression settings, optional output file. */
export interface CompressOperation extends BaseOperation {
type: 'compress';
inputFileId: FileId;
params: {
// Required compression level.
level: 'low' | 'medium' | 'high' | 'extreme';
imageQuality?: number; // 0-100
grayscale?: boolean;
removeAnnotations?: boolean;
};
outputFileId?: FileId;
}
// Convert operations
/** Target formats accepted by `ConvertOperation.params.targetFormat`. */
export type ConvertFormat =
| 'pdf'
| 'docx'
| 'pptx'
| 'xlsx'
| 'html'
| 'txt'
| 'jpg'
| 'png';
/** Operation with `type: 'convert'`: several input files, a target format with optional settings, optional output files. */
export interface ConvertOperation extends BaseOperation {
type: 'convert';
inputFileIds: FileId[];
params: {
targetFormat: ConvertFormat;
// Optional. NOTE(review): presumably applies to image targets ('jpg', 'png') — confirm; value ranges are not defined here.
imageSettings?: {
quality?: number;
dpi?: number;
colorSpace?: 'rgb' | 'grayscale' | 'cmyk';
};
// Optional. NOTE(review): presumably applies when the target is 'pdf' — confirm.
pdfSettings?: {
pdfStandard?: 'PDF/A-1' | 'PDF/A-2' | 'PDF/A-3';
compliance?: boolean;
};
};
outputFileIds?: FileId[];
}
// OCR operations
/** Operation record tagged `type: 'ocr'`: one input file and recognition settings. */
export interface OcrOperation extends BaseOperation {
type: 'ocr';
/** Identifier of the file the operation reads. */
inputFileId: FileId;
params: {
/** Languages to recognise. NOTE(review): the code format (e.g. ISO vs engine-specific) is not defined here — confirm. */
languages: string[];
/** Output mode of the OCR run. */
mode: 'searchable' | 'text-only' | 'overlay';
preprocess?: boolean;
deskew?: boolean;
};
/** Identifier of the produced file; optional. Presumably set once a result exists — confirm. */
outputFileId?: FileId;
}
// Security operations
/** Operation record tagged `type: 'security'`: one input file and an action with its settings. */
export interface SecurityOperation extends BaseOperation {
type: 'security';
/** Identifier of the file the operation reads. */
inputFileId: FileId;
params: {
/** Which security action to perform. */
action: 'encrypt' | 'decrypt' | 'sign' | 'watermark';
/** Optional password. NOTE(review): which actions require it is not enforced by this type — confirm. */
password?: string;
/** Optional per-capability flags. */
permissions?: {
printing?: boolean;
copying?: boolean;
editing?: boolean;
annotations?: boolean;
};
/** Optional watermark settings. NOTE(review): presumably used with `action: 'watermark'` — confirm. */
watermark?: {
text: string;
position: 'center' | 'top-left' | 'top-right' | 'bottom-left' | 'bottom-right';
/** NOTE(review): range (0-1 vs 0-100) is not defined in this file — confirm. */
opacity: number;
};
};
/** Identifier of the produced file; optional. Presumably set once a result exists — confirm. */
outputFileId?: FileId;
}
// Union type for all operations
/** Union of every concrete operation interface, discriminated by the literal `type` field. */
export type Operation =
| SplitOperation
| MergeOperation
| CompressOperation
| ConvertOperation
| OcrOperation
| SecurityOperation;
// Operation state management
/** Container for operation records plus three id lists that reference them. */
export interface OperationState {
/** All known operations, keyed by their id. */
operations: Record<OperationId, Operation>;
/** NOTE(review): presumably ids waiting to run, in order — confirm. */
queue: OperationId[];
/** NOTE(review): presumably ids currently running — confirm. */
active: OperationId[];
/** NOTE(review): presumably ids of finished operations — confirm. */
history: OperationId[];
}
// Operation creation helpers
/**
 * Builds a new operation id of the form `op-<timestamp>-<suffix>`.
 *
 * @returns The current `Date.now()` value and a base-36 fragment of a random
 *          number (at most six characters), joined with an `op-` prefix.
 */
export function createOperationId(): OperationId {
  const timestamp = Date.now();
  // Characters 2..7 skip the leading "0." of the base-36 rendering.
  const suffix = Math.random().toString(36).slice(2, 8);
  return `op-${timestamp}-${suffix}`;
}
/**
 * Creates the common part of an operation record in its initial state.
 *
 * @param type - Operation kind to store on the record.
 * @returns A record with a fresh id, status `'idle'`, progress 0, a `null`
 *          error, the current time as `createdAt`, and a new `AbortController`.
 */
export function createBaseOperation(type: string): BaseOperation {
  const id = createOperationId();
  const createdAt = Date.now();
  const abortController = new AbortController();
  return { id, type, status: 'idle', progress: 0, error: null, createdAt, abortController };
}
// Type guards for operations
/**
 * Type guard for split operations.
 *
 * @param op - Operation to inspect.
 * @returns `true` when `op.type` is `'split'`.
 */
export function isSplitOperation(op: Operation): op is SplitOperation {
  const { type: kind } = op;
  return kind === 'split';
}
/**
 * Type guard for merge operations.
 *
 * @param op - Operation to inspect.
 * @returns `true` when `op.type` is `'merge'`.
 */
export function isMergeOperation(op: Operation): op is MergeOperation {
  const { type: kind } = op;
  return kind === 'merge';
}
/**
 * Type guard for compress operations.
 *
 * @param op - Operation to inspect.
 * @returns `true` when `op.type` is `'compress'`.
 */
export function isCompressOperation(op: Operation): op is CompressOperation {
  const { type: kind } = op;
  return kind === 'compress';
}
/**
 * Type guard for convert operations.
 *
 * @param op - Operation to inspect.
 * @returns `true` when `op.type` is `'convert'`.
 */
export function isConvertOperation(op: Operation): op is ConvertOperation {
  const { type: kind } = op;
  return kind === 'convert';
}
/**
 * Type guard for OCR operations.
 *
 * @param op - Operation to inspect.
 * @returns `true` when `op.type` is `'ocr'`.
 */
export function isOcrOperation(op: Operation): op is OcrOperation {
  const { type: kind } = op;
  return kind === 'ocr';
}
/**
 * Type guard for security operations.
 *
 * @param op - Operation to inspect.
 * @returns `true` when `op.type` is `'security'`.
 */
export function isSecurityOperation(op: Operation): op is SecurityOperation {
  const { type: kind } = op;
  return kind === 'security';
}
// Operation status helpers
/**
 * Reports whether an operation is in one of the in-flight statuses.
 *
 * @param op - Operation to inspect.
 * @returns `true` when `op.status` is `'preparing'`, `'uploading'` or `'processing'`.
 */
export function isOperationActive(op: Operation): boolean {
  const inFlight: readonly string[] = ['preparing', 'uploading', 'processing'];
  return inFlight.some(candidate => candidate === op.status);
}
/**
 * Reports whether an operation has finished successfully.
 *
 * @param op - Operation to inspect.
 * @returns `true` when `op.status` is `'completed'`.
 */
export function isOperationComplete(op: Operation): boolean {
  const { status } = op;
  return status === 'completed';
}
/**
 * Reports whether an operation ended in failure.
 *
 * @param op - Operation to inspect.
 * @returns `true` when `op.status` is `'failed'`.
 */
export function isOperationFailed(op: Operation): boolean {
  const { status } = op;
  return status === 'failed';
}
/**
 * Reports whether a failed operation is eligible for a retry.
 *
 * @param op - Operation to inspect.
 * @returns `true` only when the status is `'failed'`, an abort controller is
 *          present, and that controller's signal has not been aborted.
 */
export function canRetryOperation(op: Operation): boolean {
  if (op.status !== 'failed') {
    return false;
  }
  const controller = op.abortController;
  if (!controller) {
    return false;
  }
  return !controller.signal.aborted;
}
// Operation validation
/**
 * Validates split parameters for the mode they declare.
 *
 * @param params - Split settings, discriminated by `mode`.
 * @returns An error message describing the first problem found, or `null`
 *          when the parameters are acceptable. The `'duplicates'` and
 *          `'bookmarks'` modes have only optional fields and are not checked.
 */
export function validateSplitParams(params: SplitParams): string | null {
  switch (params.mode) {
    case 'pages':
      if (!params.pages.length) return 'No pages specified';
      // Page numbers must be whole and 1-based; isInteger also rejects NaN,
      // which a bare `p < 1` comparison lets through.
      if (params.pages.some(p => !Number.isInteger(p) || p < 1)) return 'Invalid page numbers';
      break;
    case 'size':
      // Negated `> 0` so that NaN is rejected along with zero and negatives.
      if (!(params.maxSizeBytes > 0)) return 'Invalid size limit';
      break;
    case 'sections':
      // A section count must be a whole number; isInteger also rejects NaN.
      if (!Number.isInteger(params.sectionCount) || params.sectionCount < 2) {
        return 'Section count must be at least 2';
      }
      break;
    default:
      break;
  }
  return null;
}
/**
 * Validates a merge request.
 *
 * @param params - Merge settings.
 * @param fileIds - Identifiers of the files to merge.
 * @returns An error message when fewer than two files are given, or when
 *          `sortBy` is `'custom'` without a non-empty `customOrder`;
 *          otherwise `null`.
 */
export function validateMergeParams(params: MergeOperation['params'], fileIds: FileId[]): string | null {
  if (fileIds.length < 2) {
    return 'At least 2 files required for merge';
  }
  if (params.sortBy !== 'custom') {
    return null;
  }
  const ordering = params.customOrder;
  if (ordering && ordering.length > 0) {
    return null;
  }
  return 'Custom order required when sort by custom is selected';
}
/**
 * Validates compression settings.
 *
 * @param params - Compression settings.
 * @returns An error message when `imageQuality` is supplied but is not a
 *          number within 0-100 (NaN included); otherwise `null`.
 */
export function validateCompressParams(params: CompressOperation['params']): string | null {
  const { imageQuality } = params;
  // Expressed as a negated in-range test so NaN fails: both `NaN < 0` and
  // `NaN > 100` are false and would otherwise slip through.
  if (imageQuality !== undefined && !(imageQuality >= 0 && imageQuality <= 100)) {
    return 'Image quality must be between 0-100';
  }
  return null;
}
// Operation result types
/** Outcome record of an operation; carried by the `'operation:completed'` event. */
export interface OperationResult {
/** Identifier of the operation this result belongs to. */
operationId: OperationId;
/** Whether the operation succeeded. */
success: boolean;
/** Identifiers of the files produced. NOTE(review): presumably empty on failure — confirm. */
outputFileIds: FileId[];
/** Optional failure description. */
error?: string;
/** Optional free-form extra data; keys and values are not constrained by this type. */
metadata?: Record<string, unknown>;
}
// Operation events for pub/sub
/**
 * Union of operation lifecycle events, discriminated by the literal `type`
 * field. The created event carries the full operation; the others carry the
 * operation id plus event-specific data.
 */
export type OperationEvent =
| { type: 'operation:created'; operation: Operation }
| { type: 'operation:started'; operationId: OperationId }
| { type: 'operation:progress'; operationId: OperationId; progress: number }
| { type: 'operation:completed'; operationId: OperationId; result: OperationResult }
| { type: 'operation:failed'; operationId: OperationId; error: string }
| { type: 'operation:canceled'; operationId: OperationId };