Refactor file processing architecture to centralize metadata extraction, page counting, and thumbnail generation. Introduce FileProcessingService for improved file handling and performance. Update FileContext to utilize the new service for processing files upon addition, ensuring consistent metadata updates and thumbnail management.

2025-08-26 06:09:23 +00:00 · 2025-08-14 19:46:02 +01:00 · 2025-08-14 19:46:02 +01:00 · 29a4e03784
commit 29a4e03784
parent 4a0c577312
5 changed files with 641 additions and 363 deletions
--- a/frontend/src/components/fileEditor/FileEditor.tsx
+++ b/frontend/src/components/fileEditor/FileEditor.tsx
@@ -10,7 +10,6 @@ import { useToolFileSelection, useProcessedFiles, useFileState, useFileManagemen
 import { FileOperation, createStableFileId } from '../../types/fileContext';
 import { fileStorage } from '../../services/fileStorage';
 import { generateThumbnailForFile } from '../../utils/thumbnailUtils';
-import { useThumbnailGeneration } from '../../hooks/useThumbnailGeneration';
 import { zipFileService } from '../../services/zipFileService';
 import { detectFileExtension } from '../../utils/fileUtils';
 import styles from '../pageEditor/PageEditor.module.css';
@@ -48,9 +47,6 @@ const FileEditor = ({
 }: FileEditorProps) => {
  const { t } = useTranslation();

-  // Thumbnail cache for sharing with PageEditor
-  const { getThumbnailFromCache, addThumbnailToCache } = useThumbnailGeneration();
-
  // Utility function to check if a file extension is supported
  const isFileSupported = useCallback((fileName: string): boolean => {
    const extension = detectFileExtension(fileName);
@@ -157,33 +153,19 @@ const FileEditor = ({

  // Convert shared files to FileEditor format
  const convertToFileItem = useCallback(async (sharedFile: any): Promise<FileItem> => {
-    let thumbnail = sharedFile.thumbnail;
-    
-    if (!thumbnail) {
-      // Check cache first using the file ID
-      const fileId = sharedFile.id || `file-${Date.now()}-${Math.random()}`;
-      const page1CacheKey = `${fileId}-page-1`;
-      thumbnail = getThumbnailFromCache(page1CacheKey);
-      
-      if (!thumbnail) {
-        // Generate and cache thumbnail
-        thumbnail = await generateThumbnailForFile(sharedFile.file || sharedFile);
-        if (thumbnail) {
-          addThumbnailToCache(page1CacheKey, thumbnail);
-          console.log(`📸 FileEditor: Cached page-1 thumbnail for legacy file (key: ${page1CacheKey})`);
-        }
-      }
-    }
+    // Use processed data if available, otherwise fallback to legacy approach
+    const thumbnail = sharedFile.thumbnail || sharedFile.thumbnailUrl || 
+      (await generateThumbnailForFile(sharedFile.file || sharedFile));

    return {
      id: sharedFile.id || `file-${Date.now()}-${Math.random()}`,
      name: (sharedFile.file?.name || sharedFile.name || 'unknown'),
-      pageCount: sharedFile.pageCount || 1, // Default to 1 page if unknown
+      pageCount: sharedFile.processedFile?.totalPages || sharedFile.pageCount || 1,
      thumbnail: thumbnail || '',
      size: sharedFile.file?.size || sharedFile.size || 0,
      file: sharedFile.file || sharedFile,
    };
-  }, [getThumbnailFromCache, addThumbnailToCache]);
+  }, []);

  // Convert activeFiles to FileItem format using context (async to avoid blocking)
  useEffect(() => {
@@ -216,47 +198,11 @@ const FileEditor = ({

            if (!file) continue; // Skip if file not found
            
-            // Use record's thumbnail if available, otherwise check cache, then generate
-            let thumbnail: string | undefined = record.thumbnailUrl;
-            if (!thumbnail) {
-              // Check if PageEditor has already cached a page-1 thumbnail for this file
-              const page1CacheKey = `${record.id}-page-1`;
-              thumbnail = getThumbnailFromCache(page1CacheKey) || undefined;
-              
-              if (!thumbnail) {
-                try {
-                  thumbnail = await generateThumbnailForFile(file);
-                  // Store in cache for PageEditor to reuse
-                  if (thumbnail) {
-                    addThumbnailToCache(page1CacheKey, thumbnail);
-                    console.log(`📸 FileEditor: Cached page-1 thumbnail for ${file.name} (key: ${page1CacheKey})`);
-                  }
-                } catch (error) {
-                  console.warn(`Failed to generate thumbnail for ${file.name}:`, error);
-                  thumbnail = undefined; // Use placeholder
-                }
-              } else {
-                console.log(`📸 FileEditor: Reused cached page-1 thumbnail for ${file.name} (key: ${page1CacheKey})`);
-              }
-            }
-
-            // Page count estimation for display purposes only
-            let pageCount = 1; // Default for non-PDFs and display in FileEditor
+            // Use processed data from centralized file processing service
+            const thumbnail = record.thumbnailUrl; // Already processed by FileProcessingService
+            const pageCount = record.processedFile?.totalPages || 1; // Use processed page count
            
-            if (file.type === 'application/pdf') {
-              // Quick page count estimation for FileEditor display only
-              // PageEditor will do its own more thorough page detection
-              try {
-                const arrayBuffer = await file.arrayBuffer();
-                const text = new TextDecoder('latin1').decode(arrayBuffer);
-                const pageMatches = text.match(/\/Type\s*\/Page[^s]/g);
-                pageCount = pageMatches ? pageMatches.length : 1;
-                console.log(`📄 FileEditor estimated page count for ${file.name}: ${pageCount} pages (display only)`);
-              } catch (error) {
-                console.warn(`Failed to estimate page count for ${file.name}:`, error);
-                pageCount = 1; // Safe fallback
-              }
-            }
+            console.log(`📄 FileEditor: Using processed data for ${file.name}: ${pageCount} pages, thumbnail: ${!!thumbnail}`);
            
            const convertedFile = {
              id: record.id, // Use the record's UUID from FileContext
--- a/frontend/src/components/pageEditor/PageEditor.tsx
+++ b/frontend/src/components/pageEditor/PageEditor.tsx
@@ -93,15 +93,16 @@ const PageEditor = ({
    destroyThumbnails
  } = useThumbnailGeneration();
  
-  // State for discovered page document
-  const [discoveredDocument, setDiscoveredDocument] = useState<PDFDocument | null>(null);
-  const [isDiscoveringPages, setIsDiscoveringPages] = useState(false);
+
+  // Get primary file record outside useMemo to track processedFile changes
+  const primaryFileRecord = primaryFileId ? selectors.getFileRecord(primaryFileId) : null;
+  const processedFilePages = primaryFileRecord?.processedFile?.pages;
+  const processedFileTotalPages = primaryFileRecord?.processedFile?.totalPages;

  // Compute merged document with stable signature (prevents infinite loops)
  const mergedPdfDocument = useMemo((): PDFDocument | null => {
    if (activeFileIds.length === 0) return null;

-    const primaryFileRecord = primaryFileId ? selectors.getFileRecord(primaryFileId) : null;
    const primaryFile = primaryFileId ? selectors.getFile(primaryFileId) : null;
    
    // If we have file IDs but no file record, something is wrong - return null to show loading
@@ -124,60 +125,54 @@ const PageEditor = ({
    console.log(`🎬 PageEditor: Building document for ${name}`);
    console.log(`🎬 ProcessedFile exists:`, !!processedFile);
    console.log(`🎬 ProcessedFile pages:`, processedFile?.pages?.length || 0);
+    console.log(`🎬 ProcessedFile totalPages:`, processedFile?.totalPages || 'unknown');
    if (processedFile?.pages) {
      console.log(`🎬 Pages structure:`, processedFile.pages.map(p => ({ pageNumber: p.pageNumber || 'unknown', hasThumbnail: !!p.thumbnail })));
    }
+    console.log(`🎬 Will use ${(processedFile?.pages?.length || 0) > 0 ? 'PROCESSED' : 'FALLBACK'} pages`);
    
-    // Convert processed pages to PageEditor format, or discover pages if not processed yet
-    let pages: PDFPage[];
-    
-    if (processedFile?.pages && processedFile.pages.length > 0) {
-      // Use existing processed data
-      pages = processedFile.pages.map((page, index) => {
-        const pageId = `${primaryFileId}-page-${index + 1}`;
-        // Try multiple sources for thumbnails in order of preference:
-        // 1. Processed data thumbnail
-        // 2. Cached thumbnail from previous generation
-        // 3. For page 1: FileEditor's thumbnailUrl (sharing optimization)
-        let thumbnail = page.thumbnail || null;
-        if (!thumbnail) {
-          thumbnail = getThumbnailFromCache(pageId) || null;
-        }
-        if (!thumbnail && index === 0) {
-          // For page 1, also check if FileEditor has already generated a thumbnail
-          thumbnail = primaryFileRecord.thumbnailUrl || null;
-          // If we found a FileEditor thumbnail, cache it for consistency
-          if (thumbnail) {
-            addThumbnailToCache(pageId, thumbnail);
-            console.log(`📸 PageEditor: Reused FileEditor thumbnail for page 1 (${pageId})`);
+    // Convert processed pages to PageEditor format
+    // All processing is now handled by FileProcessingService when files are added
+    const pages: PDFPage[] = processedFile?.pages && processedFile.pages.length > 0
+      ? processedFile.pages.map((page, index) => {
+          const pageId = `${primaryFileId}-page-${index + 1}`;
+          // Try multiple sources for thumbnails in order of preference:
+          // 1. Processed data thumbnail
+          // 2. Cached thumbnail from previous generation
+          // 3. For page 1: FileRecord's thumbnailUrl (from FileProcessingService)
+          let thumbnail = page.thumbnail || null;
+          const cachedThumbnail = getThumbnailFromCache(pageId);
+          if (!thumbnail && cachedThumbnail) {
+            thumbnail = cachedThumbnail;
+            console.log(`📸 PageEditor: Using cached thumbnail for page ${index + 1} (${pageId})`);
          }
-        }
-        
-        return {
-          id: pageId,
-          pageNumber: index + 1,
-          thumbnail,
-          rotation: page.rotation || 0,
+          if (!thumbnail && index === 0) {
+            // For page 1, use the thumbnail from FileProcessingService
+            thumbnail = primaryFileRecord.thumbnailUrl || null;
+            if (thumbnail) {
+              addThumbnailToCache(pageId, thumbnail);
+              console.log(`📸 PageEditor: Using FileProcessingService thumbnail for page 1 (${pageId})`);
+            }
+          }
+          
+          
+          return {
+            id: pageId,
+            pageNumber: index + 1,
+            thumbnail,
+            rotation: page.rotation || 0,
+            selected: false,
+            splitBefore: page.splitBefore || false,
+          };
+        })
+      : [{ // Fallback while FileProcessingService is working
+          id: `${primaryFileId}-page-1`,
+          pageNumber: 1,
+          thumbnail: getThumbnailFromCache(`${primaryFileId}-page-1`) || primaryFileRecord.thumbnailUrl || null,
+          rotation: 0,
          selected: false,
-          splitBefore: page.splitBefore || false,
-        };
-      });
-    } else if (discoveredDocument && discoveredDocument.id === (primaryFileId ?? 'unknown')) {
-      // Use discovered document if available and matches current file
-      pages = discoveredDocument.pages;
-    } else {
-      // No processed data and no discovered data yet - show placeholder while discovering
-      console.log(`🎬 PageEditor: No processedFile data, showing placeholder while discovering pages for ${name}`);
-      
-      pages = [{
-        id: `${primaryFileId}-page-1`,
-        pageNumber: 1,
-        thumbnail: getThumbnailFromCache(`${primaryFileId}-page-1`) || primaryFileRecord.thumbnailUrl || null,
-        rotation: 0,
-        selected: false,
-        splitBefore: false,
-      }];
-    }
+          splitBefore: false,
+        }];

    // Create document with determined pages

@@ -189,123 +184,8 @@ const PageEditor = ({
      totalPages: pages.length,
      destroy: () => {} // Optional cleanup function
    };
-  }, [filesSignature, activeFileIds, primaryFileId, selectors, getThumbnailFromCache, addThumbnailToCache, discoveredDocument]);
+  }, [filesSignature, activeFileIds, primaryFileId, primaryFileRecord, processedFilePages, processedFileTotalPages, selectors, getThumbnailFromCache, addThumbnailToCache]);

-  // Async page discovery effect
-  useEffect(() => {
-    const discoverPages = async () => {
-      if (!primaryFileId) return;
-      
-      const record = selectors.getFileRecord(primaryFileId);
-      const primaryFile = selectors.getFile(primaryFileId);
-      if (!record || !primaryFile) return;
-      
-      // Skip if we already have processed data or are currently discovering
-      if (record.processedFile?.pages || isDiscoveringPages) return;
-      
-      // Only discover for PDF files
-      if (primaryFile.type !== 'application/pdf') return;
-      
-      console.log(`🎬 PageEditor: Starting async page discovery for ${primaryFile.name}`);
-      setIsDiscoveringPages(true);
-      
-      try {
-        let discoveredPageCount = 1;
-        
-        // Try PDF.js first (more accurate)
-        try {
-          const arrayBuffer = await primaryFile.arrayBuffer();
-          const pdfDoc = await import('pdfjs-dist').then(pdfjs => pdfjs.getDocument({
-            data: arrayBuffer,
-            disableAutoFetch: true,
-            disableStream: true
-          }).promise);
-          
-          discoveredPageCount = pdfDoc.numPages;
-          console.log(`🎬 PageEditor: Discovered ${discoveredPageCount} pages using PDF.js`);
-          
-          // Clean up PDF document immediately
-          pdfDoc.destroy();
-        } catch (pdfError) {
-          console.warn(`🎬 PageEditor: PDF.js failed, trying text analysis:`, pdfError);
-          
-          // Fallback to text analysis
-          try {
-            const arrayBuffer = await primaryFile.arrayBuffer();
-            const text = new TextDecoder('latin1').decode(arrayBuffer);
-            const pageMatches = text.match(/\/Type\s*\/Page[^s]/g);
-            discoveredPageCount = pageMatches ? pageMatches.length : 1;
-            console.log(`🎬 PageEditor: Discovered ${discoveredPageCount} pages using text analysis`);
-          } catch (textError) {
-            console.warn(`🎬 PageEditor: Text analysis also failed:`, textError);
-            discoveredPageCount = 1;
-          }
-        }
-        
-        // Create page structure
-        const pages = Array.from({ length: discoveredPageCount }, (_, index) => {
-          const pageId = `${primaryFileId}-page-${index + 1}`;
-          let thumbnail = getThumbnailFromCache(pageId) || null;
-          
-          // For page 1, also check FileEditor's thumbnail
-          if (!thumbnail && index === 0) {
-            thumbnail = record.thumbnailUrl || null;
-            if (thumbnail) {
-              addThumbnailToCache(pageId, thumbnail);
-              console.log(`📸 PageEditor: Reused FileEditor thumbnail for page 1 (${pageId})`);
-            }
-          }
-          
-          return {
-            id: pageId,
-            pageNumber: index + 1,
-            thumbnail,
-            rotation: 0,
-            selected: false,
-            splitBefore: false,
-          };
-        });
-        
-        // Create discovered document
-        const discoveredDoc: PDFDocument = {
-          id: primaryFileId,
-          name: primaryFile.name,
-          file: primaryFile,
-          pages,
-          totalPages: pages.length,
-          destroy: () => {}
-        };
-        
-        // Save to state for immediate UI update
-        setDiscoveredDocument(discoveredDoc);
-        
-        // Save to FileContext for persistence
-        const processedFileData = {
-          pages: pages.map(page => ({
-            pageNumber: page.pageNumber,
-            thumbnail: page.thumbnail || undefined,
-            rotation: page.rotation,
-            splitBefore: page.splitBefore
-          })),
-          totalPages: discoveredPageCount,
-          lastProcessed: Date.now()
-        };
-        
-        actions.updateFileRecord(primaryFileId, {
-          processedFile: processedFileData
-        });
-        
-        console.log(`🎬 PageEditor: Page discovery complete - ${discoveredPageCount} pages saved to FileContext`);
-        
-      } catch (error) {
-        console.error(`🎬 PageEditor: Page discovery failed:`, error);
-      } finally {
-        setIsDiscoveringPages(false);
-      }
-    };
-    
-    discoverPages();
-  }, [primaryFileId, selectors, isDiscoveringPages, getThumbnailFromCache, addThumbnailToCache, actions]);

  // Display document: Use edited version if exists, otherwise original
  const displayDocument = editedDocument || mergedPdfDocument;
@@ -372,150 +252,107 @@ const PageEditor = ({

  // PageEditor no longer handles cleanup - it's centralized in FileContext

-  /** 
-   * Using ref instead of state prevents infinite loops.
-   * State changes would trigger re-renders and effect re-runs.
-   */
-  const thumbnailGenerationStarted = useRef(false);
+  // Simple cache-first thumbnail generation (no complex detection needed)

-  // Start thumbnail generation process (guards against re-entry) - stable version
-  const startThumbnailGeneration = useCallback(() => {
-    // Access current values directly - avoid stale closures
-    const currentDocument = mergedPdfDocument;
-    const currentActiveFileIds = activeFileIds;
-    const currentPrimaryFileId = primaryFileId;
-    
-    if (!currentDocument || currentActiveFileIds.length !== 1 || !currentPrimaryFileId || thumbnailGenerationStarted.current) {
+  // Simple thumbnail generation - generate pages 2+ that aren't cached
+  const generateMissingThumbnails = useCallback(async () => {
+    if (!mergedPdfDocument || !primaryFileId || activeFileIds.length !== 1) {
      return;
    }

-    const file = selectors.getFile(currentPrimaryFileId);
+    const file = selectors.getFile(primaryFileId);
    if (!file) return;
-    const totalPages = currentDocument.totalPages || currentDocument.pages.length || 0;
-    if (totalPages <= 0) return; // nothing to generate yet
-
-    thumbnailGenerationStarted.current = true;
    
-    // Run everything asynchronously to avoid blocking the main thread
-    setTimeout(async () => {
-      try {
-        // Load PDF array buffer for Web Workers
-        const arrayBuffer = await file.arrayBuffer();
-
-        // Generate page numbers for pages that don't have thumbnails yet
-        const pageNumbers = Array.from({ length: totalPages }, (_, i) => i + 1)
-          .filter(pageNum => {
-            const page = currentDocument.pages.find(p => p.pageNumber === pageNum);
-            return !page?.thumbnail; // Only generate for pages without thumbnails
-          });
-
-        // If no pages need thumbnails, we're done
-        if (pageNumbers.length === 0) {
-          return;
-        }
-
-        // Calculate quality scale based on file size
-        const scale = currentActiveFileIds.length === 1 && currentPrimaryFileId ? 
-          calculateScaleFromFileSize(selectors.getFileRecord(currentPrimaryFileId)?.size || 0) : 0.2;
-
-        // Start parallel thumbnail generation WITHOUT blocking the main thread
-        const generationPromise = generateThumbnails(
-          arrayBuffer,
-          pageNumbers,
-          {
-            scale, // Dynamic quality based on file size
-            quality: 0.8,
-            batchSize: 15, // Smaller batches per worker for smoother UI
-            parallelBatches: 3 // Use 3 Web Workers in parallel
-          },
-          // Progress callback for thumbnail updates
-          (progress) => {
-            // Batch process thumbnails to reduce main thread work
-            requestAnimationFrame(() => {
-              progress.thumbnails.forEach(({ pageNumber, thumbnail }) => {
-                // Use stable fileId for cache key
-                const pageId = `${currentPrimaryFileId}-page-${pageNumber}`;
-                const cached = getThumbnailFromCache(pageId);
-
-                if (!cached) {
-                  addThumbnailToCache(pageId, thumbnail);
-
-                  // Persist thumbnail to FileContext for durability
-                  const fileRecord = selectors.getFileRecord(currentPrimaryFileId);
-                  if (fileRecord) {
-                    const updatedProcessedFile = {
-                      ...fileRecord.processedFile,
-                      pages: fileRecord.processedFile?.pages?.map((page, index) => 
-                        index + 1 === pageNumber 
-                          ? { ...page, thumbnail }
-                          : page
-                      ) || [{ thumbnail }] // Create pages array if it doesn't exist
-                    };
-                    
-                    // For page 1, also update the file record's thumbnailUrl so FileEditor can use it directly
-                    const updates: any = { processedFile: updatedProcessedFile };
-                    if (pageNumber === 1) {
-                      updates.thumbnailUrl = thumbnail;
-                      console.log(`📸 PageEditor: Set thumbnailUrl for FileEditor reuse (${currentPrimaryFileId})`);
-                    }
-                    
-                    actions.updateFileRecord(currentPrimaryFileId, updates);
-                  }
-
-                  window.dispatchEvent(new CustomEvent('thumbnailReady', {
-                    detail: { pageNumber, thumbnail, pageId }
-                  }));
-                }
-              });
-            });
-          }
-        );
-
-        // Handle completion
-        generationPromise
-          .then(() => {
-            // Keep thumbnailGenerationStarted as true to prevent restarts
-          })
-          .catch(error => {
-            console.error('PageEditor: Thumbnail generation failed:', error);
-            thumbnailGenerationStarted.current = false;
-          });
-
-      } catch (error) {
-        console.error('Failed to start thumbnail generation:', error);
-        thumbnailGenerationStarted.current = false;
+    const totalPages = mergedPdfDocument.totalPages;
+    if (totalPages <= 1) return; // Only page 1, nothing to generate
+    
+    // Check which pages 2+ need thumbnails (not in cache)
+    const pageNumbersToGenerate = [];
+    for (let pageNum = 2; pageNum <= totalPages; pageNum++) {
+      const pageId = `${primaryFileId}-page-${pageNum}`;
+      if (!getThumbnailFromCache(pageId)) {
+        pageNumbersToGenerate.push(pageNum);
      }
-    }, 0); // setTimeout with 0ms to defer to next tick
-  }, [generateThumbnails, getThumbnailFromCache, addThumbnailToCache, selectors, actions]); // Only stable function dependencies
-
-  // Start thumbnail generation when files change (stable signature prevents loops)
-  useEffect(() => {
-    if (mergedPdfDocument && !thumbnailGenerationStarted.current) {
-      // Check if ALL pages already have thumbnails
-      const totalPages = mergedPdfDocument.totalPages || mergedPdfDocument.pages.length || 0;
-      const pagesWithThumbnails = mergedPdfDocument.pages.filter(page => page.thumbnail).length;
-      const hasAllThumbnails = pagesWithThumbnails === totalPages;
-
-      if (hasAllThumbnails) {
-        return; // Skip generation if thumbnails exist
-      }
-
-      // Small delay to let document render, then start thumbnail generation
-      const timer = setTimeout(startThumbnailGeneration, 500);
-      return () => clearTimeout(timer);
    }
-  }, [filesSignature, startThumbnailGeneration]);
+
+    if (pageNumbersToGenerate.length === 0) {
+      console.log(`📸 PageEditor: All pages 2+ already cached, skipping generation`);
+      return;
+    }
+
+    console.log(`📸 PageEditor: Generating thumbnails for pages: [${pageNumbersToGenerate.join(', ')}]`);
+    
+    try {
+      // Load PDF array buffer for Web Workers
+      const arrayBuffer = await file.arrayBuffer();
+
+      // Calculate quality scale based on file size
+      const scale = calculateScaleFromFileSize(selectors.getFileRecord(primaryFileId)?.size || 0);
+
+      // Start parallel thumbnail generation WITHOUT blocking the main thread
+      await generateThumbnails(
+        arrayBuffer,
+        pageNumbersToGenerate,
+        {
+          scale, // Dynamic quality based on file size
+          quality: 0.8,
+          batchSize: 15, // Smaller batches per worker for smoother UI
+          parallelBatches: 3 // Use 3 Web Workers in parallel
+        },
+        // Progress callback for thumbnail updates
+        (progress) => {
+          // Batch process thumbnails to reduce main thread work
+          requestAnimationFrame(() => {
+            progress.thumbnails.forEach(({ pageNumber, thumbnail }) => {
+              // Use stable fileId for cache key
+              const pageId = `${primaryFileId}-page-${pageNumber}`;
+              addThumbnailToCache(pageId, thumbnail);
+
+              // Also update the processedFile so document rebuilds include the thumbnail
+              const fileRecord = selectors.getFileRecord(primaryFileId);
+              if (fileRecord?.processedFile?.pages) {
+                const updatedProcessedFile = {
+                  ...fileRecord.processedFile,
+                  pages: fileRecord.processedFile.pages.map((page, index) => 
+                    index + 1 === pageNumber 
+                      ? { ...page, thumbnail }
+                      : page
+                  )
+                };
+                actions.updateFileRecord(primaryFileId, { processedFile: updatedProcessedFile });
+              }
+
+              window.dispatchEvent(new CustomEvent('thumbnailReady', {
+                detail: { pageNumber, thumbnail, pageId }
+              }));
+            });
+          });
+        }
+      );
+
+      console.log(`📸 PageEditor: Thumbnail generation completed for pages [${pageNumbersToGenerate.join(', ')}]`);
+    } catch (error) {
+      console.error('PageEditor: Thumbnail generation failed:', error);
+    }
+  }, [mergedPdfDocument, primaryFileId, activeFileIds, generateThumbnails, getThumbnailFromCache, addThumbnailToCache, selectors, actions]);
+
+  // Simple useEffect - just generate missing thumbnails when document is ready
+  useEffect(() => {
+    if (mergedPdfDocument && mergedPdfDocument.totalPages > 1) {
+      console.log(`📸 PageEditor: Document ready with ${mergedPdfDocument.totalPages} pages, checking for missing thumbnails`);
+      generateMissingThumbnails();
+    }
+  }, [mergedPdfDocument, generateMissingThumbnails]);

  // Cleanup thumbnail generation when component unmounts
  useEffect(() => {
    return () => {
-      thumbnailGenerationStarted.current = false;
      // Stop any ongoing thumbnail generation
      if (stopGeneration) {
        stopGeneration();
      }
    };
-  }, [stopGeneration]); // Only depend on the stopGeneration function
+  }, [stopGeneration]);

  // Clear selections when files change - use stable signature
  useEffect(() => {
@@ -747,6 +584,11 @@ const PageEditor = ({

      request.onsuccess = () => {
        const db = request.result;
+        // Check if the object store exists before trying to access it
+        if (!db.objectStoreNames.contains('drafts')) {
+          console.warn('drafts object store does not exist, skipping auto-save');
+          return;
+        }
        const transaction = db.transaction('drafts', 'readwrite');
        const store = transaction.objectStore('drafts');
        store.put(draftData, draftKey);
@@ -822,8 +664,20 @@ const PageEditor = ({
    try {
      const request = indexedDB.open('stirling-pdf-drafts', 1);

+      request.onupgradeneeded = () => {
+        const db = request.result;
+        if (!db.objectStoreNames.contains('drafts')) {
+          db.createObjectStore('drafts');
+        }
+      };
+
      request.onsuccess = () => {
        const db = request.result;
+        // Check if the object store exists before trying to access it
+        if (!db.objectStoreNames.contains('drafts')) {
+          console.warn('drafts object store does not exist, skipping cleanup');
+          return;
+        }
        const transaction = db.transaction('drafts', 'readwrite');
        const store = transaction.objectStore('drafts');
        store.delete(draftKey);
@@ -838,6 +692,16 @@ const PageEditor = ({
          resolve(); // Don't fail the whole operation if cleanup fails
        };
        
+        dbRequest.onupgradeneeded = (event) => {
+          const db = (event.target as IDBOpenDBRequest).result;
+          
+          // Create object store if it doesn't exist
+          if (!db.objectStoreNames.contains('drafts')) {
+            db.createObjectStore('drafts');
+            console.log('Created drafts object store during cleanup fallback');
+          }
+        };
+        
        dbRequest.onsuccess = () => {
          const db = dbRequest.result;
          
--- a/frontend/src/contexts/FileContext.tsx
+++ b/frontend/src/contexts/FileContext.tsx
@@ -44,6 +44,7 @@ import {
 import { EnhancedPDFProcessingService } from '../services/enhancedPDFProcessingService';
 import { thumbnailGenerationService } from '../services/thumbnailGenerationService';
 import { fileStorage } from '../services/fileStorage';
+import { fileProcessingService } from '../services/fileProcessingService';

 // Get service instances
 const enhancedPDFProcessingService = EnhancedPDFProcessingService.getInstance();
@@ -430,13 +431,37 @@ export function FileContextProvider({
      fileRecords.push(record);
      addedFiles.push(file);
      
+      // Start centralized file processing (async, non-blocking)
+      fileProcessingService.processFile(file, fileId).then(result => {
+        // Only update if file still exists in context
+        if (filesRef.current.has(fileId)) {
+          if (result.success && result.metadata) {
+            // Update with processed metadata using dispatch directly
+            dispatch({ 
+              type: 'UPDATE_FILE_RECORD', 
+              payload: { 
+                id: fileId, 
+                updates: {
+                  processedFile: result.metadata,
+                  thumbnailUrl: result.metadata.thumbnailUrl
+                }
+              }
+            });
+            console.log(`✅ File processing complete for ${file.name}: ${result.metadata.totalPages} pages`);
+          } else {
+            console.warn(`❌ File processing failed for ${file.name}:`, result.error);
+          }
+        }
+      }).catch(error => {
+        console.error(`❌ File processing error for ${file.name}:`, error);
+      });
+      
      // Optional: Persist to IndexedDB if enabled
      if (enablePersistence) {
        try {
-          // Generate thumbnail and store in IndexedDB with our UUID
-          import('../utils/thumbnailUtils').then(({ generateThumbnailForFile }) => {
-            return generateThumbnailForFile(file);
-          }).then(thumbnail => {
+          // Use the thumbnail from processing service if available
+          fileProcessingService.processFile(file, fileId).then(result => {
+            const thumbnail = result.metadata?.thumbnailUrl;
            return fileStorage.storeFile(file, fileId, thumbnail);
          }).then(() => {
            console.log('File persisted to IndexedDB:', fileId);
@@ -472,7 +497,7 @@ export function FileContextProvider({

    // Return only the newly added files
    return addedFiles;
-  }, [enablePersistence]); // Include enablePersistence for persistence logic
+  }, [enablePersistence]); // Depends only on enablePersistence (updateFileRecord is no longer used here)

  const removeFiles = useCallback((fileIds: FileId[], deleteFromStorage: boolean = true) => {
    // Clean up Files from ref map first
--- a/frontend/src/services/fileProcessingService.ts
+++ b/frontend/src/services/fileProcessingService.ts
@ -0,0 +1,151 @@
+/**
+ * Centralized file processing service
+ * Handles metadata discovery, page counting, and thumbnail generation
+ * Called when files are added to FileContext, before any view sees them
+ */
+
+import { getDocument } from 'pdfjs-dist';
+import { generateThumbnailForFile } from '../utils/thumbnailUtils';
+
+export interface ProcessedFileMetadata { // Metadata assembled by FileProcessingService.performProcessing
+  totalPages: number; // PDF page count; stays 1 for non-PDF files or when both counting strategies fail
+  pages: Array<{ // One entry per page (length === totalPages)
+    pageNumber: number; // 1-based (array index + 1)
+    thumbnail?: string; // Only page 1 carries a thumbnail right after processing
+    rotation: number; // Initialised to 0
+    splitBefore: boolean; // Initialised to false
+  }>;
+  thumbnailUrl?: string; // Page 1 thumbnail for FileEditor
+  lastProcessed: number; // Date.now() captured when the metadata object was built
+}
+
+export interface FileProcessingResult { // Outcome returned by FileProcessingService.processFile
+  success: boolean; // false only when performProcessing's outer catch is hit
+  metadata?: ProcessedFileMetadata; // Set on the success path
+  error?: string; // Error message, or 'Unknown processing error', on the failure path
+}
+
+/**
+ * Turns a File into ProcessedFileMetadata: page count, per-page placeholder
+ * records, and a page-1 thumbnail. Concurrent requests for the same fileId
+ * share a single in-flight promise.
+ */
+class FileProcessingService {
+  /** In-flight work keyed by fileId; an entry is removed once its promise settles. */
+  private processingCache = new Map<string, Promise<FileProcessingResult>>();
+
+  /**
+   * Process a file to extract metadata, page count, and generate thumbnails
+   * This is the single source of truth for file processing
+   *
+   * @param file - File to inspect.
+   * @param fileId - Key used to de-duplicate concurrent requests.
+   * @returns The processing result; failures are reported via `success: false`
+   *          rather than a rejected promise.
+   */
+  async processFile(file: File, fileId: string): Promise<FileProcessingResult> {
+    // Check if we're already processing this file
+    const existingPromise = this.processingCache.get(fileId);
+    if (existingPromise) {
+      console.log(`📁 FileProcessingService: Using cached processing for ${file.name}`);
+      return existingPromise;
+    }
+
+    // Create processing promise
+    const processingPromise = this.performProcessing(file, fileId);
+    this.processingCache.set(fileId, processingPromise);
+
+    // Clean up cache after completion. Only evict the entry this call created:
+    // if clearCache() ran and the same fileId was submitted again while this
+    // run was still in flight, the map now holds a newer promise that must
+    // stay registered until it settles itself.
+    const evictOwnEntry = (): void => {
+      if (this.processingCache.get(fileId) === processingPromise) {
+        this.processingCache.delete(fileId);
+      }
+    };
+    // then(onFulfilled, onRejected) handles both outcomes without leaving a
+    // derived promise that could surface as an unhandled rejection.
+    void processingPromise.then(evictOwnEntry, evictOwnEntry);
+
+    return processingPromise;
+  }
+
+  /**
+   * Does the actual work for processFile: counts pages (PDF only), generates
+   * the page-1 thumbnail and assembles the metadata record.
+   *
+   * Page counting and thumbnail generation are each best-effort: a failure is
+   * logged and processing continues with the defaults (1 page, no thumbnail).
+   */
+  private async performProcessing(file: File, fileId: string): Promise<FileProcessingResult> {
+    console.log(`📁 FileProcessingService: Starting processing for ${file.name} (${fileId})`);
+
+    try {
+      let totalPages = 1;
+      let thumbnailUrl: string | undefined;
+
+      // Handle PDF files
+      if (file.type === 'application/pdf') {
+        // Discover page count using PDF.js (most accurate)
+        try {
+          const arrayBuffer = await file.arrayBuffer();
+          const pdfDoc = await getDocument({
+            data: arrayBuffer,
+            disableAutoFetch: true,
+            disableStream: true
+          }).promise;
+
+          totalPages = pdfDoc.numPages;
+          console.log(`📁 FileProcessingService: PDF.js discovered ${totalPages} pages for ${file.name}`);
+
+          // Clean up immediately
+          pdfDoc.destroy();
+        } catch (pdfError) {
+          console.warn(`📁 FileProcessingService: PDF.js failed for ${file.name}, trying fallback:`, pdfError);
+          
+          // Fallback to text analysis
+          try {
+            // Read the bytes again instead of reusing the buffer handed to PDF.js above
+            const arrayBuffer = await file.arrayBuffer();
+            const text = new TextDecoder('latin1').decode(arrayBuffer);
+            // Count "/Type /Page" markers; [^s] keeps "/Type /Pages" nodes out of the count
+            const pageMatches = text.match(/\/Type\s*\/Page[^s]/g);
+            totalPages = pageMatches ? pageMatches.length : 1;
+            console.log(`📁 FileProcessingService: Text analysis discovered ${totalPages} pages for ${file.name}`);
+          } catch (textError) {
+            console.warn(`📁 FileProcessingService: Text analysis also failed for ${file.name}:`, textError);
+            totalPages = 1;
+          }
+        }
+      }
+
+      // Generate page 1 thumbnail
+      try {
+        thumbnailUrl = await generateThumbnailForFile(file);
+        console.log(`📁 FileProcessingService: Generated thumbnail for ${file.name}`);
+      } catch (thumbError) {
+        console.warn(`📁 FileProcessingService: Thumbnail generation failed for ${file.name}:`, thumbError);
+      }
+
+      // Create page structure
+      const pages = Array.from({ length: totalPages }, (_, index) => ({
+        pageNumber: index + 1,
+        thumbnail: index === 0 ? thumbnailUrl : undefined, // Only page 1 gets thumbnail initially
+        rotation: 0,
+        splitBefore: false
+      }));
+
+      const metadata: ProcessedFileMetadata = {
+        totalPages,
+        pages,
+        thumbnailUrl, // For FileEditor display
+        lastProcessed: Date.now()
+      };
+
+      console.log(`📁 FileProcessingService: Processing complete for ${file.name} - ${totalPages} pages`);
+      
+      return {
+        success: true,
+        metadata
+      };
+
+    } catch (error) {
+      console.error(`📁 FileProcessingService: Processing failed for ${file.name}:`, error);
+      
+      return {
+        success: false,
+        error: error instanceof Error ? error.message : 'Unknown processing error'
+      };
+    }
+  }
+
+  /**
+   * Clear all processing caches
+   * Work already in flight keeps running; it just stops being shared with later callers.
+   */
+  clearCache(): void {
+    this.processingCache.clear();
+  }
+
+  /**
+   * Check if a file is currently being processed
+   *
+   * @param fileId - Key that was passed to processFile.
+   * @returns true while an entry for fileId is registered in the in-flight cache.
+   */
+  isProcessing(fileId: string): boolean {
+    return this.processingCache.has(fileId);
+  }
+}
+
+// Export singleton instance so every importer shares the same in-flight processing cache
+export const fileProcessingService = new FileProcessingService();
--- a/frontend/src/types/operations.ts
+++ b/frontend/src/types/operations.ts
@ -0,0 +1,292 @@
+/**
+ * Typed operation model with discriminated unions
+ * Centralizes all PDF operations with proper type safety
+ */
+
+import { FileId } from './fileRecord';
+
+export type OperationId = string; // createOperationId produces values shaped like "op-<timestamp>-<random>"
+
+export type OperationStatus = // Lifecycle states an operation can be in
+  | 'idle' // Initial status assigned by createBaseOperation
+  | 'preparing' // Treated as active by isOperationActive
+  | 'uploading' // Treated as active by isOperationActive
+  | 'processing' // Treated as active by isOperationActive
+  | 'completed' // Matched by isOperationComplete
+  | 'failed' // Matched by isOperationFailed; precondition for canRetryOperation
+  | 'canceled';
+
+// Base operation interface: fields shared by every concrete operation type
+export interface BaseOperation {
+  id: OperationId;
+  type: string; // Narrowed to a string literal by each concrete operation interface
+  status: OperationStatus;
+  progress: number; // createBaseOperation starts this at 0
+  error?: string | null; // createBaseOperation starts this at null
+  createdAt: number; // Date.now() timestamp taken at creation
+  startedAt?: number;
+  completedAt?: number;
+  abortController?: AbortController; // Consulted by canRetryOperation via signal.aborted
+}
+
+// Split operations: the available split strategies; each mode has a matching *Params interface
+export type SplitMode = 
+  | 'pages' // SplitPagesParams
+  | 'size' // SplitSizeParams
+  | 'duplicates' // SplitDuplicatesParams
+  | 'bookmarks' // SplitBookmarksParams
+  | 'sections'; // SplitSectionsParams
+
+export interface SplitPagesParams { // Split driven by an explicit page list
+  mode: 'pages';
+  pages: number[]; // validateSplitParams requires a non-empty list with no value below 1
+}
+
+export interface SplitSizeParams { // Split driven by a size limit
+  mode: 'size';
+  maxSizeBytes: number; // validateSplitParams requires a value greater than 0
+}
+
+export interface SplitDuplicatesParams {
+  mode: 'duplicates';
+  tolerance?: number; // NOTE(review): units and range are not defined in this file; not validated
+}
+
+export interface SplitBookmarksParams {
+  mode: 'bookmarks';
+  level?: number; // NOTE(review): presumably a bookmark/outline depth - confirm; not validated
+}
+
+export interface SplitSectionsParams {
+  mode: 'sections';
+  sectionCount: number; // validateSplitParams requires at least 2
+}
+
+export type SplitParams = // Discriminated union keyed on `mode`
+  | SplitPagesParams
+  | SplitSizeParams
+  | SplitDuplicatesParams
+  | SplitBookmarksParams
+  | SplitSectionsParams;
+
+export interface SplitOperation extends BaseOperation { // Operation variant tagged with type 'split'
+  type: 'split'; // Discriminant checked by isSplitOperation
+  inputFileId: FileId; // Single source file
+  params: SplitParams;
+  outputFileIds?: FileId[]; // NOTE(review): presumably filled once the split has produced files - confirm
+}
+
+// Merge operations: several input files, at most one output file
+export interface MergeOperation extends BaseOperation {
+  type: 'merge'; // Discriminant checked by isMergeOperation
+  inputFileIds: FileId[];
+  params: {
+    sortBy?: 'name' | 'size' | 'date' | 'custom';
+    customOrder?: FileId[]; // validateMergeParams requires a non-empty list when sortBy is 'custom'
+    bookmarks?: boolean; // NOTE(review): exact bookmark behaviour is not defined in this file
+  };
+  outputFileId?: FileId;
+}
+
+// Compress operations: one input file, at most one output file
+export interface CompressOperation extends BaseOperation {
+  type: 'compress'; // Discriminant checked by isCompressOperation
+  inputFileId: FileId;
+  params: {
+    level: 'low' | 'medium' | 'high' | 'extreme';
+    imageQuality?: number; // 0-100 (range enforced by validateCompressParams)
+    grayscale?: boolean;
+    removeAnnotations?: boolean;
+  };
+  outputFileId?: FileId;
+}
+
+// Convert operations: formats accepted as ConvertOperation.params.targetFormat
+export type ConvertFormat = 
+  | 'pdf'
+  | 'docx' 
+  | 'pptx'
+  | 'xlsx'
+  | 'html'
+  | 'txt'
+  | 'jpg'
+  | 'png';
+
+export interface ConvertOperation extends BaseOperation { // Operation variant tagged with type 'convert'
+  type: 'convert'; // Discriminant checked by isConvertOperation
+  inputFileIds: FileId[];
+  params: {
+    targetFormat: ConvertFormat;
+    imageSettings?: { // NOTE(review): presumably only relevant for image targets - confirm
+      quality?: number; // NOTE(review): scale is not defined in this file
+      dpi?: number;
+      colorSpace?: 'rgb' | 'grayscale' | 'cmyk';
+    };
+    pdfSettings?: { // NOTE(review): presumably only relevant when targetFormat is 'pdf' - confirm
+      pdfStandard?: 'PDF/A-1' | 'PDF/A-2' | 'PDF/A-3';
+      compliance?: boolean;
+    };
+  };
+  outputFileIds?: FileId[];
+}
+
+// OCR operations: one input file, at most one output file
+export interface OcrOperation extends BaseOperation {
+  type: 'ocr'; // Discriminant checked by isOcrOperation
+  inputFileId: FileId;
+  params: {
+    languages: string[]; // NOTE(review): expected code format (e.g. ISO vs. engine-specific) is not defined here
+    mode: 'searchable' | 'text-only' | 'overlay';
+    preprocess?: boolean;
+    deskew?: boolean;
+  };
+  outputFileId?: FileId;
+}
+
+// Security operations: one input file, at most one output file; `action` selects what is done
+export interface SecurityOperation extends BaseOperation {
+  type: 'security'; // Discriminant checked by isSecurityOperation
+  inputFileId: FileId;
+  params: {
+    action: 'encrypt' | 'decrypt' | 'sign' | 'watermark';
+    password?: string; // NOTE(review): presumably used by encrypt/decrypt - confirm
+    permissions?: {
+      printing?: boolean;
+      copying?: boolean;
+      editing?: boolean;
+      annotations?: boolean;
+    };
+    watermark?: { // NOTE(review): presumably required when action is 'watermark'; not enforced by the type
+      text: string;
+      position: 'center' | 'top-left' | 'top-right' | 'bottom-left' | 'bottom-right';
+      opacity: number; // NOTE(review): scale (0-1 vs 0-100) is not defined in this file
+    };
+  };
+  outputFileId?: FileId;
+}
+
+// Union type for all operations, discriminated by the literal `type` field of each member
+export type Operation = 
+  | SplitOperation
+  | MergeOperation
+  | CompressOperation
+  | ConvertOperation
+  | OcrOperation
+  | SecurityOperation;
+
+// Operation state management: operations stored by id plus id lists referring to them
+export interface OperationState {
+  operations: Record<OperationId, Operation>; // Lookup table keyed by operation id
+  queue: OperationId[]; // NOTE(review): presumably ids waiting to run - confirm against the reducer
+  active: OperationId[]; // NOTE(review): presumably ids currently running - confirm
+  history: OperationId[]; // NOTE(review): presumably ids of finished operations - confirm
+}
+
+// Operation creation helpers
+/**
+ * Builds a new operation id of the form `op-<ms timestamp>-<base-36 random suffix>`.
+ * The suffix is taken from Math.random() and is at most six characters long.
+ */
+export function createOperationId(): OperationId {
+  const timestamp = Date.now();
+  const randomSuffix = Math.random().toString(36).substring(2, 8);
+  return `op-${timestamp}-${randomSuffix}`;
+}
+
+/**
+ * Creates the shared portion of an operation record.
+ *
+ * @param type - Operation type tag stored on the record as-is.
+ * @returns A record with a fresh id, status 'idle', zero progress, a null
+ *          error, the current timestamp and a new AbortController.
+ */
+export function createBaseOperation(type: string): BaseOperation {
+  const id = createOperationId();
+  const createdAt = Date.now();
+  const abortController = new AbortController();
+
+  const operation: BaseOperation = {
+    id,
+    type,
+    status: 'idle',
+    progress: 0,
+    error: null,
+    createdAt,
+    abortController
+  };
+  return operation;
+}
+
+// Type guards for operations
+/** Narrows an Operation to SplitOperation by checking its `type` tag. */
+export function isSplitOperation(op: Operation): op is SplitOperation {
+  const { type } = op;
+  return type === 'split';
+}
+
+/** Narrows an Operation to MergeOperation by checking its `type` tag. */
+export function isMergeOperation(op: Operation): op is MergeOperation {
+  const { type } = op;
+  return type === 'merge';
+}
+
+/** Narrows an Operation to CompressOperation by checking its `type` tag. */
+export function isCompressOperation(op: Operation): op is CompressOperation {
+  const { type } = op;
+  return type === 'compress';
+}
+
+/** Narrows an Operation to ConvertOperation by checking its `type` tag. */
+export function isConvertOperation(op: Operation): op is ConvertOperation {
+  const { type } = op;
+  return type === 'convert';
+}
+
+/** Narrows an Operation to OcrOperation by checking its `type` tag. */
+export function isOcrOperation(op: Operation): op is OcrOperation {
+  const { type } = op;
+  return type === 'ocr';
+}
+
+/** Narrows an Operation to SecurityOperation by checking its `type` tag. */
+export function isSecurityOperation(op: Operation): op is SecurityOperation {
+  const { type } = op;
+  return type === 'security';
+}
+
+// Operation status helpers
+/** Reports whether the operation is in one of the in-progress statuses. */
+export function isOperationActive(op: Operation): boolean {
+  switch (op.status) {
+    case 'preparing':
+    case 'uploading':
+    case 'processing':
+      return true;
+    default:
+      return false;
+  }
+}
+
+/** Reports whether the operation's status is 'completed'. */
+export function isOperationComplete(op: Operation): boolean {
+  const { status } = op;
+  return status === 'completed';
+}
+
+/** Reports whether the operation's status is 'failed'. */
+export function isOperationFailed(op: Operation): boolean {
+  const { status } = op;
+  return status === 'failed';
+}
+
+/**
+ * Reports whether a retry is allowed: the operation must have failed and must
+ * still hold an AbortController whose signal has not been aborted.
+ */
+export function canRetryOperation(op: Operation): boolean {
+  if (op.status !== 'failed') {
+    return false;
+  }
+  const controller = op.abortController;
+  if (!controller) {
+    return false;
+  }
+  return !controller.signal.aborted;
+}
+
+// Operation validation
+/**
+ * Validates split parameters for the modes that carry required numeric input.
+ *
+ * @param params - Split parameters, discriminated by `mode`.
+ * @returns An error message for the first problem found, or null when the
+ *          parameters are acceptable. The 'duplicates' and 'bookmarks' modes
+ *          have no required fields and always pass.
+ */
+export function validateSplitParams(params: SplitParams): string | null {
+  switch (params.mode) {
+    case 'pages':
+      if (!params.pages.length) return 'No pages specified';
+      // Number.isInteger also rejects NaN and fractions, which a bare `p < 1` lets through
+      if (params.pages.some(p => !Number.isInteger(p) || p < 1)) return 'Invalid page numbers';
+      break;
+    case 'size':
+      // Number.isFinite rejects NaN and Infinity, neither of which is a usable limit
+      if (!Number.isFinite(params.maxSizeBytes) || params.maxSizeBytes <= 0) return 'Invalid size limit';
+      break;
+    case 'sections':
+      // Written as a negated >= so that NaN fails the check as well
+      if (!(params.sectionCount >= 2)) return 'Section count must be at least 2';
+      break;
+  }
+  return null;
+}
+
+/**
+ * Validates a merge request.
+ *
+ * @param params - Merge parameters.
+ * @param fileIds - Ids of the files to merge.
+ * @returns An error message, or null when the request is acceptable: at least
+ *          two files, and a non-empty customOrder when sortBy is 'custom'.
+ */
+export function validateMergeParams(params: MergeOperation['params'], fileIds: FileId[]): string | null {
+  if (fileIds.length < 2) {
+    return 'At least 2 files required for merge';
+  }
+  if (params.sortBy !== 'custom') {
+    return null;
+  }
+
+  const orderedIds = params.customOrder;
+  const hasCustomOrder = orderedIds != null && Boolean(orderedIds.length);
+  return hasCustomOrder ? null : 'Custom order required when sort by custom is selected';
+}
+
+/**
+ * Validates compression parameters.
+ *
+ * @param params - Compression parameters.
+ * @returns An error message when imageQuality is supplied but is not a number
+ *          in the inclusive range 0-100; otherwise null.
+ */
+export function validateCompressParams(params: CompressOperation['params']): string | null {
+  const { imageQuality } = params;
+  if (imageQuality === undefined) {
+    return null;
+  }
+  // Phrased as "not within range" so NaN is rejected too; with the usual
+  // `< 0 || > 100` form both comparisons are false for NaN and it slips through.
+  if (!(imageQuality >= 0 && imageQuality <= 100)) {
+    return 'Image quality must be between 0-100';
+  }
+  return null;
+}
+
+// Operation result types: payload carried by the 'operation:completed' event
+export interface OperationResult {
+  operationId: OperationId; // Id of the operation this result belongs to
+  success: boolean;
+  outputFileIds: FileId[]; // Required even on failure; NOTE(review): presumably empty in that case - confirm
+  error?: string;
+  metadata?: Record<string, unknown>; // Free-form extra data; shape is not defined in this file
+}
+
+// Operation events for pub/sub: discriminated by `type`; all but 'operation:created' carry only the operation id
+export type OperationEvent = 
+  | { type: 'operation:created'; operation: Operation } // Carries the full operation record
+  | { type: 'operation:started'; operationId: OperationId }
+  | { type: 'operation:progress'; operationId: OperationId; progress: number }
+  | { type: 'operation:completed'; operationId: OperationId; result: OperationResult }
+  | { type: 'operation:failed'; operationId: OperationId; error: string }
+  | { type: 'operation:canceled'; operationId: OperationId };