Fix page editor multi-file support with dynamic worker management

- Update PageEditor to merge pages from all active files, not just primary file - Implement dynamic PDF worker allocation based on number of files (up to 12 workers) - Add sequential file processing to prevent worker contention - Include cache polling in PageThumbnail for real-time UI updates - Increase PDFWorkerManager limit to 15 workers for multi-file scenarios Resolves issues where only the first file's pages were shown in page editor and thumbnails failed to generate due to worker pool exhaustion. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-26 14:19:24 +00:00 · 2025-08-21 19:31:17 +01:00 · 2025-08-21 19:31:17 +01:00 · c8714a279e
commit c8714a279e
parent 949ffa01ad
3 changed files with 222 additions and 140 deletions
--- a/frontend/src/components/pageEditor/PageEditor.tsx
+++ b/frontend/src/components/pageEditor/PageEditor.tsx
@ -123,95 +123,116 @@ const PageEditor = ({
            .map(id => (selectors.getFileRecord(id)?.name ?? 'file').replace(/\.pdf$/i, ''))
            .join(' + ');

-    // Get pages from processed file data
-    const processedFile = primaryFileRecord.processedFile;
+    // Debug logging for merged document creation
+    console.log(`🎬 PageEditor: Building merged document for ${name} with ${activeFileIds.length} files`);
    
-    // Debug logging for processed file data
-    console.log(`🎬 PageEditor: Building document for ${name}`);
-    console.log(`🎬 ProcessedFile exists:`, !!processedFile);
-    console.log(`🎬 ProcessedFile pages:`, processedFile?.pages?.length || 0);
-    console.log(`🎬 ProcessedFile totalPages:`, processedFile?.totalPages || 'unknown');
-    if (processedFile?.pages) {
-      console.log(`🎬 Pages structure:`, processedFile.pages.map(p => ({ pageNumber: p.pageNumber || 'unknown', hasThumbnail: !!p.thumbnail })));
-    }
-    console.log(`🎬 Will use ${(processedFile?.pages?.length || 0) > 0 ? 'PROCESSED' : 'FALLBACK'} pages`);
-    
-    // Convert processed pages to PageEditor format or create placeholders from metadata
+    // Collect pages from ALL active files, not just the primary file
    let pages: PDFPage[] = [];
+    let totalPageCount = 0;
    
-    if (processedFile?.pages && processedFile.pages.length > 0) {
-      // Use fully processed pages with thumbnails
-      pages = processedFile.pages.map((page, index) => {
-        const pageId = `${primaryFileId}-page-${index + 1}`;
-        // Try multiple sources for thumbnails in order of preference:
-        // 1. Processed data thumbnail
-        // 2. Cached thumbnail from previous generation
-        // 3. For page 1: FileRecord's thumbnailUrl (from FileProcessingService)
-        let thumbnail = page.thumbnail || null;
-        const cachedThumbnail = getThumbnailFromCache(pageId);
-        if (!thumbnail && cachedThumbnail) {
-          thumbnail = cachedThumbnail;
-          console.log(`📸 PageEditor: Using cached thumbnail for page ${index + 1} (${pageId})`);
-        }
-        if (!thumbnail && index === 0) {
-          // For page 1, use the thumbnail from FileProcessingService
-          thumbnail = primaryFileRecord.thumbnailUrl || null;
-          if (thumbnail) {
-            addThumbnailToCache(pageId, thumbnail);
-            console.log(`📸 PageEditor: Using FileProcessingService thumbnail for page 1 (${pageId})`);
+    activeFileIds.forEach((fileId, fileIndex) => {
+      const fileRecord = selectors.getFileRecord(fileId);
+      if (!fileRecord) {
+        console.warn(`🎬 PageEditor: No record found for file ${fileId}`);
+        return;
+      }
+      
+      const processedFile = fileRecord.processedFile;
+      console.log(`🎬 PageEditor: Processing file ${fileIndex + 1}/${activeFileIds.length} (${fileRecord.name})`);
+      console.log(`🎬 ProcessedFile exists:`, !!processedFile);
+      console.log(`🎬 ProcessedFile pages:`, processedFile?.pages?.length || 0);
+      console.log(`🎬 ProcessedFile totalPages:`, processedFile?.totalPages || 'unknown');
+      
+      let filePages: PDFPage[] = [];
+      
+      if (processedFile?.pages && processedFile.pages.length > 0) {
+        // Use fully processed pages with thumbnails
+        filePages = processedFile.pages.map((page, pageIndex) => {
+          const pageId = `${fileId}-page-${pageIndex + 1}`;
+          const globalPageNumber = totalPageCount + pageIndex + 1;
+          
+          // Try multiple sources for thumbnails in order of preference:
+          // 1. Processed data thumbnail
+          // 2. Cached thumbnail from previous generation
+          // 3. For page 1: FileRecord's thumbnailUrl (from FileProcessingService)
+          let thumbnail = page.thumbnail || null;
+          const cachedThumbnail = getThumbnailFromCache(pageId);
+          if (!thumbnail && cachedThumbnail) {
+            thumbnail = cachedThumbnail;
+            console.log(`📸 PageEditor: Using cached thumbnail for ${fileRecord.name} page ${pageIndex + 1} (${pageId})`);
          }
-        }
-        
-        return {
-          id: pageId,
-          pageNumber: index + 1,
-          thumbnail,
-          rotation: page.rotation || 0,
-          selected: false,
-          splitBefore: page.splitBefore || false,
-        };
-      });
-    } else if (processedFile?.totalPages && processedFile.totalPages > 0) {
-      // Create placeholder pages from metadata while thumbnails are being generated
-      console.log(`🎬 PageEditor: Creating ${processedFile.totalPages} placeholder pages from metadata`);
-      pages = Array.from({ length: processedFile.totalPages }, (_, index) => {
-        const pageId = `${primaryFileId}-page-${index + 1}`;
-        
-        // Check for existing cached thumbnail
-        let thumbnail = getThumbnailFromCache(pageId) || null;
-        
-        // For page 1, try to use the FileRecord thumbnail
-        if (!thumbnail && index === 0) {
-          thumbnail = primaryFileRecord.thumbnailUrl || null;
-          if (thumbnail) {
-            addThumbnailToCache(pageId, thumbnail);
-            console.log(`📸 PageEditor: Using FileProcessingService thumbnail for placeholder page 1 (${pageId})`);
+          if (!thumbnail && pageIndex === 0) {
+            // For page 1 of each file, use the thumbnail from FileProcessingService
+            thumbnail = fileRecord.thumbnailUrl || null;
+            if (thumbnail) {
+              addThumbnailToCache(pageId, thumbnail);
+              console.log(`📸 PageEditor: Using FileProcessingService thumbnail for ${fileRecord.name} page 1 (${pageId})`);
+            }
          }
-        }
          
-        return {
+          return {
+            id: pageId,
+            pageNumber: globalPageNumber,
+            thumbnail,
+            rotation: page.rotation || 0,
+            selected: false,
+            splitBefore: page.splitBefore || false,
+          };
+        });
+        
+        totalPageCount += processedFile.pages.length;
+      } else if (processedFile?.totalPages && processedFile.totalPages > 0) {
+        // Create placeholder pages from metadata while thumbnails are being generated
+        console.log(`🎬 PageEditor: Creating ${processedFile.totalPages} placeholder pages for ${fileRecord.name} from metadata`);
+        filePages = Array.from({ length: processedFile.totalPages }, (_, pageIndex) => {
+          const pageId = `${fileId}-page-${pageIndex + 1}`;
+          const globalPageNumber = totalPageCount + pageIndex + 1;
+          
+          // Check for existing cached thumbnail
+          let thumbnail = getThumbnailFromCache(pageId) || null;
+          
+          // For page 1 of each file, try to use the FileRecord thumbnail
+          if (!thumbnail && pageIndex === 0) {
+            thumbnail = fileRecord.thumbnailUrl || null;
+            if (thumbnail) {
+              addThumbnailToCache(pageId, thumbnail);
+              console.log(`📸 PageEditor: Using FileProcessingService thumbnail for ${fileRecord.name} placeholder page 1 (${pageId})`);
+            }
+          }
+          
+          return {
+            id: pageId,
+            pageNumber: globalPageNumber,
+            thumbnail, // Will be null initially, populated by PageThumbnail components
+            rotation: 0,
+            selected: false,
+            splitBefore: false,
+          };
+        });
+        
+        totalPageCount += processedFile.totalPages;
+      } else {
+        // Ultimate fallback - single page while we wait for metadata
+        const pageId = `${fileId}-page-1`;
+        const globalPageNumber = totalPageCount + 1;
+        filePages = [{
          id: pageId,
-          pageNumber: index + 1,
-          thumbnail, // Will be null initially, populated by PageThumbnail components
+          pageNumber: globalPageNumber,
+          thumbnail: getThumbnailFromCache(pageId) || fileRecord.thumbnailUrl || null,
          rotation: 0,
          selected: false,
          splitBefore: false,
-        };
-      });
-    } else {
-      // Ultimate fallback - single page while we wait for metadata
-      pages = [{
-        id: `${primaryFileId}-page-1`,
-        pageNumber: 1,
-        thumbnail: getThumbnailFromCache(`${primaryFileId}-page-1`) || primaryFileRecord.thumbnailUrl || null,
-        rotation: 0,
-        selected: false,
-        splitBefore: false,
-      }];
-    }
+        }];
+        
+        totalPageCount += 1;
+      }
+      
+      pages.push(...filePages);
+    });
+
+    console.log(`🎬 PageEditor: Created merged document with ${pages.length} total pages from ${activeFileIds.length} files`);

    // Create document with determined pages
-
    return {
      id: activeFileIds.length === 1 ? (primaryFileId ?? 'unknown') : `merged:${filesSignature}`,
      name,
@ -220,7 +241,7 @@ const PageEditor = ({
      totalPages: pages.length,
      destroy: () => {} // Optional cleanup function
    };
-  }, [filesSignature, primaryFileId, primaryFileRecord]);
+  }, [filesSignature, activeFileIds, selectors, getThumbnailFromCache, addThumbnailToCache]);


  // Display document: Use edited version if exists, otherwise original
@ -286,74 +307,20 @@ const PageEditor = ({

  // Simple cache-first thumbnail generation (no complex detection needed)

-  // Lazy thumbnail generation - only generate when needed, with intelligent batching
-  const generateMissingThumbnails = useCallback(async () => {
-    if (!mergedPdfDocument || !primaryFileId || activeFileIds.length !== 1) {
-      return;
-    }
-
-    const file = selectors.getFile(primaryFileId);
-    if (!file) return;
-    
-    const totalPages = mergedPdfDocument.totalPages;
-    if (totalPages <= 1) return; // Only page 1, nothing to generate
-    
-    // For very large documents (2000+ pages), be much more conservative
-    const isVeryLargeDocument = totalPages > 2000;
-    
-    if (isVeryLargeDocument) {
-      console.log(`📸 PageEditor: Very large document (${totalPages} pages) - using minimal thumbnail generation`);
-      // For very large docs, only generate the next visible batch (pages 2-25) to avoid UI blocking
-      const pageNumbersToGenerate = [];
-      for (let pageNum = 2; pageNum <= Math.min(25, totalPages); pageNum++) {
-        const pageId = `${primaryFileId}-page-${pageNum}`;
-        if (!getThumbnailFromCache(pageId)) {
-          pageNumbersToGenerate.push(pageNum);
-        }
-      }
-      
-      if (pageNumbersToGenerate.length > 0) {
-        console.log(`📸 PageEditor: Generating initial batch for large doc: pages [${pageNumbersToGenerate.join(', ')}]`);
-        await generateThumbnailBatch(file, primaryFileId, pageNumbersToGenerate);
-      }
-      
-      // Schedule remaining thumbnails with delay to avoid blocking
-      setTimeout(() => {
-        generateRemainingThumbnailsLazily(file, primaryFileId, totalPages, 26);
-      }, 2000); // 2 second delay before starting background generation
-      
-      return;
-    }
-    
-    // For smaller documents, check which pages 2+ need thumbnails
-    const pageNumbersToGenerate = [];
-    for (let pageNum = 2; pageNum <= totalPages; pageNum++) {
-      const pageId = `${primaryFileId}-page-${pageNum}`;
-      if (!getThumbnailFromCache(pageId)) {
-        pageNumbersToGenerate.push(pageNum);
-      }
-    }
-
-    if (pageNumbersToGenerate.length === 0) {
-      console.log(`📸 PageEditor: All pages 2+ already cached, skipping generation`);
-      return;
-    }
-
-    console.log(`📸 PageEditor: Generating thumbnails for pages: [${pageNumbersToGenerate.slice(0, 5).join(', ')}${pageNumbersToGenerate.length > 5 ? '...' : ''}]`);
-    await generateThumbnailBatch(file, primaryFileId, pageNumbersToGenerate);
-  }, [mergedPdfDocument, primaryFileId, activeFileIds, selectors]);
-
  // Helper function to generate thumbnails in batches
  const generateThumbnailBatch = useCallback(async (file: File, fileId: string, pageNumbers: number[]) => {
+    console.log(`📸 PageEditor: Starting thumbnail batch for ${file.name}, pages: [${pageNumbers.join(', ')}]`);
+    
    try {
      // Load PDF array buffer for Web Workers
      const arrayBuffer = await file.arrayBuffer();
+      console.log(`📸 PageEditor: Loaded array buffer for ${file.name} (${arrayBuffer.byteLength} bytes)`);

      // Calculate quality scale based on file size
      const scale = calculateScaleFromFileSize(selectors.getFileRecord(fileId)?.size || 0);

      // Start parallel thumbnail generation WITHOUT blocking the main thread
-      await generateThumbnails(
+      const results = await generateThumbnails(
        fileId, // Add fileId as first parameter
        arrayBuffer,
        pageNumbers,
@ -365,12 +332,15 @@ const PageEditor = ({
        },
        // Progress callback for thumbnail updates
        (progress: { completed: number; total: number; thumbnails: Array<{ pageNumber: number; thumbnail: string }> }) => {
+          console.log(`📸 PageEditor: Progress update - ${progress.completed}/${progress.total} completed, ${progress.thumbnails.length} new thumbnails`);
+          
          // Batch process thumbnails to reduce main thread work
          requestAnimationFrame(() => {
            progress.thumbnails.forEach(({ pageNumber, thumbnail }: { pageNumber: number; thumbnail: string }) => {
              // Use stable fileId for cache key
              const pageId = `${fileId}-page-${pageNumber}`;
              addThumbnailToCache(pageId, thumbnail);
+              console.log(`📸 PageEditor: Cached thumbnail for ${pageId}`);

              // Don't update context state - thumbnails stay in cache only
              // This eliminates per-page context rerenders
@ -380,9 +350,9 @@ const PageEditor = ({
        }
      );

-      // Removed verbose logging - only log errors
+      console.log(`📸 PageEditor: Thumbnail batch completed for ${file.name}. Generated ${results.length} thumbnails`);
    } catch (error) {
-      console.error('PageEditor: Thumbnail generation failed:', error);
+      console.error(`PageEditor: Thumbnail generation failed for ${file.name}:`, error);
    }
  }, [generateThumbnails, addThumbnailToCache, selectors]);

@ -415,6 +385,80 @@ const PageEditor = ({
    console.log(`📸 PageEditor: Background thumbnail generation completed for ${totalPages} pages`);
  }, [getThumbnailFromCache, generateThumbnailBatch]);

+  // Lazy thumbnail generation - only generate when needed, with intelligent batching for all files
+  const generateMissingThumbnails = useCallback(async () => {
+    if (!mergedPdfDocument || activeFileIds.length === 0) {
+      return;
+    }
+
+    const totalPages = mergedPdfDocument.totalPages;
+    if (totalPages <= activeFileIds.length) return; // Only page 1 per file, nothing to generate
+    
+    // Set a high worker limit for multi-file processing
+    // Each file may need multiple PDF document instances for parallel page processing
+    const neededWorkers = Math.min(activeFileIds.length * 3, 12); // Allow 3 workers per file, cap at 12
+    pdfWorkerManager.setMaxWorkers(neededWorkers);
+    console.log(`📸 PageEditor: Set worker limit to ${neededWorkers} for ${activeFileIds.length} files`);
+    
+    // For very large documents (2000+ pages), be much more conservative
+    const isVeryLargeDocument = totalPages > 2000;
+    
+    console.log(`📸 PageEditor: Generating thumbnails for ${activeFileIds.length} files with ${totalPages} total pages`);
+    
+    // Process files strictly sequentially to avoid PDF document contention
+    // Each file will use its own PDF document instance from the cache
+    for (const fileId of activeFileIds) {
+      const file = selectors.getFile(fileId);
+      const fileRecord = selectors.getFileRecord(fileId);
+      
+      if (!file || !fileRecord?.processedFile) continue;
+      
+      const fileTotalPages = fileRecord.processedFile.totalPages || fileRecord.processedFile.pages?.length || 1;
+      if (fileTotalPages <= 1) continue; // Only page 1 for this file, skip
+      
+      if (isVeryLargeDocument) {
+        console.log(`📸 PageEditor: Very large document (${totalPages} pages) - using minimal thumbnail generation for ${fileRecord.name}`);
+        // For very large docs, only generate the next visible batch (pages 2-25) per file to avoid UI blocking
+        const pageNumbersToGenerate = [];
+        for (let pageNum = 2; pageNum <= Math.min(25, fileTotalPages); pageNum++) {
+          const pageId = `${fileId}-page-${pageNum}`;
+          if (!getThumbnailFromCache(pageId)) {
+            pageNumbersToGenerate.push(pageNum);
+          }
+        }
+        
+        if (pageNumbersToGenerate.length > 0) {
+          console.log(`📸 PageEditor: Generating initial batch for ${fileRecord.name}: pages [${pageNumbersToGenerate.join(', ')}]`);
+          await generateThumbnailBatch(file, fileId, pageNumbersToGenerate);
+        }
+        
+        // Schedule remaining thumbnails with delay to avoid blocking
+        setTimeout(() => {
+          generateRemainingThumbnailsLazily(file, fileId, fileTotalPages, 26);
+        }, 2000); // 2 second delay before starting background generation
+      } else {
+        // For smaller documents, check which pages 2+ need thumbnails for this file
+        const pageNumbersToGenerate = [];
+        for (let pageNum = 2; pageNum <= fileTotalPages; pageNum++) {
+          const pageId = `${fileId}-page-${pageNum}`;
+          if (!getThumbnailFromCache(pageId)) {
+            pageNumbersToGenerate.push(pageNum);
+          }
+        }
+
+        if (pageNumbersToGenerate.length > 0) {
+          console.log(`📸 PageEditor: Generating thumbnails for ${fileRecord.name}: [${pageNumbersToGenerate.slice(0, 5).join(', ')}${pageNumbersToGenerate.length > 5 ? '...' : ''}]`);
+          await generateThumbnailBatch(file, fileId, pageNumbersToGenerate);
+        }
+      }
+      
+      // Delay between files to ensure proper sequential processing and worker cleanup
+      if (activeFileIds.length > 1) {
+        await new Promise(resolve => setTimeout(resolve, 500)); // Increased delay
+      }
+    }
+  }, [mergedPdfDocument, activeFileIds, selectors, getThumbnailFromCache, generateThumbnailBatch, generateRemainingThumbnailsLazily]);
+
  // Simple useEffect - just generate missing thumbnails when document is ready
  useEffect(() => {
    if (mergedPdfDocument && mergedPdfDocument.totalPages > 1) {
@ -437,7 +481,8 @@ const PageEditor = ({
      enhancedPDFProcessingService.emergencyCleanup();
      fileProcessingService.emergencyCleanup();
      pdfProcessingService.clearAll();
-      // Final emergency cleanup of all workers
+      // Reset worker limit to default and cleanup
+      pdfWorkerManager.setMaxWorkers(3); // Reset to conservative default
      pdfWorkerManager.emergencyCleanup();
    };
  }, [stopGeneration, destroyThumbnails]);
--- a/frontend/src/components/pageEditor/PageThumbnail.tsx
+++ b/frontend/src/components/pageEditor/PageThumbnail.tsx
@ -111,6 +111,43 @@ const PageThumbnail = React.memo(({
    };
  }, [page.id, originalFile, requestThumbnail, getThumbnailFromCache]); // Removed thumbnailUrl to prevent loops

+  // Poll cache for thumbnails that might be generated by other processes (like PageEditor batch generation)
+  useEffect(() => {
+    if (thumbnailUrl) {
+      return; // Already have a thumbnail
+    }
+
+    let cancelled = false;
+    
+    const pollCache = () => {
+      if (cancelled) return;
+      
+      const cachedThumbnail = getThumbnailFromCache(page.id);
+      if (cachedThumbnail) {
+        console.log(`📸 PageThumbnail: Found cached thumbnail for page ${page.pageNumber} via polling`);
+        setThumbnailUrl(cachedThumbnail);
+        return;
+      }
+      
+      // Continue polling every 1 second for up to 30 seconds
+      setTimeout(pollCache, 1000);
+    };
+    
+    // Start polling after a short delay
+    const pollTimer = setTimeout(pollCache, 500);
+    
+    // Stop polling after 30 seconds to avoid infinite polling
+    const stopTimer = setTimeout(() => {
+      cancelled = true;
+    }, 30000);
+
+    return () => {
+      cancelled = true;
+      clearTimeout(pollTimer);
+      clearTimeout(stopTimer);
+    };
+  }, [page.id, thumbnailUrl, getThumbnailFromCache]);
+

  const pageElementRef = useCallback((element: HTMLDivElement | null) => {
    if (element) {
--- a/frontend/src/services/pdfWorkerManager.ts
+++ b/frontend/src/services/pdfWorkerManager.ts
@ -194,7 +194,7 @@ class PDFWorkerManager {
   * Set maximum concurrent workers
   */
  setMaxWorkers(max: number): void {
-    this.maxWorkers = Math.max(1, Math.min(max, 10)); // Between 1-10 workers
+    this.maxWorkers = Math.max(1, Math.min(max, 15)); // Between 1-15 workers for multi-file support
    console.log(`🏭 Max workers set to ${this.maxWorkers}`);
  }
 }