Stirling-PDF/frontend/src/services/fileProcessingService.ts
James Brunton e142af2863
V2 Make FileId type opaque and use consistently throughout project (#4307)
# Description of Changes
The `FileId` type in V2 currently is just defined to be a string. This
makes it really easy to accidentally pass strings into things accepting
file IDs (such as file names). This PR makes the `FileId` type [an
opaque
type](https://www.geeksforgeeks.org/typescript/opaque-types-in-typescript/),
so it is compatible with things accepting strings (arguably not ideal
for this...) but strings are not compatible with it without explicit
conversion.

The PR also includes changes to use `FileId` consistently throughout the
project (everywhere I could find uses of `fileId: string`), so that we
have the maximum benefit from the type safety.

> [!note]
> I've marked quite a few things as `FIX ME` where we're passing names
in as IDs. If that is intended behaviour, I'm happy to remove the fix me
and insert a cast instead, but they probably need comments explaining
why we're using a file name as an ID.
2025-08-28 09:56:07 +00:00

211 lines
6.4 KiB
TypeScript

/**
* Centralized file processing service
* Handles metadata discovery, page counting, and thumbnail generation
* Called when files are added to FileContext, before any view sees them
*/
import * as pdfjsLib from 'pdfjs-dist';
import { generateThumbnailForFile } from '../utils/thumbnailUtils';
import { pdfWorkerManager } from './pdfWorkerManager';
import { FileId } from '../types/file';
/**
 * Metadata produced by processing a single file.
 */
export interface ProcessedFileMetadata {
  /** Number of pages discovered; 0 means the page count could not be determined. */
  totalPages: number;
  /** One entry per discovered page, in page order. */
  pages: {
    /** 1-based page number. */
    pageNumber: number;
    /** Thumbnail for this page, when one has been generated. */
    thumbnail?: string;
    /** Rotation applied to the page. */
    rotation: number;
    /** Whether a split is marked before this page. */
    splitBefore: boolean;
  }[];
  /** Page 1 thumbnail for FileEditor. */
  thumbnailUrl?: string;
  /** Timestamp (as returned by `Date.now()`) at which processing finished. */
  lastProcessed: number;
}
/**
 * Outcome of a processing attempt.
 */
export interface FileProcessingResult {
  /** `true` when processing ran to completion. */
  success: boolean;
  /** Human-readable failure reason; set when processing did not succeed. */
  error?: string;
  /** Extracted metadata; set when processing succeeded. */
  metadata?: ProcessedFileMetadata;
}
/**
 * Bookkeeping for one in-flight processing run.
 */
interface ProcessingOperation {
  /** Controller whose signal is polled by the run to detect cancellation. */
  abortController: AbortController;
  /** Promise for the run's eventual result. */
  promise: Promise<FileProcessingResult>;
}
/**
 * Processes files to discover their page count and generate a page 1
 * thumbnail, de-duplicating concurrent requests for the same file ID and
 * supporting cooperative cancellation.
 */
class FileProcessingService {
  /** In-flight operations keyed by file ID; an entry is removed once its operation settles. */
  private readonly processingCache = new Map<FileId, ProcessingOperation>();

  /**
   * Process a file to extract metadata, page count, and generate thumbnails.
   * This is the single source of truth for file processing.
   *
   * Concurrent calls for the same `fileId` share one operation. An operation
   * that has already been cancelled is not shared: a fresh one is started,
   * because the cancelled one can only resolve to a failure.
   *
   * @param file - The file to inspect.
   * @param fileId - Identifier used to de-duplicate and cancel the operation.
   * @returns The processing outcome; failures are reported via `success: false`
   *          rather than a rejected promise.
   */
  async processFile(file: File, fileId: FileId): Promise<FileProcessingResult> {
    const existingOperation = this.processingCache.get(fileId);
    if (existingOperation && !existingOperation.abortController.signal.aborted) {
      console.log(`📁 FileProcessingService: Using cached processing for ${file.name}`);
      return existingOperation.promise;
    }

    const abortController = new AbortController();
    const promise = this.performProcessing(file, fileId, abortController);
    const operation: ProcessingOperation = { promise, abortController };
    this.processingCache.set(fileId, operation);

    // Remove the entry once the operation settles, but only if it is still
    // ours: after clearCache() or a restart following cancellation, the slot
    // may already belong to a newer operation that must not be evicted.
    const release = (): void => {
      if (this.processingCache.get(fileId) === operation) {
        this.processingCache.delete(fileId);
      }
    };
    void promise.then(release, release);

    return promise;
  }

  /**
   * Run the processing pipeline for one file.
   *
   * Cancellation is checked between asynchronous steps. The checks live
   * outside the best-effort try/catch blocks of the individual steps so that a
   * cancellation is never mistaken for a PDF.js or thumbnail failure.
   *
   * @returns A success result with metadata, or a failure result carrying the
   *          error message (including `'Processing cancelled'`).
   */
  private async performProcessing(file: File, fileId: FileId, abortController: AbortController): Promise<FileProcessingResult> {
    console.log(`📁 FileProcessingService: Starting processing for ${file.name} (${fileId})`);
    const { signal } = abortController;

    try {
      this.throwIfCancelled(signal);

      // Non-PDF files are treated as a single page.
      let totalPages = 1;

      if (file.type === 'application/pdf') {
        const arrayBuffer = await file.arrayBuffer();
        this.throwIfCancelled(signal);

        totalPages = await this.discoverPageCount(file, arrayBuffer);
        this.throwIfCancelled(signal);
      }

      const thumbnailUrl = await this.generateFirstPageThumbnail(file);
      this.throwIfCancelled(signal);

      // Create page structure
      const pages = Array.from({ length: totalPages }, (_, index) => ({
        pageNumber: index + 1,
        thumbnail: index === 0 ? thumbnailUrl : undefined, // Only page 1 gets thumbnail initially
        rotation: 0,
        splitBefore: false
      }));

      const metadata: ProcessedFileMetadata = {
        totalPages,
        pages,
        thumbnailUrl, // For FileEditor display
        lastProcessed: Date.now()
      };

      console.log(`📁 FileProcessingService: Processing complete for ${file.name} - ${totalPages} pages`);
      return {
        success: true,
        metadata
      };
    } catch (error) {
      console.error(`📁 FileProcessingService: Processing failed for ${file.name}:`, error);
      return {
        success: false,
        error: error instanceof Error ? error.message : 'Unknown processing error'
      };
    }
  }

  /** Throw the cancellation error if the operation's signal has been aborted. */
  private throwIfCancelled(signal: AbortSignal): void {
    if (signal.aborted) {
      throw new Error('Processing cancelled');
    }
  }

  /**
   * Discover the page count of a PDF using PDF.js.
   *
   * @returns The page count, or 0 when PDF.js fails (unknown page count - UI
   *          will hide page count display).
   */
  private async discoverPageCount(file: File, arrayBuffer: ArrayBuffer): Promise<number> {
    try {
      const pdfDoc = await pdfWorkerManager.createDocument(arrayBuffer, {
        disableAutoFetch: true,
        disableStream: true
      });
      try {
        const pageCount = pdfDoc.numPages;
        console.log(`📁 FileProcessingService: PDF.js discovered ${pageCount} pages for ${file.name}`);
        return pageCount;
      } finally {
        // Always hand the document back, even if reading it threw.
        pdfWorkerManager.destroyDocument(pdfDoc);
      }
    } catch (pdfError) {
      console.warn(`📁 FileProcessingService: PDF.js failed for ${file.name}, setting pages to 0:`, pdfError);
      return 0;
    }
  }

  /**
   * Generate the page 1 thumbnail. Best effort: a failure is logged and
   * reported as `undefined` instead of failing the whole operation.
   */
  private async generateFirstPageThumbnail(file: File): Promise<string | undefined> {
    try {
      const thumbnailUrl = await generateThumbnailForFile(file);
      console.log(`📁 FileProcessingService: Generated thumbnail for ${file.name}`);
      return thumbnailUrl;
    } catch (thumbError) {
      console.warn(`📁 FileProcessingService: Thumbnail generation failed for ${file.name}:`, thumbError);
      return undefined;
    }
  }

  /**
   * Clear all processing caches.
   * This only forgets the tracked operations; it does not cancel them.
   */
  clearCache(): void {
    this.processingCache.clear();
  }

  /**
   * Check if a file is currently being processed.
   */
  isProcessing(fileId: FileId): boolean {
    return this.processingCache.has(fileId);
  }

  /**
   * Cancel processing for a specific file.
   *
   * @returns `true` if an in-flight operation was found and signalled,
   *          `false` if nothing was being processed under that ID.
   */
  cancelProcessing(fileId: FileId): boolean {
    const operation = this.processingCache.get(fileId);
    if (!operation) {
      return false;
    }
    operation.abortController.abort();
    console.log(`📁 FileProcessingService: Cancelled processing for ${fileId}`);
    return true;
  }

  /**
   * Cancel all ongoing processing operations.
   */
  cancelAllProcessing(): void {
    this.processingCache.forEach((operation, fileId) => {
      operation.abortController.abort();
      console.log(`📁 FileProcessingService: Cancelled processing for ${fileId}`);
    });
    console.log(`📁 FileProcessingService: Cancelled ${this.processingCache.size} processing operations`);
  }

  /**
   * Emergency cleanup - cancel all processing and destroy workers.
   */
  emergencyCleanup(): void {
    this.cancelAllProcessing();
    this.clearCache();
    pdfWorkerManager.destroyAllDocuments();
  }
}
// Export singleton instance. The in-flight operation cache is an instance
// field, so de-duplication and cancellation only work across callers that
// share this one instance.
export const fileProcessingService = new FileProcessingService();