Mirror of https://github.com/Stirling-Tools/Stirling-PDF.git (synced 2025-07-27 23:55:21 +00:00)

# Description of Changes

- File context for managing files between tools and views
- Optimisation for large files
- Updated Split to work with the new file system and to match Matt's stepped design more closely

Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
546 lines · 15 KiB · TypeScript

import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
import { ProcessedFile, ProcessingState, PDFPage, ProcessingStrategy, ProcessingConfig, ProcessingMetrics } from '../types/processing';
import { ProcessingCache } from './processingCache';
import { FileHasher } from '../utils/fileHash';
import { FileAnalyzer } from './fileAnalyzer';
import { ProcessingErrorHandler } from './processingErrorHandler';

// Set up PDF.js worker
GlobalWorkerOptions.workerSrc = '/pdf.worker.js';

export class EnhancedPDFProcessingService {
  private static instance: EnhancedPDFProcessingService;
  private cache = new ProcessingCache();
  private processing = new Map<string, ProcessingState>();
  private processingListeners = new Set<(states: Map<string, ProcessingState>) => void>();
  private metrics: ProcessingMetrics = {
    totalFiles: 0,
    completedFiles: 0,
    failedFiles: 0,
    averageProcessingTime: 0,
    cacheHitRate: 0,
    memoryUsage: 0
  };

  private defaultConfig: ProcessingConfig = {
    strategy: 'immediate_full',
    chunkSize: 20,
    thumbnailQuality: 'medium',
    priorityPageCount: 10,
    useWebWorker: false,
    maxRetries: 3,
    timeoutMs: 300000 // 5 minutes
  };
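
  // Note: the 'strategy' above is only a fallback. processFile() normally replaces it
  // with FileAnalyzer's recommended strategy for the specific file, and an explicit
  // customConfig passed by the caller overrides both.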

  private constructor() {}

  static getInstance(): EnhancedPDFProcessingService {
    if (!EnhancedPDFProcessingService.instance) {
      EnhancedPDFProcessingService.instance = new EnhancedPDFProcessingService();
    }
    return EnhancedPDFProcessingService.instance;
  }

  /**
   * Process a file with intelligent strategy selection
   */
  async processFile(file: File, customConfig?: Partial<ProcessingConfig>): Promise<ProcessedFile | null> {
    const fileKey = await this.generateFileKey(file);

    // Check cache first
    const cached = this.cache.get(fileKey);
    if (cached) {
      this.updateMetrics('cacheHit');
      return cached;
    }

    // Check if already processing
    if (this.processing.has(fileKey)) {
      return null;
    }

    // Analyze file to determine optimal strategy
    const analysis = await FileAnalyzer.analyzeFile(file);
    if (analysis.isCorrupted) {
      throw new Error(`File ${file.name} appears to be corrupted`);
    }

    // Create processing config
    const config: ProcessingConfig = {
      ...this.defaultConfig,
      strategy: analysis.recommendedStrategy,
      ...customConfig
    };

    // Start processing
    this.startProcessing(file, fileKey, config, analysis.estimatedProcessingTime);
    return null;
  }
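
  // Note on the contract above: processFile() resolves with a ProcessedFile only on a
  // cache hit. Otherwise it starts background processing and resolves with null (as it
  // also does while the same file is still being processed); callers are expected to
  // subscribe via onProcessingChange() and request the file again once its state
  // reports 'completed', at which point the cached result is returned.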

  /**
   * Start processing a file with the specified configuration
   */
  private async startProcessing(
    file: File,
    fileKey: string,
    config: ProcessingConfig,
    estimatedTime: number
  ): Promise<void> {
    // Create cancellation token
    const cancellationToken = ProcessingErrorHandler.createTimeoutController(config.timeoutMs);

    // Set initial state
    const state: ProcessingState = {
      fileKey,
      fileName: file.name,
      status: 'processing',
      progress: 0,
      strategy: config.strategy,
      startedAt: Date.now(),
      estimatedTimeRemaining: estimatedTime,
      cancellationToken
    };

    this.processing.set(fileKey, state);
    this.notifyListeners();
    this.updateMetrics('started');

    try {
      // Execute processing with retry logic
      const processedFile = await ProcessingErrorHandler.executeWithRetry(
        () => this.executeProcessingStrategy(file, config, state),
        (error) => {
          state.error = error;
          this.notifyListeners();
        },
        config.maxRetries
      );

      // Cache the result
      this.cache.set(fileKey, processedFile);

      // Update state to completed
      state.status = 'completed';
      state.progress = 100;
      state.completedAt = Date.now();
      this.notifyListeners();
      this.updateMetrics('completed', Date.now() - state.startedAt);

      // Remove from processing map after brief delay
      setTimeout(() => {
        this.processing.delete(fileKey);
        this.notifyListeners();
      }, 2000);

    } catch (error) {
      console.error('Processing failed for', file.name, ':', error);

      const processingError = ProcessingErrorHandler.createProcessingError(error);
      state.status = 'error';
      state.error = processingError;
      this.notifyListeners();
      this.updateMetrics('failed');

      // Remove failed processing after delay
      setTimeout(() => {
        this.processing.delete(fileKey);
        this.notifyListeners();
      }, 10000);
    }
  }

  /**
   * Execute the actual processing based on strategy
   */
  private async executeProcessingStrategy(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    switch (config.strategy) {
      case 'immediate_full':
        return this.processImmediateFull(file, config, state);

      case 'priority_pages':
        return this.processPriorityPages(file, config, state);

      case 'progressive_chunked':
        return this.processProgressiveChunked(file, config, state);

      case 'metadata_only':
        return this.processMetadataOnly(file, config, state);

      default:
        return this.processImmediateFull(file, config, state);
    }
  }

  /**
   * Process all pages immediately (for small files)
   */
  private async processImmediateFull(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    const arrayBuffer = await file.arrayBuffer();
    const pdf = await getDocument({ data: arrayBuffer }).promise;
    const totalPages = pdf.numPages;

    state.progress = 10;
    this.notifyListeners();

    const pages: PDFPage[] = [];

    for (let i = 1; i <= totalPages; i++) {
      // Check for cancellation
      if (state.cancellationToken?.signal.aborted) {
        pdf.destroy();
        throw new Error('Processing cancelled');
      }

      const page = await pdf.getPage(i);
      const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality);

      pages.push({
        id: `${file.name}-page-${i}`,
        pageNumber: i,
        thumbnail,
        rotation: 0,
        selected: false
      });

      // Update progress
      state.progress = 10 + (i / totalPages) * 85;
      state.currentPage = i;
      this.notifyListeners();
    }

    pdf.destroy();
    state.progress = 100;
    this.notifyListeners();

    return this.createProcessedFile(file, pages, totalPages);
  }

  /**
   * Process priority pages first, then queue the rest
   */
  private async processPriorityPages(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    const arrayBuffer = await file.arrayBuffer();
    const pdf = await getDocument({ data: arrayBuffer }).promise;
    const totalPages = pdf.numPages;

    state.progress = 10;
    this.notifyListeners();

    const pages: PDFPage[] = [];
    const priorityCount = Math.min(config.priorityPageCount, totalPages);

    // Process priority pages first
    for (let i = 1; i <= priorityCount; i++) {
      if (state.cancellationToken?.signal.aborted) {
        pdf.destroy();
        throw new Error('Processing cancelled');
      }

      const page = await pdf.getPage(i);
      const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality);

      pages.push({
        id: `${file.name}-page-${i}`,
        pageNumber: i,
        thumbnail,
        rotation: 0,
        selected: false
      });

      state.progress = 10 + (i / priorityCount) * 60;
      state.currentPage = i;
      this.notifyListeners();
    }

    // Create placeholder pages for remaining pages
    for (let i = priorityCount + 1; i <= totalPages; i++) {
      pages.push({
        id: `${file.name}-page-${i}`,
        pageNumber: i,
        thumbnail: null, // Will be loaded lazily
        rotation: 0,
        selected: false
      });
    }

    pdf.destroy();
    state.progress = 100;
    this.notifyListeners();

    return this.createProcessedFile(file, pages, totalPages);
  }

  /**
   * Process in chunks with breaks between chunks
   */
  private async processProgressiveChunked(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    const arrayBuffer = await file.arrayBuffer();
    const pdf = await getDocument({ data: arrayBuffer }).promise;
    const totalPages = pdf.numPages;

    state.progress = 10;
    this.notifyListeners();

    const pages: PDFPage[] = [];
    const chunkSize = config.chunkSize;
    let processedPages = 0;

    // Process first chunk immediately
    const firstChunkEnd = Math.min(chunkSize, totalPages);

    for (let i = 1; i <= firstChunkEnd; i++) {
      if (state.cancellationToken?.signal.aborted) {
        pdf.destroy();
        throw new Error('Processing cancelled');
      }

      const page = await pdf.getPage(i);
      const thumbnail = await this.renderPageThumbnail(page, config.thumbnailQuality);

      pages.push({
        id: `${file.name}-page-${i}`,
        pageNumber: i,
        thumbnail,
        rotation: 0,
        selected: false
      });

      processedPages++;
      state.progress = 10 + (processedPages / totalPages) * 70;
      state.currentPage = i;
      this.notifyListeners();

      // Small delay to prevent UI blocking
      if (i % 5 === 0) {
        await new Promise(resolve => setTimeout(resolve, 10));
      }
    }

    // Create placeholders for remaining pages
    for (let i = firstChunkEnd + 1; i <= totalPages; i++) {
      pages.push({
        id: `${file.name}-page-${i}`,
        pageNumber: i,
        thumbnail: null,
        rotation: 0,
        selected: false
      });
    }

    pdf.destroy();
    state.progress = 100;
    this.notifyListeners();

    return this.createProcessedFile(file, pages, totalPages);
  }

  /**
   * Process metadata only (for very large files)
   */
  private async processMetadataOnly(
    file: File,
    config: ProcessingConfig,
    state: ProcessingState
  ): Promise<ProcessedFile> {
    const arrayBuffer = await file.arrayBuffer();
    const pdf = await getDocument({ data: arrayBuffer }).promise;
    const totalPages = pdf.numPages;

    state.progress = 50;
    this.notifyListeners();

    // Create placeholder pages without thumbnails
    const pages: PDFPage[] = [];
    for (let i = 1; i <= totalPages; i++) {
      pages.push({
        id: `${file.name}-page-${i}`,
        pageNumber: i,
        thumbnail: null,
        rotation: 0,
        selected: false
      });
    }

    pdf.destroy();
    state.progress = 100;
    this.notifyListeners();

    return this.createProcessedFile(file, pages, totalPages);
  }

  /**
   * Render a page thumbnail with specified quality
   */
  private async renderPageThumbnail(page: any, quality: 'low' | 'medium' | 'high'): Promise<string> {
    const scales = { low: 0.2, medium: 0.5, high: 0.8 }; // Reduced low quality for page editor
    const scale = scales[quality];

    const viewport = page.getViewport({ scale });
    const canvas = document.createElement('canvas');
    canvas.width = viewport.width;
    canvas.height = viewport.height;

    const context = canvas.getContext('2d');
    if (!context) {
      throw new Error('Could not get canvas context');
    }

    await page.render({ canvasContext: context, viewport }).promise;
    return canvas.toDataURL('image/jpeg', 0.8); // Use JPEG for better compression
  }
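
  // The thumbnails produced above are JPEG data URLs stored on each PDFPage, so they
  // stay in memory for as long as the ProcessedFile remains in the cache; the reduced
  // render scales keep that footprint down for large documents.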

  /**
   * Create a ProcessedFile object
   */
  private createProcessedFile(file: File, pages: PDFPage[], totalPages: number): ProcessedFile {
    return {
      id: `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
      pages,
      totalPages,
      metadata: {
        title: file.name,
        createdAt: new Date().toISOString(),
        modifiedAt: new Date().toISOString()
      }
    };
  }

  /**
   * Generate a unique, collision-resistant cache key
   */
  private async generateFileKey(file: File): Promise<string> {
    return await FileHasher.generateHybridHash(file);
  }

  /**
   * Cancel processing for a specific file
   */
  cancelProcessing(fileKey: string): void {
    const state = this.processing.get(fileKey);
    if (state && state.cancellationToken) {
      state.cancellationToken.abort();
      state.status = 'cancelled';
      this.notifyListeners();
    }
  }

  /**
   * Update processing metrics
   */
  private updateMetrics(event: 'started' | 'completed' | 'failed' | 'cacheHit', processingTime?: number): void {
    switch (event) {
      case 'started':
        this.metrics.totalFiles++;
        break;
      case 'completed':
        this.metrics.completedFiles++;
        if (processingTime) {
          // Update rolling average
          const totalProcessingTime = this.metrics.averageProcessingTime * (this.metrics.completedFiles - 1) + processingTime;
          this.metrics.averageProcessingTime = totalProcessingTime / this.metrics.completedFiles;
        }
        break;
      case 'failed':
        this.metrics.failedFiles++;
        break;
      case 'cacheHit': {
        // Update cache hit rate
        const totalAttempts = this.metrics.totalFiles + 1;
        this.metrics.cacheHitRate = (this.metrics.cacheHitRate * this.metrics.totalFiles + 1) / totalAttempts;
        break;
      }
    }
  }

  /**
   * Get processing metrics
   */
  getMetrics(): ProcessingMetrics {
    return { ...this.metrics };
  }

  /**
   * State subscription for components
   */
  onProcessingChange(callback: (states: Map<string, ProcessingState>) => void): () => void {
    this.processingListeners.add(callback);
    return () => this.processingListeners.delete(callback);
  }

  getProcessingStates(): Map<string, ProcessingState> {
    return new Map(this.processing);
  }

  private notifyListeners(): void {
    this.processingListeners.forEach(callback => callback(this.processing));
  }

  /**
   * Cleanup method for removed files
   */
  cleanup(removedFiles: File[]): void {
    // generateFileKey() is async, so wait for every removal to complete
    // before notifying listeners.
    void Promise.all(removedFiles.map(async (file) => {
      const key = await this.generateFileKey(file);
      this.cache.delete(key);
      this.cancelProcessing(key);
      this.processing.delete(key);
    })).then(() => this.notifyListeners());
  }

  /**
   * Clear all processing for view switches
   */
  clearAllProcessing(): void {
    // Cancel all ongoing processing
    this.processing.forEach((state) => {
      if (state.cancellationToken) {
        state.cancellationToken.abort();
      }
    });

    // Clear processing states
    this.processing.clear();
    this.notifyListeners();

    // Force memory cleanup hint; window.gc is only present when the browser is
    // launched with the V8 --expose-gc flag (e.g. during local profiling)
    if (typeof window !== 'undefined' && (window as any).gc) {
      setTimeout(() => (window as any).gc(), 100);
    }
  }

  /**
   * Get cache statistics
   */
  getCacheStats() {
    return this.cache.getStats();
  }

  /**
   * Clear all cache and processing
   */
  clearAll(): void {
    this.cache.clear();
    this.processing.clear();
    this.notifyListeners();
  }
}

// Export singleton instance
export const enhancedPDFProcessingService = EnhancedPDFProcessingService.getInstance();
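
For reference, here is a minimal usage sketch showing how a caller is expected to drive the service: subscribe to processing state, submit a file, and request it again once its state reports completion so the cached result is returned. The `loadPdf` helper and the import path are hypothetical and only illustrate the flow implemented above.

```typescript
import { enhancedPDFProcessingService } from './enhancedPDFProcessingService';

async function loadPdf(file: File): Promise<void> {
  // Subscribe before submitting so no state updates are missed.
  const unsubscribe = enhancedPDFProcessingService.onProcessingChange(async (states) => {
    for (const [, state] of states) {
      if (state.fileName === file.name && state.status === 'completed') {
        // The result is now cached, so a second call resolves with the ProcessedFile.
        const processed = await enhancedPDFProcessingService.processFile(file);
        console.log(`Processed ${processed?.totalPages} pages of ${file.name}`);
        unsubscribe();
      }
    }
  });

  // First call: resolves with the cached file on a hit, or null while background
  // processing runs (progress is reported through the listener above).
  const immediate = await enhancedPDFProcessingService.processFile(file);
  if (immediate) {
    console.log(`Cache hit: ${immediate.totalPages} pages`);
    unsubscribe();
  }
}
```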