Stirling-PDF/frontend/src/services/zipFileService.ts
James Brunton bd13f6bf57
Enable ESLint no-unused-vars rule (#4367)
# Description of Changes
Enable ESLint [no-unused-vars
rule](https://typescript-eslint.io/rules/no-unused-vars/)
2025-09-05 11:16:17 +00:00

345 lines
10 KiB
TypeScript

import JSZip, { JSZipObject } from 'jszip';
// Undocumented interface in JSZip for JSZipObject._data
interface CompressedObject {
compressedSize: number;
uncompressedSize: number;
crc32: number;
compression: object;
compressedContent: string|ArrayBuffer|Uint8Array|Buffer;
}
const getData = (zipEntry: JSZipObject): CompressedObject | undefined => {
return (zipEntry as any)._data as CompressedObject;
}
export interface ZipExtractionResult {
success: boolean;
extractedFiles: File[];
errors: string[];
totalFiles: number;
extractedCount: number;
}
export interface ZipValidationResult {
isValid: boolean;
fileCount: number;
totalSizeBytes: number;
containsPDFs: boolean;
errors: string[];
}
export interface ZipExtractionProgress {
currentFile: string;
extractedCount: number;
totalFiles: number;
progress: number;
}
export class ZipFileService {
private readonly maxFileSize = 100 * 1024 * 1024; // 100MB per file
private readonly maxTotalSize = 500 * 1024 * 1024; // 500MB total extraction limit
private readonly supportedExtensions = ['.pdf'];
/**
* Validate a ZIP file without extracting it
*/
async validateZipFile(file: File): Promise<ZipValidationResult> {
const result: ZipValidationResult = {
isValid: false,
fileCount: 0,
totalSizeBytes: 0,
containsPDFs: false,
errors: []
};
try {
// Check file size
if (file.size > this.maxTotalSize) {
result.errors.push(`ZIP file too large: ${this.formatFileSize(file.size)} (max: ${this.formatFileSize(this.maxTotalSize)})`);
return result;
}
// Check file type
if (!this.isZipFile(file)) {
result.errors.push('File is not a valid ZIP archive');
return result;
}
// Load and validate ZIP contents
const zip = new JSZip();
const zipContents = await zip.loadAsync(file);
let totalSize = 0;
let fileCount = 0;
let containsPDFs = false;
// Analyze ZIP contents
for (const [filename, zipEntry] of Object.entries(zipContents.files)) {
if (zipEntry.dir) {
continue; // Skip directories
}
fileCount++;
const uncompressedSize = getData(zipEntry)?.uncompressedSize || 0;
totalSize += uncompressedSize;
// Check if file is a PDF
if (this.isPdfFile(filename)) {
containsPDFs = true;
}
// Check individual file size
if (uncompressedSize > this.maxFileSize) {
result.errors.push(`File "${filename}" too large: ${this.formatFileSize(uncompressedSize)} (max: ${this.formatFileSize(this.maxFileSize)})`);
}
}
// Check total uncompressed size
if (totalSize > this.maxTotalSize) {
result.errors.push(`Total uncompressed size too large: ${this.formatFileSize(totalSize)} (max: ${this.formatFileSize(this.maxTotalSize)})`);
}
result.fileCount = fileCount;
result.totalSizeBytes = totalSize;
result.containsPDFs = containsPDFs;
result.isValid = result.errors.length === 0 && containsPDFs;
if (!containsPDFs) {
result.errors.push('ZIP file does not contain any PDF files');
}
return result;
} catch (error) {
result.errors.push(`Failed to validate ZIP file: ${error instanceof Error ? error.message : 'Unknown error'}`);
return result;
}
}
/**
* Create a ZIP file from an array of files
*/
async createZipFromFiles(files: File[], zipFilename: string): Promise<{ zipFile: File; size: number }> {
try {
const zip = new JSZip();
// Add each file to the ZIP
for (const file of files) {
const content = await file.arrayBuffer();
zip.file(file.name, content);
}
// Generate ZIP blob
const zipBlob = await zip.generateAsync({
type: 'blob',
compression: 'DEFLATE',
compressionOptions: { level: 6 }
});
const zipFile = new File([zipBlob], zipFilename, {
type: 'application/zip',
lastModified: Date.now()
});
return { zipFile, size: zipFile.size };
} catch (error) {
throw new Error(`Failed to create ZIP file: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
/**
* Extract PDF files from a ZIP archive
*/
async extractPdfFiles(
file: File,
onProgress?: (progress: ZipExtractionProgress) => void
): Promise<ZipExtractionResult> {
const result: ZipExtractionResult = {
success: false,
extractedFiles: [],
errors: [],
totalFiles: 0,
extractedCount: 0
};
try {
// Validate ZIP file first
const validation = await this.validateZipFile(file);
if (!validation.isValid) {
result.errors = validation.errors;
return result;
}
// Load ZIP contents
const zip = new JSZip();
const zipContents = await zip.loadAsync(file);
// Get all PDF files
const pdfFiles = Object.entries(zipContents.files).filter(([filename, zipEntry]) =>
!zipEntry.dir && this.isPdfFile(filename)
);
result.totalFiles = pdfFiles.length;
// Extract each PDF file
for (let i = 0; i < pdfFiles.length; i++) {
const [filename, zipEntry] = pdfFiles[i];
try {
// Report progress
if (onProgress) {
onProgress({
currentFile: filename,
extractedCount: i,
totalFiles: pdfFiles.length,
progress: (i / pdfFiles.length) * 100
});
}
// Extract file content
const content = await zipEntry.async('uint8array');
// Create File object
const extractedFile = new File([content as any], this.sanitizeFilename(filename), {
type: 'application/pdf',
lastModified: zipEntry.date?.getTime() || Date.now()
});
// Validate extracted PDF
if (await this.isValidPdfFile(extractedFile)) {
result.extractedFiles.push(extractedFile);
result.extractedCount++;
} else {
result.errors.push(`File "${filename}" is not a valid PDF`);
}
} catch (error) {
result.errors.push(`Failed to extract "${filename}": ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
// Final progress report
if (onProgress) {
onProgress({
currentFile: '',
extractedCount: result.extractedCount,
totalFiles: result.totalFiles,
progress: 100
});
}
result.success = result.extractedCount > 0;
return result;
} catch (error) {
result.errors.push(`Failed to extract ZIP file: ${error instanceof Error ? error.message : 'Unknown error'}`);
return result;
}
}
/**
* Check if a file is a ZIP file based on type and extension
*/
private isZipFile(file: File): boolean {
const validTypes = [
'application/zip',
'application/x-zip-compressed',
'application/x-zip',
'application/octet-stream' // Some browsers use this for ZIP files
];
const validExtensions = ['.zip'];
const hasValidType = validTypes.includes(file.type);
const hasValidExtension = validExtensions.some(ext =>
file.name.toLowerCase().endsWith(ext)
);
return hasValidType || hasValidExtension;
}
/**
* Check if a filename indicates a PDF file
*/
private isPdfFile(filename: string): boolean {
return filename.toLowerCase().endsWith('.pdf');
}
/**
* Validate that a file is actually a PDF by checking its header
*/
private async isValidPdfFile(file: File): Promise<boolean> {
try {
// Read first few bytes to check PDF header
const buffer = await file.slice(0, 8).arrayBuffer();
const bytes = new Uint8Array(buffer);
// Check for PDF header: %PDF-
return bytes[0] === 0x25 && // %
bytes[1] === 0x50 && // P
bytes[2] === 0x44 && // D
bytes[3] === 0x46 && // F
bytes[4] === 0x2D; // -
} catch {
return false;
}
}
/**
* Sanitize filename for safe use
*/
private sanitizeFilename(filename: string): string {
// Remove directory path and get just the filename
const basename = filename.split('/').pop() || filename;
// Remove or replace unsafe characters
return basename
.replace(/[<>:"/\\|?*]/g, '_') // Replace unsafe chars with underscore
.replace(/\s+/g, '_') // Replace spaces with underscores
.replace(/_{2,}/g, '_') // Replace multiple underscores with single
.replace(/^_|_$/g, ''); // Remove leading/trailing underscores
}
/**
* Format file size for display
*/
private formatFileSize(bytes: number): string {
if (bytes === 0) return '0 B';
const k = 1024;
const sizes = ['B', 'KB', 'MB', 'GB'];
const i = Math.floor(Math.log(bytes) / Math.log(k));
return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
}
/**
* Get file extension from filename
*/
private getFileExtension(filename: string): string {
return filename.substring(filename.lastIndexOf('.')).toLowerCase();
}
/**
* Check if ZIP file contains password protection
*/
private async isPasswordProtected(file: File): Promise<boolean> {
try {
const zip = new JSZip();
await zip.loadAsync(file);
// Check if any files are encrypted
for (const [_filename, zipEntry] of Object.entries(zip.files)) {
if (zipEntry.options?.compression === 'STORE' && getData(zipEntry)?.compressedSize === 0) {
// This might indicate encryption, but JSZip doesn't provide direct encryption detection
// We'll handle this in the extraction phase
}
}
return false; // JSZip will throw an error if password is required
} catch (error) {
// If we can't load the ZIP, it might be password protected
const errorMessage = error instanceof Error ? error.message : '';
return errorMessage.includes('password') || errorMessage.includes('encrypted');
}
}
}
// Export singleton instance
export const zipFileService = new ZipFileService();