diff --git a/frontend/src/services/pdfMetadataService.ts b/frontend/src/services/pdfMetadataService.ts deleted file mode 100644 index 46ba32c52..000000000 --- a/frontend/src/services/pdfMetadataService.ts +++ /dev/null @@ -1,450 +0,0 @@ -/** - * PDF Metadata Service - File History Tracking with pdf-lib - * - * Handles injection and extraction of file history metadata in PDFs using pdf-lib. - * This service embeds file history directly into PDF metadata, making it persistent - * across all tool operations and downloads. - */ - -import { PDFDocument } from 'pdf-lib'; -import { ContentCache, type CacheConfig } from '../utils/ContentCache'; - -const DEBUG = process.env.NODE_ENV === 'development'; - -/** - * Tool operation metadata for history tracking - * Note: Parameters removed for security - sensitive data like passwords should not be stored - */ -export interface ToolOperation { - toolName: string; - timestamp: number; -} - -/** - * Complete file history metadata structure - * Uses standard PDF metadata fields (Creator, Producer, CreationDate, ModificationDate) - * and embeds Stirling-specific history in keywords - */ -export interface PDFHistoryMetadata { - stirlingHistory: { - originalFileId: string; - parentFileId?: string; - versionNumber: number; - toolChain: ToolOperation[]; - formatVersion: '1.0'; - }; -} - -/** - * Service for managing PDF file history metadata - */ -export class PDFMetadataService { - private static readonly HISTORY_KEYWORD = 'stirling-history'; - private static readonly FORMAT_VERSION = '1.0'; - - private metadataCache: ContentCache; - - constructor(cacheConfig?: Partial) { - const defaultConfig: CacheConfig = { - ttl: 5 * 60 * 1000, // 5 minutes - maxSize: 100, // 100 files - enableWarnings: DEBUG - }; - - this.metadataCache = new ContentCache({ - ...defaultConfig, - ...cacheConfig - }); - } - - /** - * Inject file history metadata into a PDF - */ - async injectHistoryMetadata( - pdfBytes: ArrayBuffer, - originalFileId: string, - parentFileId?: string, - toolChain: ToolOperation[] = [], - versionNumber: number = 1 - ): Promise { - try { - const pdfDoc = await PDFDocument.load(pdfBytes, { ignoreEncryption: true }); - - const historyMetadata: PDFHistoryMetadata = { - stirlingHistory: { - originalFileId, - parentFileId, - versionNumber, - toolChain: [...toolChain], - formatVersion: PDFMetadataService.FORMAT_VERSION - } - }; - - // Set Stirling-PDF identification fields only (don't touch dates) - pdfDoc.setCreator('Stirling-PDF'); - pdfDoc.setProducer('Stirling-PDF'); - - // Embed history metadata in keywords field (most compatible) - const historyJson = JSON.stringify(historyMetadata); - const existingKeywords = pdfDoc.getKeywords(); - - // Handle keywords as array (pdf-lib stores them as array) - let keywordList: string[] = []; - if (Array.isArray(existingKeywords)) { - // Remove any existing history keywords to avoid duplicates - keywordList = existingKeywords.filter(keyword => - !keyword.startsWith(`${PDFMetadataService.HISTORY_KEYWORD}:`) - ); - } else if (existingKeywords) { - // Remove history from single keyword string - const cleanKeyword = this.extractHistoryFromKeywords(existingKeywords, true); - if (cleanKeyword) { - keywordList = [cleanKeyword]; - } - } - - // Add our new history metadata as a keyword (replacing any previous history) - const historyKeyword = `${PDFMetadataService.HISTORY_KEYWORD}:${historyJson}`; - keywordList.push(historyKeyword); - - pdfDoc.setKeywords(keywordList); - - if (DEBUG) { - console.log('📄 Injected PDF history metadata:', { - originalFileId, - parentFileId, - versionNumber, - toolCount: toolChain.length - }); - } - - const savedPdfBytes = await pdfDoc.save(); - // Convert Uint8Array to ArrayBuffer - const arrayBuffer = new ArrayBuffer(savedPdfBytes.byteLength); - new Uint8Array(arrayBuffer).set(savedPdfBytes); - return arrayBuffer; - } catch (error) { - if (DEBUG) console.error('📄 Failed to inject PDF metadata:', error); - // Return original bytes if metadata injection fails - return pdfBytes; - } - } - - /** - * Extract file history metadata from a PDF - */ - async extractHistoryMetadata(pdfBytes: ArrayBuffer): Promise { - const cacheKey = this.metadataCache.generateKeyFromBuffer(pdfBytes); - - // Check cache first - const cached = this.metadataCache.get(cacheKey); - if (cached !== null) { - return cached; - } - - // Extract from PDF - const metadata = await this.extractHistoryMetadataInternal(pdfBytes); - - // Cache the result - this.metadataCache.set(cacheKey, metadata); - - return metadata; - } - - /** - * Internal method for actual PDF metadata extraction - */ - private async extractHistoryMetadataInternal(pdfBytes: ArrayBuffer): Promise { - try { - const pdfDoc = await PDFDocument.load(pdfBytes, { ignoreEncryption: true }); - const keywords = pdfDoc.getKeywords(); - - // Look for history keyword directly in array or convert to string - let historyJson: string | null = null; - - if (Array.isArray(keywords)) { - // Search through keywords array for our history keyword - get the LATEST one - const historyKeywords = keywords.filter(keyword => - keyword.startsWith(`${PDFMetadataService.HISTORY_KEYWORD}:`) - ); - - if (historyKeywords.length > 0) { - // If multiple history keywords exist, parse all and get the highest version number - let latestVersionNumber = 0; - - for (const historyKeyword of historyKeywords) { - try { - const json = historyKeyword.substring(`${PDFMetadataService.HISTORY_KEYWORD}:`.length); - const parsed = JSON.parse(json) as PDFHistoryMetadata; - - if (parsed.stirlingHistory.versionNumber > latestVersionNumber) { - latestVersionNumber = parsed.stirlingHistory.versionNumber; - historyJson = json; - } - } catch { - // Silent fallback for corrupted history - } - } - } - } else if (keywords) { - // Fallback to string parsing - historyJson = this.extractHistoryFromKeywords(keywords); - } - - if (!historyJson) return null; - - const metadata = JSON.parse(historyJson) as PDFHistoryMetadata; - - // Validate metadata structure - if (!this.isValidHistoryMetadata(metadata)) { - return null; - } - - return metadata; - } catch (error) { - if (DEBUG) console.error('📄 Failed to extract PDF metadata:', error); - return null; - } - } - - /** - * Add a tool operation to existing PDF history - */ - async addToolOperation( - pdfBytes: ArrayBuffer, - toolOperation: ToolOperation - ): Promise { - try { - // Extract existing history - const existingHistory = await this.extractHistoryMetadata(pdfBytes); - - if (!existingHistory) { - if (DEBUG) console.warn('📄 No existing history found, cannot add tool operation'); - return pdfBytes; - } - - // Add new tool operation - const updatedToolChain = [...existingHistory.stirlingHistory.toolChain, toolOperation]; - - // Re-inject with updated history - return await this.injectHistoryMetadata( - pdfBytes, - existingHistory.stirlingHistory.originalFileId, - existingHistory.stirlingHistory.parentFileId, - updatedToolChain, - existingHistory.stirlingHistory.versionNumber - ); - } catch (error) { - if (DEBUG) console.error('📄 Failed to add tool operation:', error); - return pdfBytes; - } - } - - /** - * Create a new version of a PDF with incremented version number - */ - async createNewVersion( - pdfBytes: ArrayBuffer, - parentFileId: string, - toolOperation: ToolOperation - ): Promise { - try { - const parentHistory = await this.extractHistoryMetadata(pdfBytes); - - const originalFileId = parentHistory?.stirlingHistory.originalFileId || parentFileId; - const parentToolChain = parentHistory?.stirlingHistory.toolChain || []; - const newVersionNumber = (parentHistory?.stirlingHistory.versionNumber || 0) + 1; - - // Create new tool chain with the new operation - const newToolChain = [...parentToolChain, toolOperation]; - - return await this.injectHistoryMetadata( - pdfBytes, - originalFileId, - parentFileId, - newToolChain, - newVersionNumber - ); - } catch (error) { - if (DEBUG) console.error('📄 Failed to create new version:', error); - return pdfBytes; - } - } - - /** - * Extract standard PDF document metadata - */ - async extractStandardMetadata(pdfBytes: ArrayBuffer): Promise<{ - title?: string; - author?: string; - subject?: string; - creator?: string; - producer?: string; - creationDate?: Date; - modificationDate?: Date; - } | null> { - try { - const pdfDoc = await PDFDocument.load(pdfBytes, { ignoreEncryption: true }); - - return { - title: pdfDoc.getTitle() || undefined, - author: pdfDoc.getAuthor() || undefined, - subject: pdfDoc.getSubject() || undefined, - creator: pdfDoc.getCreator() || undefined, - producer: pdfDoc.getProducer() || undefined, - creationDate: pdfDoc.getCreationDate() || undefined, - modificationDate: pdfDoc.getModificationDate() || undefined - }; - } catch (error) { - if (DEBUG) console.warn('📄 Failed to extract standard PDF metadata:', error); - return null; - } - } - - /** - * Verify that tool preserved standard PDF metadata - * Logs warnings for tools that strip metadata - */ - async verifyMetadataPreservation( - originalBytes: ArrayBuffer, - processedBytes: ArrayBuffer, - toolName: string - ): Promise { - try { - const [originalMetadata, processedMetadata] = await Promise.all([ - this.extractStandardMetadata(originalBytes), - this.extractStandardMetadata(processedBytes) - ]); - - if (!originalMetadata || !processedMetadata) return; - - // Check each metadata field for preservation - const issues: string[] = []; - - if (originalMetadata.title && !processedMetadata.title) { - issues.push('Title stripped'); - } - if (originalMetadata.author && !processedMetadata.author) { - issues.push('Author stripped'); - } - if (originalMetadata.subject && !processedMetadata.subject) { - issues.push('Subject stripped'); - } - if (originalMetadata.creationDate && !processedMetadata.creationDate) { - issues.push('CreationDate stripped'); - } - if (originalMetadata.creationDate && processedMetadata.creationDate && - Math.abs(originalMetadata.creationDate.getTime() - processedMetadata.creationDate.getTime()) > 1000) { - issues.push(`CreationDate modified (${originalMetadata.creationDate.toISOString()} → ${processedMetadata.creationDate.toISOString()})`); - } - - // Note: We don't check ModificationDate preservation since we use File.lastModified as source of truth - - if (issues.length > 0) { - console.warn(`⚠️ METADATA LOSS: Tool '${toolName}' did not preserve PDF metadata:`, issues.join(', ')); - console.warn(`⚠️ This backend tool should be updated to preserve standard PDF metadata fields.`); - } else { - console.log(`✅ METADATA PRESERVED: Tool '${toolName}' correctly preserved all PDF metadata`); - } - - } catch (error) { - if (DEBUG) console.warn(`📄 Failed to verify metadata preservation for ${toolName}:`, error); - } - } - - /** - * Check if a PDF has Stirling history metadata - */ - async hasStirlingHistory(pdfBytes: ArrayBuffer): Promise { - const metadata = await this.extractHistoryMetadata(pdfBytes); - return metadata !== null; - } - - /** - * Get version information from PDF - */ - async getVersionInfo(pdfBytes: ArrayBuffer): Promise<{ - originalFileId: string; - versionNumber: number; - toolCount: number; - parentFileId?: string; - } | null> { - const metadata = await this.extractHistoryMetadata(pdfBytes); - if (!metadata) return null; - - return { - originalFileId: metadata.stirlingHistory.originalFileId, - versionNumber: metadata.stirlingHistory.versionNumber, - toolCount: metadata.stirlingHistory.toolChain.length, - parentFileId: metadata.stirlingHistory.parentFileId - }; - } - - /** - * Embed history JSON in keywords field with delimiter - */ - private embedHistoryInKeywords(existingKeywords: string, historyJson: string): string { - // Remove any existing history - const cleanKeywords = this.extractHistoryFromKeywords(existingKeywords, true) || existingKeywords; - - // Add new history with delimiter - const historyKeyword = `${PDFMetadataService.HISTORY_KEYWORD}:${historyJson}`; - - if (cleanKeywords.trim()) { - return `${cleanKeywords.trim()} ${historyKeyword}`; - } - return historyKeyword; - } - - /** - * Extract history JSON from keywords field - */ - private extractHistoryFromKeywords(keywords: string, returnRemainder = false): string | null { - const historyPrefix = `${PDFMetadataService.HISTORY_KEYWORD}:`; - const historyIndex = keywords.indexOf(historyPrefix); - - if (historyIndex === -1) return null; - - const historyStart = historyIndex + historyPrefix.length; - let historyEnd = keywords.length; - - // Look for the next keyword (space followed by non-JSON content) - // Simple heuristic: find space followed by word that doesn't look like JSON - const afterHistory = keywords.substring(historyStart); - const nextSpaceIndex = afterHistory.indexOf(' '); - if (nextSpaceIndex > 0) { - const afterSpace = afterHistory.substring(nextSpaceIndex + 1); - if (afterSpace && !afterSpace.trim().startsWith('{')) { - historyEnd = historyStart + nextSpaceIndex; - } - } - - if (returnRemainder) { - // Return keywords with history removed - const before = keywords.substring(0, historyIndex); - const after = keywords.substring(historyEnd); - return `${before}${after}`.replace(/\s+/g, ' ').trim(); - } - - return keywords.substring(historyStart, historyEnd).trim(); - } - - /** - * Validate metadata structure - */ - private isValidHistoryMetadata(metadata: any): metadata is PDFHistoryMetadata { - return metadata && - metadata.stirlingHistory && - typeof metadata.stirlingHistory.originalFileId === 'string' && - typeof metadata.stirlingHistory.versionNumber === 'number' && - Array.isArray(metadata.stirlingHistory.toolChain) && - metadata.stirlingHistory.formatVersion === PDFMetadataService.FORMAT_VERSION; - } -} - -// Export singleton instance with optimized cache settings -export const pdfMetadataService = new PDFMetadataService({ - ttl: 10 * 60 * 1000, // 10 minutes for PDF metadata (longer than default) - maxSize: 50, // Smaller cache for memory efficiency - enableWarnings: DEBUG -}); \ No newline at end of file diff --git a/frontend/src/types/file.ts b/frontend/src/types/file.ts index ea11e9d85..c5766bbe3 100644 --- a/frontend/src/types/file.ts +++ b/frontend/src/types/file.ts @@ -15,17 +15,6 @@ export interface ToolOperation { timestamp: number; } -/** - * File history information extracted from PDF metadata - * Timestamps come from standard PDF metadata fields (CreationDate, ModificationDate) - */ -export interface FileHistoryInfo { - originalFileId: string; - parentFileId?: FileId; - versionNumber: number; - toolChain: ToolOperation[]; -} - /** * Base file metadata shared between storage and runtime layers * Contains all common file properties and history tracking @@ -59,47 +48,3 @@ export interface BaseFileMetadata { modificationDate?: Date; }; } - -// FileMetadata has been replaced with StoredFileMetadata from '../services/fileStorage' -// This ensures clear type relationships and eliminates duplication - - -export interface StorageConfig { - useIndexedDB: boolean; - maxFileSize: number; // Maximum size per file in bytes - maxTotalStorage: number; // Maximum total storage in bytes - warningThreshold: number; // Warning threshold (percentage 0-1) -} - -export const defaultStorageConfig: StorageConfig = { - useIndexedDB: true, - maxFileSize: 100 * 1024 * 1024, // 100MB per file - maxTotalStorage: 1024 * 1024 * 1024, // 1GB default, will be updated dynamically - warningThreshold: 0.8, // Warn at 80% capacity -}; - -// Calculate and update storage limit: half of available storage or 10GB, whichever is smaller -export const initializeStorageConfig = async (): Promise => { - const tenGB = 10 * 1024 * 1024 * 1024; // 10GB in bytes - const oneGB = 1024 * 1024 * 1024; // 1GB fallback - - let maxTotalStorage = oneGB; // Default fallback - - // Try to estimate available storage - if ('storage' in navigator && 'estimate' in navigator.storage) { - try { - const estimate = await navigator.storage.estimate(); - if (estimate.quota) { - const halfQuota = estimate.quota / 2; - maxTotalStorage = Math.min(halfQuota, tenGB); - } - } catch (error) { - console.warn('Could not estimate storage quota, using 1GB default:', error); - } - } - - return { - ...defaultStorageConfig, - maxTotalStorage - }; -}; diff --git a/frontend/src/utils/ContentCache.ts b/frontend/src/utils/ContentCache.ts deleted file mode 100644 index d20c258d1..000000000 --- a/frontend/src/utils/ContentCache.ts +++ /dev/null @@ -1,173 +0,0 @@ -/** - * Generic content cache with TTL and size limits - * Reusable for any cached data with configurable parameters - */ - -const DEBUG = process.env.NODE_ENV === 'development'; - -interface CacheEntry { - value: T; - timestamp: number; -} - -export interface CacheConfig { - /** Time-to-live in milliseconds */ - ttl: number; - /** Maximum number of cache entries */ - maxSize: number; - /** Enable cleanup warnings in development */ - enableWarnings?: boolean; -} - -export class ContentCache { - private cache = new Map>(); - private hits = 0; - private misses = 0; - - constructor(private readonly config: CacheConfig) {} - - /** - * Get cached value if valid - */ - get(key: string): T | null { - const entry = this.cache.get(key); - - if (!entry) { - this.misses++; - return null; - } - - // Check if expired - if (Date.now() - entry.timestamp > this.config.ttl) { - this.cache.delete(key); - this.misses++; - return null; - } - - this.hits++; - return entry.value; - } - - /** - * Set cached value - */ - set(key: string, value: T): void { - // Clean up before adding if at capacity - if (this.cache.size >= this.config.maxSize) { - this.evictOldest(); - } - - this.cache.set(key, { - value, - timestamp: Date.now() - }); - } - - /** - * Generate cache key from ArrayBuffer content - */ - generateKeyFromBuffer(data: ArrayBuffer): string { - // Use file size + hash of first/last bytes as cache key - const view = new Uint8Array(data); - const size = data.byteLength; - const start = Array.from(view.slice(0, 16)).join(','); - const end = Array.from(view.slice(-16)).join(','); - return `${size}-${this.simpleHash(start + end)}`; - } - - /** - * Generate cache key from string content - */ - generateKeyFromString(content: string): string { - return this.simpleHash(content); - } - - /** - * Check if key exists and is valid - */ - has(key: string): boolean { - return this.get(key) !== null; - } - - /** - * Clear all cache entries - */ - clear(): void { - this.cache.clear(); - this.hits = 0; - this.misses = 0; - } - - /** - * Get cache statistics - */ - getStats(): { - size: number; - maxSize: number; - hitRate: number; - hits: number; - misses: number; - } { - const total = this.hits + this.misses; - const hitRate = total > 0 ? this.hits / total : 0; - - return { - size: this.cache.size, - maxSize: this.config.maxSize, - hitRate, - hits: this.hits, - misses: this.misses - }; - } - - /** - * Cleanup expired entries - */ - cleanup(): void { - const now = Date.now(); - let cleaned = 0; - - for (const [key, entry] of this.cache.entries()) { - if (now - entry.timestamp > this.config.ttl) { - this.cache.delete(key); - cleaned++; - } - } - - if (DEBUG && this.config.enableWarnings && this.cache.size > this.config.maxSize * 0.8) { - console.warn(`📦 ContentCache: High cache usage (${this.cache.size}/${this.config.maxSize}), cleaned ${cleaned} expired entries`); - } - } - - /** - * Evict oldest entry when at capacity - */ - private evictOldest(): void { - let oldestKey: string | null = null; - let oldestTime = Date.now(); - - for (const [key, entry] of this.cache.entries()) { - if (entry.timestamp < oldestTime) { - oldestTime = entry.timestamp; - oldestKey = key; - } - } - - if (oldestKey) { - this.cache.delete(oldestKey); - } - } - - /** - * Simple hash function for cache keys - */ - private simpleHash(str: string): string { - let hash = 0; - for (let i = 0; i < str.length; i++) { - const char = str.charCodeAt(i); - hash = ((hash << 5) - hash) + char; - hash = hash & hash; // Convert to 32-bit integer - } - return Math.abs(hash).toString(36); - } -} \ No newline at end of file