Delete redundant code

This commit is contained in:
Connor Yoh 2025-09-12 18:20:09 +01:00
parent 18097a6d9b
commit 9c49cf9584
3 changed files with 0 additions and 678 deletions

View File

@ -1,450 +0,0 @@
/**
* PDF Metadata Service - File History Tracking with pdf-lib
*
* Handles injection and extraction of file history metadata in PDFs using pdf-lib.
* This service embeds file history directly into PDF metadata, making it persistent
* across all tool operations and downloads.
*/
import { PDFDocument } from 'pdf-lib';
import { ContentCache, type CacheConfig } from '../utils/ContentCache';
/** True when NODE_ENV is 'development'; gates the diagnostic console output and cache warnings in this file. */
const DEBUG = process.env.NODE_ENV === 'development';
/**
* Tool operation metadata for history tracking.
* One entry is appended to a file's tool chain each time a tool is applied to it.
* Note: Parameters removed for security - sensitive data like passwords should not be stored
*/
export interface ToolOperation {
/** Name of the tool that produced this step of the history. */
toolName: string;
/** Numeric time of the operation (presumably epoch milliseconds - confirm against callers). */
timestamp: number;
}
/**
* Complete file history metadata structure.
* This object is what gets JSON-serialized into the PDF's keywords as a
* `stirling-history:<json>` entry; standard PDF metadata fields
* (Creator, Producer, CreationDate, ModificationDate) are stored separately by the PDF itself.
*/
export interface PDFHistoryMetadata {
stirlingHistory: {
/** Id of the first file in the version chain; carried over unchanged when new versions are created. */
originalFileId: string;
/** Id of the file this version was derived from; absent when no parent was supplied. */
parentFileId?: string;
/** Version counter; createNewVersion writes the parent's value plus one (1 when the parent has no history). */
versionNumber: number;
/** Tool operations recorded so far; new operations are appended at the end. */
toolChain: ToolOperation[];
/** Schema version of this payload; extraction rejects any other value. */
formatVersion: '1.0';
};
}
/**
 * Service for managing PDF file history metadata.
 *
 * History is serialized as JSON and stored in the document's Keywords field as a
 * single `stirling-history:<json>` entry, alongside whatever keywords the document
 * already carries. Extraction results are memoized per PDF content in a ContentCache.
 *
 * All public methods are best-effort: on failure they return the untouched input
 * bytes (writers) or `null` (readers) instead of throwing.
 */
export class PDFMetadataService {
  private static readonly HISTORY_KEYWORD = 'stirling-history';
  private static readonly FORMAT_VERSION = '1.0';
  /** Prefix that marks a keyword entry as serialized history. */
  private static readonly HISTORY_PREFIX = `${PDFMetadataService.HISTORY_KEYWORD}:`;
  /**
   * Options for every PDFDocument.load call in this service.
   * `updateMetadata: false` stops pdf-lib from rewriting Producer/ModificationDate
   * (and filling in a missing Creator/CreationDate) while loading, so reads report the
   * file's real metadata and writes leave the dates alone (see pdf-lib LoadOptions).
   */
  private static readonly LOAD_OPTIONS = { ignoreEncryption: true, updateMetadata: false } as const;

  /**
   * Extraction results keyed by PDF content. Values are wrapped in an object because
   * ContentCache.get() uses `null` to signal a miss; without the wrapper a cached
   * "this PDF has no history" result could never be served from the cache.
   */
  private readonly metadataCache: ContentCache<{ metadata: PDFHistoryMetadata | null }>;

  /**
   * @param cacheConfig Optional overrides for the extraction cache
   *                    (defaults: 5 minute TTL, 100 entries, warnings in development only).
   */
  constructor(cacheConfig?: Partial<CacheConfig>) {
    const defaultConfig: CacheConfig = {
      ttl: 5 * 60 * 1000, // 5 minutes
      maxSize: 100, // 100 files
      enableWarnings: DEBUG
    };
    this.metadataCache = new ContentCache<{ metadata: PDFHistoryMetadata | null }>({
      ...defaultConfig,
      ...cacheConfig
    });
  }

  /**
   * Inject file history metadata into a PDF.
   *
   * Sets Creator and Producer to 'Stirling-PDF', removes any previous history entry
   * from the keywords, keeps all other keywords, and appends the new history entry.
   *
   * @param pdfBytes       Source PDF content.
   * @param originalFileId Id of the first file in the version chain.
   * @param parentFileId   Id of the file this version derives from, if any.
   * @param toolChain      Operations to record (copied, not retained by reference).
   * @param versionNumber  Version counter to store.
   * @returns The re-saved PDF bytes, or the original `pdfBytes` if anything fails.
   */
  async injectHistoryMetadata(
    pdfBytes: ArrayBuffer,
    originalFileId: string,
    parentFileId?: string,
    toolChain: ToolOperation[] = [],
    versionNumber: number = 1
  ): Promise<ArrayBuffer> {
    try {
      const pdfDoc = await PDFDocument.load(pdfBytes, PDFMetadataService.LOAD_OPTIONS);
      const historyMetadata: PDFHistoryMetadata = {
        stirlingHistory: {
          originalFileId,
          parentFileId,
          versionNumber,
          toolChain: [...toolChain],
          formatVersion: PDFMetadataService.FORMAT_VERSION
        }
      };

      // Set Stirling-PDF identification fields only (dates are left untouched)
      pdfDoc.setCreator('Stirling-PDF');
      pdfDoc.setProducer('Stirling-PDF');

      // Keep the document's own keywords, drop stale history, append the new entry
      const keywordList = this.stripHistoryKeywords(pdfDoc.getKeywords());
      keywordList.push(`${PDFMetadataService.HISTORY_PREFIX}${JSON.stringify(historyMetadata)}`);
      pdfDoc.setKeywords(keywordList);

      if (DEBUG) {
        console.log('📄 Injected PDF history metadata:', {
          originalFileId,
          parentFileId,
          versionNumber,
          toolCount: toolChain.length
        });
      }

      const savedPdfBytes = await pdfDoc.save();
      // Copy into a standalone ArrayBuffer (save() yields a Uint8Array view)
      const arrayBuffer = new ArrayBuffer(savedPdfBytes.byteLength);
      new Uint8Array(arrayBuffer).set(savedPdfBytes);
      return arrayBuffer;
    } catch (error) {
      if (DEBUG) console.error('📄 Failed to inject PDF metadata:', error);
      // Return original bytes if metadata injection fails
      return pdfBytes;
    }
  }

  /**
   * Extract file history metadata from a PDF, using the content cache when possible.
   *
   * @returns The validated history, or `null` when the PDF has none or cannot be read.
   *          Both outcomes are cached for the configured TTL.
   */
  async extractHistoryMetadata(pdfBytes: ArrayBuffer): Promise<PDFHistoryMetadata | null> {
    const cacheKey = this.metadataCache.generateKeyFromBuffer(pdfBytes);
    const cached = this.metadataCache.get(cacheKey);
    if (cached !== null) {
      return cached.metadata;
    }
    const metadata = await this.extractHistoryMetadataInternal(pdfBytes);
    this.metadataCache.set(cacheKey, { metadata });
    return metadata;
  }

  /**
   * Uncached extraction: load the PDF, locate the history entry in its keywords,
   * parse it and validate its structure.
   */
  private async extractHistoryMetadataInternal(pdfBytes: ArrayBuffer): Promise<PDFHistoryMetadata | null> {
    try {
      const pdfDoc = await PDFDocument.load(pdfBytes, PDFMetadataService.LOAD_OPTIONS);
      const historyJson = this.readHistoryJson(pdfDoc.getKeywords());
      if (!historyJson) return null;
      const metadata: unknown = JSON.parse(historyJson);
      return this.isValidHistoryMetadata(metadata) ? metadata : null;
    } catch (error) {
      if (DEBUG) console.error('📄 Failed to extract PDF metadata:', error);
      return null;
    }
  }

  /**
   * Add a tool operation to existing PDF history, keeping the current version number.
   *
   * @returns The updated PDF bytes, or the original bytes when the PDF carries no
   *          history or the update fails.
   */
  async addToolOperation(
    pdfBytes: ArrayBuffer,
    toolOperation: ToolOperation
  ): Promise<ArrayBuffer> {
    try {
      const existingHistory = await this.extractHistoryMetadata(pdfBytes);
      if (!existingHistory) {
        if (DEBUG) console.warn('📄 No existing history found, cannot add tool operation');
        return pdfBytes;
      }
      const { originalFileId, parentFileId, toolChain, versionNumber } = existingHistory.stirlingHistory;
      return await this.injectHistoryMetadata(
        pdfBytes,
        originalFileId,
        parentFileId,
        [...toolChain, toolOperation],
        versionNumber
      );
    } catch (error) {
      if (DEBUG) console.error('📄 Failed to add tool operation:', error);
      return pdfBytes;
    }
  }

  /**
   * Create a new version of a PDF with an incremented version number.
   *
   * When the PDF has no history yet, `parentFileId` doubles as the original file id
   * and the new version number is 1.
   *
   * @returns The updated PDF bytes, or the original bytes if the update fails.
   */
  async createNewVersion(
    pdfBytes: ArrayBuffer,
    parentFileId: string,
    toolOperation: ToolOperation
  ): Promise<ArrayBuffer> {
    try {
      const parentHistory = await this.extractHistoryMetadata(pdfBytes);
      const originalFileId = parentHistory?.stirlingHistory.originalFileId || parentFileId;
      const parentToolChain = parentHistory?.stirlingHistory.toolChain || [];
      const newVersionNumber = (parentHistory?.stirlingHistory.versionNumber || 0) + 1;
      return await this.injectHistoryMetadata(
        pdfBytes,
        originalFileId,
        parentFileId,
        [...parentToolChain, toolOperation],
        newVersionNumber
      );
    } catch (error) {
      if (DEBUG) console.error('📄 Failed to create new version:', error);
      return pdfBytes;
    }
  }

  /**
   * Extract standard PDF document metadata.
   *
   * Empty or missing fields are reported as `undefined`.
   *
   * @returns The document information fields, or `null` if the PDF cannot be loaded.
   */
  async extractStandardMetadata(pdfBytes: ArrayBuffer): Promise<{
    title?: string;
    author?: string;
    subject?: string;
    creator?: string;
    producer?: string;
    creationDate?: Date;
    modificationDate?: Date;
  } | null> {
    try {
      const pdfDoc = await PDFDocument.load(pdfBytes, PDFMetadataService.LOAD_OPTIONS);
      return {
        title: pdfDoc.getTitle() || undefined,
        author: pdfDoc.getAuthor() || undefined,
        subject: pdfDoc.getSubject() || undefined,
        creator: pdfDoc.getCreator() || undefined,
        producer: pdfDoc.getProducer() || undefined,
        creationDate: pdfDoc.getCreationDate() || undefined,
        modificationDate: pdfDoc.getModificationDate() || undefined
      };
    } catch (error) {
      if (DEBUG) console.warn('📄 Failed to extract standard PDF metadata:', error);
      return null;
    }
  }

  /**
   * Verify that a tool preserved standard PDF metadata.
   *
   * Compares Title, Author, Subject and CreationDate (1 second tolerance) between
   * the original and processed PDF and logs the outcome to the console. Does nothing
   * when either PDF cannot be read.
   */
  async verifyMetadataPreservation(
    originalBytes: ArrayBuffer,
    processedBytes: ArrayBuffer,
    toolName: string
  ): Promise<void> {
    try {
      const [originalMetadata, processedMetadata] = await Promise.all([
        this.extractStandardMetadata(originalBytes),
        this.extractStandardMetadata(processedBytes)
      ]);
      if (!originalMetadata || !processedMetadata) return;

      const issues: string[] = [];
      if (originalMetadata.title && !processedMetadata.title) {
        issues.push('Title stripped');
      }
      if (originalMetadata.author && !processedMetadata.author) {
        issues.push('Author stripped');
      }
      if (originalMetadata.subject && !processedMetadata.subject) {
        issues.push('Subject stripped');
      }
      if (originalMetadata.creationDate && !processedMetadata.creationDate) {
        issues.push('CreationDate stripped');
      }
      if (originalMetadata.creationDate && processedMetadata.creationDate &&
        Math.abs(originalMetadata.creationDate.getTime() - processedMetadata.creationDate.getTime()) > 1000) {
        // Separator added so the two timestamps are readable in the log line
        issues.push(`CreationDate modified (${originalMetadata.creationDate.toISOString()} → ${processedMetadata.creationDate.toISOString()})`);
      }
      // Note: We don't check ModificationDate preservation since we use File.lastModified as source of truth

      if (issues.length > 0) {
        console.warn(`⚠️ METADATA LOSS: Tool '${toolName}' did not preserve PDF metadata:`, issues.join(', '));
        console.warn(`⚠️ This backend tool should be updated to preserve standard PDF metadata fields.`);
      } else {
        console.log(`✅ METADATA PRESERVED: Tool '${toolName}' correctly preserved all PDF metadata`);
      }
    } catch (error) {
      if (DEBUG) console.warn(`📄 Failed to verify metadata preservation for ${toolName}:`, error);
    }
  }

  /**
   * Check if a PDF has (valid) Stirling history metadata.
   */
  async hasStirlingHistory(pdfBytes: ArrayBuffer): Promise<boolean> {
    const metadata = await this.extractHistoryMetadata(pdfBytes);
    return metadata !== null;
  }

  /**
   * Get a summary of the version information stored in a PDF.
   *
   * @returns The summary, or `null` when the PDF has no valid history.
   */
  async getVersionInfo(pdfBytes: ArrayBuffer): Promise<{
    originalFileId: string;
    versionNumber: number;
    toolCount: number;
    parentFileId?: string;
  } | null> {
    const metadata = await this.extractHistoryMetadata(pdfBytes);
    if (!metadata) return null;
    const { originalFileId, versionNumber, toolChain, parentFileId } = metadata.stirlingHistory;
    return {
      originalFileId,
      versionNumber,
      toolCount: toolChain.length,
      parentFileId
    };
  }

  /**
   * Return the document's keywords with every history entry removed.
   *
   * pdf-lib's getKeywords() yields a single string (setKeywords joins its array with
   * spaces); the array branch is kept only as a defensive fallback.
   * Keywords that contain no history entry are preserved as they are.
   */
  private stripHistoryKeywords(existingKeywords: unknown): string[] {
    if (Array.isArray(existingKeywords)) {
      return existingKeywords.filter(
        (keyword): keyword is string =>
          typeof keyword === 'string' && !keyword.startsWith(PDFMetadataService.HISTORY_PREFIX)
      );
    }
    if (typeof existingKeywords !== 'string' || !existingKeywords) {
      return [];
    }
    // extractHistoryFromKeywords returns null when there is no history entry at all;
    // in that case the whole string is user content and must be kept.
    const cleaned = this.extractHistoryFromKeywords(existingKeywords, true) ?? existingKeywords.trim();
    return cleaned ? [cleaned] : [];
  }

  /**
   * Locate the serialized history JSON in a keywords value.
   *
   * For an array, the valid entry with the highest version number wins; for a
   * string, the first history entry is used.
   */
  private readHistoryJson(keywords: unknown): string | null {
    if (typeof keywords === 'string') {
      return keywords ? this.extractHistoryFromKeywords(keywords) : null;
    }
    if (!Array.isArray(keywords)) {
      return null;
    }
    let latestJson: string | null = null;
    let latestVersion = -Infinity;
    for (const keyword of keywords) {
      if (typeof keyword !== 'string' || !keyword.startsWith(PDFMetadataService.HISTORY_PREFIX)) {
        continue;
      }
      const json = keyword.substring(PDFMetadataService.HISTORY_PREFIX.length);
      try {
        const parsed: unknown = JSON.parse(json);
        if (this.isValidHistoryMetadata(parsed) && parsed.stirlingHistory.versionNumber > latestVersion) {
          latestVersion = parsed.stirlingHistory.versionNumber;
          latestJson = json;
        }
      } catch {
        // Corrupted entry: ignore it and keep looking at the remaining ones
      }
    }
    return latestJson;
  }

  /**
   * Embed history JSON in a keywords string, replacing any history already present.
   * Not called by the other methods of this class.
   */
  private embedHistoryInKeywords(existingKeywords: string, historyJson: string): string {
    // `??` (not `||`): an empty remainder means "only history was present" and must
    // not fall back to the original string, which would keep the stale entry.
    const cleanKeywords = this.extractHistoryFromKeywords(existingKeywords, true) ?? existingKeywords;
    const historyKeyword = `${PDFMetadataService.HISTORY_PREFIX}${historyJson}`;
    if (cleanKeywords.trim()) {
      return `${cleanKeywords.trim()} ${historyKeyword}`;
    }
    return historyKeyword;
  }

  /**
   * Extract history JSON from a keywords string.
   *
   * @param keywords        Raw keywords string.
   * @param returnRemainder When true, return the keywords with all history entries
   *                        removed (whitespace collapsed) instead of the JSON.
   * @returns `null` when the string contains no history entry; otherwise the JSON
   *          of the first entry, or the remainder (possibly an empty string).
   */
  private extractHistoryFromKeywords(keywords: string, returnRemainder = false): string | null {
    const historyPrefix = PDFMetadataService.HISTORY_PREFIX;
    const historyIndex = keywords.indexOf(historyPrefix);
    if (historyIndex === -1) return null;

    if (returnRemainder) {
      let remainder = keywords;
      let index = historyIndex;
      // Remove every history entry; each pass shortens the string, so this terminates
      while (index !== -1) {
        const end = this.findHistoryEnd(remainder, index + historyPrefix.length);
        remainder = `${remainder.substring(0, index)}${remainder.substring(end)}`;
        index = remainder.indexOf(historyPrefix);
      }
      return remainder.replace(/\s+/g, ' ').trim();
    }

    const historyStart = historyIndex + historyPrefix.length;
    return keywords.substring(historyStart, this.findHistoryEnd(keywords, historyStart)).trim();
  }

  /**
   * Find the index just past the history payload that begins at `start`.
   *
   * A JSON object payload ends at its matching closing brace; braces and spaces
   * inside JSON string values are ignored. Any other payload ends at the next space.
   * An unterminated payload extends to the end of the string.
   */
  private findHistoryEnd(keywords: string, start: number): number {
    if (keywords[start] !== '{') {
      const nextSpace = keywords.indexOf(' ', start);
      return nextSpace === -1 ? keywords.length : nextSpace;
    }
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < keywords.length; i++) {
      const ch = keywords[i];
      if (inString) {
        if (escaped) {
          escaped = false;
        } else if (ch === '\\') {
          escaped = true;
        } else if (ch === '"') {
          inString = false;
        }
        continue;
      }
      if (ch === '"') {
        inString = true;
      } else if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) return i + 1;
      }
    }
    return keywords.length;
  }

  /**
   * Validate metadata structure: required field types and the supported format version.
   * Individual tool chain entries are not inspected.
   */
  private isValidHistoryMetadata(metadata: unknown): metadata is PDFHistoryMetadata {
    if (typeof metadata !== 'object' || metadata === null) return false;
    const history = (metadata as { stirlingHistory?: unknown }).stirlingHistory;
    if (typeof history !== 'object' || history === null) return false;
    const fields = history as Record<string, unknown>;
    return typeof fields.originalFileId === 'string' &&
      typeof fields.versionNumber === 'number' &&
      Array.isArray(fields.toolChain) &&
      fields.formatVersion === PDFMetadataService.FORMAT_VERSION;
  }
}
// Shared singleton instance. The options below override the constructor defaults
// (5 minute TTL, 100 entries) with a longer TTL and a smaller entry cap.
export const pdfMetadataService = new PDFMetadataService({
ttl: 10 * 60 * 1000, // 10 minutes for PDF metadata (longer than default)
maxSize: 50, // Smaller cache for memory efficiency
enableWarnings: DEBUG
});

View File

@ -15,17 +15,6 @@ export interface ToolOperation {
timestamp: number;
}
/**
* File history information extracted from PDF metadata
* Timestamps come from standard PDF metadata fields (CreationDate, ModificationDate)
* NOTE(review): this interface itself declares no timestamp fields.
*/
export interface FileHistoryInfo {
/** Id of the first file in the version chain. */
originalFileId: string;
/** Id of the file this version was derived from, when known. */
parentFileId?: FileId;
/** Version counter of this file within its chain. */
versionNumber: number;
/** Tool operations recorded for this file. */
toolChain: ToolOperation[];
}
/**
* Base file metadata shared between storage and runtime layers
* Contains all common file properties and history tracking
@ -59,47 +48,3 @@ export interface BaseFileMetadata {
modificationDate?: Date;
};
}
// FileMetadata has been replaced with StoredFileMetadata from '../services/fileStorage'
// This ensures clear type relationships and eliminates duplication
/**
* Limits and switches for client-side file storage.
*/
export interface StorageConfig {
useIndexedDB: boolean;
maxFileSize: number; // Maximum size per file in bytes
maxTotalStorage: number; // Maximum total storage in bytes
warningThreshold: number; // Warning threshold (percentage 0-1)
}
/**
* Baseline storage limits. `maxTotalStorage` is a static fallback here;
* initializeStorageConfig returns a copy of this object with a quota-derived value
* (this object itself is not mutated by it).
*/
export const defaultStorageConfig: StorageConfig = {
useIndexedDB: true,
maxFileSize: 100 * 1024 * 1024, // 100MB per file
maxTotalStorage: 1024 * 1024 * 1024, // 1GB default, will be updated dynamically
warningThreshold: 0.8, // Warn at 80% capacity
};
/**
 * Build the effective storage configuration for the current environment.
 *
 * `maxTotalStorage` becomes half of the origin's storage quota as reported by
 * `navigator.storage.estimate()`, capped at 10GB. When the Storage API is missing
 * (non-browser runtime, or `navigator.storage` not exposed), reports no quota, or
 * rejects, the 1GB fallback is used.
 *
 * @returns A copy of `defaultStorageConfig` with `maxTotalStorage` replaced.
 *          Never rejects because of the quota lookup.
 */
export const initializeStorageConfig = async (): Promise<StorageConfig> => {
  const tenGB = 10 * 1024 * 1024 * 1024; // 10GB in bytes
  const oneGB = 1024 * 1024 * 1024; // 1GB fallback
  let maxTotalStorage = oneGB;

  // Guard with typeof: referencing an undeclared `navigator` would throw a ReferenceError
  const storageManager = typeof navigator !== 'undefined' ? navigator.storage : undefined;
  if (storageManager && typeof storageManager.estimate === 'function') {
    try {
      const estimate = await storageManager.estimate();
      if (estimate.quota) {
        // Floor so the limit stays a whole number of bytes for odd quotas
        maxTotalStorage = Math.min(Math.floor(estimate.quota / 2), tenGB);
      }
    } catch (error) {
      console.warn('Could not estimate storage quota, using 1GB default:', error);
    }
  }

  return {
    ...defaultStorageConfig,
    maxTotalStorage
  };
};

View File

@ -1,173 +0,0 @@
/**
* Generic content cache with TTL and size limits
* Reusable for any cached data with configurable parameters
*/
/** True when NODE_ENV is 'development'; required (together with `enableWarnings`) for the cache usage warning. */
const DEBUG = process.env.NODE_ENV === 'development';
/** A stored value together with the time it was written. */
interface CacheEntry<T> {
/** The cached payload. */
value: T;
/** `Date.now()` at the moment the entry was set; used for TTL checks and eviction order. */
timestamp: number;
}
/** Tuning parameters for a ContentCache instance. */
export interface CacheConfig {
/** Time-to-live in milliseconds; entries older than this are treated as missing */
ttl: number;
/** Maximum number of cache entries; checked when a value is set */
maxSize: number;
/** Enable cleanup warnings in development */
enableWarnings?: boolean;
}
/**
 * In-memory cache with a per-entry time-to-live and a bounded number of entries.
 *
 * Expired entries are dropped lazily when they are looked up, or in bulk via
 * cleanup(). When a new key is added at capacity, the entry with the oldest
 * write time is evicted. Hit/miss counters are kept for getStats().
 */
export class ContentCache<T> {
  private readonly cache = new Map<string, CacheEntry<T>>();
  private hits = 0;
  private misses = 0;

  constructor(private readonly config: CacheConfig) {}

  /**
   * Get cached value if present and not expired.
   *
   * @returns The value, or `null` on a miss. If `T` itself includes `null`, use
   *          has() to tell a stored `null` apart from a miss.
   */
  get(key: string): T | null {
    const entry = this.lookup(key);
    return entry ? entry.value : null;
  }

  /**
   * Store a value under `key`, stamping it with the current time.
   *
   * Only inserting a NEW key at capacity evicts the oldest entry; overwriting an
   * existing key never removes an unrelated one.
   */
  set(key: string, value: T): void {
    if (!this.cache.has(key) && this.cache.size >= this.config.maxSize) {
      this.evictOldest();
    }
    this.cache.set(key, {
      value,
      timestamp: Date.now()
    });
  }

  /**
   * Generate cache key from ArrayBuffer content.
   *
   * The key is `<byteLength>-<hash>` where the hash covers every byte. Hashing
   * only the ends of the buffer is not enough for PDFs: files of equal size
   * routinely share their header and trailer bytes, which would make different
   * documents share one cache entry. The cost is one linear pass over the buffer.
   */
  generateKeyFromBuffer(data: ArrayBuffer): string {
    let hash = 0;
    for (const byte of new Uint8Array(data)) {
      // 32-bit multiplicative hash; Math.imul keeps the product in int32 range
      hash = (Math.imul(hash, 31) + byte) | 0;
    }
    // >>> 0 reinterprets as unsigned so h and -h do not fold onto the same key
    return `${data.byteLength}-${(hash >>> 0).toString(36)}`;
  }

  /**
   * Generate cache key from string content
   */
  generateKeyFromString(content: string): string {
    return this.simpleHash(content);
  }

  /**
   * Check if key exists and is not expired. Counts as a hit or miss like get().
   * Returns true for a stored `null`/`undefined` value as well.
   */
  has(key: string): boolean {
    return this.lookup(key) !== undefined;
  }

  /**
   * Clear all cache entries and reset the hit/miss counters
   */
  clear(): void {
    this.cache.clear();
    this.hits = 0;
    this.misses = 0;
  }

  /**
   * Get cache statistics. `hitRate` is hits / (hits + misses), or 0 before any lookup.
   */
  getStats(): {
    size: number;
    maxSize: number;
    hitRate: number;
    hits: number;
    misses: number;
  } {
    const total = this.hits + this.misses;
    const hitRate = total > 0 ? this.hits / total : 0;
    return {
      size: this.cache.size,
      maxSize: this.config.maxSize,
      hitRate,
      hits: this.hits,
      misses: this.misses
    };
  }

  /**
   * Remove all expired entries. In development, with warnings enabled, logs a
   * warning when the cache is still more than 80% full afterwards.
   */
  cleanup(): void {
    const now = Date.now();
    let cleaned = 0;
    // Deleting the current key while iterating a Map is safe
    for (const [key, entry] of this.cache.entries()) {
      if (now - entry.timestamp > this.config.ttl) {
        this.cache.delete(key);
        cleaned++;
      }
    }
    if (this.config.enableWarnings && DEBUG && this.cache.size > this.config.maxSize * 0.8) {
      console.warn(`📦 ContentCache: High cache usage (${this.cache.size}/${this.config.maxSize}), cleaned ${cleaned} expired entries`);
    }
  }

  /**
   * Shared lookup for get()/has(): returns the live entry and updates the
   * counters; an expired entry is deleted and counted as a miss.
   */
  private lookup(key: string): CacheEntry<T> | undefined {
    const entry = this.cache.get(key);
    if (entry && Date.now() - entry.timestamp <= this.config.ttl) {
      this.hits++;
      return entry;
    }
    if (entry) {
      this.cache.delete(key);
    }
    this.misses++;
    return undefined;
  }

  /**
   * Evict the entry with the oldest write time.
   *
   * The search starts from Infinity (not the current time) so that an entry is
   * always chosen, even when every entry was written in the current millisecond.
   * Ties go to the entry that comes first in the Map's iteration order.
   */
  private evictOldest(): void {
    let oldestKey: string | null = null;
    let oldestTime = Infinity;
    for (const [key, entry] of this.cache.entries()) {
      if (entry.timestamp < oldestTime) {
        oldestTime = entry.timestamp;
        oldestKey = key;
      }
    }
    if (oldestKey !== null) {
      this.cache.delete(oldestKey);
    }
  }

  /**
   * Simple 32-bit string hash for cache keys, rendered in base 36
   */
  private simpleHash(str: string): string {
    let hash = 0;
    for (let i = 0; i < str.length; i++) {
      const char = str.charCodeAt(i);
      hash = ((hash << 5) - hash) + char;
      hash = hash & hash; // Convert to 32-bit integer
    }
    return Math.abs(hash).toString(36);
  }
}