Stirling-PDF/src/main/java/stirling/software/SPDF/service/CustomPDFDocumentFactory.java

package stirling.software.SPDF.service;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.pdfbox.Loader;
import org.apache.pdfbox.examples.util.DeletingRandomAccessFile;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.io.RandomAccessStreamCache.StreamCacheCreateFunction;
import org.apache.pdfbox.io.ScratchFile;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;

import lombok.extern.slf4j.Slf4j;

import stirling.software.SPDF.model.api.PDFFile;

/**
 * Adaptive PDF document factory that picks a PDFBox stream-cache strategy (memory only, mixed, or
 * temp-file only) from the input size and the JVM's current heap headroom.
 *
 * <p><b>File ownership:</b> {@link File} and {@link Path} sources are <em>consumed</em> by the
 * load methods. Sources below {@link #SMALL_FILE_THRESHOLD} are read into memory and the file is
 * deleted straight away; larger sources are opened through PDFBox's example class {@code
 * DeletingRandomAccessFile}, which is expected to delete the file when the returned document (or
 * the reader, on a failed parse) is closed. Callers that need to keep the original must pass a
 * copy.
 *
 * <p>Unless {@code readOnly} is requested, every loaded document is post-processed: default
 * metadata is applied and, for encrypted documents, security is flagged for removal on save.
 */
@Component
@Slf4j
public class CustomPDFDocumentFactory {

    private final PdfMetadataService pdfMetadataService;

    /** Divisor used to report byte counts as whole megabytes in log output. */
    private static final long BYTES_PER_MB = 1024L * 1024L;

    // Memory thresholds and limits

    /**
     * 10 MB. Content below this size gets a memory-only stream cache (unless heap headroom is low)
     * and file sources below it are read fully into memory. Byte-array sources at or above it are
     * spilled to a temp file before parsing.
     */
    private static final long SMALL_FILE_THRESHOLD = 10 * 1024 * 1024; // 10 MB

    /**
     * 50 MB. Content from {@link #SMALL_FILE_THRESHOLD} up to (but excluding) this size uses a
     * mixed memory/temp-file cache; content at or above it uses a temp-file-only cache.
     */
    private static final long LARGE_FILE_THRESHOLD = 50 * 1024 * 1024; // 50 MB

    /** Main-memory budget handed to {@code MemoryUsageSetting.setupMixed} for medium documents. */
    private static final long LARGE_FILE_USAGE = 10 * 1024 * 1024;

    /**
     * 100 MB. NOTE(review): this constant is not referenced anywhere in this class; documents of
     * this size currently take the same path as any other document at or above {@link
     * #LARGE_FILE_THRESHOLD}.
     */
    private static final long EXTREMELY_LARGE_THRESHOLD = 100 * 1024 * 1024; // 100 MB

    /** Below this share of free heap (relative to max heap) the temp-file-only cache is forced. */
    private static final double MIN_FREE_MEMORY_PERCENTAGE = 30.0; // 30%

    /**
     * Below this many free heap bytes the temp-file-only cache is forced. NOTE(review): free heap
     * is computed as {@code maxMemory - usedMemory}, so a JVM whose max heap is under 4 GB always
     * satisfies this condition and never uses the in-memory or mixed caches.
     */
    private static final long MIN_FREE_MEMORY_BYTES = 4L * 1024 * 1024 * 1024; // 4 GB

    /** Monotonic counter mixed into temp file/directory names created by this factory. */
    private static final AtomicLong tempCounter = new AtomicLong(0);

    public CustomPDFDocumentFactory(PdfMetadataService pdfMetadataService) {
        this.pdfMetadataService = pdfMetadataService;
    }

    /**
     * Loads a PDF from a file with post-processing enabled.
     *
     * @param file source file; it is consumed (deleted) by loading, see the class documentation
     * @return the opened document; the caller must close it
     * @throws IOException if the file cannot be read or parsed
     */
    public PDDocument load(File file) throws IOException {
        return load(file, false);
    }

    /**
     * Loads a PDF from a file.
     *
     * @param file source file; it is consumed (deleted) by loading, see the class documentation
     * @param readOnly when {@code true}, skips metadata defaults and password removal
     * @return the opened document; the caller must close it
     * @throws IllegalArgumentException if {@code file} is {@code null}
     * @throws IOException if the file cannot be read or parsed
     */
    public PDDocument load(File file, boolean readOnly) throws IOException {
        if (file == null) {
            throw new IllegalArgumentException("File cannot be null");
        }

        long fileSize = file.length();
        log.debug("Loading PDF from file, size: {}MB", fileSize / BYTES_PER_MB);

        return finishLoad(loadAdaptively(file, fileSize), readOnly);
    }

    /**
     * Loads a PDF from a path with post-processing enabled.
     *
     * @param path source path; the file is consumed (deleted) by loading
     * @return the opened document; the caller must close it
     * @throws IOException if the file cannot be read or parsed
     */
    public PDDocument load(Path path) throws IOException {
        return load(path, false);
    }

    /**
     * Loads a PDF from a path.
     *
     * @param path source path; the file is consumed (deleted) by loading
     * @param readOnly when {@code true}, skips metadata defaults and password removal
     * @return the opened document; the caller must close it
     * @throws IllegalArgumentException if {@code path} is {@code null}
     * @throws IOException if the file cannot be read or parsed
     */
    public PDDocument load(Path path, boolean readOnly) throws IOException {
        if (path == null) {
            throw new IllegalArgumentException("Path cannot be null");
        }

        long fileSize = Files.size(path);
        log.debug("Loading PDF from file, size: {}MB", fileSize / BYTES_PER_MB);

        return finishLoad(loadAdaptively(path.toFile(), fileSize), readOnly);
    }

    /** Load a PDF from byte array with automatic optimization. */
    public PDDocument load(byte[] input) throws IOException {
        return load(input, false);
    }

    /**
     * Loads a PDF from a byte array.
     *
     * @param input raw PDF bytes
     * @param readOnly when {@code true}, skips metadata defaults and password removal
     * @return the opened document; the caller must close it
     * @throws IllegalArgumentException if {@code input} is {@code null}
     * @throws IOException if the bytes cannot be parsed
     */
    public PDDocument load(byte[] input, boolean readOnly) throws IOException {
        if (input == null) {
            throw new IllegalArgumentException("Input bytes cannot be null");
        }

        long dataSize = input.length;
        log.debug("Loading PDF from byte array, size: {}MB", dataSize / BYTES_PER_MB);

        return finishLoad(loadAdaptively(input, dataSize), readOnly);
    }

    /** Load a PDF from InputStream with automatic optimization. */
    public PDDocument load(InputStream input) throws IOException {
        return load(input, false);
    }

    /**
     * Loads a PDF from a stream. The stream is fully copied to a temp file first because its size
     * is not known up front; this method does not close the stream.
     *
     * @param input stream positioned at the start of the PDF data
     * @param readOnly when {@code true}, skips metadata defaults and password removal
     * @return the opened document; the caller must close it
     * @throws IllegalArgumentException if {@code input} is {@code null}
     * @throws IOException if the stream cannot be buffered or the data cannot be parsed
     */
    public PDDocument load(InputStream input, boolean readOnly) throws IOException {
        // The password-less path has always used the empty password.
        return loadBufferedStream(input, "", readOnly);
    }

    /** Load with password from InputStream */
    public PDDocument load(InputStream input, String password) throws IOException {
        return load(input, password, false);
    }

    /**
     * Loads a password-protected PDF from a stream. The stream is fully copied to a temp file
     * first; this method does not close the stream.
     *
     * @param input stream positioned at the start of the PDF data
     * @param password password handed to PDFBox for decryption
     * @param readOnly when {@code true}, skips metadata defaults and password removal
     * @return the opened document; the caller must close it
     * @throws IllegalArgumentException if {@code input} is {@code null}
     * @throws IOException if the stream cannot be buffered or the data cannot be parsed
     */
    public PDDocument load(InputStream input, String password, boolean readOnly)
            throws IOException {
        return loadBufferedStream(input, password, readOnly);
    }

    /** Load from a file path string */
    public PDDocument load(String path) throws IOException {
        return load(path, false);
    }

    /**
     * Loads a PDF from a file path string; the file is consumed (deleted) by loading.
     *
     * @throws IllegalArgumentException if {@code path} is {@code null}
     */
    public PDDocument load(String path, boolean readOnly) throws IOException {
        if (path == null) {
            throw new IllegalArgumentException("Path cannot be null");
        }
        return load(new File(path), readOnly);
    }

    /** Load from a PDFFile object */
    public PDDocument load(PDFFile pdfFile) throws IOException {
        return load(pdfFile, false);
    }

    /**
     * Loads the upload wrapped by a {@code PDFFile}.
     *
     * @throws IllegalArgumentException if {@code pdfFile} or its file input is {@code null}
     */
    public PDDocument load(PDFFile pdfFile, boolean readOnly) throws IOException {
        if (pdfFile == null) {
            throw new IllegalArgumentException("PDFFile cannot be null");
        }
        return load(pdfFile.getFileInput(), readOnly);
    }

    /** Load from a MultipartFile */
    public PDDocument load(MultipartFile pdfFile) throws IOException {
        return load(pdfFile, false);
    }

    /**
     * Loads a PDF from an uploaded multipart file. The upload's input stream is opened here and
     * closed again once its content has been buffered and parsed.
     *
     * @throws IllegalArgumentException if {@code pdfFile} is {@code null}
     */
    public PDDocument load(MultipartFile pdfFile, boolean readOnly) throws IOException {
        if (pdfFile == null) {
            throw new IllegalArgumentException("MultipartFile cannot be null");
        }
        InputStream uploadStream = pdfFile.getInputStream();
        try {
            return load(uploadStream, readOnly);
        } finally {
            // Quiet close: a failing close() must not discard an already opened document.
            IOUtils.closeQuietly(uploadStream);
        }
    }

    /** Load with password from MultipartFile */
    public PDDocument load(MultipartFile fileInput, String password) throws IOException {
        return load(fileInput, password, false);
    }

    /**
     * Loads a password-protected PDF from an uploaded multipart file. The upload's input stream is
     * opened here and closed again once its content has been buffered and parsed.
     *
     * @throws IllegalArgumentException if {@code fileInput} is {@code null}
     */
    public PDDocument load(MultipartFile fileInput, String password, boolean readOnly)
            throws IOException {
        if (fileInput == null) {
            throw new IllegalArgumentException("MultipartFile cannot be null");
        }
        InputStream uploadStream = fileInput.getInputStream();
        try {
            return load(uploadStream, password, readOnly);
        } finally {
            // Quiet close: a failing close() must not discard an already opened document.
            IOUtils.closeQuietly(uploadStream);
        }
    }

    /**
     * Chooses the stream-cache strategy for a document of the given size. Checks run in this
     * order: low heap headroom forces temp-file-only; then small content gets memory-only; then
     * medium content gets a mixed cache capped at {@link #LARGE_FILE_USAGE}; everything else gets
     * temp-file-only.
     *
     * @param contentSize size of the PDF content in bytes
     * @return factory for the stream cache PDFBox should use while parsing
     */
    public StreamCacheCreateFunction getStreamCacheFunction(long contentSize) {
        Runtime runtime = Runtime.getRuntime();
        long maxMemory = runtime.maxMemory();
        long usedMemory = runtime.totalMemory() - runtime.freeMemory();

        // Headroom is measured against the max heap, not the currently committed heap.
        long actualFreeMemory = maxMemory - usedMemory;
        double freeMemoryPercent = (double) actualFreeMemory / maxMemory * 100;

        // Guarded so the String.format call is skipped when debug logging is off.
        if (log.isDebugEnabled()) {
            log.debug(
                    "Memory status - Free: {}MB ({}%), Used: {}MB, Max: {}MB",
                    actualFreeMemory / BYTES_PER_MB,
                    String.format("%.2f", freeMemoryPercent),
                    usedMemory / BYTES_PER_MB,
                    maxMemory / BYTES_PER_MB);
        }

        boolean lowMemory =
                freeMemoryPercent < MIN_FREE_MEMORY_PERCENTAGE
                        || actualFreeMemory < MIN_FREE_MEMORY_BYTES;

        if (lowMemory) {
            if (log.isDebugEnabled()) {
                log.debug(
                        "Low memory detected ({}%), forcing file-based cache",
                        String.format("%.2f", freeMemoryPercent));
            }
            return createScratchFileCacheFunction(MemoryUsageSetting.setupTempFileOnly());
        }
        if (contentSize < SMALL_FILE_THRESHOLD) {
            log.debug("Using memory-only cache for small document ({}KB)", contentSize / 1024);
            return IOUtils.createMemoryOnlyStreamCache();
        }
        if (contentSize < LARGE_FILE_THRESHOLD) {
            // For medium files (10-50MB), use a mixed approach
            log.debug(
                    "Using mixed memory/file cache for medium document ({}MB)",
                    contentSize / BYTES_PER_MB);
            return createScratchFileCacheFunction(MemoryUsageSetting.setupMixed(LARGE_FILE_USAGE));
        }
        log.debug("Using file-based cache for large document");
        return createScratchFileCacheFunction(MemoryUsageSetting.setupTempFileOnly());
    }

    /** Loads a {@link File} or {@code byte[]} source using the empty password. */
    private PDDocument loadAdaptively(Object source, long contentSize) throws IOException {
        return loadAdaptivelyWithPassword(source, contentSize, "");
    }

    /**
     * Loads a {@link File} or {@code byte[]} source with the given password, picking the cache
     * strategy from {@code contentSize}. Small file sources are read into memory and the file is
     * removed immediately.
     *
     * @throws IllegalArgumentException if {@code source} is neither a {@link File} nor a byte array
     */
    private PDDocument loadAdaptivelyWithPassword(Object source, long contentSize, String password)
            throws IOException {
        StreamCacheCreateFunction cacheFunction = getStreamCacheFunction(contentSize);

        Object effectiveSource = source;
        // Strictly "<" so the boundary agrees with getStreamCacheFunction and the byte loaders;
        // "<=" made an exactly-10MB file get read, deleted and then rewritten to a new temp file.
        if (contentSize < SMALL_FILE_THRESHOLD && source instanceof File smallFile) {
            byte[] content = Files.readAllBytes(smallFile.toPath());
            if (!smallFile.delete()) {
                // Best effort: the content is already in memory, so loading can still proceed.
                log.warn("Could not delete file after reading it into memory: {}", smallFile);
            }
            effectiveSource = content;
        }

        if (effectiveSource instanceof File fileSource) {
            return loadFromFileWithPassword(fileSource, contentSize, cacheFunction, password);
        }
        if (effectiveSource instanceof byte[] byteSource) {
            return loadFromBytesWithPassword(byteSource, contentSize, cacheFunction, password);
        }
        throw new IllegalArgumentException(
                "Unsupported source type: "
                        + (effectiveSource == null ? "null" : effectiveSource.getClass()));
    }

    /**
     * Opens {@code file} through a {@code DeletingRandomAccessFile} and parses it. If parsing
     * fails the reader is closed here, because {@code Loader.loadPDF(RandomAccessRead, ...)} leaves
     * ownership of the reader with the caller.
     */
    private PDDocument loadFromFileWithPassword(
            File file, long size, StreamCacheCreateFunction cache, String password)
            throws IOException {
        DeletingRandomAccessFile reader = new DeletingRandomAccessFile(file);
        boolean loaded = false;
        try {
            PDDocument document = Loader.loadPDF(reader, password, null, null, cache);
            loaded = true;
            return document;
        } finally {
            if (!loaded) {
                IOUtils.closeQuietly(reader);
            }
        }
    }

    /** Load bytes with password; large arrays are spilled to a self-deleting temp file first. */
    private PDDocument loadFromBytesWithPassword(
            byte[] bytes, long size, StreamCacheCreateFunction cache, String password)
            throws IOException {
        if (size >= SMALL_FILE_THRESHOLD) {
            log.debug("Writing large byte array to temp file for password-protected PDF");
            return loadBytesViaTempFile(bytes, size, cache, password);
        }
        return Loader.loadPDF(bytes, password, null, null, cache);
    }

    /**
     * Writes {@code bytes} to a new temp file and loads it through {@link
     * #loadFromFileWithPassword}, so the temp file is tied to the document's reader. The temp file
     * is removed here if writing or parsing fails.
     */
    private PDDocument loadBytesViaTempFile(
            byte[] bytes, long size, StreamCacheCreateFunction cache, String password)
            throws IOException {
        Path tempFile = createTempFile("pdf-bytes-");
        boolean handedOff = false;
        try {
            Files.write(tempFile, bytes);
            PDDocument document =
                    loadFromFileWithPassword(tempFile.toFile(), size, cache, password);
            handedOff = true;
            return document;
        } finally {
            if (!handedOff) {
                deleteQuietly(tempFile);
            }
        }
    }

    /**
     * Shared implementation of the stream-based loaders: buffers the stream into a temp file,
     * loads it, and cleans the temp file up if anything fails before a document is returned.
     */
    private PDDocument loadBufferedStream(InputStream input, String password, boolean readOnly)
            throws IOException {
        if (input == null) {
            throw new IllegalArgumentException("InputStream cannot be null");
        }

        // Since we don't know the size upfront, buffer to a temp file
        Path tempFile = createTempFile("pdf-stream-");
        PDDocument document;
        boolean loaded = false;
        try {
            Files.copy(input, tempFile, StandardCopyOption.REPLACE_EXISTING);
            document =
                    loadAdaptivelyWithPassword(tempFile.toFile(), Files.size(tempFile), password);
            loaded = true;
        } finally {
            if (!loaded) {
                deleteQuietly(tempFile);
            }
        }
        return finishLoad(document, readOnly);
    }

    /**
     * Applies post-processing unless {@code readOnly} is set, closing the document if
     * post-processing fails so that it is not leaked to nobody.
     */
    private PDDocument finishLoad(PDDocument doc, boolean readOnly) throws IOException {
        if (readOnly) {
            return doc;
        }
        boolean processed = false;
        try {
            postProcessDocument(doc);
            processed = true;
            return doc;
        } finally {
            if (!processed) {
                IOUtils.closeQuietly(doc);
            }
        }
    }

    /**
     * Adapts a {@link MemoryUsageSetting} to the cache factory type PDFBox expects, wrapping the
     * checked {@link IOException} from {@link ScratchFile} creation in a {@link RuntimeException}.
     */
    private StreamCacheCreateFunction createScratchFileCacheFunction(MemoryUsageSetting settings) {
        return () -> {
            try {
                return new ScratchFile(settings);
            } catch (IOException e) {
                throw new RuntimeException("ScratchFile initialization failed", e);
            }
        };
    }

    /** Applies default metadata and flags encryption for removal. */
    private void postProcessDocument(PDDocument doc) throws IOException {
        pdfMetadataService.setDefaultMetadata(doc);
        removePassword(doc);
    }

    /** Load a file using the empty password. */
    private PDDocument loadFromFile(File file, long size, StreamCacheCreateFunction cache)
            throws IOException {
        return loadFromFileWithPassword(file, size, cache, "");
    }

    /** Load bytes using the empty password; large arrays are spilled to a temp file first. */
    private PDDocument loadFromBytes(byte[] bytes, long size, StreamCacheCreateFunction cache)
            throws IOException {
        if (size >= SMALL_FILE_THRESHOLD) {
            log.debug("Writing large byte array to temp file");
            return loadBytesViaTempFile(bytes, size, cache, "");
        }
        return Loader.loadPDF(bytes, "", null, null, cache);
    }

    /**
     * Creates an empty document backed by a scratch file with the given settings and applies the
     * default metadata. The document is closed again if applying the metadata fails.
     */
    public PDDocument createNewDocument(MemoryUsageSetting settings) throws IOException {
        PDDocument doc = new PDDocument(createScratchFileCacheFunction(settings));
        boolean ready = false;
        try {
            pdfMetadataService.setDefaultMetadata(doc);
            ready = true;
            return doc;
        } finally {
            if (!ready) {
                IOUtils.closeQuietly(doc);
            }
        }
    }

    /** Creates an empty document that buffers exclusively in temp files. */
    public PDDocument createNewDocument() throws IOException {
        return createNewDocument(MemoryUsageSetting.setupTempFileOnly());
    }

    /**
     * Serializes a document to a byte array. Documents with fewer than 10 pages are saved straight
     * into memory; larger ones are saved to a temp file that is read back and then deleted.
     *
     * @param document the document to save; it is not closed by this method
     * @return the serialized PDF
     * @throws IOException if saving or reading back fails
     */
    public byte[] saveToBytes(PDDocument document) throws IOException {
        if (document.getNumberOfPages() < 10) { // Simple heuristic
            try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
                document.save(baos);
                return baos.toByteArray();
            }
        }

        Path tempFile = createTempFile("pdf-save-");
        try {
            document.save(tempFile.toFile());
            return Files.readAllBytes(tempFile);
        } finally {
            // The bytes are already in memory (or the save failed); the file is of no further use.
            deleteQuietly(tempFile);
        }
    }

    /** Flags an encrypted document so that its security is dropped when it is saved. */
    private void removePassword(PDDocument document) throws IOException {
        if (!document.isEncrypted()) {
            return;
        }
        try {
            document.setAllSecurityToBeRemoved(true);
        } catch (Exception e) {
            log.error("Decryption failed", e);
            throw new IOException("PDF decryption failed", e);
        }
    }

    /** Creates a temp file whose name carries the given prefix and a process-wide counter. */
    private Path createTempFile(String prefix) throws IOException {
        Path file = Files.createTempFile(prefix + tempCounter.incrementAndGet() + "-", ".tmp");
        log.debug("Created temp file: {}", file);
        return file;
    }

    /** Create a uniquely named temporary directory (currently not referenced in this class). */
    private Path createTempDirectory(String prefix) throws IOException {
        return Files.createTempDirectory(prefix + tempCounter.incrementAndGet() + "-");
    }

    /** Best-effort temp file removal; failures are logged and never propagated. */
    private void deleteQuietly(Path path) {
        try {
            Files.deleteIfExists(path);
        } catch (IOException e) {
            log.warn("Failed to delete temp file: {}", path, e);
        }
    }

    /** Create new document bytes based on an existing document */
    public byte[] createNewBytesBasedOnOldDocument(byte[] oldDocument) throws IOException {
        try (PDDocument document = load(oldDocument)) {
            return saveToBytes(document);
        }
    }

    /** Create new document bytes based on an existing document file (the file is consumed). */
    public byte[] createNewBytesBasedOnOldDocument(File oldDocument) throws IOException {
        try (PDDocument document = load(oldDocument)) {
            return saveToBytes(document);
        }
    }

    /**
     * Re-applies the document's own extracted metadata to it and serializes it. The passed
     * document is modified but not closed.
     */
    public byte[] createNewBytesBasedOnOldDocument(PDDocument oldDocument) throws IOException {
        pdfMetadataService.setMetadataToPdf(
                oldDocument, pdfMetadataService.extractMetadataFromPdf(oldDocument), true);
        return saveToBytes(oldDocument);
    }

    /** Create a new, empty document carrying the metadata of the given document bytes. */
    public PDDocument createNewDocumentBasedOnOldDocument(byte[] oldDocument) throws IOException {
        try (PDDocument document = load(oldDocument)) {
            return createNewDocumentBasedOnOldDocument(document);
        }
    }

    /**
     * Create a new, empty document carrying the metadata of the given document file (the file is
     * consumed).
     */
    public PDDocument createNewDocumentBasedOnOldDocument(File oldDocument) throws IOException {
        try (PDDocument document = load(oldDocument)) {
            return createNewDocumentBasedOnOldDocument(document);
        }
    }

    /**
     * Creates a new, empty document and copies the metadata extracted from {@code oldDocument}
     * onto it. The new document is closed again if copying the metadata fails; {@code oldDocument}
     * is left open.
     */
    public PDDocument createNewDocumentBasedOnOldDocument(PDDocument oldDocument)
            throws IOException {
        PDDocument document = createNewDocument();
        boolean ready = false;
        try {
            pdfMetadataService.setMetadataToPdf(
                    document, pdfMetadataService.extractMetadataFromPdf(oldDocument), true);
            ready = true;
            return document;
        } finally {
            if (!ready) {
                IOUtils.closeQuietly(document);
            }
        }
    }

    /** Load document from a file and convert it to bytes (the file is consumed). */
    public byte[] loadToBytes(File file) throws IOException {
        try (PDDocument document = load(file)) {
            return saveToBytes(document);
        }
    }

    /** Load document from bytes and convert it back to bytes */
    public byte[] loadToBytes(byte[] bytes) throws IOException {
        try (PDDocument document = load(bytes)) {
            return saveToBytes(document);
        }
    }
}