diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 000000000..6e006423a --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,12 @@ +{ + "permissions": { + "allow": [ + "Bash(chmod:*)", + "Bash(mkdir:*)", + "Bash(./gradlew:*)", + "Bash(grep:*)", + "Bash(cat:*)" + ], + "deny": [] + } +} \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index cbb20111c..fd02b29f7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,7 +33,11 @@ ENV DISABLE_ADDITIONAL_FEATURES=true \ PYTHONPATH=/usr/lib/libreoffice/program:/opt/venv/lib/python3.12/site-packages \ UNO_PATH=/usr/lib/libreoffice/program \ URE_BOOTSTRAP=file:///usr/lib/libreoffice/program/fundamentalrc \ - PATH=$PATH:/opt/venv/bin + PATH=$PATH:/opt/venv/bin \ + STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \ + TMPDIR=/tmp/stirling-pdf \ + TEMP=/tmp/stirling-pdf \ + TMP=/tmp/stirling-pdf # JDK for app @@ -78,17 +82,17 @@ RUN echo "@main https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a ln -s /usr/lib/libreoffice/program/unohelper.py /opt/venv/lib/python3.12/site-packages/ && \ ln -s /usr/lib/libreoffice/program /opt/venv/lib/python3.12/site-packages/LibreOffice && \ mv /usr/share/tessdata /usr/share/tessdata-original && \ - mkdir -p $HOME /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \ + mkdir -p $HOME /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders /tmp/stirling-pdf && \ fc-cache -f -v && \ chmod +x /scripts/* && \ chmod +x /scripts/init.sh && \ # User permissions addgroup -S stirlingpdfgroup && adduser -S stirlingpdfuser -G stirlingpdfgroup && \ - chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline && \ + chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline /tmp/stirling-pdf && \ chown stirlingpdfuser:stirlingpdfgroup /app.jar EXPOSE 8080/tcp # 
Set user and run command ENTRYPOINT ["tini", "--", "/scripts/init.sh"] -CMD ["sh", "-c", "java -Dfile.encoding=UTF-8 -jar /app.jar & /opt/venv/bin/unoserver --port 2003 --interface 127.0.0.1"] +CMD ["sh", "-c", "java -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/tmp/stirling-pdf -jar /app.jar & /opt/venv/bin/unoserver --port 2003 --interface 127.0.0.1"] diff --git a/Dockerfile.dev b/Dockerfile.dev index 37571373e..15de277b9 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -27,7 +27,11 @@ RUN apt-get update && apt-get install -y \ && apt-get clean && rm -rf /var/lib/apt/lists/* # Setze die Environment Variable für setuptools -ENV SETUPTOOLS_USE_DISTUTILS=local +ENV SETUPTOOLS_USE_DISTUTILS=local \ + STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \ + TMPDIR=/tmp/stirling-pdf \ + TEMP=/tmp/stirling-pdf \ + TMP=/tmp/stirling-pdf # Installation der benötigten Python-Pakete RUN python3 -m venv --system-site-packages /opt/venv \ @@ -40,8 +44,9 @@ ENV PATH="/opt/venv/bin:$PATH" COPY . /workspace -RUN adduser --disabled-password --gecos '' devuser \ - && chown -R devuser:devuser /home/devuser /workspace +RUN mkdir -p /tmp/stirling-pdf \ + && adduser --disabled-password --gecos '' devuser \ + && chown -R devuser:devuser /home/devuser /workspace /tmp/stirling-pdf RUN echo "devuser ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/devuser \ && chmod 0440 /etc/sudoers.d/devuser diff --git a/Dockerfile.fat b/Dockerfile.fat index 682fac663..666ba98be 100644 --- a/Dockerfile.fat +++ b/Dockerfile.fat @@ -46,7 +46,11 @@ ENV DISABLE_ADDITIONAL_FEATURES=true \ PYTHONPATH=/usr/lib/libreoffice/program:/opt/venv/lib/python3.12/site-packages \ UNO_PATH=/usr/lib/libreoffice/program \ URE_BOOTSTRAP=file:///usr/lib/libreoffice/program/fundamentalrc \ - PATH=$PATH:/opt/venv/bin + PATH=$PATH:/opt/venv/bin \ + STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \ + TMPDIR=/tmp/stirling-pdf \ + TEMP=/tmp/stirling-pdf \ + TMP=/tmp/stirling-pdf # JDK for app @@ -92,16 +96,16 @@ RUN echo "@main 
https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/a ln -s /usr/lib/libreoffice/program/unohelper.py /opt/venv/lib/python3.12/site-packages/ && \ ln -s /usr/lib/libreoffice/program /opt/venv/lib/python3.12/site-packages/LibreOffice && \ mv /usr/share/tessdata /usr/share/tessdata-original && \ - mkdir -p $HOME /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \ + mkdir -p $HOME /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders /tmp/stirling-pdf && \ fc-cache -f -v && \ chmod +x /scripts/* && \ chmod +x /scripts/init.sh && \ # User permissions addgroup -S stirlingpdfgroup && adduser -S stirlingpdfuser -G stirlingpdfgroup && \ - chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline && \ + chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline /tmp/stirling-pdf && \ chown stirlingpdfuser:stirlingpdfgroup /app.jar EXPOSE 8080/tcp # Set user and run command ENTRYPOINT ["tini", "--", "/scripts/init.sh"] -CMD ["sh", "-c", "java -Dfile.encoding=UTF-8 -jar /app.jar & /opt/venv/bin/unoserver --port 2003 --interface 127.0.0.1"] +CMD ["sh", "-c", "java -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/tmp/stirling-pdf -jar /app.jar & /opt/venv/bin/unoserver --port 2003 --interface 127.0.0.1"] diff --git a/Dockerfile.ultra-lite b/Dockerfile.ultra-lite index 83cd5e9c3..c4eb4ba46 100644 --- a/Dockerfile.ultra-lite +++ b/Dockerfile.ultra-lite @@ -11,7 +11,11 @@ ENV DISABLE_ADDITIONAL_FEATURES=true \ JAVA_CUSTOM_OPTS="" \ PUID=1000 \ PGID=1000 \ - UMASK=022 + UMASK=022 \ + STIRLING_TEMPFILES_DIRECTORY=/tmp/stirling-pdf \ + TMPDIR=/tmp/stirling-pdf \ + TEMP=/tmp/stirling-pdf \ + TMP=/tmp/stirling-pdf # Copy necessary files COPY scripts/download-security-jar.sh /scripts/download-security-jar.sh @@ -35,10 +39,10 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et 
su-exec \ openjdk21-jre && \ # User permissions - mkdir -p /configs /logs /customFiles /usr/share/fonts/opentype/noto && \ + mkdir -p /configs /logs /customFiles /usr/share/fonts/opentype/noto /tmp/stirling-pdf && \ chmod +x /scripts/*.sh && \ addgroup -S stirlingpdfgroup && adduser -S stirlingpdfuser -G stirlingpdfgroup && \ - chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /configs /customFiles /pipeline && \ + chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /configs /customFiles /pipeline /tmp/stirling-pdf && \ chown stirlingpdfuser:stirlingpdfgroup /app.jar # Set environment variables @@ -48,4 +52,4 @@ EXPOSE 8080/tcp # Run the application ENTRYPOINT ["tini", "--", "/scripts/init-without-ocr.sh"] -CMD ["java", "-Dfile.encoding=UTF-8", "-jar", "/app.jar"] +CMD ["java", "-Dfile.encoding=UTF-8", "-Djava.io.tmpdir=/tmp/stirling-pdf", "-jar", "/app.jar"] diff --git a/common/src/main/java/stirling/software/common/annotations/AutoJobPostMapping.java b/common/src/main/java/stirling/software/common/annotations/AutoJobPostMapping.java index 062f3e0a1..8fb729560 100644 --- a/common/src/main/java/stirling/software/common/annotations/AutoJobPostMapping.java +++ b/common/src/main/java/stirling/software/common/annotations/AutoJobPostMapping.java @@ -8,22 +8,22 @@ import org.springframework.web.bind.annotation.RequestMethod; /** * Shortcut for a POST endpoint that is executed through the Stirling "auto‑job" framework. - *

- * Behaviour notes: - *

- *

* - *

Unless stated otherwise an attribute only affects async execution.

+ *

Behaviour notes: + * + *

+ * + *

Unless stated otherwise an attribute only affects async execution. */ @Target(ElementType.METHOD) @Retention(RetentionPolicy.RUNTIME) @@ -31,42 +31,42 @@ import org.springframework.web.bind.annotation.RequestMethod; @RequestMapping(method = RequestMethod.POST) public @interface AutoJobPostMapping { - /** - * Alias for {@link RequestMapping#value} – the path mapping of the endpoint. - */ + /** Alias for {@link RequestMapping#value} – the path mapping of the endpoint. */ @AliasFor(annotation = RequestMapping.class, attribute = "value") String[] value() default {}; - /** - * MIME types this endpoint accepts. Defaults to {@code multipart/form-data}. - */ + /** MIME types this endpoint accepts. Defaults to {@code multipart/form-data}. */ @AliasFor(annotation = RequestMapping.class, attribute = "consumes") String[] consumes() default {"multipart/form-data"}; /** - * Maximum execution time in milliseconds before the job is aborted. - * A negative value means "use the application default". - *

Only honoured when {@code async=true}.

+ * Maximum execution time in milliseconds before the job is aborted. A negative value means "use + * the application default". + * + *

Only honoured when {@code async=true}. */ long timeout() default -1; /** - * Total number of attempts (initial + retries). Must be at least 1. - * Retries are executed with exponential back‑off. - *

Only honoured when {@code async=true}.

+ * Total number of attempts (initial + retries). Must be at least 1. Retries are executed + * with exponential back‑off. + * + *

Only honoured when {@code async=true}. */ int retryCount() default 1; /** * Record percentage / note updates so they can be retrieved via the REST status endpoint. - *

Only honoured when {@code async=true}.

+ * + *

Only honoured when {@code async=true}. */ boolean trackProgress() default true; /** - * If {@code true} the job may be placed in a queue instead of being rejected when resources - * are scarce. - *

Only honoured when {@code async=true}.

+ * If {@code true} the job may be placed in a queue instead of being rejected when resources are + * scarce. + * + *

Only honoured when {@code async=true}. */ boolean queueable() default false; diff --git a/common/src/main/java/stirling/software/common/config/TempFileConfiguration.java b/common/src/main/java/stirling/software/common/config/TempFileConfiguration.java new file mode 100644 index 000000000..6fce7e0bf --- /dev/null +++ b/common/src/main/java/stirling/software/common/config/TempFileConfiguration.java @@ -0,0 +1,59 @@ +package stirling.software.common.config; + +import java.nio.file.Files; +import java.nio.file.Path; + +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import jakarta.annotation.PostConstruct; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import stirling.software.common.model.ApplicationProperties; +import stirling.software.common.util.TempFileRegistry; + +/** + * Configuration for the temporary file management system. Sets up the necessary beans and + * configures system properties. + */ +@Slf4j +@Configuration +@RequiredArgsConstructor +public class TempFileConfiguration { + + private final ApplicationProperties applicationProperties; + + /** + * Create the TempFileRegistry bean. 
+     *
+     * @return A new TempFileRegistry instance
+     */
+    @Bean
+    public TempFileRegistry tempFileRegistry() {
+        return new TempFileRegistry();
+    }
+
+    /**
+     * Runs once after dependency injection: resolves the base temp directory from the application
+     * properties, creates it when it is missing and logs the effective settings. Any exception is
+     * logged and swallowed rather than propagated.
+     */
+    @PostConstruct
+    public void initTempFileConfig() {
+        try {
+            ApplicationProperties.TempFileManagement settings =
+                    applicationProperties.getSystem().getTempFileManagement();
+            String baseDirSetting = settings.getBaseTmpDir();
+            Path baseDir = Path.of(baseDirSetting);
+
+            // Only create (and announce) the directory when it is not there yet
+            boolean alreadyPresent = Files.exists(baseDir);
+            if (!alreadyPresent) {
+                Files.createDirectories(baseDir);
+                log.info("Created temporary directory: {}", baseDir);
+            }
+
+            log.debug("Temporary file configuration initialized");
+            log.debug("Using temp directory: {}", baseDirSetting);
+            log.debug("Temp file prefix: {}", settings.getPrefix());
+        } catch (Exception e) {
+            log.error("Failed to initialize temporary file configuration", e);
+        }
+    }
+}
diff --git a/common/src/main/java/stirling/software/common/config/TempFileShutdownHook.java b/common/src/main/java/stirling/software/common/config/TempFileShutdownHook.java
new file mode 100644
index 000000000..6fd3bdeff
--- /dev/null
+++ b/common/src/main/java/stirling/software/common/config/TempFileShutdownHook.java
@@ -0,0 +1,84 @@
+package stirling.software.common.config;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Set;
+
+import org.springframework.beans.factory.DisposableBean;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+
+import lombok.extern.slf4j.Slf4j;
+
+import stirling.software.common.util.GeneralUtils;
+import stirling.software.common.util.TempFileRegistry;
+
+/**
+ * Handles cleanup of temporary files on application shutdown. Implements Spring's DisposableBean
+ * interface to ensure cleanup happens during normal application shutdown.
+ */
+@Slf4j
+@Component
+public class TempFileShutdownHook implements DisposableBean {
+
+    private final TempFileRegistry registry;
+
+    // Cleanup can be triggered twice (Spring's destroy() and the JVM shutdown hook registered
+    // below, possibly on different threads). Whichever trigger flips this flag first does the
+    // work; the other one becomes a no-op instead of racing over the same files.
+    private final java.util.concurrent.atomic.AtomicBoolean cleanupStarted =
+            new java.util.concurrent.atomic.AtomicBoolean(false);
+
+    /**
+     * Stores the registry and registers a JVM shutdown hook that runs the same cleanup as {@link
+     * #destroy()}.
+     *
+     * @param registry registry holding the temp files and directories to remove on shutdown
+     */
+    @Autowired
+    public TempFileShutdownHook(TempFileRegistry registry) {
+        this.registry = registry;
+
+        // Register a JVM shutdown hook as a backup in case Spring's
+        // DisposableBean mechanism doesn't trigger (e.g., the context is never closed)
+        Runtime.getRuntime().addShutdownHook(new Thread(this::cleanupTempFiles));
+    }
+
+    /** Spring's DisposableBean interface method. Called during normal application shutdown. */
+    @Override
+    public void destroy() {
+        log.info("Application shutting down, cleaning up temporary files");
+        cleanupTempFiles();
+    }
+
+    /**
+     * Delete every registered temporary file and directory, then clear the registry. Runs at most
+     * once per instance; failures on individual paths are logged and do not stop the remaining
+     * deletions.
+     */
+    private void cleanupTempFiles() {
+        if (!cleanupStarted.compareAndSet(false, true)) {
+            // The other shutdown trigger already ran (or is running) the cleanup
+            return;
+        }
+
+        try {
+            int deletedCount = 0;
+
+            // Clean up all registered files
+            Set<Path> files = registry.getAllRegisteredFiles();
+            for (Path file : files) {
+                try {
+                    // deleteIfExists reports whether something was actually removed
+                    if (Files.deleteIfExists(file)) {
+                        deletedCount++;
+                    }
+                } catch (IOException e) {
+                    log.warn("Failed to delete temp file during shutdown: {}", file, e);
+                }
+            }
+
+            // Clean up all registered directories
+            Set<Path> directories = registry.getTempDirectories();
+            for (Path dir : directories) {
+                try {
+                    if (Files.exists(dir)) {
+                        GeneralUtils.deleteDirectory(dir);
+                        deletedCount++;
+                    }
+                } catch (IOException e) {
+                    log.warn("Failed to delete temp directory during shutdown: {}", dir, e);
+                }
+            }
+
+            log.info(
+                    "Shutdown cleanup complete. Deleted {} temporary files/directories",
+                    deletedCount);
+
+            // Clear the registry
+            registry.clear();
+        } catch (Exception e) {
+            log.error("Error during shutdown cleanup", e);
+        }
+    }
+}
diff --git a/common/src/main/java/stirling/software/common/model/ApplicationProperties.java b/common/src/main/java/stirling/software/common/model/ApplicationProperties.java
index f5b67c866..0017fa34a 100644
--- a/common/src/main/java/stirling/software/common/model/ApplicationProperties.java
+++ b/common/src/main/java/stirling/software/common/model/ApplicationProperties.java
@@ -292,6 +292,7 @@ public class ApplicationProperties {
         private Boolean enableUrlToPDF;
         private CustomPaths customPaths = new CustomPaths();
         private String fileUploadLimit;
+        private TempFileManagement tempFileManagement = new TempFileManagement();
 
         public boolean isAnalyticsEnabled() {
             return this.getEnableAnalytics() != null && this.getEnableAnalytics();
@@ -317,6 +318,30 @@ public class ApplicationProperties {
         }
     }
 
+    @Data
+    public static class TempFileManagement {
+        private String baseTmpDir = "";
+        private String libreofficeDir = "";
+        private String systemTempDir = "";
+        private String prefix = "stirling-pdf-";
+        private long maxAgeHours = 24;
+        private long cleanupIntervalMinutes = 30;
+        private boolean startupCleanup = true;
+        private boolean cleanupSystemTemp = false;
+
+        public String getBaseTmpDir() {
+            return baseTmpDir != null && !baseTmpDir.isEmpty()
+                    ? baseTmpDir
+                    : java.lang.System.getProperty("java.io.tmpdir") + "/stirling-pdf";
+        }
+
+        public String getLibreofficeDir() {
+            return libreofficeDir != null && !libreofficeDir.isEmpty()
+                    ?
libreofficeDir
+                    : getBaseTmpDir() + "/libreoffice";
+        }
+    }
+
     @Data
     public static class Datasource {
         private boolean enableCustomDatabase;
diff --git a/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java b/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java
index e4b9173d0..51f52c34d 100644
--- a/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java
+++ b/common/src/main/java/stirling/software/common/service/CustomPDFDocumentFactory.java
@@ -23,6 +23,9 @@ import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 
 import stirling.software.common.model.api.PDFFile;
+import stirling.software.common.util.ApplicationContextProvider;
+import stirling.software.common.util.TempFileManager;
+import stirling.software.common.util.TempFileRegistry;
 
 /**
  * Adaptive PDF document factory that optimizes memory usage based on file size and available system
@@ -402,10 +405,48 @@ public class CustomPDFDocumentFactory {
         }
     }
 
-    // Temp file handling with enhanced logging
+    /**
+     * Creates a temp file for PDF processing. Prefers the TempFileManager bean looked up through
+     * ApplicationContextProvider; if that bean is missing or its call fails, falls back to
+     * Files.createTempFile and then tries to register the result with the TempFileRegistry bean.
+     *
+     * NOTE(review): the TempFileManager path passes only the ".tmp" suffix, so {@code prefix} is
+     * not applied to files created that way - confirm this is intended.
+     */
     private Path createTempFile(String prefix) throws IOException {
+        // Check if TempFileManager is available in the application context
+        try {
+            TempFileManager tempFileManager =
+                    ApplicationContextProvider.getBean(TempFileManager.class);
+            if (tempFileManager != null) {
+                // Use TempFileManager to create and register the temp file
+                File file = tempFileManager.createTempFile(".tmp");
+                log.debug("Created and registered temp file via TempFileManager: {}", file);
+                return file.toPath();
+            }
+        } catch (Exception e) {
+            // Keep the cause visible: this branch is also reached when the manager exists but
+            // createTempFile itself threw, not only when the bean lookup failed.
+            log.debug(
+                    "TempFileManager not available, falling back to standard temp file creation",
+                    e);
+        }
+
+        // Fallback to standard temp file creation
         Path file = Files.createTempFile(prefix + tempCounter.incrementAndGet() + "-", ".tmp");
         log.debug("Created temp file: {}", file);
+
+        // Try to register the file with a static registry if possible
+        try {
+            TempFileRegistry registry = ApplicationContextProvider.getBean(TempFileRegistry.class);
+            if (registry != null) {
+                registry.register(file);
+                log.debug("Registered fallback temp file with registry: {}", file);
+            }
+        } catch (Exception e) {
+            log.debug("Could not register fallback temp file with registry: {}", file, e);
+        }
+
         return file;
     }
 
diff --git a/common/src/main/java/stirling/software/common/service/ResourceMonitor.java b/common/src/main/java/stirling/software/common/service/ResourceMonitor.java
index 2791fff90..0e8073d8f 100644
--- a/common/src/main/java/stirling/software/common/service/ResourceMonitor.java
+++ b/common/src/main/java/stirling/software/common/service/ResourceMonitor.java
@@ -173,7 +173,9 @@ public class ResourceMonitor {
                 log.info("System resource status changed from {} to {}", oldStatus, newStatus);
                 log.info(
                         "Current metrics - CPU: {}%, Memory: {}%, Free Memory: {} MB",
-                        String.format("%.1f", cpuUsage * 100), String.format("%.1f", memoryUsage * 100), freeMemory / (1024 * 1024));
+                        String.format("%.1f", cpuUsage * 100),
+                        String.format("%.1f", memoryUsage * 100),
+                        freeMemory / (1024 * 1024));
             }
         } catch (Exception e) {
             log.error("Error updating resource metrics: {}", e.getMessage(), e);
diff --git a/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java b/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java
new file mode 100644
index 000000000..d53c4ea84
--- /dev/null
+++ b/common/src/main/java/stirling/software/common/service/TempFileCleanupService.java
@@ -0,0 +1,447 @@
+package stirling.software.common.service;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Consumer;
+import java.util.function.Predicate;
+import java.util.stream.Stream;
+
+import
org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.scheduling.annotation.Scheduled;
+import org.springframework.stereotype.Service;
+
+import jakarta.annotation.PostConstruct;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+
+import stirling.software.common.model.ApplicationProperties;
+import stirling.software.common.util.GeneralUtils;
+import stirling.software.common.util.TempFileManager;
+import stirling.software.common.util.TempFileRegistry;
+
+/**
+ * Service to periodically clean up temporary files. Runs scheduled tasks to delete old temp files
+ * and directories.
+ */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class TempFileCleanupService {
+
+    private final TempFileRegistry registry;
+    private final TempFileManager tempFileManager;
+    private final ApplicationProperties applicationProperties;
+
+    // Injected by field because it is a qualified String bean, not part of the Lombok constructor
+    @Autowired
+    @Qualifier("machineType")
+    private String machineType;
+
+    // Maximum recursion depth for directory traversal
+    private static final int MAX_RECURSION_DEPTH = 5;
+
+    // File name patterns that identify our temp files
+    private static final Predicate<String> IS_OUR_TEMP_FILE =
+            fileName ->
+                    fileName.startsWith("stirling-pdf-")
+                            || fileName.startsWith("output_")
+                            || fileName.startsWith("compressedPDF")
+                            || fileName.startsWith("pdf-save-")
+                            || fileName.startsWith("pdf-stream-")
+                            || fileName.startsWith("PDFBox")
+                            || fileName.startsWith("input_")
+                            || fileName.startsWith("overlay-");
+
+    // File name patterns that identify common system temp files (jetty files are never matched)
+    private static final Predicate<String> IS_SYSTEM_TEMP_FILE =
+            fileName ->
+                    fileName.matches("lu\\d+[a-z0-9]*\\.tmp")
+                            || fileName.matches("ocr_process\\d+")
+                            || (fileName.startsWith("tmp") && !fileName.contains("jetty"))
+                            || fileName.startsWith("OSL_PIPE_")
+                            || (fileName.endsWith(".tmp") && !fileName.contains("jetty"));
+
+    // File and directory names that must never be touched by the cleanup
+    private static final Predicate<String> SHOULD_SKIP =
+            fileName ->
+                    fileName.contains("jetty") // also covers the "jetty-" prefix
+                            || fileName.equals("proc")
+                            || fileName.equals("sys")
+                            || fileName.equals("dev")
+                            || fileName.startsWith("hsperfdata_") // covers every user suffix
+                            || fileName.equals(".pdfbox.cache");
+
+    /**
+     * Runs once after injection: makes sure the temp directories exist and, when startup cleanup
+     * is enabled in the configuration, removes stale files left over from a previous run.
+     */
+    @PostConstruct
+    public void init() {
+        // Create necessary directories
+        ensureDirectoriesExist();
+
+        // Perform startup cleanup if enabled
+        if (applicationProperties.getSystem().getTempFileManagement().isStartupCleanup()) {
+            runStartupCleanup();
+        }
+    }
+
+    /**
+     * Ensure that all required temp directories exist. An I/O failure is logged and not
+     * propagated.
+     */
+    private void ensureDirectoriesExist() {
+        try {
+            ApplicationProperties.TempFileManagement tempFiles =
+                    applicationProperties.getSystem().getTempFileManagement();
+
+            // Create the main temp directory
+            String customTempDirectory = tempFiles.getBaseTmpDir();
+            if (customTempDirectory != null && !customTempDirectory.isEmpty()) {
+                Path tempDir = Path.of(customTempDirectory);
+                if (!Files.exists(tempDir)) {
+                    Files.createDirectories(tempDir);
+                    log.info("Created temp directory: {}", tempDir);
+                }
+            }
+
+            // Create LibreOffice temp directory
+            String libreOfficeTempDir = tempFiles.getLibreofficeDir();
+            if (libreOfficeTempDir != null && !libreOfficeTempDir.isEmpty()) {
+                Path loTempDir = Path.of(libreOfficeTempDir);
+                if (!Files.exists(loTempDir)) {
+                    Files.createDirectories(loTempDir);
+                    log.info("Created LibreOffice temp directory: {}", loTempDir);
+                }
+            }
+        } catch (IOException e) {
+            log.error("Error creating temp directories", e);
+        }
+    }
+
+    /** Scheduled task to clean up old temporary files. Runs at the configured interval.
*/
+    @Scheduled(
+            fixedDelayString =
+                    "#{applicationProperties.system.tempFileManagement.cleanupIntervalMinutes}",
+            timeUnit = TimeUnit.MINUTES)
+    public void scheduledCleanup() {
+        log.info("Running scheduled temporary file cleanup");
+        // The same age threshold is used for registered files and for the unregistered scan below
+        long maxAgeMillis = tempFileManager.getMaxAgeMillis();
+
+        // Clean up registered temp files (managed by TempFileRegistry)
+        int registeredDeletedCount = tempFileManager.cleanupOldTempFiles(maxAgeMillis);
+        log.info("Cleaned up {} registered temporary files", registeredDeletedCount);
+
+        // Clean up registered temp directories
+        // NOTE(review): unlike the file cleanup above, this loop applies no age check - every
+        // registered directory that exists is deleted on each run. Confirm that a registered
+        // directory can never still be in use when the schedule fires.
+        int directoriesDeletedCount = 0;
+        for (Path directory : registry.getTempDirectories()) {
+            try {
+                if (Files.exists(directory)) {
+                    GeneralUtils.deleteDirectory(directory);
+                    directoriesDeletedCount++;
+                    log.debug("Cleaned up temporary directory: {}", directory);
+                }
+            } catch (IOException e) {
+                // One failing directory must not stop the remaining ones
+                log.warn("Failed to clean up temporary directory: {}", directory, e);
+            }
+        }
+
+        // Clean up PDFBox cache file
+        cleanupPDFBoxCache();
+
+        // Clean up unregistered temp files based on our cleanup strategy
+        boolean containerMode = isContainerMode();
+        int unregisteredDeletedCount = cleanupUnregisteredFiles(containerMode, true, maxAgeMillis);
+
+        log.info(
+                "Scheduled cleanup complete. Deleted {} registered files, {} unregistered files, {} directories",
+                registeredDeletedCount,
+                unregisteredDeletedCount,
+                directoriesDeletedCount);
+    }
+
+    /**
+     * Perform startup cleanup of stale temporary files from previous runs. This is especially
+     * important in Docker environments where temp files persist between container restarts.
+     */
+    private void runStartupCleanup() {
+        log.info("Running startup temporary file cleanup");
+        boolean containerMode = isContainerMode();
+
+        log.info(
+                "Running in {} mode, using {} cleanup strategy",
+                machineType,
+                containerMode ? "aggressive" : "conservative");
+
+        // For startup cleanup, we use a longer timeout for non-container environments
+        // (the value is fixed here; the configured maxAgeHours is not consulted)
+        long maxAgeMillis = containerMode ?
0 : 24 * 60 * 60 * 1000; // 0 or 24 hours
+
+        int totalDeletedCount = cleanupUnregisteredFiles(containerMode, false, maxAgeMillis);
+
+        log.info(
+                "Startup cleanup complete. Deleted {} temporary files/directories",
+                totalDeletedCount);
+    }
+
+    /**
+     * Scans the configured temp directories and removes stale files that the registry does not
+     * track. The base temp directory and the LibreOffice directory are always scanned; the system
+     * temp directory is scanned first when system temp cleanup is enabled and a system temp
+     * directory is configured. Errors are logged, never thrown.
+     *
+     * NOTE(review): because the system temp directory is only included when systemTempDir is
+     * non-empty, the java.io.tmpdir fallback in getSystemTempPath is never reached from here -
+     * confirm which of the two behaviours is intended.
+     *
+     * @param containerMode Whether we're in container mode (more aggressive cleanup)
+     * @param isScheduled Whether this is a scheduled cleanup or startup cleanup
+     * @param maxAgeMillis Maximum age of files to clean in milliseconds
+     * @return Number of files deleted
+     */
+    private int cleanupUnregisteredFiles(
+            boolean containerMode, boolean isScheduled, long maxAgeMillis) {
+        int deletedOverall = 0;
+        final String phase = isScheduled ? "scheduled" : "startup";
+
+        try {
+            ApplicationProperties.TempFileManagement settings =
+                    applicationProperties.getSystem().getTempFileManagement();
+
+            boolean includeSystemTemp =
+                    settings.isCleanupSystemTemp()
+                            && settings.getSystemTempDir() != null
+                            && !settings.getSystemTempDir().isEmpty();
+
+            // Build the scan list up front so an invalid path aborts before anything is deleted
+            java.util.List<Path> scanTargets = new java.util.ArrayList<>();
+            if (includeSystemTemp) {
+                scanTargets.add(getSystemTempPath());
+            }
+            scanTargets.add(Path.of(settings.getBaseTmpDir()));
+            scanTargets.add(Path.of(settings.getLibreofficeDir()));
+
+            for (Path tempDir : scanTargets) {
+                if (!Files.exists(tempDir)) {
+                    continue;
+                }
+                try {
+                    log.info("Scanning directory for {} cleanup: {}", phase, tempDir);
+
+                    // Counter is mutated from the delete callback, hence the atomic wrapper
+                    AtomicInteger deletedHere = new AtomicInteger(0);
+                    cleanupDirectoryStreaming(
+                            tempDir,
+                            containerMode,
+                            0,
+                            maxAgeMillis,
+                            isScheduled,
+                            path -> {
+                                deletedHere.incrementAndGet();
+                                if (log.isDebugEnabled()) {
+                                    log.debug(
+                                            "Deleted temp file during {} cleanup: {}",
+                                            phase,
+                                            path);
+                                }
+                            });
+
+                    int count = deletedHere.get();
+                    deletedOverall += count;
+                    if (count > 0) {
+                        log.info("Cleaned up {} files/directories in {}", count, tempDir);
+                    }
+                } catch (IOException e) {
+                    log.error("Error during cleanup of directory: {}", tempDir, e);
+                }
+            }
+        } catch (Exception e) {
+            log.error("Error during cleanup of unregistered files", e);
+        }
+
+        return deletedOverall;
+    }
+
+    /**
+     * Resolve the system temp directory: the configured systemTempDir when it is set, otherwise
+     * the JVM's java.io.tmpdir.
+     */
+    private Path getSystemTempPath() {
+        String configured =
+                applicationProperties.getSystem().getTempFileManagement().getSystemTempDir();
+        boolean hasConfigured = configured != null && !configured.isEmpty();
+        return Path.of(hasConfigured ? configured : System.getProperty("java.io.tmpdir"));
+    }
+
+    /** True when the injected machine type is "Docker" or "Kubernetes". */
+    private boolean isContainerMode() {
+        boolean docker = "Docker".equals(machineType);
+        return docker || "Kubernetes".equals(machineType);
+    }
+
+    /**
+     * Recursively clean up a directory using a streaming approach to reduce memory usage.
+     *
+     * @param directory The directory to clean
+     * @param containerMode Whether we're in container mode (more aggressive cleanup)
+     * @param depth Current recursion depth
+     * @param maxAgeMillis Maximum age of files to delete
+     * @param isScheduled Whether this is a scheduled cleanup (vs startup); only passed through to
+     *     the recursive calls
+     * @param onDeleteCallback Callback function when a file is deleted
+     * @throws IOException If an I/O error occurs
+     */
+    private void cleanupDirectoryStreaming(
+            Path directory,
+            boolean containerMode,
+            int depth,
+            long maxAgeMillis,
+            boolean isScheduled,
+            Consumer<Path> onDeleteCallback)
+            throws IOException {
+
+        if (depth > MAX_RECURSION_DEPTH) {
+            log.debug("Maximum directory recursion depth reached for: {}", directory);
+            return;
+        }
+
+        // Subdirectories are collected first and visited after the listing stream is closed
+        java.util.List<Path> subdirectories = new java.util.ArrayList<>();
+
+        try (Stream<Path> pathStream = Files.list(directory)) {
+            pathStream.forEach(
+                    path -> {
+                        try {
+                            String fileName = path.getFileName().toString();
+
+                            if (SHOULD_SKIP.test(fileName)) {
+                                return;
+                            }
+
+                            if (Files.isDirectory(path)) {
+                                subdirectories.add(path);
+                                return;
+                            }
+
+                            // Registered files are handled by the registry-based cleanup
+                            if (registry.contains(path.toFile())) {
+                                return;
+                            }
+
+                            if (shouldDeleteFile(path, fileName, containerMode, maxAgeMillis)) {
+                                try {
+                                    Files.deleteIfExists(path);
+                                    onDeleteCallback.accept(path);
+                                } catch (IOException e) {
+                                    // Message text used to recognise a locked file; keep it quiet
+                                    if (e.getMessage() != null
+                                            && e.getMessage()
+                                                    .contains("being used by another process")) {
+                                        log.debug("File locked, skipping delete: {}", path);
+                                    } else {
+                                        log.warn("Failed to delete temp file: {}", path, e);
+                                    }
+                                }
+                            }
+                        } catch (Exception e) {
+                            log.warn("Error processing path: {}", path, e);
+                        }
+                    });
+        }
+
+        for (Path subdirectory : subdirectories) {
+            try {
+                cleanupDirectoryStreaming(
+                        subdirectory,
+                        containerMode,
+                        depth + 1,
+                        maxAgeMillis,
+                        isScheduled,
+                        onDeleteCallback);
+            } catch (IOException e) {
+                log.warn("Error processing subdirectory: {}", subdirectory, e);
+            }
+        }
+    }
+
+    /** Determine if a file should be deleted based on its name, age, and other criteria. */
+    private boolean shouldDeleteFile(
+            Path path, String fileName, boolean containerMode, long maxAgeMillis) {
+        // Zero-byte files get their own, shorter retention period (5 minutes)
+        final long emptyFileMaxAgeMillis = 5 * 60 * 1000L;
+
+        // Our own patterns always count; system temp patterns only in container mode
+        boolean matchesPattern =
+                IS_OUR_TEMP_FILE.test(fileName)
+                        || (containerMode && IS_SYSTEM_TEMP_FILE.test(fileName));
+
+        long ageMillis;
+        boolean isEmptyFile;
+        try {
+            ageMillis = System.currentTimeMillis() - Files.getLastModifiedTime(path).toMillis();
+            isEmptyFile = Files.size(path) == 0;
+        } catch (IOException e) {
+            // Without metadata the age is unknown. Really skip the file instead of treating it
+            // as infinitely old (which previously led to it being deleted).
+            log.debug("Could not check file info, skipping: {}", path);
+            return false;
+        }
+
+        // Stale zero-byte files are removed regardless of their name
+        if (isEmptyFile && ageMillis > emptyFileMaxAgeMillis) {
+            return true;
+        }
+
+        // Everything else, including empty files younger than 5 minutes, must match a pattern
+        // and respect the age limit. maxAgeMillis <= 0 means "no age limit".
+        boolean oldEnough = maxAgeMillis <= 0 || ageMillis > maxAgeMillis;
+        return matchesPattern && oldEnough;
+    }
+
+    /** Clean up LibreOffice temporary files. This method is called after LibreOffice operations.
+     *
+     * <p>Every registered temp directory whose name contains "libreoffice" is swept with the age
+     * check disabled; the directory itself is kept for future use. A failure in one directory is
+     * logged and does not stop the remaining directories from being processed.
+     */
+    public void cleanupLibreOfficeTempFiles() {
+        // Cleanup known LibreOffice temp directories
+        for (Path dir : registry.getTempDirectories()) {
+            Path dirName = dir.getFileName();
+            // getFileName() returns null for a root path, which can never be a LibreOffice dir
+            if (dirName == null
+                    || !dirName.toString().contains("libreoffice")
+                    || !Files.exists(dir)) {
+                continue;
+            }
+            try {
+                // A maxAgeMillis of 0 turns the age check off, so eligible files are removed
+                // immediately; the directory itself is kept for future use
+                cleanupDirectoryStreaming(
+                        dir,
+                        isContainerMode(),
+                        0,
+                        0, // age doesn't matter for LibreOffice cleanup
+                        false,
+                        path -> log.debug("Cleaned up LibreOffice temp file: {}", path));
+                log.debug("Cleaned up LibreOffice temp directory contents: {}", dir);
+            } catch (IOException e) {
+                // Keep going: one unreadable directory must not block cleanup of the others
+                log.warn("Failed to clean up LibreOffice temp files", e);
+            }
+        }
+    }
+
+    /**
+     * Clean up PDFBox cache file from user home directory. This cache file can grow large and
+     * should be periodically cleaned.
+     */
+    private void cleanupPDFBoxCache() {
+        String userHome = System.getProperty("user.home");
+        if (userHome == null) {
+            // Path.of(null) would throw an NPE that the IOException handler below cannot catch
+            return;
+        }
+        try {
+            Path pdfboxCache = Path.of(userHome).resolve(".pdfbox.cache");
+
+            // deleteIfExists already reports whether a file was removed, so no exists() pre-check
+            if (Files.deleteIfExists(pdfboxCache)) {
+                log.debug("Cleaned up PDFBox cache file: {}", pdfboxCache);
+            }
+        } catch (IOException e) {
+            log.warn("Failed to clean up PDFBox cache file", e);
+        }
+    }
+}
diff --git a/common/src/main/java/stirling/software/common/util/ApplicationContextProvider.java b/common/src/main/java/stirling/software/common/util/ApplicationContextProvider.java
new file mode 100644
index 000000000..505b21fab
--- /dev/null
+++ b/common/src/main/java/stirling/software/common/util/ApplicationContextProvider.java
@@ -0,0 +1,76 @@
+package stirling.software.common.util;
+
+import org.springframework.beans.BeansException;
+import org.springframework.context.ApplicationContext;
+import org.springframework.context.ApplicationContextAware;
+import org.springframework.stereotype.Component;
+
+/**
+ * Helper class that provides access to the ApplicationContext.
Useful for getting beans in classes + * that are not managed by Spring. + */ +@Component +public class ApplicationContextProvider implements ApplicationContextAware { + + private static ApplicationContext applicationContext; + + @Override + public void setApplicationContext(ApplicationContext context) throws BeansException { + applicationContext = context; + } + + /** + * Get a bean by class type. + * + * @param The type of the bean + * @param beanClass The class of the bean + * @return The bean instance, or null if not found + */ + public static T getBean(Class beanClass) { + if (applicationContext == null) { + return null; + } + try { + return applicationContext.getBean(beanClass); + } catch (BeansException e) { + return null; + } + } + + /** + * Get a bean by name and class type. + * + * @param The type of the bean + * @param name The name of the bean + * @param beanClass The class of the bean + * @return The bean instance, or null if not found + */ + public static T getBean(String name, Class beanClass) { + if (applicationContext == null) { + return null; + } + try { + return applicationContext.getBean(name, beanClass); + } catch (BeansException e) { + return null; + } + } + + /** + * Check if a bean of the specified type exists. 
+ * + * @param beanClass The class of the bean + * @return true if the bean exists, false otherwise + */ + public static boolean containsBean(Class beanClass) { + if (applicationContext == null) { + return false; + } + try { + applicationContext.getBean(beanClass); + return true; + } catch (BeansException e) { + return false; + } + } +} diff --git a/common/src/main/java/stirling/software/common/util/EmlToPdf.java b/common/src/main/java/stirling/software/common/util/EmlToPdf.java index 6c0514822..05e9cec5c 100644 --- a/common/src/main/java/stirling/software/common/util/EmlToPdf.java +++ b/common/src/main/java/stirling/software/common/util/EmlToPdf.java @@ -134,7 +134,8 @@ public class EmlToPdf { byte[] emlBytes, String fileName, boolean disableSanitize, - stirling.software.common.service.CustomPDFDocumentFactory pdfDocumentFactory) + stirling.software.common.service.CustomPDFDocumentFactory pdfDocumentFactory, + TempFileManager tempFileManager) throws IOException, InterruptedException { validateEmlInput(emlBytes); @@ -153,7 +154,8 @@ public class EmlToPdf { // Convert HTML to PDF byte[] pdfBytes = - convertHtmlToPdf(weasyprintPath, request, htmlContent, disableSanitize); + convertHtmlToPdf( + weasyprintPath, request, htmlContent, disableSanitize, tempFileManager); // Attach files if available and requested if (shouldAttachFiles(emailContent, request)) { @@ -194,7 +196,8 @@ public class EmlToPdf { String weasyprintPath, EmlToPdfRequest request, String htmlContent, - boolean disableSanitize) + boolean disableSanitize, + TempFileManager tempFileManager) throws IOException, InterruptedException { HTMLToPdfRequest htmlRequest = createHtmlRequest(request); @@ -205,7 +208,8 @@ public class EmlToPdf { htmlRequest, htmlContent.getBytes(StandardCharsets.UTF_8), "email.html", - disableSanitize); + disableSanitize, + tempFileManager); } catch (IOException | InterruptedException e) { log.warn("Initial HTML to PDF conversion failed, trying with simplified HTML"); String 
simplifiedHtml = simplifyHtmlContent(htmlContent); @@ -214,7 +218,8 @@ public class EmlToPdf { htmlRequest, simplifiedHtml.getBytes(StandardCharsets.UTF_8), "email.html", - disableSanitize); + disableSanitize, + tempFileManager); } } diff --git a/common/src/main/java/stirling/software/common/util/FileToPdf.java b/common/src/main/java/stirling/software/common/util/FileToPdf.java index 8439b67a2..7b3765084 100644 --- a/common/src/main/java/stirling/software/common/util/FileToPdf.java +++ b/common/src/main/java/stirling/software/common/util/FileToPdf.java @@ -26,88 +26,92 @@ public class FileToPdf { HTMLToPdfRequest request, byte[] fileBytes, String fileName, - boolean disableSanitize) + boolean disableSanitize, + TempFileManager tempFileManager) throws IOException, InterruptedException { - Path tempOutputFile = Files.createTempFile("output_", ".pdf"); - Path tempInputFile = null; - byte[] pdfBytes; - try { - if (fileName.endsWith(".html")) { - tempInputFile = Files.createTempFile("input_", ".html"); - String sanitizedHtml = - sanitizeHtmlContent( - new String(fileBytes, StandardCharsets.UTF_8), disableSanitize); - Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8)); - } else if (fileName.endsWith(".zip")) { - tempInputFile = Files.createTempFile("input_", ".zip"); - Files.write(tempInputFile, fileBytes); - sanitizeHtmlFilesInZip(tempInputFile, disableSanitize); - } else { - throw new IllegalArgumentException("Unsupported file format: " + fileName); - } + try (TempFile tempOutputFile = new TempFile(tempFileManager, ".pdf")) { + try (TempFile tempInputFile = + new TempFile(tempFileManager, fileName.endsWith(".html") ? 
".html" : ".zip")) { - List command = new ArrayList<>(); - command.add(weasyprintPath); - command.add("-e"); - command.add("utf-8"); - command.add("-v"); - command.add("--pdf-forms"); - command.add(tempInputFile.toString()); - command.add(tempOutputFile.toString()); + if (fileName.endsWith(".html")) { + String sanitizedHtml = + sanitizeHtmlContent( + new String(fileBytes, StandardCharsets.UTF_8), disableSanitize); + Files.write( + tempInputFile.getPath(), + sanitizedHtml.getBytes(StandardCharsets.UTF_8)); + } else if (fileName.endsWith(".zip")) { + Files.write(tempInputFile.getPath(), fileBytes); + sanitizeHtmlFilesInZip( + tempInputFile.getPath(), disableSanitize, tempFileManager); + } else { + throw new IllegalArgumentException("Unsupported file format: " + fileName); + } - ProcessExecutorResult returnCode = - ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) - .runCommandWithOutputHandling(command); + List command = new ArrayList<>(); + command.add(weasyprintPath); + command.add("-e"); + command.add("utf-8"); + command.add("-v"); + command.add("--pdf-forms"); + command.add(tempInputFile.getAbsolutePath()); + command.add(tempOutputFile.getAbsolutePath()); - pdfBytes = Files.readAllBytes(tempOutputFile); - } catch (IOException e) { - pdfBytes = Files.readAllBytes(tempOutputFile); - if (pdfBytes.length < 1) { - throw e; - } - } finally { - Files.deleteIfExists(tempOutputFile); - Files.deleteIfExists(tempInputFile); - } + byte[] pdfBytes; + try { + ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) + .runCommandWithOutputHandling(command); + pdfBytes = Files.readAllBytes(tempOutputFile.getPath()); + } catch (IOException e) { + // Same fallback as the removed implementation: if the run failed but a PDF was + // still written, return it; rethrow only when the output file is empty + pdfBytes = Files.readAllBytes(tempOutputFile.getPath()); + if (pdfBytes.length < 1) { + throw e; + } + } - return pdfBytes; + return pdfBytes; + } // tempInputFile auto-closed + } // tempOutputFile auto-closed } private static String sanitizeHtmlContent(String
htmlContent, boolean disableSanitize) { return (!disableSanitize) ? CustomHtmlSanitizer.sanitize(htmlContent) : htmlContent; } - private static void sanitizeHtmlFilesInZip(Path zipFilePath, boolean disableSanitize) + private static void sanitizeHtmlFilesInZip( + Path zipFilePath, boolean disableSanitize, TempFileManager tempFileManager) throws IOException { - Path tempUnzippedDir = Files.createTempDirectory("unzipped_"); - try (ZipInputStream zipIn = - ZipSecurity.createHardenedInputStream( - new ByteArrayInputStream(Files.readAllBytes(zipFilePath)))) { - ZipEntry entry = zipIn.getNextEntry(); - while (entry != null) { - Path filePath = tempUnzippedDir.resolve(sanitizeZipFilename(entry.getName())); - if (!entry.isDirectory()) { - Files.createDirectories(filePath.getParent()); - if (entry.getName().toLowerCase().endsWith(".html") - || entry.getName().toLowerCase().endsWith(".htm")) { - String content = new String(zipIn.readAllBytes(), StandardCharsets.UTF_8); - String sanitizedContent = sanitizeHtmlContent(content, disableSanitize); - Files.write(filePath, sanitizedContent.getBytes(StandardCharsets.UTF_8)); - } else { - Files.copy(zipIn, filePath); + try (TempDirectory tempUnzippedDir = new TempDirectory(tempFileManager)) { + try (ZipInputStream zipIn = + ZipSecurity.createHardenedInputStream( + new ByteArrayInputStream(Files.readAllBytes(zipFilePath)))) { + ZipEntry entry = zipIn.getNextEntry(); + while (entry != null) { + Path filePath = + tempUnzippedDir.getPath().resolve(sanitizeZipFilename(entry.getName())); + if (!entry.isDirectory()) { + Files.createDirectories(filePath.getParent()); + if (entry.getName().toLowerCase().endsWith(".html") + || entry.getName().toLowerCase().endsWith(".htm")) { + String content = + new String(zipIn.readAllBytes(), StandardCharsets.UTF_8); + String sanitizedContent = sanitizeHtmlContent(content, disableSanitize); + Files.write( + filePath, sanitizedContent.getBytes(StandardCharsets.UTF_8)); + } else { + Files.copy(zipIn, 
filePath); + } } + zipIn.closeEntry(); + entry = zipIn.getNextEntry(); } - zipIn.closeEntry(); - entry = zipIn.getNextEntry(); } - } - // Repack the sanitized files - zipDirectory(tempUnzippedDir, zipFilePath); - - // Clean up - deleteDirectory(tempUnzippedDir); + // Repack the sanitized files + zipDirectory(tempUnzippedDir.getPath(), zipFilePath); + } // tempUnzippedDir auto-cleaned } private static void zipDirectory(Path sourceDir, Path zipFilePath) throws IOException { diff --git a/common/src/main/java/stirling/software/common/util/GeneralUtils.java b/common/src/main/java/stirling/software/common/util/GeneralUtils.java index 87496294d..ddbec92e0 100644 --- a/common/src/main/java/stirling/software/common/util/GeneralUtils.java +++ b/common/src/main/java/stirling/software/common/util/GeneralUtils.java @@ -34,7 +34,27 @@ import stirling.software.common.configuration.InstallationPathConfig; public class GeneralUtils { public static File convertMultipartFileToFile(MultipartFile multipartFile) throws IOException { - File tempFile = Files.createTempFile("temp", null).toFile(); + String customTempDir = System.getenv("STIRLING_TEMPFILES_DIRECTORY"); + if (customTempDir == null || customTempDir.isEmpty()) { + customTempDir = System.getProperty("stirling.tempfiles.directory"); + } + + File tempFile; + + if (customTempDir != null && !customTempDir.isEmpty()) { + Path tempDir = Path.of(customTempDir); + if (!Files.exists(tempDir)) { + Files.createDirectories(tempDir); + } + tempFile = Files.createTempFile(tempDir, "stirling-pdf-", null).toFile(); + } else { + Path tempDir = Path.of(System.getProperty("java.io.tmpdir"), "stirling-pdf"); + if (!Files.exists(tempDir)) { + Files.createDirectories(tempDir); + } + tempFile = Files.createTempFile(tempDir, "stirling-pdf-", null).toFile(); + } + try (InputStream inputStream = multipartFile.getInputStream(); FileOutputStream outputStream = new FileOutputStream(tempFile)) { diff --git 
a/common/src/main/java/stirling/software/common/util/TempDirectory.java b/common/src/main/java/stirling/software/common/util/TempDirectory.java new file mode 100644 index 000000000..cd7036d68 --- /dev/null +++ b/common/src/main/java/stirling/software/common/util/TempDirectory.java @@ -0,0 +1,44 @@ +package stirling.software.common.util; + +import java.io.IOException; +import java.nio.file.Path; + +import lombok.extern.slf4j.Slf4j; + +/** + * A wrapper class for a temporary directory that implements AutoCloseable. Can be used with + * try-with-resources for automatic cleanup. + */ +@Slf4j +public class TempDirectory implements AutoCloseable { + + private final TempFileManager manager; + private final Path directory; + + public TempDirectory(TempFileManager manager) throws IOException { + this.manager = manager; + this.directory = manager.createTempDirectory(); + } + + public Path getPath() { + return directory; + } + + public String getAbsolutePath() { + return directory.toAbsolutePath().toString(); + } + + public boolean exists() { + return java.nio.file.Files.exists(directory); + } + + @Override + public void close() { + manager.deleteTempDirectory(directory); + } + + @Override + public String toString() { + return "TempDirectory{" + directory.toAbsolutePath() + "}"; + } +} diff --git a/common/src/main/java/stirling/software/common/util/TempFile.java b/common/src/main/java/stirling/software/common/util/TempFile.java new file mode 100644 index 000000000..db859c431 --- /dev/null +++ b/common/src/main/java/stirling/software/common/util/TempFile.java @@ -0,0 +1,49 @@ +package stirling.software.common.util; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; + +import lombok.extern.slf4j.Slf4j; + +/** + * A wrapper class for a temporary file that implements AutoCloseable. Can be used with + * try-with-resources for automatic cleanup. 
+ */ +@Slf4j +public class TempFile implements AutoCloseable { + + private final TempFileManager manager; + private final File file; + + public TempFile(TempFileManager manager, String suffix) throws IOException { + this.manager = manager; + this.file = manager.createTempFile(suffix); + } + + public File getFile() { + return file; + } + + public Path getPath() { + return file.toPath(); + } + + public String getAbsolutePath() { + return file.getAbsolutePath(); + } + + public boolean exists() { + return file.exists(); + } + + @Override + public void close() { + manager.deleteTempFile(file); + } + + @Override + public String toString() { + return "TempFile{" + file.getAbsolutePath() + "}"; + } +} diff --git a/common/src/main/java/stirling/software/common/util/TempFileManager.java b/common/src/main/java/stirling/software/common/util/TempFileManager.java new file mode 100644 index 000000000..867931f8b --- /dev/null +++ b/common/src/main/java/stirling/software/common/util/TempFileManager.java @@ -0,0 +1,249 @@ +package stirling.software.common.util; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Duration; +import java.util.Set; +import java.util.UUID; + +import org.springframework.stereotype.Service; +import org.springframework.web.multipart.MultipartFile; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import stirling.software.common.model.ApplicationProperties; + +/** + * Service for managing temporary files in Stirling-PDF. Provides methods for creating, tracking, + * and cleaning up temporary files. + */ +@Slf4j +@Service +@RequiredArgsConstructor +public class TempFileManager { + + private final TempFileRegistry registry; + private final ApplicationProperties applicationProperties; + + /** + * Create a temporary file with the Stirling-PDF prefix. The file is automatically registered + * with the registry. 
+ * + * @param suffix The suffix for the temporary file + * @return The created temporary file + * @throws IOException If an I/O error occurs + */ + public File createTempFile(String suffix) throws IOException { + ApplicationProperties.TempFileManagement tempFiles = + applicationProperties.getSystem().getTempFileManagement(); + Path tempFilePath; + String customTempDirectory = tempFiles.getBaseTmpDir(); + if (customTempDirectory != null && !customTempDirectory.isEmpty()) { + Path tempDir = Path.of(customTempDirectory); + if (!Files.exists(tempDir)) { + Files.createDirectories(tempDir); + } + tempFilePath = Files.createTempFile(tempDir, tempFiles.getPrefix(), suffix); + } else { + tempFilePath = Files.createTempFile(tempFiles.getPrefix(), suffix); + } + File tempFile = tempFilePath.toFile(); + return registry.register(tempFile); + } + + /** + * Create a temporary directory with the Stirling-PDF prefix. The directory is automatically + * registered with the registry. + * + * @return The created temporary directory + * @throws IOException If an I/O error occurs + */ + public Path createTempDirectory() throws IOException { + ApplicationProperties.TempFileManagement tempFiles = + applicationProperties.getSystem().getTempFileManagement(); + Path tempDirPath; + String customTempDirectory = tempFiles.getBaseTmpDir(); + if (customTempDirectory != null && !customTempDirectory.isEmpty()) { + Path tempDir = Path.of(customTempDirectory); + if (!Files.exists(tempDir)) { + Files.createDirectories(tempDir); + } + tempDirPath = Files.createTempDirectory(tempDir, tempFiles.getPrefix()); + } else { + tempDirPath = Files.createTempDirectory(tempFiles.getPrefix()); + } + return registry.registerDirectory(tempDirPath); + } + + /** + * Convert a MultipartFile to a temporary File and register it. This is a wrapper around + * GeneralUtils.convertMultipartFileToFile that ensures the created temp file is registered. 
+     *
+     * @param multipartFile The MultipartFile to convert
+     * @return The created temporary file
+     * @throws IOException If an I/O error occurs
+     */
+    public File convertMultipartFileToFile(MultipartFile multipartFile) throws IOException {
+        File tempFile = GeneralUtils.convertMultipartFileToFile(multipartFile);
+        return registry.register(tempFile);
+    }
+
+    /**
+     * Delete a temporary file and unregister it from the registry. A file that no longer exists
+     * is unregistered as well, so the registry does not accumulate stale entries.
+     *
+     * @param file The file to delete
+     * @return true if the file was deleted successfully, false otherwise
+     */
+    public boolean deleteTempFile(File file) {
+        if (file == null) {
+            return false;
+        }
+        if (!file.exists()) {
+            // Already removed elsewhere: nothing to delete, but drop the stale registry entry
+            registry.unregister(file);
+            return false;
+        }
+        boolean deleted = file.delete();
+        if (deleted) {
+            registry.unregister(file);
+            log.debug("Deleted temp file: {}", file.getAbsolutePath());
+        } else {
+            // Still on disk, so it stays registered and can be retried later
+            log.warn("Failed to delete temp file: {}", file.getAbsolutePath());
+        }
+        return deleted;
+    }
+
+    /**
+     * Delete a temporary file and unregister it from the registry. The path is unregistered
+     * whenever it no longer exists afterwards, whether or not this call removed it.
+     *
+     * @param path The path to delete
+     * @return true if the file was deleted successfully, false otherwise
+     */
+    public boolean deleteTempFile(Path path) {
+        if (path == null) {
+            return false;
+        }
+        try {
+            boolean deleted = Files.deleteIfExists(path);
+            // No exception means the path is gone either way. Unregistering only on an actual
+            // delete left missing files in the registry, and cleanupOldTempFiles was handed the
+            // same stale paths on every run.
+            registry.unregister(path);
+            if (deleted) {
+                log.debug("Deleted temp file: {}", path.toString());
+            } else {
+                log.debug("Temp file already deleted or does not exist: {}", path.toString());
+            }
+            return deleted;
+        } catch (IOException e) {
+            log.warn("Failed to delete temp file: {}", path.toString(), e);
+            return false;
+        }
+    }
+
+    /**
+     * Delete a temporary directory and all its contents, then stop tracking it in the registry.
+     *
+     * @param directory The directory to delete
+     */
+    public void deleteTempDirectory(Path directory) {
+        if (directory == null) {
+            return;
+        }
+        if (Files.isDirectory(directory)) {
+            try {
+                GeneralUtils.deleteDirectory(directory);
+                log.debug("Deleted temp directory: {}", directory.toString());
+            } catch (IOException e) {
+                log.warn("Failed to delete temp directory: {}", directory.toString(), e);
+                // Still on disk: leave it registered so later cleanup can still see it
+                return;
+            }
+        }
+        // The registry hands out its live set of tracked directories. Without this removal
+        // every directory created through createTempDirectory() stayed registered forever.
+        registry.getTempDirectories().remove(directory);
+    }
+
+    /**
+     * Register an existing file with the registry.
+     *
+     * @param file The file to register
+     * @return The same file for method chaining
+     */
+    public File register(File file) {
+        if (file != null && file.exists()) {
+            return registry.register(file);
+        }
+        return file;
+    }
+
+    /**
+     * Clean up old temporary files based on age.
+     *
+     * @param maxAgeMillis Maximum age in milliseconds for temp files
+     * @return Number of files deleted
+     */
+    public int cleanupOldTempFiles(long maxAgeMillis) {
+        int deletedCount = 0;
+
+        // Delete each registered file older than max age; the registry returns a snapshot set,
+        // so unregistering inside the loop is safe
+        for (Path file : registry.getFilesOlderThan(maxAgeMillis)) {
+            if (deleteTempFile(file)) {
+                deletedCount++;
+            }
+        }
+
+        log.info("Cleaned up {} old temporary files", deletedCount);
+        return deletedCount;
+    }
+
+    /**
+     * Get the maximum age for temporary files in milliseconds.
+     *
+     * @return Maximum age in milliseconds
+     */
+    public long getMaxAgeMillis() {
+        long maxAgeHours =
+                applicationProperties.getSystem().getTempFileManagement().getMaxAgeHours();
+        return Duration.ofHours(maxAgeHours).toMillis();
+    }
+
+    /**
+     * Generate a unique temporary file name with the Stirling-PDF prefix.
+ * + * @param type Type identifier for the temp file + * @param extension File extension (without the dot) + * @return A unique temporary file name + */ + public String generateTempFileName(String type, String extension) { + String tempFilePrefix = + applicationProperties.getSystem().getTempFileManagement().getPrefix(); + String uuid = UUID.randomUUID().toString().substring(0, 8); + return tempFilePrefix + type + "-" + uuid + "." + extension; + } + + /** + * Register a known LibreOffice temporary directory. This is used when integrating with + * LibreOffice for file conversions. + * + * @return The LibreOffice temp directory + * @throws IOException If directory creation fails + */ + public Path registerLibreOfficeTempDir() throws IOException { + ApplicationProperties.TempFileManagement tempFiles = + applicationProperties.getSystem().getTempFileManagement(); + Path loTempDir; + String libreOfficeTempDir = tempFiles.getLibreofficeDir(); + String customTempDirectory = tempFiles.getBaseTmpDir(); + + // First check if explicitly configured + if (libreOfficeTempDir != null && !libreOfficeTempDir.isEmpty()) { + loTempDir = Path.of(libreOfficeTempDir); + } + // Next check if we have a custom temp directory + else if (customTempDirectory != null && !customTempDirectory.isEmpty()) { + loTempDir = Path.of(customTempDirectory, "libreoffice"); + } + // Fall back to system temp dir with our application prefix + else { + loTempDir = Path.of(System.getProperty("java.io.tmpdir"), "stirling-pdf-libreoffice"); + } + + if (!Files.exists(loTempDir)) { + Files.createDirectories(loTempDir); + } + + return registry.registerDirectory(loTempDir); + } +} diff --git a/common/src/main/java/stirling/software/common/util/TempFileRegistry.java b/common/src/main/java/stirling/software/common/util/TempFileRegistry.java new file mode 100644 index 000000000..1e55c6b15 --- /dev/null +++ b/common/src/main/java/stirling/software/common/util/TempFileRegistry.java @@ -0,0 +1,176 @@ +package 
stirling.software.common.util; + +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Instant; +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ConcurrentSkipListSet; +import java.util.stream.Collectors; + +import org.springframework.stereotype.Component; + +import lombok.extern.slf4j.Slf4j; + +/** + * Central registry for tracking temporary files created by Stirling-PDF. Maintains a thread-safe + * collection of paths with their creation timestamps. + */ +@Slf4j +@Component +public class TempFileRegistry { + + private final ConcurrentMap registeredFiles = new ConcurrentHashMap<>(); + private final Set thirdPartyTempFiles = + Collections.newSetFromMap(new ConcurrentHashMap<>()); + private final Set tempDirectories = + Collections.newSetFromMap(new ConcurrentHashMap<>()); + + /** + * Register a temporary file with the registry. + * + * @param file The temporary file to track + * @return The same file for method chaining + */ + public File register(File file) { + if (file != null) { + registeredFiles.put(file.toPath(), Instant.now()); + log.debug("Registered temp file: {}", file.getAbsolutePath()); + } + return file; + } + + /** + * Register a temporary path with the registry. + * + * @param path The temporary path to track + * @return The same path for method chaining + */ + public Path register(Path path) { + if (path != null) { + registeredFiles.put(path, Instant.now()); + log.debug("Registered temp path: {}", path.toString()); + } + return path; + } + + /** + * Register a temporary directory to be cleaned up. 
+ * + * @param directory Directory to register + * @return The same directory for method chaining + */ + public Path registerDirectory(Path directory) { + if (directory != null && Files.isDirectory(directory)) { + tempDirectories.add(directory); + log.debug("Registered temp directory: {}", directory.toString()); + } + return directory; + } + + /** + * Register a third-party temporary file that requires special handling. + * + * @param file The third-party temp file + * @return The same file for method chaining + */ + public File registerThirdParty(File file) { + if (file != null) { + thirdPartyTempFiles.add(file.toPath()); + log.debug("Registered third-party temp file: {}", file.getAbsolutePath()); + } + return file; + } + + /** + * Unregister a file from the registry. + * + * @param file The file to unregister + */ + public void unregister(File file) { + if (file != null) { + registeredFiles.remove(file.toPath()); + thirdPartyTempFiles.remove(file.toPath()); + log.debug("Unregistered temp file: {}", file.getAbsolutePath()); + } + } + + /** + * Unregister a path from the registry. + * + * @param path The path to unregister + */ + public void unregister(Path path) { + if (path != null) { + registeredFiles.remove(path); + thirdPartyTempFiles.remove(path); + log.debug("Unregistered temp path: {}", path.toString()); + } + } + + /** + * Get all registered temporary files. + * + * @return Set of registered file paths + */ + public Set getAllRegisteredFiles() { + return registeredFiles.keySet(); + } + + /** + * Get temporary files older than the specified duration in milliseconds. 
+ * + * @param maxAgeMillis Maximum age in milliseconds + * @return Set of paths older than the specified age + */ + public Set getFilesOlderThan(long maxAgeMillis) { + Instant cutoffTime = Instant.now().minusMillis(maxAgeMillis); + return registeredFiles.entrySet().stream() + .filter(entry -> entry.getValue().isBefore(cutoffTime)) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + } + + /** + * Get all registered third-party temporary files. + * + * @return Set of third-party file paths + */ + public Set getThirdPartyTempFiles() { + return thirdPartyTempFiles; + } + + /** + * Get all registered temporary directories. + * + * @return Set of temporary directory paths + */ + public Set getTempDirectories() { + return tempDirectories; + } + + /** + * Check if a file is registered in the registry. + * + * @param file The file to check + * @return True if the file is registered, false otherwise + */ + public boolean contains(File file) { + if (file == null) { + return false; + } + Path path = file.toPath(); + return registeredFiles.containsKey(path) || thirdPartyTempFiles.contains(path); + } + + /** Clear all registry data. */ + public void clear() { + registeredFiles.clear(); + thirdPartyTempFiles.clear(); + tempDirectories.clear(); + } +} diff --git a/common/src/main/java/stirling/software/common/util/TempFileUtil.java b/common/src/main/java/stirling/software/common/util/TempFileUtil.java new file mode 100644 index 000000000..2588b9ebb --- /dev/null +++ b/common/src/main/java/stirling/software/common/util/TempFileUtil.java @@ -0,0 +1,135 @@ +package stirling.software.common.util; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Function; + +import lombok.extern.slf4j.Slf4j; + +/** + * Utility class for handling temporary files with proper cleanup. 
Provides helper methods and
+ * wrappers to ensure temp files are properly cleaned up.
+ */
+@Slf4j
+public class TempFileUtil {
+
+    /**
+     * A collection of temporary files that implements AutoCloseable. All files in the collection
+     * are cleaned up when close() is called.
+     */
+    public static class TempFileCollection implements AutoCloseable {
+        private final TempFileManager manager;
+        // Files created through addTempFile, in creation order.
+        private final List<File> tempFiles = new ArrayList<>();
+
+        /**
+         * @param manager The temp file manager used to create and delete the files
+         */
+        public TempFileCollection(TempFileManager manager) {
+            this.manager = manager;
+        }
+
+        /**
+         * Create a temp file through the manager and track it for deletion on close().
+         *
+         * @param suffix File suffix (e.g., ".pdf")
+         * @return The newly created temp file
+         * @throws IOException If an I/O error occurs
+         */
+        public File addTempFile(String suffix) throws IOException {
+            File file = manager.createTempFile(suffix);
+            tempFiles.add(file);
+            return file;
+        }
+
+        /**
+         * @return A copy of the tracked files; changing it does not affect this collection
+         */
+        public List<File> getFiles() {
+            return new ArrayList<>(tempFiles);
+        }
+
+        /** Delete every tracked file through the manager. */
+        @Override
+        public void close() {
+            for (File file : tempFiles) {
+                manager.deleteTempFile(file);
+            }
+        }
+    }
+
+    /**
+     * Execute a function with a temporary file, ensuring cleanup in a finally block.
+     *
+     * @param <R> The return type of the function
+     * @param tempFileManager The temp file manager
+     * @param suffix File suffix (e.g., ".pdf")
+     * @param function The function to execute with the temp file
+     * @return The result of the function
+     * @throws IOException If an I/O error occurs
+     */
+    public static <R> R withTempFile(
+            TempFileManager tempFileManager, String suffix, Function<File, R> function)
+            throws IOException {
+        File tempFile = tempFileManager.createTempFile(suffix);
+        try {
+            return function.apply(tempFile);
+        } finally {
+            tempFileManager.deleteTempFile(tempFile);
+        }
+    }
+
+    /**
+     * Execute a function with multiple temporary files, ensuring cleanup in a finally block.
+     *
+     * @param <R> The return type of the function
+     * @param tempFileManager The temp file manager
+     * @param count Number of temp files to create
+     * @param suffix File suffix (e.g., ".pdf")
+     * @param function The function to execute with the temp files
+     * @return The result of the function
+     * @throws IOException If an I/O error occurs
+     */
+    public static <R> R withMultipleTempFiles(
+            TempFileManager tempFileManager,
+            int count,
+            String suffix,
+            Function<List<File>, R> function)
+            throws IOException {
+        List<File> tempFiles = new ArrayList<>(count);
+        try {
+            // Creation happens inside the try so that files created before a failing
+            // createTempFile call are still deleted by the finally block.
+            for (int i = 0; i < count; i++) {
+                tempFiles.add(tempFileManager.createTempFile(suffix));
+            }
+            return function.apply(tempFiles);
+        } finally {
+            for (File file : tempFiles) {
+                tempFileManager.deleteTempFile(file);
+            }
+        }
+    }
+
+    /**
+     * Safely delete a list of temporary files, logging any errors. Null lists and null entries
+     * are ignored; a failure on one file does not stop the remaining deletions.
+     *
+     * @param files The list of files to delete
+     */
+    public static void safeDeleteFiles(List<Path> files) {
+        if (files == null) return;
+
+        for (Path file : files) {
+            if (file == null) continue;
+
+            try {
+                // Only report a deletion when a file was actually removed.
+                if (Files.deleteIfExists(file)) {
+                    log.debug("Deleted temp file: {}", file);
+                }
+            } catch (IOException e) {
+                log.warn("Failed to delete temp file: {}", file, e);
+            }
+        }
+    }
+
+    /**
+     * Register an already created temp file with the registry. Use this for files created outside
+     * of TempFileManager.
+     *
+     * @param tempFileManager The temp file manager
+     * @param file The file to register
+     * @return The result of registering the file, or the given file unchanged when the manager or
+     *     file is null or the file does not exist
+     */
+    public static File registerExistingTempFile(TempFileManager tempFileManager, File file) {
+        if (tempFileManager != null && file != null && file.exists()) {
+            return tempFileManager.register(file);
+        }
+        return file;
+    }
+}
diff --git a/common/src/test/java/stirling/software/common/service/TempFileCleanupServiceTest.java b/common/src/test/java/stirling/software/common/service/TempFileCleanupServiceTest.java new file mode 100644 index 000000000..009c00860 --- /dev/null +++ b/common/src/test/java/stirling/software/common/service/TempFileCleanupServiceTest.java @@ -0,0 +1,464 @@ +package stirling.software.common.service; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.*; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.attribute.FileTime; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; +import java.util.stream.Stream; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.MockedStatic; +import org.mockito.MockitoAnnotations; +import org.springframework.test.util.ReflectionTestUtils; + +import stirling.software.common.model.ApplicationProperties; +import stirling.software.common.util.TempFileManager; +import stirling.software.common.util.TempFileRegistry; + +/** + * Tests for the TempFileCleanupService, focusing on its pattern-matching and cleanup logic.
+ */
+public class TempFileCleanupServiceTest {
+
+    @TempDir
+    Path tempDir;
+
+    @Mock
+    private TempFileRegistry registry;
+
+    @Mock
+    private TempFileManager tempFileManager;
+
+    @Mock
+    private ApplicationProperties applicationProperties;
+
+    @Mock
+    private ApplicationProperties.System system;
+
+    @Mock
+    private ApplicationProperties.TempFileManagement tempFileManagement;
+
+    @InjectMocks
+    private TempFileCleanupService cleanupService;
+
+    private Path systemTempDir;
+    private Path customTempDir;
+    private Path libreOfficeTempDir;
+
+    // Handle returned by openMocks; closed after each test to release the mock session.
+    private AutoCloseable mocks;
+
+    @BeforeEach
+    public void setup() throws IOException {
+        mocks = MockitoAnnotations.openMocks(this);
+
+        // Create test directories
+        systemTempDir = tempDir.resolve("systemTemp");
+        customTempDir = tempDir.resolve("customTemp");
+        libreOfficeTempDir = tempDir.resolve("libreOfficeTemp");
+
+        Files.createDirectories(systemTempDir);
+        Files.createDirectories(customTempDir);
+        Files.createDirectories(libreOfficeTempDir);
+
+        // Configure ApplicationProperties mocks
+        when(applicationProperties.getSystem()).thenReturn(system);
+        when(system.getTempFileManagement()).thenReturn(tempFileManagement);
+        when(tempFileManagement.getBaseTmpDir()).thenReturn(customTempDir.toString());
+        when(tempFileManagement.getLibreofficeDir()).thenReturn(libreOfficeTempDir.toString());
+        when(tempFileManagement.getSystemTempDir()).thenReturn(systemTempDir.toString());
+        when(tempFileManagement.isStartupCleanup()).thenReturn(false);
+        when(tempFileManagement.isCleanupSystemTemp()).thenReturn(false);
+        when(tempFileManagement.getCleanupIntervalMinutes()).thenReturn(30L);
+
+        // Set machineType using reflection (still needed for this field)
+        ReflectionTestUtils.setField(cleanupService, "machineType", "Standard");
+
+        when(tempFileManager.getMaxAgeMillis()).thenReturn(3600000L); // 1 hour
+    }
+
+    /** Close the Mockito session opened in setup(). */
+    @org.junit.jupiter.api.AfterEach
+    public void tearDown() throws Exception {
+        if (mocks != null) {
+            mocks.close();
+        }
+    }
+
+    @Test
+    public void testScheduledCleanup_RegisteredFiles() {
+        // Arrange
+        when(tempFileManager.cleanupOldTempFiles(anyLong())).thenReturn(5); // 5 files deleted
+        Set<Path> registeredDirs = new HashSet<>();
+        registeredDirs.add(tempDir.resolve("registeredDir"));
+        when(registry.getTempDirectories()).thenReturn(registeredDirs);
+
+        // Act
+        cleanupService.scheduledCleanup();
+
+        // Assert
+        verify(tempFileManager).cleanupOldTempFiles(anyLong());
+        verify(registry, times(1)).getTempDirectories();
+    }
+
+    @Test
+    public void testCleanupTempFilePatterns() throws IOException {
+        // Arrange - Create various temp files
+        Path ourTempFile1 = Files.createFile(systemTempDir.resolve("output_123.pdf"));
+        Path ourTempFile2 = Files.createFile(systemTempDir.resolve("compressedPDF456.pdf"));
+        Path ourTempFile3 = Files.createFile(customTempDir.resolve("stirling-pdf-789.tmp"));
+        Path ourTempFile4 = Files.createFile(customTempDir.resolve("pdf-save-123-456.tmp"));
+        Path ourTempFile5 = Files.createFile(libreOfficeTempDir.resolve("input_file.pdf"));
+
+        // Old temporary files
+        Path oldTempFile = Files.createFile(systemTempDir.resolve("output_old.pdf"));
+
+        // System temp files that should be cleaned in container mode
+        Path sysTempFile1 = Files.createFile(systemTempDir.resolve("lu123abc.tmp"));
+        Path sysTempFile2 = Files.createFile(customTempDir.resolve("ocr_process123"));
+        Path sysTempFile3 = Files.createFile(customTempDir.resolve("tmp_upload.tmp"));
+
+        // Files that should be preserved
+        Path jettyFile1 = Files.createFile(systemTempDir.resolve("jetty-123.tmp"));
+        Path jettyFile2 = Files.createFile(systemTempDir.resolve("something-with-jetty-inside.tmp"));
+        Path regularFile = Files.createFile(systemTempDir.resolve("important.txt"));
+
+        // Create a nested directory with temp files
+        Path nestedDir = Files.createDirectories(systemTempDir.resolve("nested"));
+        Path nestedTempFile = Files.createFile(nestedDir.resolve("output_nested.pdf"));
+
+        // Empty file (special case)
+        Path emptyFile = Files.createFile(systemTempDir.resolve("empty.tmp"));
+
+        // Configure mock registry to say these files aren't registered
+        when(registry.contains(any(File.class))).thenReturn(false);
+
+        // The set of files that will be deleted in our test
+        Set<Path> deletedFiles = new HashSet<>();
+
+        // Use MockedStatic to mock Files operations
+        try (MockedStatic<Files> mockedFiles = mockStatic(Files.class)) {
+            // Mock Files.list for each directory we'll process
+            mockedFiles.when(() -> Files.list(eq(systemTempDir)))
+                    .thenReturn(Stream.of(
+                            ourTempFile1, ourTempFile2, oldTempFile, sysTempFile1,
+                            jettyFile1, jettyFile2, regularFile, emptyFile, nestedDir));
+
+            mockedFiles.when(() -> Files.list(eq(customTempDir)))
+                    .thenReturn(Stream.of(ourTempFile3, ourTempFile4, sysTempFile2, sysTempFile3));
+
+            mockedFiles.when(() -> Files.list(eq(libreOfficeTempDir)))
+                    .thenReturn(Stream.of(ourTempFile5));
+
+            mockedFiles.when(() -> Files.list(eq(nestedDir)))
+                    .thenReturn(Stream.of(nestedTempFile));
+
+            // Configure Files.isDirectory: catch-all first, specific override last. Mockito
+            // prefers the most recently registered matching stub, so the nestedDir stub must
+            // come after the any(Path) stub or it would be shadowed.
+            mockedFiles.when(() -> Files.isDirectory(any(Path.class))).thenReturn(false);
+            mockedFiles.when(() -> Files.isDirectory(eq(nestedDir))).thenReturn(true);
+
+            // Configure Files.exists to return true for all paths
+            mockedFiles.when(() -> Files.exists(any(Path.class))).thenReturn(true);
+
+            // Configure Files.getLastModifiedTime to return different times based on file names
+            mockedFiles.when(() -> Files.getLastModifiedTime(any(Path.class)))
+                    .thenAnswer(invocation -> {
+                        Path path = invocation.getArgument(0);
+                        String fileName = path.getFileName().toString();
+
+                        // For files with "old" in the name, return a timestamp older than maxAgeMillis
+                        if (fileName.contains("old")) {
+                            return FileTime.fromMillis(System.currentTimeMillis() - 5000000);
+                        }
+                        // For empty.tmp file, return a timestamp older than 5 minutes (for empty file test)
+                        else if (fileName.equals("empty.tmp")) {
+                            return FileTime.fromMillis(System.currentTimeMillis() - 6 * 60 * 1000);
+                        }
+                        // For all other files, return a recent timestamp
+                        else {
+                            return FileTime.fromMillis(System.currentTimeMillis() - 60000); // 1 minute ago
+                        }
+                    });
+
+            // Configure Files.size to return different sizes based on file names
+            mockedFiles.when(() -> Files.size(any(Path.class)))
+                    .thenAnswer(invocation -> {
+                        Path path = invocation.getArgument(0);
+                        String fileName = path.getFileName().toString();
+
+                        // Return 0 bytes for the empty file
+                        if (fileName.equals("empty.tmp")) {
+                            return 0L;
+                        }
+                        // Return normal size for all other files
+                        else {
+                            return 1024L; // 1 KB
+                        }
+                    });
+
+            // For deleteIfExists, track which files would be deleted
+            mockedFiles.when(() -> Files.deleteIfExists(any(Path.class)))
+                    .thenAnswer(invocation -> {
+                        Path path = invocation.getArgument(0);
+                        deletedFiles.add(path);
+                        return true;
+                    });
+
+            // Act - set containerMode to false for this test
+            invokeCleanupDirectoryStreaming(systemTempDir, false, 0, 3600000);
+            invokeCleanupDirectoryStreaming(customTempDir, false, 0, 3600000);
+            invokeCleanupDirectoryStreaming(libreOfficeTempDir, false, 0, 3600000);
+
+            // Assert - Only old temp files and empty files should be deleted
+            assertTrue(deletedFiles.contains(oldTempFile), "Old temp file should be deleted");
+            assertTrue(deletedFiles.contains(emptyFile), "Empty file should be deleted");
+
+            // Regular temp files should not be deleted because they're too new
+            assertFalse(deletedFiles.contains(ourTempFile1), "Recent temp file should be preserved");
+            assertFalse(deletedFiles.contains(ourTempFile2), "Recent temp file should be preserved");
+            assertFalse(deletedFiles.contains(ourTempFile3), "Recent temp file should be preserved");
+            assertFalse(deletedFiles.contains(ourTempFile4), "Recent temp file should be preserved");
+            assertFalse(deletedFiles.contains(ourTempFile5), "Recent temp file should be preserved");
+
+            // System temp files should not be deleted in non-container mode
+            assertFalse(deletedFiles.contains(sysTempFile1), "System temp file should be preserved in non-container mode");
+            assertFalse(deletedFiles.contains(sysTempFile2), "System temp file should be preserved in non-container mode");
+            assertFalse(deletedFiles.contains(sysTempFile3), "System temp file should be preserved in non-container mode");
+
+            // Jetty files and regular files should never be deleted
+            assertFalse(deletedFiles.contains(jettyFile1), "Jetty file should be preserved");
+            assertFalse(deletedFiles.contains(jettyFile2), "File with jetty in name should be preserved");
+            assertFalse(deletedFiles.contains(regularFile), "Regular file should be preserved");
+        }
+    }
+
+    @Test
+    public void testContainerModeCleanup() throws IOException {
+        // Arrange - Create various temp files
+        Path ourTempFile = Files.createFile(systemTempDir.resolve("output_123.pdf"));
+        Path sysTempFile = Files.createFile(systemTempDir.resolve("lu123abc.tmp"));
+        Path regularFile = Files.createFile(systemTempDir.resolve("important.txt"));
+
+        // Configure mock registry to say these files aren't registered
+        when(registry.contains(any(File.class))).thenReturn(false);
+
+        // The set of files that will be deleted in our test
+        Set<Path> deletedFiles = new HashSet<>();
+
+        // Use MockedStatic to mock Files operations
+        try (MockedStatic<Files> mockedFiles = mockStatic(Files.class)) {
+            // Mock Files.list for systemTempDir
+            mockedFiles.when(() -> Files.list(eq(systemTempDir)))
+                    .thenReturn(Stream.of(ourTempFile, sysTempFile, regularFile));
+
+            // Configure Files.isDirectory
+            mockedFiles.when(() -> Files.isDirectory(any(Path.class))).thenReturn(false);
+
+            // Configure Files.exists
+            mockedFiles.when(() -> Files.exists(any(Path.class))).thenReturn(true);
+
+            // Configure Files.getLastModifiedTime to return recent timestamps
+            mockedFiles.when(() -> Files.getLastModifiedTime(any(Path.class)))
+                    .thenReturn(FileTime.fromMillis(System.currentTimeMillis() - 60000)); // 1 minute ago
+
+            // Configure Files.size to return normal size
+            mockedFiles.when(() -> Files.size(any(Path.class)))
+                    .thenReturn(1024L); // 1 KB
+
+            // For deleteIfExists, track which files would be deleted
+            mockedFiles.when(() -> Files.deleteIfExists(any(Path.class)))
+                    .thenAnswer(invocation -> {
+                        Path path = invocation.getArgument(0);
+                        deletedFiles.add(path);
+                        return true;
+                    });
+
+            // Act - set containerMode to true and maxAgeMillis to 0 for container startup cleanup
+            invokeCleanupDirectoryStreaming(systemTempDir, true, 0, 0);
+
+            // Assert - In container mode, both our temp files and system temp files should be deleted
+            // regardless of age (when maxAgeMillis is 0)
+            assertTrue(deletedFiles.contains(ourTempFile), "Our temp file should be deleted in container mode");
+            assertTrue(deletedFiles.contains(sysTempFile), "System temp file should be deleted in container mode");
+            assertFalse(deletedFiles.contains(regularFile), "Regular file should be preserved");
+        }
+    }
+
+    @Test
+    public void testEmptyFileHandling() throws IOException {
+        // Arrange - Create an empty file
+        Path emptyFile = Files.createFile(systemTempDir.resolve("empty.tmp"));
+        Path recentEmptyFile = Files.createFile(systemTempDir.resolve("recent_empty.tmp"));
+
+        // Configure mock registry to say these files aren't registered
+        when(registry.contains(any(File.class))).thenReturn(false);
+
+        // The set of files that will be deleted in our test
+        Set<Path> deletedFiles = new HashSet<>();
+
+        // Use MockedStatic to mock Files operations
+        try (MockedStatic<Files> mockedFiles = mockStatic(Files.class)) {
+            // Mock Files.list for systemTempDir
+            mockedFiles.when(() -> Files.list(eq(systemTempDir)))
+                    .thenReturn(Stream.of(emptyFile, recentEmptyFile));
+
+            // Configure Files.isDirectory
+            mockedFiles.when(() -> Files.isDirectory(any(Path.class))).thenReturn(false);
+
+            // Configure Files.exists
+            mockedFiles.when(() -> Files.exists(any(Path.class))).thenReturn(true);
+
+            // Configure Files.getLastModifiedTime to return different times based on file names
+            mockedFiles.when(() -> Files.getLastModifiedTime(any(Path.class)))
+                    .thenAnswer(invocation -> {
+                        Path path = invocation.getArgument(0);
+                        String fileName = path.getFileName().toString();
+
+                        if (fileName.equals("empty.tmp")) {
+                            // More than 5 minutes old
+                            return FileTime.fromMillis(System.currentTimeMillis() - 6 * 60 * 1000);
+                        } else {
+                            // Less than 5 minutes old
+                            return FileTime.fromMillis(System.currentTimeMillis() - 2 * 60 * 1000);
+                        }
+                    });
+
+            // Configure Files.size to return 0 for empty files
+            mockedFiles.when(() -> Files.size(any(Path.class)))
+                    .thenReturn(0L);
+
+            // For deleteIfExists, track which files would be deleted
+            mockedFiles.when(() -> Files.deleteIfExists(any(Path.class)))
+                    .thenAnswer(invocation -> {
+                        Path path = invocation.getArgument(0);
+                        deletedFiles.add(path);
+                        return true;
+                    });
+
+            // Act
+            invokeCleanupDirectoryStreaming(systemTempDir, false, 0, 3600000);
+
+            // Assert
+            assertTrue(deletedFiles.contains(emptyFile),
+                    "Empty file older than 5 minutes should be deleted");
+            assertFalse(deletedFiles.contains(recentEmptyFile),
+                    "Empty file newer than 5 minutes should not be deleted");
+        }
+    }
+
+    @Test
+    public void testRecursiveDirectoryCleaning() throws IOException {
+        // Arrange - Create a nested directory structure with temp files
+        Path dir1 = Files.createDirectories(systemTempDir.resolve("dir1"));
+        Path dir2 = Files.createDirectories(dir1.resolve("dir2"));
+        Path dir3 = Files.createDirectories(dir2.resolve("dir3"));
+
+        Path tempFile1 = Files.createFile(dir1.resolve("output_1.pdf"));
+        Path tempFile2 = Files.createFile(dir2.resolve("output_2.pdf"));
+        Path tempFile3 = Files.createFile(dir3.resolve("output_old_3.pdf"));
+
+        // Configure mock registry to say these files aren't registered
+        when(registry.contains(any(File.class))).thenReturn(false);
+
+        // The set of files that will be deleted in our test
+        Set<Path> deletedFiles = new HashSet<>();
+
+        // Use MockedStatic to mock Files operations
+        try (MockedStatic<Files> mockedFiles = mockStatic(Files.class)) {
+            // Mock Files.list for each directory
+            mockedFiles.when(() -> Files.list(eq(systemTempDir)))
+                    .thenReturn(Stream.of(dir1));
+
+            mockedFiles.when(() -> Files.list(eq(dir1)))
+                    .thenReturn(Stream.of(tempFile1, dir2));
+
+            mockedFiles.when(() -> Files.list(eq(dir2)))
+                    .thenReturn(Stream.of(tempFile2, dir3));
+
+            mockedFiles.when(() -> Files.list(eq(dir3)))
+                    .thenReturn(Stream.of(tempFile3));
+
+            // Configure Files.isDirectory for each path
+            mockedFiles.when(() -> Files.isDirectory(eq(dir1))).thenReturn(true);
+            mockedFiles.when(() -> Files.isDirectory(eq(dir2))).thenReturn(true);
+            mockedFiles.when(() -> Files.isDirectory(eq(dir3))).thenReturn(true);
+            mockedFiles.when(() -> Files.isDirectory(eq(tempFile1))).thenReturn(false);
+            mockedFiles.when(() -> Files.isDirectory(eq(tempFile2))).thenReturn(false);
+            mockedFiles.when(() -> Files.isDirectory(eq(tempFile3))).thenReturn(false);
+
+            // Configure Files.exists to return true for all paths
+            mockedFiles.when(() -> Files.exists(any(Path.class))).thenReturn(true);
+
+            // Configure Files.getLastModifiedTime to return different times based on file names
+            mockedFiles.when(() -> Files.getLastModifiedTime(any(Path.class)))
+                    .thenAnswer(invocation -> {
+                        Path path = invocation.getArgument(0);
+                        String fileName = path.getFileName().toString();
+
+                        if (fileName.contains("old")) {
+                            // Old file
+                            return FileTime.fromMillis(System.currentTimeMillis() - 5000000);
+                        } else {
+                            // Recent file
+                            return FileTime.fromMillis(System.currentTimeMillis() - 60000);
+                        }
+                    });
+
+            // Configure Files.size to return normal size
+            mockedFiles.when(() -> Files.size(any(Path.class)))
+                    .thenReturn(1024L);
+
+            // For deleteIfExists, track which files would be deleted
+            mockedFiles.when(() -> Files.deleteIfExists(any(Path.class)))
+                    .thenAnswer(invocation -> {
+                        Path path = invocation.getArgument(0);
+                        deletedFiles.add(path);
+                        return true;
+                    });
+
+            // Act
+            invokeCleanupDirectoryStreaming(systemTempDir, false, 0, 3600000);
+
+            // Assert
+            assertFalse(deletedFiles.contains(tempFile1), "Recent temp file should be preserved");
+            assertFalse(deletedFiles.contains(tempFile2), "Recent temp file should be preserved");
+            assertTrue(deletedFiles.contains(tempFile3), "Old temp file in nested directory should be deleted");
+        }
+    }
+
+    /**
+     * Helper method to invoke the private cleanupDirectoryStreaming method using reflection.
+     * The isScheduled-position argument is always passed as false.
+     *
+     * @throws IOException If the invoked method itself fails with an IOException
+     */
+    private void invokeCleanupDirectoryStreaming(Path directory, boolean containerMode, int depth, long maxAgeMillis)
+            throws IOException {
+        // The callback is required by the target signature; the tests observe deletions
+        // through the mocked Files.deleteIfExists instead of this counter.
+        AtomicInteger deleteCount = new AtomicInteger(0);
+        Consumer<Path> deleteCallback = path -> deleteCount.incrementAndGet();
+
+        try {
+            // Get the method with updated signature
+            var method = TempFileCleanupService.class.getDeclaredMethod(
+                    "cleanupDirectoryStreaming",
+                    Path.class, boolean.class, int.class, long.class, boolean.class, Consumer.class);
+            method.setAccessible(true);
+
+            // Invoke the method with appropriate parameters
+            method.invoke(cleanupService, directory, containerMode, depth, maxAgeMillis, false, deleteCallback);
+        } catch (java.lang.reflect.InvocationTargetException e) {
+            // Surface the real failure instead of the reflection wrapper.
+            Throwable cause = e.getCause();
+            if (cause instanceof IOException) {
+                throw (IOException) cause;
+            }
+            throw new RuntimeException("Error invoking cleanupDirectoryStreaming", cause);
+        } catch (ReflectiveOperationException e) {
+            throw new RuntimeException("Error invoking cleanupDirectoryStreaming", e);
+        }
+    }
+
+    // Matcher for exact path equality
+    private static Path eq(Path path) {
+        return argThat(arg -> arg != null && arg.equals(path));
+    }
+}
\ No newline at end of file
diff --git a/common/src/test/java/stirling/software/common/util/FileToPdfTest.java b/common/src/test/java/stirling/software/common/util/FileToPdfTest.java index a897e887b..f1df1cf25 100644 --- a/common/src/test/java/stirling/software/common/util/FileToPdfTest.java +++ b/common/src/test/java/stirling/software/common/util/FileToPdfTest.java @@ -3,7 +3,11 @@ package stirling.software.common.util; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; +import
static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.mockito.ArgumentMatchers.anyString; +import java.io.File; import java.io.IOException; import org.junit.jupiter.api.Test; @@ -22,14 +26,24 @@ public class FileToPdfTest { byte[] fileBytes = new byte[0]; // Sample file bytes (empty input) String fileName = "test.html"; // Sample file name indicating an HTML file boolean disableSanitize = false; // Flag to control sanitization + TempFileManager tempFileManager = mock(TempFileManager.class); // Mock TempFileManager + + // Mock the temp file creation to return real temp files + try { + when(tempFileManager.createTempFile(anyString())) + .thenReturn(File.createTempFile("test", ".pdf")) + .thenReturn(File.createTempFile("test", ".html")); + } catch (IOException e) { + throw new RuntimeException(e); + } - // Expect an IOException to be thrown due to empty input + // Expect an IOException to be thrown due to empty input or invalid weasyprint path Throwable thrown = assertThrows( - IOException.class, + Exception.class, () -> FileToPdf.convertHtmlToPdf( - "/path/", request, fileBytes, fileName, disableSanitize)); + "/path/", request, fileBytes, fileName, disableSanitize, tempFileManager)); assertNotNull(thrown); } diff --git a/proprietary/src/main/java/stirling/software/proprietary/config/HttpRequestAuditPublisher.java b/proprietary/src/main/java/stirling/software/proprietary/config/HttpRequestAuditPublisher.java deleted file mode 100644 index e69de29bb..000000000 diff --git a/proprietary/src/main/java/stirling/software/proprietary/security/configuration/ee/KeygenLicenseVerifier.java b/proprietary/src/main/java/stirling/software/proprietary/security/configuration/ee/KeygenLicenseVerifier.java index 969385a33..8a4dd7d3f 100644 --- a/proprietary/src/main/java/stirling/software/proprietary/security/configuration/ee/KeygenLicenseVerifier.java +++ 
b/proprietary/src/main/java/stirling/software/proprietary/security/configuration/ee/KeygenLicenseVerifier.java @@ -65,6 +65,9 @@ public class KeygenLicenseVerifier { } public License verifyLicense(String licenseKeyOrCert) { + if (!applicationProperties.getPremium().isEnabled()) { + return License.NORMAL; + } License license; LicenseContext context = new LicenseContext(); diff --git a/scripts/init-without-ocr.sh b/scripts/init-without-ocr.sh index 934c995a3..aade064bb 100644 --- a/scripts/init-without-ocr.sh +++ b/scripts/init-without-ocr.sh @@ -28,9 +28,11 @@ if [[ -n "$LANGS" ]]; then fi echo "Setting permissions and ownership for necessary directories..." +# Ensure temp directory exists and has correct permissions +mkdir -p /tmp/stirling-pdf || true # Attempt to change ownership of directories and files -if chown -R stirlingpdfuser:stirlingpdfgroup $HOME /logs /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline /app.jar; then - chmod -R 755 /logs /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline /app.jar || true +if chown -R stirlingpdfuser:stirlingpdfgroup $HOME /logs /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline /tmp/stirling-pdf /app.jar; then + chmod -R 755 /logs /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline /tmp/stirling-pdf /app.jar || true # If chown succeeds, execute the command as stirlingpdfuser exec su-exec stirlingpdfuser "$@" else diff --git a/scripts/init.sh b/scripts/init.sh index f839da2bd..4cde7db46 100644 --- a/scripts/init.sh +++ b/scripts/init.sh @@ -28,4 +28,9 @@ if [[ -n "$TESSERACT_LANGS" ]]; then done fi +# Ensure temp directory exists with correct permissions before running main init +mkdir -p /tmp/stirling-pdf || true +chown -R stirlingpdfuser:stirlingpdfgroup /tmp/stirling-pdf || true +chmod -R 755 /tmp/stirling-pdf || true + /scripts/init-without-ocr.sh "$@" \ No newline at end of file diff --git 
a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertEmlToPDF.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertEmlToPDF.java index 32aedf57c..33d51a2a1 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertEmlToPDF.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertEmlToPDF.java @@ -24,6 +24,7 @@ import stirling.software.common.configuration.RuntimePathConfig; import stirling.software.common.model.api.converters.EmlToPdfRequest; import stirling.software.common.service.CustomPDFDocumentFactory; import stirling.software.common.util.EmlToPdf; +import stirling.software.common.util.TempFileManager; import stirling.software.common.util.WebResponseUtils; @RestController @@ -35,6 +36,7 @@ public class ConvertEmlToPDF { private final CustomPDFDocumentFactory pdfDocumentFactory; private final RuntimePathConfig runtimePathConfig; + private final TempFileManager tempFileManager; @PostMapping(consumes = "multipart/form-data", value = "/eml/pdf") @Operation( @@ -102,7 +104,8 @@ public class ConvertEmlToPDF { fileBytes, originalFilename, false, - pdfDocumentFactory); + pdfDocumentFactory, + tempFileManager); if (pdfBytes == null || pdfBytes.length == 0) { log.error("PDF conversion failed - empty output for {}", originalFilename); diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java index cdd9bc1a7..4eff3a872 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java @@ -18,6 +18,7 @@ import stirling.software.common.model.ApplicationProperties; import stirling.software.common.model.api.converters.HTMLToPdfRequest; 
import stirling.software.common.service.CustomPDFDocumentFactory; import stirling.software.common.util.FileToPdf; +import stirling.software.common.util.TempFileManager; import stirling.software.common.util.WebResponseUtils; @RestController @@ -32,6 +33,8 @@ public class ConvertHtmlToPDF { private final RuntimePathConfig runtimePathConfig; + private final TempFileManager tempFileManager; + @PostMapping(consumes = "multipart/form-data", value = "/html/pdf") @Operation( summary = "Convert an HTML or ZIP (containing HTML and CSS) to PDF", @@ -62,7 +65,8 @@ public class ConvertHtmlToPDF { request, fileInput.getBytes(), originalFilename, - disableSanitize); + disableSanitize, + tempFileManager); pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes); diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java index 98f96fbdb..1bf2d94a8 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java @@ -28,6 +28,7 @@ import stirling.software.common.model.ApplicationProperties; import stirling.software.common.model.api.GeneralFile; import stirling.software.common.service.CustomPDFDocumentFactory; import stirling.software.common.util.FileToPdf; +import stirling.software.common.util.TempFileManager; import stirling.software.common.util.WebResponseUtils; @RestController @@ -41,6 +42,8 @@ public class ConvertMarkdownToPdf { private final ApplicationProperties applicationProperties; private final RuntimePathConfig runtimePathConfig; + private final TempFileManager tempFileManager; + @PostMapping(consumes = "multipart/form-data", value = "/markdown/pdf") @Operation( summary = "Convert a Markdown file to PDF", @@ -82,7 +85,8 @@ public class 
ConvertMarkdownToPdf { null, htmlContent.getBytes(), "converted.html", - disableSanitize); + disableSanitize, + tempFileManager); pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes); String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java index be6c4649c..93061b570 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java @@ -2,7 +2,6 @@ package stirling.software.SPDF.controller.api.misc; import java.awt.image.BufferedImage; import java.io.*; -import java.nio.file.Files; import java.nio.file.Path; import java.util.*; import java.util.zip.ZipEntry; @@ -23,7 +22,6 @@ import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; import org.springframework.web.multipart.MultipartFile; -import io.github.pixee.security.BoundedLineReader; import io.github.pixee.security.Filenames; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; @@ -34,6 +32,9 @@ import lombok.extern.slf4j.Slf4j; import stirling.software.SPDF.model.api.misc.ProcessPdfWithOcrRequest; import stirling.software.common.model.ApplicationProperties; import stirling.software.common.service.CustomPDFDocumentFactory; +import stirling.software.common.util.ProcessExecutor; +import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult; +import stirling.software.common.util.TempFileManager; @RestController @RequestMapping("/api/v1/misc") @@ -43,8 +44,8 @@ import stirling.software.common.service.CustomPDFDocumentFactory; public class OCRController { private final ApplicationProperties applicationProperties; - private final 
CustomPDFDocumentFactory pdfDocumentFactory; + private final TempFileManager tempFileManager; /** Gets the list of available Tesseract languages from the tessdata directory */ public List getAvailableTesseractLanguages() { @@ -73,93 +74,117 @@ public class OCRController { MultipartFile inputFile = request.getFileInput(); List languages = request.getLanguages(); String ocrType = request.getOcrType(); - Path tempDir = Files.createTempDirectory("ocr_process"); - Path tempInputFile = tempDir.resolve("input.pdf"); - Path tempOutputDir = tempDir.resolve("output"); - Path tempImagesDir = tempDir.resolve("images"); - Path finalOutputFile = tempDir.resolve("final_output.pdf"); - Files.createDirectories(tempOutputDir); - Files.createDirectories(tempImagesDir); - Process process = null; + + // Create a temp directory using TempFileManager directly + Path tempDirPath = tempFileManager.createTempDirectory(); + File tempDir = tempDirPath.toFile(); + try { + File tempInputFile = new File(tempDir, "input.pdf"); + File tempOutputDir = new File(tempDir, "output"); + File tempImagesDir = new File(tempDir, "images"); + File finalOutputFile = new File(tempDir, "final_output.pdf"); + + // Create directories + tempOutputDir.mkdirs(); + tempImagesDir.mkdirs(); + // Save input file - inputFile.transferTo(tempInputFile.toFile()); + inputFile.transferTo(tempInputFile); + PDFMergerUtility merger = new PDFMergerUtility(); merger.setDestinationFileName(finalOutputFile.toString()); - try (PDDocument document = pdfDocumentFactory.load(tempInputFile.toFile())) { + + try (PDDocument document = pdfDocumentFactory.load(tempInputFile)) { PDFRenderer pdfRenderer = new PDFRenderer(document); int pageCount = document.getNumberOfPages(); + for (int pageNum = 0; pageNum < pageCount; pageNum++) { PDPage page = document.getPage(pageNum); boolean hasText = false; + // Check for existing text try (PDDocument tempDoc = new PDDocument()) { tempDoc.addPage(page); PDFTextStripper stripper = new PDFTextStripper(); 
hasText = !stripper.getText(tempDoc).trim().isEmpty(); } + boolean shouldOcr = switch (ocrType) { case "skip-text" -> !hasText; case "force-ocr" -> true; default -> true; }; - Path pageOutputPath = - tempOutputDir.resolve(String.format("page_%d.pdf", pageNum)); + + File pageOutputPath = + new File(tempOutputDir, String.format("page_%d.pdf", pageNum)); + if (shouldOcr) { // Convert page to image BufferedImage image = pdfRenderer.renderImageWithDPI(pageNum, 300); - Path imagePath = - tempImagesDir.resolve(String.format("page_%d.png", pageNum)); - ImageIO.write(image, "png", imagePath.toFile()); + File imagePath = + new File(tempImagesDir, String.format("page_%d.png", pageNum)); + ImageIO.write(image, "png", imagePath); + // Build OCR command List command = new ArrayList<>(); command.add("tesseract"); command.add(imagePath.toString()); command.add( - tempOutputDir - .resolve(String.format("page_%d", pageNum)) + new File(tempOutputDir, String.format("page_%d", pageNum)) .toString()); command.add("-l"); command.add(String.join("+", languages)); // Always output PDF command.add("pdf"); - ProcessBuilder pb = new ProcessBuilder(command); - process = pb.start(); - // Capture any error output - try (BufferedReader reader = - new BufferedReader( - new InputStreamReader(process.getErrorStream()))) { - String line; - while ((line = BoundedLineReader.readLine(reader, 5_000_000)) != null) { - log.debug("Tesseract: {}", line); + + // Use ProcessExecutor to run tesseract command + try { + ProcessExecutorResult result = + ProcessExecutor.getInstance(ProcessExecutor.Processes.TESSERACT) + .runCommandWithOutputHandling(command); + + log.debug( + "Tesseract OCR completed for page {} with exit code {}", + pageNum, + result.getRc()); + + // Add OCR'd PDF to merger + merger.addSource(pageOutputPath); + } catch (IOException | InterruptedException e) { + log.error( + "Error processing page {} with tesseract: {}", + pageNum, + e.getMessage()); + // If OCR fails, fall back to the original 
page + try (PDDocument pageDoc = new PDDocument()) { + pageDoc.addPage(page); + pageDoc.save(pageOutputPath); + merger.addSource(pageOutputPath); } } - int exitCode = process.waitFor(); - if (exitCode != 0) { - throw new RuntimeException( - "Tesseract failed with exit code: " + exitCode); - } - // Add OCR'd PDF to merger - merger.addSource(pageOutputPath.toFile()); } else { // Save original page without OCR try (PDDocument pageDoc = new PDDocument()) { pageDoc.addPage(page); - pageDoc.save(pageOutputPath.toFile()); - merger.addSource(pageOutputPath.toFile()); + pageDoc.save(pageOutputPath); + merger.addSource(pageOutputPath); } } } } + // Merge all pages into final PDF merger.mergeDocuments(null); + // Read the final PDF file - byte[] pdfContent = Files.readAllBytes(finalOutputFile); + byte[] pdfContent = java.nio.file.Files.readAllBytes(finalOutputFile.toPath()); String outputFilename = Filenames.toSimpleFileName(inputFile.getOriginalFilename()) .replaceFirst("[.][^.]+$", "") + "_OCR.pdf"; + return ResponseEntity.ok() .header( "Content-Disposition", @@ -167,11 +192,8 @@ public class OCRController { .contentType(MediaType.APPLICATION_PDF) .body(pdfContent); } finally { - if (process != null) { - process.destroy(); - } - // Clean up temporary files - deleteDirectory(tempDir); + // Clean up the temp directory and all its contents + tempFileManager.deleteTempDirectory(tempDirPath); } } @@ -192,21 +214,4 @@ public class OCRController { zipOut.closeEntry(); } } - - private void deleteDirectory(Path directory) { - try { - Files.walk(directory) - .sorted(Comparator.reverseOrder()) - .forEach( - path -> { - try { - Files.delete(path); - } catch (IOException e) { - log.error("Error deleting {}: {}", path, e.getMessage()); - } - }); - } catch (IOException e) { - log.error("Error walking directory {}: {}", directory, e.getMessage()); - } - } } diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java 
b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java index 85340a163..b8c347ef1 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/RepairController.java @@ -1,8 +1,6 @@ package stirling.software.SPDF.controller.api.misc; import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; import java.util.ArrayList; import java.util.List; @@ -23,6 +21,8 @@ import stirling.software.common.model.api.PDFFile; import stirling.software.common.service.CustomPDFDocumentFactory; import stirling.software.common.util.ProcessExecutor; import stirling.software.common.util.ProcessExecutor.ProcessExecutorResult; +import stirling.software.common.util.TempFile; +import stirling.software.common.util.TempFileManager; import stirling.software.common.util.WebResponseUtils; @RestController @@ -32,6 +32,7 @@ import stirling.software.common.util.WebResponseUtils; public class RepairController { private final CustomPDFDocumentFactory pdfDocumentFactory; + private final TempFileManager tempFileManager; @PostMapping(consumes = "multipart/form-data", value = "/repair") @Operation( @@ -43,25 +44,25 @@ public class RepairController { public ResponseEntity repairPdf(@ModelAttribute PDFFile file) throws IOException, InterruptedException { MultipartFile inputFile = file.getFileInput(); - // Save the uploaded file to a temporary location - Path tempInputFile = Files.createTempFile("input_", ".pdf"); - byte[] pdfBytes = null; - inputFile.transferTo(tempInputFile.toFile()); - try { + + // Use TempFile with try-with-resources for automatic cleanup + try (TempFile tempFile = new TempFile(tempFileManager, ".pdf")) { + // Save the uploaded file to the temporary location + inputFile.transferTo(tempFile.getFile()); List command = new ArrayList<>(); command.add("qpdf"); command.add("--replace-input"); // Automatically 
fixes problems it can command.add("--qdf"); // Linearizes and normalizes PDF structure command.add("--object-streams=disable"); // Can help with some corruptions - command.add(tempInputFile.toString()); + command.add(tempFile.getFile().getAbsolutePath()); ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.QPDF) .runCommandWithOutputHandling(command); // Read the optimized PDF file - pdfBytes = pdfDocumentFactory.loadToBytes(tempInputFile.toFile()); + byte[] pdfBytes = pdfDocumentFactory.loadToBytes(tempFile.getFile()); // Return the optimized PDF as a response String outputFilename = @@ -69,9 +70,6 @@ public class RepairController { .replaceFirst("[.][^.]+$", "") + "_repaired.pdf"; return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename); - } finally { - // Clean up the temporary files - Files.deleteIfExists(tempInputFile); } } } diff --git a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/StampController.java b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/StampController.java index 9c0ad2909..0defd510a 100644 --- a/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/StampController.java +++ b/stirling-pdf/src/main/java/stirling/software/SPDF/controller/api/misc/StampController.java @@ -6,7 +6,6 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.nio.file.Files; import java.util.List; import javax.imageio.ImageIO; @@ -40,6 +39,8 @@ import lombok.RequiredArgsConstructor; import stirling.software.SPDF.model.api.misc.AddStampRequest; import stirling.software.common.service.CustomPDFDocumentFactory; +import stirling.software.common.util.TempFile; +import stirling.software.common.util.TempFileManager; import stirling.software.common.util.WebResponseUtils; @RestController @@ -49,6 +50,7 @@ import stirling.software.common.util.WebResponseUtils; public class StampController { private 
final CustomPDFDocumentFactory pdfDocumentFactory; + private final TempFileManager tempFileManager; @PostMapping(consumes = "multipart/form-data", value = "/add-stamp") @Operation( @@ -188,14 +190,14 @@ public class StampController { if (!"".equals(resourceDir)) { ClassPathResource classPathResource = new ClassPathResource(resourceDir); String fileExtension = resourceDir.substring(resourceDir.lastIndexOf(".")); - File tempFile = Files.createTempFile("NotoSansFont", fileExtension).toFile(); - try (InputStream is = classPathResource.getInputStream(); - FileOutputStream os = new FileOutputStream(tempFile)) { - IOUtils.copy(is, os); - font = PDType0Font.load(document, tempFile); - } finally { - if (tempFile != null) { - Files.deleteIfExists(tempFile.toPath()); + + // Use TempFile with try-with-resources for automatic cleanup + try (TempFile tempFileWrapper = new TempFile(tempFileManager, fileExtension)) { + File tempFile = tempFileWrapper.getFile(); + try (InputStream is = classPathResource.getInputStream(); + FileOutputStream os = new FileOutputStream(tempFile)) { + IOUtils.copy(is, os); + font = PDType0Font.load(document, tempFile); } } } diff --git a/stirling-pdf/src/main/resources/application.properties b/stirling-pdf/src/main/resources/application.properties index 00a2e87e1..ea30bf78e 100644 --- a/stirling-pdf/src/main/resources/application.properties +++ b/stirling-pdf/src/main/resources/application.properties @@ -44,4 +44,7 @@ springdoc.swagger-ui.path=/index.html posthog.api.key=phc_fiR65u5j6qmXTYL56MNrLZSWqLaDW74OrZH0Insd2xq posthog.host=https://eu.i.posthog.com -spring.main.allow-bean-definition-overriding=true \ No newline at end of file +spring.main.allow-bean-definition-overriding=true + +# Set up a consistent temporary directory location +java.io.tmpdir=${stirling.tempfiles.directory:${java.io.tmpdir}/stirling-pdf} \ No newline at end of file diff --git a/stirling-pdf/src/main/resources/settings.yml.template 
b/stirling-pdf/src/main/resources/settings.yml.template index d651eff9f..d45b8482b 100644 --- a/stirling-pdf/src/main/resources/settings.yml.template +++ b/stirling-pdf/src/main/resources/settings.yml.template @@ -125,6 +125,15 @@ system: weasyprint: '' #Defaults to /opt/venv/bin/weasyprint unoconvert: '' #Defaults to /opt/venv/bin/unoconvert fileUploadLimit: '' # Defaults to "". No limit when string is empty. Set a number, between 0 and 999, followed by one of the following strings to set a limit. "KB", "MB", "GB". + tempFileManagement: + baseTmpDir: '' # Defaults to java.io.tmpdir/stirling-pdf + libreofficeDir: '' # Defaults to tempFileManagement.baseTmpDir/libreoffice + systemTempDir: '' # Only used if cleanupSystemTemp is true + prefix: stirling-pdf- # Prefix for temp file names + maxAgeHours: 24 # Maximum age in hours before temp files are cleaned up + cleanupIntervalMinutes: 30 # How often to run cleanup (in minutes) + startupCleanup: true # Clean up old temp files on startup + cleanupSystemTemp: false # Whether to clean broader system temp directory ui: appName: '' # application's visible name diff --git a/testing/test.sh b/testing/test.sh index 4658edeb5..94370807b 100644 --- a/testing/test.sh +++ b/testing/test.sh @@ -55,10 +55,12 @@ capture_file_list() { -not -path '/config/*' \ -not -path '/logs/*' \ -not -path '*/home/stirlingpdfuser/.config/libreoffice/*' \ - -not -path '*/tmp/PDFBox*' \ + -not -path '*/home/stirlingpdfuser/.pdfbox.cache' \ + -not -path '*/tmp/stirling-pdf/PDFBox*' \ + -not -path '*/tmp/stirling-pdf/hsperfdata_stirlingpdfuser/*' \ -not -path '*/tmp/hsperfdata_stirlingpdfuser/*' \ - -not -path '*/tmp/lu*' \ - -not -path '*/tmp/tmp*' \ + -not -path '*/tmp/stirling-pdf/lu*' \ + -not -path '*/tmp/stirling-pdf/tmp*' \ 2>/dev/null | xargs -I{} sh -c 'stat -c \"%n %s %Y\" \"{}\" 2>/dev/null || true' | sort" > "$output_file" # Check if the output file has content @@ -74,8 +76,10 @@ capture_file_list() { -not -path '/config/*' \ -not -path 
'/logs/*' \ -not -path '*/home/stirlingpdfuser/.config/libreoffice/*' \ + -not -path '*/home/stirlingpdfuser/.pdfbox.cache' \ -not -path '*/tmp/PDFBox*' \ -not -path '*/tmp/hsperfdata_stirlingpdfuser/*' \ + -not -path '*/tmp/stirling-pdf/hsperfdata_stirlingpdfuser/*' \ -not -path '*/tmp/lu*' \ -not -path '*/tmp/tmp*' \ 2>/dev/null | sort" > "$output_file"