Remove convertBookTypeToPdf and Improve File Sanitization in FileToPdf (#3072)

# Description of Changes

Please provide a summary of the changes, including:

- **Removed `convertBookTypeToPdf` method**: 
- This method used `ebook-convert` from Calibre, which required external
dependencies.
- Its removal eliminates unnecessary process execution and simplifies
the codebase.
  
- **Enhanced `sanitizeZipFilename` function**:
  - Added handling for drive letters (e.g., `C:\`).
  - Ensured all slashes are normalized to forward slashes.
- Improved recursive path traversal removal to prevent directory escape
vulnerabilities.

- **Refactored `ProcessExecutor` output handling**:
  - Replaced redundant `.size() > 0` checks with `.isEmpty()`.
  
- **Expanded unit tests in `FileToPdfTest`**:
  - Added tests for `sanitizeZipFilename` to cover edge cases.
  - Improved test descriptions and added assertion messages.
  - Added debug print statements for easier test debugging.

---

## Checklist

### General

- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing)
for more details.
This commit is contained in:
Ludy 2025-02-26 20:25:35 +01:00 committed by GitHub
parent 96655f7cac
commit 9152e64b9f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 79 additions and 53 deletions

View File

@ -169,7 +169,7 @@ public class FileToPdf {
}
}
// search for the main HTML file.
// Search for the main HTML file.
try (Stream<Path> walk = Files.walk(tempDirectory)) {
List<Path> htmlFiles =
walk.filter(file -> file.toString().endsWith(".html"))
@ -190,46 +190,20 @@ public class FileToPdf {
}
}
public static byte[] convertBookTypeToPdf(byte[] bytes, String originalFilename)
throws IOException, InterruptedException {
if (originalFilename == null || originalFilename.lastIndexOf('.') == -1) {
throw new IllegalArgumentException("Invalid original filename.");
}
String fileExtension = originalFilename.substring(originalFilename.lastIndexOf('.'));
List<String> command = new ArrayList<>();
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
Path tempInputFile = null;
try {
// Create temp file with appropriate extension
tempInputFile = Files.createTempFile("input_", fileExtension);
Files.write(tempInputFile, bytes);
command.add("ebook-convert");
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.CALIBRE)
.runCommandWithOutputHandling(command);
return Files.readAllBytes(tempOutputFile);
} finally {
// Clean up temporary files
if (tempInputFile != null) {
Files.deleteIfExists(tempInputFile);
}
Files.deleteIfExists(tempOutputFile);
}
}
static String sanitizeZipFilename(String entryName) {
if (entryName == null || entryName.trim().isEmpty()) {
return entryName;
return "";
}
// Remove any drive letters (e.g., "C:\") and leading forward/backslashes
entryName = entryName.replaceAll("^[a-zA-Z]:[\\\\/]+", "");
entryName = entryName.replaceAll("^[\\\\/]+", "");
// Recursively remove path traversal sequences
while (entryName.contains("../") || entryName.contains("..\\")) {
entryName = entryName.replace("../", "").replace("..\\", "");
}
// Normalize all backslashes to forward slashes
entryName = entryName.replaceAll("\\\\", "/");
return entryName;
}
}

View File

@ -1,6 +1,10 @@
package stirling.software.SPDF.utils;
import java.io.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.InterruptedIOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
@ -222,7 +226,7 @@ public class ProcessExecutor {
boolean isQpdf =
command != null && !command.isEmpty() && command.get(0).contains("qpdf");
if (outputLines.size() > 0) {
if (!outputLines.isEmpty()) {
String outputMessage = String.join("\n", outputLines);
messages += outputMessage;
if (!liveUpdates) {
@ -230,7 +234,7 @@ public class ProcessExecutor {
}
}
if (errorLines.size() > 0) {
if (!errorLines.isEmpty()) {
String errorMessage = String.join("\n", errorLines);
messages += errorMessage;
if (!liveUpdates) {

View File

@ -5,31 +5,79 @@ import stirling.software.SPDF.model.api.converters.HTMLToPdfRequest;
import java.io.IOException;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.*;
public class FileToPdfTest {
/**
* Test the HTML to PDF conversion.
* This test expects an IOException when an empty HTML input is provided.
*/
@Test
public void testConvertHtmlToPdf() {
HTMLToPdfRequest request = new HTMLToPdfRequest();
byte[] fileBytes = new byte[0]; // Sample file bytes
String fileName = "test.html"; // Sample file name
boolean disableSanitize = false; // Sample boolean value
byte[] fileBytes = new byte[0]; // Sample file bytes (empty input)
String fileName = "test.html"; // Sample file name indicating an HTML file
boolean disableSanitize = false; // Flag to control sanitization
// Check if the method throws IOException
assertThrows(IOException.class, () -> {
FileToPdf.convertHtmlToPdf("/path/",request, fileBytes, fileName, disableSanitize);
});
// Expect an IOException to be thrown due to empty input
Throwable thrown =
assertThrows(
IOException.class,
() ->
FileToPdf.convertHtmlToPdf(
"/path/", request, fileBytes, fileName, disableSanitize));
assertNotNull(thrown);
}
/**
* Test sanitizeZipFilename with null or empty input.
* It should return an empty string in these cases.
*/
@Test
public void testConvertBookTypeToPdf() {
byte[] bytes = new byte[10]; // Sample bytes
String originalFilename = "test.epub"; // Sample original filename
public void testSanitizeZipFilename_NullOrEmpty() {
assertEquals("", FileToPdf.sanitizeZipFilename(null));
assertEquals("", FileToPdf.sanitizeZipFilename(" "));
}
// Check if the method throws IOException
assertThrows(IOException.class, () -> {
FileToPdf.convertBookTypeToPdf(bytes, originalFilename);
});
/**
* Test sanitizeZipFilename to ensure it removes path traversal sequences.
* This includes removing both forward and backward slash sequences.
*/
@Test
public void testSanitizeZipFilename_RemovesTraversalSequences() {
String input = "../some/../path/..\\to\\file.txt";
String expected = "some/path/to/file.txt";
// Print output for debugging purposes
System.out.println("sanitizeZipFilename " + FileToPdf.sanitizeZipFilename(input));
System.out.flush();
// Expect that the method replaces backslashes with forward slashes
// and removes path traversal sequences
assertEquals(expected, FileToPdf.sanitizeZipFilename(input));
}
/**
* Test sanitizeZipFilename to ensure that it removes leading drive letters and slashes.
*/
@Test
public void testSanitizeZipFilename_RemovesLeadingDriveAndSlashes() {
String input = "C:\\folder\\file.txt";
String expected = "folder/file.txt";
assertEquals(expected, FileToPdf.sanitizeZipFilename(input));
input = "/folder/file.txt";
expected = "folder/file.txt";
assertEquals(expected, FileToPdf.sanitizeZipFilename(input));
}
/**
* Test sanitizeZipFilename to verify that safe filenames remain unchanged.
*/
@Test
public void testSanitizeZipFilename_NoChangeForSafeNames() {
String input = "folder/subfolder/file.txt";
assertEquals(input, FileToPdf.sanitizeZipFilename(input));
}
}