mirror of
https://github.com/Stirling-Tools/Stirling-PDF.git
synced 2025-04-19 11:11:18 +00:00
first commit
This commit is contained in:
parent
4d6f951604
commit
782ae778a7
@ -0,0 +1,226 @@
|
||||
package stirling.software.SPDF.controller.api.converters;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.junit.jupiter.api.DisplayName;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.MockedStatic;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.mock.web.MockMultipartFile;
|
||||
|
||||
import stirling.software.SPDF.config.RuntimePathConfig;
|
||||
import stirling.software.SPDF.model.ApplicationProperties;
|
||||
import stirling.software.SPDF.model.api.GeneralFile;
|
||||
import stirling.software.SPDF.service.CustomPDFDocumentFactory;
|
||||
import stirling.software.SPDF.utils.FileToPdf;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class ConvertMarkdownToPdfTest {
|
||||
|
||||
@Mock private CustomPDFDocumentFactory pdfDocumentFactory;
|
||||
|
||||
@Mock private ApplicationProperties applicationProperties;
|
||||
|
||||
@Mock private RuntimePathConfig runtimePathConfig;
|
||||
|
||||
@InjectMocks private ConvertMarkdownToPdf convertMarkdownToPdf;
|
||||
|
||||
@Test
|
||||
@DisplayName("Should convert valid markdown file to PDF")
|
||||
void testMarkdownToPdf_validMarkdown_success() throws Exception {
|
||||
String mdContent =
|
||||
"# Hello\n\nThis is a **test** of markdown to PDF.\n\n- Item 1\n- Item 2";
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile(
|
||||
"fileInput", "test.md", "text/markdown", mdContent.getBytes());
|
||||
|
||||
GeneralFile request = new GeneralFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
byte[] fakePdf = "%PDF-Mock".getBytes(StandardCharsets.UTF_8);
|
||||
|
||||
ApplicationProperties.System systemMock = mock(ApplicationProperties.System.class);
|
||||
when(systemMock.getDisableSanitize()).thenReturn(false);
|
||||
when(applicationProperties.getSystem()).thenReturn(systemMock);
|
||||
when(runtimePathConfig.getWeasyPrintPath()).thenReturn("/usr/bin/weasyprint");
|
||||
|
||||
try (MockedStatic<FileToPdf> mockedStatic = mockStatic(FileToPdf.class)) {
|
||||
mockedStatic
|
||||
.when(
|
||||
() ->
|
||||
FileToPdf.convertHtmlToPdf(
|
||||
any(), any(), any(), any(), anyBoolean()))
|
||||
.thenReturn(fakePdf);
|
||||
|
||||
when(pdfDocumentFactory.createNewBytesBasedOnOldDocument(fakePdf)).thenReturn(fakePdf);
|
||||
|
||||
ResponseEntity<byte[]> response = convertMarkdownToPdf.markdownToPdf(request);
|
||||
|
||||
assertEquals(200, response.getStatusCodeValue());
|
||||
assertNotNull(response.getBody());
|
||||
assertTrue(response.getBody().length > 0);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should throw error when uploading .txt instead of .md")
|
||||
void testMarkdownToPdf_invalidExtension_throwsException() {
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile(
|
||||
"fileInput", "test.txt", "text/plain", "invalid content".getBytes());
|
||||
GeneralFile request = new GeneralFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
assertThrows(
|
||||
IllegalArgumentException.class, () -> convertMarkdownToPdf.markdownToPdf(request));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should throw error when uploading empty markdown")
|
||||
void testMarkdownToPdf_emptyMarkdown_throwsException() {
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile("fileInput", "test.md", "text/markdown", new byte[0]);
|
||||
GeneralFile request = new GeneralFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
assertThrows(Exception.class, () -> convertMarkdownToPdf.markdownToPdf(request));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should throw error when no file is provided")
|
||||
void testMarkdownToPdf_nullFile_throwsException() {
|
||||
GeneralFile request = new GeneralFile();
|
||||
request.setFileInput(null);
|
||||
|
||||
assertThrows(
|
||||
IllegalArgumentException.class, () -> convertMarkdownToPdf.markdownToPdf(request));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should convert real Markdown file (from resources) to PDF")
|
||||
void testMarkdownToPdf_fromFile_success() throws Exception {
|
||||
InputStream input = getClass().getClassLoader().getResourceAsStream("Markdown.md");
|
||||
assertNotNull(input, "Markdown.md file not found in test resources");
|
||||
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile("fileInput", "Markdown.md", "text/markdown", input);
|
||||
GeneralFile request = new GeneralFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
byte[] fakePdf = "%PDF-Mock".getBytes(StandardCharsets.UTF_8);
|
||||
|
||||
ApplicationProperties.System systemMock = mock(ApplicationProperties.System.class);
|
||||
when(systemMock.getDisableSanitize()).thenReturn(false);
|
||||
when(applicationProperties.getSystem()).thenReturn(systemMock);
|
||||
when(runtimePathConfig.getWeasyPrintPath()).thenReturn("/usr/bin/weasyprint");
|
||||
|
||||
try (MockedStatic<FileToPdf> mockedStatic = mockStatic(FileToPdf.class)) {
|
||||
mockedStatic
|
||||
.when(
|
||||
() ->
|
||||
FileToPdf.convertHtmlToPdf(
|
||||
any(), any(), any(), any(), anyBoolean()))
|
||||
.thenReturn(fakePdf);
|
||||
|
||||
when(pdfDocumentFactory.createNewBytesBasedOnOldDocument(fakePdf)).thenReturn(fakePdf);
|
||||
|
||||
ResponseEntity<byte[]> response = convertMarkdownToPdf.markdownToPdf(request);
|
||||
|
||||
assertEquals(200, response.getStatusCodeValue());
|
||||
assertNotNull(response.getBody());
|
||||
assertTrue(response.getBody().length > 0);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should convert markdown with table to PDF successfully")
|
||||
void testMarkdownToPdf_withTable_success() throws Exception {
|
||||
String tableMd =
|
||||
"| Name | Score |\n"
|
||||
+ "|-------|-------|\n"
|
||||
+ "| Alice | 95 |\n"
|
||||
+ "| Bob | 88 |";
|
||||
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile("fileInput", "table.md", "text/markdown", tableMd.getBytes());
|
||||
GeneralFile request = new GeneralFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
byte[] fakePdf = "%PDF-Mock-TABLE".getBytes(StandardCharsets.UTF_8);
|
||||
|
||||
ApplicationProperties.System systemMock = mock(ApplicationProperties.System.class);
|
||||
when(systemMock.getDisableSanitize()).thenReturn(false);
|
||||
when(applicationProperties.getSystem()).thenReturn(systemMock);
|
||||
when(runtimePathConfig.getWeasyPrintPath()).thenReturn("/usr/bin/weasyprint");
|
||||
|
||||
try (MockedStatic<FileToPdf> mockedStatic = mockStatic(FileToPdf.class)) {
|
||||
mockedStatic
|
||||
.when(
|
||||
() ->
|
||||
FileToPdf.convertHtmlToPdf(
|
||||
any(), any(), any(), any(), anyBoolean()))
|
||||
.thenReturn(fakePdf);
|
||||
|
||||
when(pdfDocumentFactory.createNewBytesBasedOnOldDocument(fakePdf)).thenReturn(fakePdf);
|
||||
|
||||
ResponseEntity<byte[]> response = convertMarkdownToPdf.markdownToPdf(request);
|
||||
|
||||
assertEquals(200, response.getStatusCodeValue());
|
||||
assertNotNull(response.getBody());
|
||||
assertTrue(response.getBody().length > 0);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should convert markdown with image to PDF successfully")
|
||||
void testMarkdownToPdf_withImage_success() throws Exception {
|
||||
String mdWithImage =
|
||||
"# Image Test\n\n\n";
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile(
|
||||
"fileInput", "image.md", "text/markdown", mdWithImage.getBytes());
|
||||
GeneralFile request = new GeneralFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
byte[] fakePdf = "%PDF-Mock-IMAGE".getBytes(StandardCharsets.UTF_8);
|
||||
|
||||
ApplicationProperties.System systemMock = mock(ApplicationProperties.System.class);
|
||||
when(systemMock.getDisableSanitize()).thenReturn(false);
|
||||
when(applicationProperties.getSystem()).thenReturn(systemMock);
|
||||
when(runtimePathConfig.getWeasyPrintPath()).thenReturn("/usr/bin/weasyprint");
|
||||
|
||||
try (MockedStatic<FileToPdf> mockedStatic = mockStatic(FileToPdf.class)) {
|
||||
mockedStatic
|
||||
.when(
|
||||
() ->
|
||||
FileToPdf.convertHtmlToPdf(
|
||||
any(), any(), any(), any(), anyBoolean()))
|
||||
.thenReturn(fakePdf);
|
||||
when(pdfDocumentFactory.createNewBytesBasedOnOldDocument(fakePdf)).thenReturn(fakePdf);
|
||||
|
||||
ResponseEntity<byte[]> response = convertMarkdownToPdf.markdownToPdf(request);
|
||||
assertEquals(200, response.getStatusCodeValue());
|
||||
assertNotNull(response.getBody());
|
||||
assertTrue(response.getBody().length > 0);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should throw error when markdown content is null")
|
||||
void testMarkdownToPdf_nullContent_throwsException() {
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile("fileInput", "null.md", "text/markdown", (byte[]) null);
|
||||
GeneralFile request = new GeneralFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
assertThrows(Exception.class, () -> convertMarkdownToPdf.markdownToPdf(request));
|
||||
}
|
||||
}
|
@ -0,0 +1,287 @@
|
||||
package stirling.software.SPDF.controller.api.converters;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.junit.jupiter.api.DisplayName;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.mock.web.MockMultipartFile;
|
||||
|
||||
import stirling.software.SPDF.model.api.PDFFile;
|
||||
import stirling.software.SPDF.model.api.converters.ConvertPDFToMarkdown;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class ConvertPDFToMarkdownTest {
|
||||
|
||||
@InjectMocks private ConvertPDFToMarkdown convertPDFToMarkdown;
|
||||
|
||||
@Test
|
||||
@DisplayName("Should return 400 for non-PDF file")
|
||||
void testNonPdfFileReturnsBadRequest() throws Exception {
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile(
|
||||
"fileInput", "note.txt", "text/plain", "not a pdf".getBytes());
|
||||
|
||||
PDFFile request = new PDFFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
ResponseEntity<byte[]> response = convertPDFToMarkdown.processPdfToMarkdown(request);
|
||||
assertEquals(HttpStatus.BAD_REQUEST, response.getStatusCode());
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should throw error on empty PDF file")
|
||||
void testEmptyPdfThrowsError() {
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile("fileInput", "empty.pdf", "application/pdf", new byte[0]);
|
||||
|
||||
PDFFile request = new PDFFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
assertThrows(Exception.class, () -> convertPDFToMarkdown.processPdfToMarkdown(request));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should convert single-page PDF to markdown content")
|
||||
void testSinglePagePdfToMarkdown() throws Exception {
|
||||
InputStream is = getClass().getClassLoader().getResourceAsStream("test.pdf");
|
||||
assertNotNull(is);
|
||||
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile("fileInput", "test.pdf", "application/pdf", is);
|
||||
|
||||
PDFFile request = new PDFFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
ResponseEntity<byte[]> response = convertPDFToMarkdown.processPdfToMarkdown(request);
|
||||
|
||||
assertEquals(200, response.getStatusCodeValue());
|
||||
assertTrue(response.getBody().length > 0);
|
||||
|
||||
String content = new String(response.getBody(), StandardCharsets.UTF_8);
|
||||
assertTrue(content.contains("wkp")); // or other title
|
||||
}
|
||||
|
||||
/**
|
||||
* Why we use `zipBytes.length > 0` instead of checking .md file presence: Due to limitations in
|
||||
* the current PDF-to-Markdown conversion pipeline, the content extracted from certain PDFs
|
||||
* (especially those lacking structured tags or composed of image-based text) may not yield any
|
||||
* usable HTML elements that Flexmark can convert. As a result, even multi-page PDFs might
|
||||
* produce no .md output or an empty zip archive.
|
||||
*
|
||||
* <p>Here we simply assert that the zip is non-empty to verify the conversion process at least
|
||||
* completed and returned a binary response.
|
||||
*
|
||||
* <p>If heading-level Markdown output is required, the underlying HTML parser or converter
|
||||
* logic would need to be enhanced to recognize heading structures more robustly (e.g., based on
|
||||
* font size).
|
||||
*/
|
||||
@Test
|
||||
@DisplayName("Should return zip with markdown for multi-page PDF")
|
||||
void testMultiPagePdfReturnsZip() throws Exception {
|
||||
InputStream is = getClass().getClassLoader().getResourceAsStream("multi_page.pdf");
|
||||
assertNotNull(is);
|
||||
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile("fileInput", "multi_page.pdf", "application/pdf", is);
|
||||
|
||||
PDFFile request = new PDFFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
ResponseEntity<byte[]> response = convertPDFToMarkdown.processPdfToMarkdown(request);
|
||||
|
||||
assertEquals(200, response.getStatusCodeValue());
|
||||
byte[] zipBytes = response.getBody();
|
||||
|
||||
assertTrue(zipBytes.length > 0, "Zip content should not be empty");
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should convert a valid PDF to Markdown successfully")
|
||||
void testProcessPdfToMarkdown_validPdf_returnsMarkdownBytes() throws Exception {
|
||||
// Arrange
|
||||
InputStream input = getClass().getClassLoader().getResourceAsStream("test.pdf");
|
||||
assertNotNull(input, "Test PDF file not found in resources");
|
||||
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile("fileInput", "test.pdf", "application/pdf", input);
|
||||
|
||||
PDFFile request = new PDFFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
// Act
|
||||
ResponseEntity<byte[]> response = convertPDFToMarkdown.processPdfToMarkdown(request);
|
||||
|
||||
// Assert
|
||||
assertEquals(200, response.getStatusCodeValue(), "Response status should be 200 OK");
|
||||
assertNotNull(response.getBody(), "Returned body should not be null");
|
||||
assertTrue(response.getBody().length > 0, "Returned markdown should not be empty");
|
||||
|
||||
String markdownContent = new String(response.getBody(), StandardCharsets.UTF_8);
|
||||
System.out.println("testResult" + markdownContent);
|
||||
assertTrue(markdownContent.contains("wkp testing case"));
|
||||
assertTrue(markdownContent.contains("**first title**"));
|
||||
assertTrue(markdownContent.contains("*second title"));
|
||||
}
|
||||
|
||||
/*
|
||||
* ⚠️ Known Limitation in the Current Implementation:
|
||||
*
|
||||
* The method PDFToFile.processPdfToMarkdown first converts the PDF file to HTML using `pdftohtml`,
|
||||
* and then transforms the resulting HTML into Markdown using FlexmarkHtmlConverter.
|
||||
*
|
||||
* However, this conversion pipeline fails to accurately preserve heading hierarchy (e.g., level-1 `#`, level-2 `##` headings).
|
||||
* Instead, all heading-like elements from the original PDF are flattened and rendered as bold text (`**bold**`) in the final Markdown output,
|
||||
* resulting in a loss of structural semantics.
|
||||
*
|
||||
* Possible reasons include:
|
||||
* - The HTML generated by `pdftohtml` lacks proper semantic tags like `<h1>`, `<h2>`, etc.
|
||||
* - FlexmarkHtmlConverter interprets non-semantic tags (e.g., `<font>`, `<b>`) as plain bold formatting.
|
||||
*
|
||||
* 📌 Testing Suggestions:
|
||||
* - Focus on asserting the presence of key content in the output Markdown, rather than relying on heading syntax like `#`, `##`, etc.
|
||||
* - If heading hierarchy is required, consider enhancing the HTML-to-Markdown conversion logic or replacing `pdftohtml`
|
||||
* with a more semantically rich parsing toolchain (e.g., `pdf2json` with AST-level analysis).
|
||||
*/
|
||||
@Test
|
||||
@DisplayName("Should convert a valid PDF to Markdown successfully1")
|
||||
void testProcessPdfToMarkdown_validPdf_returnsMarkdownBytes1() throws Exception {
|
||||
// Arrange
|
||||
InputStream input = getClass().getClassLoader().getResourceAsStream("test.pdf");
|
||||
assertNotNull(input, "Test PDF file not found in resources");
|
||||
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile("fileInput", "multi_page.pdf", "application/pdf", input);
|
||||
|
||||
PDFFile request = new PDFFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
// Act
|
||||
ResponseEntity<byte[]> response = convertPDFToMarkdown.processPdfToMarkdown(request);
|
||||
|
||||
// Assert
|
||||
assertEquals(200, response.getStatusCodeValue(), "Response status should be 200 OK");
|
||||
assertNotNull(response.getBody(), "Returned body should not be null");
|
||||
assertTrue(response.getBody().length > 0, "Returned markdown should not be empty");
|
||||
|
||||
String markdownContent = new String(response.getBody(), StandardCharsets.UTF_8);
|
||||
assertTrue(markdownContent.contains("**wkp"));
|
||||
assertTrue(markdownContent.contains("**second title"));
|
||||
assertTrue(markdownContent.contains("**third title"));
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should throw exception for empty PDF input")
|
||||
void testProcessPdfToMarkdown_emptyFile_throwsException() {
|
||||
// Arrange
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile("fileInput", "test.pdf", "application/pdf", new byte[0]);
|
||||
PDFFile request = new PDFFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
// Act & Assert
|
||||
Exception exception =
|
||||
assertThrows(
|
||||
Exception.class,
|
||||
() -> convertPDFToMarkdown.processPdfToMarkdown(request),
|
||||
"Expected exception for empty input file");
|
||||
assertNotNull(exception.getMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should throw exception for non-PDF file with .pdf extension")
|
||||
void testProcessPdfToMarkdown_invalidPdf_throwsException() {
|
||||
// Arrange
|
||||
byte[] invalidContent = "This is not a real PDF".getBytes();
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile("fileInput", "fake.pdf", "application/pdf", invalidContent);
|
||||
PDFFile request = new PDFFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
// Act & Assert
|
||||
Exception exception =
|
||||
assertThrows(
|
||||
Exception.class,
|
||||
() -> convertPDFToMarkdown.processPdfToMarkdown(request),
|
||||
"Expected exception for invalid PDF file");
|
||||
assertTrue(
|
||||
exception.getMessage().contains("Syntax Error")
|
||||
|| exception.getMessage().contains("Couldn't"),
|
||||
"Error message should indicate syntax or parsing failure");
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should throw exception if no file is provided")
|
||||
void testProcessPdfToMarkdown_nullFile_throwsException() {
|
||||
// Arrange
|
||||
PDFFile request = new PDFFile();
|
||||
request.setFileInput(null);
|
||||
|
||||
// Act & Assert
|
||||
assertThrows(
|
||||
NullPointerException.class,
|
||||
() -> convertPDFToMarkdown.processPdfToMarkdown(request),
|
||||
"Expected NullPointerException when no file is provided");
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should convert PDF with image to Markdown successfully")
|
||||
void testProcessPdfToMarkdown_withImagePdf_success() throws Exception {
|
||||
InputStream is = getClass().getClassLoader().getResourceAsStream("pdf_with_image.pdf");
|
||||
assertNotNull(is, "Test PDF with image not found");
|
||||
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile("fileInput", "pdf_with_image.pdf", "application/pdf", is);
|
||||
|
||||
PDFFile request = new PDFFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
ResponseEntity<byte[]> response = convertPDFToMarkdown.processPdfToMarkdown(request);
|
||||
|
||||
assertEquals(200, response.getStatusCodeValue());
|
||||
assertNotNull(response.getBody());
|
||||
assertTrue(response.getBody().length > 0);
|
||||
|
||||
String markdown = new String(response.getBody(), StandardCharsets.UTF_8);
|
||||
assertTrue(markdown.contains("!["), "Expected Markdown image syntax");
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should convert PDF with table to Markdown successfully")
|
||||
void testProcessPdfToMarkdown_withTablePdf_success() throws Exception {
|
||||
InputStream is = getClass().getClassLoader().getResourceAsStream("pdf_with_image.pdf");
|
||||
assertNotNull(is, "Test PDF with table not found");
|
||||
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile("fileInput", "pdf_with_image.pdf", "application/pdf", is);
|
||||
|
||||
PDFFile request = new PDFFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
ResponseEntity<byte[]> response = convertPDFToMarkdown.processPdfToMarkdown(request);
|
||||
|
||||
assertEquals(200, response.getStatusCodeValue());
|
||||
assertNotNull(response.getBody());
|
||||
assertTrue(response.getBody().length > 0);
|
||||
|
||||
String markdown = new String(response.getBody(), StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Should throw error when PDF input content is null")
|
||||
void testProcessPdfToMarkdown_nullContentPdf_throwsException() {
|
||||
MockMultipartFile file =
|
||||
new MockMultipartFile("fileInput", "null.pdf", "application/pdf", (byte[]) null);
|
||||
PDFFile request = new PDFFile();
|
||||
request.setFileInput(file);
|
||||
|
||||
assertThrows(Exception.class, () -> convertPDFToMarkdown.processPdfToMarkdown(request));
|
||||
}
|
||||
}
|
85
src/test/resources/Markdown.md
Normal file
85
src/test/resources/Markdown.md
Normal file
@ -0,0 +1,85 @@
|
||||
# wkp
|
||||
|
||||
## second title
|
||||
|
||||
### third title
|
||||
|
||||
|
||||
|
||||
* *Content
|
||||
|
||||
|
||||
# Markdown Title
|
||||
|
||||
This is a paragraph.
|
||||
|
||||
- First item
|
||||
- Second item
|
||||
-
|
||||
-
|
||||
-
|
||||
| Name | Score |
|
||||
|-------|-------|
|
||||
| Alice | 95 |
|
||||
| Bob | 88 |
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
* test multiple
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### multiple
|
0
src/test/resources/invalid.txt
Normal file
0
src/test/resources/invalid.txt
Normal file
BIN
src/test/resources/multi_page.pdf
Normal file
BIN
src/test/resources/multi_page.pdf
Normal file
Binary file not shown.
19
src/test/resources/pdf_with_image.md
Normal file
19
src/test/resources/pdf_with_image.md
Normal file
@ -0,0 +1,19 @@
|
||||
```
|
||||
pdf_with_image
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
wkp testing
|
||||
|
||||
|
||||
|
||||

|
||||
|
||||
-
|
||||
| Name | Score |
|
||||
|-------|-------|
|
||||
| Alice | 95 |
|
||||
| Bob | 88 |
|
BIN
src/test/resources/pdf_with_image.pdf
Normal file
BIN
src/test/resources/pdf_with_image.pdf
Normal file
Binary file not shown.
BIN
src/test/resources/test.pdf
Normal file
BIN
src/test/resources/test.pdf
Normal file
Binary file not shown.
0
src/test/resources/valid.md
Normal file
0
src/test/resources/valid.md
Normal file
77
testResult/MarkdowntoPDF/Markdown.md
Normal file
77
testResult/MarkdowntoPDF/Markdown.md
Normal file
@ -0,0 +1,77 @@
|
||||
# wkp
|
||||
|
||||
## second title
|
||||
|
||||
### third title
|
||||
|
||||
|
||||
|
||||
* *Content
|
||||

|
||||
-
|
||||
| Name | Score |
|
||||
|-------|-------|
|
||||
| Alice | 95 |
|
||||
| Bob | 88 |
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
* test multiple
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### multiple
|
BIN
testResult/MarkdowntoPDF/PDF.pdf
Normal file
BIN
testResult/MarkdowntoPDF/PDF.pdf
Normal file
Binary file not shown.
50
testResult/MarkdowntoPDF/README.md
Normal file
50
testResult/MarkdowntoPDF/README.md
Normal file
@ -0,0 +1,50 @@
|
||||
### E2E
|
||||
|
||||
* The Markdown file contains a picture, but the tool cannot render it.
|
||||
* Even the text that follows the picture is not rendered.
|
||||
|
||||
|
||||
### API
|
||||
* normal request
|
||||
```shell
|
||||
curl 'http://localhost:9090/api/v1/convert/markdown/pdf' \
|
||||
-H 'Accept: */*' \
|
||||
-H 'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8' \
|
||||
-H 'Cache-Control: no-cache' \
|
||||
-H 'Connection: keep-alive' \
|
||||
-H 'Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryrcTlqeW0YnirF5hW' \
|
||||
-b 'JSESSIONID=<your-session-id>' \
|
||||
-H 'Origin: http://localhost:9090' \
|
||||
-H 'Pragma: no-cache' \
|
||||
-H 'Referer: http://localhost:9090/markdown-to-pdf' \
|
||||
-H 'Sec-Fetch-Dest: empty' \
|
||||
-H 'Sec-Fetch-Mode: cors' \
|
||||
-H 'Sec-Fetch-Site: same-origin' \
|
||||
-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36' \
|
||||
-H 'sec-ch-ua: "Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"' \
|
||||
-H 'sec-ch-ua-mobile: ?0' \
|
||||
-H 'sec-ch-ua-platform: "macOS"' \
|
||||
--data-raw $'------WebKitFormBoundaryrcTlqeW0YnirF5hW\r\nContent-Disposition: form-data; name="fileInput"; filename="Markdown.md"\r\nContent-Type: text/markdown\r\n\r\n\r\n------WebKitFormBoundaryrcTlqeW0YnirF5hW--\r\n'\
|
||||
--output output.pdf
|
||||
```
|
||||
* response
|
||||
```shell
|
||||
% Total % Received % Xferd Average Speed Time Time Time Current
|
||||
Dload Upload Total Spent Left Speed
|
||||
100 1104 100 911 100 193 17 3 0:01:04 0:00:50 0:00:14 249
|
||||
```
|
||||
* existing file
|
||||
```shell
|
||||
carpewang@wangkaipengdeMacBook-Pro ~ % curl -X POST "http://localhost:9090/api/v1/convert/markdown/pdf" \
|
||||
-H "Origin: http://localhost:9090" \
|
||||
-H "Referer: http://localhost:9090/markdown-to-pdf" \
|
||||
-F "fileInput=@\"/Users/carpewang/Desktop/Software Testing/Stirling-PDF/testResult/MarkdowntoPDF/Markdown.md\";type=text/markdown" \
|
||||
--output output.pdf
|
||||
```
|
||||
* response
|
||||
```shell
|
||||
% Total % Received % Xferd Average Speed Time Time Time Current
|
||||
Dload Upload Total Spent Left Speed
|
||||
100 8004 100 7556 100 448 228 13 0:00:34 0:00:32 0:00:02 2035
|
||||
carpewang@wangkaipengdeMacBook-Pro ~ %
|
||||
```
|
BIN
testResult/MarkdowntoPDF/img.png
Normal file
BIN
testResult/MarkdowntoPDF/img.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 324 KiB |
38
testResult/PDFtoMarkdown/PDFTestingFile.md
Normal file
38
testResult/PDFtoMarkdown/PDFTestingFile.md
Normal file
@ -0,0 +1,38 @@
|
||||

|
||||
|
||||
**wkp**
|
||||
|
||||
**second title**
|
||||
|
||||
**third title**
|
||||
|
||||
\*Content
|
||||
|
||||
-
|
||||
|
||||
test multiple
|
||||
|
||||
**multiple**
|
||||
|
||||
**Document Outline**
|
||||
|
||||
wkp
|
||||
|
||||
second title
|
||||
|
||||
third title
|
||||
|
||||
multiple
|
||||

|
||||
|
||||
here is picture
|
||||
|
||||
**there is the testing after the picture**
|
||||
|
||||
*** ** * ** ***
|
||||
|
||||
Document Outline
|
||||
================
|
||||
|
||||
* [Document Outline](PDFTestingFile.html#1)
|
||||
* [there is the testing after the picture](PDFTestingFile.html#2)
|
BIN
testResult/PDFtoMarkdown/PDFTestingFile.pdf
Normal file
BIN
testResult/PDFtoMarkdown/PDFTestingFile.pdf
Normal file
Binary file not shown.
22
testResult/PDFtoMarkdown/README.md
Normal file
22
testResult/PDFtoMarkdown/README.md
Normal file
@ -0,0 +1,22 @@
|
||||
```curl
|
||||
curl 'http://localhost:9090/api/v1/convert/pdf/markdown' \
|
||||
-H 'Accept: */*' \
|
||||
-H 'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8' \
|
||||
-H 'Cache-Control: no-cache' \
|
||||
-H 'Connection: keep-alive' \
|
||||
-H 'Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryPJHT4P9TTmBkYHfl' \
|
||||
-b 'JSESSIONID=<your-session-id>' \
|
||||
-H 'Origin: http://localhost:9090' \
|
||||
-H 'Pragma: no-cache' \
|
||||
-H 'Referer: http://localhost:9090/pdf-to-markdown' \
|
||||
-H 'Sec-Fetch-Dest: empty' \
|
||||
-H 'Sec-Fetch-Mode: cors' \
|
||||
-H 'Sec-Fetch-Site: same-origin' \
|
||||
-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36' \
|
||||
-H 'sec-ch-ua: "Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"' \
|
||||
-H 'sec-ch-ua-mobile: ?0' \
|
||||
-H 'sec-ch-ua-platform: "macOS"' \
|
||||
--data-raw $'------WebKitFormBoundaryPJHT4P9TTmBkYHfl\r\nContent-Disposition: form-data; name="fileInput"; filename="pdf.pdf"\r\nContent-Type: application/pdf\r\n\r\n\r\n------WebKitFormBoundaryPJHT4P9TTmBkYHfl--\r\n'
|
||||
```
|
||||
|
||||

|
BIN
testResult/PDFtoMarkdown/img.png
Normal file
BIN
testResult/PDFtoMarkdown/img.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 348 KiB |
BIN
testResult/PDFtoMarkdown/img_1.png
Normal file
BIN
testResult/PDFtoMarkdown/img_1.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 119 KiB |
Loading…
x
Reference in New Issue
Block a user