package stirling.software.SPDF.utils; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import org.junit.jupiter.api.Test; class CustomHtmlSanitizerTest { @Test void testSanitizeAllowsValidHtml() { // Arrange String validHtml = "
This is valid HTML with formatting.
"; // Act String sanitizedHtml = CustomHtmlSanitizer.sanitize(validHtml); // Assert assertEquals(validHtml, sanitizedHtml); } @Test void testSanitizeAllowsFormattingElements() { // Arrange - Testing Sanitizers.FORMATTING String htmlWithFormatting = "Text with bold, italic, underline, "
+ "emphasis, strong, strikethrough, "
+ "strike, subscript, superscript, "
+ "teletype, code
, big, small.
Blockquote
"), "Blockquote tags should be preserved"); assertTrue(sanitizedHtml.contains(""), "UL tags should be preserved"); assertTrue(sanitizedHtml.contains("
"), "OL tags should be preserved"); assertTrue(sanitizedHtml.contains("
- "), "LI tags should be preserved"); } @Test void testSanitizeAllowsStyles() { // Arrange - Testing Sanitizers.STYLES String htmlWithStyles = "
Styled text
"; // Act String sanitizedHtml = CustomHtmlSanitizer.sanitize(htmlWithStyles); // Assert // The OWASP HTML Sanitizer might filter some specific styles, so we only check that // the sanitized HTML is not empty and contains a paragraph tag with style assertTrue(sanitizedHtml.contains("Example Link"; // Act String sanitizedHtml = CustomHtmlSanitizer.sanitize(htmlWithLink); // Assert // The most important aspect is that the link content is preserved assertTrue(sanitizedHtml.contains("Example Link"), "Link text should be preserved"); // Check that the href is present in some form assertTrue(sanitizedHtml.contains("href="), "Link href attribute should be present"); // Check that the URL is present in some form assertTrue(sanitizedHtml.contains("example.com"), "Link URL should be preserved"); // OWASP sanitizer may handle title attributes differently depending on version // So we won't make strict assertions about the title attribute } @Test void testSanitizeDisallowsJavaScriptLinks() { // Arrange String htmlWithJsLink = "Malicious Link"; // Act String sanitizedHtml = CustomHtmlSanitizer.sanitize(htmlWithJsLink); // Assert assertFalse(sanitizedHtml.contains("javascript:"), "JavaScript URLs should be removed"); // The link tag might still be there, but the href should be sanitized assertTrue(sanitizedHtml.contains("Malicious Link"), "Link text should be preserved"); } @Test void testSanitizeAllowsTables() { // Arrange - Testing Sanitizers.TABLES String htmlWithTable = "
" + "
"; // Act String sanitizedHtml = CustomHtmlSanitizer.sanitize(htmlWithTable); // Assert assertTrue(sanitizedHtml.contains("" + " Header 1 Header 2 " + " Cell 1 Cell 2 " + " Footer "), "Table rows should be preserved"); assertTrue(sanitizedHtml.contains("
"), "Table headers should be preserved"); assertTrue(sanitizedHtml.contains(" "), "Table cells should be preserved"); // Note: border attribute might be removed as it's deprecated in HTML5 // Check for content values instead of exact tag formats because // the sanitizer may normalize tags and attributes assertTrue(sanitizedHtml.contains("Header 1"), "Table header content should be preserved"); assertTrue(sanitizedHtml.contains("Cell 1"), "Table cell content should be preserved"); assertTrue(sanitizedHtml.contains("Footer"), "Table footer content should be preserved"); // OWASP sanitizer may not preserve these structural elements or attributes in the same // format // So we check for the content rather than the exact structure } @Test void testSanitizeAllowsImages() { // Arrange - Testing Sanitizers.IMAGES String htmlWithImage = " "; // Act String sanitizedHtml = CustomHtmlSanitizer.sanitize(htmlWithImage); // Assert assertTrue(sanitizedHtml.contains("
"; // Act String sanitizedHtml = CustomHtmlSanitizer.sanitize(htmlWithDataUrlImage); // Assert assertFalse( sanitizedHtml.contains("data:image/svg"), "Data URLs with potentially malicious content should be removed"); } @Test void testSanitizeRemovesJavaScriptInAttributes() { // Arrange String htmlWithJsEvent = "Click me"; // Act String sanitizedHtml = CustomHtmlSanitizer.sanitize(htmlWithJsEvent); // Assert assertFalse( sanitizedHtml.contains("onclick"), "JavaScript event handlers should be removed"); assertFalse( sanitizedHtml.contains("onmouseover"), "JavaScript event handlers should be removed"); assertTrue(sanitizedHtml.contains("Click me"), "Link text should be preserved"); } @Test void testSanitizeRemovesScriptTags() { // Arrange String htmlWithScript = "
Safe content
"; // Act String sanitizedHtml = CustomHtmlSanitizer.sanitize(htmlWithScript); // Assert assertFalse(sanitizedHtml.contains("" + " " + ""; // Act String sanitizedHtml = CustomHtmlSanitizer.sanitize(complexHtml); // Assert assertTrue(sanitizedHtml.contains("") && sanitizedHtml.contains("test"), "Strong tag should be preserved"); // Check for content rather than exact formatting assertTrue( sanitizedHtml.contains(""), "Script tag should be removed"); assertFalse(sanitizedHtml.contains("