2025-01-26 13:10:16 +00:00
|
|
|
package stirling.software.SPDF.controller.api;
|
|
|
|
|
2025-01-30 15:13:42 +00:00
|
|
|
import java.io.IOException;
|
|
|
|
import java.util.*;
|
|
|
|
|
2025-01-26 13:10:16 +00:00
|
|
|
import org.apache.pdfbox.cos.COSName;
|
|
|
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
2025-01-30 15:13:42 +00:00
|
|
|
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
|
2025-01-26 13:10:16 +00:00
|
|
|
import org.apache.pdfbox.pdmodel.PDPage;
|
|
|
|
import org.apache.pdfbox.pdmodel.PDPageTree;
|
|
|
|
import org.apache.pdfbox.pdmodel.encryption.PDEncryption;
|
2025-01-30 15:13:42 +00:00
|
|
|
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
|
|
|
|
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
|
2025-01-26 13:10:16 +00:00
|
|
|
import org.springframework.web.bind.annotation.*;
|
2025-01-30 15:13:42 +00:00
|
|
|
|
2025-01-26 13:10:16 +00:00
|
|
|
import io.swagger.v3.oas.annotations.Operation;
|
|
|
|
import io.swagger.v3.oas.annotations.tags.Tag;
|
|
|
|
|
2025-04-25 15:35:12 +02:00
|
|
|
import lombok.RequiredArgsConstructor;
|
|
|
|
|
2025-01-30 15:13:42 +00:00
|
|
|
import stirling.software.SPDF.model.api.PDFFile;
|
2025-03-12 13:13:44 +01:00
|
|
|
import stirling.software.SPDF.service.CustomPDFDocumentFactory;
|
2025-01-26 13:10:16 +00:00
|
|
|
|
|
|
|
@RestController
|
|
|
|
@RequestMapping("/api/v1/analysis")
|
|
|
|
@Tag(name = "Analysis", description = "Analysis APIs")
|
2025-04-25 15:35:12 +02:00
|
|
|
@RequiredArgsConstructor
|
2025-01-26 13:10:16 +00:00
|
|
|
public class AnalysisController {
|
|
|
|
|
2025-03-12 13:13:44 +01:00
|
|
|
private final CustomPDFDocumentFactory pdfDocumentFactory;
|
2025-03-08 00:03:27 +00:00
|
|
|
|
2025-01-30 15:13:42 +00:00
|
|
|
@PostMapping(value = "/page-count", consumes = "multipart/form-data")
|
|
|
|
@Operation(
|
|
|
|
summary = "Get PDF page count",
|
|
|
|
description = "Returns total number of pages in PDF. Input:PDF Output:JSON Type:SISO")
|
|
|
|
public Map<String, Integer> getPageCount(@ModelAttribute PDFFile file) throws IOException {
|
2025-03-10 20:17:45 +00:00
|
|
|
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) {
|
2025-01-30 15:13:42 +00:00
|
|
|
return Map.of("pageCount", document.getNumberOfPages());
|
|
|
|
}
|
|
|
|
}
|
2025-01-30 11:01:35 +00:00
|
|
|
|
2025-01-30 15:13:42 +00:00
|
|
|
@PostMapping(value = "/basic-info", consumes = "multipart/form-data")
|
|
|
|
@Operation(
|
|
|
|
summary = "Get basic PDF information",
|
|
|
|
description = "Returns page count, version, file size. Input:PDF Output:JSON Type:SISO")
|
2025-01-26 13:10:16 +00:00
|
|
|
public Map<String, Object> getBasicInfo(@ModelAttribute PDFFile file) throws IOException {
|
2025-03-10 20:17:45 +00:00
|
|
|
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) {
|
2025-01-26 13:10:16 +00:00
|
|
|
Map<String, Object> info = new HashMap<>();
|
|
|
|
info.put("pageCount", document.getNumberOfPages());
|
|
|
|
info.put("pdfVersion", document.getVersion());
|
|
|
|
info.put("fileSize", file.getFileInput().getSize());
|
|
|
|
return info;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2025-01-30 15:13:42 +00:00
|
|
|
@PostMapping(value = "/document-properties", consumes = "multipart/form-data")
|
|
|
|
@Operation(
|
|
|
|
summary = "Get PDF document properties",
|
|
|
|
description = "Returns title, author, subject, etc. Input:PDF Output:JSON Type:SISO")
|
|
|
|
public Map<String, String> getDocumentProperties(@ModelAttribute PDFFile file)
|
|
|
|
throws IOException {
|
fix read wrong properties (#3472)
# Description of Changes
Please provide a summary of the changes, including:
Test file:
[12345678.pdf](https://github.com/user-attachments/files/20028981/12345678.pdf)
Behavior without readOnly
```json
{
"creator": null,
"modificationDate": "java.util.GregorianCalendar[time=1746381303000,areFieldsSet=true,areAllFieldsSet=true,lenient=false,zone=java.util.SimpleTimeZone[id=GMT,offset=0,dstSavings=3600000,useDaylight=false,startYear=0,startMode=0,startMonth=0,startDay=0,startDayOfWeek=0,startTime=0,startTimeMode=0,endMode=0,endMonth=0,endDay=0,endDayOfWeek=0,endTime=0,endTimeMode=0],firstDayOfWeek=1,minimalDaysInFirstWeek=1,ERA=1,YEAR=2025,MONTH=4,WEEK_OF_YEAR=19,WEEK_OF_MONTH=2,DAY_OF_MONTH=4,DAY_OF_YEAR=124,DAY_OF_WEEK=1,DAY_OF_WEEK_IN_MONTH=1,AM_PM=1,HOUR=5,HOUR_OF_DAY=17,MINUTE=55,SECOND=3,MILLISECOND=0,ZONE_OFFSET=0,DST_OFFSET=0]",
"keywords": null,
"author": "",
"subject": null,
"producer": "Stirling-PDF v0.46.0",
"title": "Microsoft Word - Dokument1",
"creationDate": "java.util.GregorianCalendar[time=1746381238000,areFieldsSet=true,areAllFieldsSet=true,lenient=false,zone=java.util.SimpleTimeZone[id=GMT+02:00,offset=7200000,dstSavings=3600000,useDaylight=false,startYear=0,startMode=0,startMonth=0,startDay=0,startDayOfWeek=0,startTime=0,startTimeMode=0,endMode=0,endMonth=0,endDay=0,endDayOfWeek=0,endTime=0,endTimeMode=0],firstDayOfWeek=1,minimalDaysInFirstWeek=1,ERA=1,YEAR=2025,MONTH=4,WEEK_OF_YEAR=19,WEEK_OF_MONTH=2,DAY_OF_MONTH=4,DAY_OF_YEAR=124,DAY_OF_WEEK=1,DAY_OF_WEEK_IN_MONTH=1,AM_PM=1,HOUR=7,HOUR_OF_DAY=19,MINUTE=53,SECOND=58,MILLISECOND=0,ZONE_OFFSET=7200000,DST_OFFSET=0]"
}
```
with readOnly=true
```json
{
"creator": null,
"modificationDate": "java.util.GregorianCalendar[time=1746381238000,areFieldsSet=true,areAllFieldsSet=true,lenient=false,zone=java.util.SimpleTimeZone[id=GMT+02:00,offset=7200000,dstSavings=3600000,useDaylight=false,startYear=0,startMode=0,startMonth=0,startDay=0,startDayOfWeek=0,startTime=0,startTimeMode=0,endMode=0,endMonth=0,endDay=0,endDayOfWeek=0,endTime=0,endTimeMode=0],firstDayOfWeek=1,minimalDaysInFirstWeek=1,ERA=1,YEAR=2025,MONTH=4,WEEK_OF_YEAR=19,WEEK_OF_MONTH=2,DAY_OF_MONTH=4,DAY_OF_YEAR=124,DAY_OF_WEEK=1,DAY_OF_WEEK_IN_MONTH=1,AM_PM=1,HOUR=7,HOUR_OF_DAY=19,MINUTE=53,SECOND=58,MILLISECOND=0,ZONE_OFFSET=7200000,DST_OFFSET=0]",
"keywords": null,
"author": "",
"subject": null,
"producer": "Microsoft: Print To PDF",
"title": "Microsoft Word - Dokument1",
"creationDate": "java.util.GregorianCalendar[time=1746381238000,areFieldsSet=true,areAllFieldsSet=true,lenient=false,zone=java.util.SimpleTimeZone[id=GMT+02:00,offset=7200000,dstSavings=3600000,useDaylight=false,startYear=0,startMode=0,startMonth=0,startDay=0,startDayOfWeek=0,startTime=0,startTimeMode=0,endMode=0,endMonth=0,endDay=0,endDayOfWeek=0,endTime=0,endTimeMode=0],firstDayOfWeek=1,minimalDaysInFirstWeek=1,ERA=1,YEAR=2025,MONTH=4,WEEK_OF_YEAR=19,WEEK_OF_MONTH=2,DAY_OF_MONTH=4,DAY_OF_YEAR=124,DAY_OF_WEEK=1,DAY_OF_WEEK_IN_MONTH=1,AM_PM=1,HOUR=7,HOUR_OF_DAY=19,MINUTE=53,SECOND=58,MILLISECOND=0,ZONE_OFFSET=7200000,DST_OFFSET=0]"
}
```
---
## Checklist
### General
- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings
### Documentation
- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)
### UI Changes (if applicable)
- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)
### Testing (if applicable)
- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing)
for more details.
---------
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-05-07 10:18:02 +02:00
|
|
|
// Load the document in read-only mode to prevent modifications and ensure the integrity of the original file.
|
|
|
|
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput(), true)) {
|
2025-01-26 13:10:16 +00:00
|
|
|
PDDocumentInformation info = document.getDocumentInformation();
|
|
|
|
Map<String, String> properties = new HashMap<>();
|
|
|
|
properties.put("title", info.getTitle());
|
|
|
|
properties.put("author", info.getAuthor());
|
|
|
|
properties.put("subject", info.getSubject());
|
|
|
|
properties.put("keywords", info.getKeywords());
|
|
|
|
properties.put("creator", info.getCreator());
|
|
|
|
properties.put("producer", info.getProducer());
|
|
|
|
properties.put("creationDate", info.getCreationDate().toString());
|
|
|
|
properties.put("modificationDate", info.getModificationDate().toString());
|
|
|
|
return properties;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2025-01-30 15:13:42 +00:00
|
|
|
@PostMapping(value = "/page-dimensions", consumes = "multipart/form-data")
|
|
|
|
@Operation(
|
|
|
|
summary = "Get page dimensions for all pages",
|
|
|
|
description = "Returns width and height of each page. Input:PDF Output:JSON Type:SISO")
|
|
|
|
public List<Map<String, Float>> getPageDimensions(@ModelAttribute PDFFile file)
|
|
|
|
throws IOException {
|
2025-03-10 20:17:45 +00:00
|
|
|
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) {
|
2025-01-26 13:10:16 +00:00
|
|
|
List<Map<String, Float>> dimensions = new ArrayList<>();
|
|
|
|
PDPageTree pages = document.getPages();
|
2025-01-30 11:01:35 +00:00
|
|
|
|
2025-01-26 13:10:16 +00:00
|
|
|
for (PDPage page : pages) {
|
|
|
|
Map<String, Float> pageDim = new HashMap<>();
|
|
|
|
pageDim.put("width", page.getBBox().getWidth());
|
|
|
|
pageDim.put("height", page.getBBox().getHeight());
|
|
|
|
dimensions.add(pageDim);
|
|
|
|
}
|
|
|
|
return dimensions;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2025-01-30 15:13:42 +00:00
|
|
|
@PostMapping(value = "/form-fields", consumes = "multipart/form-data")
|
|
|
|
@Operation(
|
|
|
|
summary = "Get form field information",
|
|
|
|
description =
|
|
|
|
"Returns count and details of form fields. Input:PDF Output:JSON Type:SISO")
|
2025-01-26 13:10:16 +00:00
|
|
|
public Map<String, Object> getFormFields(@ModelAttribute PDFFile file) throws IOException {
|
2025-03-10 20:17:45 +00:00
|
|
|
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) {
|
2025-01-26 13:10:16 +00:00
|
|
|
Map<String, Object> formInfo = new HashMap<>();
|
|
|
|
PDAcroForm form = document.getDocumentCatalog().getAcroForm();
|
2025-01-30 11:01:35 +00:00
|
|
|
|
2025-01-26 13:10:16 +00:00
|
|
|
if (form != null) {
|
|
|
|
formInfo.put("fieldCount", form.getFields().size());
|
|
|
|
formInfo.put("hasXFA", form.hasXFA());
|
|
|
|
formInfo.put("isSignaturesExist", form.isSignaturesExist());
|
|
|
|
} else {
|
|
|
|
formInfo.put("fieldCount", 0);
|
|
|
|
formInfo.put("hasXFA", false);
|
|
|
|
formInfo.put("isSignaturesExist", false);
|
|
|
|
}
|
|
|
|
return formInfo;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2025-01-30 15:13:42 +00:00
|
|
|
@PostMapping(value = "/annotation-info", consumes = "multipart/form-data")
|
|
|
|
@Operation(
|
|
|
|
summary = "Get annotation information",
|
|
|
|
description = "Returns count and types of annotations. Input:PDF Output:JSON Type:SISO")
|
2025-01-26 13:10:16 +00:00
|
|
|
public Map<String, Object> getAnnotationInfo(@ModelAttribute PDFFile file) throws IOException {
|
2025-03-10 20:17:45 +00:00
|
|
|
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) {
|
2025-01-26 13:10:16 +00:00
|
|
|
Map<String, Object> annotInfo = new HashMap<>();
|
|
|
|
int totalAnnotations = 0;
|
|
|
|
Map<String, Integer> annotationTypes = new HashMap<>();
|
|
|
|
|
|
|
|
for (PDPage page : document.getPages()) {
|
|
|
|
for (PDAnnotation annot : page.getAnnotations()) {
|
|
|
|
totalAnnotations++;
|
|
|
|
String subType = annot.getSubtype();
|
|
|
|
annotationTypes.merge(subType, 1, Integer::sum);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
annotInfo.put("totalCount", totalAnnotations);
|
|
|
|
annotInfo.put("typeBreakdown", annotationTypes);
|
|
|
|
return annotInfo;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2025-01-30 15:13:42 +00:00
|
|
|
@PostMapping(value = "/font-info", consumes = "multipart/form-data")
|
|
|
|
@Operation(
|
|
|
|
summary = "Get font information",
|
|
|
|
description =
|
|
|
|
"Returns list of fonts used in the document. Input:PDF Output:JSON Type:SISO")
|
2025-01-26 13:10:16 +00:00
|
|
|
public Map<String, Object> getFontInfo(@ModelAttribute PDFFile file) throws IOException {
|
2025-03-10 20:17:45 +00:00
|
|
|
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) {
|
2025-01-26 13:10:16 +00:00
|
|
|
Map<String, Object> fontInfo = new HashMap<>();
|
|
|
|
Set<String> fontNames = new HashSet<>();
|
|
|
|
|
|
|
|
for (PDPage page : document.getPages()) {
|
|
|
|
for (COSName font : page.getResources().getFontNames()) {
|
|
|
|
fontNames.add(font.getName());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fontInfo.put("fontCount", fontNames.size());
|
|
|
|
fontInfo.put("fonts", fontNames);
|
|
|
|
return fontInfo;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2025-01-30 15:13:42 +00:00
|
|
|
@PostMapping(value = "/security-info", consumes = "multipart/form-data")
|
|
|
|
@Operation(
|
|
|
|
summary = "Get security information",
|
|
|
|
description =
|
|
|
|
"Returns encryption and permission details. Input:PDF Output:JSON Type:SISO")
|
2025-01-26 13:10:16 +00:00
|
|
|
public Map<String, Object> getSecurityInfo(@ModelAttribute PDFFile file) throws IOException {
|
2025-03-10 20:17:45 +00:00
|
|
|
try (PDDocument document = pdfDocumentFactory.load(file.getFileInput())) {
|
2025-01-26 13:10:16 +00:00
|
|
|
Map<String, Object> securityInfo = new HashMap<>();
|
|
|
|
PDEncryption encryption = document.getEncryption();
|
2025-01-30 11:01:35 +00:00
|
|
|
|
2025-01-26 13:10:16 +00:00
|
|
|
if (encryption != null) {
|
|
|
|
securityInfo.put("isEncrypted", true);
|
|
|
|
securityInfo.put("keyLength", encryption.getLength());
|
2025-01-30 11:01:35 +00:00
|
|
|
|
2025-01-26 13:10:16 +00:00
|
|
|
// Get permissions
|
|
|
|
Map<String, Boolean> permissions = new HashMap<>();
|
|
|
|
permissions.put("canPrint", document.getCurrentAccessPermission().canPrint());
|
|
|
|
permissions.put("canModify", document.getCurrentAccessPermission().canModify());
|
2025-01-30 15:13:42 +00:00
|
|
|
permissions.put(
|
|
|
|
"canExtractContent",
|
|
|
|
document.getCurrentAccessPermission().canExtractContent());
|
|
|
|
permissions.put(
|
|
|
|
"canModifyAnnotations",
|
|
|
|
document.getCurrentAccessPermission().canModifyAnnotations());
|
2025-01-30 11:01:35 +00:00
|
|
|
|
2025-01-26 13:10:16 +00:00
|
|
|
securityInfo.put("permissions", permissions);
|
|
|
|
} else {
|
|
|
|
securityInfo.put("isEncrypted", false);
|
|
|
|
}
|
2025-01-30 11:01:35 +00:00
|
|
|
|
2025-01-26 13:10:16 +00:00
|
|
|
return securityInfo;
|
|
|
|
}
|
|
|
|
}
|
2025-01-30 11:01:35 +00:00
|
|
|
}
|