Add: check for duplicate keys (#2749)

# Description of Changes

introduces the check for duplicate keys

---

## Checklist

### General

- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing)
for more details.
This commit is contained in:
Ludy 2025-01-20 11:52:23 +01:00 committed by GitHub
parent af1b156ba6
commit 8353c399d2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -21,25 +21,60 @@ import argparse
import re
def find_duplicate_keys(file_path):
"""
Identifies duplicate keys in a .properties file.
:param file_path: Path to the .properties file.
:return: List of tuples (key, first_occurrence_line, duplicate_line).
"""
keys = {}
duplicates = []
with open(file_path, "r", encoding="utf-8") as file:
for line_number, line in enumerate(file, start=1):
stripped_line = line.strip()
# Skip empty lines and comments
if not stripped_line or stripped_line.startswith("#"):
continue
# Split the line into key and value
if "=" in stripped_line:
key, _ = stripped_line.split("=", 1)
key = key.strip()
# Check if the key already exists
if key in keys:
duplicates.append((key, keys[key], line_number))
else:
keys[key] = line_number
return duplicates
# Maximum size for properties files (e.g., 200 KB)
MAX_FILE_SIZE = 200 * 1024
def parse_properties_file(file_path):
"""Parses a .properties file and returns a list of objects (including comments, empty lines, and line numbers)."""
"""
Parses a .properties file and returns a structured list of its contents.
:param file_path: Path to the .properties file.
:return: List of dictionaries representing each line in the file.
"""
properties_list = []
with open(file_path, "r", encoding="utf-8") as file:
for line_number, line in enumerate(file, start=1):
stripped_line = line.strip()
# Empty lines
# Handle empty lines
if not stripped_line:
properties_list.append(
{"line_number": line_number, "type": "empty", "content": ""}
)
continue
# Comments
# Handle comments
if stripped_line.startswith("#"):
properties_list.append(
{
@ -50,7 +85,7 @@ def parse_properties_file(file_path):
)
continue
# Key-value pairs
# Handle key-value pairs
match = re.match(r"^([^=]+)=(.*)$", line)
if match:
key, value = match.groups()
@ -67,9 +102,14 @@ def parse_properties_file(file_path):
def write_json_file(file_path, updated_properties):
"""
Writes updated properties back to the file in their original format.
:param file_path: Path to the .properties file.
:param updated_properties: List of updated properties to write.
"""
updated_lines = {entry["line_number"]: entry for entry in updated_properties}
# Sort by line numbers and retain comments and empty lines
# Sort lines by their numbers and retain comments and empty lines
all_lines = sorted(set(updated_lines.keys()))
original_format = []
@ -88,8 +128,8 @@ def write_json_file(file_path, updated_properties):
# Replace entries with those from the current JSON
original_format.append(entry)
# Write back in the original format
with open(file_path, "w", encoding="utf-8") as file:
# Write the updated content back to the file
with open(file_path, "w", encoding="utf-8", newline="\n") as file:
for entry in original_format:
if entry["type"] == "comment":
file.write(f"{entry['content']}\n")
@ -100,6 +140,12 @@ def write_json_file(file_path, updated_properties):
def update_missing_keys(reference_file, file_list, branch=""):
"""
Updates missing keys in the translation files based on the reference file.
:param reference_file: Path to the reference .properties file.
:param file_list: List of translation files to update.
:param branch: Branch where the files are located.
"""
reference_properties = parse_properties_file(reference_file)
for file_path in file_list:
basename_current_file = os.path.basename(os.path.join(branch, file_path))
@ -245,6 +291,24 @@ def check_for_differences(reference_file, file_list, branch, actor):
)
else:
report.append("2. **Test Status:** ✅ **_Passed_**")
if find_duplicate_keys(os.path.join(branch, file_path)):
has_differences = True
output = "\n".join(
[
f" - `{key}`: first at line {first}, duplicate at `line {duplicate}`"
for key, first, duplicate in find_duplicate_keys(
os.path.join(branch, file_path)
)
]
)
report.append("3. **Test Status:** ❌ **_Failed_**")
report.append(" - **Issue:**")
report.append(" - duplicate entries were found:")
report.append(output)
else:
report.append("3. **Test Status:** ✅ **_Passed_**")
report.append("")
report.append("---")
report.append("")