diff --git a/contrib/requirements_dev.txt b/contrib/requirements_dev.txt index 75626f5..a2aa1f6 100644 --- a/contrib/requirements_dev.txt +++ b/contrib/requirements_dev.txt @@ -4,6 +4,7 @@ loguru>=0.6.0 click>=8.1.3 click-option-group>=0.5.5 xmltodict>=0.13.0 +xmlschema>=2.2.1 img2pdf>=0.4.4 diff --git a/mangadlp/metadata.py b/mangadlp/metadata.py index aa49234..f0fa425 100644 --- a/mangadlp/metadata.py +++ b/mangadlp/metadata.py @@ -1,63 +1,118 @@ from pathlib import Path +from typing import Any import xmltodict from loguru import logger as log +METADATA_TEMPLATE = Path("mangadlp/metadata/ComicInfo_v2.0.xml") +# define metadata types and valid values. an empty list means no value check +METADATA_TYPES: dict[str, tuple[type, Any, list]] = { + "Title": (str, None, []), + "Series": (str, None, []), + "Number": (str, None, []), + "Count": (int, None, []), + "Volume": (int, None, []), + "AlternateSeries": (str, None, []), + "AlternateNumber": (str, None, []), + "AlternateCount": (int, None, []), + "Summary": (str, None, []), + "Notes": (str, "Downloaded with https://github.com/olofvndrhr/manga-dlp", []), + "Year": (int, None, []), + "Month": (int, None, []), + "Day": (int, None, []), + "Writer": (str, None, []), + "Colorist": (str, None, []), + "Publisher": (str, None, []), + "Genre": (str, None, []), + "Web": (str, None, []), + "PageCount": (int, None, []), + "LanguageISO": (str, None, []), + "Format": (str, None, []), + "BlackAndWhite": (str, None, ["Yes", "No", "Unknown"]), + "Manga": (str, "Yes", ["Yes", "No", "Unknown", "YesAndRightToLeft"]), + "ScanInformation": (str, None, []), + "SeriesGroup": (str, None, []), + "AgeRating": ( + str, + None, + [ + "Unknown", + "Adults Only 18+", + "Early Childhood", + "Everyone", + "Everyone 10+", + "G", + "Kids to Adults", + "M", + "MA15+", + "Mature 17+", + "PG", + "R18+", + "Rating Pending", + "Teen", + "X18+", + ], + ), + "CommunityRating": (int, None, [1, 2, 3, 4, 5]), +} + + +def validate_metadata(metadata_in: dict) -> dict: + log.info("Validating metadata") + + metadata_valid: dict[str, dict] = {"ComicInfo": {}} + for key, value in METADATA_TYPES.items(): + metadata_type, metadata_default, metadata_validation = value + + # add default value if present + if metadata_default: + log.info(f"Setting default value for Key:{key} -> value={metadata_default}") + metadata_valid["ComicInfo"][key] = metadata_default + + # check if metadata key is available + try: + md_to_check = metadata_in[key] + except KeyError: + continue + # check if provided metadata item is empty + if not md_to_check: + continue + + # check if metadata type is correct + log.debug(f"Key:{key} -> value={type(md_to_check)} -> check={metadata_type}") + if not isinstance(md_to_check, metadata_type): # noqa + log.warning( + f"Metadata has wrong type: {key}:{metadata_type} -> {md_to_check}" + ) + continue + + # check if metadata is valid + log.debug(f"Key:{key} -> value={md_to_check} -> valid={metadata_validation}") + if (len(metadata_validation) > 0) and (md_to_check not in metadata_validation): + log.warning( + f"Metadata is invalid: {key}:{metadata_validation} -> {md_to_check}" + ) + continue + + log.debug(f"Updating metadata: '{key}' = '{md_to_check}'") + metadata_valid["ComicInfo"][key] = md_to_check + + return metadata_valid + def write_metadata(chapter_path: Path, metadata: dict) -> None: if metadata["Format"] == "pdf": log.warning("Can't add metadata for pdf format. Skipping") return - # define metadata types - metadata_types: dict[str, type] = { - "Title": str, - "Series": str, - "Number": str, - "Count": int, - "Volume": int, - "Summary": str, - "Genre": str, - "Web": str, - "PageCount": int, - "LanguageISO": str, - "Format": str, - "ScanInformation": str, - "SeriesGroup": str, - } - - try: - metadata_template = Path("mangadlp/metadata/ComicInfo.xml").read_text( - encoding="utf8" - ) - metadata_empty: dict[str, dict] = xmltodict.parse(metadata_template) - except Exception as exc: - log.error("Can't open or parse xml template") - raise exc metadata_file = chapter_path / "ComicInfo.xml" - log.info(f"Writing metadata to: '{metadata_file}'") log.debug(f"Metadata items: {metadata}") - for key, value in metadata.items(): - # check if metadata is empty - if not value: - continue - # try to match with template - try: - metadata_empty["ComicInfo"][key] - except KeyError: - continue - # check if metadata type is correct - log.debug(f"Key:{key} -> value={type(value)} -> check={metadata_types[key]}") - if not isinstance(value, metadata_types[key]): # noqa - log.warning( - f"Metadata has wrong type: {key}:{metadata_types[key]} -> {value}" - ) - continue + metadata_valid = validate_metadata(metadata) - log.debug(f"Updating metadata: '{key}' = '{value}'") - metadata_empty["ComicInfo"][key] = value - - metadata_export = xmltodict.unparse(metadata_empty, pretty=True, indent=" " * 4) + log.info(f"Writing metadata to: '{metadata_file}'") + metadata_export = xmltodict.unparse( + metadata_valid, pretty=True, indent=" " * 4, short_empty_elements=True + ) metadata_file.touch(exist_ok=True) metadata_file.write_text(metadata_export, encoding="utf8") diff --git a/mangadlp/metadata/ComicInfo.xml b/mangadlp/metadata/ComicInfo.xml deleted file mode 100644 index 5472a58..0000000 --- a/mangadlp/metadata/ComicInfo.xml +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - - - - - - - - - - Unknown - Unknown - Yes - Downloaded with https://github.com/olofvndrhr/manga-dlp - diff --git a/tests/ComicInfo_test.xml b/tests/ComicInfo_test.xml index d773dc5..de17013 100644 --- a/tests/ComicInfo_test.xml +++ b/tests/ComicInfo_test.xml @@ -6,15 +6,11 @@ 10 1 summary1 + Downloaded with https://github.com/olofvndrhr/manga-dlp genre1 https://mangadex.org 99 en cbz - - - Unknown - Unknown Yes - Downloaded with https://github.com/olofvndrhr/manga-dlp \ No newline at end of file diff --git a/tests/test_07_metadata.py b/tests/test_07_metadata.py index b48fe0b..2ef3c4b 100644 --- a/tests/test_07_metadata.py +++ b/tests/test_07_metadata.py @@ -1,5 +1,9 @@ +import shutil +import subprocess from pathlib import Path +import xmlschema + from mangadlp.metadata import write_metadata @@ -30,3 +34,38 @@ def test_metadata_creation(): # cleanup metadata_file.unlink() + + +def test_metadata_chapter_validity(): + url_uuid = "https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko" + manga_path = Path("tests/Tomo-chan wa Onna no ko") + metadata_path = Path( + "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire/ComicInfo.xml" + ) + language = "en" + chapters = "1" + download_path = "tests" + command_args = [ + "-u", + url_uuid, + "-l", + language, + "-c", + chapters, + "--path", + download_path, + "--format", + "", + "--debug", + ] + schema = xmlschema.XMLSchema("mangadlp/metadata/ComicInfo_v2.0.xsd") + + script_path = "manga-dlp.py" + command = ["python3", script_path] + command_args + + assert subprocess.call(command) == 0 + assert metadata_path.is_file() + assert schema.is_valid(metadata_path) + + # cleanup + shutil.rmtree(manga_path, ignore_errors=True)