From 6ccaeda8a4eaeb68f32ef1d343b373b3f0ba972d Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Sun, 12 Feb 2023 16:13:41 +0100 Subject: [PATCH 01/18] update cache to include manga title --- mangadlp/app.py | 6 ++++-- mangadlp/cache.py | 47 +++++++++++++++++++++++++++++++++++++---------- 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/mangadlp/app.py b/mangadlp/app.py index f5a0744..cdcb2b0 100644 --- a/mangadlp/app.py +++ b/mangadlp/app.py @@ -166,7 +166,9 @@ class MangaDLP: # prepare cache if specified if self.cache_path: - cache = CacheDB(self.cache_path, self.manga_uuid, self.language) + cache = CacheDB( + self.cache_path, self.manga_uuid, self.language, self.manga_title + ) cached_chapters = cache.db_uuid_chapters log.info(f"Cached chapters: {cached_chapters}") @@ -200,7 +202,7 @@ class MangaDLP: error_chapters: list[Any] = [] for chapter in chapters_to_download: if self.cache_path and chapter in cached_chapters: - log.info("Chapter is in cache. Skipping download") + log.info(f"Chapter '{chapter}' is in cache. Skipping download") continue try: diff --git a/mangadlp/cache.py b/mangadlp/cache.py index 84a267f..b6621d5 100644 --- a/mangadlp/cache.py +++ b/mangadlp/cache.py @@ -6,22 +6,34 @@ from loguru import logger as log class CacheDB: - def __init__(self, db_path: Union[str, Path], uuid: str, lang: str) -> None: + def __init__( + self, + db_path: Union[str, Path], + manga_uuid: str, + manga_lang: str, + manga_name: str, + ) -> None: self.db_path = Path(db_path) - self.uuid = uuid - self.lang = lang - self.db_key = f"{uuid}__{lang}" + self.uuid = manga_uuid + self.lang = manga_lang + self.name = manga_name + self.db_key = f"{manga_uuid}__{manga_lang}" - self._prepare() + self._prepare_db() - self.db_data = self.read_db() + self.db_data = self._read_db() # create db key entry if not found if not self.db_data.get(self.db_key): self.db_data[self.db_key] = {} + self.db_uuid_data: dict = self.db_data[self.db_key] + if not self.db_uuid_data.get("name"): + self.db_uuid_data.update({"name": self.name}) + self._write_db() + self.db_uuid_chapters: list = self.db_uuid_data.get("chapters") or [] - def _prepare(self): + def _prepare_db(self): if self.db_path.exists(): return # create empty cache @@ -32,7 +44,7 @@ class CacheDB: log.error("Can't create db-file") raise exc - def read_db(self) -> dict: + def _read_db(self) -> dict: log.info(f"Reading cache-db: {self.db_path}") try: db_txt = self.db_path.read_text(encoding="utf8") @@ -43,14 +55,29 @@ class CacheDB: return db_dict + def _write_db(self) -> None: + db_dump = json.dumps(self.db_data, indent=4, sort_keys=True) + self.db_path.write_text(db_dump, encoding="utf8") + def add_chapter(self, chapter: str) -> None: log.info(f"Adding chapter to cache-db: {chapter}") self.db_uuid_chapters.append(chapter) # dedup entries updated_chapters = list({*self.db_uuid_chapters}) + sorted_chapters = sort_chapters(updated_chapters) try: - self.db_data[self.db_key]["chapters"] = sorted(updated_chapters) - self.db_path.write_text(json.dumps(self.db_data, indent=4), encoding="utf8") + self.db_data[self.db_key]["chapters"] = sorted_chapters + self._write_db() except Exception as exc: log.error("Can't write cache-db") raise exc + + +def sort_chapters(chapters: list) -> list: + try: + sorted_list = sorted(chapters, key=float) + except Exception: + log.debug("Can't sort cache by float, using default sorting") + sorted_list = sorted(chapters) + + return sorted_list From 6120fe7c81991e7bcd0be8701f63dc95c8c0380a Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Mon, 13 Feb 2023 19:15:27 +0100 Subject: [PATCH 02/18] add metadata support --- CHANGELOG.md | 16 ++++ contrib/api_template.py | 32 +++++-- mangadlp/__about__.py | 2 +- mangadlp/api/mangadex.py | 53 ++++++------ mangadlp/app.py | 17 +++- mangadlp/metadata.py | 31 +++++++ mangadlp/metadata/ComicInfo.xml | 19 +++++ mangadlp/metadata/ComicInfo_v2.0.xsd | 123 +++++++++++++++++++++++++++ mangadlp/utils.py | 3 +- requirements.txt | 1 + 10 files changed, 263 insertions(+), 34 deletions(-) create mode 100644 mangadlp/metadata.py create mode 100644 mangadlp/metadata/ComicInfo.xml create mode 100644 mangadlp/metadata/ComicInfo_v2.0.xsd diff --git a/CHANGELOG.md b/CHANGELOG.md index 65b713f..b385432 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,22 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - Add support for more sites +## [2.2.21] - 2023-02-11 + +### Added + +- Metadata is now added to each chapter. Schema + standard: [https://anansi-project.github.io/docs/comicinfo/schemas/v2.0](https://anansi-project.github.io/docs/comicinfo/schemas/v2.0) +- `xmltodict` as a package requirement + +### Fixed + +- API template typos + +### Changed + +- Simplified the chapter info generation + ## [2.2.20] - 2023-02-12 ### Fixed diff --git a/contrib/api_template.py b/contrib/api_template.py index c6a0759..ecbbc88 100644 --- a/contrib/api_template.py +++ b/contrib/api_template.py @@ -34,17 +34,28 @@ class YourAPI: # attributes needed by app.py self.manga_uuid = "abc" self.manga_title = "abc" - self.chapter_list = "abc" + self.chapter_list = ["1", "2"] # methods needed by app.py # get chapter infos as a dictionary - def get_chapter_infos(chapter: str) -> dict: + def get_manga_chapter_data(chapter: str) -> dict: # these keys have to be returned return { - "uuid": chapter_uuid, - "volume": chapter_vol, - "chapter": chapter_num, - "name": chapter_name, + "1": { + "uuid": "abc", + "volume": "1", + "chapter": "1", + "name": "test", + }, + } + # or with --forcevol + return { + "1:1": { + "uuid": "abc", + "volume": "1", + "chapter": "1", + "name": "test", + }, } # get chapter images as a list (full links) @@ -55,3 +66,12 @@ class YourAPI: "https://abc.def/image/1234.png", "https://abc.def/image/12345.png", ] + + # get metadata with correct keys for ComicInfo.xml + def create_metadata(self, chapter: str) -> dict: + # example + return { + "Volume": "abc", + "LanguageISO": "en", + "Title": "test", + } diff --git a/mangadlp/__about__.py b/mangadlp/__about__.py index 97eeff4..20868fd 100644 --- a/mangadlp/__about__.py +++ b/mangadlp/__about__.py @@ -1 +1 @@ -__version__ = "2.2.20" +__version__ = "2.2.21" diff --git a/mangadlp/api/mangadex.py b/mangadlp/api/mangadex.py index 9c3417d..8bb574b 100644 --- a/mangadlp/api/mangadex.py +++ b/mangadlp/api/mangadex.py @@ -169,13 +169,15 @@ class Mangadex: chapter_uuid = chapter.get("id") or "" chapter_name = attributes.get("title") or "" chapter_external = attributes.get("externalUrl") or "" + chapter_pages = attributes.get("pages") or "" # check for chapter title and fix it if chapter_name: chapter_name = utils.fix_name(chapter_name) + # check if the chapter is external (can't download them) if chapter_external: - log.debug(f"Chapter is external. Skipping: {chapter_uuid}") + log.debug(f"Chapter is external. Skipping: {chapter_name}") continue # check if its duplicate from the last entry @@ -186,12 +188,13 @@ class Mangadex: chapter_index = ( chapter_num if not self.forcevol else f"{chapter_vol}:{chapter_num}" ) - chapter_data[chapter_index] = [ - chapter_uuid, - chapter_vol, - chapter_num, - chapter_name, - ] + chapter_data[chapter_index] = { + "uuid": chapter_uuid, + "volume": chapter_vol, + "chapter": chapter_num, + "name": chapter_name, + "pages": chapter_pages, + } # add last chapter to duplicate check last_volume, last_chapter = (chapter_vol, chapter_num) @@ -204,7 +207,7 @@ class Mangadex: def get_chapter_images(self, chapter: str, wait_time: float) -> list: log.debug(f"Getting chapter images for: {self.manga_uuid}") athome_url = f"{self.api_base_url}/at-home/server" - chapter_uuid = self.manga_chapter_data[chapter][0] + chapter_uuid = self.manga_chapter_data[chapter]["uuid"] # retry up to two times if the api applied rate limits api_error = False @@ -251,10 +254,9 @@ class Mangadex: def create_chapter_list(self) -> list: log.debug(f"Creating chapter list for: {self.manga_uuid}") chapter_list = [] - for index, _ in self.manga_chapter_data.items(): - chapter_info: dict = self.get_chapter_infos(index) - chapter_number: str = chapter_info["chapter"] - volume_number: str = chapter_info["volume"] + for data in self.manga_chapter_data.values(): + chapter_number: str = data["chapter"] + volume_number: str = data["volume"] if self.forcevol: chapter_list.append(f"{volume_number}:{chapter_number}") else: @@ -262,17 +264,20 @@ class Mangadex: return chapter_list - # create easy to access chapter infos - def get_chapter_infos(self, chapter: str) -> dict: - chapter_uuid: str = self.manga_chapter_data[chapter][0] - chapter_vol: str = self.manga_chapter_data[chapter][1] - chapter_num: str = self.manga_chapter_data[chapter][2] - chapter_name: str = self.manga_chapter_data[chapter][3] - log.debug(f"Getting chapter infos for: {chapter_uuid}") + def create_metadata(self, chapter: str) -> dict: + log.info("Creating metadata from api") - return { - "uuid": chapter_uuid, - "volume": chapter_vol, - "chapter": chapter_num, - "name": chapter_name, + chapter_data = self.manga_chapter_data[chapter] + metadata = { + "Volume": chapter_data["volume"], + "Number": chapter_data["chapter"], + "PageCount": chapter_data["pages"], + "Count": len(self.manga_chapter_data), + "LanguageISO": self.language, + "Title": self.manga_title, + "Summary": self.manga_data["attributes"]["description"].get("en"), + "Genre": self.manga_data["attributes"].get("publicationDemographic"), + "Web": f"https://mangadex.org/title/{self.manga_uuid}", } + + return metadata diff --git a/mangadlp/app.py b/mangadlp/app.py index cdcb2b0..769ed17 100644 --- a/mangadlp/app.py +++ b/mangadlp/app.py @@ -9,6 +9,7 @@ from mangadlp import downloader, utils from mangadlp.api.mangadex import Mangadex from mangadlp.cache import CacheDB from mangadlp.hooks import run_hook +from mangadlp.metadata import write_metadata class MangaDLP: @@ -205,20 +206,34 @@ class MangaDLP: log.info(f"Chapter '{chapter}' is in cache. Skipping download") continue + # download chapter try: chapter_path = self.get_chapter(chapter) except KeyboardInterrupt as exc: raise exc except FileExistsError: + # skipping chapter download as its already available skipped_chapters.append(chapter) # update cache if self.cache_path: cache.add_chapter(chapter) continue except Exception: + # skip download/packing due to an error error_chapters.append(chapter) continue + # add metadata + try: + metadata = self.api.create_metadata(chapter) + write_metadata( + chapter_path, + {"Format": self.file_format.removeprefix("."), **metadata}, + ) + except Exception: + log.warning(f"Can't write metadata for chapter '{chapter}'") + + # pack downloaded folder if self.file_format: try: self.archive_chapter(chapter_path) @@ -268,7 +283,7 @@ class MangaDLP: # once called per chapter def get_chapter(self, chapter: str) -> Path: # get chapter infos - chapter_infos = self.api.get_chapter_infos(chapter) + chapter_infos: dict = self.api.manga_chapter_data[chapter] log.debug(f"Chapter infos: {chapter_infos}") # get image urls for chapter diff --git a/mangadlp/metadata.py b/mangadlp/metadata.py new file mode 100644 index 0000000..651d4ae --- /dev/null +++ b/mangadlp/metadata.py @@ -0,0 +1,31 @@ +from pathlib import Path + +import xmltodict +from loguru import logger as log + + +def write_metadata(chapter_path: Path, metadata: dict) -> None: + try: + metadata_template = Path("mangadlp/metadata/ComicInfo.xml").read_text( + encoding="utf8" + ) + metadata_empty: dict[str, dict] = xmltodict.parse(metadata_template) + except Exception as exc: + log.error("Can't open or parse xml template") + raise exc + metadata_file = chapter_path / "ComicInfo.xml" + + log.info(f"Writing metadata to: '{metadata_file}'") + log.debug(f"Metadata items: {metadata}") + for key, value in metadata.items(): + if not value: + continue + try: + metadata_empty["ComicInfo"][key] + except KeyError: + continue + log.debug(f"Updating metadata: '{key}' = '{value}'") + metadata_empty["ComicInfo"][key] = value + + metadata_export = xmltodict.unparse(metadata_empty, pretty=True, indent=(" " * 4)) + metadata_file.write_text(metadata_export, encoding="utf8") diff --git a/mangadlp/metadata/ComicInfo.xml b/mangadlp/metadata/ComicInfo.xml new file mode 100644 index 0000000..ef77aba --- /dev/null +++ b/mangadlp/metadata/ComicInfo.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + Unknown + Yes + Unknown + Downloaded with https://github.com/olofvndrhr/manga-dlp + diff --git a/mangadlp/metadata/ComicInfo_v2.0.xsd b/mangadlp/metadata/ComicInfo_v2.0.xsd new file mode 100644 index 0000000..6732fe8 --- /dev/null +++ b/mangadlp/metadata/ComicInfo_v2.0.xsd @@ -0,0 +1,123 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/mangadlp/utils.py b/mangadlp/utils.py index b49c41a..32c7a87 100644 --- a/mangadlp/utils.py +++ b/mangadlp/utils.py @@ -83,7 +83,6 @@ def get_chapter_list(chapters: str, available_chapters: list) -> list: # remove illegal characters etc def fix_name(filename: str) -> str: - log.debug(f"Input name='{filename}'") filename = filename.encode(encoding="utf8", errors="ignore").decode(encoding="utf8") # remove illegal characters filename = re.sub(r'[/\\<>:;|?*!@"]', "", filename) @@ -94,7 +93,7 @@ def fix_name(filename: str) -> str: # remove trailing and beginning spaces filename = re.sub("([ \t]+$)|(^[ \t]+)", "", filename) - log.debug(f"Output name='{filename}'") + log.debug(f"Input name='{filename}', Output name='{filename}'") return filename diff --git a/requirements.txt b/requirements.txt index 0fe1f62..bd44bdf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,6 @@ requests>=2.28.0 loguru>=0.6.0 click>=8.1.3 click-option-group>=0.5.5 +xmltodict~=0.13.0 img2pdf>=0.4.4 From 463878bd3729cf292c7a076be475454b925e2e1a Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Mon, 13 Feb 2023 19:17:10 +0100 Subject: [PATCH 03/18] update changelog [CI SKIP] --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b385432..8333a42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - Metadata is now added to each chapter. Schema standard: [https://anansi-project.github.io/docs/comicinfo/schemas/v2.0](https://anansi-project.github.io/docs/comicinfo/schemas/v2.0) - `xmltodict` as a package requirement +- Cache now also saves the manga title ### Fixed From 455963510208c434acff76b532814908b17d4a66 Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Mon, 13 Feb 2023 19:19:03 +0100 Subject: [PATCH 04/18] update requirements --- contrib/requirements_dev.txt | 1 + requirements.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/contrib/requirements_dev.txt b/contrib/requirements_dev.txt index 4c05131..75626f5 100644 --- a/contrib/requirements_dev.txt +++ b/contrib/requirements_dev.txt @@ -3,6 +3,7 @@ requests>=2.28.0 loguru>=0.6.0 click>=8.1.3 click-option-group>=0.5.5 +xmltodict>=0.13.0 img2pdf>=0.4.4 diff --git a/requirements.txt b/requirements.txt index bd44bdf..d5fba89 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,6 @@ requests>=2.28.0 loguru>=0.6.0 click>=8.1.3 click-option-group>=0.5.5 -xmltodict~=0.13.0 +xmltodict>=0.13.0 img2pdf>=0.4.4 From a7b5c0b786a629a66bdeb3ad54c732001370c408 Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Mon, 13 Feb 2023 23:17:52 +0100 Subject: [PATCH 05/18] various fixes --- CHANGELOG.md | 6 ++- LICENSE | 2 +- contrib/api_template.py | 82 ++++++++++++++++++++++----------- mangadlp/api/mangadex.py | 10 ++-- mangadlp/metadata.py | 7 ++- mangadlp/metadata/ComicInfo.xml | 1 + mangadlp/utils.py | 4 +- tests/ComicInfo_test.xml | 20 ++++++++ tests/test_02_utils.py | 3 +- tests/test_03_downloader.py | 2 +- tests/test_04_input.py | 3 -- tests/test_06_cache.py | 19 ++++---- tests/test_07_metadata.py | 31 +++++++++++++ tests/test_11_api_mangadex.py | 23 ++++++++- tests/test_21_full.py | 4 +- 15 files changed, 164 insertions(+), 53 deletions(-) create mode 100644 tests/ComicInfo_test.xml create mode 100644 tests/test_07_metadata.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 8333a42..1399a3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,16 +15,20 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - Metadata is now added to each chapter. Schema standard: [https://anansi-project.github.io/docs/comicinfo/schemas/v2.0](https://anansi-project.github.io/docs/comicinfo/schemas/v2.0) -- `xmltodict` as a package requirement +- Added `xmltodict` as a package requirement - Cache now also saves the manga title +- New tests ### Fixed - API template typos +- Some useless type annotations ### Changed - Simplified the chapter info generation +- Updated the license year +- Updated the API template ## [2.2.20] - 2023-02-12 diff --git a/LICENSE b/LICENSE index d2b1f45..08ef653 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022 Ivan Schaller +Copyright (c) 2021-2023 Ivan Schaller Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/contrib/api_template.py b/contrib/api_template.py index ecbbc88..2ec34fa 100644 --- a/contrib/api_template.py +++ b/contrib/api_template.py @@ -22,9 +22,10 @@ class YourAPI: api_base_url = "https://api.mangadex.org" img_base_url = "https://uploads.mangadex.org" - # get infos to initiate class def __init__(self, url_uuid, language, forcevol): - # static info + """ + get infos to initiate class + """ self.api_name = "Your API Name" self.url_uuid = url_uuid @@ -34,32 +35,49 @@ class YourAPI: # attributes needed by app.py self.manga_uuid = "abc" self.manga_title = "abc" - self.chapter_list = ["1", "2"] + self.chapter_list = ["1", "2", "2.1", "5", "10"] + self.manga_chapter_data = { # example data + "1": { + "uuid": "abc", + "volume": "1", + "chapter": "1", + "name": "test", + }, + "2": { + "uuid": "abc", + "volume": "1", + "chapter": "2", + "name": "test", + }, + } + # or with --forcevol + self.manga_chapter_data = { + "1:1": { + "uuid": "abc", + "volume": "1", + "chapter": "1", + "name": "test", + }, + "1:2": { + "uuid": "abc", + "volume": "1", + "chapter": "2", + "name": "test", + }, + } - # methods needed by app.py - # get chapter infos as a dictionary - def get_manga_chapter_data(chapter: str) -> dict: - # these keys have to be returned - return { - "1": { - "uuid": "abc", - "volume": "1", - "chapter": "1", - "name": "test", - }, - } - # or with --forcevol - return { - "1:1": { - "uuid": "abc", - "volume": "1", - "chapter": "1", - "name": "test", - }, - } - - # get chapter images as a list (full links) def get_chapter_images(chapter: str, download_wait: float) -> list: + """ + Get chapter images as a list (full links) + + Args: + chapter: The chapter number (chapter data index) + download_wait: Wait time between image downloads + + Returns: + The list of urls of the page images + """ + # example return [ "https://abc.def/image/123.png", @@ -67,8 +85,18 @@ class YourAPI: "https://abc.def/image/12345.png", ] - # get metadata with correct keys for ComicInfo.xml def create_metadata(self, chapter: str) -> dict: + """ + Get metadata with correct keys for ComicInfo.xml + Provide as much metadata as possible. empty/false values will be ignored + + Args: + chapter: The chapter number (chapter data index) + + Returns: + The metadata as a dict + """ + # example return { "Volume": "abc", diff --git a/mangadlp/api/mangadex.py b/mangadlp/api/mangadex.py index 8bb574b..4e35110 100644 --- a/mangadlp/api/mangadex.py +++ b/mangadlp/api/mangadex.py @@ -138,10 +138,9 @@ class Mangadex: "Error retrieving the chapters list. Did you specify a valid language code?" ) raise exc - else: - if total_chapters == 0: - log.error("No chapters available to download in specified language") - raise KeyError + if total_chapters == 0: + log.error("No chapters available to download in specified language") + raise KeyError log.debug(f"Total chapters={total_chapters}") return total_chapters @@ -272,9 +271,10 @@ class Mangadex: "Volume": chapter_data["volume"], "Number": chapter_data["chapter"], "PageCount": chapter_data["pages"], + "Title": chapter_data["name"], + "Series": self.manga_title, "Count": len(self.manga_chapter_data), "LanguageISO": self.language, - "Title": self.manga_title, "Summary": self.manga_data["attributes"]["description"].get("en"), "Genre": self.manga_data["attributes"].get("publicationDemographic"), "Web": f"https://mangadex.org/title/{self.manga_uuid}", diff --git a/mangadlp/metadata.py b/mangadlp/metadata.py index 651d4ae..7b3f97f 100644 --- a/mangadlp/metadata.py +++ b/mangadlp/metadata.py @@ -5,6 +5,10 @@ from loguru import logger as log def write_metadata(chapter_path: Path, metadata: dict) -> None: + if metadata["Format"] == "pdf": + log.warning("Can't add metadata for pdf format. Skipping") + return + try: metadata_template = Path("mangadlp/metadata/ComicInfo.xml").read_text( encoding="utf8" @@ -27,5 +31,6 @@ def write_metadata(chapter_path: Path, metadata: dict) -> None: log.debug(f"Updating metadata: '{key}' = '{value}'") metadata_empty["ComicInfo"][key] = value - metadata_export = xmltodict.unparse(metadata_empty, pretty=True, indent=(" " * 4)) + metadata_export = xmltodict.unparse(metadata_empty, pretty=True, indent=" " * 4) + metadata_file.touch(exist_ok=True) metadata_file.write_text(metadata_export, encoding="utf8") diff --git a/mangadlp/metadata/ComicInfo.xml b/mangadlp/metadata/ComicInfo.xml index ef77aba..0bc92af 100644 --- a/mangadlp/metadata/ComicInfo.xml +++ b/mangadlp/metadata/ComicInfo.xml @@ -1,6 +1,7 @@ + diff --git a/mangadlp/utils.py b/mangadlp/utils.py index 32c7a87..91fe552 100644 --- a/mangadlp/utils.py +++ b/mangadlp/utils.py @@ -9,7 +9,7 @@ from loguru import logger as log # create an archive of the chapter images def make_archive(chapter_path: Path, file_format: str) -> None: - zip_path: Path = Path(f"{chapter_path}.zip") + zip_path = Path(f"{chapter_path}.zip") try: # create zip with ZipFile(zip_path, "w") as zipfile: @@ -29,7 +29,7 @@ def make_pdf(chapter_path: Path) -> None: log.error("Cant import img2pdf. Please install it first") raise exc - pdf_path: Path = Path(f"{chapter_path}.pdf") + pdf_path = Path(f"{chapter_path}.pdf") images: list[str] = [] for file in chapter_path.iterdir(): images.append(str(file)) diff --git a/tests/ComicInfo_test.xml b/tests/ComicInfo_test.xml new file mode 100644 index 0000000..4c8d2a6 --- /dev/null +++ b/tests/ComicInfo_test.xml @@ -0,0 +1,20 @@ + + + title1 + series1 + 2 + 10 + 1 + summary1 + genre1 + https://mangadex.org + 99 + en + + + + Unknown + Yes + Unknown + Downloaded with https://github.com/olofvndrhr/manga-dlp + \ No newline at end of file diff --git a/tests/test_02_utils.py b/tests/test_02_utils.py index a13f277..13752b2 100644 --- a/tests/test_02_utils.py +++ b/tests/test_02_utils.py @@ -3,8 +3,7 @@ from pathlib import Path import pytest -import mangadlp.app as app -import mangadlp.utils as utils +from mangadlp import app, utils def test_make_archive_true(): diff --git a/tests/test_03_downloader.py b/tests/test_03_downloader.py index d9ea7df..8bb0c57 100644 --- a/tests/test_03_downloader.py +++ b/tests/test_03_downloader.py @@ -4,7 +4,7 @@ from pathlib import Path import pytest import requests -import mangadlp.downloader as downloader +from mangadlp import downloader def test_downloader(): diff --git a/tests/test_04_input.py b/tests/test_04_input.py index fb41fe4..093558d 100644 --- a/tests/test_04_input.py +++ b/tests/test_04_input.py @@ -1,7 +1,4 @@ import os -from pathlib import Path - -import pytest import mangadlp.cli as mdlpinput diff --git a/tests/test_06_cache.py b/tests/test_06_cache.py index b0d51e2..a3e065b 100644 --- a/tests/test_06_cache.py +++ b/tests/test_06_cache.py @@ -6,27 +6,28 @@ from mangadlp.cache import CacheDB def test_cache_creation(): cache_file = Path("cache.json") - cache = CacheDB(cache_file, "abc", "en") + cache = CacheDB(cache_file, "abc", "en", "test") - assert cache_file.exists() and cache_file.read_text(encoding="utf8") == "{}" + assert cache_file.exists() cache_file.unlink() def test_cache_insert(): cache_file = Path("cache.json") - cache = CacheDB(cache_file, "abc", "en") + cache = CacheDB(cache_file, "abc", "en", "test") cache.add_chapter("1") cache.add_chapter("2") cache_data = json.loads(cache_file.read_text(encoding="utf8")) assert cache_data["abc__en"]["chapters"] == ["1", "2"] + assert cache_data["abc__en"]["name"] == "test" cache_file.unlink() def test_cache_update(): cache_file = Path("cache.json") - cache = CacheDB(cache_file, "abc", "en") + cache = CacheDB(cache_file, "abc", "en", "test") cache.add_chapter("1") cache.add_chapter("2") @@ -43,29 +44,31 @@ def test_cache_update(): def test_cache_multiple(): cache_file = Path("cache.json") - cache1 = CacheDB(cache_file, "abc", "en") + cache1 = CacheDB(cache_file, "abc", "en", "test") cache1.add_chapter("1") cache1.add_chapter("2") - cache2 = CacheDB(cache_file, "def", "en") + cache2 = CacheDB(cache_file, "def", "en", "test2") cache2.add_chapter("8") cache2.add_chapter("9") cache_data = json.loads(cache_file.read_text(encoding="utf8")) assert cache_data["abc__en"]["chapters"] == ["1", "2"] + assert cache_data["abc__en"]["name"] == "test" assert cache_data["def__en"]["chapters"] == ["8", "9"] + assert cache_data["def__en"]["name"] == "test2" cache_file.unlink() def test_cache_lang(): cache_file = Path("cache.json") - cache1 = CacheDB(cache_file, "abc", "en") + cache1 = CacheDB(cache_file, "abc", "en", "test") cache1.add_chapter("1") cache1.add_chapter("2") - cache2 = CacheDB(cache_file, "abc", "de") + cache2 = CacheDB(cache_file, "abc", "de", "test") cache2.add_chapter("8") cache2.add_chapter("9") diff --git a/tests/test_07_metadata.py b/tests/test_07_metadata.py new file mode 100644 index 0000000..6a11324 --- /dev/null +++ b/tests/test_07_metadata.py @@ -0,0 +1,31 @@ +from pathlib import Path + +from mangadlp.metadata import write_metadata + + +def test_metadata_creation(): + test_metadata_file = Path("tests/ComicInfo_test.xml") + metadata_path = Path("tests/") + metadata_file = Path("tests/ComicInfo.xml") + metadata = { + "Volume": "1", + "Number": "2", + "PageCount": "99", + "Count": "10", + "LanguageISO": "en", + "Title": "title1", + "Series": "series1", + "Summary": "summary1", + "Genre": "genre1", + "Web": "https://mangadex.org", + } + + write_metadata(metadata_path, metadata) + assert metadata_file.exists() + + read_in_metadata = metadata_file.read_text(encoding="utf8") + test_metadata = test_metadata_file.read_text(encoding="utf8") + assert test_metadata == read_in_metadata + + # cleanup + metadata_file.unlink() diff --git a/tests/test_11_api_mangadex.py b/tests/test_11_api_mangadex.py index ad10b36..9ef1868 100644 --- a/tests/test_11_api_mangadex.py +++ b/tests/test_11_api_mangadex.py @@ -64,7 +64,7 @@ def test_chapter_infos(): language = "en" forcevol = False test = Mangadex(url_uuid, language, forcevol) - chapter_infos = test.get_chapter_infos("1") + chapter_infos = test.manga_chapter_data["1"] chapter_uuid = chapter_infos["uuid"] chapter_name = chapter_infos["name"] chapter_num = chapter_infos["chapter"] @@ -239,3 +239,24 @@ def test_get_chapter_images_error(monkeypatch): monkeypatch.setattr(requests, "get", fail_url) assert not test.get_chapter_images(chapter_num, 2) + + +def test_chapter_metadata(): + url_uuid = "https://mangadex.org/title/a96676e5-8ae2-425e-b549-7f15dd34a6d8/komi-san-wa-komyushou-desu" + language = "en" + forcevol = False + test = Mangadex(url_uuid, language, forcevol) + chapter_metadata = test.create_metadata("1") + manga_name = chapter_metadata["Series"] + chapter_name = chapter_metadata["Title"] + chapter_num = chapter_metadata["Number"] + chapter_volume = chapter_metadata["Volume"] + chapter_url = chapter_metadata["Web"] + + assert (manga_name, chapter_name, chapter_volume, chapter_num, chapter_url) == ( + "Komi-san wa Komyushou Desu", + "A Normal Person", + "1", + "1", + "https://mangadex.org/title/a96676e5-8ae2-425e-b549-7f15dd34a6d8", + ) diff --git a/tests/test_21_full.py b/tests/test_21_full.py index dfdaee0..fc05210 100644 --- a/tests/test_21_full.py +++ b/tests/test_21_full.py @@ -6,7 +6,7 @@ from pathlib import Path import pytest -import mangadlp.app as app +from mangadlp import app @pytest.fixture @@ -107,12 +107,14 @@ def test_full_with_input_folder(wait_20s): download_path = "tests" manga_path = Path("tests/Shikimori's Not Just a Cutie") chapter_path = Path("tests/Shikimori's Not Just a Cutie/Ch. 1") + metadata_path = Path("tests/Shikimori's Not Just a Cutie/ComicInfo.xml") command_args = f"-u {url_uuid} -l {language} -c {chapters} --path {download_path} --format '{file_format}' --debug --wait 2" script_path = "manga-dlp.py" os.system(f"python3 {script_path} {command_args}") assert manga_path.exists() and manga_path.is_dir() assert chapter_path.exists() and chapter_path.is_dir() + assert metadata_path.exists() and metadata_path.is_file() # cleanup shutil.rmtree(manga_path, ignore_errors=True) From 796aeb8aa7fbad649908e25839540a41aacb288b Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Tue, 14 Feb 2023 13:31:55 +0100 Subject: [PATCH 06/18] add metadata type check --- contrib/api_template.py | 17 +++++++++++++++++ mangadlp/api/mangadex.py | 14 +++++++------- mangadlp/metadata.py | 27 +++++++++++++++++++++++++++ mangadlp/metadata/ComicInfo.xml | 2 +- 4 files changed, 52 insertions(+), 8 deletions(-) diff --git a/contrib/api_template.py b/contrib/api_template.py index 2ec34fa..7560372 100644 --- a/contrib/api_template.py +++ b/contrib/api_template.py @@ -97,6 +97,23 @@ class YourAPI: The metadata as a dict """ + # metadata types. have to be correct to be valid + { + "Title": str, + "Series": str, + "Number": str, + "Count": int, + "Volume": int, + "Summary": str, + "Genre": str, + "Web": str, + "PageCount": int, + "LanguageISO": str, + "Format": str, + "ScanInformation": str, + "SeriesGroup": str, + } + # example return { "Volume": "abc", diff --git a/mangadlp/api/mangadex.py b/mangadlp/api/mangadex.py index 4e35110..1ad23ce 100644 --- a/mangadlp/api/mangadex.py +++ b/mangadlp/api/mangadex.py @@ -163,12 +163,12 @@ class Mangadex: for chapter in r.json()["data"]: attributes: dict = chapter["attributes"] # chapter infos from feed - chapter_num = attributes.get("chapter") or "" - chapter_vol = attributes.get("volume") or "" - chapter_uuid = chapter.get("id") or "" - chapter_name = attributes.get("title") or "" - chapter_external = attributes.get("externalUrl") or "" - chapter_pages = attributes.get("pages") or "" + chapter_num: str = attributes.get("chapter") or "" + chapter_vol: str = attributes.get("volume") or "" + chapter_uuid: str = chapter.get("id") or "" + chapter_name: str = attributes.get("title") or "" + chapter_external: str = attributes.get("externalUrl") or "" + chapter_pages: int = attributes.get("pages") or 0 # check for chapter title and fix it if chapter_name: @@ -268,7 +268,7 @@ class Mangadex: chapter_data = self.manga_chapter_data[chapter] metadata = { - "Volume": chapter_data["volume"], + "Volume": int(chapter_data["volume"]), "Number": chapter_data["chapter"], "PageCount": chapter_data["pages"], "Title": chapter_data["name"], diff --git a/mangadlp/metadata.py b/mangadlp/metadata.py index 7b3f97f..aa49234 100644 --- a/mangadlp/metadata.py +++ b/mangadlp/metadata.py @@ -9,6 +9,23 @@ def write_metadata(chapter_path: Path, metadata: dict) -> None: log.warning("Can't add metadata for pdf format. Skipping") return + # define metadata types + metadata_types: dict[str, type] = { + "Title": str, + "Series": str, + "Number": str, + "Count": int, + "Volume": int, + "Summary": str, + "Genre": str, + "Web": str, + "PageCount": int, + "LanguageISO": str, + "Format": str, + "ScanInformation": str, + "SeriesGroup": str, + } + try: metadata_template = Path("mangadlp/metadata/ComicInfo.xml").read_text( encoding="utf8" @@ -22,12 +39,22 @@ def write_metadata(chapter_path: Path, metadata: dict) -> None: log.info(f"Writing metadata to: '{metadata_file}'") log.debug(f"Metadata items: {metadata}") for key, value in metadata.items(): + # check if metadata is empty if not value: continue + # try to match with template try: metadata_empty["ComicInfo"][key] except KeyError: continue + # check if metadata type is correct + log.debug(f"Key:{key} -> value={type(value)} -> check={metadata_types[key]}") + if not isinstance(value, metadata_types[key]): # noqa + log.warning( + f"Metadata has wrong type: {key}:{metadata_types[key]} -> {value}" + ) + continue + log.debug(f"Updating metadata: '{key}' = '{value}'") metadata_empty["ComicInfo"][key] = value diff --git a/mangadlp/metadata/ComicInfo.xml b/mangadlp/metadata/ComicInfo.xml index 0bc92af..5472a58 100644 --- a/mangadlp/metadata/ComicInfo.xml +++ b/mangadlp/metadata/ComicInfo.xml @@ -14,7 +14,7 @@ Unknown - Yes Unknown + Yes Downloaded with https://github.com/olofvndrhr/manga-dlp From f8b1013b68d41830726c52b4416aea2296344ace Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Tue, 14 Feb 2023 13:36:48 +0100 Subject: [PATCH 07/18] fix metadata test --- tests/test_07_metadata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_07_metadata.py b/tests/test_07_metadata.py index 6a11324..987b1ed 100644 --- a/tests/test_07_metadata.py +++ b/tests/test_07_metadata.py @@ -18,6 +18,7 @@ def test_metadata_creation(): "Summary": "summary1", "Genre": "genre1", "Web": "https://mangadex.org", + "Format": "cbz", } write_metadata(metadata_path, metadata) From 931a53686053cd8a51c8642f70e248876bb1a4ea Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Tue, 14 Feb 2023 14:15:33 +0100 Subject: [PATCH 08/18] fix tests --- tests/ComicInfo_test.xml | 4 ++-- tests/test_07_metadata.py | 6 +++--- tests/test_11_api_mangadex.py | 2 +- tests/test_21_full.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/ComicInfo_test.xml b/tests/ComicInfo_test.xml index 4c8d2a6..d773dc5 100644 --- a/tests/ComicInfo_test.xml +++ b/tests/ComicInfo_test.xml @@ -10,11 +10,11 @@ https://mangadex.org 99 en - + cbz Unknown - Yes Unknown + Yes Downloaded with https://github.com/olofvndrhr/manga-dlp \ No newline at end of file diff --git a/tests/test_07_metadata.py b/tests/test_07_metadata.py index 987b1ed..b48fe0b 100644 --- a/tests/test_07_metadata.py +++ b/tests/test_07_metadata.py @@ -8,10 +8,10 @@ def test_metadata_creation(): metadata_path = Path("tests/") metadata_file = Path("tests/ComicInfo.xml") metadata = { - "Volume": "1", + "Volume": 1, "Number": "2", - "PageCount": "99", - "Count": "10", + "PageCount": 99, + "Count": 10, "LanguageISO": "en", "Title": "title1", "Series": "series1", diff --git a/tests/test_11_api_mangadex.py b/tests/test_11_api_mangadex.py index 9ef1868..38ace21 100644 --- a/tests/test_11_api_mangadex.py +++ b/tests/test_11_api_mangadex.py @@ -256,7 +256,7 @@ def test_chapter_metadata(): assert (manga_name, chapter_name, chapter_volume, chapter_num, chapter_url) == ( "Komi-san wa Komyushou Desu", "A Normal Person", - "1", + 1, "1", "https://mangadex.org/title/a96676e5-8ae2-425e-b549-7f15dd34a6d8", ) diff --git a/tests/test_21_full.py b/tests/test_21_full.py index fc05210..5809e4f 100644 --- a/tests/test_21_full.py +++ b/tests/test_21_full.py @@ -107,7 +107,7 @@ def test_full_with_input_folder(wait_20s): download_path = "tests" manga_path = Path("tests/Shikimori's Not Just a Cutie") chapter_path = Path("tests/Shikimori's Not Just a Cutie/Ch. 1") - metadata_path = Path("tests/Shikimori's Not Just a Cutie/ComicInfo.xml") + metadata_path = Path("tests/Shikimori's Not Just a Cutie/Ch. 1/ComicInfo.xml") command_args = f"-u {url_uuid} -l {language} -c {chapters} --path {download_path} --format '{file_format}' --debug --wait 2" script_path = "manga-dlp.py" os.system(f"python3 {script_path} {command_args}") From 3368b18677dec1520369967e1ac087d1e89a2770 Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Tue, 14 Feb 2023 14:37:47 +0100 Subject: [PATCH 09/18] add metadata flag --- mangadlp/app.py | 19 +++++++++++-------- mangadlp/cli.py | 11 +++++++++++ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/mangadlp/app.py b/mangadlp/app.py index 769ed17..aff8da1 100644 --- a/mangadlp/app.py +++ b/mangadlp/app.py @@ -45,6 +45,7 @@ class MangaDLP: chapter_pre_hook_cmd: str = "", chapter_post_hook_cmd: str = "", cache_path: str = "", + add_metadata: bool = True, ) -> None: # init parameters self.url_uuid = url_uuid @@ -63,6 +64,7 @@ class MangaDLP: self.chapter_post_hook_cmd = chapter_post_hook_cmd self.hook_infos: dict = {} self.cache_path = cache_path + self.add_metadata = add_metadata # prepare everything self._prepare() @@ -224,14 +226,15 @@ class MangaDLP: continue # add metadata - try: - metadata = self.api.create_metadata(chapter) - write_metadata( - chapter_path, - {"Format": self.file_format.removeprefix("."), **metadata}, - ) - except Exception: - log.warning(f"Can't write metadata for chapter '{chapter}'") + if self.add_metadata: + try: + metadata = self.api.create_metadata(chapter) + write_metadata( + chapter_path, + {"Format": self.file_format.removeprefix("."), **metadata}, + ) + except Exception: + log.warning(f"Can't write metadata for chapter '{chapter}'") # pack downloaded folder if self.file_format: diff --git a/mangadlp/cli.py b/mangadlp/cli.py index 7f8361c..6bff791 100644 --- a/mangadlp/cli.py +++ b/mangadlp/cli.py @@ -217,6 +217,15 @@ def readin_list(_ctx, _param, value) -> list: show_default=True, help="Where to store the cache-db. If no path is given, cache is disabled", ) +@click.option( + "--add-metadata/--no-metadata", + "add_metadata", + is_flag=True, + default=True, + required=False, + show_default=True, + help="Enable/disable creation of metadata via ComicInfo.xml", +) @click.pass_context def main( ctx: click.Context, @@ -237,6 +246,7 @@ def main( hook_chapter_pre: str, hook_chapter_post: str, cache_path: str, + add_metadata: bool, ): # pylint: disable=too-many-locals """ Script to download mangas from various sites @@ -274,6 +284,7 @@ def main( chapter_pre_hook_cmd=hook_chapter_pre, chapter_post_hook_cmd=hook_chapter_post, cache_path=cache_path, + add_metadata=add_metadata, ) mdlp.get_manga() except (KeyboardInterrupt, Exception) as exc: From d7c3d511feeccca1fd2ab7fd9009ff7ae86e2461 Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Wed, 15 Feb 2023 13:50:59 +0100 Subject: [PATCH 10/18] update metadata generation and add validation tests --- contrib/requirements_dev.txt | 1 + mangadlp/metadata.py | 147 ++++++++++++++++++++++---------- mangadlp/metadata/ComicInfo.xml | 20 ----- tests/ComicInfo_test.xml | 6 +- tests/test_07_metadata.py | 39 +++++++++ 5 files changed, 142 insertions(+), 71 deletions(-) delete mode 100644 mangadlp/metadata/ComicInfo.xml diff --git a/contrib/requirements_dev.txt b/contrib/requirements_dev.txt index 75626f5..a2aa1f6 100644 --- a/contrib/requirements_dev.txt +++ b/contrib/requirements_dev.txt @@ -4,6 +4,7 @@ loguru>=0.6.0 click>=8.1.3 click-option-group>=0.5.5 xmltodict>=0.13.0 +xmlschema>=2.2.1 img2pdf>=0.4.4 diff --git a/mangadlp/metadata.py b/mangadlp/metadata.py index aa49234..f0fa425 100644 --- a/mangadlp/metadata.py +++ b/mangadlp/metadata.py @@ -1,63 +1,118 @@ from pathlib import Path +from typing import Any import xmltodict from loguru import logger as log +METADATA_TEMPLATE = Path("mangadlp/metadata/ComicInfo_v2.0.xml") +# define metadata types and valid values. an empty list means no value check +METADATA_TYPES: dict[str, tuple[type, Any, list]] = { + "Title": (str, None, []), + "Series": (str, None, []), + "Number": (str, None, []), + "Count": (int, None, []), + "Volume": (int, None, []), + "AlternateSeries": (str, None, []), + "AlternateNumber": (str, None, []), + "AlternateCount": (int, None, []), + "Summary": (str, None, []), + "Notes": (str, "Downloaded with https://github.com/olofvndrhr/manga-dlp", []), + "Year": (int, None, []), + "Month": (int, None, []), + "Day": (int, None, []), + "Writer": (str, None, []), + "Colorist": (str, None, []), + "Publisher": (str, None, []), + "Genre": (str, None, []), + "Web": (str, None, []), + "PageCount": (int, None, []), + "LanguageISO": (str, None, []), + "Format": (str, None, []), + "BlackAndWhite": (str, None, ["Yes", "No", "Unknown"]), + "Manga": (str, "Yes", ["Yes", "No", "Unknown", "YesAndRightToLeft"]), + "ScanInformation": (str, None, []), + "SeriesGroup": (str, None, []), + "AgeRating": ( + str, + None, + [ + "Unknown", + "Adults Only 18+", + "Early Childhood", + "Everyone", + "Everyone 10+", + "G", + "Kids to Adults", + "M", + "MA15+", + "Mature 17+", + "PG", + "R18+", + "Rating Pending", + "Teen", + "X18+", + ], + ), + "CommunityRating": (int, None, [1, 2, 3, 4, 5]), +} + + +def validate_metadata(metadata_in: dict) -> dict: + log.info("Validating metadata") + + metadata_valid: dict[str, dict] = {"ComicInfo": {}} + for key, value in METADATA_TYPES.items(): + metadata_type, metadata_default, metadata_validation = value + + # add default value if present + if metadata_default: + log.info(f"Setting default value for Key:{key} -> value={metadata_default}") + metadata_valid["ComicInfo"][key] = metadata_default + + # check if metadata key is available + try: + md_to_check = metadata_in[key] + except KeyError: + continue + # check if provided metadata item is empty + if not md_to_check: + continue + + # check if metadata type is correct + log.debug(f"Key:{key} -> value={type(md_to_check)} -> check={metadata_type}") + if not isinstance(md_to_check, metadata_type): # noqa + log.warning( + f"Metadata has wrong type: {key}:{metadata_type} -> {md_to_check}" + ) + continue + + # check if metadata is valid + log.debug(f"Key:{key} -> value={md_to_check} -> valid={metadata_validation}") + if (len(metadata_validation) > 0) and (md_to_check not in metadata_validation): + log.warning( + f"Metadata is invalid: {key}:{metadata_validation} -> {md_to_check}" + ) + continue + + log.debug(f"Updating metadata: '{key}' = '{md_to_check}'") + metadata_valid["ComicInfo"][key] = md_to_check + + return metadata_valid + def write_metadata(chapter_path: Path, metadata: dict) -> None: if metadata["Format"] == "pdf": log.warning("Can't add metadata for pdf format. Skipping") return - # define metadata types - metadata_types: dict[str, type] = { - "Title": str, - "Series": str, - "Number": str, - "Count": int, - "Volume": int, - "Summary": str, - "Genre": str, - "Web": str, - "PageCount": int, - "LanguageISO": str, - "Format": str, - "ScanInformation": str, - "SeriesGroup": str, - } - - try: - metadata_template = Path("mangadlp/metadata/ComicInfo.xml").read_text( - encoding="utf8" - ) - metadata_empty: dict[str, dict] = xmltodict.parse(metadata_template) - except Exception as exc: - log.error("Can't open or parse xml template") - raise exc metadata_file = chapter_path / "ComicInfo.xml" - log.info(f"Writing metadata to: '{metadata_file}'") log.debug(f"Metadata items: {metadata}") - for key, value in metadata.items(): - # check if metadata is empty - if not value: - continue - # try to match with template - try: - metadata_empty["ComicInfo"][key] - except KeyError: - continue - # check if metadata type is correct - log.debug(f"Key:{key} -> value={type(value)} -> check={metadata_types[key]}") - if not isinstance(value, metadata_types[key]): # noqa - log.warning( - f"Metadata has wrong type: {key}:{metadata_types[key]} -> {value}" - ) - continue + metadata_valid = validate_metadata(metadata) - log.debug(f"Updating metadata: '{key}' = '{value}'") - metadata_empty["ComicInfo"][key] = value - - metadata_export = xmltodict.unparse(metadata_empty, pretty=True, indent=" " * 4) + log.info(f"Writing metadata to: '{metadata_file}'") + metadata_export = xmltodict.unparse( + metadata_valid, pretty=True, indent=" " * 4, short_empty_elements=True + ) metadata_file.touch(exist_ok=True) metadata_file.write_text(metadata_export, encoding="utf8") diff --git a/mangadlp/metadata/ComicInfo.xml b/mangadlp/metadata/ComicInfo.xml deleted file mode 100644 index 5472a58..0000000 --- a/mangadlp/metadata/ComicInfo.xml +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - - - - - - - - - - Unknown - Unknown - Yes - Downloaded with https://github.com/olofvndrhr/manga-dlp - diff --git a/tests/ComicInfo_test.xml b/tests/ComicInfo_test.xml index d773dc5..de17013 100644 --- a/tests/ComicInfo_test.xml +++ b/tests/ComicInfo_test.xml @@ -6,15 +6,11 @@ 10 1 summary1 + Downloaded with https://github.com/olofvndrhr/manga-dlp genre1 https://mangadex.org 99 en cbz - - - Unknown - Unknown Yes - Downloaded with https://github.com/olofvndrhr/manga-dlp \ No newline at end of file diff --git a/tests/test_07_metadata.py b/tests/test_07_metadata.py index b48fe0b..2ef3c4b 100644 --- a/tests/test_07_metadata.py +++ b/tests/test_07_metadata.py @@ -1,5 +1,9 @@ +import shutil +import subprocess from pathlib import Path +import xmlschema + from mangadlp.metadata import write_metadata @@ -30,3 +34,38 @@ def test_metadata_creation(): # cleanup metadata_file.unlink() + + +def test_metadata_chapter_validity(): + url_uuid = "https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko" + manga_path = Path("tests/Tomo-chan wa Onna no ko") + metadata_path = Path( + "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire/ComicInfo.xml" + ) + language = "en" + chapters = "1" + download_path = "tests" + command_args = [ + "-u", + url_uuid, + "-l", + language, + "-c", + chapters, + "--path", + download_path, + "--format", + "", + "--debug", + ] + schema = xmlschema.XMLSchema("mangadlp/metadata/ComicInfo_v2.0.xsd") + + script_path = "manga-dlp.py" + command = ["python3", script_path] + command_args + + assert subprocess.call(command) == 0 + assert metadata_path.is_file() + assert schema.is_valid(metadata_path) + + # cleanup + shutil.rmtree(manga_path, ignore_errors=True) From 4d5b0f4dee6a392d819a6a5e842fc9dd15f7c26a Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Wed, 15 Feb 2023 14:21:45 +0100 Subject: [PATCH 11/18] update changelog/docs etc for new metadata feature --- CHANGELOG.md | 2 +- MANIFEST.in | 3 ++ README.md | 1 + contrib/api_template.py | 68 ++++++++++++++++++++++++++++++++--------- docs/pages/download.md | 13 ++++++++ docs/pages/index.md | 1 + mangadlp/__about__.py | 2 +- mangadlp/metadata.py | 6 ++-- pyproject.toml | 3 ++ 9 files changed, 81 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1399a3c..d3b3268 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - Add support for more sites -## [2.2.21] - 2023-02-11 +## [2.3.0] - 2023-02-11 ### Added diff --git a/MANIFEST.in b/MANIFEST.in index 77e9413..6b91978 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,7 +4,10 @@ include *.properties include *.py include *.txt include *.yml +include *.xml recursive-include contrib *.py recursive-include mangadlp *.py +recursive-include mangadlp *.xml recursive-include tests *.py +recursive-include tests *.xml recursive-include tests *.txt diff --git a/README.md b/README.md index 955a5ec..23a8f23 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,7 @@ verbosity: [mutually_exclusive] --hook-chapter-pre TEXT Commands to execute before the chapter download starts --hook-chapter-post TEXT Commands to execute after the chapter download finished --cache-path PATH Where to store the cache-db. If no path is given, cache is disabled +--add-metadata / --no-metadata Enable/disable creation of metadata via ComicInfo.xml [default: add-metadata] ``` ## Contribution / Bugs diff --git a/contrib/api_template.py b/contrib/api_template.py index 7560372..63b544c 100644 --- a/contrib/api_template.py +++ b/contrib/api_template.py @@ -1,4 +1,5 @@ # api template for manga-dlp +from typing import Any class YourAPI: @@ -97,21 +98,60 @@ class YourAPI: The metadata as a dict """ - # metadata types. have to be correct to be valid + # metadata types. have to be valid + # {key: (type, default value, valid values)} { - "Title": str, - "Series": str, - "Number": str, - "Count": int, - "Volume": int, - "Summary": str, - "Genre": str, - "Web": str, - "PageCount": int, - "LanguageISO": str, - "Format": str, - "ScanInformation": str, - "SeriesGroup": str, + "Title": (str, None, []), + "Series": (str, None, []), + "Number": (str, None, []), + "Count": (int, None, []), + "Volume": (int, None, []), + "AlternateSeries": (str, None, []), + "AlternateNumber": (str, None, []), + "AlternateCount": (int, None, []), + "Summary": (str, None, []), + "Notes": ( + str, + "Downloaded with https://github.com/olofvndrhr/manga-dlp", + [], + ), + "Year": (int, None, []), + "Month": (int, None, []), + "Day": (int, None, []), + "Writer": (str, None, []), + "Colorist": (str, None, []), + "Publisher": (str, None, []), + "Genre": (str, None, []), + "Web": (str, None, []), + "PageCount": (int, None, []), + "LanguageISO": (str, None, []), + "Format": (str, None, []), + "BlackAndWhite": (str, None, ["Yes", "No", "Unknown"]), + "Manga": (str, "Yes", ["Yes", "No", "Unknown", "YesAndRightToLeft"]), + "ScanInformation": (str, None, []), + "SeriesGroup": (str, None, []), + "AgeRating": ( + str, + None, + [ + "Unknown", + "Adults Only 18+", + "Early Childhood", + "Everyone", + "Everyone 10+", + "G", + "Kids to Adults", + "M", + "MA15+", + "Mature 17+", + "PG", + "R18+", + "Rating Pending", + "Teen", + "X18+", + ], + ), + "CommunityRating": (int, None, [1, 2, 3, 4, 5]), } # example diff --git a/docs/pages/download.md b/docs/pages/download.md index e71e131..93e465f 100644 --- a/docs/pages/download.md +++ b/docs/pages/download.md @@ -7,6 +7,10 @@ └── / └── / └── / + └── ComicInfo.xml (optional) + └── 001.png + └── 002.png + └── etc. ``` **Example:** @@ -167,3 +171,12 @@ chapters will be tracked there, and the script doesn't have to check on disk if you already downloaded it. If the option is unset (default), then no caching will be done. + +## Add metadata + +manga-dlp supports the creation of metadata files in the downloaded chapter. +The metadata is based on the newer [ComicRack/Anansi](https://anansi-project.github.io/docs/introduction) standard. +The default option is to add the metadata in the folder/archive with the name `ComicInfo.xml`. +If you don't want metadata, you can pass the `--no-metadata` flag. + +> pdf format does not support metadata at the moment \ No newline at end of file diff --git a/docs/pages/index.md b/docs/pages/index.md index db1dcdc..7dbc2ac 100644 --- a/docs/pages/index.md +++ b/docs/pages/index.md @@ -115,6 +115,7 @@ verbosity: [mutually_exclusive] --hook-chapter-pre TEXT Commands to execute before the chapter download starts --hook-chapter-post TEXT Commands to execute after the chapter download finished --cache-path PATH Where to store the cache-db. If no path is given, cache is disabled +--add-metadata / --no-metadata Enable/disable creation of metadata via ComicInfo.xml [default: add-metadata] ``` ## Contribution / Bugs diff --git a/mangadlp/__about__.py b/mangadlp/__about__.py index 20868fd..55e4709 100644 --- a/mangadlp/__about__.py +++ b/mangadlp/__about__.py @@ -1 +1 @@ -__version__ = "2.2.21" +__version__ = "2.3.0" diff --git a/mangadlp/metadata.py b/mangadlp/metadata.py index f0fa425..d32cc12 100644 --- a/mangadlp/metadata.py +++ b/mangadlp/metadata.py @@ -4,8 +4,10 @@ from typing import Any import xmltodict from loguru import logger as log +METADATA_FILENAME = "ComicInfo.xml" METADATA_TEMPLATE = Path("mangadlp/metadata/ComicInfo_v2.0.xml") -# define metadata types and valid values. an empty list means no value check +# define metadata types, defaults and valid values. an empty list means no value check +# {key: (type, default value, valid values)} METADATA_TYPES: dict[str, tuple[type, Any, list]] = { "Title": (str, None, []), "Series": (str, None, []), @@ -105,7 +107,7 @@ def write_metadata(chapter_path: Path, metadata: dict) -> None: log.warning("Can't add metadata for pdf format. Skipping") return - metadata_file = chapter_path / "ComicInfo.xml" + metadata_file = chapter_path / METADATA_FILENAME log.debug(f"Metadata items: {metadata}") metadata_valid = validate_metadata(metadata) diff --git a/pyproject.toml b/pyproject.toml index 3ba405c..9762d3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ dependencies = [ "loguru>=0.6.0", "click>=8.1.3", "click-option-group>=0.5.5", + "xmltodict>=0.13.0" ] [project.urls] @@ -60,6 +61,8 @@ dependencies = [ "loguru>=0.6.0", "click>=8.1.3", "click-option-group>=0.5.5", + "xmltodict>=0.13.0", + "xmlschema>=2.2.1", "img2pdf>=0.4.4", "hatch>=1.6.0", "hatchling>=1.11.0", From 879e62b4d378458d8217acedacc848e10becff3f Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Wed, 15 Feb 2023 16:00:55 +0100 Subject: [PATCH 12/18] add new metadata tests --- tests/test_07_metadata.py | 68 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/tests/test_07_metadata.py b/tests/test_07_metadata.py index 2ef3c4b..9fbc37c 100644 --- a/tests/test_07_metadata.py +++ b/tests/test_07_metadata.py @@ -4,7 +4,7 @@ from pathlib import Path import xmlschema -from mangadlp.metadata import write_metadata +from mangadlp.metadata import validate_metadata, write_metadata def test_metadata_creation(): @@ -36,6 +36,72 @@ def test_metadata_creation(): metadata_file.unlink() +def test_metadata_validation(): + metadata = { + "Volume": "1", # invalid + "Number": "2", + "PageCount": "99", # invalid + "Count": "10", # invalid + "LanguageISO": 1, # invalid + "Title": "title1", + "Series": "series1", + "Summary": "summary1", + "Genre": "genre1", + "Web": "https://mangadex.org", + "Format": "cbz", + } + + valid_metadata = validate_metadata(metadata) + + assert valid_metadata["ComicInfo"] == { + "Title": "title1", + "Series": "series1", + "Number": "2", + "Summary": "summary1", + "Notes": "Downloaded with https://github.com/olofvndrhr/manga-dlp", + "Genre": "genre1", + "Web": "https://mangadex.org", + "Format": "cbz", + "Manga": "Yes", + } + + +def test_metadata_validation_values(): + metadata = { + "BlackAndWhite": "No", + "Manga": "YesAndRightToLeft", + "AgeRating": "Rating Pending", + "CommunityRating": 4, + } + + valid_metadata = validate_metadata(metadata) + + assert valid_metadata["ComicInfo"] == { + "Notes": "Downloaded with https://github.com/olofvndrhr/manga-dlp", + "BlackAndWhite": "No", + "Manga": "YesAndRightToLeft", + "AgeRating": "Rating Pending", + "CommunityRating": 4, + } + + +def test_metadata_validation_values2(): + metadata = { + "BlackAndWhite": "No", + "Manga": "YesAndRightToLeft", + "AgeRating": "12+", + "CommunityRating": 10, + } + + valid_metadata = validate_metadata(metadata) + + assert valid_metadata["ComicInfo"] == { + "Notes": "Downloaded with https://github.com/olofvndrhr/manga-dlp", + "BlackAndWhite": "No", + "Manga": "YesAndRightToLeft", + } + + def test_metadata_chapter_validity(): url_uuid = "https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko" manga_path = Path("tests/Tomo-chan wa Onna no ko") From 0c2511a5f85891b855234a0225455ddabe7a3cf7 Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Wed, 15 Feb 2023 16:02:24 +0100 Subject: [PATCH 13/18] typos --- README.md | 2 +- docs/pages/index.md | 2 +- mangadlp/cli.py | 2 +- tests/test_07_metadata.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 23a8f23..f569f1d 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,7 @@ verbosity: [mutually_exclusive] -p, --path PATH Download path [default: downloads] -l, --language TEXT Manga language [default: en] --list List all available chapters ---format [cbz|cbr|zip|pdf|] Archive format to create. An empty string means dont archive the folder [default: cbz] +--format [cbz|cbr|zip|pdf|] Archive format to create. An empty string means don't archive the folder [default: cbz] --name-format TEXT Naming format to use when saving chapters. See docs for more infos [default: {default}] --name-format-none TEXT String to use when the variable of the custom name format is empty --forcevol Force naming of volumes. For mangas where chapters reset each volume diff --git a/docs/pages/index.md b/docs/pages/index.md index 7dbc2ac..efca1e0 100644 --- a/docs/pages/index.md +++ b/docs/pages/index.md @@ -105,7 +105,7 @@ verbosity: [mutually_exclusive] -p, --path PATH Download path [default: downloads] -l, --language TEXT Manga language [default: en] --list List all available chapters ---format [cbz|cbr|zip|pdf|] Archive format to create. An empty string means dont archive the folder [default: cbz] +--format [cbz|cbr|zip|pdf|] Archive format to create. An empty string means don't archive the folder [default: cbz] --name-format TEXT Naming format to use when saving chapters. See docs for more infos [default: {default}] --name-format-none TEXT String to use when the variable of the custom name format is empty --forcevol Force naming of volumes. For mangas where chapters reset each volume diff --git a/mangadlp/cli.py b/mangadlp/cli.py index 6bff791..36803e9 100644 --- a/mangadlp/cli.py +++ b/mangadlp/cli.py @@ -133,7 +133,7 @@ def readin_list(_ctx, _param, value) -> list: default="cbz", required=False, show_default=True, - help="Archive format to create. An empty string means dont archive the folder", + help="Archive format to create. An empty string means don't archive the folder", ) @click.option( "--name-format", diff --git a/tests/test_07_metadata.py b/tests/test_07_metadata.py index 9fbc37c..ea407df 100644 --- a/tests/test_07_metadata.py +++ b/tests/test_07_metadata.py @@ -89,8 +89,8 @@ def test_metadata_validation_values2(): metadata = { "BlackAndWhite": "No", "Manga": "YesAndRightToLeft", - "AgeRating": "12+", - "CommunityRating": 10, + "AgeRating": "12+", # invalid + "CommunityRating": 10, # invalid } valid_metadata = validate_metadata(metadata) From 042e8b736c33544e8a1f868df4b6106287594d85 Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Wed, 15 Feb 2023 16:47:43 +0100 Subject: [PATCH 14/18] fix metadata error when no volume present --- mangadlp/api/mangadex.py | 12 ++++++++---- mangadlp/metadata.py | 4 +++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/mangadlp/api/mangadex.py b/mangadlp/api/mangadex.py index 1ad23ce..12abf93 100644 --- a/mangadlp/api/mangadex.py +++ b/mangadlp/api/mangadex.py @@ -267,11 +267,15 @@ class Mangadex: log.info("Creating metadata from api") chapter_data = self.manga_chapter_data[chapter] + try: + volume = int(chapter_data.get("volume")) + except (ValueError, TypeError): + volume = None metadata = { - "Volume": int(chapter_data["volume"]), - "Number": chapter_data["chapter"], - "PageCount": chapter_data["pages"], - "Title": chapter_data["name"], + "Volume": volume, + "Number": chapter_data.get("chapter"), + "PageCount": chapter_data.get("pages"), + "Title": chapter_data.get("name"), "Series": self.manga_title, "Count": len(self.manga_chapter_data), "LanguageISO": self.language, diff --git a/mangadlp/metadata.py b/mangadlp/metadata.py index d32cc12..3f7cc78 100644 --- a/mangadlp/metadata.py +++ b/mangadlp/metadata.py @@ -68,7 +68,9 @@ def validate_metadata(metadata_in: dict) -> dict: # add default value if present if metadata_default: - log.info(f"Setting default value for Key:{key} -> value={metadata_default}") + log.debug( + f"Setting default value for Key:{key} -> value={metadata_default}" + ) metadata_valid["ComicInfo"][key] = metadata_default # check if metadata key is available From ef937f4ed051204a9080ac002cb0d8c33acc64e8 Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Wed, 15 Feb 2023 18:46:37 +0100 Subject: [PATCH 15/18] fix release date [CI SKIP] --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3b3268..30852bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - Add support for more sites -## [2.3.0] - 2023-02-11 +## [2.3.0] - 2023-02-15 ### Added From ce6ebc429167358589ceb35740ce4837243b15c0 Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Wed, 15 Feb 2023 20:17:22 +0100 Subject: [PATCH 16/18] update typo annotations and add new test --- CHANGELOG.md | 1 + contrib/api_template.py | 1 - mangadlp/app.py | 27 +++++---- mangadlp/cache.py | 10 ++-- mangadlp/cli.py | 61 +++++-------------- mangadlp/metadata.py | 6 +- mangadlp/utils.py | 4 +- tests/test_21_full.py | 119 +++++++++++++++++++++++++++---------- tests/test_22_all_flags.py | 54 +++++++++++++++++ tests/test_list2.txt | 2 +- 10 files changed, 185 insertions(+), 100 deletions(-) create mode 100644 tests/test_22_all_flags.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 30852bb..676caa1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - Added `xmltodict` as a package requirement - Cache now also saves the manga title - New tests +- More typo annotations for function, compatible with python3.8 ### Fixed diff --git a/contrib/api_template.py b/contrib/api_template.py index 63b544c..aa95174 100644 --- a/contrib/api_template.py +++ b/contrib/api_template.py @@ -1,5 +1,4 @@ # api template for manga-dlp -from typing import Any class YourAPI: diff --git a/mangadlp/app.py b/mangadlp/app.py index aff8da1..9f0c93c 100644 --- a/mangadlp/app.py +++ b/mangadlp/app.py @@ -17,18 +17,23 @@ class MangaDLP: After initialization, start the script with the function get_manga(). Args: - url_uuid (str): URL or UUID of the manga - language (str): Manga language with country codes. "en" --> english - chapters (str): Chapters to download, "all" for every chapter available - list_chapters (bool): List all available chapters and exit - file_format (str): Archive format to create. An empty string means don't archive the folder - forcevol (bool): Force naming of volumes. Useful for mangas where chapters reset each volume - download_path (str/Path): Download path. Defaults to '/downloads' - download_wait (float): Time to wait for each picture to download in seconds - + url_uuid: URL or UUID of the manga + language: Manga language with country codes. "en" --> english + chapters: Chapters to download, "all" for every chapter available + list_chapters: List all available chapters and exit + file_format: Archive format to create. An empty string means don't archive the folder + forcevol: Force naming of volumes. Useful for mangas where chapters reset each volume + download_path: Download path. Defaults to '/downloads' + download_wait: Time to wait for each picture to download in seconds + manga_pre_hook_cmd: Command(s) to before after each manga + manga_post_hook_cmd: Command(s) to run after each manga + chapter_pre_hook_cmd: Command(s) to run before each chapter + chapter_post_hook_cmd: Command(s) to run after each chapter + cache_path: Path to the json cache. If emitted, no cache is used + add_metadata: Flag to toggle creation & inclusion of metadata """ - def __init__( + def __init__( # pylint: disable=too-many-locals self, url_uuid: str, language: str = "en", @@ -62,9 +67,9 @@ class MangaDLP: self.manga_post_hook_cmd = manga_post_hook_cmd self.chapter_pre_hook_cmd = chapter_pre_hook_cmd self.chapter_post_hook_cmd = chapter_post_hook_cmd - self.hook_infos: dict = {} self.cache_path = cache_path self.add_metadata = add_metadata + self.hook_infos: dict = {} # prepare everything self._prepare() diff --git a/mangadlp/cache.py b/mangadlp/cache.py index b6621d5..a2077b5 100644 --- a/mangadlp/cache.py +++ b/mangadlp/cache.py @@ -1,6 +1,6 @@ import json from pathlib import Path -from typing import Union +from typing import Dict, List, Union from loguru import logger as log @@ -33,7 +33,7 @@ class CacheDB: self.db_uuid_chapters: list = self.db_uuid_data.get("chapters") or [] - def _prepare_db(self): + def _prepare_db(self) -> None: if self.db_path.exists(): return # create empty cache @@ -44,11 +44,11 @@ class CacheDB: log.error("Can't create db-file") raise exc - def _read_db(self) -> dict: + def _read_db(self) -> Dict[str, dict]: log.info(f"Reading cache-db: {self.db_path}") try: db_txt = self.db_path.read_text(encoding="utf8") - db_dict: dict = json.loads(db_txt) + db_dict: dict[str, dict] = json.loads(db_txt) except Exception as exc: log.error("Can't load cache-db") raise exc @@ -73,7 +73,7 @@ class CacheDB: raise exc -def sort_chapters(chapters: list) -> list: +def sort_chapters(chapters: list) -> List[str]: try: sorted_list = sorted(chapters, key=float) except Exception: diff --git a/mangadlp/cli.py b/mangadlp/cli.py index 36803e9..b62e1cc 100644 --- a/mangadlp/cli.py +++ b/mangadlp/cli.py @@ -99,7 +99,7 @@ def readin_list(_ctx, _param, value) -> list: @click.option( "-p", "--path", - "path", + "download_path", type=click.Path(exists=False, writable=True, path_type=Path), default="downloads", required=False, @@ -109,7 +109,7 @@ def readin_list(_ctx, _param, value) -> list: @click.option( "-l", "--language", - "lang", + "language", type=str, default="en", required=False, @@ -127,7 +127,7 @@ def readin_list(_ctx, _param, value) -> list: ) @click.option( "--format", - "chapter_format", + "file_format", multiple=False, type=click.Choice(["cbz", "cbr", "zip", "pdf", ""], case_sensitive=False), default="cbz", @@ -164,7 +164,7 @@ def readin_list(_ctx, _param, value) -> list: ) @click.option( "--wait", - "wait_time", + "download_wait", type=float, default=0.5, required=False, @@ -174,7 +174,7 @@ def readin_list(_ctx, _param, value) -> list: # hook options @click.option( "--hook-manga-pre", - "hook_manga_pre", + "manga_pre_hook_cmd", type=str, default=None, required=False, @@ -183,7 +183,7 @@ def readin_list(_ctx, _param, value) -> list: ) @click.option( "--hook-manga-post", - "hook_manga_post", + "manga_post_hook_cmd", type=str, default=None, required=False, @@ -192,7 +192,7 @@ def readin_list(_ctx, _param, value) -> list: ) @click.option( "--hook-chapter-pre", - "hook_chapter_pre", + "chapter_pre_hook_cmd", type=str, default=None, required=False, @@ -201,7 +201,7 @@ def readin_list(_ctx, _param, value) -> list: ) @click.option( "--hook-chapter-post", - "hook_chapter_post", + "chapter_post_hook_cmd", type=str, default=None, required=False, @@ -227,32 +227,16 @@ def readin_list(_ctx, _param, value) -> list: help="Enable/disable creation of metadata via ComicInfo.xml", ) @click.pass_context -def main( - ctx: click.Context, - url_uuid: str, - read_mangas: list, - verbosity: int, - chapters: str, - path: Path, - lang: str, - list_chapters: bool, - chapter_format: str, - name_format: str, - name_format_none: str, - forcevol: bool, - wait_time: float, - hook_manga_pre: str, - hook_manga_post: str, - hook_chapter_pre: str, - hook_chapter_post: str, - cache_path: str, - add_metadata: bool, -): # pylint: disable=too-many-locals +def main(ctx: click.Context, **kwargs) -> None: """ Script to download mangas from various sites """ + url_uuid: str = kwargs.pop("url_uuid") + read_mangas: list[str] = kwargs.pop("read_mangas") + verbosity: int = kwargs.pop("verbosity") + # set log level to INFO if not set if not verbosity: verbosity = 20 @@ -268,24 +252,7 @@ def main( for manga in requested_mangas: try: - mdlp = app.MangaDLP( - url_uuid=manga, - language=lang, - chapters=chapters, - list_chapters=list_chapters, - file_format=chapter_format, - name_format=name_format, - name_format_none=name_format_none, - forcevol=forcevol, - download_path=path, - download_wait=wait_time, - manga_pre_hook_cmd=hook_manga_pre, - manga_post_hook_cmd=hook_manga_post, - chapter_pre_hook_cmd=hook_chapter_pre, - chapter_post_hook_cmd=hook_chapter_post, - cache_path=cache_path, - add_metadata=add_metadata, - ) + mdlp = app.MangaDLP(url_uuid=manga, **kwargs) mdlp.get_manga() except (KeyboardInterrupt, Exception) as exc: # if only a single manga is requested and had an error, then exit diff --git a/mangadlp/metadata.py b/mangadlp/metadata.py index 3f7cc78..343a3cc 100644 --- a/mangadlp/metadata.py +++ b/mangadlp/metadata.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Any +from typing import Any, Dict, Tuple import xmltodict from loguru import logger as log @@ -8,7 +8,7 @@ METADATA_FILENAME = "ComicInfo.xml" METADATA_TEMPLATE = Path("mangadlp/metadata/ComicInfo_v2.0.xml") # define metadata types, defaults and valid values. an empty list means no value check # {key: (type, default value, valid values)} -METADATA_TYPES: dict[str, tuple[type, Any, list]] = { +METADATA_TYPES: Dict[str, Tuple[type, Any, list]] = { "Title": (str, None, []), "Series": (str, None, []), "Number": (str, None, []), @@ -59,7 +59,7 @@ METADATA_TYPES: dict[str, tuple[type, Any, list]] = { } -def validate_metadata(metadata_in: dict) -> dict: +def validate_metadata(metadata_in: dict) -> Dict[str, dict]: log.info("Validating metadata") metadata_valid: dict[str, dict] = {"ComicInfo": {}} diff --git a/mangadlp/utils.py b/mangadlp/utils.py index 91fe552..4d9afc5 100644 --- a/mangadlp/utils.py +++ b/mangadlp/utils.py @@ -1,7 +1,7 @@ import re from datetime import datetime from pathlib import Path -from typing import Any +from typing import Any, List from zipfile import ZipFile from loguru import logger as log @@ -41,7 +41,7 @@ def make_pdf(chapter_path: Path) -> None: # create a list of chapters -def get_chapter_list(chapters: str, available_chapters: list) -> list: +def get_chapter_list(chapters: str, available_chapters: list) -> List[str]: # check if there are available chapter chapter_list: list[str] = [] for chapter in chapters.split(","): diff --git a/tests/test_21_full.py b/tests/test_21_full.py index 5809e4f..1cbb7f7 100644 --- a/tests/test_21_full.py +++ b/tests/test_21_full.py @@ -22,10 +22,12 @@ def wait_20s(): def test_full_api_mangadex(wait_20s): - manga_path = Path("tests/Shikimori's Not Just a Cutie") - chapter_path = Path("tests/Shikimori's Not Just a Cutie/Ch. 1.cbz") + manga_path = Path("tests/Tomo-chan wa Onna no ko") + chapter_path = Path( + "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire.cbz" + ) mdlp = app.MangaDLP( - url_uuid="https://mangadex.org/title/0aea9f43-d4a9-4bf7-bebc-550a512f9b95/shikimori-s-not-just-a-cutie", + url_uuid="https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko", language="en", chapters="1", list_chapters=False, @@ -43,13 +45,15 @@ def test_full_api_mangadex(wait_20s): def test_full_with_input_cbz(wait_20s): - url_uuid = "https://mangadex.org/title/0aea9f43-d4a9-4bf7-bebc-550a512f9b95/shikimori-s-not-just-a-cutie" + url_uuid = "https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko" language = "en" chapters = "1" file_format = "cbz" download_path = "tests" - manga_path = Path("tests/Shikimori's Not Just a Cutie") - chapter_path = Path("tests/Shikimori's Not Just a Cutie/Ch. 1.cbz") + manga_path = Path("tests/Tomo-chan wa Onna no ko") + chapter_path = Path( + "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire.cbz" + ) command_args = f"-u {url_uuid} -l {language} -c {chapters} --path {download_path} --format {file_format} --debug --wait 2" script_path = "manga-dlp.py" os.system(f"python3 {script_path} {command_args}") @@ -61,13 +65,15 @@ def test_full_with_input_cbz(wait_20s): def test_full_with_input_cbz_info(wait_20s): - url_uuid = "https://mangadex.org/title/0aea9f43-d4a9-4bf7-bebc-550a512f9b95/shikimori-s-not-just-a-cutie" + url_uuid = "https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko" language = "en" chapters = "1" file_format = "cbz" download_path = "tests" - manga_path = Path("tests/Shikimori's Not Just a Cutie") - chapter_path = Path("tests/Shikimori's Not Just a Cutie/Ch. 1.cbz") + manga_path = Path("tests/Tomo-chan wa Onna no ko") + chapter_path = Path( + "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire.cbz" + ) command_args = f"-u {url_uuid} -l {language} -c {chapters} --path {download_path} --format {file_format} --wait 2" script_path = "manga-dlp.py" os.system(f"python3 {script_path} {command_args}") @@ -82,13 +88,15 @@ def test_full_with_input_cbz_info(wait_20s): platform.machine() != "x86_64", reason="pdf only supported on amd64" ) def test_full_with_input_pdf(wait_20s): - url_uuid = "https://mangadex.org/title/0aea9f43-d4a9-4bf7-bebc-550a512f9b95/shikimori-s-not-just-a-cutie" + url_uuid = "https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko" language = "en" chapters = "1" file_format = "pdf" download_path = "tests" - manga_path = Path("tests/Shikimori's Not Just a Cutie") - chapter_path = Path("tests/Shikimori's Not Just a Cutie/Ch. 1.pdf") + manga_path = Path("tests/Tomo-chan wa Onna no ko") + chapter_path = Path( + "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire.pdf" + ) command_args = f"-u {url_uuid} -l {language} -c {chapters} --path {download_path} --format {file_format} --debug --wait 2" script_path = "manga-dlp.py" os.system(f"python3 {script_path} {command_args}") @@ -100,14 +108,18 @@ def test_full_with_input_pdf(wait_20s): def test_full_with_input_folder(wait_20s): - url_uuid = "https://mangadex.org/title/0aea9f43-d4a9-4bf7-bebc-550a512f9b95/shikimori-s-not-just-a-cutie" + url_uuid = "https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko" language = "en" chapters = "1" file_format = "" download_path = "tests" - manga_path = Path("tests/Shikimori's Not Just a Cutie") - chapter_path = Path("tests/Shikimori's Not Just a Cutie/Ch. 1") - metadata_path = Path("tests/Shikimori's Not Just a Cutie/Ch. 1/ComicInfo.xml") + manga_path = Path("tests/Tomo-chan wa Onna no ko") + chapter_path = Path( + "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire" + ) + metadata_path = Path( + "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire/ComicInfo.xml" + ) command_args = f"-u {url_uuid} -l {language} -c {chapters} --path {download_path} --format '{file_format}' --debug --wait 2" script_path = "manga-dlp.py" os.system(f"python3 {script_path} {command_args}") @@ -120,13 +132,15 @@ def test_full_with_input_folder(wait_20s): def test_full_with_input_skip_cbz(wait_10s): - url_uuid = "https://mangadex.org/title/0aea9f43-d4a9-4bf7-bebc-550a512f9b95/shikimori-s-not-just-a-cutie" + url_uuid = "https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko" language = "en" chapters = "1" file_format = "cbz" download_path = "tests" - manga_path = Path("tests/Shikimori's Not Just a Cutie") - chapter_path = Path("tests/Shikimori's Not Just a Cutie/Ch. 1.cbz") + manga_path = Path("tests/Tomo-chan wa Onna no ko") + chapter_path = Path( + "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire.cbz" + ) command_args = f"-u {url_uuid} -l {language} -c {chapters} --path {download_path} --format {file_format} --debug --wait 2" script_path = "manga-dlp.py" manga_path.mkdir(parents=True, exist_ok=True) @@ -140,13 +154,15 @@ def test_full_with_input_skip_cbz(wait_10s): def test_full_with_input_skip_folder(wait_10s): - url_uuid = "https://mangadex.org/title/0aea9f43-d4a9-4bf7-bebc-550a512f9b95/shikimori-s-not-just-a-cutie" + url_uuid = "https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko" language = "en" chapters = "1" file_format = "" download_path = "tests" - manga_path = Path("tests/Shikimori's Not Just a Cutie") - chapter_path = Path("tests/Shikimori's Not Just a Cutie/Ch. 1") + manga_path = Path("tests/Tomo-chan wa Onna no ko") + chapter_path = Path( + "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire" + ) command_args = f"-u {url_uuid} -l {language} -c {chapters} --path {download_path} --format '{file_format}' --debug --wait 2" script_path = "manga-dlp.py" chapter_path.mkdir(parents=True, exist_ok=True) @@ -158,8 +174,12 @@ def test_full_with_input_skip_folder(wait_10s): assert chapter_path.is_dir() assert found_files == [] - assert not Path("tests/Shikimori's Not Just a Cutie/Ch. 1.cbz").exists() - assert not Path("tests/Shikimori's Not Just a Cutie/Ch. 1.zip").exists() + assert not Path( + "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire.cbz" + ).exists() + assert not Path( + "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire.zip" + ).exists() # cleanup shutil.rmtree(manga_path, ignore_errors=True) @@ -170,12 +190,14 @@ def test_full_with_read_cbz(wait_20s): chapters = "1" file_format = "cbz" download_path = "tests" - manga_path = Path("tests/Shikimori's Not Just a Cutie") - chapter_path = Path("tests/Shikimori's Not Just a Cutie/Ch. 1.cbz") + manga_path = Path("tests/Tomo-chan wa Onna no ko") + chapter_path = Path( + "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire.cbz" + ) command_args = f"--read {str(url_list)} -l {language} -c {chapters} --path {download_path} --format {file_format} --debug --wait 2" script_path = "manga-dlp.py" url_list.write_text( - "https://mangadex.org/title/0aea9f43-d4a9-4bf7-bebc-550a512f9b95/shikimori-s-not-just-a-cutie" + "https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko" ) os.system(f"python3 {script_path} {command_args}") @@ -192,14 +214,16 @@ def test_full_with_read_skip_cbz(wait_10s): chapters = "1" file_format = "cbz" download_path = "tests" - manga_path = Path("tests/Shikimori's Not Just a Cutie") - chapter_path = Path("tests/Shikimori's Not Just a Cutie/Ch. 1.cbz") + manga_path = Path("tests/Tomo-chan wa Onna no ko") + chapter_path = Path( + "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire.cbz" + ) command_args = f"--read {str(url_list)} -l {language} -c {chapters} --path {download_path} --format {file_format} --debug --wait 2" script_path = "manga-dlp.py" manga_path.mkdir(parents=True, exist_ok=True) chapter_path.touch() url_list.write_text( - "https://mangadex.org/title/0aea9f43-d4a9-4bf7-bebc-550a512f9b95/shikimori-s-not-just-a-cutie" + "https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko" ) os.system(f"python3 {script_path} {command_args}") @@ -209,6 +233,41 @@ def test_full_with_read_skip_cbz(wait_10s): shutil.rmtree(manga_path, ignore_errors=True) +def test_full_with_all_flags(wait_20s): + manga_path = Path("tests/Tomo-chan wa Onna no ko") + chapter_path = Path( + "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire.cbz" + ) + cache_path = Path("tests/cache.json") + flags = [ + "-u https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko", + "--loglevel 10", + "-l en", + "-c 1", + "--path tests", + "--format cbz", + "--name-format 'Ch.{chapter_num} - {chapter_name}'", + "--name-format-none 0", + "--forcevol", + "--wait 2", + "--hook-manga-pre echo 0", + "--hook-manga-post 1", + "--hook-chapter-pre 2", + "--hook-chapter-post 3", + "--cache-path tests/cache.json", + "--add-metadata", + ] + script_path = "manga-dlp.py" + os.system(f"python3 {script_path} {' '.join(flags)}") + + assert manga_path.exists() and manga_path.is_dir() + assert chapter_path.exists() and chapter_path.is_file() + assert cache_path.exists() and cache_path.is_file() + # cleanup + shutil.rmtree(manga_path, ignore_errors=True) + cache_path.unlink(missing_ok=True) + + # def test_full_without_input(): # script_path = "manga-dlp.py" # assert os.system(f"python3 {script_path}") != 0 diff --git a/tests/test_22_all_flags.py b/tests/test_22_all_flags.py new file mode 100644 index 0000000..57f32fe --- /dev/null +++ b/tests/test_22_all_flags.py @@ -0,0 +1,54 @@ +import os +import platform +import shutil +import time +from pathlib import Path + +import pytest + +from mangadlp import app + + +@pytest.fixture +def wait_10s(): + print("sleeping 10 seconds because of api timeouts") + time.sleep(10) + + +@pytest.fixture +def wait_20s(): + print("sleeping 20 seconds because of api timeouts") + time.sleep(20) + + +def test_full_with_all_flags(wait_10s): + manga_path = Path("tests/Tomo-chan wa Onna no ko") + chapter_path = manga_path / "Ch. 1 - Once In A Life Time Misfire.cbz" + cache_path = Path("tests/test_cache.json") + flags = [ + "-u https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko", + "--loglevel 10", + "-l en", + "-c 1", + "--path tests", + "--format cbz", + "--name-format 'Ch. {chapter_num} - {chapter_name}'", + "--name-format-none 0", + # "--forcevol", + "--wait 2", + "--hook-manga-pre 'echo 0'", + "--hook-manga-post 'echo 1'", + "--hook-chapter-pre 'echo 2'", + "--hook-chapter-post 'echo 3'", + "--cache-path tests/test_cache.json", + "--add-metadata", + ] + script_path = "manga-dlp.py" + os.system(f"python3 {script_path} {' '.join(flags)}") + + assert manga_path.exists() and manga_path.is_dir() + assert chapter_path.exists() and chapter_path.is_file() + assert cache_path.exists() and cache_path.is_file() + # cleanup + shutil.rmtree(manga_path, ignore_errors=True) + cache_path.unlink(missing_ok=True) diff --git a/tests/test_list2.txt b/tests/test_list2.txt index dda2d34..727c876 100644 --- a/tests/test_list2.txt +++ b/tests/test_list2.txt @@ -1 +1 @@ -https://mangadex.org/title/0aea9f43-d4a9-4bf7-bebc-550a512f9b95/shikimori-s-not-just-a-cutie \ No newline at end of file +https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko \ No newline at end of file From 5afeed11eac6fddf34ea7a017b27f559a0d6da37 Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Wed, 15 Feb 2023 20:26:04 +0100 Subject: [PATCH 17/18] remove duplicate test --- tests/test_21_full.py | 35 ----------------------------------- tests/test_22_all_flags.py | 3 --- 2 files changed, 38 deletions(-) diff --git a/tests/test_21_full.py b/tests/test_21_full.py index 1cbb7f7..0f4e5fd 100644 --- a/tests/test_21_full.py +++ b/tests/test_21_full.py @@ -233,41 +233,6 @@ def test_full_with_read_skip_cbz(wait_10s): shutil.rmtree(manga_path, ignore_errors=True) -def test_full_with_all_flags(wait_20s): - manga_path = Path("tests/Tomo-chan wa Onna no ko") - chapter_path = Path( - "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire.cbz" - ) - cache_path = Path("tests/cache.json") - flags = [ - "-u https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko", - "--loglevel 10", - "-l en", - "-c 1", - "--path tests", - "--format cbz", - "--name-format 'Ch.{chapter_num} - {chapter_name}'", - "--name-format-none 0", - "--forcevol", - "--wait 2", - "--hook-manga-pre echo 0", - "--hook-manga-post 1", - "--hook-chapter-pre 2", - "--hook-chapter-post 3", - "--cache-path tests/cache.json", - "--add-metadata", - ] - script_path = "manga-dlp.py" - os.system(f"python3 {script_path} {' '.join(flags)}") - - assert manga_path.exists() and manga_path.is_dir() - assert chapter_path.exists() and chapter_path.is_file() - assert cache_path.exists() and cache_path.is_file() - # cleanup - shutil.rmtree(manga_path, ignore_errors=True) - cache_path.unlink(missing_ok=True) - - # def test_full_without_input(): # script_path = "manga-dlp.py" # assert os.system(f"python3 {script_path}") != 0 diff --git a/tests/test_22_all_flags.py b/tests/test_22_all_flags.py index 57f32fe..10c0ce6 100644 --- a/tests/test_22_all_flags.py +++ b/tests/test_22_all_flags.py @@ -1,13 +1,10 @@ import os -import platform import shutil import time from pathlib import Path import pytest -from mangadlp import app - @pytest.fixture def wait_10s(): From 6105f15e9ad5f1a407527c1c6379c523a8c762b3 Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Wed, 15 Feb 2023 21:53:59 +0100 Subject: [PATCH 18/18] add file format checker and update api matcher --- CHANGELOG.md | 2 + mangadlp/app.py | 82 ++++++++++++++++++++++++-------------- mangadlp/utils.py | 14 +++++++ tests/test_07_metadata.py | 14 +++++-- tests/test_22_all_flags.py | 2 +- 5 files changed, 78 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 676caa1..53a88c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - Cache now also saves the manga title - New tests - More typo annotations for function, compatible with python3.8 +- File format checker if you use the MangaDLP class directly ### Fixed @@ -30,6 +31,7 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - Simplified the chapter info generation - Updated the license year - Updated the API template +- Updated the API detection and removed it from the MangaDLP class ## [2.2.20] - 2023-02-12 diff --git a/mangadlp/app.py b/mangadlp/app.py index 9f0c93c..f1054b0 100644 --- a/mangadlp/app.py +++ b/mangadlp/app.py @@ -10,6 +10,46 @@ from mangadlp.api.mangadex import Mangadex from mangadlp.cache import CacheDB from mangadlp.hooks import run_hook from mangadlp.metadata import write_metadata +from mangadlp.utils import get_file_format + + +def match_api(url_uuid: str) -> type: + """ + Match the correct api class from a string + + Args: + url_uuid: url/uuid to check + + Returns: + The class of the API to use + """ + + # apis to check + apis: list[tuple[str, re.Pattern, type]] = [ + ( + "mangadex.org", + re.compile( + r"(mangadex.org)|([a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})" + ), + Mangadex, + ), + ( + "test.org", + re.compile(r"(test.test)"), + type, + ), + ] + + # check url for match + for api_name, api_re, api_cls in apis: + if not api_re.search(url_uuid): + continue + log.info(f"API matched: {api_name}") + return api_cls + + # no supported api found + log.error(f"No supported api in link/uuid found: {url_uuid}") + raise ValueError class MangaDLP: @@ -75,13 +115,12 @@ class MangaDLP: self._prepare() def _prepare(self) -> None: - # set manga format suffix - if self.file_format and self.file_format[0] != ".": - self.file_format = f".{self.file_format}" + # check and set correct file suffix/format + self.file_format = get_file_format(self.file_format) # start prechecks - self.pre_checks() + self._pre_checks() # init api - self.api_used = self.check_api(self.url_uuid) + self.api_used = match_api(self.url_uuid) try: log.debug("Initializing api") self.api = self.api_used(self.url_uuid, self.language, self.forcevol) @@ -94,9 +133,9 @@ class MangaDLP: # get chapter list self.manga_chapter_list = self.api.chapter_list self.manga_total_chapters = len(self.manga_chapter_list) - self.manga_path = Path(f"{self.download_path}/{self.manga_title}") + self.manga_path = self.download_path / self.manga_title - def pre_checks(self) -> None: + def _pre_checks(self) -> None: # prechecks userinput/options # no url and no readin list given if not self.url_uuid: @@ -121,27 +160,6 @@ class MangaDLP: log.error("Don't specify the volume without --forcevol") raise ValueError - # check the api which needs to be used - def check_api(self, url_uuid: str) -> type: - # apis to check - api_mangadex = re.compile( - r"(mangadex.org)|([a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})" - ) - api_test = re.compile("test.test") - - # check url for match - if api_mangadex.search(url_uuid): - log.debug("Matched api: mangadex.org") - return Mangadex - # this is only for testing multiple apis - if api_test.search(url_uuid): - log.critical("Not supported yet") - raise ValueError - - # no supported api found - log.error(f"No supported api in link/uuid found: {url_uuid}") - raise ValueError - # once called per manga def get_manga(self) -> None: print_divider = "=========================================" @@ -236,10 +254,12 @@ class MangaDLP: metadata = self.api.create_metadata(chapter) write_metadata( chapter_path, - {"Format": self.file_format.removeprefix("."), **metadata}, + {"Format": self.file_format[1:], **metadata}, + ) + except Exception as exc: + log.warning( + f"Can't write metadata for chapter '{chapter}'. Reason={exc}" ) - except Exception: - log.warning(f"Can't write metadata for chapter '{chapter}'") # pack downloaded folder if self.file_format: diff --git a/mangadlp/utils.py b/mangadlp/utils.py index 4d9afc5..d305a45 100644 --- a/mangadlp/utils.py +++ b/mangadlp/utils.py @@ -145,6 +145,20 @@ def get_filename( return f"Ch. {chapter_num} - {chapter_name}" +def get_file_format(file_format: str) -> str: + if not file_format: + return "" + + if re.match(r"\.?[a-z0-9]+", file_format, flags=re.I): + if file_format[0] != ".": + file_format = f".{file_format}" + else: + log.error(f"Invalid file format: '{file_format}'") + raise ValueError + + return file_format + + def progress_bar(progress: float, total: float) -> None: time = datetime.now().strftime("%Y-%m-%dT%H:%M:%S") percent = int(progress / (int(total) / 100)) diff --git a/tests/test_07_metadata.py b/tests/test_07_metadata.py index ea407df..33ff523 100644 --- a/tests/test_07_metadata.py +++ b/tests/test_07_metadata.py @@ -1,12 +1,20 @@ import shutil import subprocess +import time from pathlib import Path +import pytest import xmlschema from mangadlp.metadata import validate_metadata, write_metadata +@pytest.fixture +def wait_20s(): + print("sleeping 20 seconds because of api timeouts") + time.sleep(20) + + def test_metadata_creation(): test_metadata_file = Path("tests/ComicInfo_test.xml") metadata_path = Path("tests/") @@ -102,12 +110,10 @@ def test_metadata_validation_values2(): } -def test_metadata_chapter_validity(): +def test_metadata_chapter_validity(wait_20s): url_uuid = "https://mangadex.org/title/76ee7069-23b4-493c-bc44-34ccbf3051a8/tomo-chan-wa-onna-no-ko" manga_path = Path("tests/Tomo-chan wa Onna no ko") - metadata_path = Path( - "tests/Tomo-chan wa Onna no ko/Ch. 1 - Once In A Life Time Misfire/ComicInfo.xml" - ) + metadata_path = manga_path / "Ch. 1 - Once In A Life Time Misfire/ComicInfo.xml" language = "en" chapters = "1" download_path = "tests" diff --git a/tests/test_22_all_flags.py b/tests/test_22_all_flags.py index 10c0ce6..84d890e 100644 --- a/tests/test_22_all_flags.py +++ b/tests/test_22_all_flags.py @@ -18,7 +18,7 @@ def wait_20s(): time.sleep(20) -def test_full_with_all_flags(wait_10s): +def test_full_with_all_flags(wait_20s): manga_path = Path("tests/Tomo-chan wa Onna no ko") chapter_path = manga_path / "Ch. 1 - Once In A Life Time Misfire.cbz" cache_path = Path("tests/test_cache.json")