From 6120fe7c81991e7bcd0be8701f63dc95c8c0380a Mon Sep 17 00:00:00 2001 From: Ivan Schaller Date: Mon, 13 Feb 2023 19:15:27 +0100 Subject: [PATCH] add metadata support --- CHANGELOG.md | 16 ++++ contrib/api_template.py | 32 +++++-- mangadlp/__about__.py | 2 +- mangadlp/api/mangadex.py | 53 ++++++------ mangadlp/app.py | 17 +++- mangadlp/metadata.py | 31 +++++++ mangadlp/metadata/ComicInfo.xml | 19 +++++ mangadlp/metadata/ComicInfo_v2.0.xsd | 123 +++++++++++++++++++++++++++ mangadlp/utils.py | 3 +- requirements.txt | 1 + 10 files changed, 263 insertions(+), 34 deletions(-) create mode 100644 mangadlp/metadata.py create mode 100644 mangadlp/metadata/ComicInfo.xml create mode 100644 mangadlp/metadata/ComicInfo_v2.0.xsd diff --git a/CHANGELOG.md b/CHANGELOG.md index 65b713f..b385432 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,22 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - Add support for more sites +## [2.2.21] - 2023-02-13 + +### Added + +- Metadata is now added to each chapter. 
Schema + standard: [https://anansi-project.github.io/docs/comicinfo/schemas/v2.0](https://anansi-project.github.io/docs/comicinfo/schemas/v2.0) +- `xmltodict` as a package requirement + +### Fixed + +- API template typos + +### Changed + +- Simplified the chapter info generation + ## [2.2.20] - 2023-02-12 ### Fixed diff --git a/contrib/api_template.py b/contrib/api_template.py index c6a0759..ecbbc88 100644 --- a/contrib/api_template.py +++ b/contrib/api_template.py @@ -34,17 +34,28 @@ class YourAPI: # attributes needed by app.py self.manga_uuid = "abc" self.manga_title = "abc" - self.chapter_list = "abc" + self.chapter_list = ["1", "2"] # methods needed by app.py # get chapter infos as a dictionary - def get_chapter_infos(chapter: str) -> dict: + def get_manga_chapter_data(chapter: str) -> dict: # these keys have to be returned return { - "uuid": chapter_uuid, - "volume": chapter_vol, - "chapter": chapter_num, - "name": chapter_name, + "1": { + "uuid": "abc", + "volume": "1", + "chapter": "1", + "name": "test", + }, + } + # or with --forcevol + return { + "1:1": { + "uuid": "abc", + "volume": "1", + "chapter": "1", + "name": "test", + }, } # get chapter images as a list (full links) @@ -55,3 +66,12 @@ class YourAPI: "https://abc.def/image/1234.png", "https://abc.def/image/12345.png", ] + + # get metadata with correct keys for ComicInfo.xml + def create_metadata(self, chapter: str) -> dict: + # example + return { + "Volume": "abc", + "LanguageISO": "en", + "Title": "test", + } diff --git a/mangadlp/__about__.py b/mangadlp/__about__.py index 97eeff4..20868fd 100644 --- a/mangadlp/__about__.py +++ b/mangadlp/__about__.py @@ -1 +1 @@ -__version__ = "2.2.20" +__version__ = "2.2.21" diff --git a/mangadlp/api/mangadex.py b/mangadlp/api/mangadex.py index 9c3417d..8bb574b 100644 --- a/mangadlp/api/mangadex.py +++ b/mangadlp/api/mangadex.py @@ -169,13 +169,15 @@ class Mangadex: chapter_uuid = chapter.get("id") or "" chapter_name = attributes.get("title") or "" 
chapter_external = attributes.get("externalUrl") or "" + chapter_pages = attributes.get("pages") or "" # check for chapter title and fix it if chapter_name: chapter_name = utils.fix_name(chapter_name) + # check if the chapter is external (can't download them) if chapter_external: - log.debug(f"Chapter is external. Skipping: {chapter_uuid}") + log.debug(f"Chapter is external. Skipping: {chapter_name}") continue # check if its duplicate from the last entry @@ -186,12 +188,13 @@ class Mangadex: chapter_index = ( chapter_num if not self.forcevol else f"{chapter_vol}:{chapter_num}" ) - chapter_data[chapter_index] = [ - chapter_uuid, - chapter_vol, - chapter_num, - chapter_name, - ] + chapter_data[chapter_index] = { + "uuid": chapter_uuid, + "volume": chapter_vol, + "chapter": chapter_num, + "name": chapter_name, + "pages": chapter_pages, + } # add last chapter to duplicate check last_volume, last_chapter = (chapter_vol, chapter_num) @@ -204,7 +207,7 @@ class Mangadex: def get_chapter_images(self, chapter: str, wait_time: float) -> list: log.debug(f"Getting chapter images for: {self.manga_uuid}") athome_url = f"{self.api_base_url}/at-home/server" - chapter_uuid = self.manga_chapter_data[chapter][0] + chapter_uuid = self.manga_chapter_data[chapter]["uuid"] # retry up to two times if the api applied rate limits api_error = False @@ -251,10 +254,9 @@ class Mangadex: def create_chapter_list(self) -> list: log.debug(f"Creating chapter list for: {self.manga_uuid}") chapter_list = [] - for index, _ in self.manga_chapter_data.items(): - chapter_info: dict = self.get_chapter_infos(index) - chapter_number: str = chapter_info["chapter"] - volume_number: str = chapter_info["volume"] + for data in self.manga_chapter_data.values(): + chapter_number: str = data["chapter"] + volume_number: str = data["volume"] if self.forcevol: chapter_list.append(f"{volume_number}:{chapter_number}") else: @@ -262,17 +264,20 @@ class Mangadex: return chapter_list - # create easy to access chapter infos 
- def get_chapter_infos(self, chapter: str) -> dict: - chapter_uuid: str = self.manga_chapter_data[chapter][0] - chapter_vol: str = self.manga_chapter_data[chapter][1] - chapter_num: str = self.manga_chapter_data[chapter][2] - chapter_name: str = self.manga_chapter_data[chapter][3] - log.debug(f"Getting chapter infos for: {chapter_uuid}") + def create_metadata(self, chapter: str) -> dict: + log.info("Creating metadata from api") - return { - "uuid": chapter_uuid, - "volume": chapter_vol, - "chapter": chapter_num, - "name": chapter_name, + chapter_data = self.manga_chapter_data[chapter] + metadata = { + "Volume": chapter_data["volume"], + "Number": chapter_data["chapter"], + "PageCount": chapter_data["pages"], + "Count": len(self.manga_chapter_data), + "LanguageISO": self.language, + "Title": self.manga_title, + "Summary": self.manga_data["attributes"]["description"].get("en"), + "Genre": self.manga_data["attributes"].get("publicationDemographic"), + "Web": f"https://mangadex.org/title/{self.manga_uuid}", } + + return metadata diff --git a/mangadlp/app.py b/mangadlp/app.py index cdcb2b0..769ed17 100644 --- a/mangadlp/app.py +++ b/mangadlp/app.py @@ -9,6 +9,7 @@ from mangadlp import downloader, utils from mangadlp.api.mangadex import Mangadex from mangadlp.cache import CacheDB from mangadlp.hooks import run_hook +from mangadlp.metadata import write_metadata class MangaDLP: @@ -205,20 +206,34 @@ class MangaDLP: log.info(f"Chapter '{chapter}' is in cache. 
Skipping download") continue + # download chapter try: chapter_path = self.get_chapter(chapter) except KeyboardInterrupt as exc: raise exc except FileExistsError: + # skipping chapter download as it's already available skipped_chapters.append(chapter) # update cache if self.cache_path: cache.add_chapter(chapter) continue except Exception: + # skip download/packing due to an error error_chapters.append(chapter) continue + # add metadata + try: + metadata = self.api.create_metadata(chapter) + write_metadata( + chapter_path, + {"Format": self.file_format.removeprefix("."), **metadata}, + ) + except Exception: + log.warning(f"Can't write metadata for chapter '{chapter}'") + + # pack downloaded folder if self.file_format: try: self.archive_chapter(chapter_path) @@ -268,7 +283,7 @@ class MangaDLP: # once called per chapter def get_chapter(self, chapter: str) -> Path: # get chapter infos - chapter_infos = self.api.get_chapter_infos(chapter) + chapter_infos: dict = self.api.manga_chapter_data[chapter] log.debug(f"Chapter infos: {chapter_infos}") # get image urls for chapter diff --git a/mangadlp/metadata.py b/mangadlp/metadata.py new file mode 100644 index 0000000..651d4ae --- /dev/null +++ b/mangadlp/metadata.py @@ -0,0 +1,31 @@ +from pathlib import Path + +import xmltodict +from loguru import logger as log + + +def write_metadata(chapter_path: Path, metadata: dict) -> None: + try: + metadata_template = Path("mangadlp/metadata/ComicInfo.xml").read_text( + encoding="utf8" + ) + metadata_empty: dict[str, dict] = xmltodict.parse(metadata_template) + except Exception as exc: + log.error("Can't open or parse xml template") + raise exc + metadata_file = chapter_path / "ComicInfo.xml" + + log.info(f"Writing metadata to: '{metadata_file}'") + log.debug(f"Metadata items: {metadata}") + for key, value in metadata.items(): + if not value: + continue + try: + metadata_empty["ComicInfo"][key] + except KeyError: + continue + log.debug(f"Updating metadata: '{key}' = '{value}'") 
metadata_empty["ComicInfo"][key] = value + + metadata_export = xmltodict.unparse(metadata_empty, pretty=True, indent=(" " * 4)) + metadata_file.write_text(metadata_export, encoding="utf8") diff --git a/mangadlp/metadata/ComicInfo.xml b/mangadlp/metadata/ComicInfo.xml new file mode 100644 index 0000000..ef77aba --- /dev/null +++ b/mangadlp/metadata/ComicInfo.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + Unknown + Yes + Unknown + Downloaded with https://github.com/olofvndrhr/manga-dlp + diff --git a/mangadlp/metadata/ComicInfo_v2.0.xsd b/mangadlp/metadata/ComicInfo_v2.0.xsd new file mode 100644 index 0000000..6732fe8 --- /dev/null +++ b/mangadlp/metadata/ComicInfo_v2.0.xsd @@ -0,0 +1,123 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/mangadlp/utils.py b/mangadlp/utils.py index b49c41a..32c7a87 100644 --- a/mangadlp/utils.py +++ b/mangadlp/utils.py @@ -83,7 +83,6 @@ def get_chapter_list(chapters: str, available_chapters: list) -> list: # remove illegal characters etc def fix_name(filename: str) -> str: - log.debug(f"Input name='{filename}'") filename = filename.encode(encoding="utf8", errors="ignore").decode(encoding="utf8") # remove illegal characters filename = re.sub(r'[/\\<>:;|?*!@"]', "", filename) @@ -94,7 +93,7 @@ def fix_name(filename: str) -> str: # remove trailing and beginning spaces filename = re.sub("([ \t]+$)|(^[ \t]+)", "", filename) - log.debug(f"Output name='{filename}'") + log.debug(f"Input name='{filename}', Output name='{filename}'") return filename diff --git a/requirements.txt b/requirements.txt index 0fe1f62..bd44bdf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,6 @@ requests>=2.28.0 loguru>=0.6.0 click>=8.1.3 click-option-group>=0.5.5 +xmltodict~=0.13.0 img2pdf>=0.4.4