add metadata support
Some checks failed
ci/woodpecker/push/tests Pipeline failed

This commit is contained in:
Ivan Schaller 2023-02-13 19:15:27 +01:00
parent 6ccaeda8a4
commit 6120fe7c81
Signed by: olofvndrhr
GPG key ID: 2A6BE07D99C8C205
10 changed files with 263 additions and 34 deletions

View file

@ -9,6 +9,22 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
- Add support for more sites - Add support for more sites
## [2.2.21] - 2023-02-11
### Added
- Metadata is now added to each chapter. Schema
standard: [https://anansi-project.github.io/docs/comicinfo/schemas/v2.0](https://anansi-project.github.io/docs/comicinfo/schemas/v2.0)
- `xmltodict` as a package requirement
### Fixed
- API template typos
### Changed
- Simplified the chapter info generation
## [2.2.20] - 2023-02-12 ## [2.2.20] - 2023-02-12
### Fixed ### Fixed

View file

@ -34,17 +34,28 @@ class YourAPI:
# attributes needed by app.py # attributes needed by app.py
self.manga_uuid = "abc" self.manga_uuid = "abc"
self.manga_title = "abc" self.manga_title = "abc"
self.chapter_list = "abc" self.chapter_list = ["1", "2"]
# methods needed by app.py # methods needed by app.py
# get chapter infos as a dictionary # get chapter infos as a dictionary
def get_chapter_infos(chapter: str) -> dict: def get_manga_chapter_data(chapter: str) -> dict:
# these keys have to be returned # these keys have to be returned
return { return {
"uuid": chapter_uuid, "1": {
"volume": chapter_vol, "uuid": "abc",
"chapter": chapter_num, "volume": "1",
"name": chapter_name, "chapter": "1",
"name": "test",
},
}
# or with --forcevol
return {
"1:1": {
"uuid": "abc",
"volume": "1",
"chapter": "1",
"name": "test",
},
} }
# get chapter images as a list (full links) # get chapter images as a list (full links)
@ -55,3 +66,12 @@ class YourAPI:
"https://abc.def/image/1234.png", "https://abc.def/image/1234.png",
"https://abc.def/image/12345.png", "https://abc.def/image/12345.png",
] ]
# get metadata with correct keys for ComicInfo.xml
def create_metadata(self, chapter: str) -> dict:
    """Return the ComicInfo.xml metadata for one chapter.

    Args:
        chapter: Index of the chapter, in the same form as the entries of
            ``self.chapter_list``.

    Returns:
        A dict whose keys are ComicInfo element names (e.g. ``Volume``,
        ``LanguageISO``, ``Title``) and whose values are the element contents.
    """
    # example
    # "Volume" is an integer field in the ComicInfo schema, so the example
    # uses a numeric string instead of arbitrary text
    return {
        "Volume": "1",
        "LanguageISO": "en",
        "Title": "test",
    }

View file

@ -1 +1 @@
__version__ = "2.2.20" __version__ = "2.2.21"

View file

@ -169,13 +169,15 @@ class Mangadex:
chapter_uuid = chapter.get("id") or "" chapter_uuid = chapter.get("id") or ""
chapter_name = attributes.get("title") or "" chapter_name = attributes.get("title") or ""
chapter_external = attributes.get("externalUrl") or "" chapter_external = attributes.get("externalUrl") or ""
chapter_pages = attributes.get("pages") or ""
# check for chapter title and fix it # check for chapter title and fix it
if chapter_name: if chapter_name:
chapter_name = utils.fix_name(chapter_name) chapter_name = utils.fix_name(chapter_name)
# check if the chapter is external (can't download them) # check if the chapter is external (can't download them)
if chapter_external: if chapter_external:
log.debug(f"Chapter is external. Skipping: {chapter_uuid}") log.debug(f"Chapter is external. Skipping: {chapter_name}")
continue continue
# check if its duplicate from the last entry # check if its duplicate from the last entry
@ -186,12 +188,13 @@ class Mangadex:
chapter_index = ( chapter_index = (
chapter_num if not self.forcevol else f"{chapter_vol}:{chapter_num}" chapter_num if not self.forcevol else f"{chapter_vol}:{chapter_num}"
) )
chapter_data[chapter_index] = [ chapter_data[chapter_index] = {
chapter_uuid, "uuid": chapter_uuid,
chapter_vol, "volume": chapter_vol,
chapter_num, "chapter": chapter_num,
chapter_name, "name": chapter_name,
] "pages": chapter_pages,
}
# add last chapter to duplicate check # add last chapter to duplicate check
last_volume, last_chapter = (chapter_vol, chapter_num) last_volume, last_chapter = (chapter_vol, chapter_num)
@ -204,7 +207,7 @@ class Mangadex:
def get_chapter_images(self, chapter: str, wait_time: float) -> list: def get_chapter_images(self, chapter: str, wait_time: float) -> list:
log.debug(f"Getting chapter images for: {self.manga_uuid}") log.debug(f"Getting chapter images for: {self.manga_uuid}")
athome_url = f"{self.api_base_url}/at-home/server" athome_url = f"{self.api_base_url}/at-home/server"
chapter_uuid = self.manga_chapter_data[chapter][0] chapter_uuid = self.manga_chapter_data[chapter]["uuid"]
# retry up to two times if the api applied rate limits # retry up to two times if the api applied rate limits
api_error = False api_error = False
@ -251,10 +254,9 @@ class Mangadex:
def create_chapter_list(self) -> list: def create_chapter_list(self) -> list:
log.debug(f"Creating chapter list for: {self.manga_uuid}") log.debug(f"Creating chapter list for: {self.manga_uuid}")
chapter_list = [] chapter_list = []
for index, _ in self.manga_chapter_data.items(): for data in self.manga_chapter_data.values():
chapter_info: dict = self.get_chapter_infos(index) chapter_number: str = data["chapter"]
chapter_number: str = chapter_info["chapter"] volume_number: str = data["volume"]
volume_number: str = chapter_info["volume"]
if self.forcevol: if self.forcevol:
chapter_list.append(f"{volume_number}:{chapter_number}") chapter_list.append(f"{volume_number}:{chapter_number}")
else: else:
@ -262,17 +264,20 @@ class Mangadex:
return chapter_list return chapter_list
# create easy to access chapter infos def create_metadata(self, chapter: str) -> dict:
def get_chapter_infos(self, chapter: str) -> dict: log.info("Creating metadata from api")
chapter_uuid: str = self.manga_chapter_data[chapter][0]
chapter_vol: str = self.manga_chapter_data[chapter][1]
chapter_num: str = self.manga_chapter_data[chapter][2]
chapter_name: str = self.manga_chapter_data[chapter][3]
log.debug(f"Getting chapter infos for: {chapter_uuid}")
return { chapter_data = self.manga_chapter_data[chapter]
"uuid": chapter_uuid, metadata = {
"volume": chapter_vol, "Volume": chapter_data["volume"],
"chapter": chapter_num, "Number": chapter_data["chapter"],
"name": chapter_name, "PageCount": chapter_data["pages"],
"Count": len(self.manga_chapter_data),
"LanguageISO": self.language,
"Title": self.manga_title,
"Summary": self.manga_data["attributes"]["description"].get("en"),
"Genre": self.manga_data["attributes"].get("publicationDemographic"),
"Web": f"https://mangadex.org/title/{self.manga_uuid}",
} }
return metadata

View file

@ -9,6 +9,7 @@ from mangadlp import downloader, utils
from mangadlp.api.mangadex import Mangadex from mangadlp.api.mangadex import Mangadex
from mangadlp.cache import CacheDB from mangadlp.cache import CacheDB
from mangadlp.hooks import run_hook from mangadlp.hooks import run_hook
from mangadlp.metadata import write_metadata
class MangaDLP: class MangaDLP:
@ -205,20 +206,34 @@ class MangaDLP:
log.info(f"Chapter '{chapter}' is in cache. Skipping download") log.info(f"Chapter '{chapter}' is in cache. Skipping download")
continue continue
# download chapter
try: try:
chapter_path = self.get_chapter(chapter) chapter_path = self.get_chapter(chapter)
except KeyboardInterrupt as exc: except KeyboardInterrupt as exc:
raise exc raise exc
except FileExistsError: except FileExistsError:
# skipping chapter download as its already available
skipped_chapters.append(chapter) skipped_chapters.append(chapter)
# update cache # update cache
if self.cache_path: if self.cache_path:
cache.add_chapter(chapter) cache.add_chapter(chapter)
continue continue
except Exception: except Exception:
# skip download/packing due to an error
error_chapters.append(chapter) error_chapters.append(chapter)
continue continue
# add metadata
try:
metadata = self.api.create_metadata(chapter)
write_metadata(
chapter_path,
{"Format": self.file_format.removeprefix("."), **metadata},
)
except Exception:
log.warning(f"Can't write metadata for chapter '{chapter}'")
# pack downloaded folder
if self.file_format: if self.file_format:
try: try:
self.archive_chapter(chapter_path) self.archive_chapter(chapter_path)
@ -268,7 +283,7 @@ class MangaDLP:
# once called per chapter # once called per chapter
def get_chapter(self, chapter: str) -> Path: def get_chapter(self, chapter: str) -> Path:
# get chapter infos # get chapter infos
chapter_infos = self.api.get_chapter_infos(chapter) chapter_infos: dict = self.api.manga_chapter_data[chapter]
log.debug(f"Chapter infos: {chapter_infos}") log.debug(f"Chapter infos: {chapter_infos}")
# get image urls for chapter # get image urls for chapter

31
mangadlp/metadata.py Normal file
View file

@ -0,0 +1,31 @@
from pathlib import Path
import xmltodict
from loguru import logger as log
def write_metadata(chapter_path: Path, metadata: dict) -> None:
    """Write a ``ComicInfo.xml`` file into a chapter folder.

    The XML template is parsed into a dict, every template element that has a
    non-empty counterpart in *metadata* is overwritten, and the result is
    written to ``chapter_path / "ComicInfo.xml"``.

    Args:
        chapter_path: Folder of the downloaded chapter.
        metadata: Mapping of ComicInfo element names to their values. Entries
            with a falsy value and keys that do not exist in the template are
            skipped.

    Raises:
        Exception: Whatever reading or parsing the template raised; it is
            logged and re-raised unchanged.
    """
    # First choice stays the path relative to the current working directory.
    # If it is not there (program started from another directory), fall back
    # to the same location next to this module.
    # NOTE(review): assumes the template is shipped as
    # <package>/metadata/ComicInfo.xml - confirm against the packaging config.
    template_path = Path("mangadlp/metadata/ComicInfo.xml")
    if not template_path.is_file():
        template_path = Path(__file__).resolve().parent / "metadata" / "ComicInfo.xml"

    try:
        metadata_template = template_path.read_text(encoding="utf8")
        metadata_empty: dict[str, dict] = xmltodict.parse(metadata_template)
    except Exception:
        log.error("Can't open or parse xml template")
        # bare raise keeps the original traceback
        raise

    metadata_file = chapter_path / "ComicInfo.xml"
    log.info(f"Writing metadata to: '{metadata_file}'")
    log.debug(f"Metadata items: {metadata}")

    comic_info = metadata_empty["ComicInfo"]
    for key, value in metadata.items():
        # only fill elements the template knows, and never blank out a
        # template default with an empty value
        if not value or key not in comic_info:
            continue

        log.debug(f"Updating metadata: '{key}' = '{value}'")
        comic_info[key] = value

    metadata_export = xmltodict.unparse(metadata_empty, pretty=True, indent=(" " * 4))
    metadata_file.write_text(metadata_export, encoding="utf8")

View file

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Template for the ComicInfo.xml that is written into each chapter folder.
write_metadata() in mangadlp/metadata.py parses a ComicInfo template and only
overwrites elements that already exist in it, so an element has to be listed
here before the api metadata can fill it. Elements that receive no value keep
the content given below.
NOTE(review): presumably this is the file stored as
mangadlp/metadata/ComicInfo.xml (the path write_metadata() reads) - confirm.
-->
<ComicInfo>
<!-- elements without content; filled in when the metadata provides a value -->
<Title></Title>
<Number></Number>
<Count></Count>
<Volume></Volume>
<Summary></Summary>
<Genre></Genre>
<Web></Web>
<PageCount></PageCount>
<LanguageISO></LanguageISO>
<Format></Format>
<ScanInformation></ScanInformation>
<SeriesGroup></SeriesGroup>
<!-- elements with preset content; kept unless the metadata provides a value -->
<BlackAndWhite>Unknown</BlackAndWhite>
<Manga>Yes</Manga>
<AgeRating>Unknown</AgeRating>
<Notes>Downloaded with https://github.com/olofvndrhr/manga-dlp</Notes>
</ComicInfo>

View file

@ -0,0 +1,123 @@
<?xml version="1.0" encoding="utf-8"?>
<xs:schema elementFormDefault="qualified" xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="ComicInfo" nillable="true" type="ComicInfo" />
<xs:complexType name="ComicInfo">
<xs:sequence>
<xs:element minOccurs="0" maxOccurs="1" default="" name="Title" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Series" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Number" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="-1" name="Count" type="xs:int" />
<xs:element minOccurs="0" maxOccurs="1" default="-1" name="Volume" type="xs:int" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="AlternateSeries" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="AlternateNumber" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="-1" name="AlternateCount" type="xs:int" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Summary" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Notes" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="-1" name="Year" type="xs:int" />
<xs:element minOccurs="0" maxOccurs="1" default="-1" name="Month" type="xs:int" />
<xs:element minOccurs="0" maxOccurs="1" default="-1" name="Day" type="xs:int" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Writer" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Penciller" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Inker" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Colorist" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Letterer" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="CoverArtist" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Editor" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Publisher" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Imprint" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Genre" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Web" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="0" name="PageCount" type="xs:int" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="LanguageISO" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Format" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="Unknown" name="BlackAndWhite" type="YesNo" />
<xs:element minOccurs="0" maxOccurs="1" default="Unknown" name="Manga" type="Manga" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Characters" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Teams" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Locations" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="ScanInformation" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="StoryArc" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="SeriesGroup" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="Unknown" name="AgeRating" type="AgeRating" />
<xs:element minOccurs="0" maxOccurs="1" name="Pages" type="ArrayOfComicPageInfo" />
<xs:element minOccurs="0" maxOccurs="1" name="CommunityRating" type="Rating" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="MainCharacterOrTeam" type="xs:string" />
<xs:element minOccurs="0" maxOccurs="1" default="" name="Review" type="xs:string" />
</xs:sequence>
</xs:complexType>
<xs:simpleType name="YesNo">
<xs:restriction base="xs:string">
<xs:enumeration value="Unknown" />
<xs:enumeration value="No" />
<xs:enumeration value="Yes" />
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="Manga">
<xs:restriction base="xs:string">
<xs:enumeration value="Unknown" />
<xs:enumeration value="No" />
<xs:enumeration value="Yes" />
<xs:enumeration value="YesAndRightToLeft" />
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="Rating">
<xs:restriction base="xs:decimal">
<xs:minInclusive value="0"/>
<xs:maxInclusive value="5"/>
<xs:fractionDigits value="2"/>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="AgeRating">
<xs:restriction base="xs:string">
<xs:enumeration value="Unknown" />
<xs:enumeration value="Adults Only 18+" />
<xs:enumeration value="Early Childhood" />
<xs:enumeration value="Everyone" />
<xs:enumeration value="Everyone 10+" />
<xs:enumeration value="G" />
<xs:enumeration value="Kids to Adults" />
<xs:enumeration value="M" />
<xs:enumeration value="MA15+" />
<xs:enumeration value="Mature 17+" />
<xs:enumeration value="PG" />
<xs:enumeration value="R18+" />
<xs:enumeration value="Rating Pending" />
<xs:enumeration value="Teen" />
<xs:enumeration value="X18+" />
</xs:restriction>
</xs:simpleType>
<xs:complexType name="ArrayOfComicPageInfo">
<xs:sequence>
<xs:element minOccurs="0" maxOccurs="unbounded" name="Page" nillable="true" type="ComicPageInfo" />
</xs:sequence>
</xs:complexType>
<xs:complexType name="ComicPageInfo">
<xs:attribute name="Image" type="xs:int" use="required" />
<xs:attribute default="Story" name="Type" type="ComicPageType" />
<xs:attribute default="false" name="DoublePage" type="xs:boolean" />
<xs:attribute default="0" name="ImageSize" type="xs:long" />
<xs:attribute default="" name="Key" type="xs:string" />
<xs:attribute default="" name="Bookmark" type="xs:string" />
<xs:attribute default="-1" name="ImageWidth" type="xs:int" />
<xs:attribute default="-1" name="ImageHeight" type="xs:int" />
</xs:complexType>
<xs:simpleType name="ComicPageType">
<xs:list>
<xs:simpleType>
<xs:restriction base="xs:string">
<xs:enumeration value="FrontCover" />
<xs:enumeration value="InnerCover" />
<xs:enumeration value="Roundup" />
<xs:enumeration value="Story" />
<xs:enumeration value="Advertisement" />
<xs:enumeration value="Editorial" />
<xs:enumeration value="Letters" />
<xs:enumeration value="Preview" />
<xs:enumeration value="BackCover" />
<xs:enumeration value="Other" />
<xs:enumeration value="Deleted" />
</xs:restriction>
</xs:simpleType>
</xs:list>
</xs:simpleType>
</xs:schema>

View file

@ -83,7 +83,6 @@ def get_chapter_list(chapters: str, available_chapters: list) -> list:
# remove illegal characters etc # remove illegal characters etc
def fix_name(filename: str) -> str: def fix_name(filename: str) -> str:
log.debug(f"Input name='{filename}'")
filename = filename.encode(encoding="utf8", errors="ignore").decode(encoding="utf8") filename = filename.encode(encoding="utf8", errors="ignore").decode(encoding="utf8")
# remove illegal characters # remove illegal characters
filename = re.sub(r'[/\\<>:;|?*!@"]', "", filename) filename = re.sub(r'[/\\<>:;|?*!@"]', "", filename)
@ -94,7 +93,7 @@ def fix_name(filename: str) -> str:
# remove trailing and beginning spaces # remove trailing and beginning spaces
filename = re.sub("([ \t]+$)|(^[ \t]+)", "", filename) filename = re.sub("([ \t]+$)|(^[ \t]+)", "", filename)
log.debug(f"Output name='{filename}'") log.debug(f"Input name='{filename}', Output name='{filename}'")
return filename return filename

View file

@ -2,5 +2,6 @@ requests>=2.28.0
loguru>=0.6.0 loguru>=0.6.0
click>=8.1.3 click>=8.1.3
click-option-group>=0.5.5 click-option-group>=0.5.5
xmltodict~=0.13.0
img2pdf>=0.4.4 img2pdf>=0.4.4