manga-dlp/mangadlp/app.py

455 lines
16 KiB
Python
Raw Normal View History

import re
2022-05-16 16:09:17 +02:00
import shutil
from pathlib import Path
2023-02-06 14:46:58 +01:00
from typing import Any, Union
from loguru import logger as log
2022-07-22 21:11:01 +02:00
from mangadlp import downloader, utils
from mangadlp.api.mangadex import Mangadex
2023-02-06 14:46:58 +01:00
from mangadlp.cache import CacheDB
from mangadlp.hooks import run_hook
2023-02-13 19:15:27 +01:00
from mangadlp.metadata import write_metadata
from mangadlp.utils import get_file_format
def match_api(url_uuid: str) -> type:
    """Match the correct api class from a string.

    Args:
        url_uuid: url/uuid to check

    Returns:
        The class of the API to use

    Raises:
        ValueError: If no supported api matches the given url/uuid
    """
    # apis to check: (display name, pattern searched in url_uuid, api class)
    apis: list[tuple[str, re.Pattern, type]] = [
        (
            "mangadex.org",
            re.compile(
                # the dot is escaped so only the literal domain matches,
                # the second alternative matches a bare uuid
                r"(mangadex\.org)|([a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})"
            ),
            Mangadex,
        ),
        (
            # NOTE(review): placeholder entry, `type` stands in for an api class
            # (presumably only used by tests -- confirm)
            "test.org",
            re.compile(r"(test\.test)"),
            type,
        ),
    ]

    # check url for match, the first matching entry wins
    for api_name, api_re, api_cls in apis:
        if api_re.search(url_uuid):
            log.info(f"API matched: {api_name}")
            return api_cls

    # no supported api found
    log.error(f"No supported api in link/uuid found: {url_uuid}")
    raise ValueError(f"No supported api in link/uuid found: {url_uuid}")
2022-07-15 12:49:49 +02:00
2022-05-16 22:03:37 +02:00
class MangaDLP:
    """Download Mangas from supported sites.

    After initialization, start the script with the function get_manga().

    Args:
        url_uuid: URL or UUID of the manga
        language: Manga language with country codes. "en" --> english
        chapters: Chapters to download, "all" for every chapter available
        list_chapters: List all available chapters and exit
        file_format: Archive format to create. An empty string means don't archive the folder
        name_format: Format string for the chapter file names, passed to utils.get_filename
        name_format_none: Passed to utils.get_filename together with name_format
            (presumably the replacement for empty placeholders -- confirm)
        forcevol: Force naming of volumes. Useful for mangas where chapters reset each volume
        download_path: Download path. Defaults to the relative path 'downloads'
        download_wait: Time to wait for each picture to download in seconds
        manga_pre_hook_cmd: Command(s) to run before each manga
        manga_post_hook_cmd: Command(s) to run after each manga
        chapter_pre_hook_cmd: Command(s) to run before each chapter
        chapter_post_hook_cmd: Command(s) to run after each chapter
        cache_path: Path to the json cache. If omitted, no cache is used
        add_metadata: Flag to toggle creation & inclusion of metadata
    """

    def __init__(  # pylint: disable=too-many-locals
        self,
        url_uuid: str,
        language: str = "en",
        chapters: str = "",
        list_chapters: bool = False,
        file_format: str = "cbz",
        name_format: str = "{default}",
        name_format_none: str = "",
        forcevol: bool = False,
        download_path: Union[str, Path] = "downloads",
        download_wait: float = 0.5,
        manga_pre_hook_cmd: str = "",
        manga_post_hook_cmd: str = "",
        chapter_pre_hook_cmd: str = "",
        chapter_post_hook_cmd: str = "",
        cache_path: str = "",
        add_metadata: bool = True,
    ) -> None:
        # init parameters
        self.url_uuid = url_uuid
        self.language = language
        self.chapters = chapters
        self.list_chapters = list_chapters
        self.file_format = file_format
        self.name_format = name_format
        self.name_format_none = name_format_none
        self.forcevol = forcevol
        self.download_path: Path = Path(download_path)
        self.download_wait = download_wait
        self.manga_pre_hook_cmd = manga_pre_hook_cmd
        self.manga_post_hook_cmd = manga_post_hook_cmd
        self.chapter_pre_hook_cmd = chapter_pre_hook_cmd
        self.chapter_post_hook_cmd = chapter_post_hook_cmd
        self.cache_path = cache_path
        self.add_metadata = add_metadata
        # variables handed to every hook, filled in get_manga() and get_chapter()
        self.hook_infos: dict = {}

        # prepare everything
        self._prepare()

    def _prepare(self) -> None:
        """Validate the options, initialize the api and load the manga infos.

        Raises:
            ValueError: If the options are invalid or no api matches the url/uuid
            Exception: Whatever the api class raises during initialization
        """
        # check and set correct file suffix/format
        self.file_format = get_file_format(self.file_format)

        # start prechecks
        self._pre_checks()

        # init api
        self.api_used = match_api(self.url_uuid)
        try:
            log.debug("Initializing api")
            self.api = self.api_used(self.url_uuid, self.language, self.forcevol)
        except Exception:
            log.error("Can't initialize api. Exiting")
            raise

        # get manga title and uuid
        self.manga_uuid = self.api.manga_uuid
        self.manga_title = self.api.manga_title
        # get chapter list
        self.manga_chapter_list = self.api.chapter_list
        self.manga_total_chapters = len(self.manga_chapter_list)
        self.manga_path = self.download_path / self.manga_title

    def _pre_checks(self) -> None:
        """Check the user input/options for invalid combinations.

        Raises:
            ValueError: If the url/uuid is missing, no chapters were given
                (without list_chapters) or the volume notation (":") does not
                fit the forcevol setting
        """
        # no url and no readin list given
        if not self.url_uuid:
            log.error(
                'You need to specify a manga url/uuid with "-u" or a list with "--read"'
            )
            raise ValueError("No manga url/uuid specified")

        # the remaining checks only matter if chapters get downloaded
        if self.list_chapters:
            return

        if not self.chapters:
            # no chapters to download were given
            log.error(
                'You need to specify one or more chapters to download. To see all chapters use "--list"'
            )
            raise ValueError("No chapters to download specified")

        has_volume = ":" in self.chapters
        # if forcevol is used, but didn't specify a volume in the chapters selected
        if self.forcevol and not has_volume:
            log.error("You need to specify the volume if you use --forcevol")
            raise ValueError("Volume missing in chapters while forcevol is used")
        # if forcevol is not used, but a volume is specified
        if not self.forcevol and has_volume:
            log.error("Don't specify the volume without --forcevol")
            raise ValueError("Volume specified in chapters without forcevol")

    # once called per manga
    def get_manga(self) -> None:
        """List or download the selected chapters of the manga.

        With list_chapters set, only the available chapters are logged.
        Otherwise every selected chapter is downloaded, optionally gets
        metadata written, is archived and added to the cache. Chapters that
        fail are collected and reported at the end instead of aborting the run.
        """
        print_divider = "========================================="
        # show infos
        log.info(f"{print_divider}")
        log.info(f"Manga Name: {self.manga_title}")
        log.info(f"Manga UUID: {self.manga_uuid}")
        log.info(f"Total chapters: {self.manga_total_chapters}")

        # list chapters if list_chapters is true
        if self.list_chapters:
            log.info(f"Available Chapters: {', '.join(self.manga_chapter_list)}")
            log.info(f"{print_divider}\n")
            return

        # check chapters to download if not all
        if self.chapters.lower() == "all":
            chapters_to_download = self.manga_chapter_list
        else:
            chapters_to_download = utils.get_chapter_list(
                self.chapters, self.manga_chapter_list
            )

        # show chapters to download
        log.info(f"Chapters selected: {', '.join(chapters_to_download)}")
        log.info(f"{print_divider}")

        # create manga folder
        self.manga_path.mkdir(parents=True, exist_ok=True)

        # prepare cache if specified, both names stay bound when no cache is used
        cache = None
        cached_chapters = []
        if self.cache_path:
            cache = CacheDB(
                self.cache_path, self.manga_uuid, self.language, self.manga_title
            )
            cached_chapters = cache.db_uuid_chapters
            log.info(f"Cached chapters: {cached_chapters}")

        # create dict with all variables for the hooks
        self.hook_infos.update(
            {
                "api": self.api.api_name,
                "manga_url_uuid": self.url_uuid,
                "manga_uuid": self.manga_uuid,
                "manga_title": self.manga_title,
                "language": self.language,
                "total_chapters": self.manga_total_chapters,
                "chapters_to_download": chapters_to_download,
                "file_format": self.file_format,
                "forcevol": self.forcevol,
                "download_path": str(self.download_path),
                "manga_path": self.manga_path,
            }
        )

        # start manga pre hook
        run_hook(
            command=self.manga_pre_hook_cmd,
            hook_type="manga_pre",
            status="starting",
            **self.hook_infos,
        )

        # get chapters
        skipped_chapters: list[Any] = []
        error_chapters: list[Any] = []
        for chapter in chapters_to_download:
            if cache is not None and chapter in cached_chapters:
                log.info(f"Chapter '{chapter}' is in cache. Skipping download")
                continue

            # download chapter
            # KeyboardInterrupt is no subclass of Exception, so it is not caught
            # below and still aborts the whole run
            try:
                chapter_path = self.get_chapter(chapter)
            except FileExistsError:
                # skipping chapter download as its already available
                skipped_chapters.append(chapter)
                # update cache
                if cache is not None:
                    cache.add_chapter(chapter)
                continue
            except Exception as exc:
                # skip download/packing due to an error, but keep the reason visible
                log.error(f"Chapter '{chapter}' failed. Reason={exc!r}")
                error_chapters.append(chapter)
                continue

            # add metadata, failures here are not fatal for the chapter
            if self.add_metadata:
                try:
                    metadata = self.api.create_metadata(chapter)
                    write_metadata(
                        chapter_path,
                        # file_format carries a leading dot, strip it for the metadata
                        {"Format": self.file_format[1:], **metadata},
                    )
                except Exception as exc:
                    log.warning(
                        f"Can't write metadata for chapter '{chapter}'. Reason={exc}"
                    )

            # pack downloaded folder
            if self.file_format:
                try:
                    self.archive_chapter(chapter_path)
                except Exception as exc:
                    log.error(f"Chapter '{chapter}' failed. Reason={exc!r}")
                    error_chapters.append(chapter)
                    continue

            # done with chapter
            log.info(f"Done with chapter '{chapter}'")

            # update cache
            if cache is not None:
                cache.add_chapter(chapter)

            # start chapter post hook
            run_hook(
                command=self.chapter_post_hook_cmd,
                hook_type="chapter_post",
                status="successful",
                **self.hook_infos,
            )

        # done with manga
        log.info(f"{print_divider}")
        log.info(f"Done with manga: {self.manga_title}")

        # filter skipped list
        skipped_chapters = [entry for entry in skipped_chapters if entry]
        if skipped_chapters:
            log.info(f"Skipped chapters: {', '.join(skipped_chapters)}")

        # filter error list
        error_chapters = [entry for entry in error_chapters if entry]
        if error_chapters:
            log.info(f"Chapters with errors: {', '.join(error_chapters)}")

        # start manga post hook
        run_hook(
            command=self.manga_post_hook_cmd,
            hook_type="manga_post",
            status="successful",
            **self.hook_infos,
        )

        log.info(f"{print_divider}\n")

    # once called per chapter
    def get_chapter(self, chapter: str) -> Path:
        """Download the images of one chapter into its chapter folder.

        Args:
            chapter: Key of the chapter in the chapter data of the api

        Returns:
            Path of the folder containing the downloaded images

        Raises:
            SystemError: If the api returned no images for the chapter
            FileExistsError: If the chapter archive/folder already exists
            Exception: Whatever the api or the downloader raises on failure
        """
        # get chapter infos
        chapter_infos: dict = self.api.manga_chapter_data[chapter]
        log.debug(f"Chapter infos: {chapter_infos}")

        # get image urls for chapter
        try:
            chapter_image_urls = self.api.get_chapter_images(
                chapter, self.download_wait
            )
        except KeyboardInterrupt:
            log.critical("Keyboard interrupt. Stopping")
            raise

        # check if the image urls are empty. if yes skip this chapter (for mass downloads)
        if not chapter_image_urls:
            log.error(
                f"No images: Skipping Vol. {chapter_infos['volume']} Ch.{chapter_infos['chapter']}"
            )
            # NOTE(review): the chapter_* hook infos are not set for this chapter
            # yet, so this hook still sees the values of the previous chapter
            run_hook(
                command=self.chapter_pre_hook_cmd,
                hook_type="chapter_pre",
                status="skipped",
                reason="No images",
                **self.hook_infos,
            )
            # error
            raise SystemError(f"No images found for chapter '{chapter}'")

        # get filename for chapter (without suffix)
        chapter_filename = utils.get_filename(
            self.manga_title,
            chapter_infos["name"],
            chapter_infos["volume"],
            chapter,
            self.forcevol,
            self.name_format,
            self.name_format_none,
        )
        log.debug(f"Filename: '{chapter_filename}'")

        # set download path for chapter (image folder)
        chapter_path = self.manga_path / chapter_filename
        # set archive path with file format
        # (equals the image folder if file format is an empty string)
        chapter_archive_path = Path(f"{chapter_path}{self.file_format}")

        # create dict with all variables for the hooks
        # done before the "already exists" check so the skipped hook reports
        # this chapter and not the previous one
        self.hook_infos.update(
            {
                "chapter_filename": chapter_filename,
                "chapter_path": chapter_path,
                "chapter_archive_path": chapter_archive_path,
                "chapter_uuid": chapter_infos["uuid"],
                "chapter_volume": chapter_infos["volume"],
                "chapter_number": chapter_infos["chapter"],
                "chapter_name": chapter_infos["name"],
            }
        )

        # check if chapter already exists
        # check for folder, if file format is an empty string
        if chapter_archive_path.exists():
            log.info(f"'{chapter_archive_path}' already exists. Skipping")
            run_hook(
                command=self.chapter_pre_hook_cmd,
                hook_type="chapter_pre",
                status="skipped",
                reason="Existing",
                **self.hook_infos,
            )
            # skipped
            raise FileExistsError(f"'{chapter_archive_path}' already exists")

        # create chapter folder (skips it if it already exists)
        chapter_path.mkdir(parents=True, exist_ok=True)

        # verbose log
        log.debug(f"Chapter UUID: {chapter_infos['uuid']}")
        log.debug(f"File path: '{chapter_archive_path}'")
        log.debug(f"Image URLS:\n{chapter_image_urls}")

        # start chapter pre hook
        run_hook(
            command=self.chapter_pre_hook_cmd,
            hook_type="chapter_pre",
            status="starting",
            **self.hook_infos,
        )

        # log
        log.info(f"Downloading: '{chapter_filename}'")

        # download images
        try:
            downloader.download_chapter(
                chapter_image_urls, chapter_path, self.download_wait
            )
        except KeyboardInterrupt:
            log.critical("Keyboard interrupt. Stopping")
            raise
        except Exception:
            log.error(f"Cant download: '{chapter_filename}'. Skipping")
            # run chapter post hook, reporting the failure to the hook command
            run_hook(
                command=self.chapter_post_hook_cmd,
                hook_type="chapter_post",
                status="error",
                reason="Download error",
                **self.hook_infos,
            )
            # chapter error
            raise

        # Done with chapter
        log.info(f"Successfully downloaded: '{chapter_filename}'")

        # ok
        return chapter_path

    # create an archive of the chapter if needed
    def archive_chapter(self, chapter_path: Path) -> None:
        """Pack the image folder of a chapter and remove the folder afterwards.

        Args:
            chapter_path: Path of the folder containing the chapter images

        Raises:
            IOError: If the image folder does not exist
            Exception: Whatever the archive/pdf creation raises on failure
        """
        log.info(f"Creating archive '{chapter_path}{self.file_format}'")
        try:
            # check if image folder is existing
            if not chapter_path.exists():
                log.error(f"Image folder: {chapter_path} does not exist")
                raise IOError(f"Image folder: {chapter_path} does not exist")
            if self.file_format == ".pdf":
                utils.make_pdf(chapter_path)
            else:
                utils.make_archive(chapter_path, self.file_format)
        except Exception:
            log.error("Archive error. Skipping chapter")
            raise

        # remove image folder, only reached if the archive was created
        shutil.rmtree(chapter_path)