manga-dlp/mangadlp/api/mangadex.py
Ivan Schaller 03461b80bf
Some checks failed
ci/woodpecker/push/tests Pipeline failed
switch to strict typing with pyright
Signed-off-by: Ivan Schaller <ivan@schaller.sh>
2023-02-18 16:21:03 +01:00

292 lines
11 KiB
Python

import re
from time import sleep
from typing import Any, Dict, List, Union
import requests
from loguru import logger as log
from mangadlp import utils
class Mangadex:
    """Mangadex API Class.

    Get infos for a manga from mangadex.org.

    Creating an instance immediately queries the API: the manga data, the
    title and the complete chapter feed are fetched in ``__init__``.

    Args:
        url_uuid (str): URL or UUID of the manga
        language (str): Manga language with country codes. "en" --> english
        forcevol (bool): Force naming of volumes. Useful for mangas where chapters reset each volume

    Attributes:
        api_name (str): Name of the API
        manga_uuid (str): UUID of the manga, without the url part
        manga_data (dict): Infos of the manga. Name, title etc
        manga_title (str): The title of the manga, sanitized for all file systems
        manga_chapter_data (dict): All chapter data of the manga. Volumes, chapters, chapter uuids and chapter names
        chapter_list (list): A list of all available chapters for the language
    """

    # api information
    api_base_url = "https://api.mangadex.org"
    img_base_url = "https://uploads.mangadex.org"

    # compiled once; finds the uuid inside a full url as well as a bare uuid
    _uuid_regex = re.compile(
        r"[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}"
    )
    # how often a request is attempted before giving up
    _max_attempts = 3
    # page size used when walking the chapter feed
    _feed_limit = 500

    # get infos to initiate class
    def __init__(self, url_uuid: str, language: str, forcevol: bool):
        # static info
        self.api_name = "Mangadex"

        self.url_uuid = url_uuid
        self.language = language
        self.forcevol = forcevol

        # api stuff
        self.api_content_ratings = "contentRating[]=safe&contentRating[]=suggestive&contentRating[]=erotica&contentRating[]=pornographic"
        self.api_language = f"translatedLanguage[]={self.language}"
        self.api_additions = f"{self.api_language}&{self.api_content_ratings}"

        # infos from functions (order matters: each step uses the previous results)
        self.manga_uuid = self.get_manga_uuid()
        self.manga_data = self.get_manga_data()
        self.manga_title = self.get_manga_title()
        self.manga_chapter_data = self.get_chapter_data()
        self.chapter_list = self.create_chapter_list()

    # get the uuid for the manga
    def get_manga_uuid(self) -> str:
        """Isolate the manga UUID from ``self.url_uuid``.

        Returns:
            str: The first UUID-shaped substring found in the url/uuid.

        Raises:
            TypeError: If no UUID can be found. The type is kept from the
                previous implementation, which failed by subscripting ``None``.
        """
        match = self._uuid_regex.search(self.url_uuid)
        if match is None:
            log.error("No valid UUID found")
            raise TypeError("No valid UUID found")

        return match[0]

    # make initial request
    def get_manga_data(self) -> Dict[str, Any]:
        """Request the manga entry from the API.

        The request is attempted up to three times with a two second pause
        between attempts.

        Returns:
            dict: The ``data`` object of the API response.

        Raises:
            KeyError: If the API does not answer with ``result == "ok"``.
            Exception: Whatever the last failed request attempt raised.
        """
        log.debug(f"Getting manga data for: {self.manga_uuid}")
        for attempt in range(1, self._max_attempts + 1):
            try:
                response = requests.get(
                    f"{self.api_base_url}/manga/{self.manga_uuid}", timeout=10
                )
            # broad on purpose: any failure of the request is worth a retry
            except Exception:
                if attempt >= self._max_attempts:
                    log.error("Maybe the MangaDex API is down?")
                    raise
                log.error("Mangadex API not reachable. Retrying")
                sleep(2)
            else:
                break

        # the loop above either breaks with a response or re-raises
        response_body: Dict[str, Any] = response.json()  # pyright:ignore
        # check if manga exists
        if response_body["result"] != "ok":
            log.error("Manga not found")
            raise KeyError("Manga not found")

        return response_body["data"]

    # get the title of the manga (and fix the filename)
    def get_manga_title(self) -> str:
        """Pick the manga title and sanitize it with ``utils.fix_name``.

        Lookup order: main title in the requested language, alt title in the
        requested language, english main title, first main title available.

        Returns:
            str: The sanitized title.

        Raises:
            KeyError: If the manga data contains no main title at all.
        """
        log.debug(f"Getting manga title for: {self.manga_uuid}")
        attributes = self.manga_data["attributes"]
        main_titles: Dict[str, str] = attributes["title"]

        # try to get the title in requested language
        if self.language in main_titles:
            title = main_titles[self.language]
            log.debug(f"Language={self.language}, Title='{title}'")
            return utils.fix_name(title)
        log.info("Manga title not found in requested language. Trying alt titles")

        # search in alt titles
        alt_titles = attributes.get("altTitles") or []
        log.debug(f"Alt titles: {alt_titles}")
        title = next(
            (item[self.language] for item in alt_titles if item.get(self.language)),
            None,
        )
        if title is not None:
            log.debug(f"Language={self.language}, Alt-title='{title}'")
            return utils.fix_name(title)
        log.warning(
            "Manga title also not found in alt titles. Falling back to english title"
        )

        if "en" in main_titles:
            title = main_titles["en"]
            log.debug(f"Language=en, Fallback-title='{title}'")
            return utils.fix_name(title)

        # no english main title either: use whatever main title exists
        # instead of failing with a KeyError on "en"
        try:
            fallback_lang, title = next(iter(main_titles.items()))
        except StopIteration:
            log.error("Manga has no title in any language")
            raise KeyError("en") from None
        log.debug(f"Language={fallback_lang}, Fallback-title='{title}'")
        return utils.fix_name(title)

    # check if chapters are available in requested language
    def check_chapter_lang(self) -> int:
        """Ask the feed how many chapters exist for the requested language.

        Returns:
            int: The ``total`` reported by the feed endpoint.

        Raises:
            KeyError: If the feed reports zero chapters (or has no ``total``).
            Exception: If the response can not be decoded.
        """
        log.debug(f"Checking for chapters in specified language for: {self.manga_uuid}")
        r = requests.get(
            f"{self.api_base_url}/manga/{self.manga_uuid}/feed?limit=0&{self.api_additions}",
            timeout=10,
        )
        try:
            total_chapters: int = r.json()["total"]
        except Exception:
            log.error(
                "Error retrieving the chapters list. Did you specify a valid language code?"
            )
            raise

        if total_chapters == 0:
            log.error("No chapters available to download in specified language")
            raise KeyError

        log.debug(f"Total chapters={total_chapters}")
        return total_chapters

    # get chapter data like name, uuid etc
    def get_chapter_data(self) -> Dict[str, Dict[str, Union[str, int]]]:
        """Collect uuid, volume, chapter, name and page count of every chapter.

        The feed is read in pages of 500 entries. External chapters and an
        entry repeating the volume/chapter of the previously stored entry are
        skipped.

        Returns:
            dict: Chapter infos keyed by ``chapter`` or, with ``forcevol``,
            by ``volume:chapter``.
        """
        log.debug(f"Getting chapter data for: {self.manga_uuid}")
        api_sorting = "order[chapter]=asc&order[volume]=asc"
        # check for chapters in specified lang
        total_chapters = self.check_chapter_lang()

        chapter_data: Dict[str, Dict[str, Union[str, int]]] = {}
        last_volume, last_chapter = ("", "")
        offset = 0
        while offset < total_chapters:  # if more than 500 chapters
            r = requests.get(
                f"{self.api_base_url}/manga/{self.manga_uuid}/feed?{api_sorting}&limit={self._feed_limit}&offset={offset}&{self.api_additions}",
                timeout=10,
            )
            response_body: Dict[str, Any] = r.json()
            for chapter in response_body["data"]:
                attributes: Dict[str, Any] = chapter["attributes"]
                # chapter infos from feed (missing/null values become "" or 0)
                chapter_num: str = attributes.get("chapter") or ""
                chapter_vol: str = attributes.get("volume") or ""
                chapter_uuid: str = chapter.get("id") or ""
                chapter_name: str = attributes.get("title") or ""
                chapter_external: str = attributes.get("externalUrl") or ""
                chapter_pages: int = attributes.get("pages") or 0

                # check for chapter title and fix it
                if chapter_name:
                    chapter_name = utils.fix_name(chapter_name)

                # check if the chapter is external (can't download them)
                if chapter_external:
                    log.debug(f"Chapter is external. Skipping: {chapter_name}")
                    continue

                # check if its duplicate from the last entry
                if last_volume == chapter_vol and last_chapter == chapter_num:
                    continue

                # export chapter data as a dict
                chapter_index = (
                    chapter_num if not self.forcevol else f"{chapter_vol}:{chapter_num}"
                )
                chapter_data[chapter_index] = {
                    "uuid": chapter_uuid,
                    "volume": chapter_vol,
                    "chapter": chapter_num,
                    "name": chapter_name,
                    "pages": chapter_pages,
                }
                # add last chapter to duplicate check
                last_volume, last_chapter = (chapter_vol, chapter_num)

            # increase offset for mangas with more than 500 chapters
            offset += self._feed_limit

        return chapter_data

    # get images for the chapter (mangadex@home)
    def get_chapter_images(self, chapter: str, wait_time: float) -> List[str]:
        """Build the image urls of one chapter via the at-home endpoint.

        Args:
            chapter (str): Key of the chapter in ``manga_chapter_data``
            wait_time (float): Seconds to sleep after a successful lookup;
                failed attempts wait ``wait_time + 2`` before the next try

        Returns:
            list: The image urls, or an empty list if all attempts failed.
        """
        log.debug(f"Getting chapter images for: {self.manga_uuid}")
        athome_url = f"{self.api_base_url}/at-home/server"
        chapter_uuid = self.manga_chapter_data[chapter]["uuid"]

        # retry up to two times if the api applied rate limits
        api_data: Dict[str, Any] = {}
        for attempt in range(1, self._max_attempts + 1):
            try:
                r = requests.get(f"{athome_url}/{chapter_uuid}", timeout=10)
                api_data = r.json()
                if api_data["result"] != "ok":
                    log.error(f"No chapter with the id {chapter_uuid} found")
                    raise IndexError
                if api_data["chapter"]["data"] is None:
                    log.error(f"No chapter data found for chapter {chapter_uuid}")
                    raise IndexError
            # broad on purpose: network, decoding and validation failures
            # are all handled by trying again
            except Exception:
                if attempt >= self._max_attempts:
                    # nothing left to retry, so do not announce or wait for one
                    log.error(
                        f"Giving up on chapter {chapter_uuid} after {self._max_attempts} attempts"
                    )
                    return []
                log.error("Retrying in a few seconds")
                sleep(wait_time + 2)
            else:
                break

        chapter_hash = api_data["chapter"]["hash"]
        chapter_img_data = api_data["chapter"]["data"]

        # get list of image urls
        image_urls: List[str] = [
            f"{self.img_base_url}/data/{chapter_hash}/{image}"
            for image in chapter_img_data
        ]

        sleep(wait_time)

        return image_urls

    # create list of chapters
    def create_chapter_list(self) -> List[str]:
        """List all chapters, as ``volume:chapter`` when ``forcevol`` is set.

        Returns:
            list: One entry per chapter in ``manga_chapter_data``.
        """
        log.debug(f"Creating chapter list for: {self.manga_uuid}")
        if self.forcevol:
            return [
                f"{data['volume']}:{data['chapter']}"
                for data in self.manga_chapter_data.values()
            ]
        return [
            data["chapter"]  # type:ignore
            for data in self.manga_chapter_data.values()
        ]

    def create_metadata(self, chapter: str) -> Dict[str, Union[str, int, None]]:
        """Assemble the metadata dict for one chapter.

        Args:
            chapter (str): Key of the chapter in ``manga_chapter_data``

        Returns:
            dict: Metadata built from the chapter and manga data. ``Volume``
            is ``None`` when the volume is not an integer.
        """
        log.info("Creating metadata from api")

        chapter_data = self.manga_chapter_data[chapter]
        try:
            volume = int(chapter_data["volume"])
        except (ValueError, TypeError):
            # e.g. an empty volume string
            volume = None
        manga_attributes = self.manga_data["attributes"]
        metadata: Dict[str, Union[str, int, None]] = {
            "Volume": volume,
            "Number": chapter_data.get("chapter"),
            "PageCount": chapter_data.get("pages"),
            "Title": chapter_data.get("name"),
            "Series": self.manga_title,
            "Count": len(self.manga_chapter_data),
            "LanguageISO": self.language,
            "Summary": manga_attributes["description"].get("en"),
            "Genre": manga_attributes.get("publicationDemographic"),
            "Web": f"https://mangadex.org/title/{self.manga_uuid}",
        }

        return metadata