manga-dlp/mangadlp/api/mangadex.py

284 lines
11 KiB
Python
Raw Normal View History

import re
from time import sleep
2023-02-20 14:03:40 +01:00
from typing import Any, Dict, List
import requests
from loguru import logger as log
2022-07-22 21:11:01 +02:00
from mangadlp import utils
2023-02-20 14:38:09 +01:00
from mangadlp.types import ChapterData, ComicInfo
2022-07-15 12:49:49 +02:00
2022-05-04 19:17:12 +02:00
class Mangadex:
    """Mangadex API Class.

    Get infos for a manga from mangadex.org.

    Args:
        url_uuid (str): URL or UUID of the manga
        language (str): Manga language with country codes. "en" --> english
        forcevol (bool): Force naming of volumes. Useful for mangas where chapters reset each volume

    Attributes:
        api_name (str): Name of the API
        manga_uuid (str): UUID of the manga, without the url part
        manga_data (dict): Infos of the manga. Name, title etc
        manga_title (str): The title of the manga, sanitized for all file systems
        manga_chapter_data (dict): All chapter data of the manga. Volumes, chapters, chapter uuids and chapter names
        chapter_list (list): A list of all available chapters for the language
    """

    # api information
    api_base_url = "https://api.mangadex.org"
    img_base_url = "https://uploads.mangadex.org"

    def __init__(self, url_uuid: str, language: str, forcevol: bool):
        """Store the user options and eagerly fetch all manga infos from the api."""
        # static info
        self.api_name = "Mangadex"

        self.url_uuid = url_uuid
        self.language = language
        self.forcevol = forcevol

        # api stuff
        self.api_content_ratings = "contentRating[]=safe&contentRating[]=suggestive&contentRating[]=erotica&contentRating[]=pornographic"
        self.api_language = f"translatedLanguage[]={self.language}"
        self.api_additions = f"{self.api_language}&{self.api_content_ratings}"

        # infos from functions
        # NOTE: order matters, every call below reads attributes set by the previous ones
        self.manga_uuid = self.get_manga_uuid()
        self.manga_data = self.get_manga_data()
        self.manga_title = self.get_manga_title()
        self.manga_chapter_data = self.get_chapter_data()
        self.chapter_list = self.create_chapter_list()

    def get_manga_uuid(self) -> str:
        """Isolate the manga uuid from ``self.url_uuid``.

        Returns:
            The first uuid-shaped substring of the given url/uuid.

        Raises:
            TypeError: If no uuid can be found. The type is kept from the previous
                implementation (subscripting a failed match) so existing callers
                keep working.
        """
        uuid_regex = re.compile(r"[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}")

        try:
            match = uuid_regex.search(self.url_uuid)
        except TypeError:
            # url_uuid is not a string at all
            log.error("No valid UUID found")
            raise

        if match is None:
            log.error("No valid UUID found")
            raise TypeError(f"No valid UUID found in: {self.url_uuid!r}")

        return match[0]

    def get_manga_data(self) -> Dict[str, Any]:
        """Request the manga entry from the api.

        The request is tried up to three times with a two second pause between tries.

        Returns:
            The "data" object of the api response.

        Raises:
            Exception: Whatever the last failed request raised, after three tries.
            KeyError: If the api did not answer with result "ok".
        """
        log.debug(f"Getting manga data for: {self.manga_uuid}")
        max_attempts = 3

        for attempt in range(1, max_attempts + 1):
            try:
                response = requests.get(f"{self.api_base_url}/manga/{self.manga_uuid}", timeout=10)
            except Exception:
                if attempt >= max_attempts:
                    log.error("Maybe the MangaDex API is down?")
                    raise
                log.error("Mangadex API not reachable. Retrying")
                sleep(2)
            else:
                break

        response_body: Dict[str, Any] = response.json()  # pyright:ignore

        # check if manga exists
        if response_body["result"] != "ok":
            log.error("Manga not found")
            raise KeyError("Manga not found")

        return response_body["data"]

    def get_manga_title(self) -> str:
        """Get the title of the manga, passed through ``utils.fix_name``.

        Lookup order:
            1. main title in the requested language
            2. first alt title in the requested language
            3. english main title
            4. first english alt title
            5. any available main title

        Returns:
            The fixed title.

        Raises:
            KeyError: If the manga data contains no usable title at all.
        """
        log.debug(f"Getting manga title for: {self.manga_uuid}")
        attributes = self.manga_data["attributes"]

        titles = attributes.get("title")
        if not isinstance(titles, dict):
            titles = {}
        alt_titles = [item for item in (attributes.get("altTitles") or []) if isinstance(item, dict)]

        # try to get the title in requested language
        if self.language in titles:
            title = titles[self.language]
            log.debug(f"Language={self.language}, Title='{title}'")
            return utils.fix_name(title)
        log.info("Manga title not found in requested language. Trying alt titles")

        # search in alt titles
        log.debug(f"Alt titles: {alt_titles}")
        title = next((item[self.language] for item in alt_titles if item.get(self.language)), None)
        if title is not None:
            log.debug(f"Language={self.language}, Alt-title='{title}'")
            return utils.fix_name(title)
        log.warning("Manga title also not found in alt titles. Falling back to english title")

        # english fallback: main title first, then alt titles
        title = titles.get("en")
        if title is None:
            title = next((item["en"] for item in alt_titles if item.get("en")), None)
        if title is not None:
            log.debug(f"Language=en, Fallback-title='{title}'")
            return utils.fix_name(title)

        # last resort: not every manga has an english title, so take whatever main title exists
        fallback = next(iter(titles.items()), None)
        if fallback is None:
            log.error("No title found for the manga")
            raise KeyError("No title found for the manga")
        fallback_lang, title = fallback
        log.debug(f"Language={fallback_lang}, Fallback-title='{title}'")
        return utils.fix_name(title)

    def check_chapter_lang(self) -> int:
        """Check if chapters are available in the requested language.

        Returns:
            The total number of chapters the api reports for the language.

        Raises:
            Exception: If the response has no readable "total" field.
            KeyError: If the api reports zero chapters.
        """
        log.debug(f"Checking for chapters in specified language for: {self.manga_uuid}")
        # limit=0: only the "total" counter is needed here, not the chapters themselves
        response = requests.get(
            f"{self.api_base_url}/manga/{self.manga_uuid}/feed?limit=0&{self.api_additions}",
            timeout=10,
        )

        try:
            total_chapters: int = response.json()["total"]
        except Exception:
            log.error("Error retrieving the chapters list. Did you specify a valid language code?")
            raise

        if total_chapters == 0:
            log.error("No chapters available to download in specified language")
            raise KeyError("No chapters available to download in specified language")

        log.debug(f"Total chapters={total_chapters}")
        return total_chapters

    def get_chapter_data(self) -> Dict[str, ChapterData]:
        """Get the chapter data (uuid, volume, chapter, name, pages) of all chapters.

        The feed is requested in pages of 500 entries. External chapters are skipped
        and so are entries with the same volume and chapter as the previously kept entry.

        Returns:
            Chapter data keyed by the chapter number, or by "volume:chapter"
            if ``forcevol`` is set.
        """
        log.debug(f"Getting chapter data for: {self.manga_uuid}")
        api_sorting = "order[chapter]=asc&order[volume]=asc"
        page_size = 500

        # check for chapters in specified lang
        total_chapters = self.check_chapter_lang()

        chapter_data: Dict[str, ChapterData] = {}
        last_volume, last_chapter = ("", "")
        offset = 0
        while offset < total_chapters:  # if more than 500 chapters
            response = requests.get(
                f"{self.api_base_url}/manga/{self.manga_uuid}/feed?{api_sorting}&limit={page_size}&offset={offset}&{self.api_additions}",
                timeout=10,
            )
            response_body: Dict[str, Any] = response.json()

            for chapter in response_body["data"]:
                attributes: Dict[str, Any] = chapter["attributes"]

                # chapter infos from feed ("or" also replaces explicit null values)
                chapter_num: str = attributes.get("chapter") or ""
                chapter_vol: str = attributes.get("volume") or ""
                chapter_uuid: str = chapter.get("id") or ""
                chapter_name: str = attributes.get("title") or ""
                chapter_external: str = attributes.get("externalUrl") or ""
                chapter_pages: int = attributes.get("pages") or 0

                # check for chapter title and fix it
                if chapter_name:
                    chapter_name = utils.fix_name(chapter_name)

                # check if the chapter is external (can't download them)
                if chapter_external:
                    log.debug(f"Chapter is external. Skipping: {chapter_name}")
                    continue

                # check if its duplicate from the last entry
                if last_volume == chapter_vol and last_chapter == chapter_num:
                    continue

                # export chapter data as a dict
                chapter_index = chapter_num if not self.forcevol else f"{chapter_vol}:{chapter_num}"
                chapter_data[chapter_index] = {
                    "uuid": chapter_uuid,
                    "volume": chapter_vol,
                    "chapter": chapter_num,
                    "name": chapter_name,
                    "pages": chapter_pages,
                }

                # add last chapter to duplicate check
                last_volume, last_chapter = (chapter_vol, chapter_num)

            # increase offset for mangas with more than 500 chapters
            offset += page_size

        return chapter_data

    def get_chapter_images(self, chapter: str, wait_time: float) -> List[str]:
        """Get the image urls for a chapter (mangadex@home).

        The api is asked up to three times, waiting ``wait_time + 2`` seconds
        between tries.

        Args:
            chapter: Index of the chapter in ``manga_chapter_data``
            wait_time: Seconds to wait before returning the image urls

        Returns:
            The image urls of the chapter, or an empty list if all tries failed.
        """
        log.debug(f"Getting chapter images for: {self.manga_uuid}")
        athome_url = f"{self.api_base_url}/at-home/server"
        chapter_uuid = self.manga_chapter_data[chapter]["uuid"]
        max_attempts = 3

        # retry up to two times if the api applied rate limits
        api_data: Dict[str, Any] = {}
        for attempt in range(1, max_attempts + 1):
            try:
                response = requests.get(f"{athome_url}/{chapter_uuid}", timeout=10)
                api_data = response.json()
                if api_data["result"] != "ok":
                    log.error(f"No chapter with the id {chapter_uuid} found")
                    raise IndexError
                if api_data["chapter"]["data"] is None:
                    log.error(f"No chapter data found for chapter {chapter_uuid}")
                    raise IndexError
            except Exception:
                # out of tries: give up right away instead of announcing another retry
                if attempt >= max_attempts:
                    log.error(f"Could not get the images for chapter {chapter_uuid}")
                    return []
                log.error("Retrying in a few seconds")
                sleep(wait_time + 2)
            else:
                break

        chapter_hash = api_data["chapter"]["hash"]
        chapter_img_data = api_data["chapter"]["data"]

        # get list of image urls
        image_urls: List[str] = [
            f"{self.img_base_url}/data/{chapter_hash}/{image}" for image in chapter_img_data
        ]

        sleep(wait_time)
        return image_urls

    def create_chapter_list(self) -> List[str]:
        """Create the list of chapters from ``manga_chapter_data``.

        Returns:
            The chapter numbers, or "volume:chapter" entries if ``forcevol`` is set.
        """
        log.debug(f"Creating chapter list for: {self.manga_uuid}")
        if self.forcevol:
            return [f"{data['volume']}:{data['chapter']}" for data in self.manga_chapter_data.values()]
        return [data["chapter"] for data in self.manga_chapter_data.values()]

    def create_metadata(self, chapter: str) -> ComicInfo:
        """Create the metadata of a chapter from the api data.

        Args:
            chapter: Index of the chapter in ``manga_chapter_data``

        Returns:
            The metadata dict. "Volume" is None if the volume is not an integer,
            "Summary" is None if no english description is available.
        """
        log.info("Creating metadata from api")

        chapter_data = self.manga_chapter_data[chapter]
        try:
            volume = int(chapter_data["volume"])
        except (ValueError, TypeError):
            volume = None

        attributes = self.manga_data["attributes"]
        # defensive: only read the english text if the description really is a mapping
        description = attributes.get("description")
        summary = description.get("en") if isinstance(description, dict) else None

        metadata: ComicInfo = {
            "Volume": volume,
            "Number": chapter_data.get("chapter"),
            "PageCount": chapter_data.get("pages"),
            "Title": chapter_data.get("name"),
            "Series": self.manga_title,
            "Count": len(self.manga_chapter_data),
            "LanguageISO": self.language,
            "Summary": summary,
            "Genre": attributes.get("publicationDemographic"),
            "Web": f"https://mangadex.org/title/{self.manga_uuid}",
        }

        return metadata