Mirror of https://github.com/jo1gi/grawlix.git
Merge branch 'marvel'
Commit f064dfa9a6
.gitignore (vendored)
@@ -36,3 +36,4 @@ MANIFEST
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
+cookies.txt
README.md
@@ -12,6 +12,7 @@ grawlix currently supports downloading from the following sources:
 - [Flipp](https://flipp.dk)
 - [Internet Archive](https://archive.org)
 - [Manga Plus](https://mangaplus.shueisha.co.jp)
+- [Marvel Unlimited](https://marvel.com)
 - [Royal Road](https://www.royalroad.com)
 - [Saxo](https://saxo.com)
 - [Webtoons](https://webtoons.com)
@@ -8,6 +8,7 @@ from . import arguments, logging
 from typing import Tuple, Optional
 from rich.progress import Progress
 from functools import partial
+import os
 import asyncio

@@ -48,6 +49,20 @@ def get_urls(options) -> list[str]:
     return urls


+def get_cookie_file(options) -> Optional[str]:
+    """
+    Get path to cookie file
+
+    :param options: Cli arguments
+    :returns: Path to cookie file
+    """
+    if options.cookie_file is not None and os.path.exists(options.cookie_file):
+        return options.cookie_file
+    if os.path.exists("./cookies.txt"):
+        return "./cookies.txt"
+    return None
+
+
 async def authenticate(source: Source, config: Config, options):
     """
     Authenticate with source
@@ -61,6 +76,10 @@ async def authenticate(source: Source, config: Config, options):
         username, password, library = get_login(source, config, options)
         await source.login(username, password, library=library)
         source.authenticated = True
+    if source.supports_cookies:
+        cookie_file = get_cookie_file(options)
+        if cookie_file:
+            source.load_cookies(cookie_file)
     else:
         raise SourceNotAuthenticated

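For context on the new helpers above: get_cookie_file prefers the path given on the command line and falls back to ./cookies.txt in the working directory. A minimal sketch of the resolution order, assuming get_cookie_file is in scope and using a hypothetical argparse.Namespace in place of the real parsed options:

import os
from argparse import Namespace

# Hypothetical parsed options; "cookie_file" mirrors the dest of the
# new --cookies flag (see the arguments hunk below).
opts = Namespace(cookie_file="/tmp/marvel-cookies.txt")

# The explicit path wins only if it exists on disk; otherwise the
# fallback is ./cookies.txt, and None if neither file is present.
path = get_cookie_file(opts)
print(path)  # "/tmp/marvel-cookies.txt", "./cookies.txt", or None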
@@ -44,6 +44,12 @@ def parse_arguments() -> argparse.Namespace:
         help = "Library for login",
         dest = "library",
     )
+    parser.add_argument(
+        '-c',
+        '--cookies',
+        help = "Path to netscape cookie file",
+        dest = "cookie_file"
+    )
     # Outputs
     parser.add_argument(
         '-o',
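The new --cookies flag expects the Netscape cookie-file format that browser cookie-export extensions commonly produce. A sketch of what such a file contains, with a purely hypothetical cookie:

# Contents of a minimal Netscape-format cookies.txt (values hypothetical).
# Fields are tab-separated: domain, include-subdomains flag, path,
# secure flag, expiry (unix time), name, value.
COOKIES_TXT = (
    "# Netscape HTTP Cookie File\n"
    ".marvel.com\tTRUE\t/\tTRUE\t1767225600\tsessionid\t<token>\n"
)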
grawlix/sources/__init__.py
@@ -5,6 +5,7 @@ from .ereolen import Ereolen
 from .flipp import Flipp
 from .internet_archive import InternetArchive
 from .mangaplus import MangaPlus
+from .marvel import Marvel
 from .royal_road import RoyalRoad
 from .saxo import Saxo
 from .webtoons import Webtoons
@@ -56,6 +57,7 @@ def get_source_classes() -> list[type[Source]]:
         Flipp,
         InternetArchive,
         MangaPlus,
+        Marvel,
         RoyalRoad,
         Saxo,
         Webtoons
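Registering a source therefore means touching both lists: the import at the top of the module and the entry in get_source_classes. A hedged sketch of how a URL could then be routed to the right class via each source's match patterns (find_source is illustrative, not an existing grawlix helper):

import re

def find_source(url: str):
    # Scan the registered sources; the first class with a matching
    # regex in its `match` list wins.
    for source_class in get_source_classes():
        if any(re.match(pattern, url) for pattern in source_class.match):
            return source_class
    return None

print(find_source("https://read.marvel.com/#/book/12345"))  # Marvel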
grawlix/sources/marvel.py (new file, 138 lines)
@@ -0,0 +1,138 @@
+from grawlix.book import Book, Metadata, ImageList, OnlineFile, Series, Result
+from grawlix.exceptions import InvalidUrl, DataNotFound
+
+from .source import Source
+
+import re
+
+# Personal marvel API key
+API_KEY = "83ac0da31d3f6801f2c73c7e07ad76e8"
+
+class Marvel(Source[str]):
+    name: str = "Marvel"
+    match = [
+        r"https://www.marvel.com/comics/issue/\d+/.+",
+        r"https://read.marvel.com/#/book/\d+",
+        r"https://www.marvel.com/comics/series/\d+/.+"
+    ]
+    _authentication_methods: list[str] = [ "cookies" ]
+
+
+    async def download(self, url: str) -> Result[str]:
+        match_index = self.get_match_index(url)
+        if match_index == 0:
+            issue_id = await self._get_issue_id(url)
+            return await self.download_book_from_id(issue_id)
+        if match_index == 1:
+            issue_id = url.split("/")[-1]
+            return await self.download_book_from_id(issue_id)
+        if match_index == 2:
+            return await self._download_series(url)
+        raise InvalidUrl
+
+
+    async def _download_series(self, url: str) -> Series[str]:
+        """
+        Download series
+
+        :param url: Url of series
+        :returns: Series data
+        """
+        series_id = url.split("/")[-2]
+        issue_ids = await self._download_issue_ids(series_id)
+        metadata = await self._download_series_metadata(series_id)
+        return Series(
+            title = metadata["data"]["results"][0]["title"],
+            book_ids = issue_ids
+        )
+
+
+    async def _download_issue_ids(self, series_id: str) -> list[str]:
+        """
+        Download issue ids from series
+
+        :param series_id: Id of comic series on marvel.com
+        :returns: List of comic ids for marvel comics
+        """
+        response = await self._client.get(
+            f"https://api.marvel.com/browse/comics?byType=comic_series&isDigital=1&limit=10000&byId={series_id}",
+        )
+        issue_ids = [issue["digital_id"] for issue in response.json()["data"]["results"]]
+        return issue_ids
+
+
+    async def _download_series_metadata(self, series_id: str) -> dict:
+        """
+        Download series metadata
+
+        :param series_id: Id of comic series on marvel.com
+        :returns: Dictionary with metadata
+        """
+        response = await self._client.get(
+            f"https://gateway.marvel.com:443/v1/public/series/{series_id}?apikey={API_KEY}",
+            headers = {
+                "Referer": "https://developer.marvel.com/"
+            }
+        )
+        return response.json()
+
+    async def _get_issue_id(self, url: str) -> str:
+        """
+        Download issue id from url
+
+        :param url: Url to issue info page
+        :return: Issue id
+        """
+        response = await self._client.get(url)
+        search = re.search(r"digital_comic_id: \"(\d+)\"", response.text)
+        if not search:
+            raise DataNotFound
+        return search.group(1)
+
+
+
+    async def download_book_from_id(self, issue_id: str) -> Book:
+        return Book(
+            metadata = await self._download_issue_metadata(issue_id),
+            data = await self._download_issue_pages(issue_id)
+        )
+
+
+    async def _download_issue_metadata(self, issue_id: str) -> Metadata:
+        """
+        Download and parse metadata for issue
+
+        :param issue_id: Identifier for issue
+        :returns: Issue metadata
+        """
+        response = await self._client.get(
+            f"https://bifrost.marvel.com/v1/catalog/digital-comics/metadata/{issue_id}"
+        )
+        issue_meta = response.json()["data"]["results"][0]["issue_meta"]
+        return Metadata(
+            title = issue_meta["title"],
+            series = issue_meta["series_title"],
+            publisher = "Marvel",
+            authors = [c["full_name"] for c in issue_meta["creators"]["extended_list"]]
+        )
+
+
+    async def _download_issue_pages(self, issue_id: str) -> ImageList:
+        """
+        Download list of page links for issue
+
+        :param issue_id: Identifier for issue
+        :returns: List of links to comic pages
+        """
+        response = await self._client.get(
+            f"https://bifrost.marvel.com/v1/catalog/digital-comics/web/assets/{issue_id}"
+        )
+        images = []
+        for page in response.json()["data"]["results"][0]["pages"]:
+            images.append(
+                OnlineFile(
+                    url = page["assets"]["source"],
+                    extension = "jpg"
+                )
+            )
+        return ImageList(images)
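download dispatches on which of the three match patterns hits: index 0 scrapes digital_comic_id from the issue info page, index 1 takes the id straight from the reader URL, and index 2 walks a whole series. A quick sketch of that routing with illustrative URLs (the numeric ids are made up):

import re

# The three patterns from Marvel.match, tried in order.
patterns = [
    r"https://www.marvel.com/comics/issue/\d+/.+",
    r"https://read.marvel.com/#/book/\d+",
    r"https://www.marvel.com/comics/series/\d+/.+",
]
for url in (
    "https://www.marvel.com/comics/issue/71314/some-issue",    # -> 0
    "https://read.marvel.com/#/book/58805",                    # -> 1
    "https://www.marvel.com/comics/series/27392/some-series",  # -> 2
):
    print(next(i for i, p in enumerate(patterns) if re.match(p, url)))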
grawlix/sources/source.py
@@ -1,5 +1,8 @@
 from grawlix.book import Book, Series, Result

-from typing import Generic, TypeVar, Tuple
+from typing import Generic, TypeVar, Tuple, Optional
+from http.cookiejar import MozillaCookieJar
+import re
 import httpx

@@ -42,6 +45,25 @@ class Source(Generic[T]):
         raise NotImplementedError


+    @property
+    def supports_cookies(self) -> bool:
+        """Does the source support authentication with cookie file"""
+        return "cookies" in self._authentication_methods
+
+
+    def load_cookies(self, cookie_file: str):
+        """
+        Authenticate with source with netscape cookie file
+
+        :param cookie_file: Path to netscape cookie file
+        """
+        if self.supports_cookies:
+            cookie_jar = MozillaCookieJar()
+            cookie_jar.load(cookie_file, ignore_expires=True)
+            self._client.cookies.update(cookie_jar)
+            self.authenticated = True
+
+
     async def download(self, url: str) -> Result[T]:
         """
         Download book metadata from source
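load_cookies builds on the stdlib http.cookiejar module. A standalone sketch of the same loading step, with an illustrative file path:

from http.cookiejar import MozillaCookieJar

jar = MozillaCookieJar()
# ignore_expires=True keeps cookies even if their expiry has passed,
# matching the behaviour of load_cookies above.
jar.load("cookies.txt", ignore_expires=True)
for cookie in jar:
    print(cookie.domain, cookie.name)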
@@ -60,3 +82,16 @@ class Source(Generic[T]):
         :returns: Downloaded book metadata
         """
         raise NotImplementedError
+
+
+    def get_match_index(self, url: str) -> Optional[int]:
+        """
+        Find the first regex in `self.match` that matches url
+
+        :param url: Url to match
+        :returns: Index of regex
+        """
+        for index, match in enumerate(self.match):
+            if re.match(match, url):
+                return index
+        return None
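Note that get_match_index uses re.match, so each pattern is anchored at the start of the URL but not at the end. A toy check of that behaviour (URLs illustrative):

import re

pattern = r"https://read.marvel.com/#/book/\d+"
# Anchored at the start only: trailing extras still match ...
print(bool(re.match(pattern, "https://read.marvel.com/#/book/1/extra")))   # True
# ... but anything before the pattern does not.
print(bool(re.match(pattern, "prefix https://read.marvel.com/#/book/1")))  # False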