Merge branch 'marvel'

Joakim Holm 2023-05-14 17:40:03 +02:00
commit f064dfa9a6
7 changed files with 202 additions and 0 deletions

.gitignore

@@ -36,3 +36,4 @@ MANIFEST
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
cookies.txt

README.md

@@ -12,6 +12,7 @@ grawlix currently supports downloading from the following sources:
- [Flipp](https://flipp.dk)
- [Internet Archive](https://archive.org)
- [Manga Plus](https://mangaplus.shueisha.co.jp)
- [Marvel Unlimited](https://marvel.com)
- [Royal Road](https://www.royalroad.com)
- [Saxo](https://saxo.com)
- [Webtoons](https://webtoons.com)

grawlix/__main__.py

@@ -8,6 +8,7 @@ from . import arguments, logging
from typing import Tuple, Optional
from rich.progress import Progress
from functools import partial
import os
import asyncio
@@ -48,6 +49,20 @@ def get_urls(options) -> list[str]:
    return urls
def get_cookie_file(options) -> Optional[str]:
    """
    Get path to cookie file

    :param options: Cli arguments
    :returns: Path to cookie file
    """
    if options.cookie_file is not None and os.path.exists(options.cookie_file):
        return options.cookie_file
    if os.path.exists("./cookies.txt"):
        return "./cookies.txt"
    return None

async def authenticate(source: Source, config: Config, options):
    """
    Authenticate with source
@@ -61,6 +76,10 @@ async def authenticate(source: Source, config: Config, options):
        username, password, library = get_login(source, config, options)
        await source.login(username, password, library=library)
        source.authenticated = True
    if source.supports_cookies:
        cookie_file = get_cookie_file(options)
        if cookie_file:
            source.load_cookies(cookie_file)
    else:
        raise SourceNotAuthenticated
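
The lookup order in get_cookie_file is: an explicit --cookies path (only if it exists on disk), then a cookies.txt in the working directory, then nothing. A minimal standalone sketch of that precedence, with SimpleNamespace standing in for the parsed CLI arguments:

    # Standalone sketch of the lookup order above; `SimpleNamespace`
    # stands in for the argparse.Namespace the real code receives.
    import os
    from types import SimpleNamespace
    from typing import Optional

    def get_cookie_file(options) -> Optional[str]:
        # An explicit --cookies path wins, but only if the file exists
        if options.cookie_file is not None and os.path.exists(options.cookie_file):
            return options.cookie_file
        # Fall back to a cookies.txt in the current working directory
        if os.path.exists("./cookies.txt"):
            return "./cookies.txt"
        return None

    print(get_cookie_file(SimpleNamespace(cookie_file=None)))
    # -> "./cookies.txt" if present, otherwise None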

grawlix/arguments.py

@@ -44,6 +44,12 @@ def parse_arguments() -> argparse.Namespace:
        help = "Library for login",
        dest = "library",
    )
    parser.add_argument(
        '-c',
        '--cookies',
        help = "Path to netscape cookie file",
        dest = "cookie_file"
    )
    # Outputs
    parser.add_argument(
        '-o',
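
Because the flag is wired to dest = "cookie_file", the parsed namespace exposes the path as options.cookie_file, which get_cookie_file above consults. A hedged sketch of just this flag in isolation (the real parser has many more arguments, and the grawlix entry-point name is assumed, not shown in this diff):

    # Sketch: how the new flag lands on the namespace; only the cookie
    # flag from the diff is reproduced here.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c',
        '--cookies',
        help = "Path to netscape cookie file",
        dest = "cookie_file"
    )

    options = parser.parse_args(["--cookies", "cookies.txt"])
    print(options.cookie_file)  # -> cookies.txt

    # On the command line this would look something like
    # (entry-point name assumed):  grawlix --cookies cookies.txt <url>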

grawlix/sources/__init__.py

@@ -5,6 +5,7 @@ from .ereolen import Ereolen
from .flipp import Flipp
from .internet_archive import InternetArchive
from .mangaplus import MangaPlus
from .marvel import Marvel
from .royal_road import RoyalRoad
from .saxo import Saxo
from .webtoons import Webtoons
@@ -56,6 +57,7 @@ def get_source_classes() -> list[type[Source]]:
        Flipp,
        InternetArchive,
        MangaPlus,
        Marvel,
        RoyalRoad,
        Saxo,
        Webtoons
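
grawlix's dispatch code is not part of this diff, but a registry like get_source_classes() is presumably consumed by matching a url against each class's match patterns. A hedged, self-contained sketch (find_source is a hypothetical helper, not grawlix's actual API):

    # Hedged sketch: route a url to the first source class whose
    # `match` patterns accept it. `find_source` is hypothetical.
    import re

    def find_source(url: str, source_classes: list) -> type | None:
        for cls in source_classes:
            # Each Source subclass declares its url regexes in `match`
            if any(re.match(pattern, url) for pattern in cls.match):
                return cls
        return None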

grawlix/sources/marvel.py (new file)

@@ -0,0 +1,138 @@
from grawlix.book import Book, Metadata, ImageList, OnlineFile, Series, Result
from grawlix.exceptions import InvalidUrl, DataNotFound
from .source import Source

import re

# Personal marvel api key
API_KEY = "83ac0da31d3f6801f2c73c7e07ad76e8"

class Marvel(Source[str]):
    name: str = "Marvel"
    match = [
        r"https://www.marvel.com/comics/issue/\d+/.+",
        r"https://read.marvel.com/#/book/\d+",
        r"https://www.marvel.com/comics/series/\d+/.+"
    ]
    _authentication_methods: list[str] = [ "cookies" ]

    async def download(self, url: str) -> Result[str]:
        match_index = self.get_match_index(url)
        if match_index == 0:
            issue_id = await self._get_issue_id(url)
            return await self.download_book_from_id(issue_id)
        if match_index == 1:
            issue_id = url.split("/")[-1]
            return await self.download_book_from_id(issue_id)
        if match_index == 2:
            return await self._download_series(url)
        raise InvalidUrl

    async def _download_series(self, url: str) -> Series[str]:
        """
        Download series

        :param url: Url of series
        :returns: Series data
        """
        series_id = url.split("/")[-2]
        issue_ids = await self._download_issue_ids(series_id)
        metadata = await self._download_series_metadata(series_id)
        return Series(
            title = metadata["data"]["results"][0]["title"],
            book_ids = issue_ids
        )

    async def _download_issue_ids(self, series_id: str) -> list[str]:
        """
        Download issue ids from series

        :param series_id: Id of comic series on marvel.com
        :returns: List of comic ids for marvel comics
        """
        response = await self._client.get(
            f"https://api.marvel.com/browse/comics?byType=comic_series&isDigital=1&limit=10000&byId={series_id}",
        )
        issue_ids = [issue["digital_id"] for issue in response.json()["data"]["results"]]
        return issue_ids

    async def _download_series_metadata(self, series_id: str) -> dict:
        """
        Download series metadata

        :param series_id: Id of comic series on marvel.com
        :returns: Dictionary with metadata
        """
        response = await self._client.get(
            f"https://gateway.marvel.com:443/v1/public/series/{series_id}?apikey={API_KEY}",
            headers = {
                "Referer": "https://developer.marvel.com/"
            }
        )
        return response.json()

    async def _get_issue_id(self, url: str) -> str:
        """
        Download issue id from url

        :param url: Url to issue info page
        :return: Issue id
        """
        response = await self._client.get(url)
        search = re.search(r"digital_comic_id: \"(\d+)\"", response.text)
        if not search:
            raise DataNotFound
        return search.group(1)

    async def download_book_from_id(self, issue_id: str) -> Book:
        return Book(
            metadata = await self._download_issue_metadata(issue_id),
            data = await self._download_issue_pages(issue_id)
        )

    async def _download_issue_metadata(self, issue_id: str) -> Metadata:
        """
        Download and parse metadata for issue

        :param issue_id: Identifier for issue
        :returns: Issue metadata
        """
        response = await self._client.get(
            f"https://bifrost.marvel.com/v1/catalog/digital-comics/metadata/{issue_id}"
        )
        issue_meta = response.json()["data"]["results"][0]["issue_meta"]
        return Metadata(
            title = issue_meta["title"],
            series = issue_meta["series_title"],
            publisher = "Marvel",
            authors = [c["full_name"] for c in issue_meta["creators"]["extended_list"]]
        )

    async def _download_issue_pages(self, issue_id: str) -> ImageList:
        """
        Download list of page links for issue

        :param issue_id: Identifier for issue
        :returns: List of links to comic pages
        """
        response = await self._client.get(
            f"https://bifrost.marvel.com/v1/catalog/digital-comics/web/assets/{issue_id}"
        )
        images = []
        for page in response.json()["data"]["results"][0]["pages"]:
            images.append(
                OnlineFile(
                    url = page["assets"]["source"],
                    extension = "jpg"
                )
            )
        return ImageList(images)
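
The three patterns in Marvel.match map one-to-one onto the branches of download: issue info pages are scraped for a digital_comic_id, reader urls carry the issue id as their last path segment, and series urls carry the series id second-to-last. A standalone sketch of that routing (the example urls and ids are invented for illustration):

    # Standalone sketch of the url routing in Marvel.download;
    # example urls are invented.
    import re

    MATCH = [
        r"https://www.marvel.com/comics/issue/\d+/.+",   # issue info page
        r"https://read.marvel.com/#/book/\d+",           # digital reader
        r"https://www.marvel.com/comics/series/\d+/.+",  # series page
    ]

    def describe(url: str) -> str:
        for index, pattern in enumerate(MATCH):
            if re.match(pattern, url):
                if index == 0:
                    return "issue page: id scraped from digital_comic_id in the html"
                if index == 1:
                    return f"reader url: issue id {url.split('/')[-1]}"
                return f"series url: series id {url.split('/')[-2]}"
        return "no match: InvalidUrl"

    print(describe("https://read.marvel.com/#/book/12345"))
    # -> reader url: issue id 12345
    print(describe("https://www.marvel.com/comics/series/2345/some_series"))
    # -> series url: series id 2345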

grawlix/sources/source.py

@@ -1,5 +1,8 @@
from grawlix.book import Book, Series, Result
from typing import Generic, TypeVar, Tuple, Optional
from http.cookiejar import MozillaCookieJar
import re
import httpx
@@ -42,6 +45,25 @@ class Source(Generic[T]):
        raise NotImplementedError

    @property
    def supports_cookies(self) -> bool:
        """Does the source support authentication with a cookie file"""
        return "cookies" in self._authentication_methods

    def load_cookies(self, cookie_file: str):
        """
        Authenticate with source using a netscape cookie file

        :param cookie_file: Path to netscape cookie file
        """
        if self.supports_cookies:
            cookie_jar = MozillaCookieJar()
            cookie_jar.load(cookie_file, ignore_expires=True)
            self._client.cookies.update(cookie_jar)
            self.authenticated = True

    async def download(self, url: str) -> Result[T]:
        """
        Download book metadata from source
@@ -60,3 +82,16 @@ class Source(Generic[T]):
        :returns: Downloaded book metadata
        """
        raise NotImplementedError

    def get_match_index(self, url: str) -> Optional[int]:
        """
        Find the first regex in `self.match` that matches url

        :param url: Url to match
        :returns: Index of regex
        """
        for index, match in enumerate(self.match):
            if re.match(match, url):
                return index
        return None
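
load_cookies leans on two facts: the standard library's MozillaCookieJar parses Netscape-format cookies.txt exports, and httpx's Cookies.update accepts any http.cookiejar.CookieJar. A self-contained sketch of the same mechanics (the cookies.txt path is assumed to exist and hold a logged-in session):

    # Standalone sketch of the cookie loading in load_cookies; assumes
    # a Netscape-format cookies.txt exported from a browser session.
    from http.cookiejar import MozillaCookieJar
    import httpx

    jar = MozillaCookieJar()
    # ignore_expires=True also loads cookies whose expiry has passed,
    # since an exported session may still be valid server-side.
    jar.load("cookies.txt", ignore_expires=True)

    client = httpx.Client()
    client.cookies.update(jar)  # httpx accepts any http.cookiejar.CookieJar
    # Requests made through `client` now carry the session cookies.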