from .source import Source
from grawlix.book import Book, HtmlFile, HtmlFiles, OnlineFile, Metadata

from bs4 import BeautifulSoup

# Desktop browser user agent; fanfiction.net rejects requests made with
# default HTTP-client user agents.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; rv:113.0) Gecko/20100101 Firefox/113.0"


class FanfictionNet(Source):
    """Source for downloading stories from fanfiction.net"""

    name: str = "fanfiction.net"
    match = [
        # Dots escaped so the pattern does not match lookalike hosts
        r"https://www\.fanfiction\.net/s/\d+/\d+.*"
    ]
    _authentication_methods: list[str] = [ "cookies" ]

    async def download(self, url: str) -> Book:
        """
        Download book from url

        :param url: Url of book to download
        :returns: Book with one html file per chapter
        """
        book_id = self._extract_id(url)
        response = await self._client.get(
            f"https://www.fanfiction.net/s/{book_id}/1",
            headers = {
                "User-Agent": USER_AGENT
            }
        )
        soup = BeautifulSoup(response.text, "lxml")
        title = soup.find("b", class_="xcontrast_txt").text
        chapter_select = soup.find(id="chap_select")
        if chapter_select is not None:
            chapter_titles = [option.text for option in chapter_select.find_all("option")]
        else:
            # Single-chapter stories have no chapter dropdown; treat the
            # story itself as the only chapter.
            chapter_titles = [title]
        chapters = [
            self._create_chapter_file(book_id, index, chapter_title)
            for index, chapter_title in enumerate(chapter_titles)
        ]
        return Book(
            data = HtmlFiles(htmlfiles = chapters),
            metadata = Metadata(
                title = title,
            )
        )

    def _create_chapter_file(self, book_id: str, index: int, title: str) -> HtmlFile:
        """
        Create a downloadable file reference for a single chapter

        :param book_id: Id of book
        :param index: Zero-based chapter index
        :param title: Chapter title
        :returns: HtmlFile pointing at the chapter page
        """
        return HtmlFile(
            title = title,
            file = OnlineFile(
                url = f"https://www.fanfiction.net/s/{book_id}/{index + 1}",
                extension = "html",
                # Reuse the module-level constant instead of duplicating
                # the user agent string.
                headers = {
                    "User-Agent": USER_AGENT,
                },
                cookies = self._client.cookies
            ),
            selector = { "id": "storytext" }
        )

    @staticmethod
    def _extract_id(url: str) -> str:
        """
        Extracts book id from url

        :param url: Url of book
        :returns: Id of book
        """
        return url.split("/")[4]