Add fanfiction.net source

Joakim Holm 2023-06-02 20:02:07 +02:00
parent dade9db6da
commit be0aa9eec0
3 changed files with 58 additions and 0 deletions

README.md

@@ -9,6 +9,7 @@ CLI ebook downloader
## Supported services
grawlix currently supports downloading from the following sources:
- [eReolen](https://ereolen.dk)
- [fanfiction.net](https://www.fanfiction.net)
- [Flipp](https://flipp.dk)
- [Internet Archive](https://archive.org)
- [Manga Plus](https://mangaplus.shueisha.co.jp)

grawlix/sources/__init__.py

@@ -2,6 +2,7 @@ from grawlix.exceptions import InvalidUrl
from .source import Source
from .ereolen import Ereolen
from .fanfictionnet import FanfictionNet
from .flipp import Flipp
from .internet_archive import InternetArchive
from .mangaplus import MangaPlus
@@ -54,6 +55,7 @@ def get_source_classes() -> list[type[Source]]:
"""
return [
Ereolen,
FanfictionNet,
Flipp,
InternetArchive,
MangaPlus,
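
For context on the registry above: each `Source` subclass advertises a `match` list of URL regexes (as `FanfictionNet` does in the new file below), so resolving a URL to a source is presumably a linear scan over `get_source_classes()`. A minimal sketch under that assumption, using only the names visible in this diff (`Source`, `match`, `InvalidUrl`); `find_source` itself is a hypothetical helper, not grawlix's actual code:

    import re

    def find_source(url: str) -> type[Source]:
        # Hypothetical resolver: try every registered source's url patterns
        for source_class in get_source_classes():
            if any(re.match(pattern, url) for pattern in source_class.match):
                return source_class
        # No source claimed the url (InvalidUrl is imported at the top of this file)
        raise InvalidUrl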

grawlix/sources/fanfictionnet.py

@@ -0,0 +1,55 @@
from .source import Source
from grawlix.book import Book, HtmlFile, HtmlFiles, OnlineFile, Metadata

from bs4 import BeautifulSoup

USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; rv:113.0) Gecko/20100101 Firefox/113.0"

class FanfictionNet(Source):
    name: str = "fanfiction.net"
    # Story urls look like https://www.fanfiction.net/s/<book id>/<chapter>/...
    match = [
        r"https://www.fanfiction.net/s/\d+/\d+.*"
    ]
    # This source authenticates with cookies loaded into the http client
    _authentication_methods: list[str] = [ "cookies" ]

    async def download(self, url: str) -> Book:
        book_id = self._extract_id(url)
        # Fetch the first chapter; it contains the title and the chapter list
        response = await self._client.get(
            f"https://www.fanfiction.net/s/{book_id}/1",
            headers = {
                "User-Agent": USER_AGENT
            }
        )
        soup = BeautifulSoup(response.text, "lxml")
        # The chapter dropdown only exists on multi-chapter stories; assume a
        # single untitled chapter when it is missing
        chapter_select = soup.find(id="chap_select")
        if chapter_select is not None:
            chapter_titles = [option.text for option in chapter_select.find_all("option")]
        else:
            chapter_titles = [ "Chapter 1" ]
        chapters = []
        for index, title in enumerate(chapter_titles):
            chapters.append(
                HtmlFile(
                    title = title,
                    file = OnlineFile(
                        url = f"https://www.fanfiction.net/s/{book_id}/{index+1}",
                        extension = "html",
                        headers = {
                            "User-Agent": USER_AGENT,
                        },
                        cookies = self._client.cookies
                    ),
                    # The story text lives in the element with id "storytext"
                    selector = { "id": "storytext" }
                )
            )
        return Book(
            data = HtmlFiles(htmlfiles = chapters),
            metadata = Metadata(
                # The first bold element with this class holds the story title
                title = soup.find("b", class_="xcontrast_txt").text,
            )
        )

    @staticmethod
    def _extract_id(url: str) -> str:
        """
        Extract the book id from a story url

        :param url: Url of book
        :returns: Id of book
        """
        return url.split("/")[4]
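
As a quick sanity check of `_extract_id`: the story id is the fifth slash-separated segment of the url, so (the url below is made up for illustration):

    # "https://www.fanfiction.net/s/1234567/1/Example-Title".split("/")
    # -> ["https:", "", "www.fanfiction.net", "s", "1234567", "1", "Example-Title"]
    assert FanfictionNet._extract_id("https://www.fanfiction.net/s/1234567/1/Example-Title") == "1234567"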