mirror of
https://github.com/jo1gi/grawlix.git
synced 2025-12-16 04:09:10 +00:00
Merge branch 'royal_road'
This commit is contained in:
commit
dc571ba38d
@ -46,9 +46,21 @@ class ImageList:
|
|||||||
"""
|
"""
|
||||||
images: list[OnlineFile]
|
images: list[OnlineFile]
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class HtmlFile:
|
||||||
|
title: str
|
||||||
|
file: OnlineFile
|
||||||
|
selector: Optional[dict[str, str]]
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class HtmlFiles:
|
||||||
|
cover: OnlineFile
|
||||||
|
htmlfiles: list[HtmlFile]
|
||||||
|
|
||||||
BookData = Union[
|
BookData = Union[
|
||||||
SingleFile,
|
SingleFile,
|
||||||
ImageList
|
ImageList,
|
||||||
|
HtmlFiles
|
||||||
]
|
]
|
||||||
|
|
||||||
@dataclass(slots=True)
|
@dataclass(slots=True)
|
||||||
@ -56,6 +68,7 @@ class Book:
|
|||||||
"""Stores information about a book"""
|
"""Stores information about a book"""
|
||||||
metadata: Metadata
|
metadata: Metadata
|
||||||
data: BookData
|
data: BookData
|
||||||
|
overwrite: bool = False
|
||||||
|
|
||||||
T = TypeVar("T")
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|||||||
@ -1,10 +1,10 @@
|
|||||||
from grawlix.book import Book, BookData, SingleFile, ImageList, OnlineFile
|
from grawlix.book import Book, BookData, SingleFile, ImageList, OnlineFile, HtmlFiles
|
||||||
from grawlix.exceptions import GrawlixError
|
from grawlix.exceptions import GrawlixError
|
||||||
from grawlix.logging import info
|
from grawlix.logging import info
|
||||||
|
|
||||||
from .output_format import OutputFormat
|
from .output_format import OutputFormat
|
||||||
from .epub import Epub
|
|
||||||
from .cbz import Cbz
|
from .cbz import Cbz
|
||||||
|
from .epub import Epub
|
||||||
|
|
||||||
from typing import Callable
|
from typing import Callable
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -18,16 +18,18 @@ async def download_book(book: Book, update_func: Callable, template: str) -> Non
|
|||||||
"""
|
"""
|
||||||
output_format = get_default_format(book.data)
|
output_format = get_default_format(book.data)
|
||||||
location = format_output_location(book, output_format, template)
|
location = format_output_location(book, output_format, template)
|
||||||
if os.path.exists(location):
|
if not book.overwrite and os.path.exists(location):
|
||||||
info("Skipping - File already exists")
|
info("Skipping - File already exists")
|
||||||
return
|
return
|
||||||
parent = Path(location).parent
|
parent = Path(location).parent
|
||||||
if not parent.exists():
|
if not parent.exists():
|
||||||
os.makedirs(parent)
|
os.makedirs(parent)
|
||||||
if isinstance(book.data, SingleFile):
|
if isinstance(book.data, SingleFile):
|
||||||
await output_format.dl_single_file(book.data, location, update_func)
|
await output_format.dl_single_file(book, location, update_func)
|
||||||
elif isinstance(book.data, ImageList):
|
elif isinstance(book.data, ImageList):
|
||||||
await output_format.dl_image_list(book.data, location, update_func)
|
await output_format.dl_image_list(book, location, update_func)
|
||||||
|
elif isinstance(book.data, HtmlFiles):
|
||||||
|
await output_format.dl_html_files(book, location, update_func)
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
await output_format.close()
|
await output_format.close()
|
||||||
@ -58,6 +60,8 @@ def get_default_format(bookdata: BookData) -> OutputFormat:
|
|||||||
return output_format_from_str(bookdata.file.extension)
|
return output_format_from_str(bookdata.file.extension)
|
||||||
if isinstance(bookdata, ImageList):
|
if isinstance(bookdata, ImageList):
|
||||||
return Cbz()
|
return Cbz()
|
||||||
|
if isinstance(bookdata, HtmlFiles):
|
||||||
|
return Epub()
|
||||||
raise GrawlixError
|
raise GrawlixError
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
from .output_format import OutputFormat, Update
|
from .output_format import OutputFormat, Update, Book
|
||||||
from grawlix.book import ImageList, OnlineFile
|
from grawlix.book import ImageList, OnlineFile
|
||||||
|
from grawlix.exceptions import UnsupportedOutputFormat
|
||||||
|
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
import asyncio
|
import asyncio
|
||||||
@ -9,8 +10,11 @@ class Cbz(OutputFormat):
|
|||||||
|
|
||||||
extension: str = "cbz"
|
extension: str = "cbz"
|
||||||
|
|
||||||
async def dl_image_list(self, book: ImageList, location: str, update: Update) -> None:
|
async def dl_image_list(self, book: Book, location: str, update: Update) -> None:
|
||||||
image_count = len(book.images)
|
if not isinstance(book.data, ImageList):
|
||||||
|
raise UnsupportedOutputFormat
|
||||||
|
images = book.data.images
|
||||||
|
image_count = len(images)
|
||||||
with ZipFile(location, mode="w") as zip:
|
with ZipFile(location, mode="w") as zip:
|
||||||
async def download_page(index: int, file: OnlineFile):
|
async def download_page(index: int, file: OnlineFile):
|
||||||
content = await self._download_file(file)
|
content = await self._download_file(file)
|
||||||
@ -19,6 +23,6 @@ class Cbz(OutputFormat):
|
|||||||
update(1/image_count)
|
update(1/image_count)
|
||||||
tasks = [
|
tasks = [
|
||||||
asyncio.create_task(download_page(index, file))
|
asyncio.create_task(download_page(index, file))
|
||||||
for index, file in enumerate(book.images)
|
for index, file in enumerate(images)
|
||||||
]
|
]
|
||||||
await asyncio.wait(tasks)
|
await asyncio.wait(tasks)
|
||||||
|
|||||||
@ -1,5 +1,63 @@
|
|||||||
from grawlix.book import Book, SingleFile
|
from grawlix.book import HtmlFiles, HtmlFile, OnlineFile, Book
|
||||||
from .output_format import OutputFormat
|
from grawlix.exceptions import UnsupportedOutputFormat
|
||||||
|
from .output_format import OutputFormat, Update
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import os
|
||||||
|
from ebooklib import epub
|
||||||
|
|
||||||
class Epub(OutputFormat):
|
class Epub(OutputFormat):
|
||||||
extension = "epub"
|
extension = "epub"
|
||||||
|
|
||||||
|
async def dl_html_files(self, book: Book, location: str, update: Update) -> None:
|
||||||
|
if not isinstance(book.data, HtmlFiles):
|
||||||
|
raise UnsupportedOutputFormat
|
||||||
|
html = book.data
|
||||||
|
output = epub.EpubBook()
|
||||||
|
output.set_title(book.metadata.title)
|
||||||
|
for author in book.metadata.authors:
|
||||||
|
output.add_author(author)
|
||||||
|
file_count = len(html.htmlfiles) + 1 # Html files + cover
|
||||||
|
|
||||||
|
async def download_cover(cover_file: OnlineFile):
|
||||||
|
cover_filename = f"cover.{html.cover.extension}"
|
||||||
|
epub_cover = epub.EpubCover(file_name = cover_filename)
|
||||||
|
epub_cover.content = await self._download_file(html.cover)
|
||||||
|
output.add_item(epub_cover)
|
||||||
|
epub_cover_page = epub.EpubCoverHtml(image_name = cover_filename)
|
||||||
|
if update:
|
||||||
|
update(1/file_count)
|
||||||
|
return epub_cover_page
|
||||||
|
|
||||||
|
|
||||||
|
async def download_file(index: int, file: HtmlFile):
|
||||||
|
response = await self._client.get(file.file.url, follow_redirects=True)
|
||||||
|
soup = BeautifulSoup(response.text, "lxml")
|
||||||
|
selected_element = soup.find(attrs=file.selector)
|
||||||
|
epub_file = epub.EpubHtml(
|
||||||
|
title = file.title,
|
||||||
|
file_name = f"part {index}.html",
|
||||||
|
content = str(selected_element)
|
||||||
|
)
|
||||||
|
if update:
|
||||||
|
update(1/file_count)
|
||||||
|
return epub_file
|
||||||
|
|
||||||
|
# Download files
|
||||||
|
tasks = [
|
||||||
|
download_file(index, file)
|
||||||
|
for index, file in enumerate(html.htmlfiles)
|
||||||
|
]
|
||||||
|
epub_files = await asyncio.gather(download_cover(html.cover), *tasks)
|
||||||
|
|
||||||
|
# Add files to epub
|
||||||
|
for epub_file in epub_files:
|
||||||
|
output.add_item(epub_file)
|
||||||
|
output.spine.append(epub_file)
|
||||||
|
output.toc.append(epub_file)
|
||||||
|
|
||||||
|
# Complete book
|
||||||
|
output.add_item(epub.EpubNcx())
|
||||||
|
output.add_item(epub.EpubNav())
|
||||||
|
epub.write_epub(location, output)
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
from grawlix.book import Book, SingleFile, OnlineFile, ImageList
|
from grawlix.book import Book, SingleFile, OnlineFile, ImageList, HtmlFiles, Book
|
||||||
from grawlix.exceptions import UnsupportedOutputFormat
|
from grawlix.exceptions import UnsupportedOutputFormat
|
||||||
from grawlix.encryption import decrypt
|
from grawlix.encryption import decrypt
|
||||||
|
|
||||||
@ -20,7 +20,7 @@ class OutputFormat:
|
|||||||
await self._client.aclose()
|
await self._client.aclose()
|
||||||
|
|
||||||
|
|
||||||
async def dl_single_file(self, book: SingleFile, location: str, update_func: Update) -> None:
|
async def dl_single_file(self, book: Book, location: str, update_func: Update) -> None:
|
||||||
"""
|
"""
|
||||||
Download and write an `grawlix.SingleFile` to disk
|
Download and write an `grawlix.SingleFile` to disk
|
||||||
|
|
||||||
@ -28,12 +28,14 @@ class OutputFormat:
|
|||||||
:param location: Path to where the file is written
|
:param location: Path to where the file is written
|
||||||
:raises UnsupportedOutputFormat: If datatype is not supported by format
|
:raises UnsupportedOutputFormat: If datatype is not supported by format
|
||||||
"""
|
"""
|
||||||
if not book.file.extension == self.extension:
|
if not isinstance(book.data, SingleFile):
|
||||||
raise UnsupportedOutputFormat
|
raise UnsupportedOutputFormat
|
||||||
await self._download_and_write_file(book.file, location, update_func)
|
if not book.data.file.extension == self.extension:
|
||||||
|
raise UnsupportedOutputFormat
|
||||||
|
await self._download_and_write_file(book.data.file, location, update_func)
|
||||||
|
|
||||||
|
|
||||||
async def dl_image_list(self, book: ImageList, location: str, update_func: Update) -> None:
|
async def dl_image_list(self, book: Book, location: str, update_func: Update) -> None:
|
||||||
"""
|
"""
|
||||||
Download and write an `grawlix.ImageList` to disk
|
Download and write an `grawlix.ImageList` to disk
|
||||||
|
|
||||||
@ -44,6 +46,17 @@ class OutputFormat:
|
|||||||
raise UnsupportedOutputFormat
|
raise UnsupportedOutputFormat
|
||||||
|
|
||||||
|
|
||||||
|
async def dl_html_files(self, book: Book, location: str, update_func: Update) -> None:
|
||||||
|
"""
|
||||||
|
Download and write a `grawlix.HtmlFiles` to disk
|
||||||
|
|
||||||
|
:param book: Book to download
|
||||||
|
:param location: Path to where the file is written
|
||||||
|
:raises UnsupportedOutputFormat: If datatype is not supported by format
|
||||||
|
"""
|
||||||
|
raise UnsupportedOutputFormat
|
||||||
|
|
||||||
|
|
||||||
async def _download_file(self, file: OnlineFile, update: Update = None) -> bytes:
|
async def _download_file(self, file: OnlineFile, update: Update = None) -> bytes:
|
||||||
"""
|
"""
|
||||||
Download `grawlix.OnlineFile`
|
Download `grawlix.OnlineFile`
|
||||||
@ -53,7 +66,7 @@ class OutputFormat:
|
|||||||
:returns: Content of downloaded file
|
:returns: Content of downloaded file
|
||||||
"""
|
"""
|
||||||
content = b""
|
content = b""
|
||||||
async with self._client.stream("GET", file.url, headers = file.headers) as request:
|
async with self._client.stream("GET", file.url, headers = file.headers, follow_redirects=True) as request:
|
||||||
total_filesize = int(request.headers["Content-length"])
|
total_filesize = int(request.headers["Content-length"])
|
||||||
async for chunk in request.aiter_bytes():
|
async for chunk in request.aiter_bytes():
|
||||||
content += chunk
|
content += chunk
|
||||||
|
|||||||
@ -4,6 +4,7 @@ from .source import Source
|
|||||||
from .ereolen import Ereolen
|
from .ereolen import Ereolen
|
||||||
from .flipp import Flipp
|
from .flipp import Flipp
|
||||||
from .mangaplus import MangaPlus
|
from .mangaplus import MangaPlus
|
||||||
|
from .royal_road import RoyalRoad
|
||||||
from .saxo import Saxo
|
from .saxo import Saxo
|
||||||
from .webtoons import Webtoons
|
from .webtoons import Webtoons
|
||||||
|
|
||||||
@ -53,6 +54,7 @@ def get_source_classes() -> list[type[Source]]:
|
|||||||
Ereolen,
|
Ereolen,
|
||||||
Flipp,
|
Flipp,
|
||||||
MangaPlus,
|
MangaPlus,
|
||||||
|
RoyalRoad,
|
||||||
Saxo,
|
Saxo,
|
||||||
Webtoons
|
Webtoons
|
||||||
]
|
]
|
||||||
|
|||||||
45
grawlix/sources/royal_road.py
Normal file
45
grawlix/sources/royal_road.py
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
from .source import Source
|
||||||
|
from grawlix.book import Book, HtmlFile, HtmlFiles, OnlineFile, Metadata
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
class RoyalRoad(Source):
|
||||||
|
name: str = "Royal Road"
|
||||||
|
match = [
|
||||||
|
r"https://www.royalroad.com/fiction/\d+/[^/]+"
|
||||||
|
]
|
||||||
|
_authentication_methods: list[str] = []
|
||||||
|
|
||||||
|
|
||||||
|
async def download(self, url: str) -> Book:
|
||||||
|
response = await self._client.get(url)
|
||||||
|
soup = BeautifulSoup(response.text, "lxml")
|
||||||
|
chapters = []
|
||||||
|
for chapter in soup.find_all("tr", class_="chapter-row"):
|
||||||
|
chapters.append(
|
||||||
|
HtmlFile(
|
||||||
|
title = chapter.find("a").text.strip(),
|
||||||
|
file = OnlineFile(
|
||||||
|
url = f"https://royalroad.com{chapter.get('data-url')}",
|
||||||
|
extension = "html"
|
||||||
|
),
|
||||||
|
selector = { "class": "chapter-content" }
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return Book(
|
||||||
|
data = HtmlFiles(
|
||||||
|
cover = OnlineFile(
|
||||||
|
url = soup.find("div", class_="cover-art-container") \
|
||||||
|
.find("img") \
|
||||||
|
.get("src") \
|
||||||
|
.replace("full", "large"),
|
||||||
|
extension = "jpg"
|
||||||
|
),
|
||||||
|
htmlfiles = chapters
|
||||||
|
),
|
||||||
|
metadata = Metadata(
|
||||||
|
title = soup.find("meta", attrs={"name":"twitter:title"}).get("content"),
|
||||||
|
authors = [ soup.find("meta", attrs={"name":"twitter:creator"}).get("content") ]
|
||||||
|
),
|
||||||
|
overwrite = True
|
||||||
|
)
|
||||||
@ -14,6 +14,7 @@ dependencies = [
|
|||||||
"appdirs",
|
"appdirs",
|
||||||
"beautifulsoup4",
|
"beautifulsoup4",
|
||||||
"blackboxprotobuf",
|
"blackboxprotobuf",
|
||||||
|
"EbookLib",
|
||||||
"httpx",
|
"httpx",
|
||||||
"importlib-resources",
|
"importlib-resources",
|
||||||
"lxml",
|
"lxml",
|
||||||
|
|||||||
13
shell.nix
13
shell.nix
@ -20,6 +20,18 @@ let
|
|||||||
|
|
||||||
doCheck = false;
|
doCheck = false;
|
||||||
};
|
};
|
||||||
|
ebooklib = python3Packages.buildPythonPackage rec {
|
||||||
|
pname = "EbookLib";
|
||||||
|
version = "0.18";
|
||||||
|
src = python3Packages.fetchPypi {
|
||||||
|
inherit pname version;
|
||||||
|
sha256 = "sha256-OFYmQ6e8lNm/VumTC0kn5Ok7XR0JF/aXpkVNtaHBpTM=";
|
||||||
|
};
|
||||||
|
propagatedBuildInputs = with python3Packages; [
|
||||||
|
six
|
||||||
|
lxml
|
||||||
|
];
|
||||||
|
};
|
||||||
in
|
in
|
||||||
mkShell {
|
mkShell {
|
||||||
buildInputs = [
|
buildInputs = [
|
||||||
@ -27,6 +39,7 @@ mkShell {
|
|||||||
appdirs
|
appdirs
|
||||||
beautifulsoup4
|
beautifulsoup4
|
||||||
blackboxprotobuf
|
blackboxprotobuf
|
||||||
|
ebooklib
|
||||||
httpx
|
httpx
|
||||||
importlib-resources
|
importlib-resources
|
||||||
lxml
|
lxml
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user