mirror of
https://github.com/jo1gi/grawlix.git
synced 2025-12-16 04:09:10 +00:00
Merge branch 'master' of github.com:jo1gi/grawlix
This commit is contained in:
commit
4c32ac847f
@ -3,12 +3,14 @@
|
||||

|
||||

|
||||
[](https://ko-fi.com/jo1gi)
|
||||
|
||||
CLI ebook downloader
|
||||
|
||||
## Supported services
|
||||
grawlix currently supports downloading from the following sources:
|
||||
- [eReolen](https://ereolen.dk)
|
||||
- [Flipp](https://flipp.dk)
|
||||
- [Internet Archive](https://archive.org)
|
||||
- [Manga Plus](https://mangaplus.shueisha.co.jp)
|
||||
- [Royal Road](https://www.royalroad.com)
|
||||
- [Saxo](https://saxo.com)
|
||||
|
||||
@ -20,7 +20,7 @@ def get_login(source: Source, config: Config, options) -> Tuple[str, str, Option
|
||||
:param options: Command line options
|
||||
:returns: Login credentials
|
||||
"""
|
||||
source_name = source.name.lower()
|
||||
source_name = source.name.lower().replace(" ", "")
|
||||
if source_name in config.sources:
|
||||
username = config.sources[source_name].username or options.username
|
||||
password = config.sources[source_name].password or options.password
|
||||
|
||||
@ -31,11 +31,20 @@ class OnlineFile:
|
||||
encryption: Optional[Encryption] = None
|
||||
headers: Optional[dict[str, str]] = None
|
||||
|
||||
@dataclass(slots=True)
class OfflineFile:
    """A file whose content is already available as bytes"""
    # Raw bytes of the file
    content: bytes
    # Extension of the file
    extension: str
    # Encryption applied to `content`, or None if there is none
    encryption: Optional[Encryption] = None
|
||||
|
||||
File = Union[OnlineFile, OfflineFile]
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SingleFile:
|
||||
"""Bookdata in the form of a single file"""
|
||||
file: OnlineFile
|
||||
file: File
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
|
||||
@ -33,7 +33,7 @@ def load_config() -> Config:
|
||||
else:
|
||||
config_dict = {}
|
||||
sources = {}
|
||||
if "source" in config_dict:
|
||||
if "sources" in config_dict:
|
||||
for key, values in config_dict["sources"].items():
|
||||
sources[key] = SourceConfig (
|
||||
username = values.get("username"),
|
||||
|
||||
@ -1,8 +1,9 @@
|
||||
from grawlix.book import Book, BookData, SingleFile, ImageList, OnlineFile, HtmlFiles
|
||||
from grawlix.exceptions import GrawlixError
|
||||
from grawlix.exceptions import GrawlixError, UnsupportedOutputFormat
|
||||
from grawlix.logging import info
|
||||
|
||||
from .output_format import OutputFormat
|
||||
from .acsm import Acsm
|
||||
from .cbz import Cbz
|
||||
from .epub import Epub
|
||||
|
||||
@ -16,7 +17,12 @@ async def download_book(book: Book, update_func: Callable, template: str) -> Non
|
||||
|
||||
:param book: Book to download
|
||||
"""
|
||||
output_format = get_default_format(book.data)
|
||||
_, ext = os.path.splitext(template)
|
||||
ext = ext[1:]
|
||||
if ext in get_valid_extensions():
|
||||
output_format = find_output_format(book, ext)()
|
||||
else:
|
||||
output_format = get_default_format(book)
|
||||
location = format_output_location(book, output_format, template)
|
||||
if not book.overwrite and os.path.exists(location):
|
||||
info("Skipping - File already exists")
|
||||
@ -24,14 +30,7 @@ async def download_book(book: Book, update_func: Callable, template: str) -> Non
|
||||
parent = Path(location).parent
|
||||
if not parent.exists():
|
||||
os.makedirs(parent)
|
||||
if isinstance(book.data, SingleFile):
|
||||
await output_format.dl_single_file(book, location, update_func)
|
||||
elif isinstance(book.data, ImageList):
|
||||
await output_format.dl_image_list(book, location, update_func)
|
||||
elif isinstance(book.data, HtmlFiles):
|
||||
await output_format.dl_html_files(book, location, update_func)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
await output_format.download(book, location, update_func)
|
||||
await output_format.close()
|
||||
|
||||
|
||||
@ -48,34 +47,43 @@ def format_output_location(book: Book, output_format: OutputFormat, template: st
|
||||
return template.format(**values, ext = output_format.extension)
|
||||
|
||||
|
||||
def get_default_format(bookdata: BookData) -> OutputFormat:
|
||||
def get_default_format(book: Book) -> OutputFormat:
|
||||
"""
|
||||
Get default output format for bookdata.
|
||||
Should only be used if no format was specified by the user
|
||||
|
||||
:param bookdata: Content of book
|
||||
:param book: Content of book
|
||||
:returns: OutputFormat object matching the default
|
||||
"""
|
||||
bookdata = book.data
|
||||
if isinstance(bookdata, SingleFile):
|
||||
return output_format_from_str(bookdata.file.extension)
|
||||
extension = bookdata.file.extension
|
||||
if isinstance(bookdata, ImageList):
|
||||
return Cbz()
|
||||
extension = "cbz"
|
||||
if isinstance(bookdata, HtmlFiles):
|
||||
return Epub()
|
||||
raise GrawlixError
|
||||
extension = "epub"
|
||||
output_format = find_output_format(book, extension)
|
||||
return output_format()
|
||||
|
||||
|
||||
def output_format_from_str(name: str) -> OutputFormat:
|
||||
def find_output_format(book: Book, extension: str) -> type[OutputFormat]:
|
||||
"""
|
||||
Convert string to outputformat object
|
||||
Find a compatible output format
|
||||
|
||||
:param name: Name of output format
|
||||
:returns: OutputFormat object
|
||||
:param book: Book to download
|
||||
:param extension: Extension of output file
|
||||
:returns: Compatible OutputFormat type
|
||||
:raises: UnsupportedOutputFormat if nothing is found
|
||||
"""
|
||||
for output_format in get_output_formats():
|
||||
if output_format.extension == name:
|
||||
return output_format()
|
||||
raise GrawlixError
|
||||
matches_extension = output_format.extension == extension
|
||||
supports_bookdata = type(book.data) in output_format.input_types
|
||||
if matches_extension and supports_bookdata:
|
||||
return output_format
|
||||
raise UnsupportedOutputFormat
|
||||
|
||||
def get_valid_extensions() -> list[str]:
    """
    Collect the file extension of every available output format

    :returns: List of extensions, in the order of `get_output_formats`
    """
    return [format_class.extension for format_class in get_output_formats()]
|
||||
|
||||
|
||||
def get_output_formats() -> list[type[OutputFormat]]:
|
||||
@ -85,6 +93,7 @@ def get_output_formats() -> list[type[OutputFormat]]:
|
||||
:returns: List of available output format classes
|
||||
"""
|
||||
return [
|
||||
Acsm,
|
||||
Cbz,
|
||||
Epub,
|
||||
]
|
||||
|
||||
23
grawlix/output/acsm.py
Normal file
23
grawlix/output/acsm.py
Normal file
@ -0,0 +1,23 @@
|
||||
from grawlix.book import Book, SingleFile
|
||||
from .output_format import OutputFormat, Update
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
class Acsm(OutputFormat):
    """Output format that writes a book's acsm file to disk"""

    extension = "acsm"
    input_types = [SingleFile]

    async def download(self, book: Book, location: str, update_func: Update) -> None:
        """
        Write the acsm file to `location` and decrypt it if `knock` is installed

        :param book: Book to download
        :param location: Path to where the file is written
        :param update_func: Function to update progress bar
        """
        # Download and write acsm file to disk
        await self._download_single_file(book, location, update_func)
        # TODO: Implement more general solution
        # https://web.archive.org/web/20221016154220/https://github.com/BentonEdmondson/knock
        knock_missing = shutil.which("knock") is None
        if knock_missing:
            # TODO: Print warning
            return
        # knock is on PATH: run it on the written file, capturing (and
        # discarding) its output
        subprocess.run(
            ["knock", location],
            capture_output = True
        )
|
||||
@ -9,18 +9,21 @@ class Cbz(OutputFormat):
|
||||
"""Comic book zip file"""
|
||||
|
||||
extension: str = "cbz"
|
||||
input_types = [ImageList]
|
||||
|
||||
async def dl_image_list(self, book: Book, location: str, update: Update) -> None:
|
||||
async def download(self, book: Book, location: str, update: Update) -> None:
|
||||
if not isinstance(book.data, ImageList):
|
||||
raise UnsupportedOutputFormat
|
||||
semaphore = asyncio.Semaphore(10)
|
||||
images = book.data.images
|
||||
image_count = len(images)
|
||||
with ZipFile(location, mode="w") as zip:
|
||||
async def download_page(index: int, file: OnlineFile):
|
||||
content = await self._download_file(file)
|
||||
zip.writestr(f"Image {index}.{file.extension}", content)
|
||||
if update:
|
||||
update(1/image_count)
|
||||
async with semaphore:
|
||||
content = await self._download_file(file)
|
||||
zip.writestr(f"Image {index}.{file.extension}", content)
|
||||
if update:
|
||||
update(1/image_count)
|
||||
tasks = [
|
||||
asyncio.create_task(download_page(index, file))
|
||||
for index, file in enumerate(images)
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
from grawlix.book import HtmlFiles, HtmlFile, OnlineFile, Book
|
||||
from grawlix.book import HtmlFiles, HtmlFile, OnlineFile, Book, SingleFile, Metadata
|
||||
from grawlix.exceptions import UnsupportedOutputFormat
|
||||
from .output_format import OutputFormat, Update
|
||||
|
||||
@ -9,14 +9,20 @@ from ebooklib import epub
|
||||
|
||||
class Epub(OutputFormat):
|
||||
extension = "epub"
|
||||
input_types = [SingleFile, HtmlFiles]
|
||||
|
||||
async def dl_html_files(self, book: Book, location: str, update: Update) -> None:
|
||||
if not isinstance(book.data, HtmlFiles):
|
||||
async def download(self, book: Book, location: str, update: Update) -> None:
|
||||
if isinstance(book.data, SingleFile):
|
||||
await self._download_single_file(book, location, update)
|
||||
elif isinstance(book.data, HtmlFiles):
|
||||
await self._download_html_files(book.data, book.metadata, location, update)
|
||||
else:
|
||||
raise UnsupportedOutputFormat
|
||||
html = book.data
|
||||
|
||||
async def _download_html_files(self, html: HtmlFiles, metadata: Metadata, location: str, update: Update) -> None:
|
||||
output = epub.EpubBook()
|
||||
output.set_title(book.metadata.title)
|
||||
for author in book.metadata.authors:
|
||||
output.set_title(metadata.title)
|
||||
for author in metadata.authors:
|
||||
output.add_author(author)
|
||||
file_count = len(html.htmlfiles) + 1 # Html files + cover
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
from grawlix.book import Book, SingleFile, OnlineFile, ImageList, HtmlFiles, Book
|
||||
from grawlix.book import Book, SingleFile, OnlineFile, ImageList, HtmlFiles, Book, OfflineFile, BookData
|
||||
from grawlix.exceptions import UnsupportedOutputFormat
|
||||
from grawlix.encryption import decrypt
|
||||
|
||||
@ -9,7 +9,8 @@ Update = Optional[Callable[[float], None]]
|
||||
|
||||
class OutputFormat:
|
||||
# Extension for output files
|
||||
extension: str = ""
|
||||
extension: str
|
||||
input_types: list[type[BookData]]
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._client = httpx.AsyncClient()
|
||||
@ -20,7 +21,18 @@ class OutputFormat:
|
||||
await self._client.aclose()
|
||||
|
||||
|
||||
async def dl_single_file(self, book: Book, location: str, update_func: Update) -> None:
|
||||
async def download(self, book: Book, location: str, update_func: Update) -> None:
|
||||
"""
|
||||
Download book
|
||||
|
||||
:param book: Book to download
|
||||
:param location: Path to where the file is written
|
||||
:param update_func: Function to update progress bar
|
||||
"""
|
||||
raise UnsupportedOutputFormat
|
||||
|
||||
|
||||
async def _download_single_file(self, book: Book, location: str, update_func: Update) -> None:
|
||||
"""
|
||||
Download and write an `grawlix.SingleFile` to disk
|
||||
|
||||
@ -32,29 +44,10 @@ class OutputFormat:
|
||||
raise UnsupportedOutputFormat
|
||||
if not book.data.file.extension == self.extension:
|
||||
raise UnsupportedOutputFormat
|
||||
await self._download_and_write_file(book.data.file, location, update_func)
|
||||
|
||||
|
||||
async def dl_image_list(self, book: Book, location: str, update_func: Update) -> None:
|
||||
"""
|
||||
Download and write an `grawlix.ImageList` to disk
|
||||
|
||||
:param book: Book to download
|
||||
:param location: Path to where the file is written
|
||||
:raises UnsupportedOutputFormat: If datatype is not supported by format
|
||||
"""
|
||||
raise UnsupportedOutputFormat
|
||||
|
||||
|
||||
async def dl_html_files(self, book: Book, location: str, update_func: Update) -> None:
|
||||
"""
|
||||
Download and write a `grawlix.HtmlFiles` to disk
|
||||
|
||||
:param book: Book to download
|
||||
:param location: Path to where the file is written
|
||||
:raises UnsupportedOutputFormat: If datatype is not supported by format
|
||||
"""
|
||||
raise UnsupportedOutputFormat
|
||||
if isinstance(book.data.file, OnlineFile):
|
||||
await self._download_and_write_file(book.data.file, location, update_func)
|
||||
elif isinstance(book.data.file, OfflineFile):
|
||||
self._write_offline_file(book.data.file, location)
|
||||
|
||||
|
||||
async def _download_file(self, file: OnlineFile, update: Update = None) -> bytes:
|
||||
@ -88,3 +81,17 @@ class OutputFormat:
|
||||
content = await self._download_file(file, update)
|
||||
with open(location, "wb") as f:
|
||||
f.write(content)
|
||||
|
||||
|
||||
def _write_offline_file(self, file: OfflineFile, location: str) -> None:
    """
    Write the content of an `OfflineFile` to disk

    The content is decrypted first when the file carries encryption info.

    :param file: File to write to disk
    :param location: Path to where the file is written
    """
    content = file.content
    if file.encryption:
        # Decrypt before opening the target: if decryption fails, no empty
        # file is left behind at `location`
        content = decrypt(content, file.encryption)
    with open(location, "wb") as f:
        f.write(content)
|
||||
|
||||
@ -3,6 +3,7 @@ from grawlix.exceptions import NoSourceFound
|
||||
from .source import Source
|
||||
from .ereolen import Ereolen
|
||||
from .flipp import Flipp
|
||||
from .internet_archive import InternetArchive
|
||||
from .mangaplus import MangaPlus
|
||||
from .royal_road import RoyalRoad
|
||||
from .saxo import Saxo
|
||||
@ -53,6 +54,7 @@ def get_source_classes() -> list[type[Source]]:
|
||||
return [
|
||||
Ereolen,
|
||||
Flipp,
|
||||
InternetArchive,
|
||||
MangaPlus,
|
||||
RoyalRoad,
|
||||
Saxo,
|
||||
|
||||
@ -2,8 +2,8 @@ from grawlix.book import Result, Book, SingleFile, Metadata, OnlineFile
|
||||
from grawlix.encryption import AESCTREncryption
|
||||
from grawlix.exceptions import InvalidUrl, DataNotFound
|
||||
from grawlix.utils import nearest_string
|
||||
|
||||
from .source import Source
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import json
|
||||
import re
|
||||
|
||||
111
grawlix/sources/internet_archive.py
Normal file
111
grawlix/sources/internet_archive.py
Normal file
@ -0,0 +1,111 @@
|
||||
from grawlix.book import Book, SingleFile, Metadata, OfflineFile
|
||||
from .source import Source
|
||||
|
||||
import random
|
||||
import string
|
||||
from bs4 import BeautifulSoup
|
||||
import asyncio
|
||||
|
||||
class InternetArchive(Source):
    """Source for loaning books from archive.org as acsm files"""

    name: str = "Internet Archive"
    match: list[str] = [
        r"https://archive.org/details/.+"
    ]
    _authentication_methods = [ "login", "cookies" ]
    _login_credentials = [ "username", "password" ]

    @staticmethod
    def _format_data(content_type: str, fields):
        """
        Build a `multipart/form-data` request body by hand

        :param content_type: Multipart boundary (without the leading "--")
        :param fields: Mapping of form field names to their values
        :returns: Request body as a string
        """
        parts = [
            f"--{content_type}\x0d\x0aContent-Disposition: form-data; name=\"{name}\"\x0d\x0a\x0d\x0a{value}\x0d\x0a"
            for name, value in fields.items()
        ]
        # The closing delimiter is "--" + boundary + "--" (RFC 2046, 5.1.1);
        # the bare boundary followed by "--" is not a valid terminator
        parts.append(f"--{content_type}--\x0d\x0a")
        return "".join(parts)


    async def login(self, username: str, password: str, **kwargs) -> None:
        """
        Log in to archive.org with username and password

        Prints "Failed login" and exits with status 1 if the response does
        not contain "Successful login".

        :param username: Username of the archive.org account
        :param password: Password of the archive.org account
        """
        # Request the login page before posting credentials
        # NOTE(review): presumably needed to obtain session cookies - confirm
        await self._client.get("https://archive.org/account/login")
        content_type = "----WebKitFormBoundary"+"".join(random.sample(string.ascii_letters + string.digits, 16))
        headers = {'Content-Type': 'multipart/form-data; boundary='+content_type}
        data = self._format_data(content_type, {"username":username, "password":password, "submit_by_js":"true"})
        response = await self._client.post(
            "https://archive.org/account/login",
            data=data,
            headers=headers
        )
        if "Successful login" not in response.text:
            print("Failed login")
            # `exit` is injected by the `site` module and may be missing;
            # raising SystemExit directly has the same effect
            raise SystemExit(1)


    async def _download_acsm(self, book_id: str) -> bytes:
        """
        Loan book on archive.org and download acsm file

        :param book_id: Id of book
        :returns: Content of the acsm file
        """
        # The loan requests are sent one after another in this fixed order
        loan_requests = (
            ("https://archive.org/services/loans/loan/searchInside.php", "grant_access"),
            ("https://archive.org/services/loans/loan/", "browse_book"),
            ("https://archive.org/services/loans/loan/", "create_token"),
        )
        # TODO: Error handling
        for loan_url, action in loan_requests:
            await self._client.post(
                loan_url,
                data = {
                    "action": action,
                    "identifier": book_id
                }
            )
        acsm_response = await self._client.get(
            f"https://archive.org/services/loans/loan/?action=media_url&identifier={book_id}&format=pdf&redirect=1",
            follow_redirects = True
        )
        return acsm_response.content


    async def download(self, url: str) -> Book:
        """
        Download the book at `url` as an acsm file together with its metadata

        :param url: Url of the book (https://archive.org/details/<id>...)
        :returns: Book with the acsm file as an `OfflineFile`
        """
        from urllib.parse import urlparse

        # The url path looks like "/details/<id>[/...]". Parsing the url
        # keeps a "?query" or "#fragment" out of the book id
        book_id = urlparse(url).path.split("/")[2]
        metadata, acsm_file = await asyncio.gather(
            self._download_metadata(book_id),
            self._download_acsm(book_id)
        )
        # NOTE(review): "creator" is presumably a string, but a list is
        # accepted as well so the authors never end up nested - confirm
        # against the manifest format
        creator = metadata.get("creator")
        if creator is None:
            authors = []
        elif isinstance(creator, list):
            authors = creator
        else:
            authors = [ creator ]
        return Book(
            data = SingleFile(
                OfflineFile(
                    content = acsm_file,
                    extension = "acsm",
                )
            ),
            metadata = Metadata(
                title = metadata["title"],
                authors = authors
            )
        )


    async def _download_metadata(self, book_id: str) -> dict:
        """
        Download metadata for book

        :param book_id: Id of book
        :returns: Dictionary with metadata
        """
        page_response = await self._client.get(
            f"https://archive.org/details/{book_id}"
        )
        soup = BeautifulSoup(page_response.text, "lxml")
        # NOTE(review): raises AttributeError if the page has no
        # <ia-book-theater> element - consider a dedicated error
        metadata_url = soup.find("ia-book-theater").get("bookmanifesturl")
        # The attribute value is prefixed with "https:" to form the full url
        metadata_response = await self._client.get(
            f"https:{metadata_url}"
        )
        return metadata_response.json()["data"]["metadata"]
|
||||
Loading…
Reference in New Issue
Block a user