From f91a32b0abf8dbd8e63632b4aaefc98a9442a449 Mon Sep 17 00:00:00 2001 From: Joakim Holm Date: Fri, 5 May 2023 12:05:52 +0200 Subject: [PATCH] Restructure output system Formats can now be loaded based on book data format and file extension of the output file. Will also try to use the extension of the output location instead of using the default filetype every time. --- grawlix/output/__init__.py | 53 +++++++++++++++++++-------------- grawlix/output/acsm.py | 7 +++-- grawlix/output/cbz.py | 13 ++++---- grawlix/output/epub.py | 18 +++++++---- grawlix/output/output_format.py | 38 +++++++++-------------- 5 files changed, 68 insertions(+), 61 deletions(-) diff --git a/grawlix/output/__init__.py b/grawlix/output/__init__.py index fcca4b9..35eb1d2 100644 --- a/grawlix/output/__init__.py +++ b/grawlix/output/__init__.py @@ -1,5 +1,5 @@ from grawlix.book import Book, BookData, SingleFile, ImageList, OnlineFile, HtmlFiles -from grawlix.exceptions import GrawlixError +from grawlix.exceptions import GrawlixError, UnsupportedOutputFormat from grawlix.logging import info from .output_format import OutputFormat @@ -17,7 +17,12 @@ async def download_book(book: Book, update_func: Callable, template: str) -> Non :param book: Book to download """ - output_format = get_default_format(book.data) + _, ext = os.path.splitext(template) + ext = ext[1:] + if ext in get_valid_extensions(): + output_format = find_output_format(book, ext)() + else: + output_format = get_default_format(book) location = format_output_location(book, output_format, template) if not book.overwrite and os.path.exists(location): info("Skipping - File already exists") @@ -25,14 +30,7 @@ async def download_book(book: Book, update_func: Callable, template: str) -> Non parent = Path(location).parent if not parent.exists(): os.makedirs(parent) - if isinstance(book.data, SingleFile): - await output_format.dl_single_file(book, location, update_func) - elif isinstance(book.data, ImageList): - await output_format.dl_image_list(book, location, update_func) - elif isinstance(book.data, HtmlFiles): - await output_format.dl_html_files(book, location, update_func) - else: - raise NotImplementedError + await output_format.download(book, location, update_func) await output_format.close() @@ -49,34 +47,43 @@ def format_output_location(book: Book, output_format: OutputFormat, template: st return template.format(**values, ext = output_format.extension) -def get_default_format(bookdata: BookData) -> OutputFormat: +def get_default_format(book: Book) -> OutputFormat: """ Get default output format for bookdata. Should only be used if no format was specified by the user - :param bookdata: Content of book + :param book: Content of book :returns: OutputFormat object matching the default """ + bookdata = book.data if isinstance(bookdata, SingleFile): - return output_format_from_str(bookdata.file.extension) + extension = bookdata.file.extension if isinstance(bookdata, ImageList): - return Cbz() + extension = "cbz" if isinstance(bookdata, HtmlFiles): - return Epub() - raise GrawlixError + extension = "epub" + output_format = find_output_format(book, extension) + return output_format() -def output_format_from_str(name: str) -> OutputFormat: +def find_output_format(book: Book, extension: str) -> type[OutputFormat]: """ - Convert string to outputformat object + Find a compatible output format - :param name: Name of output format - :returns: OutputFormat object + :param book: Book to download + :param extension: Extension of output file + :returns: Compatible OutputFormat type + :raises: UnsupportedOutputFormat if nothing is found """ for output_format in get_output_formats(): - if output_format.extension == name: - return output_format() - raise GrawlixError + matches_extension = output_format.extension == extension + supports_bookdata = type(book.data) in output_format.input_types + if matches_extension and supports_bookdata: + return output_format + raise UnsupportedOutputFormat + +def get_valid_extensions() -> list[str]: + return [output_format.extension for output_format in get_output_formats()] def get_output_formats() -> list[type[OutputFormat]]: diff --git a/grawlix/output/acsm.py b/grawlix/output/acsm.py index ca5230f..0d12e46 100644 --- a/grawlix/output/acsm.py +++ b/grawlix/output/acsm.py @@ -1,14 +1,15 @@ -from grawlix.book import Book +from grawlix.book import Book, SingleFile from .output_format import OutputFormat, Update import shutil import subprocess class Acsm(OutputFormat): extension = "acsm" + input_types = [SingleFile] - async def dl_single_file(self, book: Book, location: str, update_func: Update) -> None: + async def download(self, book: Book, location: str, update_func: Update) -> None: # Download and write acsm file to disk - await super().dl_single_file(book, location, update_func) + await self._download_single_file(book, location, update_func) # TODO: Implement more general solution # Decrypt if knock is available # https://web.archive.org/web/20221016154220/https://github.com/BentonEdmondson/knock diff --git a/grawlix/output/cbz.py b/grawlix/output/cbz.py index 54ac63a..b374c5a 100644 --- a/grawlix/output/cbz.py +++ b/grawlix/output/cbz.py @@ -9,18 +9,21 @@ class Cbz(OutputFormat): """Comic book zip file""" extension: str = "cbz" + input_types = [ImageList] - async def dl_image_list(self, book: Book, location: str, update: Update) -> None: + async def download(self, book: Book, location: str, update: Update) -> None: if not isinstance(book.data, ImageList): raise UnsupportedOutputFormat + semaphore = asyncio.Semaphore(10) images = book.data.images image_count = len(images) with ZipFile(location, mode="w") as zip: async def download_page(index: int, file: OnlineFile): - content = await self._download_file(file) - zip.writestr(f"Image {index}.{file.extension}", content) - if update: - update(1/image_count) + async with semaphore: + content = await self._download_file(file) + zip.writestr(f"Image {index}.{file.extension}", content) + if update: + update(1/image_count) tasks = [ asyncio.create_task(download_page(index, file)) for index, file in enumerate(images) diff --git a/grawlix/output/epub.py b/grawlix/output/epub.py index 39bc683..7a5d4fa 100644 --- a/grawlix/output/epub.py +++ b/grawlix/output/epub.py @@ -1,4 +1,4 @@ -from grawlix.book import HtmlFiles, HtmlFile, OnlineFile, Book +from grawlix.book import HtmlFiles, HtmlFile, OnlineFile, Book, SingleFile, Metadata from grawlix.exceptions import UnsupportedOutputFormat from .output_format import OutputFormat, Update @@ -9,14 +9,20 @@ from ebooklib import epub class Epub(OutputFormat): extension = "epub" + input_types = [SingleFile, HtmlFiles] - async def dl_html_files(self, book: Book, location: str, update: Update) -> None: - if not isinstance(book.data, HtmlFiles): + async def download(self, book: Book, location: str, update: Update) -> None: + if isinstance(book.data, SingleFile): + await self._download_single_file(book, location, update) + elif isinstance(book.data, HtmlFiles): + await self._download_html_files(book.data, book.metadata, location, update) + else: raise UnsupportedOutputFormat - html = book.data + + async def _download_html_files(self, html: HtmlFiles, metadata: Metadata, location: str, update: Update) -> None: output = epub.EpubBook() - output.set_title(book.metadata.title) - for author in book.metadata.authors: + output.set_title(metadata.title) + for author in metadata.authors: output.add_author(author) file_count = len(html.htmlfiles) + 1 # Html files + cover diff --git a/grawlix/output/output_format.py b/grawlix/output/output_format.py index f9a60ea..1ed91e9 100644 --- a/grawlix/output/output_format.py +++ b/grawlix/output/output_format.py @@ -1,4 +1,4 @@ -from grawlix.book import Book, SingleFile, OnlineFile, ImageList, HtmlFiles, Book, OfflineFile +from grawlix.book import Book, SingleFile, OnlineFile, ImageList, HtmlFiles, Book, OfflineFile, BookData from grawlix.exceptions import UnsupportedOutputFormat from grawlix.encryption import decrypt @@ -10,6 +10,7 @@ Update = Optional[Callable[[float], None]] class OutputFormat: # Extension for output files extension: str + input_types: list[type[BookData]] def __init__(self) -> None: self._client = httpx.AsyncClient() @@ -20,7 +21,18 @@ class OutputFormat: await self._client.aclose() - async def dl_single_file(self, book: Book, location: str, update_func: Update) -> None: + async def download(self, book: Book, location: str, update_func: Update) -> None: + """ + Download book + + :param book: Book to download + :param location: Path to where the file is written + :param update_func: Function to update progress bar + """ + raise UnsupportedOutputFormat + + + async def _download_single_file(self, book: Book, location: str, update_func: Update) -> None: """ Download and write an `grawlix.SingleFile` to disk @@ -38,28 +50,6 @@ class OutputFormat: self._write_offline_file(book.data.file, location) - async def dl_image_list(self, book: Book, location: str, update_func: Update) -> None: - """ - Download and write an `grawlix.ImageList` to disk - - :param book: Book to download - :param location: Path to where the file is written - :raises UnsupportedOutputFormat: If datatype is not supported by format - """ - raise UnsupportedOutputFormat - - - async def dl_html_files(self, book: Book, location: str, update_func: Update) -> None: - """ - Download and write a `grawlix.HtmlFiles` to disk - - :param book: Book to download - :param location: Path to where the file is written - :raises UnsupportedOutputFormat: If datatype is not supported by format - """ - raise UnsupportedOutputFormat - - async def _download_file(self, file: OnlineFile, update: Update = None) -> bytes: """ Download `grawlix.OnlineFile`