diff --git a/grawlix/output/__init__.py b/grawlix/output/__init__.py index fcca4b9..35eb1d2 100644 --- a/grawlix/output/__init__.py +++ b/grawlix/output/__init__.py @@ -1,5 +1,5 @@ from grawlix.book import Book, BookData, SingleFile, ImageList, OnlineFile, HtmlFiles -from grawlix.exceptions import GrawlixError +from grawlix.exceptions import GrawlixError, UnsupportedOutputFormat from grawlix.logging import info from .output_format import OutputFormat @@ -17,7 +17,12 @@ async def download_book(book: Book, update_func: Callable, template: str) -> Non :param book: Book to download """ - output_format = get_default_format(book.data) + _, ext = os.path.splitext(template) + ext = ext[1:] + if ext in get_valid_extensions(): + output_format = find_output_format(book, ext)() + else: + output_format = get_default_format(book) location = format_output_location(book, output_format, template) if not book.overwrite and os.path.exists(location): info("Skipping - File already exists") @@ -25,14 +30,7 @@ async def download_book(book: Book, update_func: Callable, template: str) -> Non parent = Path(location).parent if not parent.exists(): os.makedirs(parent) - if isinstance(book.data, SingleFile): - await output_format.dl_single_file(book, location, update_func) - elif isinstance(book.data, ImageList): - await output_format.dl_image_list(book, location, update_func) - elif isinstance(book.data, HtmlFiles): - await output_format.dl_html_files(book, location, update_func) - else: - raise NotImplementedError + await output_format.download(book, location, update_func) await output_format.close() @@ -49,34 +47,43 @@ def format_output_location(book: Book, output_format: OutputFormat, template: st return template.format(**values, ext = output_format.extension) -def get_default_format(bookdata: BookData) -> OutputFormat: +def get_default_format(book: Book) -> OutputFormat: """ Get default output format for bookdata. Should only be used if no format was specified by the user - :param bookdata: Content of book + :param book: Content of book :returns: OutputFormat object matching the default """ + bookdata = book.data if isinstance(bookdata, SingleFile): - return output_format_from_str(bookdata.file.extension) + extension = bookdata.file.extension if isinstance(bookdata, ImageList): - return Cbz() + extension = "cbz" if isinstance(bookdata, HtmlFiles): - return Epub() - raise GrawlixError + extension = "epub" + output_format = find_output_format(book, extension) + return output_format() -def output_format_from_str(name: str) -> OutputFormat: +def find_output_format(book: Book, extension: str) -> type[OutputFormat]: """ - Convert string to outputformat object + Find a compatible output format - :param name: Name of output format - :returns: OutputFormat object + :param book: Book to download + :param extension: Extension of output file + :returns: Compatible OutputFormat type + :raises: UnsupportedOutputFormat if nothing is found """ for output_format in get_output_formats(): - if output_format.extension == name: - return output_format() - raise GrawlixError + matches_extension = output_format.extension == extension + supports_bookdata = type(book.data) in output_format.input_types + if matches_extension and supports_bookdata: + return output_format + raise UnsupportedOutputFormat + +def get_valid_extensions() -> list[str]: + return [output_format.extension for output_format in get_output_formats()] def get_output_formats() -> list[type[OutputFormat]]: diff --git a/grawlix/output/acsm.py b/grawlix/output/acsm.py index ca5230f..0d12e46 100644 --- a/grawlix/output/acsm.py +++ b/grawlix/output/acsm.py @@ -1,14 +1,15 @@ -from grawlix.book import Book +from grawlix.book import Book, SingleFile from .output_format import OutputFormat, Update import shutil import subprocess class Acsm(OutputFormat): extension = "acsm" + input_types = [SingleFile] - async def dl_single_file(self, book: Book, location: str, update_func: Update) -> None: + async def download(self, book: Book, location: str, update_func: Update) -> None: # Download and write acsm file to disk - await super().dl_single_file(book, location, update_func) + await self._download_single_file(book, location, update_func) # TODO: Implement more general solution # Decrypt if knock is available # https://web.archive.org/web/20221016154220/https://github.com/BentonEdmondson/knock diff --git a/grawlix/output/cbz.py b/grawlix/output/cbz.py index 54ac63a..b374c5a 100644 --- a/grawlix/output/cbz.py +++ b/grawlix/output/cbz.py @@ -9,18 +9,21 @@ class Cbz(OutputFormat): """Comic book zip file""" extension: str = "cbz" + input_types = [ImageList] - async def dl_image_list(self, book: Book, location: str, update: Update) -> None: + async def download(self, book: Book, location: str, update: Update) -> None: if not isinstance(book.data, ImageList): raise UnsupportedOutputFormat + semaphore = asyncio.Semaphore(10) images = book.data.images image_count = len(images) with ZipFile(location, mode="w") as zip: async def download_page(index: int, file: OnlineFile): - content = await self._download_file(file) - zip.writestr(f"Image {index}.{file.extension}", content) - if update: - update(1/image_count) + async with semaphore: + content = await self._download_file(file) + zip.writestr(f"Image {index}.{file.extension}", content) + if update: + update(1/image_count) tasks = [ asyncio.create_task(download_page(index, file)) for index, file in enumerate(images) diff --git a/grawlix/output/epub.py b/grawlix/output/epub.py index 39bc683..7a5d4fa 100644 --- a/grawlix/output/epub.py +++ b/grawlix/output/epub.py @@ -1,4 +1,4 @@ -from grawlix.book import HtmlFiles, HtmlFile, OnlineFile, Book +from grawlix.book import HtmlFiles, HtmlFile, OnlineFile, Book, SingleFile, Metadata from grawlix.exceptions import UnsupportedOutputFormat from .output_format import OutputFormat, Update @@ -9,14 +9,20 @@ from ebooklib import epub class Epub(OutputFormat): extension = "epub" + input_types = [SingleFile, HtmlFiles] - async def dl_html_files(self, book: Book, location: str, update: Update) -> None: - if not isinstance(book.data, HtmlFiles): + async def download(self, book: Book, location: str, update: Update) -> None: + if isinstance(book.data, SingleFile): + await self._download_single_file(book, location, update) + elif isinstance(book.data, HtmlFiles): + await self._download_html_files(book.data, book.metadata, location, update) + else: raise UnsupportedOutputFormat - html = book.data + + async def _download_html_files(self, html: HtmlFiles, metadata: Metadata, location: str, update: Update) -> None: output = epub.EpubBook() - output.set_title(book.metadata.title) - for author in book.metadata.authors: + output.set_title(metadata.title) + for author in metadata.authors: output.add_author(author) file_count = len(html.htmlfiles) + 1 # Html files + cover diff --git a/grawlix/output/output_format.py b/grawlix/output/output_format.py index f9a60ea..1ed91e9 100644 --- a/grawlix/output/output_format.py +++ b/grawlix/output/output_format.py @@ -1,4 +1,4 @@ -from grawlix.book import Book, SingleFile, OnlineFile, ImageList, HtmlFiles, Book, OfflineFile +from grawlix.book import Book, SingleFile, OnlineFile, ImageList, HtmlFiles, Book, OfflineFile, BookData from grawlix.exceptions import UnsupportedOutputFormat from grawlix.encryption import decrypt @@ -10,6 +10,7 @@ Update = Optional[Callable[[float], None]] class OutputFormat: # Extension for output files extension: str + input_types: list[type[BookData]] def __init__(self) -> None: self._client = httpx.AsyncClient() @@ -20,7 +21,18 @@ class OutputFormat: await self._client.aclose() - async def dl_single_file(self, book: Book, location: str, update_func: Update) -> None: + async def download(self, book: Book, location: str, update_func: Update) -> None: + """ + Download book + + :param book: Book to download + :param location: Path to where the file is written + :param update_func: Function to update progress bar + """ + raise UnsupportedOutputFormat + + + async def _download_single_file(self, book: Book, location: str, update_func: Update) -> None: """ Download and write an `grawlix.SingleFile` to disk @@ -38,28 +50,6 @@ class OutputFormat: self._write_offline_file(book.data.file, location) - async def dl_image_list(self, book: Book, location: str, update_func: Update) -> None: - """ - Download and write an `grawlix.ImageList` to disk - - :param book: Book to download - :param location: Path to where the file is written - :raises UnsupportedOutputFormat: If datatype is not supported by format - """ - raise UnsupportedOutputFormat - - - async def dl_html_files(self, book: Book, location: str, update_func: Update) -> None: - """ - Download and write a `grawlix.HtmlFiles` to disk - - :param book: Book to download - :param location: Path to where the file is written - :raises UnsupportedOutputFormat: If datatype is not supported by format - """ - raise UnsupportedOutputFormat - - async def _download_file(self, file: OnlineFile, update: Update = None) -> bytes: """ Download `grawlix.OnlineFile`