Restructure output system

Output formats can now be selected based on the book's data type and the
file extension of the output file. The downloader will also try to use the
extension of the output location, falling back to the default file type
only when that extension is not a supported one.
This commit is contained in:
Joakim Holm 2023-05-05 12:05:52 +02:00
parent c2545b871f
commit f91a32b0ab
5 changed files with 68 additions and 61 deletions

View File

@ -1,5 +1,5 @@
from grawlix.book import Book, BookData, SingleFile, ImageList, OnlineFile, HtmlFiles from grawlix.book import Book, BookData, SingleFile, ImageList, OnlineFile, HtmlFiles
from grawlix.exceptions import GrawlixError from grawlix.exceptions import GrawlixError, UnsupportedOutputFormat
from grawlix.logging import info from grawlix.logging import info
from .output_format import OutputFormat from .output_format import OutputFormat
@ -17,7 +17,12 @@ async def download_book(book: Book, update_func: Callable, template: str) -> Non
:param book: Book to download :param book: Book to download
""" """
output_format = get_default_format(book.data) _, ext = os.path.splitext(template)
ext = ext[1:]
if ext in get_valid_extensions():
output_format = find_output_format(book, ext)()
else:
output_format = get_default_format(book)
location = format_output_location(book, output_format, template) location = format_output_location(book, output_format, template)
if not book.overwrite and os.path.exists(location): if not book.overwrite and os.path.exists(location):
info("Skipping - File already exists") info("Skipping - File already exists")
@ -25,14 +30,7 @@ async def download_book(book: Book, update_func: Callable, template: str) -> Non
parent = Path(location).parent parent = Path(location).parent
if not parent.exists(): if not parent.exists():
os.makedirs(parent) os.makedirs(parent)
if isinstance(book.data, SingleFile): await output_format.download(book, location, update_func)
await output_format.dl_single_file(book, location, update_func)
elif isinstance(book.data, ImageList):
await output_format.dl_image_list(book, location, update_func)
elif isinstance(book.data, HtmlFiles):
await output_format.dl_html_files(book, location, update_func)
else:
raise NotImplementedError
await output_format.close() await output_format.close()
@ -49,34 +47,43 @@ def format_output_location(book: Book, output_format: OutputFormat, template: st
return template.format(**values, ext = output_format.extension) return template.format(**values, ext = output_format.extension)
def get_default_format(bookdata: BookData) -> OutputFormat: def get_default_format(book: Book) -> OutputFormat:
""" """
Get default output format for bookdata. Get default output format for bookdata.
Should only be used if no format was specified by the user Should only be used if no format was specified by the user
:param bookdata: Content of book :param book: Content of book
:returns: OutputFormat object matching the default :returns: OutputFormat object matching the default
""" """
bookdata = book.data
if isinstance(bookdata, SingleFile): if isinstance(bookdata, SingleFile):
return output_format_from_str(bookdata.file.extension) extension = bookdata.file.extension
if isinstance(bookdata, ImageList): if isinstance(bookdata, ImageList):
return Cbz() extension = "cbz"
if isinstance(bookdata, HtmlFiles): if isinstance(bookdata, HtmlFiles):
return Epub() extension = "epub"
raise GrawlixError output_format = find_output_format(book, extension)
return output_format()
def output_format_from_str(name: str) -> OutputFormat: def find_output_format(book: Book, extension: str) -> type[OutputFormat]:
""" """
Convert string to outputformat object Find a compatible output format
:param name: Name of output format :param book: Book to download
:returns: OutputFormat object :param extension: Extension of output file
:returns: Compatible OutputFormat type
:raises: UnsupportedOutputFormat if nothing is found
""" """
for output_format in get_output_formats(): for output_format in get_output_formats():
if output_format.extension == name: matches_extension = output_format.extension == extension
return output_format() supports_bookdata = type(book.data) in output_format.input_types
raise GrawlixError if matches_extension and supports_bookdata:
return output_format
raise UnsupportedOutputFormat
def get_valid_extensions() -> list[str]:
return [output_format.extension for output_format in get_output_formats()]
def get_output_formats() -> list[type[OutputFormat]]: def get_output_formats() -> list[type[OutputFormat]]:

View File

@ -1,14 +1,15 @@
from grawlix.book import Book from grawlix.book import Book, SingleFile
from .output_format import OutputFormat, Update from .output_format import OutputFormat, Update
import shutil import shutil
import subprocess import subprocess
class Acsm(OutputFormat): class Acsm(OutputFormat):
extension = "acsm" extension = "acsm"
input_types = [SingleFile]
async def dl_single_file(self, book: Book, location: str, update_func: Update) -> None: async def download(self, book: Book, location: str, update_func: Update) -> None:
# Download and write acsm file to disk # Download and write acsm file to disk
await super().dl_single_file(book, location, update_func) await self._download_single_file(book, location, update_func)
# TODO: Implement more general solution # TODO: Implement more general solution
# Decrypt if knock is available # Decrypt if knock is available
# https://web.archive.org/web/20221016154220/https://github.com/BentonEdmondson/knock # https://web.archive.org/web/20221016154220/https://github.com/BentonEdmondson/knock

View File

@ -9,18 +9,21 @@ class Cbz(OutputFormat):
"""Comic book zip file""" """Comic book zip file"""
extension: str = "cbz" extension: str = "cbz"
input_types = [ImageList]
async def dl_image_list(self, book: Book, location: str, update: Update) -> None: async def download(self, book: Book, location: str, update: Update) -> None:
if not isinstance(book.data, ImageList): if not isinstance(book.data, ImageList):
raise UnsupportedOutputFormat raise UnsupportedOutputFormat
semaphore = asyncio.Semaphore(10)
images = book.data.images images = book.data.images
image_count = len(images) image_count = len(images)
with ZipFile(location, mode="w") as zip: with ZipFile(location, mode="w") as zip:
async def download_page(index: int, file: OnlineFile): async def download_page(index: int, file: OnlineFile):
content = await self._download_file(file) async with semaphore:
zip.writestr(f"Image {index}.{file.extension}", content) content = await self._download_file(file)
if update: zip.writestr(f"Image {index}.{file.extension}", content)
update(1/image_count) if update:
update(1/image_count)
tasks = [ tasks = [
asyncio.create_task(download_page(index, file)) asyncio.create_task(download_page(index, file))
for index, file in enumerate(images) for index, file in enumerate(images)

View File

@ -1,4 +1,4 @@
from grawlix.book import HtmlFiles, HtmlFile, OnlineFile, Book from grawlix.book import HtmlFiles, HtmlFile, OnlineFile, Book, SingleFile, Metadata
from grawlix.exceptions import UnsupportedOutputFormat from grawlix.exceptions import UnsupportedOutputFormat
from .output_format import OutputFormat, Update from .output_format import OutputFormat, Update
@ -9,14 +9,20 @@ from ebooklib import epub
class Epub(OutputFormat): class Epub(OutputFormat):
extension = "epub" extension = "epub"
input_types = [SingleFile, HtmlFiles]
async def dl_html_files(self, book: Book, location: str, update: Update) -> None: async def download(self, book: Book, location: str, update: Update) -> None:
if not isinstance(book.data, HtmlFiles): if isinstance(book.data, SingleFile):
await self._download_single_file(book, location, update)
elif isinstance(book.data, HtmlFiles):
await self._download_html_files(book.data, book.metadata, location, update)
else:
raise UnsupportedOutputFormat raise UnsupportedOutputFormat
html = book.data
async def _download_html_files(self, html: HtmlFiles, metadata: Metadata, location: str, update: Update) -> None:
output = epub.EpubBook() output = epub.EpubBook()
output.set_title(book.metadata.title) output.set_title(metadata.title)
for author in book.metadata.authors: for author in metadata.authors:
output.add_author(author) output.add_author(author)
file_count = len(html.htmlfiles) + 1 # Html files + cover file_count = len(html.htmlfiles) + 1 # Html files + cover

View File

@ -1,4 +1,4 @@
from grawlix.book import Book, SingleFile, OnlineFile, ImageList, HtmlFiles, Book, OfflineFile from grawlix.book import Book, SingleFile, OnlineFile, ImageList, HtmlFiles, Book, OfflineFile, BookData
from grawlix.exceptions import UnsupportedOutputFormat from grawlix.exceptions import UnsupportedOutputFormat
from grawlix.encryption import decrypt from grawlix.encryption import decrypt
@ -10,6 +10,7 @@ Update = Optional[Callable[[float], None]]
class OutputFormat: class OutputFormat:
# Extension for output files # Extension for output files
extension: str extension: str
input_types: list[type[BookData]]
def __init__(self) -> None: def __init__(self) -> None:
self._client = httpx.AsyncClient() self._client = httpx.AsyncClient()
@ -20,7 +21,18 @@ class OutputFormat:
await self._client.aclose() await self._client.aclose()
async def dl_single_file(self, book: Book, location: str, update_func: Update) -> None: async def download(self, book: Book, location: str, update_func: Update) -> None:
"""
Download book
:param book: Book to download
:param location: Path to where the file is written
:param update_func: Function to update progress bar
"""
raise UnsupportedOutputFormat
async def _download_single_file(self, book: Book, location: str, update_func: Update) -> None:
""" """
Download and write an `grawlix.SingleFile` to disk Download and write an `grawlix.SingleFile` to disk
@ -38,28 +50,6 @@ class OutputFormat:
self._write_offline_file(book.data.file, location) self._write_offline_file(book.data.file, location)
async def dl_image_list(self, book: Book, location: str, update_func: Update) -> None:
"""
Download and write an `grawlix.ImageList` to disk
:param book: Book to download
:param location: Path to where the file is written
:raises UnsupportedOutputFormat: If datatype is not supported by format
"""
raise UnsupportedOutputFormat
async def dl_html_files(self, book: Book, location: str, update_func: Update) -> None:
"""
Download and write a `grawlix.HtmlFiles` to disk
:param book: Book to download
:param location: Path to where the file is written
:raises UnsupportedOutputFormat: If datatype is not supported by format
"""
raise UnsupportedOutputFormat
async def _download_file(self, file: OnlineFile, update: Update = None) -> bytes: async def _download_file(self, file: OnlineFile, update: Update = None) -> bytes:
""" """
Download `grawlix.OnlineFile` Download `grawlix.OnlineFile`