From dade9db6dafb3c842bbfef820d5bb167c1a42e43 Mon Sep 17 00:00:00 2001 From: Joakim Holm Date: Thu, 1 Jun 2023 22:32:04 +0200 Subject: [PATCH] Make onlinefiles support cookies --- grawlix/book.py | 5 +++-- grawlix/output/epub.py | 15 +++++++++++---- grawlix/output/output_format.py | 2 +- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/grawlix/book.py b/grawlix/book.py index ae49564..0cdf054 100644 --- a/grawlix/book.py +++ b/grawlix/book.py @@ -1,6 +1,6 @@ from grawlix import Encryption from dataclasses import dataclass, field -from typing import Optional, Union, TypeVar, Generic +from typing import Optional, Union, TypeVar, Generic, Any @dataclass(slots=True) class Metadata: @@ -30,6 +30,7 @@ class OnlineFile: extension: str encryption: Optional[Encryption] = None headers: Optional[dict[str, str]] = None + cookies: Optional[Any] = None # TODO Change type @dataclass(slots=True) class OfflineFile: @@ -63,8 +64,8 @@ class HtmlFile: @dataclass(slots=True) class HtmlFiles: - cover: OnlineFile htmlfiles: list[HtmlFile] + cover: Optional[OnlineFile] = None BookData = Union[ SingleFile, diff --git a/grawlix/output/epub.py b/grawlix/output/epub.py index 7a5d4fa..96d9c28 100644 --- a/grawlix/output/epub.py +++ b/grawlix/output/epub.py @@ -27,9 +27,9 @@ class Epub(OutputFormat): file_count = len(html.htmlfiles) + 1 # Html files + cover async def download_cover(cover_file: OnlineFile): - cover_filename = f"cover.{html.cover.extension}" + cover_filename = f"cover.{cover_file.extension}" epub_cover = epub.EpubCover(file_name = cover_filename) - epub_cover.content = await self._download_file(html.cover) + epub_cover.content = await self._download_file(cover_file) output.add_item(epub_cover) epub_cover_page = epub.EpubCoverHtml(image_name = cover_filename) if update: @@ -38,7 +38,12 @@ class Epub(OutputFormat): async def download_file(index: int, file: HtmlFile): - response = await self._client.get(file.file.url, follow_redirects=True) + response = await self._client.get( + file.file.url, + headers = file.file.headers, + cookies = file.file.cookies, + follow_redirects=True + ) soup = BeautifulSoup(response.text, "lxml") selected_element = soup.find(attrs=file.selector) epub_file = epub.EpubHtml( @@ -55,7 +60,9 @@ class Epub(OutputFormat): download_file(index, file) for index, file in enumerate(html.htmlfiles) ] - epub_files = await asyncio.gather(download_cover(html.cover), *tasks) + if html.cover: + tasks.append(download_cover(html.cover)) + epub_files = await asyncio.gather(*tasks) # Add files to epub for epub_file in epub_files: diff --git a/grawlix/output/output_format.py b/grawlix/output/output_format.py index 1ed91e9..80f7c78 100644 --- a/grawlix/output/output_format.py +++ b/grawlix/output/output_format.py @@ -59,7 +59,7 @@ class OutputFormat: :returns: Content of downloaded file """ content = b"" - async with self._client.stream("GET", file.url, headers = file.headers, follow_redirects=True) as request: + async with self._client.stream("GET", file.url, headers = file.headers, cookies = file.cookies, follow_redirects=True) as request: total_filesize = int(request.headers["Content-length"]) async for chunk in request.aiter_bytes(): content += chunk