Merge pull request #1 from jo1gi/async

Make http calls async
This commit is contained in:
Joakim Holm 2023-04-26 22:20:14 +02:00 committed by GitHub
commit ce61b70d00
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 151 additions and 133 deletions

View File

@ -8,6 +8,7 @@ from . import arguments, logging
from typing import Tuple, Optional
from rich.progress import Progress
from functools import partial
import asyncio
def get_login(source: Source, config: Config, options) -> Tuple[str, str, Optional[str]]:
@ -46,7 +47,7 @@ def get_urls(options) -> list[str]:
return urls
def authenticate(source: Source, config: Config, options):
async def authenticate(source: Source, config: Config, options):
"""
Authenticate with source
@ -57,35 +58,35 @@ def authenticate(source: Source, config: Config, options):
logging.info(f"Authenticating with source [magenta]{source.name}[/]")
if source.supports_login:
username, password, library = get_login(source, config, options)
source.login(username, password, library=library)
await source.login(username, password, library=library)
source.authenticated = True
else:
raise SourceNotAuthenticated
async def main() -> None:
    """
    Download every url given on the command line

    Parses the arguments, loads the configuration and then, for each url,
    loads the matching source, authenticates when the source requires it and
    downloads the resulting book or every book of the resulting series.
    """
    args = arguments.parse_arguments()
    config = load_config()
    urls = get_urls(args)
    for url in urls:
        source: Source = load_source(url)
        if not source.authenticated and source.requires_authentication:
            await authenticate(source, config, args)
        result = await source.download(url)
        if isinstance(result, Book):
            with logging.progress(result.metadata.title, source.name) as progress:
                template: str = args.output or "{title}.{ext}"
                await download_with_progress(result, progress, template)
        elif isinstance(result, Series):
            with logging.progress(result.title, source.name, len(result.book_ids)) as progress:
                for book_id in result.book_ids:
                    # Only the ids are known up front; each book is fetched on demand
                    book: Book = await source.download_book_from_id(book_id)
                    template = args.output or "{series}/{title}.{ext}"
                    await download_with_progress(book, progress, template)
        logging.info("")
def download_with_progress(book: Book, progress: Progress, template: str):
async def download_with_progress(book: Book, progress: Progress, template: str):
"""
Download book with progress bar in cli
@ -95,9 +96,14 @@ def download_with_progress(book: Book, progress: Progress, template: str):
"""
task = logging.add_book(progress, book)
update_function = partial(progress.advance, task)
download_book(book, update_function, template)
await download_book(book, update_function, template)
progress.advance(task, 1)
def run() -> None:
    """Synchronous entry point: run the `main` coroutine with `asyncio.run`"""
    entry_coroutine = main()
    asyncio.run(entry_coroutine)
if __name__ == "__main__":
    # `main` is a coroutine function, calling it directly would only create
    # an un-awaited coroutine; `run` executes it with asyncio.run
    run()

View File

@ -10,7 +10,7 @@ from typing import Callable
from pathlib import Path
import os
def download_book(book: Book, update_func: Callable, template: str) -> None:
async def download_book(book: Book, update_func: Callable, template: str) -> None:
"""
Download and write book to disk
@ -25,11 +25,12 @@ def download_book(book: Book, update_func: Callable, template: str) -> None:
if not parent.exists():
os.makedirs(parent)
if isinstance(book.data, SingleFile):
output_format.dl_single_file(book.data, location, update_func)
await output_format.dl_single_file(book.data, location, update_func)
elif isinstance(book.data, ImageList):
output_format.dl_image_list(book.data, location, update_func)
await output_format.dl_image_list(book.data, location, update_func)
else:
raise NotImplementedError
await output_format.close()
def format_output_location(book: Book, output_format: OutputFormat, template: str) -> str:

View File

@ -1,18 +1,24 @@
from .output_format import OutputFormat, Update
from grawlix.book import ImageList
from grawlix.book import ImageList, OnlineFile
import zipfile
from zipfile import ZipFile
import asyncio
class Cbz(OutputFormat):
    """Comic book zip file"""

    extension: str = "cbz"

    async def dl_image_list(self, book: ImageList, location: str, update: Update) -> None:
        """
        Download all images of `book` concurrently and store them in a zip archive

        :param book: List of images to download
        :param location: Path of the archive that is written
        :param update: Optional callback, called with `1/len(book.images)` every time an image has been stored
        :raises Exception: Any error raised while downloading or storing a page is re-raised
        """
        image_count = len(book.images)
        with ZipFile(location, mode="w") as archive:

            async def download_page(index: int, file: OnlineFile) -> None:
                content = await self._download_file(file)
                archive.writestr(f"Image {index}.{file.extension}", content)
                if update:
                    update(1/image_count)

            tasks = [
                asyncio.create_task(download_page(index, file))
                for index, file in enumerate(book.images)
            ]
            try:
                # gather accepts an empty task list and re-raises the first
                # failure; asyncio.wait would raise ValueError on an empty
                # list and leave download errors unretrieved
                await asyncio.gather(*tasks)
            except Exception:
                # Stop the remaining downloads before the archive is closed
                for task in tasks:
                    task.cancel()
                await asyncio.gather(*tasks, return_exceptions=True)
                raise

View File

@ -2,7 +2,7 @@ from grawlix.book import Book, SingleFile, OnlineFile, ImageList
from grawlix.exceptions import UnsupportedOutputFormat
from grawlix.encryption import decrypt
import requests
import httpx
from typing import Callable, Optional
Update = Optional[Callable[[float], None]]
@ -11,11 +11,16 @@ class OutputFormat:
# Extension for output files
extension: str = ""
def __init__(self) -> None:
    # Async http client used for every download made through this output
    # format; it is closed again by `close`
    self._client = httpx.AsyncClient()
def dl_single_file(self, book: SingleFile, location: str, update_func: Update) -> None:
async def close(self) -> None:
    """Close the http client owned by this output format"""
    http_client = self._client
    await http_client.aclose()
async def dl_single_file(self, book: SingleFile, location: str, update_func: Update) -> None:
"""
Download and write an `grawlix.SingleFile` to disk
@ -25,10 +30,10 @@ class OutputFormat:
"""
if not book.file.extension == self.extension:
raise UnsupportedOutputFormat
self._download_and_write_file(book.file, location, update_func)
await self._download_and_write_file(book.file, location, update_func)
def dl_image_list(self, book: ImageList, location: str, update_func: Update) -> None:
async def dl_image_list(self, book: ImageList, location: str, update_func: Update) -> None:
"""
Download and write an `grawlix.ImageList` to disk
@ -39,7 +44,7 @@ class OutputFormat:
raise UnsupportedOutputFormat
def _download_file(self, file: OnlineFile, update: Update = None) -> bytes:
async def _download_file(self, file: OnlineFile, update: Update = None) -> bytes:
"""
Download `grawlix.OnlineFile`
@ -47,23 +52,19 @@ class OutputFormat:
:param update: Update function that is called with a percentage every time a chunk is downloaded
:returns: Content of downloaded file
"""
request = self._session.get(
file.url,
headers = file.headers,
stream = True
)
total_filesize = int(request.headers["Content-length"])
content = b""
for chunk in request.iter_content(chunk_size=1024):
content += chunk
if update:
update(len(chunk)/total_filesize)
if file.encryption is not None:
content = decrypt(content, file.encryption)
async with self._client.stream("GET", file.url, headers = file.headers) as request:
total_filesize = int(request.headers["Content-length"])
async for chunk in request.aiter_bytes():
content += chunk
if update:
update(len(chunk)/total_filesize)
if file.encryption is not None:
content = decrypt(content, file.encryption)
return content
def _download_and_write_file(self, file: OnlineFile, location: str, update: Update = None) -> None:
async def _download_and_write_file(self, file: OnlineFile, location: str, update: Update = None) -> None:
"""
Download `grawlix.OnlineFile` and write to content to disk
@ -71,6 +72,6 @@ class OutputFormat:
:param location: Path to where the file is written
:param update: Update function that is called with a percentage every time a chunk is downloaded
"""
content = self._download_file(file, update)
content = await self._download_file(file, update)
with open(location, "wb") as f:
f.write(content)

View File

@ -24,17 +24,17 @@ class Ereolen(Source):
_login_credentials = [ "username", "password", "library" ]
def login(self, username: str, password: str, **kwargs) -> None:
async def login(self, username: str, password: str, **kwargs) -> None:
library = kwargs["library"]
login_page = self._session.get(LOGIN_PAGE_URL).text
login_soup = BeautifulSoup(login_page, "lxml")
login_page = await self._client.get(LOGIN_PAGE_URL, follow_redirects=True)
login_soup = BeautifulSoup(login_page.text, "lxml")
borchk_login_form = login_soup.find(id="borchk-login-form")
login_path = borchk_login_form.get("action")
library_attr_name = borchk_login_form.find("label").get("for")
libraries = self._extract_available_libraries(login_page)
libraries = self._extract_available_libraries(login_page.text)
if not library in libraries:
library = nearest_string(library, list(libraries.keys()))
self._session.post(
await self._client.post(
f"https://login.bib.dk{login_path}",
headers = { "Content-Type": "application/x-www-form-urlencoded" },
data = {
@ -42,7 +42,8 @@ class Ereolen(Source):
"agency": libraries[library],
"userId": username,
"pincode": password
}
},
follow_redirects = True
)
@ -65,11 +66,12 @@ class Ereolen(Source):
return libraries
def download(self, url: str) -> Result:
book_id = self._get_book_id(url)
metadata = self._session.get(
f"https://bookstreaming.pubhub.dk/v1/order/metadata/{book_id}"
).json()
async def download(self, url: str) -> Result:
book_id: str = await self._get_book_id(url)
metadata_response = await self._client.get(
f"https://bookstreaming.pubhub.dk/v1/order/metadata/{book_id}",
)
metadata = metadata_response.json()
key = self._decrypt_key(metadata["key"])
return Book(
data = SingleFile(
@ -102,7 +104,7 @@ class Ereolen(Source):
return cipher.decrypt(decoded_key)[:16]
def _get_book_id(self, url: str) -> str:
async def _get_book_id(self, url: str) -> str:
"""
Download and extract book_id
@ -110,20 +112,20 @@ class Ereolen(Source):
:returns: Book id
"""
if re.match(self.match[0], url):
return self._get_book_id_from_reader(url)
return await self._get_book_id_from_reader(url)
if re.match(self.match[1], url):
return self._get_book_id_from_reader(f"{url}/read")
return await self._get_book_id_from_reader(f"{url}/read")
else:
raise InvalidUrl
async def _get_book_id_from_reader(self, url: str) -> str:
    """
    Download and extract book_id from reader page

    :param url: Url to reader page
    :returns: Book id, read from the `order-id` attribute of the `pubhub-reader` div
    """
    page = await self._client.get(url)
    soup = BeautifulSoup(page.text, "lxml")
    return soup.find("div", id="pubhub-reader").get("order-id")

View File

@ -18,22 +18,22 @@ class Flipp(Source):
_authentication_methods: list[str] = []
_login_cache: Optional[dict] = None
async def download(self, url: str) -> Result:
    """
    Download an issue or a series, depending on which pattern in `self.match` the url matches

    :param url: Url of issue or series
    :returns: Book for an issue url, Series for a series url
    :raises InvalidUrl: If the url matches neither pattern
    """
    if re.match(self.match[0], url):
        eid = self._get_eid(url)
        # The issue url does not contain the series id, so it is looked up first
        publication_id = await self._get_series_id(eid)
        return await self._download_book(eid, publication_id)
    elif re.match(self.match[1], url):
        return await self._download_series(url)
    raise InvalidUrl
async def download_book_from_id(self, book_id: Tuple[str, str]) -> Book:
    """
    Download a single issue of a series

    :param book_id: Pair of (series id, issue id)
    :returns: Downloaded issue
    """
    series_id, issue_id = book_id
    return await self._download_book(issue_id, series_id)
def _download_series(self, url: str) -> Series:
async def _download_series(self, url: str) -> Series:
"""
Download series with book ids from Flipp
@ -41,7 +41,7 @@ class Flipp(Source):
:returns: Series object
"""
series_id = url.split("/")[-1]
login_info = self._download_login_info()
login_info = await self._download_login_info()
series_metadata = self._extract_series_data(login_info, series_id)
issues = []
for issue in series_metadata["issues"]:
@ -53,7 +53,7 @@ class Flipp(Source):
)
def _download_login_info(self) -> dict:
async def _download_login_info(self) -> dict:
"""
Download login info from Flipp
Will use cache if available
@ -62,7 +62,7 @@ class Flipp(Source):
"""
if self._login_cache:
return self._login_cache
login_info = self._session.post(
login_cache = await self._client.post(
"https://flippapi.egmontservice.com/api/signin",
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:111.0) Gecko/20100101 Firefox/111.0"
@ -77,9 +77,9 @@ class Flipp(Source):
"uuid": "",
"os": ""
}
).json()
self.login_cache = login_info
return login_info
)
self._login_cache = login_cache.json()
return login_cache.json()
def _extract_series_data(self, response: dict, series_id: str) -> dict:
@ -96,7 +96,7 @@ class Flipp(Source):
raise DataNotFound
def _download_book(self, issue_id: str, series_id: str) -> Book:
async def _download_book(self, issue_id: str, series_id: str) -> Book:
"""
Download book from Flipp
@ -104,8 +104,8 @@ class Flipp(Source):
:param series_id: Series identifier
:returns: Book metadata
"""
pages = self._get_pages(issue_id, series_id)
metadata = self._get_metadata(issue_id, series_id)
pages = await self._get_pages(issue_id, series_id)
metadata = await self._get_metadata(issue_id, series_id)
return Book(
data = ImageList(pages),
metadata = Metadata(
@ -116,7 +116,7 @@ class Flipp(Source):
)
def _get_metadata(self, issue_id: str, series_id: str) -> dict:
async def _get_metadata(self, issue_id: str, series_id: str) -> dict:
"""
Download and extract issue data
@ -124,7 +124,7 @@ class Flipp(Source):
:param series_id: Series id
:returns: Issue metadata
"""
login_info = self._download_login_info()
login_info = await self._download_login_info()
series_metadata = self._extract_series_data(login_info, series_id)
for issue in series_metadata["issues"]:
if issue["customIssueCode"] == issue_id:
@ -136,14 +136,14 @@ class Flipp(Source):
return get_arg_from_url(url, "edid")
def _get_series_id(self, issue_id: str) -> str:
async def _get_series_id(self, issue_id: str) -> str:
"""
Download series id from issue id
:param issue_id: Issue id
:returns: Series id
"""
response = self._session.get(f"{BASEURL}/production/default.aspx?pubname=&edid={issue_id}")
response = await self._client.get(f"{BASEURL}/production/default.aspx?pubname=&edid={issue_id}")
# TODO Make faster
search = re.search(r'publicationguid = "([^"]+)', response.text)
if search is None:
@ -151,7 +151,7 @@ class Flipp(Source):
return search.group(1)
def _get_pages(self, issue_id: str, series_id: str) -> list[OnlineFile]:
async def _get_pages(self, issue_id: str, series_id: str) -> list[OnlineFile]:
"""
Download page metadata for book
@ -159,7 +159,7 @@ class Flipp(Source):
:param series_id: Series id
:return: Page image links
"""
response = self._session.get(
response = await self._client.get(
f"{BASEURL}/get_page_groups_from_eid.aspx?pubid={series_id}&eid={issue_id}",
)
result = []

View File

@ -17,28 +17,28 @@ class MangaPlus(Source):
_authentication_methods: list[str] = []
async def download(self, url: str) -> Result:
    """
    Download an issue or a series, depending on which pattern in `self.match` the url matches

    :param url: Url of issue or series; the id is taken from the last path segment
    :returns: Book for an issue url, Series for a series url
    :raises InvalidUrl: If the url matches neither pattern
    """
    if re.match(self.match[0], url):
        issue_id = url.split('/')[-1]
        return await self._download_issue(issue_id)
    if re.match(self.match[1], url):
        series_id = url.split("/")[-1]
        return await self._download_series(series_id)
    raise InvalidUrl
async def download_book_from_id(self, book_id: str) -> Book:
    """
    Download a single issue from its id

    :param book_id: Identifier of the issue
    :returns: Downloaded issue
    """
    # `_download_issue` is a coroutine function: without `await` the caller
    # would receive a coroutine object instead of a Book
    return await self._download_issue(book_id)
def _download_series(self, series_id: str) -> Series:
async def _download_series(self, series_id: str) -> Series:
"""
Download series from Manga Plus
:param series_id: Identifier for series
:returns: Series data
"""
content = self._session.get(
response = await self._client.get(
f"https://jumpg-api.tokyo-cdn.com/api/title_detailV2",
params = {
"title_id": series_id,
@ -48,8 +48,8 @@ class MangaPlus(Source):
"app_ver": "40",
"secret": "2afb69fbb05f57a1856cf75e1c4b6ee6"
},
).content
data, _ = blackboxprotobuf.protobuf_to_json(content)
)
data, _ = blackboxprotobuf.protobuf_to_json(response.content)
parsed = json.loads(data)
title = parsed["1"]["8"]["1"]["2"]
issues = []
@ -70,7 +70,7 @@ class MangaPlus(Source):
book_ids = issues
)
def _download_issue(self, issue_id: str) -> Book:
async def _download_issue(self, issue_id: str) -> Book:
"""
Download issue from Manga Plus
@ -78,10 +78,10 @@ class MangaPlus(Source):
:returns: Issue metadata
"""
url = f"https://jumpg-webapi.tokyo-cdn.com/api/manga_viewer?chapter_id={issue_id}&split=yes&img_quality=super_high"
content = self._session.get(url).content
response, _ = blackboxprotobuf.protobuf_to_json(content)
response = await self._client.get(url)
content, _ = blackboxprotobuf.protobuf_to_json(response.content)
images = []
parsed = json.loads(response)
parsed = json.loads(content)
for image in parsed["1"]["10"]["1"]:
if "1" in image:
images.append(

View File

@ -13,8 +13,8 @@ class Saxo(Source):
_authentication_methods = [ "login" ]
user_id: str
def login(self, username: str, password: str, **kwargs) -> None:
response = self._session.post(
async def login(self, username: str, password: str, **kwargs) -> None:
response = await self._client.post(
"https://auth-read.saxo.com/auth/token",
data = {
"username": username,
@ -27,7 +27,7 @@ class Saxo(Source):
)
json = response.json()
bearer_token = json["access_token"]
self._session.headers = {
self._client.headers = {
"Appauthorization": f"bearer {bearer_token}",
"App-Os": "android",
"App-Version": "6.2.4"
@ -35,16 +35,16 @@ class Saxo(Source):
self.user_id = json["id"]
def download(self, url: str) -> Book:
async def download(self, url: str) -> Book:
isbn = self._extract_isbn_from_url(url)
book_id = self._get_book_id(isbn)
metadata = self._get_book_metadata(book_id)
book_id = await self._get_book_id(isbn)
metadata = await self._get_book_metadata(book_id)
ebook_id = metadata["id"] # Id of ebook file
return Book(
metadata = self._extract_metadata(metadata),
data = SingleFile(
OnlineFile(
url = self._get_book_file_link(ebook_id),
url = await self._get_book_file_link(ebook_id),
extension = "epub",
# Encryption keys extracted from app
encryption = AESEncryption(
@ -56,33 +56,33 @@ class Saxo(Source):
)
def _get_book_id(self, isbn: str) -> str:
async def _get_book_id(self, isbn: str) -> str:
"""
Download internal book id of book from isbn
:param isbn: Isbn of book
:returns: Saxo internal book id
"""
response = self._session.get(
response = await self._client.get(
f"https://api-read.saxo.com/api/v2/search/user/{self.user_id}/premium/books/{isbn}"
)
return response.json()["items"][0]["bookId"]
def _get_book_metadata(self, book_id: str) -> dict:
async def _get_book_metadata(self, book_id: str) -> dict:
"""
Download metadata of book
:param book_id: Id of book
:returns: Metadata of book
"""
response = self._session.get(
response = await self._client.get(
f"https://api-read.saxo.com/api/v2/book/{book_id}/user/{self.user_id}/details"
)
return response.json()["ebooks"][0]
def _get_book_file_link(self, ebook_id: str) -> str:
async def _get_book_file_link(self, ebook_id: str) -> str:
"""
Download link to epub file
@ -90,12 +90,13 @@ class Saxo(Source):
:returns: Link to ebook file
:raises ThrottleError: If there have been too many downloads
"""
response = self._session.get(
response = await self._client.get(
f"https://api-read.saxo.com/api/v1/book/{ebook_id}/content/encryptedstream/"
).json()
if not "link" in response:
)
json = response.json()
if not "link" in json:
raise ThrottleError
return response["link"]
return json["link"]
@staticmethod

View File

@ -1,7 +1,7 @@
from grawlix.book import Book, Series, Result
from typing import Generic, TypeVar, Tuple
import requests
import httpx
T = TypeVar("T")
@ -17,7 +17,7 @@ class Source(Generic[T]):
authenticated = False
def __init__(self):
    # Async http client used by the source for all of its requests
    self._client = httpx.AsyncClient()
@property
@ -32,7 +32,7 @@ class Source(Generic[T]):
return "login" in self._authentication_methods
def login(self, username: str, password: str, **kwargs: str):
async def login(self, username: str, password: str, **kwargs: str):
"""
Login to source
@ -42,7 +42,7 @@ class Source(Generic[T]):
raise NotImplementedError
def download(self, url: str) -> Result[T]:
async def download(self, url: str) -> Result[T]:
"""
Download book metadata from source
@ -52,7 +52,7 @@ class Source(Generic[T]):
raise NotImplementedError
def download_book_from_id(self, book_id: T) -> Book:
async def download_book_from_id(self, book_id: T) -> Book:
"""
Download book from id

View File

@ -19,19 +19,19 @@ class Webtoons(Source[str]):
]
_authentication_methods: list[str] = []
async def download(self, url: str) -> Result[str]:
    """
    Download an episode or a series, depending on which pattern in `self.match` the url matches

    :param url: Url of episode or series
    :returns: Book for an episode url, Series for a series url
    :raises InvalidUrl: If the url matches neither pattern
    """
    if re.match(self.match[0], url):
        return await self._download_episode(url)
    if re.match(self.match[1], url):
        return await self._download_series(url)
    raise InvalidUrl
async def download_book_from_id(self, book_id: str) -> Book:
    """
    Download a single episode from its id

    :param book_id: Identifier of the episode; it is passed to `_download_episode` as the episode url
    :returns: Downloaded episode
    """
    return await self._download_episode(book_id)
def _download_series(self, url: str) -> Series[str]:
async def _download_series(self, url: str) -> Series[str]:
"""
Download a series of webtoons
@ -39,7 +39,7 @@ class Webtoons(Source[str]):
:returns: Webtoons series data
"""
parsed_url = urlparse(url)
page = self._session.get(
response = await self._client.get(
f"https://m.webtoons.com{parsed_url.path}",
params = parsed_url.query,
headers = {
@ -49,9 +49,10 @@ class Webtoons(Source[str]):
"needGDPR": "FALSE",
"needCCPA": "FALSE",
"needCOPPA": "FALSE"
}
).text
soup = BeautifulSoup(page, "lxml")
},
follow_redirects = True,
)
soup = BeautifulSoup(response.text, "lxml")
title = soup.find("meta", property="og:title").get("content")
episodes = []
for episode in soup.find_all("li", class_="_episodeItem"):
@ -63,15 +64,15 @@ class Webtoons(Source[str]):
)
def _download_episode(self, url: str) -> Book:
async def _download_episode(self, url: str) -> Book:
"""
Download single webtoon episode
:param url: Url of episode
:returns: Episode
"""
page = self._session.get(url).text
soup = BeautifulSoup(page, "lxml")
response = await self._client.get(url, follow_redirects = True)
soup = BeautifulSoup(response.text, "lxml")
title = soup.find("h1", class_="subj_episode").get("title")
series = soup.find("div", class_="subj_info").find("a").get("title")
images = []

View File

@ -14,12 +14,12 @@ dependencies = [
"appdirs",
"beautifulsoup4",
"blackboxprotobuf",
"httpx",
"importlib-resources",
"lxml",
"pycryptodome",
"requests",
"rich",
"tomli"
"tomli",
]
dynamic = ["version"]
@ -28,7 +28,7 @@ dynamic = ["version"]
"Bugtracker" = "https://github.com/jo1gi/ebook-dl/issues"
[project.scripts]
audiobook-dl = "grawlix.__main__:main"
audiobook-dl = "grawlix.__main__:run"
[build-system]

View File

@ -27,10 +27,10 @@ mkShell {
appdirs
beautifulsoup4
blackboxprotobuf
httpx
importlib-resources
lxml
pycryptodome
requests
rich
tomli