mirror of
https://github.com/jo1gi/grawlix.git
synced 2025-12-16 04:09:10 +00:00
Update Internet Archive metadata retrieval
This should fix issue #17.
This commit is contained in:
parent
f154be5c25
commit
465abbecad
@ -1,10 +1,13 @@
|
||||
from grawlix.book import Book, SingleFile, Metadata, OfflineFile
|
||||
from grawlix.exceptions import DataNotFound
|
||||
from grawlix import logging
|
||||
from .source import Source
|
||||
|
||||
import random
|
||||
import string
|
||||
from bs4 import BeautifulSoup
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
class InternetArchive(Source):
|
||||
name: str = "Internet Archive"
|
||||
@ -104,8 +107,10 @@ class InternetArchive(Source):
|
||||
f"https://archive.org/details/{book_id}"
|
||||
)
|
||||
soup = BeautifulSoup(page_response.text, "lxml")
|
||||
metadata_url = soup.find("ia-book-theater").get("bookmanifesturl")
|
||||
reader_data = json.loads(soup.find(class_="js-bookreader").get("value"))
|
||||
metadata_url = f"https:{reader_data['url']}"
|
||||
logging.debug(f"{metadata_url=}")
|
||||
metadata_response = await self._client.get(
|
||||
f"https:{metadata_url}"
|
||||
metadata_url
|
||||
)
|
||||
return metadata_response.json()["data"]["metadata"]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user