# Reconstructed from a collapsed git diff. Three new files of the ``api``
# package are shown here, separated by file markers.

# === api/exceptions.py ===

class IneligibleError(Exception):
    """Raised when the account has no active Marvel Unlimited subscription."""
    pass


# === api/__init__.py ===
# from .api import Client


# === api/api.py ===
#!/usr/bin/env python3
# Sorrow446.

import re
from random import randint

import requests

from api.exceptions import IneligibleError


class Client:
    """Minimal HTTP client for the Marvel Unlimited read API."""

    def __init__(self, **kwargs):
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0'
        })
        # All API endpoints are relative to this base.
        self.base = 'https://read-api.marvel.com/'

    def set_cookies(self, cookies):
        """Merge *cookies* (a name -> value mapping) into the session's jar."""
        self.session.cookies.update(cookies)

    def get_id(self, url):
        """Scrape the five-digit digital comic id from a marvel.com issue page.

        Fixes: the original pattern required the exact literal spacing
        ``digital_comic_id : "..."`` and crashed with AttributeError when the
        page did not match; the page fetch was also never checked for HTTP
        errors. Now spacing is flexible, HTTP errors raise, and a missing id
        raises a descriptive ValueError.
        """
        r = self.session.get(url)
        r.raise_for_status()
        match = re.search(r'digital_comic_id\s*:\s*"([0-9]{5})"', r.text)
        if match is None:
            raise ValueError('Could not find a digital comic id at: ' + url)
        return match.group(1)

    def make_call(self, epoint, json=None, params=None):
        """GET ``base + epoint`` and return the Response; raises on HTTP error."""
        r = self.session.get(self.base + epoint, json=json, params=params)
        r.raise_for_status()
        return r

    def get_comic_meta(self, id):
        """Return the ``issue_meta`` dict for comic *id* (a 5-digit string).

        NOTE: the parameter name ``id`` shadows the builtin but is kept for
        caller compatibility.
        """
        self.session.headers.update({'Referer': 'https://read.marvel.com/'})
        r = self.make_call('issue/v1/digitalcomics/' + id + '?')
        return r.json()['data']['results'][0]['issue_meta']

    def get_comic(self, id):
        """Return the ordered list of page-image URLs for comic *id*.

        Raises IneligibleError when the session is not recognised as an
        active Marvel Unlimited subscriber.
        """
        # 'rand' acts as a cache-buster query parameter.
        params = {'rand': randint(10000, 99999)}
        r = self.make_call('asset/v1/digitalcomics/' + id + '?', params=params)
        result = r.json()['data']['results'][0]
        if not result['auth_state']['subscriber']:
            raise IneligibleError('Marvel Unlimited subscription required.')
        return [page['assets']['source'] for page in result['pages']]
# === mur.py ===
#!/usr/bin/env python3

import os
import re
import sys
import json
import shutil
import zipfile
import argparse
import platform

import api
import img2pdf
from tqdm import tqdm
from requests.exceptions import HTTPError
from api.exceptions import IneligibleError

client = api.Client()


def print_title():
    """Print the MUR ASCII-art banner."""
    print("""
 _____ _____ _____
|     |  |  | __  |
| | | |  |  |    -|
|_|_|_|_____|__|__|
    """)


def get_os():
    """Return True on Windows, False on any other platform."""
    return platform.system() == 'Windows'


def set_con_title():
    """Set the console/terminal window title."""
    if get_os():
        os.system('title MUR R1 (by Sorrow446)')
    else:
        sys.stdout.write('\x1b]2;MUR R1 (by Sorrow446)\x07')


def sanitize(fn):
    """Replace characters that are illegal in filenames on the current OS."""
    if get_os():
        return re.sub(r'[\/:*?"><|]', '_', fn)
    return re.sub('/', '_', fn)


def parse_args():
    """Build and parse the command-line arguments."""
    parser = argparse.ArgumentParser(
        description='Sorrow446.'
    )
    parser.add_argument(
        '-u', '--url',
        help="URL - www.marvel.com/comics/issue/ or read.marvel.com/#/book/.",
        nargs='*',
        required=True
    )
    parser.add_argument(
        '-f', '--format',
        help="Export format.",
        choices=['cbz', 'pdf'],
        required=True
    )
    parser.add_argument(
        '-m', '--meta',
        help="Write comic's metadata to JSON file.",
        action='store_true'
    )
    return parser.parse_args()


def parse_cookies(out_cookies=None):
    """Load a Netscape-format cookies.txt and attach it to the API client.

    Fixes: the original used a mutable default argument (``{}``), which is
    shared across calls, and crashed with IndexError on blank or malformed
    lines (fewer than 7 tab-separated fields).
    """
    if out_cookies is None:
        out_cookies = {}
    with open('cookies.txt') as f:
        for line in f:
            if not line.startswith('#'):
                field = line.strip().split('\t')
                if len(field) >= 7:
                    out_cookies[field[5]] = field[6]
    client.set_cookies(out_cookies)


def exist_check(f):
    """Return True if *f* already exists as a file."""
    return os.path.isfile(f)


def dir_setup(tmp_dir, dl_dir):
    """Recreate a clean temp dir and make sure the download dir exists."""
    if os.path.isdir(tmp_dir):
        shutil.rmtree(tmp_dir)
    if not os.path.isdir(dl_dir):
        os.makedirs(dl_dir)
    os.makedirs(tmp_dir)


def check_url(url):
    """Return ``(subdomain, comic_id)`` for a supported Marvel URL, else None.

    Fix: ``http[s]://`` is a character class requiring the literal "s", so
    plain http:// URLs could never match; ``https?`` makes the "s" optional.
    Domain dots are now escaped as well.
    """
    regexes = [
        r'https?://(read)\.marvel\.com/#/book/([0-9]{5}$)',
        r'https?://(www)\.marvel\.com/comics/issue/([0-9]{5})/.+',
    ]
    for regex in regexes:
        match = re.match(regex, url)
        if match:
            return match.group(1), match.group(2)


def download(urls, tmp_dir, cur=0):
    """Download every page image into *tmp_dir* as 1.jpg, 2.jpg, ..."""
    total = len(urls)
    for url in urls:
        cur += 1
        print('Downloading image {} of {}...'.format(cur, total))
        r = client.session.get(url, stream=True)
        r.raise_for_status()
        size = int(r.headers.get('content-length', 0))
        dest = os.path.join(tmp_dir, str(cur) + '.jpg')
        with open(dest, 'wb') as f:
            with tqdm(total=size, unit='B',
                      unit_scale=True, unit_divisor=1024,
                      initial=0, miniters=1) as bar:
                for chunk in r.iter_content(32 * 1024):
                    if chunk:
                        f.write(chunk)
                        bar.update(len(chunk))


def make_pdf(dest, images, title):
    """Bundle *images* (an ordered list of paths) into a PDF at *dest*."""
    with open(dest, 'wb') as f:
        f.write(img2pdf.convert(images, title=title))


def make_cbz(dest, images):
    """Bundle *images* into an uncompressed CBZ (zip) archive at *dest*.

    Fix: store each image under its basename; the original wrote the full
    on-disk path into the archive, producing nested folders inside the CBZ.
    """
    with zipfile.ZipFile(dest, 'w', zipfile.ZIP_STORED) as z:
        for image in images:
            z.write(image, arcname=os.path.basename(image))


def write_meta(meta_abs, meta):
    """Dump the comic's metadata dict to a JSON file."""
    with open(meta_abs, 'w') as f:
        json.dump(meta, f, indent=4)


def err(e, cur, tot):
    """Report an error; abort the program if it occurred on the last booklet."""
    print(e)
    if cur == tot:
        sys.exit(1)


def main():
    """CLI entry point: download each requested comic and convert it."""
    # Fix: parse args before touching the filesystem, so `-h` / bad args
    # don't create directories as a side effect.
    args = parse_args()
    cd = os.getcwd()
    tmp_dir = os.path.join(cd, 'mur_tmp')
    dl_dir = os.path.join(cd, 'MUR downloads')
    dir_setup(tmp_dir, dl_dir)
    parse_cookies()
    tot = len(args.url)
    cur = 0
    for url in args.url:
        cur += 1
        try:
            print("Booklet {} of {}:".format(cur, tot))
            try:
                # check_url returns None for unrecognised URLs; unpacking
                # then raises TypeError, handled as an invalid URL.
                url_type, comic_id = check_url(url)
            except TypeError:
                err('Invalid URL: ' + str(url), cur, tot)
                continue
            if url_type == "www":
                # www.marvel.com pages need a scrape to find the digital id.
                comic_id = client.get_id(url)
            fmt = args.format
            meta = client.get_comic_meta(comic_id)
            title = meta['title']
            title_s = sanitize(title)
            print(str(title) + "\n")
            dest = os.path.join(dl_dir, '{}.{}'.format(title_s, fmt))
            if exist_check(dest):
                err('Comic already exists locally.', cur, tot)
                continue
            try:
                download(client.get_comic(comic_id), tmp_dir)
            except IneligibleError as e:
                print(e)
                sys.exit(1)
            # Fix: sort pages numerically. os.listdir() order is arbitrary,
            # and a lexicographic sort puts 10.jpg before 2.jpg, scrambling
            # the page order in the output file.
            images = sorted(
                (os.path.join(tmp_dir, name) for name in os.listdir(tmp_dir)),
                key=lambda p: int(os.path.splitext(os.path.basename(p))[0]),
            )
            meta_abs = os.path.join(dl_dir, '{}_meta.json'.format(title_s))
            print('Converting to {}...'.format(fmt.upper()))
            if fmt == 'pdf':
                make_pdf(dest, images, title)
            else:
                make_cbz(dest, images)
            if args.meta:
                print("Writing metadata to JSON file...")
                write_meta(meta_abs, meta)
            for image in images:
                os.remove(image)
        except HTTPError as e:
            err(e, cur, tot)
        except Exception as e:
            err(e, cur, tot)


if __name__ == '__main__':
    print_title()
    set_con_title()
    main()


# === requirements.txt ===
# tqdm
# img2pdf
# requests