-rw-r--r--  .gitignore   |  21
-rw-r--r--  Pipfile      |  11
-rw-r--r--  README.md    |  37
-rw-r--r--  example.env  |   4
-rwxr-xr-x  reupload.py  | 157
5 files changed, 230 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..81a4762
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,21 @@
+*.o
+*.a
+*.pyc
+#*#
+*~
+*.swp
+.*
+*.tmp
+*.old
+*.profile
+*.bkp
+*.bak
+[Tt]humbs.db
+*.DS_Store
+build/
+_build/
+src/build/
+*.log
+
+# Don't ignore this file itself
+!.gitignore
diff --git a/Pipfile b/Pipfile
new file mode 100644
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,11 @@
+[[source]]
+url = "https://pypi.python.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+
+[dev-packages]
+
+[requires]
+python_version = "3.7"
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7ba167d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,37 @@
+
+Goal: download all ELS instagram photos, with at least date metadata, then
+re-upload them to wiki.mako.cc (or some other location).
+
+## Experimentation
+
+Using `pipenv` to create a python virtualenv:
+
+    pipenv shell
+    pip install instaloader requests
+
+Then:
+
+    instaloader profile --no-compress-json extraordinaryleastsquares
+    # "Warning: Use --login to download HD version of profile pictures"
+
+In the future:
+
+    instaloader --fast-update --no-compress-json extraordinaryleastsquares
+
+For the mediawiki API, the docs describe using `requests`: <https://www.mediawiki.org/wiki/API:Images#Python>
+
+Metadata for items:
+
+    filename: CEQD_<date>_<time>_<imagenum>.jpg
+    summary:
+        <instagram_comment>
+        Imported from ELS instagram: https://www.instagram.com/p/<short_id>/
+
+    page:
+        [[Category:Center for Extraordinary Quarantine Dining]]
+
+        == Summary ==
+        veggie #katsudon from a couple nights ago, galangal #broccoli on the side
+
+        Imported from ELS instagram: https://www.instagram.com/p/B9r_0Fsl1eh/
+
diff --git a/example.env b/example.env
new file mode 100644
index 0000000..6be0480
--- /dev/null
+++ b/example.env
@@ -0,0 +1,4 @@
+# Copy this file to .env. pipenv will then detect it when you run things like
+# `pipenv shell` or `pipenv run`
+WIKI_USERNAME="CHANGEME"
+WIKI_PASSWORD="CHANGEME"
diff --git a/reupload.py b/reupload.py
new file mode 100755
index 0000000..94fbbe3
--- /dev/null
+++ b/reupload.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import os
+import sys
+from typing import Optional, Tuple
+
+import requests
+
+BASE_URL = "https://wiki.mako.cc/api.php"
+
+
+def login(session: requests.Session) -> Tuple[str, str]:
+
+    bot_username = os.environ['WIKI_USERNAME']
+    bot_password = os.environ['WIKI_PASSWORD']
+
+    # Step 1: Retrieve a login token
+    resp = session.get(
+        BASE_URL,
+        params={
+            "action": "query",
+            "meta": "tokens",
+            "type": "login",
+            "format": "json",
+        },
+    )
+    resp.raise_for_status()
+    login_token = resp.json()['query']['tokens']['logintoken']
+    assert login_token
+
+    # Step 2: Send a POST request to log in. Using a main account for login
+    # is not supported. Obtain credentials via Special:BotPasswords
+    # (https://www.mediawiki.org/wiki/Special:BotPasswords) for lgname & lgpassword
+    resp = session.post(
+        BASE_URL,
+        data={
+            "action": "login",
+            "lgname": bot_username,
+            "lgpassword": bot_password,
+            "format": "json",
+            "lgtoken": login_token,
+        },
+    )
+    resp.raise_for_status()
+
+    # Step 3: While logged in, retrieve a CSRF token
+    resp = session.get(
+        BASE_URL,
+        params={
+            "action": "query",
+            "meta": "tokens",
+            "format": "json",
+        },
+    )
+    resp.raise_for_status()
+    csrf_token = resp.json()['query']['tokens']['csrftoken']
+    assert csrf_token
+
+    return (login_token, csrf_token)
+
+
+def get_imageinfo(title: str, session: requests.Session) -> Optional[dict]:
+    """Returns a page dict, or None if the image is not found."""
+
+    assert not title.startswith("File:")
+    # Does not require authentication
+    resp = session.get(
+        BASE_URL,
+        params={
+            "action": "query",
+            "format": "json",
+            "prop": "imageinfo",
+            "titles": f"File:{title}",
+        },
+    )
+    resp.raise_for_status()
+    pages = resp.json()['query']['pages']
+    # The API reports missing titles under the pseudo page id "-1"
+    if "-1" in pages:
+        return None
+    return list(pages.values())[0]
+
+
+def reupload_post(args):
+
+    # Parse instaloader metadata
+    base_path = args.json_file.replace('.json', '')
+    with open(args.json_file, 'r') as f:
+        meta = json.load(f)['node']
+    shortcode = meta['shortcode']
+    caption = meta['edge_media_to_caption']['edges'][0]['node']['text']
+
+    if meta.get('edge_sidecar_to_children'):
+        image_count = len(meta['edge_sidecar_to_children']['edges'])
+    else:
+        image_count = 1
+
+    if image_count != 1:
+        raise NotImplementedError("multi-image posts are not handled yet")
+
+    jpeg_path = base_path + ".jpg"
+    date_part = base_path.split('/')[1].split('_')[0].replace('-', '')
+    time_part = base_path.split('/')[1].split('_')[1].replace('-', '')[:4]
+    remote_name = f"CEQD_{date_part}_{time_part}_1.jpg"
+    page_text = f"""[[Category:Center for Extraordinary Quarantine Dining]]
+
+== Summary ==
+{caption}
+
+Imported from ELS instagram: https://www.instagram.com/p/{shortcode}/
+"""
+
+    session = requests.Session()
+    (login_token, csrf_token) = login(session)
+
+    # First, check whether the file already exists
+    existing = get_imageinfo(remote_name, session)
+    if existing:
+        print(json.dumps(existing, sort_keys=True, indent=2))
+        sys.exit(-1)
+
+    # If it doesn't, upload it!
+    print(f"Uploading {remote_name}")
+    with open(jpeg_path, 'rb') as jpeg_file:
+        resp = session.post(
+            BASE_URL,
+            data={
+                "action": "upload",
+                "filename": remote_name,
+                "text": page_text,
+                "format": "json",
+                "token": csrf_token,
+                "ignorewarnings": 1,
+            },
+            files={
+                'file': ('filename.jpg', jpeg_file, 'multipart/form-data'),
+            },
+        )
+    resp.raise_for_status()
+    print(json.dumps(resp.json(), sort_keys=True, indent=2))
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument(
+        'json_file',
+        help="JSON metadata file",
+        type=str,
+    )
+    args = parser.parse_args()
+
+    reupload_post(args)
+
+
+if __name__ == "__main__":
+    main()
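As a sanity check on the `CEQD_<date>_<time>_<imagenum>.jpg` naming scheme, here is a minimal sketch of how `reupload_post` derives the wiki filename from an instaloader metadata path. The example path is hypothetical; it only assumes the `<profile>/<date>_<time>_UTC` layout implied by the `split('/')` / `split('_')` calls in reupload.py:

    # Hypothetical instaloader-style path; the exact layout is an assumption
    # inferred from the splitting logic in reupload.py.
    base_path = "extraordinaryleastsquares/2020-03-14_01-23-45_UTC"

    date_part = base_path.split('/')[1].split('_')[0].replace('-', '')      # "20200314"
    time_part = base_path.split('/')[1].split('_')[1].replace('-', '')[:4]  # "0123"
    remote_name = f"CEQD_{date_part}_{time_part}_1.jpg"

    print(remote_name)  # CEQD_20200314_0123_1.jpg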
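Likewise, a sketch of a dry-run existence check using `get_imageinfo` before committing to an upload. Importing reupload.py as a module is an assumption for illustration (the script is normally invoked as a CLI); the filename is a hypothetical instance of the scheme above:

    import requests

    # Assumes reupload.py is on the import path; its __main__ guard
    # keeps main() from running on import.
    from reupload import get_imageinfo

    session = requests.Session()
    # Hypothetical filename following the CEQD_<date>_<time>_<imagenum> scheme
    info = get_imageinfo("CEQD_20200314_0123_1.jpg", session)
    if info is None:
        print("Not on the wiki yet; safe to upload")
    else:
        print("Already uploaded as:", info['title'])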