aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-04-22 10:48:56 -0700
committerBryan Newbold <bnewbold@archive.org>2020-04-22 10:48:56 -0700
commit1f0b8be3756cc52f3c911735dbc15f4063e133d2 (patch)
tree5b20316467b2a66fa3ef7670f45baf7a1116ed05
downloadels-instawiki-1f0b8be3756cc52f3c911735dbc15f4063e133d2.tar.gz
els-instawiki-1f0b8be3756cc52f3c911735dbc15f4063e133d2.zip
init repo with work-in-progress script
-rw-r--r--.gitignore21
-rw-r--r--Pipfile11
-rw-r--r--README.md37
-rw-r--r--example.env4
-rwxr-xr-xreupload.py157
5 files changed, 230 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..81a4762
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,21 @@
+*.o
+*.a
+*.pyc
+#*#
+*~
+*.swp
+.*
+*.tmp
+*.old
+*.profile
+*.bkp
+*.bak
+[Tt]humbs.db
+*.DS_Store
+build/
+_build/
+src/build/
+*.log
+
+# Don't ignore this file itself
+!.gitignore
diff --git a/Pipfile b/Pipfile
new file mode 100644
index 0000000..219b717
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,11 @@
+[[source]]
+url = "https://pypi.python.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+
+[dev-packages]
+
+[requires]
+python_version = "3.7"
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7ba167d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,37 @@
+
+Goal: download all ELS Instagram photos, with at least date metadata, then
+re-upload them to wiki.mako.cc (or some other location).
+
+## Experimentation
+
+Using `pipenv` to generate a python virtualenv:
+
+ pipenv shell
+ pip install instaloader requests
+
+Then:
+
+ instaloader profile --no-compress-json extraordinaryleastsquares
+ # "Warning: Use --login to download HD version of profile pictures"
+
+In the future:
+
+ instaloader --fast-update --no-compress-json extraordinaryleastsquares
+
+For the MediaWiki API, the docs show how to use `requests`: <https://www.mediawiki.org/wiki/API:Images#Python>
+
+Metadata for items:
+
+ filename: CEQD_<date>_<time>_<imagenum>.jpg
+ summary:
+ <instagram_comment>
+ Imported from ELS instagram: https://www.instagram.com/p/<short_id>/
+
+ page:
+ [[Category:Center for Extraordinary Quarantine Dining]]
+
+ == Summary ==
+ veggie #katsudon from a couple nights ago, galangal #broccoli on the side
+
+ Imported from ELS instagram: https://www.instagram.com/p/B9r_0Fsl1eh/
+
diff --git a/example.env b/example.env
new file mode 100644
index 0000000..6be0480
--- /dev/null
+++ b/example.env
@@ -0,0 +1,4 @@
+# Copy this file to .env. pipenv will then detect it when you run things like
+# `pipenv shell` or `pipenv run`
+WIKI_USERNAME="CHANGEME"
+WIKI_PASSWORD="CHANGEME"
diff --git a/reupload.py b/reupload.py
new file mode 100755
index 0000000..94fbbe3
--- /dev/null
+++ b/reupload.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import os
+import sys
+from typing import Optional, Tuple
+
+import requests
+
+# MediaWiki api.php endpoint that every request in this script is sent to.
+BASE_URL = "https://wiki.mako.cc/api.php"
+
+
+def login(session: requests.Session) -> Tuple[str, str]:
+    """Log `session` in to the wiki and fetch a CSRF token for later writes.
+
+    Credentials are read from the WIKI_USERNAME and WIKI_PASSWORD environment
+    variables. These must be bot credentials obtained via
+    Special:BotPasswords; logging in with the main account is not supported.
+
+    Returns:
+        A `(login_token, csrf_token)` tuple.
+
+    Raises:
+        KeyError: if WIKI_USERNAME or WIKI_PASSWORD is not set.
+        requests.HTTPError: if any API request returns an HTTP error status.
+        RuntimeError: if a token is missing from a response, or if the wiki
+            rejects the login.
+    """
+    bot_username = os.environ['WIKI_USERNAME']
+    bot_password = os.environ['WIKI_PASSWORD']
+
+    # Step 1: Retrieve a login token
+    resp = session.get(
+        BASE_URL,
+        params={
+            "action": "query",
+            "meta": "tokens",
+            "type": "login",
+            "format": "json",
+        },
+    )
+    resp.raise_for_status()
+    login_token = resp.json()['query']['tokens']['logintoken']
+    if not login_token:
+        raise RuntimeError("wiki returned an empty login token")
+
+    # Step 2: Send a post request to login. Use of main account for login is
+    # not supported. Obtain credentials via Special:BotPasswords
+    # (https://www.mediawiki.org/wiki/Special:BotPasswords) for lgname &
+    # lgpassword
+    resp = session.post(
+        BASE_URL,
+        data={
+            "action": "login",
+            "lgname": bot_username,
+            "lgpassword": bot_password,
+            "format": "json",
+            "lgtoken": login_token,
+        },
+    )
+    resp.raise_for_status()
+
+    # A rejected login still comes back as HTTP 200; the outcome is only
+    # visible in the JSON body, so raise_for_status() alone is not enough.
+    login_body = resp.json()
+    if login_body.get('login', {}).get('result') != 'Success':
+        raise RuntimeError(
+            f"wiki login failed: {json.dumps(login_body, sort_keys=True)}"
+        )
+
+    # Step 3: While logged in, retrieve a CSRF token
+    resp = session.get(
+        BASE_URL,
+        params={
+            "action": "query",
+            "meta": "tokens",
+            "format": "json",
+        },
+    )
+    resp.raise_for_status()
+    csrf_token = resp.json()['query']['tokens']['csrftoken']
+    if not csrf_token:
+        raise RuntimeError("wiki returned an empty CSRF token")
+
+    return (login_token, csrf_token)
+
+def get_imageinfo(title: str, session: requests.Session) -> Optional[dict]:
+    """Look up the wiki's record for the file named `title`.
+
+    `title` must be the bare file name, without the "File:" namespace prefix
+    (this is asserted); the prefix is added here when building the query.
+
+    Returns the first page entry of the query result as a dict, or None if
+    the image was not found (the result contains the "-1" page id).
+    """
+    assert not title.startswith("File:")
+
+    # Read-only query; does not require authentication.
+    query = {
+        "action": "query",
+        "format": "json",
+        "prop": "imageinfo",
+        "titles": f"File:{title}"
+    }
+    reply = session.get(BASE_URL, params=query)
+    reply.raise_for_status()
+
+    found_pages = reply.json()['query']['pages']
+    if "-1" not in found_pages:
+        return list(found_pages.values())[0]
+    return None
+
+def reupload_post(args):
+    """Upload the image of one downloaded Instagram post to the wiki.
+
+    `args.json_file` is the path to the post's JSON metadata file. The JPEG
+    is expected at the same path with ".jpg" in place of ".json". The file
+    name must start with `<date>_<time>` fields; they are used to build the
+    remote name `CEQD_<date>_<time>_1.jpg`.
+
+    If a file with that name already exists on the wiki, its record is
+    printed and the process exits with status -1. Otherwise the image is
+    uploaded and the API's JSON response is printed.
+
+    Raises:
+        NotImplementedError: if the post does not contain exactly one image.
+        ValueError: if the file name has no `<date>_<time>` prefix.
+        requests.HTTPError: if the upload request returns an HTTP error.
+    """
+    # parse metadata
+    json_path = args.json_file
+    # Strip only a trailing ".json"; str.replace() would also remove a
+    # ".json" that happens to occur earlier in the path.
+    if json_path.endswith('.json'):
+        base_path = json_path[:-len('.json')]
+    else:
+        base_path = json_path
+    with open(json_path, 'r', encoding='utf-8') as json_fh:
+        meta = json.load(json_fh)['node']
+    shortcode = meta['shortcode']
+
+    # The caption edge list may be empty or absent (presumably for posts
+    # without a caption); fall back to an empty caption instead of crashing.
+    caption_edges = (meta.get('edge_media_to_caption') or {}).get('edges') or []
+    caption = caption_edges[0]['node']['text'] if caption_edges else ""
+
+    if meta.get('edge_sidecar_to_children'):
+        image_count = len(meta['edge_sidecar_to_children']['edges'])
+    else:
+        image_count = 1
+
+    if image_count != 1:
+        raise NotImplementedError("only single-image posts are supported")
+
+    jpeg_path = base_path + ".jpg"
+    # Parse the file name itself, so the path may contain any number of
+    # directory components (or none at all).
+    name_fields = os.path.basename(base_path).split('_')
+    if len(name_fields) < 2:
+        raise ValueError(
+            f"expected a <date>_<time>... file name, got: {json_path}"
+        )
+    date_part = name_fields[0].replace('-', '')
+    # Keep only the first four characters of the time field.
+    time_part = name_fields[1].replace('-', '')[:4]
+    remote_name = f"CEQD_{date_part}_{time_part}_1.jpg"
+    page_text = f"""[[Category:Center for Extraordinary Quarantine Dining]]
+
+== Summary ==
+{caption}
+
+Imported from ELS instagram: https://www.instagram.com/p/{shortcode}/
+ """
+
+    session = requests.Session()
+    # Only the CSRF token is needed for the upload below.
+    (_login_token, csrf_token) = login(session)
+
+    # First, check if file already exists
+    existing = get_imageinfo(remote_name, session)
+    if existing:
+        print(json.dumps(existing, sort_keys=True, indent=2))
+        sys.exit(-1)
+
+    # If it doesn't, upload it!
+    print(f"Uploading {remote_name}")
+    # Hold the JPEG open only for the duration of the request.
+    with open(jpeg_path, 'rb') as jpeg_fh:
+        resp = session.post(
+            BASE_URL,
+            data={
+                "action": "upload",
+                "filename": remote_name,
+                "text": page_text,
+                "format": "json",
+                "token": csrf_token,
+                "ignorewarnings": 1,
+            },
+            files={
+                'file': ('filename.jpg', jpeg_fh, 'multipart/form-data'),
+            },
+        )
+    resp.raise_for_status()
+    print(json.dumps(resp.json(), sort_keys=True, indent=2))
+
+def main():
+    """Parse the command line and re-upload the post it names.
+
+    Takes a single positional argument, `json_file`: the path to a post's
+    JSON metadata file.
+    """
+    cli = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    # Kept as a plain path string: reupload_post() derives the image path
+    # and remote file name from it.
+    cli.add_argument('json_file', type=str, help="JSON metadata file")
+
+    reupload_post(cli.parse_args())
+
+# Run the command-line entry point only when executed as a script.
+if __name__ == "__main__":
+ main()