diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-03-19 19:07:35 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-03-19 19:07:35 -0700 |
commit | c038294850e836c5dd24fd3dc89e77065a9d2f85 (patch) | |
tree | 3b70b6c6dd8d80d0887c29c53c0e0479c8df8142 /python/fatcat_import.py | |
parent | d9f9a84957913f0ddd878bb079b423c059b4c81d (diff) | |
download | fatcat-c038294850e836c5dd24fd3dc89e77065a9d2f85.tar.gz fatcat-c038294850e836c5dd24fd3dc89e77065a9d2f85.zip |
new importer: wayback_static
Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x | python/fatcat_import.py | 48 |
1 files changed, 48 insertions, 0 deletions
```diff
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index 8090900f..ce5063de 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -38,6 +38,36 @@ def run_grobid_metadata(args):
         bezerk_mode=args.bezerk_mode)
     LinePusher(fmi, args.tsv_file).run()
 
+def run_wayback_static(args):
+    api = args.api
+
+    # find the release
+    if args.release_id:
+        release_id = args.release_id
+    elif args.extid:
+        idtype = args.extid.split(':')[0]
+        extid = ':'.join(args.extid.split(':')[1:])
+        if idtype == "doi":
+            release_id = api.lookup_release(doi=extid).ident
+        elif idtype == "pmid":
+            release_id = api.lookup_release(pmid=extid).ident
+        elif idtype == "wikidata":
+            release_id = api.lookup_release(wikidata_qid=extid).ident
+        else:
+            raise NotImplementedError("extid type: {}".format(idtype))
+    else:
+        raise Exception("need either release_id or extid argument")
+
+    # create it
+    (editgroup_id, wc) = auto_wayback_static(api, release_id, args.wayback_url,
+        editgroup_id=args.editgroup_id)
+    if not wc:
+        return
+    print("release_id: {}".format(release_id))
+    print("editgroup_id: {}".format(editgroup_id))
+    print("edit id: {}".format(wc.ident))
+    print("link: https://fatcat.wiki/webcapture/{}".format(wc.ident))
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('--debug',
@@ -126,6 +156,24 @@ def main():
         action='store_true',
         help="don't lookup existing files, just insert (clobbers; only for fast bootstrap)")
 
+    sub_wayback_static = subparsers.add_parser('wayback-static')
+    sub_wayback_static.set_defaults(
+        func=run_wayback_static,
+        auth_var="FATCAT_API_AUTH_TOKEN",
+    )
+    sub_wayback_static.add_argument('wayback_url',
+        type=str,
+        help="URL of wayback capture to extract from")
+    sub_wayback_static.add_argument('--extid',
+        type=str,
+        help="external identifier for release lookup")
+    sub_wayback_static.add_argument('--release-id',
+        type=str,
+        help="release entity identifier")
+    sub_wayback_static.add_argument('--editgroup-id',
+        type=str,
+        help="use existing editgroup (instead of creating a new one)")
+
     args = parser.parse_args()
     if not args.__dict__.get("func"):
         print("tell me what to do!")
```