diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-10 13:08:23 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-10 13:08:25 -0800 |
commit | ab4e1355bf93e3755985f1b5cd2589a78601d253 (patch) | |
tree | f50ee1492587fead94410e229963b18f88f203a9 /python/fatcat_import.py | |
parent | c133f3077aa975aa4706a8e5ca894fc1b71fbc67 (diff) | |
download | fatcat-ab4e1355bf93e3755985f1b5cd2589a78601d253.tar.gz fatcat-ab4e1355bf93e3755985f1b5cd2589a78601d253.zip |
remove cdl_dash_dat and wayback_static importers
Cleaning out dead code.
These importers were used to create demonstration fileset and webcapture
entities early in development. They have been replaced by the fileset
and webcapture ingest importers.
Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x | python/fatcat_import.py | 86 |
1 file changed, 0 insertions, 86 deletions
diff --git a/python/fatcat_import.py b/python/fatcat_import.py index 39ef200a..33679868 100755 --- a/python/fatcat_import.py +++ b/python/fatcat_import.py @@ -42,8 +42,6 @@ from fatcat_tools.importers import ( SavePaperNowWebImporter, ShadowLibraryImporter, SqlitePusher, - auto_cdl_dash_dat, - auto_wayback_static, ) # Yep, a global. Gets DSN from `SENTRY_DSN` environment variable @@ -315,53 +313,6 @@ def run_shadow_lib(args: argparse.Namespace) -> None: JsonLinePusher(fmi, args.json_file).run() -def run_wayback_static(args: argparse.Namespace) -> None: - api = args.api - - # find the release - if args.release_id: - release_id = args.release_id - elif args.extid: - idtype = args.extid.split(":")[0] - extid = ":".join(args.extid.split(":")[1:]) - if idtype == "doi": - release_id = api.lookup_release(doi=extid).ident - elif idtype == "pmid": - release_id = api.lookup_release(pmid=extid).ident - elif idtype == "wikidata": - release_id = api.lookup_release(wikidata_qid=extid).ident - else: - raise NotImplementedError("extid type: {}".format(idtype)) - else: - raise Exception("need either release_id or extid argument") - - # create it - (editgroup_id, wc) = auto_wayback_static( - api, release_id, args.wayback_url, editgroup_id=args.editgroup_id - ) - if not wc: - return - print("release_id: {}".format(release_id)) - print("editgroup_id: {}".format(editgroup_id)) - print("webcapture id: {}".format(wc.ident)) - print("link: https://fatcat.wiki/webcapture/{}".format(wc.ident)) - - -def run_cdl_dash_dat(args: argparse.Namespace) -> None: - api = args.api - - # create it - (editgroup_id, release, fs) = auto_cdl_dash_dat( - api, args.dat_path, release_id=args.release_id, editgroup_id=args.editgroup_id - ) - if not (fs and release): - return - print("release_id: {}".format(release.ident)) - print("editgroup_id: {}".format(editgroup_id)) - print("fileset id: {}".format(fs.ident)) - print("link: https://fatcat.wiki/fileset/{}".format(fs.ident)) - - def run_datacite(args: 
argparse.Namespace) -> None: dci = DataciteImporter( args.api, @@ -899,43 +850,6 @@ def main() -> None: type=argparse.FileType("r"), ) - sub_wayback_static = subparsers.add_parser( - "wayback-static", help="crude crawl+ingest tool for single-page HTML docs from wayback" - ) - sub_wayback_static.set_defaults( - func=run_wayback_static, - auth_var="FATCAT_API_AUTH_TOKEN", - ) - sub_wayback_static.add_argument( - "wayback_url", type=str, help="URL of wayback capture to extract from" - ) - sub_wayback_static.add_argument( - "--extid", type=str, help="external identifier for release lookup" - ) - sub_wayback_static.add_argument("--release-id", type=str, help="release entity identifier") - sub_wayback_static.add_argument( - "--editgroup-id", - type=str, - help="use existing editgroup (instead of creating a new one)", - ) - - sub_cdl_dash_dat = subparsers.add_parser( - "cdl-dash-dat", help="crude helper to import datasets from Dat/CDL mirror pilot project" - ) - sub_cdl_dash_dat.set_defaults( - func=run_cdl_dash_dat, - auth_var="FATCAT_API_AUTH_TOKEN", - ) - sub_cdl_dash_dat.add_argument( - "dat_path", type=str, help="local path dat to import (must be the dat discovery key)" - ) - sub_cdl_dash_dat.add_argument("--release-id", type=str, help="release entity identifier") - sub_cdl_dash_dat.add_argument( - "--editgroup-id", - type=str, - help="use existing editgroup (instead of creating a new one)", - ) - sub_datacite = subparsers.add_parser("datacite", help="import datacite.org metadata") sub_datacite.add_argument( "json_file", |