diff options
Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x | python/fatcat_import.py | 86 |
1 files changed, 0 insertions, 86 deletions
diff --git a/python/fatcat_import.py b/python/fatcat_import.py index 39ef200a..33679868 100755 --- a/python/fatcat_import.py +++ b/python/fatcat_import.py @@ -42,8 +42,6 @@ from fatcat_tools.importers import ( SavePaperNowWebImporter, ShadowLibraryImporter, SqlitePusher, - auto_cdl_dash_dat, - auto_wayback_static, ) # Yep, a global. Gets DSN from `SENTRY_DSN` environment variable @@ -315,53 +313,6 @@ def run_shadow_lib(args: argparse.Namespace) -> None: JsonLinePusher(fmi, args.json_file).run() -def run_wayback_static(args: argparse.Namespace) -> None: - api = args.api - - # find the release - if args.release_id: - release_id = args.release_id - elif args.extid: - idtype = args.extid.split(":")[0] - extid = ":".join(args.extid.split(":")[1:]) - if idtype == "doi": - release_id = api.lookup_release(doi=extid).ident - elif idtype == "pmid": - release_id = api.lookup_release(pmid=extid).ident - elif idtype == "wikidata": - release_id = api.lookup_release(wikidata_qid=extid).ident - else: - raise NotImplementedError("extid type: {}".format(idtype)) - else: - raise Exception("need either release_id or extid argument") - - # create it - (editgroup_id, wc) = auto_wayback_static( - api, release_id, args.wayback_url, editgroup_id=args.editgroup_id - ) - if not wc: - return - print("release_id: {}".format(release_id)) - print("editgroup_id: {}".format(editgroup_id)) - print("webcapture id: {}".format(wc.ident)) - print("link: https://fatcat.wiki/webcapture/{}".format(wc.ident)) - - -def run_cdl_dash_dat(args: argparse.Namespace) -> None: - api = args.api - - # create it - (editgroup_id, release, fs) = auto_cdl_dash_dat( - api, args.dat_path, release_id=args.release_id, editgroup_id=args.editgroup_id - ) - if not (fs and release): - return - print("release_id: {}".format(release.ident)) - print("editgroup_id: {}".format(editgroup_id)) - print("fileset id: {}".format(fs.ident)) - print("link: https://fatcat.wiki/fileset/{}".format(fs.ident)) - - def run_datacite(args: argparse.Namespace) -> None: dci = DataciteImporter( args.api, @@ -899,43 +850,6 @@ def main() -> None: type=argparse.FileType("r"), ) - sub_wayback_static = subparsers.add_parser( - "wayback-static", help="crude crawl+ingest tool for single-page HTML docs from wayback" - ) - sub_wayback_static.set_defaults( - func=run_wayback_static, - auth_var="FATCAT_API_AUTH_TOKEN", - ) - sub_wayback_static.add_argument( - "wayback_url", type=str, help="URL of wayback capture to extract from" - ) - sub_wayback_static.add_argument( - "--extid", type=str, help="external identifier for release lookup" - ) - sub_wayback_static.add_argument("--release-id", type=str, help="release entity identifier") - sub_wayback_static.add_argument( - "--editgroup-id", - type=str, - help="use existing editgroup (instead of creating a new one)", - ) - - sub_cdl_dash_dat = subparsers.add_parser( - "cdl-dash-dat", help="crude helper to import datasets from Dat/CDL mirror pilot project" - ) - sub_cdl_dash_dat.set_defaults( - func=run_cdl_dash_dat, - auth_var="FATCAT_API_AUTH_TOKEN", - ) - sub_cdl_dash_dat.add_argument( - "dat_path", type=str, help="local path dat to import (must be the dat discovery key)" - ) - sub_cdl_dash_dat.add_argument("--release-id", type=str, help="release entity identifier") - sub_cdl_dash_dat.add_argument( - "--editgroup-id", - type=str, - help="use existing editgroup (instead of creating a new one)", - ) - sub_datacite = subparsers.add_parser("datacite", help="import datacite.org metadata") sub_datacite.add_argument( "json_file", |