diff options
Diffstat (limited to 'python/fatcat_import.py')
| -rwxr-xr-x | python/fatcat_import.py | 86 | 
1 files changed, 0 insertions, 86 deletions
diff --git a/python/fatcat_import.py b/python/fatcat_import.py index 39ef200a..33679868 100755 --- a/python/fatcat_import.py +++ b/python/fatcat_import.py @@ -42,8 +42,6 @@ from fatcat_tools.importers import (      SavePaperNowWebImporter,      ShadowLibraryImporter,      SqlitePusher, -    auto_cdl_dash_dat, -    auto_wayback_static,  )  # Yep, a global. Gets DSN from `SENTRY_DSN` environment variable @@ -315,53 +313,6 @@ def run_shadow_lib(args: argparse.Namespace) -> None:      JsonLinePusher(fmi, args.json_file).run() -def run_wayback_static(args: argparse.Namespace) -> None: -    api = args.api - -    # find the release -    if args.release_id: -        release_id = args.release_id -    elif args.extid: -        idtype = args.extid.split(":")[0] -        extid = ":".join(args.extid.split(":")[1:]) -        if idtype == "doi": -            release_id = api.lookup_release(doi=extid).ident -        elif idtype == "pmid": -            release_id = api.lookup_release(pmid=extid).ident -        elif idtype == "wikidata": -            release_id = api.lookup_release(wikidata_qid=extid).ident -        else: -            raise NotImplementedError("extid type: {}".format(idtype)) -    else: -        raise Exception("need either release_id or extid argument") - -    # create it -    (editgroup_id, wc) = auto_wayback_static( -        api, release_id, args.wayback_url, editgroup_id=args.editgroup_id -    ) -    if not wc: -        return -    print("release_id: {}".format(release_id)) -    print("editgroup_id: {}".format(editgroup_id)) -    print("webcapture id: {}".format(wc.ident)) -    print("link: https://fatcat.wiki/webcapture/{}".format(wc.ident)) - - -def run_cdl_dash_dat(args: argparse.Namespace) -> None: -    api = args.api - -    # create it -    (editgroup_id, release, fs) = auto_cdl_dash_dat( -        api, args.dat_path, release_id=args.release_id, editgroup_id=args.editgroup_id -    ) -    if not (fs and release): -        return -    print("release_id: {}".format(release.ident)) -    print("editgroup_id: {}".format(editgroup_id)) -    print("fileset id: {}".format(fs.ident)) -    print("link: https://fatcat.wiki/fileset/{}".format(fs.ident)) - -  def run_datacite(args: argparse.Namespace) -> None:      dci = DataciteImporter(          args.api, @@ -899,43 +850,6 @@ def main() -> None:          type=argparse.FileType("r"),      ) -    sub_wayback_static = subparsers.add_parser( -        "wayback-static", help="crude crawl+ingest tool for single-page HTML docs from wayback" -    ) -    sub_wayback_static.set_defaults( -        func=run_wayback_static, -        auth_var="FATCAT_API_AUTH_TOKEN", -    ) -    sub_wayback_static.add_argument( -        "wayback_url", type=str, help="URL of wayback capture to extract from" -    ) -    sub_wayback_static.add_argument( -        "--extid", type=str, help="external identifier for release lookup" -    ) -    sub_wayback_static.add_argument("--release-id", type=str, help="release entity identifier") -    sub_wayback_static.add_argument( -        "--editgroup-id", -        type=str, -        help="use existing editgroup (instead of creating a new one)", -    ) - -    sub_cdl_dash_dat = subparsers.add_parser( -        "cdl-dash-dat", help="crude helper to import datasets from Dat/CDL mirror pilot project" -    ) -    sub_cdl_dash_dat.set_defaults( -        func=run_cdl_dash_dat, -        auth_var="FATCAT_API_AUTH_TOKEN", -    ) -    sub_cdl_dash_dat.add_argument( -        "dat_path", type=str, help="local path dat to import (must be the dat discovery key)" -    ) -    sub_cdl_dash_dat.add_argument("--release-id", type=str, help="release entity identifier") -    sub_cdl_dash_dat.add_argument( -        "--editgroup-id", -        type=str, -        help="use existing editgroup (instead of creating a new one)", -    ) -      sub_datacite = subparsers.add_parser("datacite", help="import datacite.org metadata")      sub_datacite.add_argument(          "json_file",  | 
