diff options
Diffstat (limited to 'python/fatcat_tools/importers')
-rwxr-xr-x | python/fatcat_tools/importers/cdl_dash_dat.py | 21 | ||||
-rw-r--r-- | python/fatcat_tools/importers/matched.py | 5 | ||||
-rw-r--r-- | python/fatcat_tools/importers/pubmed.py | 12 | ||||
-rw-r--r-- | python/fatcat_tools/importers/shadow.py | 5 | ||||
-rwxr-xr-x | python/fatcat_tools/importers/wayback_static.py | 19 |
5 files changed, 41 insertions, 21 deletions
diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py index 36a2f9a6..0340f6a3 100755 --- a/python/fatcat_tools/importers/cdl_dash_dat.py +++ b/python/fatcat_tools/importers/cdl_dash_dat.py @@ -1,16 +1,25 @@ #!/usr/bin/env python3 -import os -import sys -import json -import magic -import urllib import hashlib +import json import mimetypes +import os import subprocess +import sys +import urllib import fatcat_openapi_client -from fatcat_openapi_client import * +import magic +from fatcat_openapi_client import ( + Editgroup, + FilesetEntity, + FilesetFile, + ReleaseAbstract, + ReleaseContrib, + ReleaseEntity, + ReleaseExtIds, +) + from .common import clean from .crossref import lookup_license_slug diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py index d95c5847..e0e4fc3c 100644 --- a/python/fatcat_tools/importers/matched.py +++ b/python/fatcat_tools/importers/matched.py @@ -1,8 +1,9 @@ import fatcat_openapi_client -from fatcat_tools.normal import * -from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS +from fatcat_tools.normal import clean_doi + +from .common import SANE_MAX_RELEASES, SANE_MAX_URLS, EntityImporter, make_rel_url class MatchedImporter(EntityImporter): diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py index c34fd7d6..c9907c5e 100644 --- a/python/fatcat_tools/importers/pubmed.py +++ b/python/fatcat_tools/importers/pubmed.py @@ -1,13 +1,15 @@ -import sys -import json import datetime +import json +import sys import warnings -from bs4 import BeautifulSoup import fatcat_openapi_client -from fatcat_tools.normal import * -from .common import EntityImporter, clean, LANG_MAP_MARC +from bs4 import BeautifulSoup + +from fatcat_tools.normal import clean_doi, clean_issn, clean_pmcid, clean_pmid + +from .common import LANG_MAP_MARC, EntityImporter, clean # from: https://www.ncbi.nlm.nih.gov/books/NBK3827/table/pubmedhelp.T.publication_types/?report=objectonly PUBMED_RELEASE_TYPE_MAP = { diff --git a/python/fatcat_tools/importers/shadow.py b/python/fatcat_tools/importers/shadow.py index c04e9aa8..fa9b4d10 100644 --- a/python/fatcat_tools/importers/shadow.py +++ b/python/fatcat_tools/importers/shadow.py @@ -1,8 +1,9 @@ import fatcat_openapi_client -from fatcat_tools.normal import * -from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS +from fatcat_tools.normal import clean_doi, clean_isbn13, clean_pmid + +from .common import SANE_MAX_RELEASES, SANE_MAX_URLS, EntityImporter, make_rel_url class ShadowLibraryImporter(EntityImporter): diff --git a/python/fatcat_tools/importers/wayback_static.py b/python/fatcat_tools/importers/wayback_static.py index 5b23347f..196f86ff 100755 --- a/python/fatcat_tools/importers/wayback_static.py +++ b/python/fatcat_tools/importers/wayback_static.py @@ -6,16 +6,23 @@ Helpers to create Web Capture entities from extracted wayback content. Works as a stand-alone script (for debugging) or as library routines. """ -import sys -import json -import hashlib -import requests -import datetime import argparse +import datetime +import hashlib +import json import subprocess +import sys + +import requests from bs4 import BeautifulSoup +from fatcat_openapi_client import ( + ApiClient, + Editgroup, + WebcaptureCdxLine, + WebcaptureEntity, + WebcaptureUrl, +) -from fatcat_openapi_client import * from .common import b32_hex CDX_API_BASE = "https://web.archive.org/cdx/search/cdx" |