diff options
Diffstat (limited to 'python/fatcat_tools/importers')
| -rwxr-xr-x | python/fatcat_tools/importers/cdl_dash_dat.py | 21 |
| -rw-r--r-- | python/fatcat_tools/importers/matched.py | 5 |
| -rw-r--r-- | python/fatcat_tools/importers/pubmed.py | 12 |
| -rw-r--r-- | python/fatcat_tools/importers/shadow.py | 5 |
| -rwxr-xr-x | python/fatcat_tools/importers/wayback_static.py | 19 |
5 files changed, 41 insertions, 21 deletions
diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py index 36a2f9a6..0340f6a3 100755 --- a/python/fatcat_tools/importers/cdl_dash_dat.py +++ b/python/fatcat_tools/importers/cdl_dash_dat.py @@ -1,16 +1,25 @@  #!/usr/bin/env python3 -import os -import sys -import json -import magic -import urllib  import hashlib +import json  import mimetypes +import os  import subprocess +import sys +import urllib  import fatcat_openapi_client -from fatcat_openapi_client import * +import magic +from fatcat_openapi_client import ( +    Editgroup, +    FilesetEntity, +    FilesetFile, +    ReleaseAbstract, +    ReleaseContrib, +    ReleaseEntity, +    ReleaseExtIds, +) +  from .common import clean  from .crossref import lookup_license_slug diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py index d95c5847..e0e4fc3c 100644 --- a/python/fatcat_tools/importers/matched.py +++ b/python/fatcat_tools/importers/matched.py @@ -1,8 +1,9 @@  import fatcat_openapi_client -from fatcat_tools.normal import * -from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS +from fatcat_tools.normal import clean_doi + +from .common import SANE_MAX_RELEASES, SANE_MAX_URLS, EntityImporter, make_rel_url  class MatchedImporter(EntityImporter): diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py index c34fd7d6..c9907c5e 100644 --- a/python/fatcat_tools/importers/pubmed.py +++ b/python/fatcat_tools/importers/pubmed.py @@ -1,13 +1,15 @@ -import sys -import json  import datetime +import json +import sys  import warnings -from bs4 import BeautifulSoup  import fatcat_openapi_client -from fatcat_tools.normal import * -from .common import EntityImporter, clean, LANG_MAP_MARC +from bs4 import BeautifulSoup + +from fatcat_tools.normal import clean_doi, clean_issn, clean_pmcid, clean_pmid + +from .common import LANG_MAP_MARC, EntityImporter, clean  # from: 
https://www.ncbi.nlm.nih.gov/books/NBK3827/table/pubmedhelp.T.publication_types/?report=objectonly  PUBMED_RELEASE_TYPE_MAP = { diff --git a/python/fatcat_tools/importers/shadow.py b/python/fatcat_tools/importers/shadow.py index c04e9aa8..fa9b4d10 100644 --- a/python/fatcat_tools/importers/shadow.py +++ b/python/fatcat_tools/importers/shadow.py @@ -1,8 +1,9 @@  import fatcat_openapi_client -from fatcat_tools.normal import * -from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS +from fatcat_tools.normal import clean_doi, clean_isbn13, clean_pmid + +from .common import SANE_MAX_RELEASES, SANE_MAX_URLS, EntityImporter, make_rel_url  class ShadowLibraryImporter(EntityImporter): diff --git a/python/fatcat_tools/importers/wayback_static.py b/python/fatcat_tools/importers/wayback_static.py index 5b23347f..196f86ff 100755 --- a/python/fatcat_tools/importers/wayback_static.py +++ b/python/fatcat_tools/importers/wayback_static.py @@ -6,16 +6,23 @@ Helpers to create Web Capture entities from extracted wayback content.  Works as a stand-alone script (for debugging) or as library routines.  """ -import sys -import json -import hashlib -import requests -import datetime  import argparse +import datetime +import hashlib +import json  import subprocess +import sys + +import requests  from bs4 import BeautifulSoup +from fatcat_openapi_client import ( +    ApiClient, +    Editgroup, +    WebcaptureCdxLine, +    WebcaptureEntity, +    WebcaptureUrl, +) -from fatcat_openapi_client import *  from .common import b32_hex  CDX_API_BASE = "https://web.archive.org/cdx/search/cdx"  | 
