about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/importers
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-01-08 14:59:38 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-01-08 14:59:38 -0800
commit: 8c930ded07b6a668bc1721ef98312d9259c06b59 (patch)
tree: ce31708556d17d7165a64f15d0dac78d881e80bc /python/fatcat_tools/importers
parent: d1862ea4a6dcb781713ffc54bdc4bd7d7cdbdb9e (diff)
download: fatcat-8c930ded07b6a668bc1721ef98312d9259c06b59.tar.gz
fatcat-8c930ded07b6a668bc1721ef98312d9259c06b59.zip
importers and tests all use new api-passing
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- python/fatcat_tools/importers/common.py 1
-rw-r--r-- python/fatcat_tools/importers/crossref.py 2
-rw-r--r-- python/fatcat_tools/importers/grobid_metadata.py 13
-rw-r--r-- python/fatcat_tools/importers/issn.py 10
-rw-r--r-- python/fatcat_tools/importers/matched.py 18
-rw-r--r-- python/fatcat_tools/importers/orcid.py 10
6 files changed, 44 insertions, 10 deletions
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index 5c33ebc9..e39ec6c9 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -43,6 +43,7 @@ class FatcatImporter:
eg_extra = kwargs.get('editgroup_extra', dict())
eg_extra['git_rev'] = eg_extra.get('git_rev',
subprocess.check_output(["git", "describe", "--always"]).strip()).decode('utf-8')
+ eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.FatcatImporter')
self.api = api
self._editgroup_description = kwargs.get('editgroup_description')
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index 4f7faf59..ed60a78c 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -46,7 +46,7 @@ class CrossrefImporter(FatcatImporter):
eg_desc = kwargs.get('editgroup_description',
"Automated import of Crossref DOI metadata, harvested from REST API")
eg_extra = kwargs.get('editgroup_extra', dict())
- eg_extra['agent'] = eg_extra.get('agent', 'CrossrefImporter')
+ eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.CrossrefImporter')
super().__init__(api,
issn_map_file=issn_map_file,
editgroup_description=eg_desc,
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 2cb97b01..5e61a154 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -12,9 +12,16 @@ MAX_ABSTRACT_BYTES=4096
class GrobidMetadataImporter(FatcatImporter):
- def __init__(self, host_url, default_link_rel="web"):
- super().__init__(host_url)
- self.default_link_rel = default_link_rel
+ def __init__(self, api, **kwargs):
+
+ eg_desc = kwargs.get('editgroup_description',
+ "Import of release and file metadata, as extracted from PDFs by GROBID.")
+ eg_extra = kwargs.get('editgroup_extra', dict())
+ eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.GrobidMetadataImporter')
+ super().__init__(api,
+ editgroup_description=eg_desc,
+ editgroup_extra=eg_extra)
+ self.default_link_rel = kwargs.get("default_link_rel", "web")
def parse_grobid_json(self, obj):
diff --git a/python/fatcat_tools/importers/issn.py b/python/fatcat_tools/importers/issn.py
index 9b9ca63f..02a1eea0 100644
--- a/python/fatcat_tools/importers/issn.py
+++ b/python/fatcat_tools/importers/issn.py
@@ -35,6 +35,16 @@ class IssnImporter(FatcatImporter):
ISSN-L,in_doaj,in_road,in_norwegian,in_crossref,title,publisher,url,lang,ISSN-print,ISSN-electronic,doi_count,has_doi,is_oa,is_kept,publisher_size,url_live,url_live_status,url_live_final_status,url_live_final_url,url_live_status_simple,url_live_final_status_simple,url_domain,gwb_pdf_count
"""
+ def __init__(self, api, **kwargs):
+
+ eg_desc = kwargs.get('editgroup_description',
+ "Automated import of container-level metadata, by ISSN. Metadata from Internet Archive munging.")
+ eg_extra = kwargs.get('editgroup_extra', dict())
+ eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.IssnImporter')
+ super().__init__(api,
+ editgroup_description=eg_desc,
+ editgroup_extra=eg_extra)
+
def parse_issn_row(self, row):
"""
row is a python dict (parsed from CSV).
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 5dbda27c..0b77bcf0 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -37,12 +37,18 @@ class MatchedImporter(FatcatImporter):
- core_id, wikidata_id, pmcid, pmid: not as lists
"""
- def __init__(self, host_url, skip_file_updates=False, default_mime=None,
- default_link_rel="web"):
- super().__init__(host_url)
- self.default_mime = default_mime
- self.default_link_rel = default_link_rel
- self.skip_file_updates = skip_file_updates
+ def __init__(self, api, **kwargs):
+
+ eg_desc = kwargs.get('editgroup_description',
+ "Import of large-scale file-to-release match results. Source of metadata varies.")
+ eg_extra = kwargs.get('editgroup_extra', dict())
+ eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.MatchedImporter')
+ super().__init__(api,
+ editgroup_description=eg_desc,
+ editgroup_extra=eg_extra)
+ self.default_link_rel = kwargs.get("default_link_rel", "web")
+ self.default_mime = kwargs.get("default_mime", None)
+ self.skip_file_updates = kwargs.get("skip_file_updates", False)
def make_url(self, raw):
rel = self.default_link_rel
diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py
index fc4562d0..0aa4ab00 100644
--- a/python/fatcat_tools/importers/orcid.py
+++ b/python/fatcat_tools/importers/orcid.py
@@ -22,6 +22,16 @@ def value_or_none(e):
class OrcidImporter(FatcatImporter):
+ def __init__(self, api, **kwargs):
+
+ eg_desc = kwargs.get('editgroup_description',
+ "Automated import of ORCID metadata, from official bulk releases.")
+ eg_extra = kwargs.get('editgroup_extra', dict())
+ eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.OrcidImporter')
+ super().__init__(api,
+ editgroup_description=eg_desc,
+ editgroup_extra=eg_extra)
+
def parse_orcid_dict(self, obj):
"""
obj is a python dict (parsed from json).