diff options
Diffstat (limited to 'python/fatcat_tools/importers')
| Mode | File | Lines changed |
| --- | --- | --- |
| -rw-r--r-- | python/fatcat_tools/importers/common.py | 1 |
| -rw-r--r-- | python/fatcat_tools/importers/crossref.py | 2 |
| -rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 13 |
| -rw-r--r-- | python/fatcat_tools/importers/issn.py | 10 |
| -rw-r--r-- | python/fatcat_tools/importers/matched.py | 18 |
| -rw-r--r-- | python/fatcat_tools/importers/orcid.py | 10 |
6 files changed, 44 insertions, 10 deletions
| diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index 5c33ebc9..e39ec6c9 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -43,6 +43,7 @@ class FatcatImporter:          eg_extra = kwargs.get('editgroup_extra', dict())          eg_extra['git_rev'] = eg_extra.get('git_rev',              subprocess.check_output(["git", "describe", "--always"]).strip()).decode('utf-8') +        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.FatcatImporter')          self.api = api          self._editgroup_description = kwargs.get('editgroup_description') diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 4f7faf59..ed60a78c 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -46,7 +46,7 @@ class CrossrefImporter(FatcatImporter):          eg_desc = kwargs.get('editgroup_description',              "Automated import of Crossref DOI metadata, harvested from REST API")          eg_extra = kwargs.get('editgroup_extra', dict()) -        eg_extra['agent'] = eg_extra.get('agent', 'CrossrefImporter') +        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.CrossrefImporter')          super().__init__(api,              issn_map_file=issn_map_file,              editgroup_description=eg_desc, diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index 2cb97b01..5e61a154 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -12,9 +12,16 @@ MAX_ABSTRACT_BYTES=4096  class GrobidMetadataImporter(FatcatImporter): -    def __init__(self, host_url, default_link_rel="web"): -        super().__init__(host_url) -        self.default_link_rel = default_link_rel +    def __init__(self, api, **kwargs): + +        eg_desc = kwargs.get('editgroup_description', +         
   "Import of release and file metadata, as extracted from PDFs by GROBID.") +        eg_extra = kwargs.get('editgroup_extra', dict()) +        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.GrobidMetadataImporter') +        super().__init__(api, +            editgroup_description=eg_desc, +            editgroup_extra=eg_extra) +        self.default_link_rel = kwargs.get("default_link_rel", "web")      def parse_grobid_json(self, obj): diff --git a/python/fatcat_tools/importers/issn.py b/python/fatcat_tools/importers/issn.py index 9b9ca63f..02a1eea0 100644 --- a/python/fatcat_tools/importers/issn.py +++ b/python/fatcat_tools/importers/issn.py @@ -35,6 +35,16 @@ class IssnImporter(FatcatImporter):          ISSN-L,in_doaj,in_road,in_norwegian,in_crossref,title,publisher,url,lang,ISSN-print,ISSN-electronic,doi_count,has_doi,is_oa,is_kept,publisher_size,url_live,url_live_status,url_live_final_status,url_live_final_url,url_live_status_simple,url_live_final_status_simple,url_domain,gwb_pdf_count      """ +    def __init__(self, api, **kwargs): + +        eg_desc = kwargs.get('editgroup_description', +            "Automated import of container-level metadata, by ISSN. Metadata from Internet Archive munging.") +        eg_extra = kwargs.get('editgroup_extra', dict()) +        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.IssnImporter') +        super().__init__(api, +            editgroup_description=eg_desc, +            editgroup_extra=eg_extra) +      def parse_issn_row(self, row):          """          row is a python dict (parsed from CSV). 
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py index 5dbda27c..0b77bcf0 100644 --- a/python/fatcat_tools/importers/matched.py +++ b/python/fatcat_tools/importers/matched.py @@ -37,12 +37,18 @@ class MatchedImporter(FatcatImporter):      - core_id, wikidata_id, pmcid, pmid: not as lists      """ -    def __init__(self, host_url, skip_file_updates=False, default_mime=None, -            default_link_rel="web"): -        super().__init__(host_url) -        self.default_mime = default_mime -        self.default_link_rel = default_link_rel -        self.skip_file_updates = skip_file_updates +    def __init__(self, api, **kwargs): + +        eg_desc = kwargs.get('editgroup_description', +            "Import of large-scale file-to-release match results. Source of metadata varies.") +        eg_extra = kwargs.get('editgroup_extra', dict()) +        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.MatchedImporter') +        super().__init__(api, +            editgroup_description=eg_desc, +            editgroup_extra=eg_extra) +        self.default_link_rel = kwargs.get("default_link_rel", "web") +        self.default_mime = kwargs.get("default_mime", None) +        self.skip_file_updates = kwargs.get("skip_file_updates", False)      def make_url(self, raw):          rel = self.default_link_rel diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py index fc4562d0..0aa4ab00 100644 --- a/python/fatcat_tools/importers/orcid.py +++ b/python/fatcat_tools/importers/orcid.py @@ -22,6 +22,16 @@ def value_or_none(e):  class OrcidImporter(FatcatImporter): +    def __init__(self, api, **kwargs): + +        eg_desc = kwargs.get('editgroup_description', +            "Automated import of ORCID metadata, from official bulk releases.") +        eg_extra = kwargs.get('editgroup_extra', dict()) +        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.OrcidImporter') +        
super().__init__(api, +            editgroup_description=eg_desc, +            editgroup_extra=eg_extra) +      def parse_orcid_dict(self, obj):          """          obj is a python dict (parsed from json). | 
