diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-14 21:45:09 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-14 21:45:09 -0800 | 
| commit | 30bdb1b0ba28b2e4a81aa7209d294c224d8a2245 (patch) | |
| tree | 8556d05fef32c06ad8ad8b3b61649e8febae415f /python | |
| parent | 18c1434319ed7be67d11b47ea7c5d1393caba193 (diff) | |
| download | fatcat-30bdb1b0ba28b2e4a81aa7209d294c224d8a2245.tar.gz fatcat-30bdb1b0ba28b2e4a81aa7209d294c224d8a2245.zip | |
update crossref controlled vocab
Diffstat (limited to 'python')
| -rw-r--r-- | python/fatcat_tools/importers/crossref.py | 33 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 2 | ||||
| -rw-r--r-- | python/tests/import_crossref.py | 8 | 
3 files changed, 39 insertions, 4 deletions
| diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index c4695c7f..fe80c2d3 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -8,6 +8,28 @@ import fatcat_client  from fatcat_tools.importers.common import FatcatImporter +CROSSREF_TYPE_MAP = { +    'book': 'book', +    'book-chapter': 'chapter', +    'book-part': 'chapter', +    'book-section': 'chapter', +    'component': None, +    'dataset': 'dataset', +    'dissertation': 'thesis', +    'edited-book': 'book', +    'journal-article': 'article-journal', +    'monograph': 'monograph', +    'other': None, +    'peer-review': 'peer_review', +    'posted-content': 'post', +    'proceedings-article': 'paper-conference', +    'reference-book': 'book', +    'reference-entry': 'entry', +    'report': 'report', +    'standard': 'standard', +} + +  class FatcatCrossrefImporter(FatcatImporter):      def __init__(self, host_url, issn_map_file, extid_map_file=None, create_containers=True): @@ -35,6 +57,9 @@ class FatcatCrossrefImporter(FatcatImporter):              pmcid=row[2],              wikidata_qid=row[3]) +    def map_release_type(self, crossref_type): +        return CROSSREF_TYPE_MAP.get(crossref_type) +      def parse_crossref_dict(self, obj):          """          obj is a python dict (parsed from json). @@ -46,7 +71,10 @@ class FatcatCrossrefImporter(FatcatImporter):              return None          # Other ways to be out of scope (provisionally) -        if (not 'type' in obj): +        # journal-issue and journal-volume map to None, but allowed for now +        if obj.get('type') in (None, 'journal', 'proceedings', +                'standard-series', 'report-series', 'book-series', 'book-set', +                'book-track', 'proceedings-series'):              return None          # contribs @@ -76,6 +104,7 @@ class FatcatCrossrefImporter(FatcatImporter):                      extra['sequence'] = am.get('sequence')                  if not extra:                      extra = None +                assert(ctype in ("author", "editor", "translator"))                  contribs.append(fatcat_client.ReleaseContrib(                      creator_id=creator_id,                      index=index, @@ -216,7 +245,7 @@ class FatcatCrossrefImporter(FatcatImporter):              refs=refs,              container_id=container_id,              publisher=publisher, -            release_type=obj['type'], +            release_type=self.map_release_type(obj['type']),              release_status=release_status,              doi=obj['DOI'].lower(),              isbn13=isbn13, diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index 6d635479..dedc9728 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -67,7 +67,7 @@ class FatcatGrobidMetadataImporter(FatcatImporter):              ref['extra'] = cite_extra              refs.append(ref) -        release_type = "journal-article" +        release_type = "article-journal"          release_date = None          if obj.get('date'):              # TODO: only returns year, ever? how to handle? diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py index ab33d0fc..078db184 100644 --- a/python/tests/import_crossref.py +++ b/python/tests/import_crossref.py @@ -17,6 +17,12 @@ def test_crossref_importer(crossref_importer):      with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f:          crossref_importer.process_source(f) +def test_crossref_mappings(crossref_importer): +    assert crossref_importer.map_release_type('journal-article') == "article-journal" +    assert crossref_importer.map_release_type('asdf') is None +    assert crossref_importer.map_release_type('component') is None +    assert crossref_importer.map_release_type('standard') == 'standard' +  def test_crossref_importer_create(crossref_importer):      crossref_importer.create_containers = True      with open('tests/files/crossref-works.2018-01-21.badsample.json', 'r') as f: @@ -33,7 +39,7 @@ def test_crossref_dict_parse(crossref_importer):          assert r.publisher == "Wiley-Blackwell"          print(extra)          assert extra['container-title'] == ["International Journal of Quantum Chemistry"] -        assert r.release_type == "journal-article" +        assert r.release_type == "article-journal"          assert r.release_status == "published"          assert r.isbn13 == "978-3-16-148410-0"          assert 'subtitle' not in extra | 
