diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-24 16:16:22 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-24 16:16:30 -0800 | 
| commit | a85de0b6e82c65d932c84a41b284083b4879934a (patch) | |
| tree | 231688205efb16670471c48ca719d3e636771f3d /python | |
| parent | 6b1b131ba5e899a069fe280663d331932a8cbae5 (diff) | |
| download | fatcat-a85de0b6e82c65d932c84a41b284083b4879934a.tar.gz fatcat-a85de0b6e82c65d932c84a41b284083b4879934a.zip | |
tweak crossref import, and update tests
Diffstat (limited to 'python')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | python/fatcat_tools/importers/crossref.py | 38 |
| -rw-r--r-- | python/fatcat_web/templates/release_view.html | 6 |
| -rw-r--r-- | python/tests/files/crossref-works.single.json | 24 |
| -rw-r--r-- | python/tests/import_crossref.py | 35 |
4 files changed, 74 insertions, 29 deletions
| diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 4a0322e7..fbf30a32 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -152,8 +152,8 @@ class CrossrefImporter(EntityImporter):                  elif am.get('family'):                      raw_name = am['family']                  else: -                    # TODO: defaults back to a pseudo-null value -                    raw_name = am.get('given', '<blank>') +                    # TODO: can end up empty +                    raw_name = am.get('given')                  extra = dict()                  if ctype == "author":                      index = i @@ -244,9 +244,7 @@ class CrossrefImporter(EntityImporter):                      'collection_title', 'chapter_number'):                  if clean(rm.get(k)):                      extra[k] = clean(rm[k]) -            if extra: -                extra = dict(crossref=extra) -            else: +            if not extra:                  extra = None              refs.append(fatcat_client.ReleaseRef(                  index=i, @@ -269,20 +267,31 @@ class CrossrefImporter(EntityImporter):          # extra fields          extra = dict() -        for key in ('subject', 'type', 'alternative-id', 'container-title', -                'subtitle', 'archive', 'funder', 'group-title'): -            # TODO: unpack "container-title" array? 
+        extra_crossref = dict() +        # top-level extra keys +        if not container_id: +            if obj.get('container-title'): +                extra['container_name'] = clean(obj['container-title'][0]) +        for key in ('group-title', 'subtitle'):              val = obj.get(key)              if val:                  if type(val) == str:                      extra[key] = clean(val)                  else:                      extra[key] = val +        # crossref-nested extra keys +        for key in ('subject', 'type', 'alternative-id', 'archive', 'funder'): +            val = obj.get(key) +            if val: +                if type(val) == str: +                    extra_crossref[key] = clean(val) +                else: +                    extra_crossref[key] = val          if license_extra: -            extra['license'] = license_extra +            extra_crossref['license'] = license_extra          if len(obj['title']) > 1: -            extra['other-titles'] = [clean(t) for t in obj['title'][1:]] +            extra['aliases'] = [clean(t) for t in obj['title'][1:]]          # ISBN          isbn13 = None @@ -325,11 +334,18 @@ class CrossrefImporter(EntityImporter):              release_year = raw_date[0]              release_date = None +          original_title = None          if obj.get('original-title'):              original_title = clean(obj.get('original-title')[0], force_xml=True)          if obj.get('title'):              title = clean(obj.get('title')[0], force_xml=True) + +        if extra_crossref: +            extra['crossref'] = extra_crossref +        if not extra: +            extra = None +          re = fatcat_client.ReleaseEntity(              work_id=None,              container_id=container_id, @@ -353,7 +369,7 @@ class CrossrefImporter(EntityImporter):              pages=clean(obj.get('page')),              language=None,  # crossref doesn't supply language info              license_slug=license_slug, -            
extra=dict(crossref=extra), +            extra=extra,              abstracts=abstracts,              contribs=contribs,              refs=refs, diff --git a/python/fatcat_web/templates/release_view.html b/python/fatcat_web/templates/release_view.html index 4e24b281..c81bf478 100644 --- a/python/fatcat_web/templates/release_view.html +++ b/python/fatcat_web/templates/release_view.html @@ -154,9 +154,9 @@ This release citing other releases.        {% if ref.container_name %}{{ ref.container_name }}.{% endif %}        {% if ref.year %}{{ ref.year }}{% endif %}        {% if ref.locator %}{{ ref.locator }}{% endif %} -    {% elif ref.extra and ref.extra.crossref %} -      {% if ref.extra.crossref.get('author') %}{{ ref.extra.crossref['author'] }}.{% endif %} -      {% if ref.extra.crossref.get('article-title') %}{{ ref.extra.crossref['article-title'] }}.{% endif %} +    {% elif ref.extra %} +      {% if ref.extra.get('author') %}{{ ref.extra['author'] }}.{% endif %} +      {% if ref.extra.get('article-title') %}{{ ref.extra['article-title'] }}.{% endif %}        {% if ref.container_name %}{{ ref.container_name }}.{% endif %}        {% if ref.year %}{{ ref.year }}.{% endif %}      {% elif ref.extra and ref.extra.unstructured %} diff --git a/python/tests/files/crossref-works.single.json b/python/tests/files/crossref-works.single.json index e3d2e05c..f00c2142 100644 --- a/python/tests/files/crossref-works.single.json +++ b/python/tests/files/crossref-works.single.json @@ -36,6 +36,10 @@        },        "delay-in-days": 6452,        "content-version": "tdm" +    }, +    { +      "URL": "http://creativecommons.org/licenses/by-nc-nd/3.0/", +      "content-version": "vor"      }    ],    "content-domain": { @@ -71,7 +75,11 @@    "source": "Crossref",    "is-referenced-by-count": 5,    "title": [ -    "Renormalized perturbation theory by the moment method for degenerate states: Anharmonic oscillators" +    "Renormalized perturbation theory by the moment method for degenerate 
states: Anharmonic oscillators", +    "some other title" +  ], +  "original-title": [ +    "Renormalized perturbation theory auf deutsch"    ],    "prefix": "10.1002",    "volume": "66", @@ -79,7 +87,8 @@      {        "given": "Marcelo D.",        "family": "Radicioni", -      "affiliation": [] +      "affiliation": [], +      "sequence": "first"      },      {        "given": "Carlos G.", @@ -90,6 +99,11 @@        "given": "Francisco M.",        "family": "Fern�ndez",        "affiliation": [] +    }, +    { +      "given": "", +      "family": "", +      "affiliation": []      }    ],    "editor": [ @@ -108,6 +122,7 @@    ],    "member": "311",    "ISBN": ["85-359-0277-5", "978-3-16-148410-0"], +  "archive": ["Portico", "LOCKSS"],    "reference": [      {        "key": "10.1002/(SICI)1097-461X(1998)66:4<261::AID-QUA1>3.0.CO;2-T-BIB1", @@ -115,6 +130,7 @@        "volume": "57",        "first-page": "1734",        "year": "1972", +      "medium": "DVD",        "journal-title": "J. Chem. Phys.",        "DOI": "10.1063/1.1678462",        "doi-asserted-by": "crossref" @@ -131,11 +147,11 @@      },      {        "key": "10.1002/(SICI)1097-461X(1998)66:4<261::AID-QUA1>3.0.CO;2-T-BIB3", -      "author": "Fernández", +      "author": "",        "volume": "43",        "year": "1987",        "unstructured": "and Hypervirial Theorems, Lecture Notes in Chemistry, Vol. 
43, (Springer, Berlin, 1987).", -      "volume-title": "Hypervirial Theorems, Lecture Notes in Chemistry", +      "volume-title": "Hypervirial Theorem’s, Lecture Notes in Chemistry <3",        "DOI": "10.1007/978-3-642-93349-3",        "doi-asserted-by": "crossref"      }, diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py index 6e7f72c5..cb2143b3 100644 --- a/python/tests/import_crossref.py +++ b/python/tests/import_crossref.py @@ -70,34 +70,47 @@ def test_crossref_dict_parse(crossref_importer):          # not a single line          raw = json.loads(f.read())          r = crossref_importer.parse_record(raw) -        extra = r.extra['crossref'] +        # ensure the API server is ok with format +        JsonLinePusher(crossref_importer, [json.dumps(raw)]).run() + +        print(r.extra)          assert r.title == "Renormalized perturbation theory by the moment method for degenerate states: Anharmonic oscillators"          assert r.doi == "10.1002/(sici)1097-461x(1998)66:4<261::aid-qua1>3.0.co;2-t"          assert r.publisher == "Wiley-Blackwell" -        print(extra) -        assert extra['container-title'] == ["International Journal of Quantum Chemistry"]          assert r.release_type == "article-journal"          assert r.release_status == "published" +        assert r.license_slug == "CC-BY-NC-ND" +        assert r.original_title == "Renormalized perturbation theory auf deutsch"          assert r.isbn13 == "978-3-16-148410-0" -        assert 'subtitle' not in extra -        assert 'archive' not in extra -        assert 'funder' not in extra -        assert len(r.contribs) == 5 +        assert 'subtitle' not in r.extra +        assert 'subtitle' not in r.extra['crossref'] +        assert 'funder' not in r.extra +        assert 'funder' not in r.extra['crossref'] +        # matched by ISSN, so shouldn't be in there +        #assert extra['container_name'] == "International Journal of Quantum Chemistry" +        assert 
r.extra['aliases'] == ["some other title"] +        assert r.extra['crossref']['archive'] == ['Portico', 'LOCKSS'] +        assert len(r.contribs) == 6          assert r.contribs[0].raw_name == "Marcelo D. Radicioni"          assert r.contribs[0].index == 0 +        assert r.contribs[0].extra['seq'] == "first"          assert r.contribs[1].raw_affiliation == "Some University"          assert r.contribs[1].extra['more_affiliations'] == ["Some Department"]          assert r.contribs[1].role == "author" -        assert r.contribs[3].role == "editor" -        assert r.contribs[3].index is None -        assert r.contribs[4].role == "translator" +        assert r.contribs[4].role == "editor"          assert r.contribs[4].index is None +        assert r.contribs[4].extra is None +        assert r.contribs[5].role == "translator" +        assert r.contribs[5].index is None          assert len(r.refs) == 25          assert r.refs[0].key == "BIB1"          assert r.refs[0].year == 1972          assert r.refs[0].locator == "1734"          assert r.refs[0].container_name == "J. Chem. Phys." -        assert r.refs[0].extra['crossref'] == {"volume": "57", "author": "Swenson", "doi": "10.1063/1.1678462"} +        assert r.refs[0].extra == {"volume": "57", "author": "Swenson", "doi": "10.1063/1.1678462", "medium": "DVD"} +        assert r.refs[2].key == 'BIB3' +        assert r.refs[2].extra.get('author') is None +        assert r.refs[2].container_name == "Hypervirial Theorem's, Lecture Notes in Chemistry <3"          assert r.refs[3].container_name == "Large Order Perturbation Theory and Summation Methods in Quantum Mechanics, Lecture Notes in Chemistry"  def test_stateful_checking(crossref_importer_existing): | 
