diff options
| -rw-r--r-- | python/fatcat_tools/importers/jalc.py | 21 | ||||
| -rw-r--r-- | python/tests/import_jalc.py | 31 | 
2 files changed, 46 insertions, 6 deletions
| diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py index 09b8bd76..e2ccb230 100644 --- a/python/fatcat_tools/importers/jalc.py +++ b/python/fatcat_tools/importers/jalc.py @@ -101,28 +101,37 @@ class JalcImporter(EntityImporter):                  name = eng                  if not name.find('name'):                      name = jpn +                surname = name.find('familyName') +                if surname: +                    surname = surname.string                  contrib = fatcat_client.ReleaseContrib(                      raw_name=clean(name.find('name').string),                      given_name=clean(name.find('givenName').string), -                    surname=clean(name.find('familyName').string), +                    surname=clean(surname),                      role='author',                  )                  if eng.find('name') and jpn.find('name'): +                    jpn_surname = jpn.find('familyName') +                    if jpn_surname: +                        jpn_surname = jpn_surname.string                      contrib.extra = {                          'original_name': {                              'lang': 'ja',                              'raw_name': clean(jpn.find('name').string),                              'given_name': clean(jpn.find('givenName').string), -                            'surname': clean(jpn.find('familyName').string), +                            'surname': clean(jpn_surname),                          }}                  contribs.append(contrib)          elif people:              # TODO: test for this codepath?              for eng in people: +                surname = eng.find('familyName') +                if surname: +                    surname = surname.string                  contrib = dict(                      raw_name=clean(eng.find('name').string),                      given_name=clean(eng.find('givenName').string), -                    surname=clean(eng.find('familyName').string), +                    surname=clean(surname),                      role='author',                  )                  contribs.append(contrib) @@ -228,10 +237,14 @@ class JalcImporter(EntityImporter):          # (informally)          extra['jalc'] = extra_jalc +        title = clean(title) +        if not title: +            return None +          re = fatcat_client.ReleaseEntity(              work_id=None,              title=title, -            original_title=original_title, +            original_title=clean(original_title),              release_type="article-journal",              release_stage='published',              release_date=release_date, diff --git a/python/tests/import_jalc.py b/python/tests/import_jalc.py index e92c26c0..693e77f4 100644 --- a/python/tests/import_jalc.py +++ b/python/tests/import_jalc.py @@ -1,7 +1,7 @@  import json, gzip  import pytest -from fatcat_tools.importers import JalcImporter, Bs4XmlFilePusher +from fatcat_tools.importers import JalcImporter, Bs4XmlFilePusher, Bs4XmlLinesPusher  from fixtures import api  from bs4 import BeautifulSoup @@ -43,6 +43,33 @@ def test_jalc_importer(jalc_importer):      assert counts['skip'] == 0      assert last_index == jalc_importer.api.get_changelog(limit=1)[0].index +def test_jalc_importer_lines(jalc_importer): +    last_index = jalc_importer.api.get_changelog(limit=1)[0].index +    with open('tests/files/jalc_rdf_sample_100.xml', 'r') as f: +        jalc_importer.bezerk_mode = True +        counts = Bs4XmlLinesPusher(jalc_importer, f, "<rdf:Description").run() +    assert counts['insert'] == 93 +    assert counts['exists'] == 0 +    assert counts['skip'] == 0 + +    # fetch most recent editgroup +    change = jalc_importer.api.get_changelog_entry(index=last_index+1) +    eg = change.editgroup +    assert eg.description +    assert "jalc" in eg.description.lower() +    assert eg.extra['git_rev'] +    assert "fatcat_tools.JalcImporter" in eg.extra['agent'] + +    last_index = jalc_importer.api.get_changelog(limit=1)[0].index +    with open('tests/files/jalc_rdf_sample_100.xml', 'r') as f: +        jalc_importer.bezerk_mode = False +        jalc_importer.reset() +        counts = Bs4XmlLinesPusher(jalc_importer, f, "<rdf:Description").run() +    assert counts['insert'] == 0 +    assert counts['exists'] == 93 +    assert counts['skip'] == 0 +    assert last_index == jalc_importer.api.get_changelog(limit=1)[0].index +  def test_jalc_xml_parse(jalc_importer):      with open('tests/files/jalc_lod_sample.xml', 'r') as f:          soup = BeautifulSoup(f, "xml") @@ -51,7 +78,7 @@ def test_jalc_xml_parse(jalc_importer):      print(r.extra)      assert r.title == "New carbides in the Ni-Ti-Mo-C system"      assert r.subtitle == None -    assert r.original_title == "Ni-Ti-Mo-C系に出現する新炭化物相について" +    assert r.original_title == "Ni-Ti-Mo-C系に出現する新炭化物相について"      assert r.publisher == "Japan Society of Powder and Powder Metallurgy"      assert r.release_type == "article-journal"      assert r.release_stage == "published" | 
