diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-21 18:32:03 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-21 18:32:03 -0700 | 
| commit | 1df0cd9cfe96609ff276362d10a5e50b723bbb7b (patch) | |
| tree | 5d5896a87be45dc2a783c44f28091041930b6ec5 /python | |
| parent | 0efc5bb015f3b00affc22662740429a1652b3064 (diff) | |
| download | fatcat-1df0cd9cfe96609ff276362d10a5e50b723bbb7b.tar.gz fatcat-1df0cd9cfe96609ff276362d10a5e50b723bbb7b.zip | |
JALC more robust to partial names
Diffstat (limited to 'python')
| -rw-r--r-- | python/fatcat_tools/importers/jalc.py | 27 | 
1 file changed, 19 insertions, 8 deletions
| diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py index 81c2b7f8..8f8d1f66 100644 --- a/python/fatcat_tools/importers/jalc.py +++ b/python/fatcat_tools/importers/jalc.py @@ -94,7 +94,9 @@ class JalcImporter(EntityImporter):          contribs = []          people = record.find_all("Person") -        if people and (len(people) % 2 == 0) and is_cjk(people[1].find('name').string): +        if (people and (len(people) % 2 == 0) +                and not is_cjk(people[0].find('name').string) +                and is_cjk(people[1].find('name').string)):              # both english and japanese names are usually included for every author              # TODO: turns out this isn't always the case; see              # 10.18948/shasetaikai.1990.0_601 as an example with 4 actual @@ -110,22 +112,28 @@ class JalcImporter(EntityImporter):                  surname = name.find('familyName')                  if surname:                      surname = surname.string +                given_name = name.find('givenName') +                if given_name: +                    given_name = given_name.string                  contrib = fatcat_client.ReleaseContrib(                      raw_name=clean(name.find('name').string), -                    given_name=clean(name.find('givenName').string), +                    given_name=clean(given_name),                      surname=clean(surname),                      role='author',                  )                  if eng.find('name') and jpn.find('name'): -                    jpn_surname = jpn.find('familyName') -                    if jpn_surname: -                        jpn_surname = jpn_surname.string +                    surname = jpn.find('familyName') +                    if surname: +                        surname = surname.string +                    given_name = jpn.find('givenName') +                    if given_name: +                        given_name = given_name.string                      
contrib.extra = {                          'original_name': {                              'lang': 'ja',                              'raw_name': clean(jpn.find('name').string), -                            'given_name': clean(jpn.find('givenName').string), -                            'surname': clean(jpn_surname), +                            'given_name': clean(given_name), +                            'surname': clean(surname),                          }}                  contribs.append(contrib)          elif people: @@ -134,9 +142,12 @@ class JalcImporter(EntityImporter):                  surname = eng.find('familyName')                  if surname:                      surname = surname.string +                given_name = eng.find('givenName') +                if given_name: +                    given_name = given_name.string                  contrib = dict(                      raw_name=clean(eng.find('name').string), -                    given_name=clean(eng.find('givenName').string), +                    given_name=clean(given_name),                      surname=clean(surname),                      role='author',                  ) | 
