diff options
| -rw-r--r-- | python/fatcat_tools/importers/arxiv.py | 22 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/jalc.py | 14 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/jstor.py | 18 | 
3 files changed, 27 insertions, 27 deletions
| diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py index c69ee16a..79b242c4 100644 --- a/python/fatcat_tools/importers/arxiv.py +++ b/python/fatcat_tools/importers/arxiv.py @@ -118,13 +118,13 @@ class ArxivRawImporter(EntityImporter):              if not (doi.startswith('10.') and '/' in doi and doi.split('/')[1]):                  sys.stderr.write("BOGUS DOI: {}\n".format(doi))                  doi = None -        title = latex_to_text(metadata.title.string) -        authors = parse_arxiv_authors(metadata.authors.string) +        title = latex_to_text(metadata.title.get_text()) +        authors = parse_arxiv_authors(metadata.authors.get_text())          contribs = [fatcat_openapi_client.ReleaseContrib(index=i, raw_name=a, role='author') for i, a in enumerate(authors)]          lang = "en"     # the vast majority in english -        if metadata.comments and metadata.comments.string: -            comments = metadata.comments.string.strip() +        if metadata.comments and metadata.comments.get_text(): +            comments = metadata.comments.get_text().strip()              extra_arxiv['comments'] = comments              if 'in french' in comments.lower():                  lang = 'fr' @@ -145,8 +145,8 @@ class ArxivRawImporter(EntityImporter):              # more languages?          number = None -        if metadata.find('journal-ref') and metadata.find('journal-ref').string: -            journal_ref = metadata.find('journal-ref').string.strip() +        if metadata.find('journal-ref') and metadata.find('journal-ref').get_text(): +            journal_ref = metadata.find('journal-ref').get_text().strip()              extra_arxiv['journal_ref'] = journal_ref              if "conf." in journal_ref.lower() or "proc." in journal_ref.lower():                  release_type = "paper-conference" @@ -160,16 +160,16 @@ class ArxivRawImporter(EntityImporter):                  release_type = "report"          if metadata.find('acm-class') and metadata.find('acm-class').string:              extra_arxiv['acm_class'] = metadata.find('acm-class').string.strip() -        if metadata.categories and metadata.categories.string: -            extra_arxiv['categories'] = metadata.categories.string.split() +        if metadata.categories and metadata.categories.get_text(): +            extra_arxiv['categories'] = metadata.categories.get_text().split()          license_slug = None -        if metadata.license and metadata.license.string: -            license_slug = lookup_license_slug(metadata.license.string) +        if metadata.license and metadata.license.get_text(): +            license_slug = lookup_license_slug(metadata.license.get_text())          abstracts = None          if metadata.abstract:              # TODO: test for this multi-abstract code path              abstracts = [] -            abst = metadata.abstract.string.strip() +            abst = metadata.abstract.get_text().strip()              orig = None              if '-----' in abst:                  both = abst.split('-----') diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py index 351a20a3..51760f8a 100644 --- a/python/fatcat_tools/importers/jalc.py +++ b/python/fatcat_tools/importers/jalc.py @@ -35,13 +35,13 @@ def parse_jalc_persons(raw_persons):      for raw in raw_persons:          name = raw.find('name') or None          if name: -            name = clean(name.string) +            name = clean(name.get_text())          surname = raw.find('familyName') or None          if surname: -            surname = clean(surname.string) +            surname = clean(surname.get_text())          given_name = raw.find('givenName') or None          if given_name: -            given_name = clean(given_name.string) +            given_name = clean(given_name.get_text())          lang = 'en'          if is_cjk(name):              lang = 'ja' @@ -163,12 +163,12 @@ class JalcImporter(EntityImporter):          titles = record.find_all("title")          if not titles:              return None -        title = titles[0].string.strip() +        title = titles[0].get_text().strip()          original_title = None          if title.endswith('.'):              title = title[:-1]          if len(titles) > 1: -            original_title = titles[1].string.strip() +            original_title = titles[1].get_text().strip()              if original_title.endswith('.'):                  original_title = original_title[:-1] @@ -242,7 +242,7 @@ class JalcImporter(EntityImporter):          container_extra = dict()          if record.publicationName: -            pubs = [p.string.strip() for p in record.find_all("publicationName") if p.string] +            pubs = [p.get_text().strip() for p in record.find_all("publicationName") if p.get_text()]              pubs = [clean(p) for p in pubs if p]              assert(pubs)              if len(pubs) > 1 and pubs[0] == pubs[1]: @@ -255,7 +255,7 @@ class JalcImporter(EntityImporter):                  container_extra['original_name'] = clean(pubs[1])          if record.publisher: -            pubs = [p.string.strip() for p in record.find_all("publisher") if p.string] +            pubs = [p.get_text().strip() for p in record.find_all("publisher") if p.get_text()]              pubs = [p for p in pubs if p]              if len(pubs) > 1 and pubs[0] == pubs[1]:                  pubs = [pubs[0]] diff --git a/python/fatcat_tools/importers/jstor.py b/python/fatcat_tools/importers/jstor.py index 5ff1ecd9..184a0bb1 100644 --- a/python/fatcat_tools/importers/jstor.py +++ b/python/fatcat_tools/importers/jstor.py @@ -63,13 +63,13 @@ class JstorImporter(EntityImporter):          release_type = JSTOR_TYPE_MAP.get(article['article-type'])          title = article_meta.find("article-title") -        if title and title.string: -            title = title.string.strip() -        elif title and not title.string: +        if title and title.get_text(): +            title = title.get_text().strip() +        elif title and not title.get_text():              title = None          if not title and release_type.startswith('review') and article_meta.product.source: -            title = "Review: {}".format(article_meta.product.source.string) +            title = "Review: {}".format(article_meta.product.source.get_text())          if not title:              return None @@ -96,8 +96,8 @@ class JstorImporter(EntityImporter):          if journal_ids:              extra_jstor['journal_ids'] = journal_ids -        journal_title = journal_meta.find("journal-title").string -        publisher = journal_meta.find("publisher-name").string +        journal_title = journal_meta.find("journal-title").get_text() +        publisher = journal_meta.find("publisher-name").get_text()          issn = journal_meta.find("issn")          if issn:              issn = issn.string @@ -141,13 +141,13 @@ class JstorImporter(EntityImporter):              for c in cgroup.find_all("contrib"):                  given = c.find("given-names")                  if given: -                    given = clean(given.string) +                    given = clean(given.get_text())                  surname = c.find("surname")                  if surname: -                    surname = clean(surname.string) +                    surname = clean(surname.get_text())                  raw_name = c.find("string-name")                  if raw_name: -                    raw_name = clean(raw_name.string) +                    raw_name = clean(raw_name.get_text())                  if not raw_name:                      if given and surname: | 
