diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/fatcat_tools/importers/arxiv.py | 7 | ||||
| -rw-r--r-- | python/tests/import_arxiv.py | 3 | 
2 files changed, 6 insertions, 4 deletions
| diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py index 0d0179cd..03ef10d6 100644 --- a/python/fatcat_tools/importers/arxiv.py +++ b/python/fatcat_tools/importers/arxiv.py @@ -45,7 +45,6 @@ class ArxivRawImporter(EntityImporter):      """      Converts arxiv.org "arXivRaw" OAI-PMH XML records to fatcat release entities -    TODO: this will require a special importer that keeps works together      TODO: arxiv_id lookup in API (rust) with no version specified should select            the "most recent" version; can be a simple sort?      """ @@ -105,6 +104,7 @@ class ArxivRawImporter(EntityImporter):          # don't know!          release_type = "article" +        number = None          if metadata.find('journal-ref') and metadata.find('journal-ref').string:              journal_ref = metadata.find('journal-ref').string.strip() @@ -112,7 +112,7 @@ class ArxivRawImporter(EntityImporter):              if "conf." in journal_ref.lower() or "proc." in journal_ref.lower():                  release_type = "conference-paper"          if metadata.find('report-no') and metadata.find('report-no').string: -            extra['number'] = metadata.find('report-no').string.strip() +            number = metadata.find('report-no').string.strip()              release_type = "report"          if metadata.find('acm-class') and metadata.find('acm-class').string:              extra_arxiv['acm_class'] = metadata.find('acm_class').string.strip() @@ -161,7 +161,7 @@ class ArxivRawImporter(EntityImporter):              arxiv_id = base_id + version['version']              release_date = version.date.string.strip()              release_date = datetime.datetime.strptime(release_date, "%a, %d %b %Y %H:%M:%S %Z").date() -            # XXX: source_type? +            # TODO: source_type?              versions.append(fatcat_client.ReleaseEntity(                  work_id=None,                  title=title, @@ -174,6 +174,7 @@ class ArxivRawImporter(EntityImporter):                  ext_ids=fatcat_client.ReleaseExtIds(                      arxiv=arxiv_id,                  ), +                number=number,                  language=lang,                  license_slug=license_slug,                  abstracts=abstracts, diff --git a/python/tests/import_arxiv.py b/python/tests/import_arxiv.py index 8d91be10..bc4fca02 100644 --- a/python/tests/import_arxiv.py +++ b/python/tests/import_arxiv.py @@ -69,7 +69,8 @@ def test_arxiv_xml_parse(arxiv_importer):      # matched by ISSN, so shouldn't be in there?      #assert extra['container_name'] == "Abstracts of the Papers Communicated to the Royal Society of London"      assert len(r1.contribs) == 4 -    # XXX: extra['arxiv'] stuff +    assert r1.extra['arxiv']['categories'] == ['cond-mat.stat-mech', 'physics.bio-ph', 'physics.data-an'] +    assert r1.extra['arxiv']['base_id'] == '1810.09584'      assert r1.contribs[0].raw_name == "Raphael Chetrite"      assert r1.contribs[0].role == "author" | 
