diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2019-03-06 17:30:20 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-21 11:41:29 -0700 | 
| commit | 351393f4a1c6e86e3fd48d158e6a173919a80db1 (patch) | |
| tree | 2bb915c59c9da3ed298345189961da7a08ef8019 /python | |
| parent | ee393b537f3164ad25c9337b658db81192b25629 (diff) | |
| download | fatcat-351393f4a1c6e86e3fd48d158e6a173919a80db1.tar.gz fatcat-351393f4a1c6e86e3fd48d158e6a173919a80db1.zip | |
small fixes to arxivraw parser
Diffstat (limited to 'python')
| -rw-r--r-- | python/parse_arxivraw_xml.py | 5 | 
1 file changed, 3 insertions, 2 deletions
```diff
diff --git a/python/parse_arxivraw_xml.py b/python/parse_arxivraw_xml.py
index e2fab510..16def821 100644
--- a/python/parse_arxivraw_xml.py
+++ b/python/parse_arxivraw_xml.py
@@ -118,6 +118,7 @@ class ArxivRawXmlParser():
             license_slug = metadata.license.string.strip()
         abstracts = None
         if metadata.abstract:
+            # TODO: test for this multi-abstract code path
             abstracts = []
             abst = metadata.abstract.string.strip()
             orig = None
@@ -144,14 +145,14 @@ class ArxivRawXmlParser():
         for version in metadata.find_all('version'):
             arxiv_id = base_id + version['version']
             release_date = version.date.string.strip()
-            release_date = datetime.datetime.strptime(release_date, "%a, %d %b %Y %H:%M:%S %Z")
+            release_date = datetime.datetime.strptime(release_date, "%a, %d %b %Y %H:%M:%S %Z").date()
             versions.append(dict(
                 work_id=None,
                 title=title,
                 #original_title
                 release_type="article-journal",
                 release_status='submitted', # XXX: source_type?
-                release_date=release_date.isoformat() + "Z",
+                release_date=release_date.isoformat(),
                 release_year=release_date.year,
                 arxiv_id=arxiv_id,
                 #doi (see below)
```
