summary | refs | log | tree | commit | diff | stats
path: root/python/parse_arxivraw_xml.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-03-06 17:30:20 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2019-05-21 11:41:29 -0700
commit 351393f4a1c6e86e3fd48d158e6a173919a80db1 (patch)
tree 2bb915c59c9da3ed298345189961da7a08ef8019 /python/parse_arxivraw_xml.py
parent ee393b537f3164ad25c9337b658db81192b25629 (diff)
download fatcat-351393f4a1c6e86e3fd48d158e6a173919a80db1.tar.gz
fatcat-351393f4a1c6e86e3fd48d158e6a173919a80db1.zip
small fixes to arxivraw parser
Diffstat (limited to 'python/parse_arxivraw_xml.py')
-rw-r--r-- python/parse_arxivraw_xml.py 5
1 files changed, 3 insertions, 2 deletions
diff --git a/python/parse_arxivraw_xml.py b/python/parse_arxivraw_xml.py
index e2fab510..16def821 100644
--- a/python/parse_arxivraw_xml.py
+++ b/python/parse_arxivraw_xml.py
@@ -118,6 +118,7 @@ class ArxivRawXmlParser():
license_slug = metadata.license.string.strip()
abstracts = None
if metadata.abstract:
+ # TODO: test for this multi-abstract code path
abstracts = []
abst = metadata.abstract.string.strip()
orig = None
@@ -144,14 +145,14 @@ class ArxivRawXmlParser():
for version in metadata.find_all('version'):
arxiv_id = base_id + version['version']
release_date = version.date.string.strip()
- release_date = datetime.datetime.strptime(release_date, "%a, %d %b %Y %H:%M:%S %Z")
+ release_date = datetime.datetime.strptime(release_date, "%a, %d %b %Y %H:%M:%S %Z").date()
versions.append(dict(
work_id=None,
title=title,
#original_title
release_type="article-journal",
release_status='submitted', # XXX: source_type?
- release_date=release_date.isoformat() + "Z",
+ release_date=release_date.isoformat(),
release_year=release_date.year,
arxiv_id=arxiv_id,
#doi (see below)