aboutsummaryrefslogtreecommitdiffstats
path: root/grobid_tei_xml/types.py
diff options
context:
space:
mode:
Diffstat (limited to 'grobid_tei_xml/types.py')
-rw-r--r--grobid_tei_xml/types.py74
1 files changed, 38 insertions, 36 deletions
diff --git a/grobid_tei_xml/types.py b/grobid_tei_xml/types.py
index 08be47a..725871b 100644
--- a/grobid_tei_xml/types.py
+++ b/grobid_tei_xml/types.py
@@ -47,13 +47,13 @@ def _csl_date(s: Optional[str]) -> Optional[list]:
return None
# YYYY-MM
- if len(s) >= 7 and s[4] == '-' and s[5:7].isdigit():
+ if len(s) >= 7 and s[4] == "-" and s[5:7].isdigit():
month = int(s[5:7])
else:
return [[year]]
# YYYY-MM-DD
- if len(s) == 10 and s[7] == '-' and s[8:10].isdigit():
+ if len(s) == 10 and s[7] == "-" and s[8:10].isdigit():
day = int(s[8:10])
return [[year, month, day]]
else:
@@ -112,26 +112,26 @@ class GrobidBiblio:
d = self.to_dict()
# new keys
- d.pop('first_page', None)
- d.pop('last_page', None)
- d.pop('note', None)
+ d.pop("first_page", None)
+ d.pop("last_page", None)
+ d.pop("note", None)
# legacy book title behavior
- if not d.get('journal') and d.get('book_title'):
- d['journal'] = d.pop('book_title')
+ if not d.get("journal") and d.get("book_title"):
+ d["journal"] = d.pop("book_title")
else:
- d.pop('book_title', None)
+ d.pop("book_title", None)
# author changes
- for a in d['authors']:
- a['name'] = a.pop('full_name', None)
- if not a.get('given_name'):
- a['given_name'] = a.pop('middle_name', None)
+ for a in d["authors"]:
+ a["name"] = a.pop("full_name", None)
+ if not a.get("given_name"):
+ a["given_name"] = a.pop("middle_name", None)
else:
- a.pop('middle_name', None)
- addr = a.get('affiliation', {}).get('address')
- if addr and addr.get('post_code'):
- addr['postCode'] = addr.pop('post_code')
+ a.pop("middle_name", None)
+ addr = a.get("affiliation", {}).get("address")
+ if addr and addr.get("post_code"):
+ addr["postCode"] = addr.pop("post_code")
return _simplify_dict(d)
@@ -155,18 +155,20 @@ class GrobidBiblio:
note=self.note,
)
# fields with '-' in the key name
- csl.update({
- "container-title": self.journal,
- "book-title": self.book_title,
- "series-title": self.series_title,
- "page-first": self.first_page,
- })
+ csl.update(
+ {
+ "container-title": self.journal,
+ "book-title": self.book_title,
+ "series-title": self.series_title,
+ "page-first": self.first_page,
+ }
+ )
# numeric fields
if self.issue and self.issue.isdigit():
- csl['issue'] = int(self.issue)
+ csl["issue"] = int(self.issue)
if self.volume and self.volume.isdigit():
- csl['volume'] = int(self.volume)
+ csl["volume"] = int(self.volume)
return _simplify_dict(csl)
@@ -201,23 +203,23 @@ class GrobidDocument:
Returns a dict in the old "grobid2json" format.
"""
d = self.to_dict()
- d.pop('header', None)
+ d.pop("header", None)
d.update(self.header.to_legacy_dict())
if self.citations:
- d['citations'] = [c.to_legacy_dict() for c in self.citations]
+ d["citations"] = [c.to_legacy_dict() for c in self.citations]
# all header fields at top-level
- d['journal'] = dict(
- name=d.pop('journal', None),
- publisher=d.pop('publisher', None),
- issn=d.pop('issn', None),
- issne=d.pop('issne', None),
- volume=d.pop('volume', None),
- issue=d.pop('issue', None),
+ d["journal"] = dict(
+ name=d.pop("journal", None),
+ publisher=d.pop("publisher", None),
+ issn=d.pop("issn", None),
+ issne=d.pop("issne", None),
+ volume=d.pop("volume", None),
+ issue=d.pop("issue", None),
)
# document fields not in the old schema
- d.pop('pdf_md5', None)
+ d.pop("pdf_md5", None)
return _simplify_dict(d)
@@ -246,7 +248,7 @@ def _simplify_dict(d: dict) -> dict:
TODO: should this return Optional[dict]?
"""
- if d in [None, {}, '']:
+ if d in [None, {}, ""]:
return {}
for k in list(d.keys()):
if isinstance(d[k], dict):
@@ -255,6 +257,6 @@ def _simplify_dict(d: dict) -> dict:
for i in range(len(d[k])):
if isinstance(d[k][i], dict):
d[k][i] = _simplify_dict(d[k][i])
- if d[k] in [None, {}, '']:
+ if d[k] in [None, {}, ""]:
d.pop(k)
return d