4 files changed, 108 insertions, 97 deletions
diff --git a/grobid_tei_xml/parse.py b/grobid_tei_xml/parse.py
index 66e4e72..cd55f9a 100755
--- a/grobid_tei_xml/parse.py
+++ b/grobid_tei_xml/parse.py
@@ -62,9 +62,9 @@ def _parse_author(elem: Optional[ET.Element], ns: str = ns) -> Optional[GrobidAu
                 affiliation_dict[orgname_type] = orgname_tag.text or None
         if affiliation_dict:
             ga.affiliation = GrobidAffiliation(
-                institution=affiliation_dict.get('institution'),
-                department=affiliation_dict.get('department'),
-                laboratory=affiliation_dict.get('laboratory'),
+                institution=affiliation_dict.get("institution"),
+                department=affiliation_dict.get("department"),
+                laboratory=affiliation_dict.get("laboratory"),
             )
             address_tag = affiliation_tag.find(f"./{{{ns}}}address")
             if address_tag is not None:
@@ -73,10 +73,10 @@ def _parse_author(elem: Optional[ET.Element], ns: str = ns) -> Optional[GrobidAu
                     address_dict[t.tag.split("}")[-1]] = t.text or None
                 if address_dict:
                     ga.affiliation.address = GrobidAddress(
-                        addr_line=address_dict.get('addrLine'),
-                        post_code=address_dict.get('postCode'),
-                        settlement=address_dict.get('settlement'),
-                        country=address_dict.get('country'),
+                        addr_line=address_dict.get("addrLine"),
+                        post_code=address_dict.get("postCode"),
+                        settlement=address_dict.get("settlement"),
+                        country=address_dict.get("country"),
                     )
     return ga
 
@@ -121,7 +121,7 @@ def test_clean_url() -> None:
     ]
 
     for row in examples:
-        assert row['clean'] == _clean_url(row['dirty'])
+        assert row["clean"] == _clean_url(row["dirty"])
 
 
 def _parse_biblio(elem: ET.Element, ns: str = ns) -> GrobidBiblio:
@@ -138,7 +138,7 @@ def _parse_biblio(elem: ET.Element, ns: str = ns) -> GrobidBiblio:
             authors.append(a)
 
     editors = []
-    editor_tags = elem.findall(f'.//{{{ns}}}editor')
+    editor_tags = elem.findall(f".//{{{ns}}}editor")
     if not editor_tags:
         editor_tags = elem.findall(f'.//{{{ns}}}contributor[@role="editor"]')
     for elt in editor_tags or []:
@@ -151,7 +151,6 @@ def _parse_biblio(elem: ET.Element, ns: str = ns) -> GrobidBiblio:
         editors=editors or None,
         id=elem.attrib.get("{http://www.w3.org/XML/1998/namespace}id"),
         unstructured=elem.findtext(f'.//{{{ns}}}note[@type="raw_reference"]'),
-
         # date below
         # titles: @level=a for article, @level=m for manuscrupt (book)
         title=elem.findtext(f'.//{{{ns}}}title[@type="main"]'),
@@ -175,14 +174,14 @@ def _parse_biblio(elem: ET.Element, ns: str = ns) -> GrobidBiblio:
     )
 
     book_title_tag = elem.find(f'.//{{{ns}}}title[@level="m"]')
-    if book_title_tag is not None and book_title_tag.attrib.get('type') is None:
+    if book_title_tag is not None and book_title_tag.attrib.get("type") is None:
         biblio.book_title = book_title_tag.text
     if biblio.book_title and not biblio.title:
         biblio.title = biblio.book_title
         biblio.book_title = None
 
-    note_tag = elem.find(f'.//{{{ns}}}note')
-    if note_tag is not None and note_tag.attrib.get('type') is None:
+    note_tag = elem.find(f".//{{{ns}}}note")
+    if note_tag is not None and note_tag.attrib.get("type") is None:
         biblio.note = note_tag.text
 
     if not biblio.publisher:
@@ -212,7 +211,7 @@ def _parse_biblio(elem: ET.Element, ns: str = ns) -> GrobidBiblio:
 
     # having DOI and a DOI URL is redundant
     if biblio.doi and biblio.url:
-        if ('://doi.org/' in biblio.url) or ('://dx.doi.org/' in biblio.url):
+        if ("://doi.org/" in biblio.url) or ("://dx.doi.org/" in biblio.url):
             biblio.url = None
 
     return biblio
@@ -283,20 +282,20 @@ def parse_citations_xml(xml_text: AnyStr) -> List[GrobidBiblio]:
     the namespace.
     """
     if isinstance(xml_text, bytes):
-        xml_text = xml_text.replace(b'xmlns="http://www.tei-c.org/ns/1.0"', b'')
+        xml_text = xml_text.replace(b'xmlns="http://www.tei-c.org/ns/1.0"', b"")
     elif isinstance(xml_text, str):
-        xml_text = xml_text.replace('xmlns="http://www.tei-c.org/ns/1.0"', '')
+        xml_text = xml_text.replace('xmlns="http://www.tei-c.org/ns/1.0"', "")
     tree = _string_to_tree(xml_text)
     root = tree.getroot()
 
-    if root.tag == 'biblStruct':
-        ref = _parse_biblio(root, ns='')
+    if root.tag == "biblStruct":
+        ref = _parse_biblio(root, ns="")
         ref.index = 0
         return [ref]
 
     refs = []
     for (i, bs) in enumerate(tree.findall(".//biblStruct")):
-        ref = _parse_biblio(bs, ns='')
+        ref = _parse_biblio(bs, ns="")
         ref.index = i
         refs.append(ref)
     return refs
diff --git a/grobid_tei_xml/types.py b/grobid_tei_xml/types.py
index 08be47a..725871b 100644
--- a/grobid_tei_xml/types.py
+++ b/grobid_tei_xml/types.py
@@ -47,13 +47,13 @@ def _csl_date(s: Optional[str]) -> Optional[list]:
         return None
 
     # YYYY-MM
-    if len(s) >= 7 and s[4] == '-' and s[5:7].isdigit():
+    if len(s) >= 7 and s[4] == "-" and s[5:7].isdigit():
         month = int(s[5:7])
     else:
         return [[year]]
 
     # YYYY-MM-DD
-    if len(s) == 10 and s[7] == '-' and s[8:10].isdigit():
+    if len(s) == 10 and s[7] == "-" and s[8:10].isdigit():
         day = int(s[8:10])
         return [[year, month, day]]
     else:
@@ -112,26 +112,26 @@ class GrobidBiblio:
         d = self.to_dict()
 
         # new keys
-        d.pop('first_page', None)
-        d.pop('last_page', None)
-        d.pop('note', None)
+        d.pop("first_page", None)
+        d.pop("last_page", None)
+        d.pop("note", None)
 
         # legacy book title behavior
-        if not d.get('journal') and d.get('book_title'):
-            d['journal'] = d.pop('book_title')
+        if not d.get("journal") and d.get("book_title"):
+            d["journal"] = d.pop("book_title")
         else:
-            d.pop('book_title', None)
+            d.pop("book_title", None)
 
         # author changes
-        for a in d['authors']:
-            a['name'] = a.pop('full_name', None)
-            if not a.get('given_name'):
-                a['given_name'] = a.pop('middle_name', None)
+        for a in d["authors"]:
+            a["name"] = a.pop("full_name", None)
+            if not a.get("given_name"):
+                a["given_name"] = a.pop("middle_name", None)
             else:
-                a.pop('middle_name', None)
-            addr = a.get('affiliation', {}).get('address')
-            if addr and addr.get('post_code'):
-                addr['postCode'] = addr.pop('post_code')
+                a.pop("middle_name", None)
+            addr = a.get("affiliation", {}).get("address")
+            if addr and addr.get("post_code"):
+                addr["postCode"] = addr.pop("post_code")
 
         return _simplify_dict(d)
 
@@ -155,18 +155,20 @@ class GrobidBiblio:
             note=self.note,
         )
         # fields with '-' in the key name
-        csl.update({
-            "container-title": self.journal,
-            "book-title": self.book_title,
-            "series-title": self.series_title,
-            "page-first": self.first_page,
-        })
+        csl.update(
+            {
+                "container-title": self.journal,
+                "book-title": self.book_title,
+                "series-title": self.series_title,
+                "page-first": self.first_page,
+            }
+        )
 
         # numeric fields
         if self.issue and self.issue.isdigit():
-            csl['issue'] = int(self.issue)
+            csl["issue"] = int(self.issue)
         if self.volume and self.volume.isdigit():
-            csl['volume'] = int(self.volume)
+            csl["volume"] = int(self.volume)
 
         return _simplify_dict(csl)
 
@@ -201,23 +203,23 @@ class GrobidDocument:
         Returns a dict in the old "grobid2json" format.
         """
         d = self.to_dict()
-        d.pop('header', None)
+        d.pop("header", None)
         d.update(self.header.to_legacy_dict())
         if self.citations:
-            d['citations'] = [c.to_legacy_dict() for c in self.citations]
+            d["citations"] = [c.to_legacy_dict() for c in self.citations]
 
         # all header fields at top-level
-        d['journal'] = dict(
-            name=d.pop('journal', None),
-            publisher=d.pop('publisher', None),
-            issn=d.pop('issn', None),
-            issne=d.pop('issne', None),
-            volume=d.pop('volume', None),
-            issue=d.pop('issue', None),
+        d["journal"] = dict(
+            name=d.pop("journal", None),
+            publisher=d.pop("publisher", None),
+            issn=d.pop("issn", None),
+            issne=d.pop("issne", None),
+            volume=d.pop("volume", None),
+            issue=d.pop("issue", None),
         )
 
         # document fields not in the old schema
-        d.pop('pdf_md5', None)
+        d.pop("pdf_md5", None)
 
         return _simplify_dict(d)
 
@@ -246,7 +248,7 @@ def _simplify_dict(d: dict) -> dict:
 
     TODO: should this return Optional[dict]?
     """
-    if d in [None, {}, '']:
+    if d in [None, {}, ""]:
         return {}
     for k in list(d.keys()):
         if isinstance(d[k], dict):
@@ -255,6 +257,6 @@ def _simplify_dict(d: dict) -> dict:
             for i in range(len(d[k])):
                 if isinstance(d[k][i], dict):
                     d[k][i] = _simplify_dict(d[k][i])
-        if d[k] in [None, {}, '']:
+        if d[k] in [None, {}, ""]:
             d.pop(k)
     return d
diff --git a/tests/test_csl.py b/tests/test_csl.py
index 27c8c3e..e8ded91 100644
--- a/tests/test_csl.py
+++ b/tests/test_csl.py
@@ -3,7 +3,7 @@ from grobid_tei_xml import parse_document_xml
 
 def test_small_xml_csl() -> None:
 
-    with open('tests/files/small.xml', 'r') as f:
+    with open("tests/files/small.xml", "r") as f:
         tei_xml = f.read()
 
     d = parse_document_xml(tei_xml)
@@ -11,10 +11,7 @@ def test_small_xml_csl() -> None:
         "type": "article-journal",
         "title": "Dummy Example File",
         "author": [
-            {
-                "given": "Brewster",
-                "family": "Kahle"
-            },
+            {"given": "Brewster", "family": "Kahle"},
             {
                 "given": "J",
                 "family": "Doe",
@@ -29,10 +26,7 @@ def test_small_xml_csl() -> None:
         "type": "article-journal",
         "title": "Everything is Wonderful",
         "author": [
-            {
-                "given": "A",
-                "family": "Seaperson"
-            },
+            {"given": "A", "family": "Seaperson"},
         ],
         "container-title": "Letters in the Alphabet",
         "issued": [[2001]],
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 976d1b1..25529c4 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -11,31 +11,33 @@ from grobid_tei_xml.types import *
 
 def test_small_xml() -> None:
 
-    with open('tests/files/small.xml', 'r') as f:
+    with open("tests/files/small.xml", "r") as f:
         tei_xml = f.read()
 
     doc = parse_document_xml(tei_xml)
     expected_body = """Introduction \nEverything starts somewhere, as somebody [1]  once said. \n\n In Depth \n Meat \nYou know, for kids. \n Potatos \nQED."""
     expected = GrobidDocument(
-        grobid_version='0.5.1-SNAPSHOT',
-        grobid_timestamp='2018-04-02T00:31+0000',
-        language_code='en',
+        grobid_version="0.5.1-SNAPSHOT",
+        grobid_timestamp="2018-04-02T00:31+0000",
+        language_code="en",
         header=GrobidBiblio(
             title="Dummy Example File",
             authors=[
-                GrobidAuthor(full_name="Brewster Kahle",
-                             given_name="Brewster",
-                             surname="Kahle",
-                             affiliation=GrobidAffiliation(
-                                 department="Faculty ofAgricultrial Engineering",
-                                 laboratory="Plant Physiology Laboratory",
-                                 institution="Technion-Israel Institute of Technology",
-                                 address=GrobidAddress(
-                                     post_code="32000",
-                                     settlement="Haifa",
-                                     country="Israel",
-                                 ),
-                             )),
+                GrobidAuthor(
+                    full_name="Brewster Kahle",
+                    given_name="Brewster",
+                    surname="Kahle",
+                    affiliation=GrobidAffiliation(
+                        department="Faculty ofAgricultrial Engineering",
+                        laboratory="Plant Physiology Laboratory",
+                        institution="Technion-Israel Institute of Technology",
+                        address=GrobidAddress(
+                            post_code="32000",
+                            settlement="Haifa",
+                            country="Israel",
+                        ),
+                    ),
+                ),
                 GrobidAuthor(
                     full_name="J Doe",
                     given_name="J",
@@ -80,9 +82,9 @@ def test_small_xml() -> None:
 
 def test_small_xml_legacy() -> None:
 
-    with open('tests/files/small.xml', 'r') as f:
+    with open("tests/files/small.xml", "r") as f:
         tei_xml = f.read()
-    with open('tests/files/small.json', 'r') as f:
+    with open("tests/files/small.json", "r") as f:
         json_form = json.loads(f.read())
 
     d = parse_document_xml(tei_xml).to_legacy_dict()
@@ -104,7 +106,7 @@ def test_invalid_xml() -> None:
 
 def test_bytes() -> None:
 
-    with open('tests/files/small.xml', 'rb') as f:
+    with open("tests/files/small.xml", "rb") as f:
         tei_xml = f.read()
 
     parse_document_xml(tei_xml)
@@ -113,7 +115,7 @@ def test_bytes() -> None:
 
 def test_elementtree() -> None:
 
-    with open('tests/files/small.xml', 'rb') as f:
+    with open("tests/files/small.xml", "rb") as f:
         tei_xml = f.read()
 
     parse_document_xml(xml.etree.ElementTree.parse(io.BytesIO(tei_xml)))  # type: ignore
@@ -126,8 +128,10 @@ def test_example_grobid_tei_xml() -> None:
 
     doc = parse_document_xml(blob)
 
-    assert doc.header.title == \
-        """Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network"""
+    assert (
+        doc.header.title
+        == """Changes of patients' satisfaction with the health care services in Lithuanian Health Promoting Hospitals network"""
+    )
 
     ref = [c for c in doc.citations or [] if c.id == "b12"][0]
     assert ref.authors[0].full_name == "K Tasa"
@@ -138,8 +142,10 @@ def test_example_grobid_tei_xml() -> None:
     assert ref.date == "1996"
     assert ref.pages == "206-225"
     assert ref.volume == "8"
-    assert ref.unstructured == \
-        """Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19."""
+    assert (
+        ref.unstructured
+        == """Tasa K, Baker R, Murray M. Using patient feedback for qua- lity improvement. Quality Management in Health Care 1996;8:206-19."""
+    )
 
 
 def test_single_citations_xml() -> None:
@@ -193,7 +199,10 @@ def test_single_citations_xml() -> None:
 </biblStruct>"""
 
     d = parse_citations_xml(citation_xml)[0]
-    assert d.title == """Mesh migration following abdominal hernia repair: a comprehensive review"""
+    assert (
+        d.title
+        == """Mesh migration following abdominal hernia repair: a comprehensive review"""
+    )
     assert d.authors[2].given_name == "L"
     assert d.authors[2].middle_name == "R"
     assert d.authors[2].surname == "Taveras"
@@ -211,7 +220,7 @@ def test_single_citations_xml() -> None:
 
 def test_citation_list_xml() -> None:
 
-    with open('tests/files/example_citation_list.xml', 'r') as f:
+    with open("tests/files/example_citation_list.xml", "r") as f:
         tei_xml = f.read()
 
     citations = parse_citations_xml(tei_xml)
@@ -237,17 +246,21 @@ def test_citation_list_xml() -> None:
     assert citations[11].series_title == "Handbook of Optics"
     assert citations[11].publisher == "McGRAW-HILL"
 
-    assert citations[
-        12].title == "Implications of abandoned shoreline features above Glacial Lake Duluth levels along the north shore of the Superior Basin in the vicinity of the Brule River"
-    assert citations[
-        12].book_title == "Paper presented at the 13th Biennial Meeting of the American Quaternary Association"
+    assert (
+        citations[12].title
+        == "Implications of abandoned shoreline features above Glacial Lake Duluth levels along the north shore of the Superior Basin in the vicinity of the Brule River"
+    )
+    assert (
+        citations[12].book_title
+        == "Paper presented at the 13th Biennial Meeting of the American Quaternary Association"
+    )
     assert citations[12].institution == "University of Minnesota"
 
 
 def test_grobid_070_document() -> None:
     # more recent GROBID v0.7.0 output
 
-    with open('tests/files/example_grobid_plos.tei.xml', 'r') as f:
+    with open("tests/files/example_grobid_plos.tei.xml", "r") as f:
         tei_xml = f.read()
 
     doc = parse_document_xml(tei_xml)
@@ -267,7 +280,10 @@ def test_grobid_070_document() -> None:
 
     cite_b3 = doc.citations[3]
     assert cite_b3.url == "http://unesdoc.unesco.org/ulis/"
-    assert cite_b3.title == "Requirements for Global Implementation of the Strategic Plan for Coastal GOOS"
+    assert (
+        cite_b3.title
+        == "Requirements for Global Implementation of the Strategic Plan for Coastal GOOS"
+    )
     assert cite_b3.authors
     assert cite_b3.authors[0].surname == "Ioc-Unesco"
     assert cite_b3.date == "2012"