aboutsummaryrefslogtreecommitdiffstats
path: root/grobid_tei_xml/types.py
diff options
context:
space:
mode:
Diffstat (limited to 'grobid_tei_xml/types.py')
-rw-r--r--grobid_tei_xml/types.py96
1 files changed, 96 insertions, 0 deletions
diff --git a/grobid_tei_xml/types.py b/grobid_tei_xml/types.py
new file mode 100644
index 0000000..795d37f
--- /dev/null
+++ b/grobid_tei_xml/types.py
@@ -0,0 +1,96 @@
+
from dataclasses import asdict, dataclass
from typing import Any, AnyStr, Dict, List, Optional
+
+
@dataclass
class GrobidAddress:
    """Address record; every field is optional and defaults to ``None``."""

    # Positional order below is part of the generated ``__init__`` signature.
    addr_line: Optional[str] = None
    post_code: Optional[str] = None
    settlement: Optional[str] = None
    country: Optional[str] = None
    country_code: Optional[str] = None
+
@dataclass
class GrobidAffiliation:
    """Affiliation record with an optional nested :class:`GrobidAddress`.

    Every field is optional and defaults to ``None``.
    """

    address: Optional[GrobidAddress] = None
    institution: Optional[str] = None
    department: Optional[str] = None
    laboratory: Optional[str] = None
+
@dataclass
class GrobidAuthor:
    """Author record.

    ``name`` is the only required field (it may still be ``None``, but must
    be passed explicitly); the remaining fields default to ``None``.
    """

    name: Optional[str]
    # TODO: 'forename'?
    given_name: Optional[str] = None
    surname: Optional[str] = None
    # Annotated with the dedicated affiliation dataclass instead of a bare
    # ``dict``. Annotations are not enforced at runtime, so existing callers
    # keep working. NOTE(review): confirm the parser builds GrobidAffiliation
    # instances here rather than plain dicts.
    affiliation: Optional["GrobidAffiliation"] = None
+
@dataclass
class GrobidCitation:
    """A single citation record.

    ``authors`` is required; every other field is optional and defaults to
    ``None``.
    """

    authors: List[GrobidAuthor]
    index: Optional[int] = None
    id: Optional[str] = None
    date: Optional[str] = None
    issue: Optional[str] = None
    journal: Optional[str] = None
    publisher: Optional[str] = None
    title: Optional[str] = None
    url: Optional[str] = None
    volume: Optional[str] = None
    pages: Optional[str] = None
    first_page: Optional[str] = None
    last_page: Optional[str] = None
    unstructured: Optional[str] = None
    # TODO: 'arxiv' for consistency?
    arxiv_id: Optional[str] = None
    doi: Optional[str] = None
    pmid: Optional[str] = None
    pmcid: Optional[str] = None
    oa_url: Optional[str] = None

    def to_dict(self) -> dict:
        """Return this citation as a plain dict with empty values pruned.

        ``dataclasses.asdict`` builds a fresh, recursively converted dict
        (so mutating the result does not touch this instance), which is then
        passed through ``_simplify_dict`` to drop ``None``/empty entries.
        """
        return _simplify_dict(asdict(self))
+
@dataclass
class GrobidJournal:
    """Journal record; every field is optional and defaults to ``None``."""

    # Positional order below is part of the generated ``__init__`` signature.
    name: Optional[str] = None
    abbrev: Optional[str] = None
    publisher: Optional[str] = None
    volume: Optional[str] = None
    issue: Optional[str] = None
    issn: Optional[str] = None
    eissn: Optional[str] = None
+
@dataclass
class GrobidHeader:
    """Document header record; every field is optional and defaults to ``None``."""

    title: Optional[str] = None
    # A list of author records, matching ``GrobidCitation.authors``; the
    # previous ``Optional[str]`` annotation did not fit a plural author field.
    # Annotations are not enforced at runtime, so behavior is unchanged.
    # NOTE(review): confirm against the parser that populates this field.
    authors: Optional[List[GrobidAuthor]] = None
    date: Optional[str] = None
    doi: Optional[str] = None
    #TODO: note: Optional[str]
    journal: Optional[GrobidJournal] = None
+
@dataclass
class GrobidDocument:
    """Top-level document record.

    ``grobid_version``, ``grobid_timestamp`` and ``header`` are required;
    the remaining fields are optional and default to ``None``.
    """

    grobid_version: str
    grobid_timestamp: str
    #TODO: pdf_md5: Optional[str]
    header: GrobidHeader
    citations: Optional[List[GrobidCitation]] = None
    language_code: Optional[str] = None
    abstract: Optional[str] = None
    body: Optional[str] = None
    acknowledgement: Optional[str] = None
    annex: Optional[str] = None

    def to_dict(self) -> dict:
        """Return this document as a plain dict with empty values pruned.

        ``dataclasses.asdict`` recursively converts nested dataclasses
        (header, citations, ...) into a fresh dict, which is then passed
        through ``_simplify_dict`` to drop ``None``/empty entries.
        """
        return _simplify_dict(asdict(self))
+
+def _simplify_dict(d: dict) -> dict:
+ for k in list(d.keys()):
+ if isinstance(d[k], dict):
+ d[k] = _simplify_dict(d[k])
+ if d[k] in [None, [], {}, '']:
+ d.pop(k)
+ return d