about summary refs log tree commit diff stats
path: root/grobid_tei_xml/__main__.py
diff options
context:
space:
mode:
Diffstat (limited to 'grobid_tei_xml/__main__.py')
-rw-r--r-- grobid_tei_xml/__main__.py 29
1 files changed, 29 insertions, 0 deletions
diff --git a/grobid_tei_xml/__main__.py b/grobid_tei_xml/__main__.py
new file mode 100644
index 0000000..489bd4e
--- /dev/null
+++ b/grobid_tei_xml/__main__.py
@@ -0,0 +1,29 @@
+
+import argparse
+import json
+
+from .parse import parse_article
+
+def main() -> None: # pragma no cover
+ parser = argparse.ArgumentParser(
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+ description="GROBID TEI XML to JSON",
+ usage="%(prog)s [options] <teifile>...",
+ )
+ parser.add_argument(
+ "--no-encumbered",
+ action="store_true",
+ help=
+ "don't include ambiguously copyright encumbered fields (eg, abstract, body)",
+ )
+ parser.add_argument("teifiles", nargs="+")
+
+ args = parser.parse_args()
+
+ for filename in args.teifiles:
+ content = open(filename, "r").read()
+ print(
+ json.dumps(
+ parse_article(content, encumbered=(not args.no_encumbered)),
+ sort_keys=True,
+ ))
+
+# Run the command-line interface only when this module is executed as a
+# script; importing the module does not call main().
+if __name__ == "__main__": # pragma no cover
+ main()