diff options
Diffstat (limited to 'grobid_tei_xml/__main__.py')
-rw-r--r-- | grobid_tei_xml/__main__.py | 29 |
1 files changed, 29 insertions, 0 deletions
"""Command-line entry point: convert GROBID TEI XML files to JSON."""

import argparse
import json

from .parse import parse_article


def main() -> None:  # pragma no cover
    """Parse each TEI file named on the command line and print it as JSON.

    Every positional ``teifiles`` argument is read in full, handed to
    ``parse_article``, and the result is written to stdout with
    ``json.dumps(..., sort_keys=True)`` -- one ``print`` call per input file.

    The ``--no-encumbered`` flag is inverted and forwarded to
    ``parse_article`` as its ``encumbered`` keyword argument.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="GROBID TEI XML to JSON",
        usage="%(prog)s [options] <teifile>...",
    )
    parser.add_argument(
        "--no-encumbered",
        action="store_true",
        help="don't include ambiguously copyright encumbered fields (eg, abstract, body)",
    )
    # nargs="+" makes argparse reject an invocation with no input files.
    parser.add_argument("teifiles", nargs="+")

    args = parser.parse_args()

    for filename in args.teifiles:
        # Use a context manager so each handle is closed before moving on to
        # the next file, rather than relying on garbage collection.
        with open(filename, "r") as tei_file:
            content = tei_file.read()
        print(
            json.dumps(
                parse_article(content, encumbered=(not args.no_encumbered)),
                sort_keys=True,
            )
        )


if __name__ == "__main__":  # pragma no cover
    main()