diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-11-12 14:54:44 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-11-12 14:54:44 -0800 |
commit | 807233c4625dede9399b73b58b014bc5ce3abcda (patch) | |
tree | d5f2477c3c5e89d159e0f43c700d59f9ae1c575a /python | |
parent | bdccd79d741cab89cd28202a352044ed55624503 (diff) | |
download | sandcrawler-807233c4625dede9399b73b58b014bc5ce3abcda.tar.gz sandcrawler-807233c4625dede9399b73b58b014bc5ce3abcda.zip |
ingest_file: more efficient GROBID metadata copy
Diffstat (limited to 'python')
-rw-r--r-- | python/sandcrawler/ingest_file.py | 6 |
1 file changed, 3 insertions, 3 deletions
diff --git a/python/sandcrawler/ingest_file.py b/python/sandcrawler/ingest_file.py
index b6a5115..d0c3e0e 100644
--- a/python/sandcrawler/ingest_file.py
+++ b/python/sandcrawler/ingest_file.py
@@ -378,9 +378,9 @@ class IngestFileWorker(SandcrawlerWorker):
         if result["status"] == "success":
             metadata = self.grobid_client.metadata(result)
             if metadata:
-                result["metadata"] = self.grobid_client.metadata(result)
-                result["fatcat_release"] = result["metadata"].pop("fatcat_release", None)
-                result["grobid_version"] = result["metadata"].pop("grobid_version", None)
+                result["metadata"] = metadata
+                result["fatcat_release"] = metadata.pop("fatcat_release", None)
+                result["grobid_version"] = metadata.pop("grobid_version", None)
         result.pop("tei_xml", None)
         result.pop("file_meta", None)
         result.pop("key", None)