diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-10 13:52:39 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-10 13:52:43 -0800 |
commit | ddc757bc1d5c610f42e9f5f10a4f060f517b66ca (patch) | |
tree | 86ccdef998bd3da3910cfe8fb9f2177b58a664a0 /python/fatcat_tools/importers/grobid_metadata.py | |
parent | 16e9979a6f347b49764c1141209e84083ea81057 (diff) | |
download | fatcat-ddc757bc1d5c610f42e9f5f10a4f060f517b66ca.tar.gz fatcat-ddc757bc1d5c610f42e9f5f10a4f060f517b66ca.zip |
refactor importer metadata tables into separate file; move some helpers around
- MAX_ABSTRACT_LENGTH set in a single place (importer common)
- merge datacite license slug table into common table, removing some
TDM-specific licenses (which do not apply in the context of preserving
the full work)
Diffstat (limited to 'python/fatcat_tools/importers/grobid_metadata.py')
-rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 6 |
1 files changed, 2 insertions, 4 deletions
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index 9db499a0..3c85132c 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -9,9 +9,7 @@ from fatcat_openapi_client import ApiClient, FileEntity, ReleaseEntity from fatcat_tools.normal import clean_doi, clean_str -from .common import EntityImporter, make_rel_url - -MAX_ABSTRACT_BYTES = 4096 +from .common import MAX_ABSTRACT_LENGTH, EntityImporter, make_rel_url class GrobidMetadataImporter(EntityImporter): @@ -84,7 +82,7 @@ class GrobidMetadataImporter(EntityImporter): extra_grobid: Dict[str, Any] = dict() abstract = obj.get("abstract") - if abstract and len(abstract) < MAX_ABSTRACT_BYTES and len(abstract) > 10: + if abstract and len(abstract) < MAX_ABSTRACT_LENGTH and len(abstract) > 10: abobj = fatcat_openapi_client.ReleaseAbstract( mimetype="text/plain", content=clean_str(obj.get("abstract")) ) |