From ddc757bc1d5c610f42e9f5f10a4f060f517b66ca Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 10 Nov 2021 13:52:39 -0800 Subject: refactor importer metadata tables into a separate file; move some helpers around - MAX_ABSTRACT_LENGTH set in a single place (importer common) - merge datacite license slug table into the common table, removing some TDM-specific licenses (which do not apply in the context of preserving the full work) --- python/fatcat_tools/importers/grobid_metadata.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'python/fatcat_tools/importers/grobid_metadata.py') diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index 9db499a0..3c85132c 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -9,9 +9,7 @@ from fatcat_openapi_client import ApiClient, FileEntity, ReleaseEntity from fatcat_tools.normal import clean_doi, clean_str -from .common import EntityImporter, make_rel_url - -MAX_ABSTRACT_BYTES = 4096 +from .common import MAX_ABSTRACT_LENGTH, EntityImporter, make_rel_url class GrobidMetadataImporter(EntityImporter): @@ -84,7 +82,7 @@ class GrobidMetadataImporter(EntityImporter): extra_grobid: Dict[str, Any] = dict() abstract = obj.get("abstract") - if abstract and len(abstract) < MAX_ABSTRACT_BYTES and len(abstract) > 10: + if abstract and len(abstract) < MAX_ABSTRACT_LENGTH and len(abstract) > 10: abobj = fatcat_openapi_client.ReleaseAbstract( mimetype="text/plain", content=clean_str(obj.get("abstract")) ) -- cgit v1.2.3