summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/importers/grobid_metadata.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2021-11-10 13:52:39 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2021-11-10 13:52:43 -0800
commit: ddc757bc1d5c610f42e9f5f10a4f060f517b66ca (patch)
tree: 86ccdef998bd3da3910cfe8fb9f2177b58a664a0 /python/fatcat_tools/importers/grobid_metadata.py
parent: 16e9979a6f347b49764c1141209e84083ea81057 (diff)
download: fatcat-ddc757bc1d5c610f42e9f5f10a4f060f517b66ca.tar.gz
download: fatcat-ddc757bc1d5c610f42e9f5f10a4f060f517b66ca.zip
refactor importer metadata tables into separate file; move some helpers around
- MAX_ABSTRACT_LENGTH set in a single place (importer common)
- merge datacite license slug table into common table, removing some TDM-specific licenses (which do not apply in the context of preserving the full work)
Diffstat (limited to 'python/fatcat_tools/importers/grobid_metadata.py')
-rw-r--r-- python/fatcat_tools/importers/grobid_metadata.py 6
1 files changed, 2 insertions, 4 deletions
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 9db499a0..3c85132c 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -9,9 +9,7 @@ from fatcat_openapi_client import ApiClient, FileEntity, ReleaseEntity
from fatcat_tools.normal import clean_doi, clean_str
-from .common import EntityImporter, make_rel_url
-
-MAX_ABSTRACT_BYTES = 4096
+from .common import MAX_ABSTRACT_LENGTH, EntityImporter, make_rel_url
class GrobidMetadataImporter(EntityImporter):
@@ -84,7 +82,7 @@ class GrobidMetadataImporter(EntityImporter):
extra_grobid: Dict[str, Any] = dict()
abstract = obj.get("abstract")
- if abstract and len(abstract) < MAX_ABSTRACT_BYTES and len(abstract) > 10:
+ if abstract and len(abstract) < MAX_ABSTRACT_LENGTH and len(abstract) > 10:
abobj = fatcat_openapi_client.ReleaseAbstract(
mimetype="text/plain", content=clean_str(obj.get("abstract"))
)