aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--python/fatcat_tools/importers/datacite.py8
1 files changed, 7 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index d4d7a9f5..fe02cac4 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -597,7 +597,13 @@ class DataciteImporter(EntityImporter):
if license_extra:
extra_datacite["license"] = license_extra
if attributes.get("subjects"):
- extra_datacite["subjects"] = attributes["subjects"]
+ # Subjects that have a schemeUri carry too much metadata, which
+ # doesn't compress. Filter them out.
+ extra_subjects = [
+ subj for subj in attributes["subjects"] if not subj.get("schemeUri")
+ ]
+ if extra_subjects:
+ extra_datacite["subjects"] = extra_subjects
# Include version information.
metadata_version = attributes.get("metadataVersion") or ""