From 415cc3c78940247bcb48afcb62a612fec03261eb Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 9 Nov 2021 19:37:57 -0800
Subject: update link source filters in file/release bugfix

---
 python/fatcat_tools/cleanups/file_release_bugfix.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'python/fatcat_tools/cleanups')

diff --git a/python/fatcat_tools/cleanups/file_release_bugfix.py b/python/fatcat_tools/cleanups/file_release_bugfix.py
index 025c1370..6eb60205 100644
--- a/python/fatcat_tools/cleanups/file_release_bugfix.py
+++ b/python/fatcat_tools/cleanups/file_release_bugfix.py
@@ -49,9 +49,15 @@ class FileReleaseBugfix(EntityImporter):
         ):
             self.counts["skip-partial"] += 1
             return False
-        if row["edit_extra"]["link_source"] not in ["unpaywall", "fatcat-changelog"]:
+        if row["edit_extra"]["link_source"] not in ["unpaywall", "doi"]:
             self.counts["skip-link-source"] += 1
             return False
+        if row["edit_extra"].get("ingest_request_source") not in [
+            "unpaywall",
+            "fatcat-changelog",
+        ]:
+            self.counts["skip-ingest-request-source"] += 1
+            return False
         if not row["edit_extra"]["link_source_id"].startswith("10."):
             self.counts["skip-source-id-not-doi"] += 1
             return False
@@ -65,7 +71,7 @@ class FileReleaseBugfix(EntityImporter):
         file_ident = uuid2fcid(row["file_ident"])
         wrong_release_ident = uuid2fcid(row["wrong_release_ident"])
         edit_extra = row["edit_extra"]
-        assert edit_extra["link_source"] in ["unpaywall", "fatcat-changelog"]
+        assert edit_extra["link_source"] in ["unpaywall", "doi"]
         file_edit_doi = clean_doi(edit_extra["link_source_id"])
 
         if not file_edit_doi:
-- 
cgit v1.2.3