about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2021-11-09 19:37:57 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2021-11-09 19:37:57 -0800
commit: 415cc3c78940247bcb48afcb62a612fec03261eb (patch)
tree: 63c67a513c39c78bf5c20ca3c6809349c03af4d7
parent: fd71860047f84305a50772aa2e7494fd61cfff30 (diff)
download: fatcat-415cc3c78940247bcb48afcb62a612fec03261eb.tar.gz
fatcat-415cc3c78940247bcb48afcb62a612fec03261eb.zip
update link source filters in file/release bugfix
-rw-r--r-- python/fatcat_tools/cleanups/file_release_bugfix.py | 10
1 files changed, 8 insertions, 2 deletions
diff --git a/python/fatcat_tools/cleanups/file_release_bugfix.py b/python/fatcat_tools/cleanups/file_release_bugfix.py
index 025c1370..6eb60205 100644
--- a/python/fatcat_tools/cleanups/file_release_bugfix.py
+++ b/python/fatcat_tools/cleanups/file_release_bugfix.py
@@ -49,9 +49,15 @@ class FileReleaseBugfix(EntityImporter):
):
self.counts["skip-partial"] += 1
return False
- if row["edit_extra"]["link_source"] not in ["unpaywall", "fatcat-changelog"]:
+ if row["edit_extra"]["link_source"] not in ["unpaywall", "doi"]:
self.counts["skip-link-source"] += 1
return False
+ if row["edit_extra"].get("ingest_request_source") not in [
+ "unpaywall",
+ "fatcat-changelog",
+ ]:
+ self.counts["skip-ingest-request-source"] += 1
+ return False
if not row["edit_extra"]["link_source_id"].startswith("10."):
self.counts["skip-source-id-not-doi"] += 1
return False
@@ -65,7 +71,7 @@ class FileReleaseBugfix(EntityImporter):
file_ident = uuid2fcid(row["file_ident"])
wrong_release_ident = uuid2fcid(row["wrong_release_ident"])
edit_extra = row["edit_extra"]
- assert edit_extra["link_source"] in ["unpaywall", "fatcat-changelog"]
+ assert edit_extra["link_source"] in ["unpaywall", "doi"]
file_edit_doi = clean_doi(edit_extra["link_source_id"])
if not file_edit_doi: