diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-09 19:37:57 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-09 19:37:57 -0800 |
commit | 415cc3c78940247bcb48afcb62a612fec03261eb (patch) | |
tree | 63c67a513c39c78bf5c20ca3c6809349c03af4d7 /python/fatcat_tools/cleanups | |
parent | fd71860047f84305a50772aa2e7494fd61cfff30 (diff) | |
download | fatcat-415cc3c78940247bcb48afcb62a612fec03261eb.tar.gz fatcat-415cc3c78940247bcb48afcb62a612fec03261eb.zip |
update link source filters in file/release bugfix
Diffstat (limited to 'python/fatcat_tools/cleanups')
-rw-r--r-- | python/fatcat_tools/cleanups/file_release_bugfix.py | 10 |
1 file changed, 8 insertions, 2 deletions
diff --git a/python/fatcat_tools/cleanups/file_release_bugfix.py b/python/fatcat_tools/cleanups/file_release_bugfix.py
index 025c1370..6eb60205 100644
--- a/python/fatcat_tools/cleanups/file_release_bugfix.py
+++ b/python/fatcat_tools/cleanups/file_release_bugfix.py
@@ -49,9 +49,15 @@ class FileReleaseBugfix(EntityImporter):
         ):
             self.counts["skip-partial"] += 1
             return False
-        if row["edit_extra"]["link_source"] not in ["unpaywall", "fatcat-changelog"]:
+        if row["edit_extra"]["link_source"] not in ["unpaywall", "doi"]:
             self.counts["skip-link-source"] += 1
             return False
+        if row["edit_extra"].get("ingest_request_source") not in [
+            "unpaywall",
+            "fatcat-changelog",
+        ]:
+            self.counts["skip-ingest-request-source"] += 1
+            return False
         if not row["edit_extra"]["link_source_id"].startswith("10."):
             self.counts["skip-source-id-not-doi"] += 1
             return False
@@ -65,7 +71,7 @@ class FileReleaseBugfix(EntityImporter):
         file_ident = uuid2fcid(row["file_ident"])
         wrong_release_ident = uuid2fcid(row["wrong_release_ident"])
         edit_extra = row["edit_extra"]
-        assert edit_extra["link_source"] in ["unpaywall", "fatcat-changelog"]
+        assert edit_extra["link_source"] in ["unpaywall", "doi"]
         file_edit_doi = clean_doi(edit_extra["link_source_id"])
 
         if not file_edit_doi: