diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2022-04-20 16:05:29 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2022-04-20 16:05:29 -0700 |
commit | 3a8dada3267c56fd62b84201b4af96889e4103e6 (patch) | |
tree | 00278249dc6879e2ad0d4c617263cdd6265516f9 /extra/bulk_edits/2022-04-20_isiarticles.md | |
parent | cf7412634e3a6935d3f8f8a482d35242b7b17018 (diff) | |
download | fatcat-3a8dada3267c56fd62b84201b4af96889e4103e6.tar.gz fatcat-3a8dada3267c56fd62b84201b4af96889e4103e6.zip |
cleanups: isiarticles
Diffstat (limited to 'extra/bulk_edits/2022-04-20_isiarticles.md')
-rw-r--r-- | extra/bulk_edits/2022-04-20_isiarticles.md | 26 |
1 files changed, 26 insertions, 0 deletions
diff --git a/extra/bulk_edits/2022-04-20_isiarticles.md b/extra/bulk_edits/2022-04-20_isiarticles.md new file mode 100644 index 00000000..ca2cc6f9 --- /dev/null +++ b/extra/bulk_edits/2022-04-20_isiarticles.md @@ -0,0 +1,26 @@ + +See metadata cleanups for context. Basically, a couple of tens of thousands of sample/spam articles are hosted on the domain isiarticles.com. + +## Prod Updates + +Start small: + + export FATCAT_API_HOST=https://api.fatcat.wiki + export FATCAT_AUTH_WORKER_CLEANUP=[...] + export FATCAT_API_AUTH_TOKEN=$FATCAT_AUTH_WORKER_CLEANUP + + fatcat-cli search file domain:isiarticles.com --entity-json -n0 \ + | rg -v '"content_scope"' \ + | rg 'isiarticles.com/' \ + | head -n50 \ + | pv -l \ + | fatcat-cli batch update file release_ids= content_scope=sample --description 'Un-link and mark isiarticles PDFs as content_scope=sample' --auto-accept + # editgroup_ihx75kzsebgzfisgjrv67zew5e + +The full batch: + + fatcat-cli search file domain:isiarticles.com --entity-json -n0 \ + | rg -v '"content_scope"' \ + | rg 'isiarticles.com/' \ + | pv -l \ + | fatcat-cli batch update file release_ids= content_scope=sample --description 'Un-link and mark isiarticles PDFs as content_scope=sample' --auto-accept |