summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2021-11-03 16:44:42 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2021-11-03 16:52:44 -0700
commit efcc947482e9f281bb9c3f3484c29d2c8deffe09 (patch)
tree 67a36d7c4a70778ccabe3bc787dd2b8ca1f28b8e
parent 0af742bc24f39193a04c913b8abd7258fe10708a (diff)
download fatcat-efcc947482e9f281bb9c3f3484c29d2c8deffe09.tar.gz
fatcat-efcc947482e9f281bb9c3f3484c29d2c8deffe09.zip
cleanups: create a separate JsonLinePusher for cleanup workers (distinct base class)
-rwxr-xr-x python/fatcat_cleanup.py 3
-rw-r--r-- python/fatcat_tools/cleanups/__init__.py 2
-rw-r--r-- python/fatcat_tools/cleanups/common.py 19
3 files changed, 20 insertions, 4 deletions
diff --git a/python/fatcat_cleanup.py b/python/fatcat_cleanup.py
index 8bcc2ea9..b8d0ee4d 100755
--- a/python/fatcat_cleanup.py
+++ b/python/fatcat_cleanup.py
@@ -7,8 +7,7 @@ import sys
import raven
from fatcat_tools import authenticated_api
-from fatcat_tools.cleanups import FileCleaner
-from fatcat_tools.importers import JsonLinePusher
+from fatcat_tools.cleanups import FileCleaner, JsonLinePusher
# Yep, a global. Gets DSN from `SENTRY_DSN` environment variable
sentry_client = raven.Client()
diff --git a/python/fatcat_tools/cleanups/__init__.py b/python/fatcat_tools/cleanups/__init__.py
index 0aeec977..d9438f06 100644
--- a/python/fatcat_tools/cleanups/__init__.py
+++ b/python/fatcat_tools/cleanups/__init__.py
@@ -1,2 +1,2 @@
-from .common import EntityCleaner
+from .common import EntityCleaner, JsonLinePusher
from .files import FileCleaner
diff --git a/python/fatcat_tools/cleanups/common.py b/python/fatcat_tools/cleanups/common.py
index 4e8e49fd..c8ca5800 100644
--- a/python/fatcat_tools/cleanups/common.py
+++ b/python/fatcat_tools/cleanups/common.py
@@ -1,8 +1,9 @@
import copy
import json
import subprocess
+import sys
from collections import Counter
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Sequence
from fatcat_openapi_client import ApiClient, Editgroup
@@ -143,3 +144,19 @@ class EntityCleaner:
assert self._editgroup_id
return self._editgroup_id
+
+
class JsonLinePusher:
    """Feed newline-delimited JSON records to a cleanup worker.

    Every non-blank line of ``json_file`` is parsed with ``json.loads`` and
    handed to ``cleaner.push_record()``. After the input is exhausted,
    ``cleaner.finish()`` is called and its result is printed to stderr and
    returned.
    """

    def __init__(self, cleaner: "EntityCleaner", json_file: Sequence, **kwargs) -> None:
        """Remember the cleaner and the source of JSON lines.

        Args:
            cleaner: object providing ``push_record(record)`` and ``finish()``.
            json_file: iterable yielding one JSON document per line (for
                example an open text file or a list of strings).
            **kwargs: accepted but not used by this class.
        """
        self.cleaner = cleaner
        self.json_file = json_file

    def run(self) -> Counter:
        """Push each parsed record to the cleaner, then finish and report.

        Returns:
            Whatever ``cleaner.finish()`` returns (annotated as a ``Counter``).

        Raises:
            json.JSONDecodeError: if a non-blank line is not valid JSON.
        """
        for line in self.json_file:
            # Lines read from a file keep their trailing newline, so a blank
            # line arrives as "\n", which is truthy. Test the stripped value
            # too, otherwise json.loads() raises on whitespace-only lines.
            if not line or not line.strip():
                continue
            record = json.loads(line)
            self.cleaner.push_record(record)
        counts = self.cleaner.finish()
        print(counts, file=sys.stderr)
        return counts