single item verification

author: Martin Czygan <martin.czygan@gmail.com> 2020-12-15 02:39:35 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2020-12-15 02:39:35 +0100
commit: f939949ae3889078ef2c9d77d1cffdd939e11435 (patch)
tree: eeffa30293db814523582125f1b94c6e7e3517f5 /fuzzycat/utils.py
parent: b18c9c73150679a8e1ac92cd0bea7a649de0b39b (diff)
download: fuzzycat-f939949ae3889078ef2c9d77d1cffdd939e11435.tar.gz
fuzzycat-f939949ae3889078ef2c9d77d1cffdd939e11435.zip
1 files changed, 34 insertions, 0 deletions
diff --git a/fuzzycat/utils.py b/fuzzycat/utils.py
index 682f912..cf74220 100644
--- a/fuzzycat/utils.py
+++ b/fuzzycat/utils.py
@@ -1,6 +1,8 @@
 import collections
 import io
 import itertools
+import os
+import random
 import re
 import string
 
@@ -13,6 +15,7 @@ CHEM_FORMULA = re.compile(r"([A-Z]{1,2}[0-9]{1,2})+")
 
 ParsedPages = collections.namedtuple("ParsedPages", "start end count")
 
+
 def parse_page_string(s):
     """
     Parse typical page strings, e.g. 150-180.
@@ -35,6 +38,7 @@ def parse_page_string(s):
     count = b - a + 1
     return ParsedPages(start=a, end=b, count=count)
 
+
 def dict_key_exists(doc, path):
     """
     Return true, if key in a dictionary at a given path exists. XXX: probably
@@ -138,3 +142,33 @@ def contains_chemical_formula(s):
     for token in s.split():
         if CHEM_FORMULA.search(token):
             return True
+
+
+def random_word(func=lambda w: True, wordsfile='/usr/share/dict/words'):
+    """
+    Pick one word at random from a words file, by default the UNIX words
+    file at its usual location. Each line is stripped of surrounding
+    whitespace and only those accepted by `func` are candidates.
+
+    Raises a RuntimeError, if the words file does not exist.
+    """
+    if os.path.exists(wordsfile):
+        with open(wordsfile) as handle:
+            stripped = map(str.strip, handle)
+            candidates = list(filter(func, stripped))
+        return random.choice(candidates)
+    raise RuntimeError('file not found: {}'.format(wordsfile))
+
+
+def random_idents_from_query(query="*",
+                             es="https://search.fatcat.wiki/fatcat_release/_search",
+                             max_retries=10,
+                             r=2):
+    """
+    Return a list of `r` random idents, sampled from the hits of a search
+    query, or None, if no attempt yielded at least `r` hits.
+
+    The `query` is sent as the `q` parameter to the search endpoint `es`; at
+    most `max_retries` requests are made. Raises a RuntimeError, if the
+    endpoint answers with a status code other than 200.
+    """
+    for _ in range(max_retries):
+        # The response must not be bound to `r`, that name is the sample size.
+        resp = requests.get(es, params={"q": query})
+        if resp.status_code != 200:
+            raise RuntimeError('could not query {} for random item: {}'.format(es, resp.url))
+        hits = resp.json()["hits"]["hits"]
+        # Only hits actually contained in the response can be sampled, so
+        # compare their number (not a fixed 2) against the requested size.
+        if len(hits) < r:
+            continue
+        idents = [doc["_source"]["ident"] for doc in hits]
+        return random.sample(idents, r)
+    return None
author	Martin Czygan <martin.czygan@gmail.com>	2020-12-15 02:39:35 +0100
committer	Martin Czygan <martin.czygan@gmail.com>	2020-12-15 02:39:35 +0100
commit	f939949ae3889078ef2c9d77d1cffdd939e11435 (patch)
tree	eeffa30293db814523582125f1b94c6e7e3517f5 /fuzzycat/utils.py
parent	b18c9c73150679a8e1ac92cd0bea7a649de0b39b (diff)
download	fuzzycat-f939949ae3889078ef2c9d77d1cffdd939e11435.tar.gz fuzzycat-f939949ae3889078ef2c9d77d1cffdd939e11435.zip