summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2020-12-02 11:29:29 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2020-12-17 23:03:08 -0800
commit a57de2331caa6f92242dae6197b44926801b429a (patch)
tree 0d401b2fff9cde24bbb275c8c0c81d1f0430797f
parent 7d1ac51fb1b67f64f03e4c6e943202085cd4faa9 (diff)
download fatcat-a57de2331caa6f92242dae6197b44926801b429a.tar.gz
fatcat-a57de2331caa6f92242dae6197b44926801b429a.zip
wikidata QID normalize helper
-rw-r--r-- python/fatcat_tools/normal.py 26
1 files changed, 24 insertions, 2 deletions
diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py
index 9ee641fa..d792979d 100644
--- a/python/fatcat_tools/normal.py
+++ b/python/fatcat_tools/normal.py
@@ -133,6 +133,28 @@ def test_clean_arxiv_id():
assert clean_arxiv_id("0806.v1") == None
assert clean_arxiv_id("08062878v1") == None
+def clean_wikidata_qid(raw):
+    """Return `raw` stripped if it is 'Q' + ASCII digits with no leading zero, else None."""
+    if not raw:
+        return None
+    raw = raw.strip()
+    # explicit digit set: str.isdigit() would also accept non-ASCII digits like '²' or '١'
+    if raw[:1] == 'Q' and raw[1:2] in set('123456789') and set(raw[2:]) <= set('0123456789'):
+        return raw
+    return None
+
+def test_clean_wikidata_qid():
+    """Check accepted and rejected inputs of clean_wikidata_qid()."""
+    # well-formed QIDs come back with surrounding whitespace removed
+    accepted = {"Q1234": "Q1234", "Q1": "Q1", " Q1234 ": "Q1234"}
+    for given, wanted in accepted.items():
+        assert clean_wikidata_qid(given) == wanted
+    # embedded space, lowercase prefix, missing prefix, leading zero,
+    # foreign identifier, junk, and empty string are all rejected
+    rejected = [" Q1 234 ", "q1234", "1234 ", "Q0123", "PMC123", "qfba3", ""]
+    for given in rejected:
+        assert clean_wikidata_qid(given) is None
+
def clean_pmid(raw):
if not raw:
return None
@@ -147,8 +169,8 @@ def test_clean_pmid():
assert clean_pmid("1234") == "1234"
assert clean_pmid("1234 ") == "1234"
assert clean_pmid("PMC123") == None
- assert clean_sha1("qfba3") == None
- assert clean_sha1("") == None
+ assert clean_pmid("qfba3") == None
+ assert clean_pmid("") == None
def clean_pmcid(raw):
if not raw: