diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-12-02 11:29:29 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-12-17 23:03:08 -0800 |
commit | a57de2331caa6f92242dae6197b44926801b429a (patch) | |
tree | 0d401b2fff9cde24bbb275c8c0c81d1f0430797f | |
parent | 7d1ac51fb1b67f64f03e4c6e943202085cd4faa9 (diff) | |
download | fatcat-a57de2331caa6f92242dae6197b44926801b429a.tar.gz fatcat-a57de2331caa6f92242dae6197b44926801b429a.zip |
wikidata QID normalize helper
-rw-r--r-- | python/fatcat_tools/normal.py | 26 |
1 files changed, 24 insertions, 2 deletions
```diff
diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py
index 9ee641fa..d792979d 100644
--- a/python/fatcat_tools/normal.py
+++ b/python/fatcat_tools/normal.py
@@ -133,6 +133,28 @@ def test_clean_arxiv_id():
     assert clean_arxiv_id("0806.v1") == None
     assert clean_arxiv_id("08062878v1") == None
 
+def clean_wikidata_qid(raw):
+    if not raw:
+        return None
+    raw = raw.strip()
+    if len(raw.split()) != 1 or len(raw) < 2:
+        return None
+    if raw[0] == 'Q' and raw[1] != '0' and raw[1:].isdigit():
+        return raw
+    return None
+
+def test_clean_wikidata_qid():
+    assert clean_wikidata_qid("Q1234") == "Q1234"
+    assert clean_wikidata_qid("Q1") == "Q1"
+    assert clean_wikidata_qid(" Q1234 ") == "Q1234"
+    assert clean_wikidata_qid(" Q1 234 ") == None
+    assert clean_wikidata_qid("q1234") == None
+    assert clean_wikidata_qid("1234 ") == None
+    assert clean_wikidata_qid("Q0123") == None
+    assert clean_wikidata_qid("PMC123") == None
+    assert clean_wikidata_qid("qfba3") == None
+    assert clean_wikidata_qid("") == None
+
 def clean_pmid(raw):
     if not raw:
         return None
@@ -147,8 +169,8 @@ def test_clean_pmid():
     assert clean_pmid("1234") == "1234"
     assert clean_pmid("1234 ") == "1234"
     assert clean_pmid("PMC123") == None
-    assert clean_sha1("qfba3") == None
-    assert clean_sha1("") == None
+    assert clean_pmid("qfba3") == None
+    assert clean_pmid("") == None
 
 def clean_pmcid(raw):
     if not raw:
```