From cdd0157f2e86086610b7c1a6288bda7d3fc7da7d Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Mon, 19 Oct 2020 19:55:19 -0700
Subject: more SIM metadata mappings

---
 fatcat_scholar/transform.py | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index 590d44a..8ba8b2a 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -46,7 +46,11 @@ def es_sim_from_sim(sim: Dict[str, Any]) -> ScholarSim:
 SIM_RELEASE_TYPE_MAP = {
     "Scholarly Journals": "article-journal",
     "Trade Journals": "article-magazine",
-    # TODO:
+    "Historical Journals": "article",
+    "Magazines": "article-magazine",
+    "Government Documents": "article",
+    "Law Journals": "article",
+    # TODO: more categories?
 }
 SIM_LANG_MAP = {
     "English": "en",
@@ -55,13 +59,37 @@ SIM_LANG_MAP = {
     "French": "fr",
     "Afrikaans": "af",
     "Spanish": "es",
-    # TODO: more
+    "Portuguese": "pt",
+    "Polish": "pl",
+    # TODO: more languages in SIM corpus
 }
 SIM_COUNTRY_MAP = {
     "United States": "us",
     "Germany": "de",
     "Netherlands": "nl",
-    # TODO:
+    "United Kingdom": "gb",  # ISO 3166-1 alpha-2 code for the UK is "gb" (not "uk"); was left empty
+    "Canada": "ca",
+    "Switzerland": "ch",
+    "South Africa": "za",
+    "Japan": "jp",
+    "France": "fr",
+    "India": "in",
+    "Chile": "cl",
+    "Brazil": "br",
+    "Australia": "au",
+    "Argentina": "ar",
+    "Uganda": "ug",
+    "Thailand": "th",
+    "Puerto Rico": "pr",
+    "Poland": "pl",
+    "Philippines": "ph",
+    "Mexico": "mx",
+    "Jamaica": "jm",
+    "Italy": "it",
+    "Ireland": "ie",
+    "Finland": "fi",
+    "Bulgaria": "bg",
+    # TODO: more countries in SIM corpus
 }
 
 
-- 
cgit v1.2.3