diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-10-19 19:55:19 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-10-19 19:55:19 -0700 |
commit | cdd0157f2e86086610b7c1a6288bda7d3fc7da7d (patch) | |
tree | 219bd0ae0041a1941693f021125c0652c4ae51e5 | |
parent | 9413d5e8dcdac01fc5d4fd65de6d19cffe02b872 (diff) | |
download | fatcat-scholar-cdd0157f2e86086610b7c1a6288bda7d3fc7da7d.tar.gz fatcat-scholar-cdd0157f2e86086610b7c1a6288bda7d3fc7da7d.zip |
more SIM metadata mappings
-rw-r--r-- | fatcat_scholar/transform.py | 34 |
1 files changed, 31 insertions, 3 deletions
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index 590d44a..8ba8b2a 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -46,7 +46,11 @@ def es_sim_from_sim(sim: Dict[str, Any]) -> ScholarSim:
 SIM_RELEASE_TYPE_MAP = {
     "Scholarly Journals": "article-journal",
     "Trade Journals": "article-magazine",
-    # TODO:
+    "Historical Journals": "article",
+    "Magazines": "article-magazine",
+    "Government Documents": "article",
+    "Law Journals": "article",
+    # TODO: more categories?
 }
 SIM_LANG_MAP = {
     "English": "en",
@@ -55,13 +59,37 @@ SIM_LANG_MAP = {
     "French": "fr",
     "Afrikaans": "af",
     "Spanish": "es",
-    # TODO: more
+    "Portuguese": "pt",
+    "Polish": "pl",
+    # TODO: more languages in SIM corpus
 }
 SIM_COUNTRY_MAP = {
     "United States": "us",
     "Germany": "de",
     "Netherlands": "nl",
-    # TODO:
+    "United Kingdom": "",
+    "Canada": "ca",
+    "Switzerland": "ch",
+    "South Africa": "za",
+    "Japan": "jp",
+    "France": "fr",
+    "India": "in",
+    "Chile": "cl",
+    "Brazil": "br",
+    "Australia": "au",
+    "Argentina": "ar",
+    "Uganda": "ug",
+    "Thailand": "th",
+    "Puerto Rico": "pr",
+    "Poland": "pl",
+    "Philippines": "ph",
+    "Mexico": "mx",
+    "Jamaica": "jm",
+    "Italy": "it",
+    "Ireland": "ie",
+    "Finland": "fi",
+    "Bulgaria": "bg",
+    # TODO: more countries in SIM corpus
 }
 
 