aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-10-19 19:55:19 -0700
committerBryan Newbold <bnewbold@archive.org>2020-10-19 19:55:19 -0700
commitcdd0157f2e86086610b7c1a6288bda7d3fc7da7d (patch)
tree219bd0ae0041a1941693f021125c0652c4ae51e5
parent9413d5e8dcdac01fc5d4fd65de6d19cffe02b872 (diff)
downloadfatcat-scholar-cdd0157f2e86086610b7c1a6288bda7d3fc7da7d.tar.gz
fatcat-scholar-cdd0157f2e86086610b7c1a6288bda7d3fc7da7d.zip
more SIM metadata mappings
-rw-r--r--fatcat_scholar/transform.py34
1 files changed, 31 insertions, 3 deletions
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index 590d44a..8ba8b2a 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -46,7 +46,11 @@ def es_sim_from_sim(sim: Dict[str, Any]) -> ScholarSim:
SIM_RELEASE_TYPE_MAP = {
"Scholarly Journals": "article-journal",
"Trade Journals": "article-magazine",
- # TODO:
+ "Historical Journals": "article",
+ "Magazines": "article-magazine",
+ "Government Documents": "article",
+ "Law Journals": "article",
+ # TODO: more categories?
}
SIM_LANG_MAP = {
"English": "en",
@@ -55,13 +59,37 @@ SIM_LANG_MAP = {
"French": "fr",
"Afrikaans": "af",
"Spanish": "es",
- # TODO: more
+ "Portuguese": "pt",
+ "Polish": "pl",
+ # TODO: more languages in SIM corpus
}
SIM_COUNTRY_MAP = {
"United States": "us",
"Germany": "de",
"Netherlands": "nl",
- # TODO:
+ "United Kingdom": "gb",
+ "Canada": "ca",
+ "Switzerland": "ch",
+ "South Africa": "za",
+ "Japan": "jp",
+ "France": "fr",
+ "India": "in",
+ "Chile": "cl",
+ "Brazil": "br",
+ "Australia": "au",
+ "Argentina": "ar",
+ "Uganda": "ug",
+ "Thailand": "th",
+ "Puerto Rico": "pr",
+ "Poland": "pl",
+ "Philippines": "ph",
+ "Mexico": "mx",
+ "Jamaica": "jm",
+ "Italy": "it",
+ "Ireland": "ie",
+ "Finland": "fi",
+ "Bulgaria": "bg",
+ # TODO: more countries in SIM corpus
}