diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2020-10-19 19:55:19 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2020-10-19 19:55:19 -0700 | 
| commit | cdd0157f2e86086610b7c1a6288bda7d3fc7da7d (patch) | |
| tree | 219bd0ae0041a1941693f021125c0652c4ae51e5 | |
| parent | 9413d5e8dcdac01fc5d4fd65de6d19cffe02b872 (diff) | |
| download | fatcat-scholar-cdd0157f2e86086610b7c1a6288bda7d3fc7da7d.tar.gz fatcat-scholar-cdd0157f2e86086610b7c1a6288bda7d3fc7da7d.zip | |
more SIM metadata mappings
| -rw-r--r-- | fatcat_scholar/transform.py | 34 | 
1 file changed, 31 insertions, 3 deletions
| diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py index 590d44a..8ba8b2a 100644 --- a/fatcat_scholar/transform.py +++ b/fatcat_scholar/transform.py @@ -46,7 +46,11 @@ def es_sim_from_sim(sim: Dict[str, Any]) -> ScholarSim:  SIM_RELEASE_TYPE_MAP = {      "Scholarly Journals": "article-journal",      "Trade Journals": "article-magazine", -    # TODO: +    "Historical Journals": "article", +    "Magazines": "article-magazine", +    "Government Documents": "article", +    "Law Journals": "article", +    # TODO: more categories?  }  SIM_LANG_MAP = {      "English": "en", @@ -55,13 +59,37 @@ SIM_LANG_MAP = {      "French": "fr",      "Afrikaans": "af",      "Spanish": "es", -    # TODO: more +    "Portuguese": "pt", +    "Polish": "pl", +    # TODO: more languages in SIM corpus  }  SIM_COUNTRY_MAP = {      "United States": "us",      "Germany": "de",      "Netherlands": "nl", -    # TODO: +    "United Kingdom": "", +    "Canada": "ca", +    "Switzerland": "ch", +    "South Africa": "za", +    "Japan": "jp", +    "France": "fr", +    "India": "in", +    "Chile": "cl", +    "Brazil": "br", +    "Australia": "au", +    "Argentina": "ar", +    "Uganda": "ug", +    "Thailand": "th", +    "Puerto Rico": "pr", +    "Poland": "pl", +    "Philippines": "ph", +    "Mexico": "mx", +    "Jamaica": "jm", +    "Italy": "it", +    "Ireland": "ie", +    "Finland": "fi", +    "Bulgaria": "bg", +    # TODO: more countries in SIM corpus  } | 
