aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-06-23 23:11:47 -0700
committerBryan Newbold <bnewbold@archive.org>2020-06-23 23:11:47 -0700
commit66ff1990d81bfc461c4cbbcc46278f785c4f273c (patch)
tree4474a5662e5072c4f6206986847ce9653683099d
parent3a4344d1e26e679b0dc9558d15752e53ce86f8ac (diff)
downloadchocula-66ff1990d81bfc461c4cbbcc46278f785c4f273c.tar.gz
chocula-66ff1990d81bfc461c4cbbcc46278f785c4f273c.zip
add MAG importer; reorder directory class listing
-rw-r--r--chocula/directories/__init__.py23
-rw-r--r--chocula/directories/mag.py60
-rw-r--r--sources.toml5
-rw-r--r--tests/files/ISSN-to-ISSN-L.txt7
-rw-r--r--tests/files/mag-journals.txt25
5 files changed, 110 insertions, 10 deletions
diff --git a/chocula/directories/__init__.py b/chocula/directories/__init__.py
index 17329e1..ed306c0 100644
--- a/chocula/directories/__init__.py
+++ b/chocula/directories/__init__.py
@@ -18,26 +18,29 @@ from chocula.directories.vanished_inactive import VanishedInactiveLoader
from chocula.directories.issn_meta import IssnMetaLoader
from chocula.directories.australian_era import AustralianEraLoader
from chocula.directories.awol import AwolLoader
+from chocula.directories.mag import MagLoader
+# the order of this list roughly determines metadata prioritization
ALL_CHOCULA_DIR_CLASSES = [
- CrossrefLoader,
+ IssnMetaLoader,
+ ManualHomepageLoader,
+ ScieloLoader,
DoajLoader,
+ CrossrefLoader,
EntrezLoader,
EzbLoader,
GoldOALoader,
NorwegianLoader,
+ AustralianEraLoader,
+ SzczepanskiLoader,
+ WikidataLoader,
+ AwolLoader,
+ VanishedDisapearedLoader,
+ VanishedInactiveLoader,
OpenAPCLoader,
RoadLoader,
SherpaRomeoLoader,
- SzczepanskiLoader,
- WikidataLoader,
SimLoader,
- ScieloLoader,
- ManualHomepageLoader,
ZdbFizeLoader,
- VanishedDisapearedLoader,
- VanishedInactiveLoader,
- IssnMetaLoader,
- AustralianEraLoader,
- AwolLoader,
+ MagLoader,
]
diff --git a/chocula/directories/mag.py b/chocula/directories/mag.py
new file mode 100644
index 0000000..dbbc324
--- /dev/null
+++ b/chocula/directories/mag.py
@@ -0,0 +1,60 @@
+from typing import Iterable, Optional
+import csv
+
+from chocula.util import clean_str, clean_issn
+from chocula.common import DirectoryLoader
+from chocula.database import DirectoryInfo, HomepageUrl
+
+
+class MagLoader(DirectoryLoader):
+ """
+ TSV Columns (from schema docs):
+
+ 1 JournalId long PRIMARY KEY
+ 2 Rank uint See FAQ
+ 3 NormalizedName string
+ 4 DisplayName string
+ 5 Issn string
+ 6 Publisher string
+ 7 Webpage string
+ 8 PaperCount long
+ 9 PaperFamilyCount long See FAQ
+ 10 CitationCount long
+ 11 CreatedDate DateTime
+
+ """
+
+ source_slug = "mag"
+
+ def open_file(self) -> Iterable:
+ return csv.DictReader(
+ open(self.config.mag.filepath, "r"),
+ delimiter="\t",
+ fieldnames=[
+ "JournalId",
+ "Rank",
+ "NormalizedName",
+ "DisplayName",
+ "Issn",
+ "Publisher",
+ "Webpage",
+ "PaperCount",
+ "PaperFamilyCount",
+ "CitationCount",
+ "CreatedDate",
+ ],
+ )
+
+ def parse_record(self, record) -> Optional[DirectoryInfo]:
+ info = DirectoryInfo(
+ directory_slug=self.source_slug,
+ raw_issn=clean_issn(record["Issn"]),
+ custom_id=record["JournalId"],
+ name=clean_str(record["DisplayName"]),
+ publisher=clean_str(record["Publisher"]),
+ )
+ homepage = HomepageUrl.from_url(record["Webpage"] or "")
+ if homepage:
+ info.homepage_urls.append(homepage)
+
+ return info
diff --git a/sources.toml b/sources.toml
index e44d85f..9a24420 100644
--- a/sources.toml
+++ b/sources.toml
@@ -168,3 +168,8 @@ filename = "awol-index-top-issn.json"
original_url = "https://isaw.nyu.edu/publications/awol-index/"
mirror_url = "https://archive.org/download/awol-index-json"
+[mag]
+date = "2020-01-23"
+original_url = "https://docs.microsoft.com/en-us/academic-services/graph/"
+filename = "mag-journals.txt"
+mirror_url = "https://archive.org/download/mag-2020-01-23"
diff --git a/tests/files/ISSN-to-ISSN-L.txt b/tests/files/ISSN-to-ISSN-L.txt
index e62a880..655570a 100644
--- a/tests/files/ISSN-to-ISSN-L.txt
+++ b/tests/files/ISSN-to-ISSN-L.txt
@@ -337,3 +337,10 @@ ISSN ISSN-L
0258-8315 0258-8315
2048-7177 2048-7177
0011-1287 0011-1287
+1882-7616 1882-7616
+1948-6553 1948-6553
+0036-4355 0036-4355
+1609-3321 1609-3321
+8756-4629 8756-4629
+1648-6897 1648-6897
+0030-9648 0030-9648
diff --git a/tests/files/mag-journals.txt b/tests/files/mag-journals.txt
new file mode 100644
index 0000000..8148bba
--- /dev/null
+++ b/tests/files/mag-journals.txt
@@ -0,0 +1,25 @@
+465895 12312 eureka Eureka 1342-5641 http://www.archim.org.uk/eureka/ 1106 168 2016-06-24
+1137746 11759 the artist and journal of home culture The Artist and Journal of Home Culture 2151-4879 518 8675 2016-06-24
+2978343 15234 cumberland law review Cumberland Law Review 0360-8298 http://www.cumberlandlawreview.com/ 47 44 2016-06-24
+3010151 14881 comparative medicine east and west Comparative Medicine East and West 0147-2917 19 122 2016-06-24
+3164724 9968 physiological measurement Physiological Measurement 0967-3334 3276 69046 2016-06-24
+17807283 9520 theoretical population biology Theoretical Population Biology 0040-5809 http://www.journals.elsevier.com/theoretical-population-biology/ 2478 107627 2016-06-24
+18204665 9332 international journal of multiphase flow International Journal of Multiphase Flow 0301-9322 4818 124251 2016-06-24
+27908409 11407 acta veterinaria hungarica Acta Veterinaria Hungarica 0236-6290 1727 11463 2016-06-24
+40275224 12875 revista latinoamericana de quimica Revista Latinoamericana de Química 0370-5943 http://www.relaquim.com/ 557 815 2016-06-24
+42124886 9771 scandinavian journal of rheumatology Scandinavian Journal of Rheumatology 0300-9742 4499 77427 2016-06-24
+2739456633 13593 derim Derim 2149-2182 308 98 2017-07-31
+2764395831 13860 the naham management journal The NAHAM management journal 1057-3526 229 25 2017-10-27
+49789000 9765 fatigue & fracture of engineering materials & structures Fatigue & Fracture of Engineering Materials & Structures 1460-2695 3983 65247 2016-06-24
+2736806674 12364 sequencia estudos juridicos e politicos Sequência: Estudos Juridicos e Politicos 0101-9562 949 240 2017-07-31
+2764944378 13733 journal of sport psychology in action Journal of sport psychology in action 2152-0712 220 956 2017-10-27
+2789086 13552 japanese dental science review Japanese Dental Science Review 1882-7616 212 1672 2016-06-24
+26559856 15149 obesity and weight management Obesity and Weight Management 1948-6553 55 119 2016-06-24
+2755455544 11243 sangre Sangre 0036-4355 3038 2091 2017-09-25
+2764925653 12051 applied journal of general practice Applied Journal of General Practice 1465 29 2017-10-27
+28565804 11453 moscow mathematical journal Moscow Mathematical Journal 1609-3321 http://www.ams.org/distribution/mmj/ 668 8844 2016-06-24
+26007720 14383 journal of geriatric drug therapy Journal of Geriatric Drug Therapy 8756-4629 99 247 2016-06-24
+117080907 11805 journal of the korean society for library and information science Journal of The Korean Society for Library and Information Science 1631 1458 2016-06-24
+62893890 12653 journal of environmental engineering and landscape management Journal of Environmental Engineering and Landscape Management 1648-6897 526 3649 2016-06-24
+2764717979 14185 journal of basic research in medical sciences Journal of Basic Research in Medical Sciences 170 56 2017-10-27
+2739194844 12343 pakistan armed forces medical journal Pakistan Armed Forces Medical Journal 0030-9648 1088 90 2017-07-31