aboutsummaryrefslogtreecommitdiffstats
path: root/notebooks/Journal_Names.ipynb
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2020-08-12 20:07:47 +0200
committerMartin Czygan <martin.czygan@gmail.com>2020-08-12 20:07:47 +0200
commit139f3eb991f560cee365b68eaab415398c1a8d37 (patch)
tree8168d8232915767633b40c425f1dbc0b8f14845b /notebooks/Journal_Names.ipynb
parentd06877139f4148ecc10d7976cc9dfd9c0582a6e9 (diff)
downloadfuzzycat-139f3eb991f560cee365b68eaab415398c1a8d37.tar.gz
fuzzycat-139f3eb991f560cee365b68eaab415398c1a8d37.zip
update notebook
Diffstat (limited to 'notebooks/Journal_Names.ipynb')
-rw-r--r--notebooks/Journal_Names.ipynb815
1 files changed, 729 insertions, 86 deletions
diff --git a/notebooks/Journal_Names.ipynb b/notebooks/Journal_Names.ipynb
index 9b28d6e..68fedb5 100644
--- a/notebooks/Journal_Names.ipynb
+++ b/notebooks/Journal_Names.ipynb
@@ -27,7 +27,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -37,12 +37,12 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"with open(\"../data/name_to_issn.json\") as f:\n",
- " mapping = json.load(f)\n"
+ " mapping = json.load(f)"
]
},
{
@@ -54,7 +54,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -63,7 +63,7 @@
"2929727"
]
},
- "execution_count": 7,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -74,7 +74,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -83,7 +83,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -92,7 +92,7 @@
"2929727"
]
},
- "execution_count": 25,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -103,7 +103,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -170,7 +170,7 @@
"4 ASMT news 1"
]
},
- "execution_count": 26,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -181,7 +181,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -190,7 +190,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -199,7 +199,7 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -208,7 +208,7 @@
"194241"
]
},
- "execution_count": 34,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -219,7 +219,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -228,7 +228,7 @@
"0.06630003409874026"
]
},
- "execution_count": 33,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -246,7 +246,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -322,7 +322,7 @@
"max 8980.000000"
]
},
- "execution_count": 35,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -340,7 +340,7 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -351,7 +351,7 @@
"Name: 45907, dtype: object"
]
},
- "execution_count": 40,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -369,7 +369,7 @@
},
{
"cell_type": "code",
- "execution_count": 42,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -387,7 +387,7 @@
" '0702-7702']"
]
},
- "execution_count": 42,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -405,7 +405,7 @@
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -414,7 +414,7 @@
"24107"
]
},
- "execution_count": 45,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -425,7 +425,7 @@
},
{
"cell_type": "code",
- "execution_count": 49,
+ "execution_count": 15,
"metadata": {},
"outputs": [
{
@@ -455,53 +455,53 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
- " <th>322100</th>\n",
- " <td>Philosophica.</td>\n",
- " <td>17</td>\n",
+ " <th>395908</th>\n",
+ " <td>Criminal trial manual.</td>\n",
+ " <td>4</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>183928</th>\n",
- " <td>Edad de oro.</td>\n",
+ " <th>336247</th>\n",
+ " <td>Mesures.</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>294309</th>\n",
- " <td>Horoskop.</td>\n",
- " <td>10</td>\n",
+ " <th>2242923</th>\n",
+ " <td>Tinere speranţe.</td>\n",
+ " <td>4</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>517039</th>\n",
- " <td>˜Le œGrand journal.</td>\n",
- " <td>11</td>\n",
+ " <th>380962</th>\n",
+ " <td>Résistances.</td>\n",
+ " <td>5</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>1664616</th>\n",
- " <td>Caleidoscop şcolar.</td>\n",
- " <td>4</td>\n",
+ " <th>823415</th>\n",
+ " <td>˜Le œPaysan.</td>\n",
+ " <td>14</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>258430</th>\n",
- " <td>˜La œFeuille.</td>\n",
- " <td>34</td>\n",
+ " <th>157897</th>\n",
+ " <td>Barron's regents exams and answers.</td>\n",
+ " <td>18</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>309546</th>\n",
- " <td>˜The œWilson quarterly.</td>\n",
- " <td>4</td>\n",
+ " <th>468031</th>\n",
+ " <td>Revista científica.</td>\n",
+ " <td>14</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>795859</th>\n",
- " <td>Introductory research essay</td>\n",
+ " <th>1102294</th>\n",
+ " <td>Kotoba to bunka.</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>1470838</th>\n",
- " <td>Publicaciones del SEMYR.</td>\n",
- " <td>4</td>\n",
+ " <th>2049420</th>\n",
+ " <td>˜L'œHarmonie.</td>\n",
+ " <td>7</td>\n",
" </tr>\n",
" <tr>\n",
- " <th>657041</th>\n",
- " <td>˜Le œKiosque.</td>\n",
+ " <th>10843</th>\n",
+ " <td>Design.</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
@@ -509,20 +509,20 @@
"</div>"
],
"text/plain": [
- " name issn_count\n",
- "322100 Philosophica. 17\n",
- "183928 Edad de oro. 4\n",
- "294309 Horoskop. 10\n",
- "517039 ˜Le œGrand journal. 11\n",
- "1664616 Caleidoscop şcolar. 4\n",
- "258430 ˜La œFeuille. 34\n",
- "309546 ˜The œWilson quarterly. 4\n",
- "795859 Introductory research essay 4\n",
- "1470838 Publicaciones del SEMYR. 4\n",
- "657041 ˜Le œKiosque. 14"
+ " name issn_count\n",
+ "395908 Criminal trial manual. 4\n",
+ "336247 Mesures. 4\n",
+ "2242923 Tinere speranţe. 4\n",
+ "380962 Résistances. 5\n",
+ "823415 ˜Le œPaysan. 14\n",
+ "157897 Barron's regents exams and answers. 18\n",
+ "468031 Revista científica. 14\n",
+ "1102294 Kotoba to bunka. 4\n",
+ "2049420 ˜L'œHarmonie. 7\n",
+ "10843 Design. 14"
]
},
- "execution_count": 49,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -533,7 +533,7 @@
},
{
"cell_type": "code",
- "execution_count": 50,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -558,7 +558,7 @@
" '0379-8402']"
]
},
- "execution_count": 50,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -569,7 +569,7 @@
},
{
"cell_type": "code",
- "execution_count": 61,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
@@ -578,7 +578,7 @@
"<AxesSubplot:>"
]
},
- "execution_count": 61,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
},
@@ -601,7 +601,7 @@
},
{
"cell_type": "code",
- "execution_count": 60,
+ "execution_count": 18,
"metadata": {},
"outputs": [
{
@@ -610,7 +610,7 @@
"<AxesSubplot:>"
]
},
- "execution_count": 60,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
},
@@ -633,7 +633,7 @@
},
{
"cell_type": "code",
- "execution_count": 62,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -642,7 +642,7 @@
"<AxesSubplot:>"
]
},
- "execution_count": 62,
+ "execution_count": 19,
"metadata": {},
"output_type": "execute_result"
},
@@ -665,7 +665,7 @@
},
{
"cell_type": "code",
- "execution_count": 64,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
@@ -674,7 +674,7 @@
"<AxesSubplot:>"
]
},
- "execution_count": 64,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
},
@@ -697,7 +697,7 @@
},
{
"cell_type": "code",
- "execution_count": 70,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -706,7 +706,7 @@
"<AxesSubplot:>"
]
},
- "execution_count": 70,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
},
@@ -729,7 +729,7 @@
},
{
"cell_type": "code",
- "execution_count": 71,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -814,7 +814,7 @@
"47217 Rapport annuel. 2656"
]
},
- "execution_count": 71,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -825,7 +825,7 @@
},
{
"cell_type": "code",
- "execution_count": 72,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -982,7 +982,7 @@
"269004 Estimates. 535"
]
},
- "execution_count": 72,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -993,7 +993,7 @@
},
{
"cell_type": "code",
- "execution_count": 75,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -1099,7 +1099,7 @@
"[66 rows x 2 columns]"
]
},
- "execution_count": 75,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -1110,7 +1110,7 @@
},
{
"cell_type": "code",
- "execution_count": 76,
+ "execution_count": 25,
"metadata": {},
"outputs": [
{
@@ -1216,7 +1216,7 @@
"[191 rows x 2 columns]"
]
},
- "execution_count": 76,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@@ -1227,7 +1227,7 @@
},
{
"cell_type": "code",
- "execution_count": 82,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -1333,7 +1333,7 @@
"[194241 rows x 2 columns]"
]
},
- "execution_count": 82,
+ "execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
@@ -1351,6 +1351,649 @@
},
{
"cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['2735-928X', '2735-9298']"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mapping[\"Nigerian Journal of Wildlife Management\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "These are two ISSNs referring to the same journal."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import requests"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'@graph': [{'@id': 'http://id.loc.gov/vocabulary/countries/nr',\n",
+ " 'label': 'Nigeria'},\n",
+ " {'@id': 'organization/ISSNCenter#_e',\n",
+ " '@type': 'http://schema.org/Organization'},\n",
+ " {'@id': 'resource/ISSN-L/2735-9298',\n",
+ " 'identifiedBy': 'resource/ISSN/2735-928X#ISSN-L'},\n",
+ " {'@id': 'resource/ISSN/2735-928X',\n",
+ " '@type': ['http://id.loc.gov/ontologies/bibframe/Work',\n",
+ " 'http://id.loc.gov/ontologies/bibframe/Instance',\n",
+ " 'http://schema.org/Periodical'],\n",
+ " 'identifiedBy': ['resource/ISSN/2735-928X#ISSN-L',\n",
+ " 'resource/ISSN/2735-928X#ISSN',\n",
+ " 'resource/ISSN/2735-928X#KeyTitle'],\n",
+ " 'mainTitle': 'Nigerian Journal of Wildlife Management',\n",
+ " 'otherPhysicalFormat': 'resource/ISSN/2735-9298',\n",
+ " 'title': 'resource/ISSN/2735-928X#KeyTitle',\n",
+ " 'format': 'vocabularies/medium#Online',\n",
+ " 'identifier': '2735-928X',\n",
+ " 'isFormatOf': 'resource/ISSN/2735-9298',\n",
+ " 'type': 'http://marc21rdf.info/terms/formofmaterial#a',\n",
+ " 'http://purl.org/ontology/bibo/issn': '2735-928X',\n",
+ " 'isPartOf': 'resource/ISSN-L/2735-9298',\n",
+ " 'issn': '2735-928X',\n",
+ " 'name': ['Nigerian JOurnal of Wildlife Management (Ondo. Online)',\n",
+ " 'Nigerian Journal of Wildlife Management'],\n",
+ " 'publication': 'resource/ISSN/2735-928X#ReferencePublicationEvent',\n",
+ " 'url': 'http://www.wildlifesociety.ng.org/'},\n",
+ " {'@id': 'resource/ISSN/2735-928X#ISSN',\n",
+ " '@type': 'http://id.loc.gov/ontologies/bibframe/Issn',\n",
+ " 'status': 'vocabularies/IdentifierStatus#Valid',\n",
+ " 'value': '2735-928X'},\n",
+ " {'@id': 'resource/ISSN/2735-928X#ISSN-L',\n",
+ " '@type': 'http://id.loc.gov/ontologies/bibframe/IssnL',\n",
+ " 'status': 'vocabularies/IdentifierStatus#Valid',\n",
+ " 'value': '2735-9298'},\n",
+ " {'@id': 'resource/ISSN/2735-928X#KeyTitle',\n",
+ " '@type': ['http://id.loc.gov/ontologies/bibframe/Identifier',\n",
+ " 'http://id.loc.gov/ontologies/bibframe/KeyTitle'],\n",
+ " 'value': 'Nigerian JOurnal of Wildlife Management (Ondo. Online)'},\n",
+ " {'@id': 'resource/ISSN/2735-928X#Record',\n",
+ " '@type': 'http://schema.org/CreativeWork',\n",
+ " 'status': 'vocabularies/RecordStatus#Register',\n",
+ " 'modified': '20200808163600.0',\n",
+ " 'mainEntity': 'resource/ISSN/2735-928X',\n",
+ " 'wasAttributedTo': 'organization/ISSNCenter#_e'},\n",
+ " {'@id': 'resource/ISSN/2735-928X#ReferencePublicationEvent',\n",
+ " '@type': 'http://schema.org/PublicationEvent',\n",
+ " 'location': 'http://id.loc.gov/vocabulary/countries/nr'}],\n",
+ " '@context': {'status': {'@id': 'http://id.loc.gov/ontologies/bibframe/status',\n",
+ " '@type': '@id'},\n",
+ " 'value': {'@id': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#value'},\n",
+ " 'publication': {'@id': 'http://schema.org/publication', '@type': '@id'},\n",
+ " 'mainTitle': {'@id': 'http://id.loc.gov/ontologies/bibframe/mainTitle'},\n",
+ " 'title': {'@id': 'http://id.loc.gov/ontologies/bibframe/title',\n",
+ " '@type': '@id'},\n",
+ " 'name': {'@id': 'http://schema.org/name'},\n",
+ " 'issn': {'@id': 'http://schema.org/issn'},\n",
+ " 'format': {'@id': 'http://purl.org/dc/elements/1.1/format', '@type': '@id'},\n",
+ " 'url': {'@id': 'http://schema.org/url'},\n",
+ " 'identifiedBy': {'@id': 'http://id.loc.gov/ontologies/bibframe/identifiedBy',\n",
+ " '@type': '@id'},\n",
+ " 'otherPhysicalFormat': {'@id': 'http://id.loc.gov/ontologies/bibframe/otherPhysicalFormat',\n",
+ " '@type': '@id'},\n",
+ " 'isPartOf': {'@id': 'http://schema.org/isPartOf', '@type': '@id'},\n",
+ " 'type': {'@id': 'http://purl.org/dc/terms/type', '@type': '@id'},\n",
+ " 'identifier': {'@id': 'http://purl.org/dc/elements/1.1/identifier'},\n",
+ " 'isFormatOf': {'@id': 'http://purl.org/dc/terms/isFormatOf', '@type': '@id'},\n",
+ " 'wasAttributedTo': {'@id': 'http://www.w3.org/ns/prov#wasAttributedTo',\n",
+ " '@type': '@id'},\n",
+ " 'mainEntity': {'@id': 'http://schema.org/mainEntity', '@type': '@id'},\n",
+ " 'modified': {'@id': 'http://purl.org/dc/terms/modified',\n",
+ " '@type': 'http://www.w3.org/2001/XMLSchema#dateTime'},\n",
+ " 'location': {'@id': 'http://schema.org/location', '@type': '@id'},\n",
+ " 'label': {'@id': 'http://www.w3.org/2000/01/rdf-schema#label'}}}"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "requests.get(\"https://portal.issn.org/resource/ISSN/2735-928X?format=json\").json()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Interestingly, most (93%) journals do not seem to have the distinction between electronic and print. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.9336999659012597"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(unique_name) / len(df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>name</th>\n",
+ " <th>issn_count</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>702322</th>\n",
+ " <td>International accounting and auditing standards</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>39992</th>\n",
+ " <td>Varaždinske vijesti (Tisak)</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1722785</th>\n",
+ " <td>Nihon Shuwa Gakkai Taikai yokoushuu</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2598901</th>\n",
+ " <td>Solidarité laïque</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2408544</th>\n",
+ " <td>AUP!</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1785394</th>\n",
+ " <td>˜The œASHRAE handbook CD. HVAC applications</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>325078</th>\n",
+ " <td>Kampō iyaku</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>49595</th>\n",
+ " <td>Fieldiana. Anthropology</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>110831</th>\n",
+ " <td>Vyčislitelʹnaâ matematika i matematičeskaâ fiz...</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2880661</th>\n",
+ " <td>The Assembly Reporters Africa ( Abuja)</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " name issn_count\n",
+ "702322 International accounting and auditing standards 1\n",
+ "39992 Varaždinske vijesti (Tisak) 1\n",
+ "1722785 Nihon Shuwa Gakkai Taikai yokoushuu 1\n",
+ "2598901 Solidarité laïque 1\n",
+ "2408544 AUP! 1\n",
+ "1785394 ˜The œASHRAE handbook CD. HVAC applications 1\n",
+ "325078 Kampō iyaku 1\n",
+ "49595 Fieldiana. Anthropology 1\n",
+ "110831 Vyčislitelʹnaâ matematika i matematičeskaâ fiz... 1\n",
+ "2880661 The Assembly Reporters Africa ( Abuja) 1"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "unique_name.sample(n=10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Example \"Fieldiana\"\n",
+ "\n",
+ "* \"Fieldiana. Anthropology\"\n",
+ "* https://www.jstor.org/journal/fieldianaanthro"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['0071-4739']"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mapping[\"Fieldiana. Anthropology\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "However, JSTOR reports two ISSNs: ISSN: 0071-4739, EISSN: 2162-4321 - but ISSN.org does not know about it?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "unique_issn = set([item for v in mapping.values() for item in v])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\"2162-4321\" in unique_issn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fieldiana. ['2163-7105', '2162-4321', '0096-0438', '0015-0746', '0096-2651', '0097-3572', '2162-4267', '0015-0754', '0071-4739', '2162-4348', '2162-4291', '2158-5520']\n",
+ "Fieldiana. Anthropology (Online) ['2162-4321']\n"
+ ]
+ }
+ ],
+ "source": [
+ "for k, v in mapping.items():\n",
+ " if \"2162-4321\" in v:\n",
+ " print(k, v)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As an example: when given a string like \"Fieldiana\" we would report the match as ambiguous. But \"Fieldiana. (Online)\" might be matched to '2162-4321'. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Other Examples"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>name</th>\n",
+ " <th>issn_count</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>2190239</th>\n",
+ " <td>Mediazine.</td>\n",
+ " <td>4</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>396642</th>\n",
+ " <td>San Bernardino County popular street atlas</td>\n",
+ " <td>3</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>510019</th>\n",
+ " <td>˜L'œesquisse.</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1517119</th>\n",
+ " <td>Developing mental health.</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1853045</th>\n",
+ " <td>Jan Dix</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1021374</th>\n",
+ " <td>Rennes 1 campus.</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>746313</th>\n",
+ " <td>Russian journal of coordination chemistry.</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1087451</th>\n",
+ " <td>Pomorac.</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>109465</th>\n",
+ " <td>Bloknot agitatora.</td>\n",
+ " <td>36</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1154969</th>\n",
+ " <td>Estonia :</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " name issn_count\n",
+ "2190239 Mediazine. 4\n",
+ "396642 San Bernardino County popular street atlas 3\n",
+ "510019 ˜L'œesquisse. 2\n",
+ "1517119 Developing mental health. 2\n",
+ "1853045 Jan Dix 2\n",
+ "1021374 Rennes 1 campus. 2\n",
+ "746313 Russian journal of coordination chemistry. 2\n",
+ "1087451 Pomorac. 2\n",
+ "109465 Bloknot agitatora. 36\n",
+ "1154969 Estonia : 2"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "repeated_names.sample(n=10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['1608-3318', '1070-3284']"
+ ]
+ },
+ "execution_count": 54,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mapping[\"Russian journal of coordination chemistry.\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A new question: How many journals are listed under different names, yet still refer to the same journal?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['0733-7167', '0733-7159', '0733-2289']"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mapping[\"San Bernardino County popular street atlas\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Journal of ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>name</th>\n",
+ " <th>issn_count</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>977</th>\n",
+ " <td>Journal of vocational behavior.</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>978</th>\n",
+ " <td>Journal of vocational behavior (Print)</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1206</th>\n",
+ " <td>Journal of American Concrete Institute.</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1207</th>\n",
+ " <td>Journal of the American Concrete Institute</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1310</th>\n",
+ " <td>Journal of the American Dental Hygienists' Ass...</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2929340</th>\n",
+ " <td>Journal of surgical procedures and case reports</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2929448</th>\n",
+ " <td>Journal of Epidermiological Society of Nigeria...</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2929449</th>\n",
+ " <td>Journal of Epidermiological Society of Nigeria</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2929663</th>\n",
+ " <td>Journal of Practicing Teachers</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2929664</th>\n",
+ " <td>Journal of Practicing Teachers (Uyo. Online)</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>40190 rows × 2 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " name issn_count\n",
+ "977 Journal of vocational behavior. 1\n",
+ "978 Journal of vocational behavior (Print) 1\n",
+ "1206 Journal of American Concrete Institute. 1\n",
+ "1207 Journal of the American Concrete Institute 1\n",
+ "1310 Journal of the American Dental Hygienists' Ass... 1\n",
+ "... ... ...\n",
+ "2929340 Journal of surgical procedures and case reports 1\n",
+ "2929448 Journal of Epidermiological Society of Nigeria... 1\n",
+ "2929449 Journal of Epidermiological Society of Nigeria 1\n",
+ "2929663 Journal of Practicing Teachers 1\n",
+ "2929664 Journal of Practicing Teachers (Uyo. Online) 1\n",
+ "\n",
+ "[40190 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[df.name.str.startswith(\"Journal of\")]"
+ ]
+ },
+ {
+ "cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],