From 139f3eb991f560cee365b68eaab415398c1a8d37 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Wed, 12 Aug 2020 20:07:47 +0200 Subject: update notebook --- notebooks/Journal_Names.ipynb | 815 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 729 insertions(+), 86 deletions(-) diff --git a/notebooks/Journal_Names.ipynb b/notebooks/Journal_Names.ipynb index 9b28d6e..68fedb5 100644 --- a/notebooks/Journal_Names.ipynb +++ b/notebooks/Journal_Names.ipynb @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -37,12 +37,12 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "with open(\"../data/name_to_issn.json\") as f:\n", - " mapping = json.load(f)\n" + " mapping = json.load(f)" ] }, { @@ -54,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -63,7 +63,7 @@ "2929727" ] }, - "execution_count": 7, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -92,7 +92,7 @@ "2929727" ] }, - "execution_count": 25, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -103,7 +103,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -170,7 +170,7 @@ "4 ASMT news 1" ] }, - "execution_count": 26, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -181,7 +181,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -190,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 8, 
"metadata": {}, "outputs": [], "source": [ @@ -199,7 +199,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -208,7 +208,7 @@ "194241" ] }, - "execution_count": 34, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -219,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -228,7 +228,7 @@ "0.06630003409874026" ] }, - "execution_count": 33, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -246,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -322,7 +322,7 @@ "max 8980.000000" ] }, - "execution_count": 35, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -340,7 +340,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -351,7 +351,7 @@ "Name: 45907, dtype: object" ] }, - "execution_count": 40, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -369,7 +369,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -387,7 +387,7 @@ " '0702-7702']" ] }, - "execution_count": 42, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -405,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -414,7 +414,7 @@ "24107" ] }, - "execution_count": 45, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -425,7 +425,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -455,53 +455,53 @@ " \n", " \n", " \n", - " 322100\n", - " Philosophica.\n", - " 17\n", + " 395908\n", + " Criminal trial manual.\n", + " 4\n", " \n", " \n", - " 183928\n", - " Edad de oro.\n", + " 336247\n", + " 
Mesures.\n", " 4\n", " \n", " \n", - " 294309\n", - " Horoskop.\n", - " 10\n", + " 2242923\n", + " Tinere speranţe.\n", + " 4\n", " \n", " \n", - " 517039\n", - " ˜Le œGrand journal.\n", - " 11\n", + " 380962\n", + " Résistances.\n", + " 5\n", " \n", " \n", - " 1664616\n", - " Caleidoscop şcolar.\n", - " 4\n", + " 823415\n", + " ˜Le œPaysan.\n", + " 14\n", " \n", " \n", - " 258430\n", - " ˜La œFeuille.\n", - " 34\n", + " 157897\n", + " Barron's regents exams and answers.\n", + " 18\n", " \n", " \n", - " 309546\n", - " ˜The œWilson quarterly.\n", - " 4\n", + " 468031\n", + " Revista científica.\n", + " 14\n", " \n", " \n", - " 795859\n", - " Introductory research essay\n", + " 1102294\n", + " Kotoba to bunka.\n", " 4\n", " \n", " \n", - " 1470838\n", - " Publicaciones del SEMYR.\n", - " 4\n", + " 2049420\n", + " ˜L'œHarmonie.\n", + " 7\n", " \n", " \n", - " 657041\n", - " ˜Le œKiosque.\n", + " 10843\n", + " Design.\n", " 14\n", " \n", " \n", @@ -509,20 +509,20 @@ "" ], "text/plain": [ - " name issn_count\n", - "322100 Philosophica. 17\n", - "183928 Edad de oro. 4\n", - "294309 Horoskop. 10\n", - "517039 ˜Le œGrand journal. 11\n", - "1664616 Caleidoscop şcolar. 4\n", - "258430 ˜La œFeuille. 34\n", - "309546 ˜The œWilson quarterly. 4\n", - "795859 Introductory research essay 4\n", - "1470838 Publicaciones del SEMYR. 4\n", - "657041 ˜Le œKiosque. 14" + " name issn_count\n", + "395908 Criminal trial manual. 4\n", + "336247 Mesures. 4\n", + "2242923 Tinere speranţe. 4\n", + "380962 Résistances. 5\n", + "823415 ˜Le œPaysan. 14\n", + "157897 Barron's regents exams and answers. 18\n", + "468031 Revista científica. 14\n", + "1102294 Kotoba to bunka. 4\n", + "2049420 ˜L'œHarmonie. 7\n", + "10843 Design. 
14" ] }, - "execution_count": 49, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -533,7 +533,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -558,7 +558,7 @@ " '0379-8402']" ] }, - "execution_count": 50, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -569,7 +569,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -578,7 +578,7 @@ "" ] }, - "execution_count": 61, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" }, @@ -601,7 +601,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -610,7 +610,7 @@ "" ] }, - "execution_count": 60, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" }, @@ -633,7 +633,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -642,7 +642,7 @@ "" ] }, - "execution_count": 62, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" }, @@ -665,7 +665,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -674,7 +674,7 @@ "" ] }, - "execution_count": 64, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" }, @@ -697,7 +697,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -706,7 +706,7 @@ "" ] }, - "execution_count": 70, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" }, @@ -729,7 +729,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -814,7 +814,7 @@ "47217 Rapport annuel. 
2656" ] }, - "execution_count": 71, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -825,7 +825,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -982,7 +982,7 @@ "269004 Estimates. 535" ] }, - "execution_count": 72, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -993,7 +993,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -1099,7 +1099,7 @@ "[66 rows x 2 columns]" ] }, - "execution_count": 75, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1110,7 +1110,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -1216,7 +1216,7 @@ "[191 rows x 2 columns]" ] }, - "execution_count": 76, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1227,7 +1227,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -1333,7 +1333,7 @@ "[194241 rows x 2 columns]" ] }, - "execution_count": 82, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1349,6 +1349,649 @@ "If a name matches a repeated name exactly or fuzzy matches to a repeated name and there is not other information available, the match status must be ambigious." ] }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['2735-928X', '2735-9298']" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mapping[\"Nigerian Journal of Wildlife Management\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These are two ISSNs referring to the same journal." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "import requests" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'@graph': [{'@id': 'http://id.loc.gov/vocabulary/countries/nr',\n", + " 'label': 'Nigeria'},\n", + " {'@id': 'organization/ISSNCenter#_e',\n", + " '@type': 'http://schema.org/Organization'},\n", + " {'@id': 'resource/ISSN-L/2735-9298',\n", + " 'identifiedBy': 'resource/ISSN/2735-928X#ISSN-L'},\n", + " {'@id': 'resource/ISSN/2735-928X',\n", + " '@type': ['http://id.loc.gov/ontologies/bibframe/Work',\n", + " 'http://id.loc.gov/ontologies/bibframe/Instance',\n", + " 'http://schema.org/Periodical'],\n", + " 'identifiedBy': ['resource/ISSN/2735-928X#ISSN-L',\n", + " 'resource/ISSN/2735-928X#ISSN',\n", + " 'resource/ISSN/2735-928X#KeyTitle'],\n", + " 'mainTitle': 'Nigerian Journal of Wildlife Management',\n", + " 'otherPhysicalFormat': 'resource/ISSN/2735-9298',\n", + " 'title': 'resource/ISSN/2735-928X#KeyTitle',\n", + " 'format': 'vocabularies/medium#Online',\n", + " 'identifier': '2735-928X',\n", + " 'isFormatOf': 'resource/ISSN/2735-9298',\n", + " 'type': 'http://marc21rdf.info/terms/formofmaterial#a',\n", + " 'http://purl.org/ontology/bibo/issn': '2735-928X',\n", + " 'isPartOf': 'resource/ISSN-L/2735-9298',\n", + " 'issn': '2735-928X',\n", + " 'name': ['Nigerian JOurnal of Wildlife Management (Ondo. 
Online)',\n", + " 'Nigerian Journal of Wildlife Management'],\n", + " 'publication': 'resource/ISSN/2735-928X#ReferencePublicationEvent',\n", + " 'url': 'http://www.wildlifesociety.ng.org/'},\n", + " {'@id': 'resource/ISSN/2735-928X#ISSN',\n", + " '@type': 'http://id.loc.gov/ontologies/bibframe/Issn',\n", + " 'status': 'vocabularies/IdentifierStatus#Valid',\n", + " 'value': '2735-928X'},\n", + " {'@id': 'resource/ISSN/2735-928X#ISSN-L',\n", + " '@type': 'http://id.loc.gov/ontologies/bibframe/IssnL',\n", + " 'status': 'vocabularies/IdentifierStatus#Valid',\n", + " 'value': '2735-9298'},\n", + " {'@id': 'resource/ISSN/2735-928X#KeyTitle',\n", + " '@type': ['http://id.loc.gov/ontologies/bibframe/Identifier',\n", + " 'http://id.loc.gov/ontologies/bibframe/KeyTitle'],\n", + " 'value': 'Nigerian JOurnal of Wildlife Management (Ondo. Online)'},\n", + " {'@id': 'resource/ISSN/2735-928X#Record',\n", + " '@type': 'http://schema.org/CreativeWork',\n", + " 'status': 'vocabularies/RecordStatus#Register',\n", + " 'modified': '20200808163600.0',\n", + " 'mainEntity': 'resource/ISSN/2735-928X',\n", + " 'wasAttributedTo': 'organization/ISSNCenter#_e'},\n", + " {'@id': 'resource/ISSN/2735-928X#ReferencePublicationEvent',\n", + " '@type': 'http://schema.org/PublicationEvent',\n", + " 'location': 'http://id.loc.gov/vocabulary/countries/nr'}],\n", + " '@context': {'status': {'@id': 'http://id.loc.gov/ontologies/bibframe/status',\n", + " '@type': '@id'},\n", + " 'value': {'@id': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#value'},\n", + " 'publication': {'@id': 'http://schema.org/publication', '@type': '@id'},\n", + " 'mainTitle': {'@id': 'http://id.loc.gov/ontologies/bibframe/mainTitle'},\n", + " 'title': {'@id': 'http://id.loc.gov/ontologies/bibframe/title',\n", + " '@type': '@id'},\n", + " 'name': {'@id': 'http://schema.org/name'},\n", + " 'issn': {'@id': 'http://schema.org/issn'},\n", + " 'format': {'@id': 'http://purl.org/dc/elements/1.1/format', '@type': '@id'},\n", + " 'url': 
{'@id': 'http://schema.org/url'},\n", + " 'identifiedBy': {'@id': 'http://id.loc.gov/ontologies/bibframe/identifiedBy',\n", + " '@type': '@id'},\n", + " 'otherPhysicalFormat': {'@id': 'http://id.loc.gov/ontologies/bibframe/otherPhysicalFormat',\n", + " '@type': '@id'},\n", + " 'isPartOf': {'@id': 'http://schema.org/isPartOf', '@type': '@id'},\n", + " 'type': {'@id': 'http://purl.org/dc/terms/type', '@type': '@id'},\n", + " 'identifier': {'@id': 'http://purl.org/dc/elements/1.1/identifier'},\n", + " 'isFormatOf': {'@id': 'http://purl.org/dc/terms/isFormatOf', '@type': '@id'},\n", + " 'wasAttributedTo': {'@id': 'http://www.w3.org/ns/prov#wasAttributedTo',\n", + " '@type': '@id'},\n", + " 'mainEntity': {'@id': 'http://schema.org/mainEntity', '@type': '@id'},\n", + " 'modified': {'@id': 'http://purl.org/dc/terms/modified',\n", + " '@type': 'http://www.w3.org/2001/XMLSchema#dateTime'},\n", + " 'location': {'@id': 'http://schema.org/location', '@type': '@id'},\n", + " 'label': {'@id': 'http://www.w3.org/2000/01/rdf-schema#label'}}}" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "requests.get(\"https://portal.issn.org/resource/ISSN/2735-928X?format=json\").json()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Interestingly, most (93%) journals do not seem to have the distinction between electronic and print. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9336999659012597" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(unique_name) / len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameissn_count
702322International accounting and auditing standards1
39992Varaždinske vijesti (Tisak)1
1722785Nihon Shuwa Gakkai Taikai yokoushuu1
2598901Solidarité laïque1
2408544AUP!1
1785394˜The œASHRAE handbook CD. HVAC applications1
325078Kampō iyaku1
49595Fieldiana. Anthropology1
110831Vyčislitelʹnaâ matematika i matematičeskaâ fiz...1
2880661The Assembly Reporters Africa ( Abuja)1
\n", + "
" + ], + "text/plain": [ + " name issn_count\n", + "702322 International accounting and auditing standards 1\n", + "39992 Varaždinske vijesti (Tisak) 1\n", + "1722785 Nihon Shuwa Gakkai Taikai yokoushuu 1\n", + "2598901 Solidarité laïque 1\n", + "2408544 AUP! 1\n", + "1785394 ˜The œASHRAE handbook CD. HVAC applications 1\n", + "325078 Kampō iyaku 1\n", + "49595 Fieldiana. Anthropology 1\n", + "110831 Vyčislitelʹnaâ matematika i matematičeskaâ fiz... 1\n", + "2880661 The Assembly Reporters Africa ( Abuja) 1" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "unique_name.sample(n=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example \"Fieldiana\"\n", + "\n", + "* \"Fieldiana. Anthropology\"\n", + "* https://www.jstor.org/journal/fieldianaanthro" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['0071-4739']" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mapping[\"Fieldiana. Anthropology\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, jstor reports two ISSN: INSSP: 0071-4739, EISSN: 2162-4321 - but ISSN.org does not know about it?" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "unique_issn = set([item for v in mapping.values() for item in v])" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"2162-4321\" in unique_issn" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fieldiana. 
['2163-7105', '2162-4321', '0096-0438', '0015-0746', '0096-2651', '0097-3572', '2162-4267', '0015-0754', '0071-4739', '2162-4348', '2162-4291', '2158-5520']\n", + "Fieldiana. Anthropology (Online) ['2162-4321']\n" + ] + } + ], + "source": [ + "for k, v in mapping.items():\n", + " if \"2162-4321\" in v:\n", + " print(k, v)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As an example: when given a string like \"Fieldiana\" we would return ambiguous. But \"Fieldiana. (Online)\" might be matched to '2162-4321'. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Other Examples" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameissn_count
2190239Mediazine.4
396642San Bernardino County popular street atlas3
510019˜L'œesquisse.2
1517119Developing mental health.2
1853045Jan Dix2
1021374Rennes 1 campus.2
746313Russian journal of coordination chemistry.2
1087451Pomorac.2
109465Bloknot agitatora.36
1154969Estonia :2
\n", + "
" + ], + "text/plain": [ + " name issn_count\n", + "2190239 Mediazine. 4\n", + "396642 San Bernardino County popular street atlas 3\n", + "510019 ˜L'œesquisse. 2\n", + "1517119 Developing mental health. 2\n", + "1853045 Jan Dix 2\n", + "1021374 Rennes 1 campus. 2\n", + "746313 Russian journal of coordination chemistry. 2\n", + "1087451 Pomorac. 2\n", + "109465 Bloknot agitatora. 36\n", + "1154969 Estonia : 2" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "repeated_names.sample(n=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['1608-3318', '1070-3284']" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mapping[\"Russian journal of coordination chemistry.\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A new question: How many journals are listed under different names, yet still refer to the same journal?" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['0733-7167', '0733-7159', '0733-2289']" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mapping[\"San Bernardino County popular street atlas\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Journal of ..." + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameissn_count
977Journal of vocational behavior.1
978Journal of vocational behavior (Print)1
1206Journal of American Concrete Institute.1
1207Journal of the American Concrete Institute1
1310Journal of the American Dental Hygienists' Ass...1
.........
2929340Journal of surgical procedures and case reports1
2929448Journal of Epidermiological Society of Nigeria...1
2929449Journal of Epidermiological Society of Nigeria1
2929663Journal of Practicing Teachers1
2929664Journal of Practicing Teachers (Uyo. Online)1
\n", + "

40190 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " name issn_count\n", + "977 Journal of vocational behavior. 1\n", + "978 Journal of vocational behavior (Print) 1\n", + "1206 Journal of American Concrete Institute. 1\n", + "1207 Journal of the American Concrete Institute 1\n", + "1310 Journal of the American Dental Hygienists' Ass... 1\n", + "... ... ...\n", + "2929340 Journal of surgical procedures and case reports 1\n", + "2929448 Journal of Epidermiological Society of Nigeria... 1\n", + "2929449 Journal of Epidermiological Society of Nigeria 1\n", + "2929663 Journal of Practicing Teachers 1\n", + "2929664 Journal of Practicing Teachers (Uyo. Online) 1\n", + "\n", + "[40190 rows x 2 columns]" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.name.str.startswith(\"Journal of\")]" + ] + }, { "cell_type": "code", "execution_count": null, -- cgit v1.2.3