diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-08-12 20:07:47 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-08-12 20:07:47 +0200 |
commit | 139f3eb991f560cee365b68eaab415398c1a8d37 (patch) | |
tree | 8168d8232915767633b40c425f1dbc0b8f14845b | |
parent | d06877139f4148ecc10d7976cc9dfd9c0582a6e9 (diff) | |
download | fuzzycat-139f3eb991f560cee365b68eaab415398c1a8d37.tar.gz fuzzycat-139f3eb991f560cee365b68eaab415398c1a8d37.zip |
update notebook
-rw-r--r-- | notebooks/Journal_Names.ipynb | 815 |
1 files changed, 729 insertions, 86 deletions
diff --git a/notebooks/Journal_Names.ipynb b/notebooks/Journal_Names.ipynb index 9b28d6e..68fedb5 100644 --- a/notebooks/Journal_Names.ipynb +++ b/notebooks/Journal_Names.ipynb @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -37,12 +37,12 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "with open(\"../data/name_to_issn.json\") as f:\n", - " mapping = json.load(f)\n" + " mapping = json.load(f)" ] }, { @@ -54,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -63,7 +63,7 @@ "2929727" ] }, - "execution_count": 7, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -92,7 +92,7 @@ "2929727" ] }, - "execution_count": 25, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -103,7 +103,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -170,7 +170,7 @@ "4 ASMT news 1" ] }, - "execution_count": 26, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -181,7 +181,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -190,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -199,7 +199,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -208,7 +208,7 @@ "194241" ] }, - "execution_count": 34, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -219,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -228,7 +228,7 @@ "0.06630003409874026" ] }, - "execution_count": 33, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -246,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -322,7 +322,7 @@ "max 8980.000000" ] }, - "execution_count": 35, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -340,7 +340,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -351,7 +351,7 @@ "Name: 45907, dtype: object" ] }, - "execution_count": 40, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -369,7 +369,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -387,7 +387,7 @@ " '0702-7702']" ] }, - "execution_count": 42, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -405,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -414,7 +414,7 @@ "24107" ] }, - "execution_count": 45, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -425,7 +425,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -455,53 +455,53 @@ " </thead>\n", " <tbody>\n", " <tr>\n", - " <th>322100</th>\n", - " <td>Philosophica.</td>\n", - " <td>17</td>\n", + " <th>395908</th>\n", + " <td>Criminal trial manual.</td>\n", + " <td>4</td>\n", " </tr>\n", " <tr>\n", - " <th>183928</th>\n", - " <td>Edad de oro.</td>\n", + " <th>336247</th>\n", + " <td>Mesures.</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", - " <th>294309</th>\n", - " <td>Horoskop.</td>\n", - " <td>10</td>\n", + " <th>2242923</th>\n", + " <td>Tinere speranţe.</td>\n", + " <td>4</td>\n", " </tr>\n", " <tr>\n", - " <th>517039</th>\n", - " <td>Le Grand journal.</td>\n", - " <td>11</td>\n", + " <th>380962</th>\n", + " <td>Résistances.</td>\n", + " <td>5</td>\n", " </tr>\n", " <tr>\n", - " <th>1664616</th>\n", - " <td>Caleidoscop şcolar.</td>\n", - " <td>4</td>\n", + " <th>823415</th>\n", + " <td>Le Paysan.</td>\n", + " <td>14</td>\n", " </tr>\n", " <tr>\n", - " <th>258430</th>\n", - " <td>La Feuille.</td>\n", - " <td>34</td>\n", + " <th>157897</th>\n", + " <td>Barron's regents exams and answers.</td>\n", + " <td>18</td>\n", " </tr>\n", " <tr>\n", - " <th>309546</th>\n", - " <td>The Wilson quarterly.</td>\n", - " <td>4</td>\n", + " <th>468031</th>\n", + " <td>Revista científica.</td>\n", + " <td>14</td>\n", " </tr>\n", " <tr>\n", - " <th>795859</th>\n", - " <td>Introductory research essay</td>\n", + " <th>1102294</th>\n", + " <td>Kotoba to bunka.</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", - " <th>1470838</th>\n", - " <td>Publicaciones del SEMYR.</td>\n", - " <td>4</td>\n", + " <th>2049420</th>\n", + " <td>L'Harmonie.</td>\n", + " <td>7</td>\n", " </tr>\n", " <tr>\n", - " <th>657041</th>\n", - " <td>Le Kiosque.</td>\n", + " <th>10843</th>\n", + " <td>Design.</td>\n", " <td>14</td>\n", " </tr>\n", " </tbody>\n", @@ -509,20 +509,20 @@ "</div>" ], "text/plain": [ - " name issn_count\n", - "322100 Philosophica. 17\n", - "183928 Edad de oro. 4\n", - "294309 Horoskop. 10\n", - "517039 Le Grand journal. 11\n", - "1664616 Caleidoscop şcolar. 4\n", - "258430 La Feuille. 34\n", - "309546 The Wilson quarterly. 4\n", - "795859 Introductory research essay 4\n", - "1470838 Publicaciones del SEMYR. 4\n", - "657041 Le Kiosque. 14" + " name issn_count\n", + "395908 Criminal trial manual. 4\n", + "336247 Mesures. 4\n", + "2242923 Tinere speranţe. 4\n", + "380962 Résistances. 5\n", + "823415 Le Paysan. 14\n", + "157897 Barron's regents exams and answers. 18\n", + "468031 Revista científica. 14\n", + "1102294 Kotoba to bunka. 4\n", + "2049420 L'Harmonie. 7\n", + "10843 Design. 14" ] }, - "execution_count": 49, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -533,7 +533,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -558,7 +558,7 @@ " '0379-8402']" ] }, - "execution_count": 50, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -569,7 +569,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -578,7 +578,7 @@ "<AxesSubplot:>" ] }, - "execution_count": 61, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" }, @@ -601,7 +601,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -610,7 +610,7 @@ "<AxesSubplot:>" ] }, - "execution_count": 60, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" }, @@ -633,7 +633,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -642,7 +642,7 @@ "<AxesSubplot:>" ] }, - "execution_count": 62, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" }, @@ -665,7 +665,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -674,7 +674,7 @@ "<AxesSubplot:>" ] }, - "execution_count": 64, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" }, @@ -697,7 +697,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -706,7 +706,7 @@ "<AxesSubplot:>" ] }, - "execution_count": 70, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" }, @@ -729,7 +729,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -814,7 +814,7 @@ "47217 Rapport annuel. 2656" ] }, - "execution_count": 71, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -825,7 +825,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -982,7 +982,7 @@ "269004 Estimates. 535" ] }, - "execution_count": 72, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -993,7 +993,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -1099,7 +1099,7 @@ "[66 rows x 2 columns]" ] }, - "execution_count": 75, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1110,7 +1110,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -1216,7 +1216,7 @@ "[191 rows x 2 columns]" ] }, - "execution_count": 76, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1227,7 +1227,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -1333,7 +1333,7 @@ "[194241 rows x 2 columns]" ] }, - "execution_count": 82, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1351,6 +1351,649 @@ }, { "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['2735-928X', '2735-9298']" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mapping[\"Nigerian Journal of Wildlife Management\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These are two ISSN refering to the same journal." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "import requests" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'@graph': [{'@id': 'http://id.loc.gov/vocabulary/countries/nr',\n", + " 'label': 'Nigeria'},\n", + " {'@id': 'organization/ISSNCenter#_e',\n", + " '@type': 'http://schema.org/Organization'},\n", + " {'@id': 'resource/ISSN-L/2735-9298',\n", + " 'identifiedBy': 'resource/ISSN/2735-928X#ISSN-L'},\n", + " {'@id': 'resource/ISSN/2735-928X',\n", + " '@type': ['http://id.loc.gov/ontologies/bibframe/Work',\n", + " 'http://id.loc.gov/ontologies/bibframe/Instance',\n", + " 'http://schema.org/Periodical'],\n", + " 'identifiedBy': ['resource/ISSN/2735-928X#ISSN-L',\n", + " 'resource/ISSN/2735-928X#ISSN',\n", + " 'resource/ISSN/2735-928X#KeyTitle'],\n", + " 'mainTitle': 'Nigerian Journal of Wildlife Management',\n", + " 'otherPhysicalFormat': 'resource/ISSN/2735-9298',\n", + " 'title': 'resource/ISSN/2735-928X#KeyTitle',\n", + " 'format': 'vocabularies/medium#Online',\n", + " 'identifier': '2735-928X',\n", + " 'isFormatOf': 'resource/ISSN/2735-9298',\n", + " 'type': 'http://marc21rdf.info/terms/formofmaterial#a',\n", + " 'http://purl.org/ontology/bibo/issn': '2735-928X',\n", + " 'isPartOf': 'resource/ISSN-L/2735-9298',\n", + " 'issn': '2735-928X',\n", + " 'name': ['Nigerian JOurnal of Wildlife Management (Ondo. Online)',\n", + " 'Nigerian Journal of Wildlife Management'],\n", + " 'publication': 'resource/ISSN/2735-928X#ReferencePublicationEvent',\n", + " 'url': 'http://www.wildlifesociety.ng.org/'},\n", + " {'@id': 'resource/ISSN/2735-928X#ISSN',\n", + " '@type': 'http://id.loc.gov/ontologies/bibframe/Issn',\n", + " 'status': 'vocabularies/IdentifierStatus#Valid',\n", + " 'value': '2735-928X'},\n", + " {'@id': 'resource/ISSN/2735-928X#ISSN-L',\n", + " '@type': 'http://id.loc.gov/ontologies/bibframe/IssnL',\n", + " 'status': 'vocabularies/IdentifierStatus#Valid',\n", + " 'value': '2735-9298'},\n", + " {'@id': 'resource/ISSN/2735-928X#KeyTitle',\n", + " '@type': ['http://id.loc.gov/ontologies/bibframe/Identifier',\n", + " 'http://id.loc.gov/ontologies/bibframe/KeyTitle'],\n", + " 'value': 'Nigerian JOurnal of Wildlife Management (Ondo. Online)'},\n", + " {'@id': 'resource/ISSN/2735-928X#Record',\n", + " '@type': 'http://schema.org/CreativeWork',\n", + " 'status': 'vocabularies/RecordStatus#Register',\n", + " 'modified': '20200808163600.0',\n", + " 'mainEntity': 'resource/ISSN/2735-928X',\n", + " 'wasAttributedTo': 'organization/ISSNCenter#_e'},\n", + " {'@id': 'resource/ISSN/2735-928X#ReferencePublicationEvent',\n", + " '@type': 'http://schema.org/PublicationEvent',\n", + " 'location': 'http://id.loc.gov/vocabulary/countries/nr'}],\n", + " '@context': {'status': {'@id': 'http://id.loc.gov/ontologies/bibframe/status',\n", + " '@type': '@id'},\n", + " 'value': {'@id': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#value'},\n", + " 'publication': {'@id': 'http://schema.org/publication', '@type': '@id'},\n", + " 'mainTitle': {'@id': 'http://id.loc.gov/ontologies/bibframe/mainTitle'},\n", + " 'title': {'@id': 'http://id.loc.gov/ontologies/bibframe/title',\n", + " '@type': '@id'},\n", + " 'name': {'@id': 'http://schema.org/name'},\n", + " 'issn': {'@id': 'http://schema.org/issn'},\n", + " 'format': {'@id': 'http://purl.org/dc/elements/1.1/format', '@type': '@id'},\n", + " 'url': {'@id': 'http://schema.org/url'},\n", + " 'identifiedBy': {'@id': 'http://id.loc.gov/ontologies/bibframe/identifiedBy',\n", + " '@type': '@id'},\n", + " 'otherPhysicalFormat': {'@id': 'http://id.loc.gov/ontologies/bibframe/otherPhysicalFormat',\n", + " '@type': '@id'},\n", + " 'isPartOf': {'@id': 'http://schema.org/isPartOf', '@type': '@id'},\n", + " 'type': {'@id': 'http://purl.org/dc/terms/type', '@type': '@id'},\n", + " 'identifier': {'@id': 'http://purl.org/dc/elements/1.1/identifier'},\n", + " 'isFormatOf': {'@id': 'http://purl.org/dc/terms/isFormatOf', '@type': '@id'},\n", + " 'wasAttributedTo': {'@id': 'http://www.w3.org/ns/prov#wasAttributedTo',\n", + " '@type': '@id'},\n", + " 'mainEntity': {'@id': 'http://schema.org/mainEntity', '@type': '@id'},\n", + " 'modified': {'@id': 'http://purl.org/dc/terms/modified',\n", + " '@type': 'http://www.w3.org/2001/XMLSchema#dateTime'},\n", + " 'location': {'@id': 'http://schema.org/location', '@type': '@id'},\n", + " 'label': {'@id': 'http://www.w3.org/2000/01/rdf-schema#label'}}}" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "requests.get(\"https://portal.issn.org/resource/ISSN/2735-928X?format=json\").json()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Interestingly, most (93%) journal do not seem to have the distinction between electronic and print. " + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9336999659012597" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(unique_name) / len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>name</th>\n", + " <th>issn_count</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>702322</th>\n", + " <td>International accounting and auditing standards</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>39992</th>\n", + " <td>Varaždinske vijesti (Tisak)</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1722785</th>\n", + " <td>Nihon Shuwa Gakkai Taikai yokoushuu</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2598901</th>\n", + " <td>Solidarité laïque</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2408544</th>\n", + " <td>AUP!</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1785394</th>\n", + " <td>The ASHRAE handbook CD. HVAC applications</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>325078</th>\n", + " <td>Kampō iyaku</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>49595</th>\n", + " <td>Fieldiana. Anthropology</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>110831</th>\n", + " <td>Vyčislitelʹnaâ matematika i matematičeskaâ fiz...</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2880661</th>\n", + " <td>The Assembly Reporters Africa ( Abuja)</td>\n", + " <td>1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " name issn_count\n", + "702322 International accounting and auditing standards 1\n", + "39992 Varaždinske vijesti (Tisak) 1\n", + "1722785 Nihon Shuwa Gakkai Taikai yokoushuu 1\n", + "2598901 Solidarité laïque 1\n", + "2408544 AUP! 1\n", + "1785394 The ASHRAE handbook CD. HVAC applications 1\n", + "325078 Kampō iyaku 1\n", + "49595 Fieldiana. Anthropology 1\n", + "110831 Vyčislitelʹnaâ matematika i matematičeskaâ fiz... 1\n", + "2880661 The Assembly Reporters Africa ( Abuja) 1" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "unique_name.sample(n=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example \"Fieldiana\"\n", + "\n", + "* \"Fieldiana. Anthropology\"\n", + "* https://www.jstor.org/journal/fieldianaanthro" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['0071-4739']" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mapping[\"Fieldiana. Anthropology\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, jstor reports two ISSN: INSSP: 0071-4739, EISSN: 2162-4321 - but ISSN.org does not know about it?" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "unique_issn = set([item for v in mapping.values() for item in v])" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"2162-4321\" in unique_issn" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fieldiana. ['2163-7105', '2162-4321', '0096-0438', '0015-0746', '0096-2651', '0097-3572', '2162-4267', '0015-0754', '0071-4739', '2162-4348', '2162-4291', '2158-5520']\n", + "Fieldiana. Anthropology (Online) ['2162-4321']\n" + ] + } + ], + "source": [ + "for k, v in mapping.items():\n", + " if \"2162-4321\" in v:\n", + " print(k, v)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As an example: when given a string like \"Fieldiana\" we would return ambiguous. But \"Fieldiana. (Online)\" might be matched to '2162-4321'. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Other Examples" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>name</th>\n", + " <th>issn_count</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2190239</th>\n", + " <td>Mediazine.</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>396642</th>\n", + " <td>San Bernardino County popular street atlas</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>510019</th>\n", + " <td>L'esquisse.</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1517119</th>\n", + " <td>Developing mental health.</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1853045</th>\n", + " <td>Jan Dix</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1021374</th>\n", + " <td>Rennes 1 campus.</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>746313</th>\n", + " <td>Russian journal of coordination chemistry.</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1087451</th>\n", + " <td>Pomorac.</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>109465</th>\n", + " <td>Bloknot agitatora.</td>\n", + " <td>36</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1154969</th>\n", + " <td>Estonia :</td>\n", + " <td>2</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " name issn_count\n", + "2190239 Mediazine. 4\n", + "396642 San Bernardino County popular street atlas 3\n", + "510019 L'esquisse. 2\n", + "1517119 Developing mental health. 2\n", + "1853045 Jan Dix 2\n", + "1021374 Rennes 1 campus. 2\n", + "746313 Russian journal of coordination chemistry. 2\n", + "1087451 Pomorac. 2\n", + "109465 Bloknot agitatora. 36\n", + "1154969 Estonia : 2" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "repeated_names.sample(n=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['1608-3318', '1070-3284']" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mapping[\"Russian journal of coordination chemistry.\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A new question: How many journals are listed under different names, yet still refer to the same journal?" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['0733-7167', '0733-7159', '0733-2289']" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mapping[\"San Bernardino County popular street atlas\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Journal of ..." + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>name</th>\n", + " <th>issn_count</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>977</th>\n", + " <td>Journal of vocational behavior.</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>978</th>\n", + " <td>Journal of vocational behavior (Print)</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1206</th>\n", + " <td>Journal of American Concrete Institute.</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1207</th>\n", + " <td>Journal of the American Concrete Institute</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1310</th>\n", + " <td>Journal of the American Dental Hygienists' Ass...</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2929340</th>\n", + " <td>Journal of surgical procedures and case reports</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2929448</th>\n", + " <td>Journal of Epidermiological Society of Nigeria...</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2929449</th>\n", + " <td>Journal of Epidermiological Society of Nigeria</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2929663</th>\n", + " <td>Journal of Practicing Teachers</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2929664</th>\n", + " <td>Journal of Practicing Teachers (Uyo. Online)</td>\n", + " <td>1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>40190 rows × 2 columns</p>\n", + "</div>" + ], + "text/plain": [ + " name issn_count\n", + "977 Journal of vocational behavior. 1\n", + "978 Journal of vocational behavior (Print) 1\n", + "1206 Journal of American Concrete Institute. 1\n", + "1207 Journal of the American Concrete Institute 1\n", + "1310 Journal of the American Dental Hygienists' Ass... 1\n", + "... ... ...\n", + "2929340 Journal of surgical procedures and case reports 1\n", + "2929448 Journal of Epidermiological Society of Nigeria... 1\n", + "2929449 Journal of Epidermiological Society of Nigeria 1\n", + "2929663 Journal of Practicing Teachers 1\n", + "2929664 Journal of Practicing Teachers (Uyo. Online) 1\n", + "\n", + "[40190 rows x 2 columns]" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.name.str.startswith(\"Journal of\")]" + ] + }, + { + "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], |