From 621f50e685d9beeb1fe502a133e76fbd5a8a9c5c Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Tue, 24 Nov 2020 15:06:34 +0100 Subject: cleanup --- notebooks/Cluster Size and Title Length.ipynb | 553 - notebooks/Journal_Names.html | 14635 ------------------------ notebooks/Journal_Names.ipynb | 2032 ---- notebooks/Journal_Names.pdf | Bin 69255 -> 0 bytes notebooks/Journal_Names.zip | Bin 26734 -> 0 bytes 5 files changed, 17220 deletions(-) delete mode 100644 notebooks/Cluster Size and Title Length.ipynb delete mode 100644 notebooks/Journal_Names.html delete mode 100644 notebooks/Journal_Names.ipynb delete mode 100644 notebooks/Journal_Names.pdf delete mode 100644 notebooks/Journal_Names.zip (limited to 'notebooks') diff --git a/notebooks/Cluster Size and Title Length.ipynb b/notebooks/Cluster Size and Title Length.ipynb deleted file mode 100644 index b78ba8b..0000000 --- a/notebooks/Cluster Size and Title Length.ipynb +++ /dev/null @@ -1,553 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.read_csv(\"../fixtures/cluster_title_normalized_dups_size_keylen.tsv\", sep=\"\\t\", names=[\"size\", \"len\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "5818143" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(df)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sizelen
02640
123
223
323
424
\n", - "
" - ], - "text/plain": [ - " size len\n", - "0 264 0\n", - "1 2 3\n", - "2 2 3\n", - "3 2 3\n", - "4 2 4" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "pd.set_option('display.float_format', lambda x: '%.3f' % x)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " size len\n", - "count 5818143.000 5818143.000\n", - "mean 4.350 52.120\n", - "std 196.347 35.026\n", - "min 2.000 0.000\n", - "25% 2.000 24.000\n", - "50% 2.000 46.000\n", - "75% 3.000 72.000\n", - "max 151383.000 11686.000\n" - ] - } - ], - "source": [ - "print(df.describe())" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sizelen
02640
18751
2762811
28476
28866
.........
581805472
581806064
581810462
581811854
5818128134
\n", - "

448170 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " size len\n", - "0 264 0\n", - "187 5 1\n", - "276 28 11\n", - "284 7 6\n", - "288 6 6\n", - "... ... ...\n", - "5818054 7 2\n", - "5818060 6 4\n", - "5818104 6 2\n", - "5818118 5 4\n", - "5818128 13 4\n", - "\n", - "[448170 rows x 2 columns]" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[df[\"size\"] > 4]" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sizelen
02640
2762811
31419515
3291014
3649815
.........
5817734185
5817835114
5817886205
58179011510
5818128134
\n", - "

159500 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " size len\n", - "0 264 0\n", - "276 28 11\n", - "314 195 15\n", - "329 10 14\n", - "364 98 15\n", - "... ... ...\n", - "5817734 18 5\n", - "5817835 11 4\n", - "5817886 20 5\n", - "5817901 15 10\n", - "5818128 13 4\n", - "\n", - "[159500 rows x 2 columns]" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[df[\"size\"] >= 10]" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sizelen
02640
31419515
42812231
52517328
72727031
.........
58161001474
58173451672
58173612582
58173662982
58173742522
\n", - "

9610 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " size len\n", - "0 264 0\n", - "314 195 15\n", - "428 122 31\n", - "525 173 28\n", - "727 270 31\n", - "... ... ...\n", - "5816100 147 4\n", - "5817345 167 2\n", - "5817361 258 2\n", - "5817366 298 2\n", - "5817374 252 2\n", - "\n", - "[9610 rows x 2 columns]" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[df[\"size\"] > 100]" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "5818143" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "fuzzycat", - "language": "python", - "name": "fuzzycat" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.8" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/Journal_Names.html b/notebooks/Journal_Names.html deleted file mode 100644 index a87394a..0000000 --- a/notebooks/Journal_Names.html +++ /dev/null @@ -1,14635 +0,0 @@ - - - - -Journal_Names - - - - - - - - - - - - - - - - - - - - - - -
-
- -
-
-
-

Journal Names

Questions in the context of fuzzy matching.

-
    -
  • How many journal names appear more than once?
  • -
  • What is the average length of the duplicated names vs the unique names?
  • -
-

Input file is a single larger JSON, mapping names to issns.

-
{
-  "Acta Orientalia.": [
-    "0001-6438"
-  ],
-  "Acta Orientalia (København)": [
-    "0001-6438"
-  ],
-..
-
- -
-
-
-
-
-
In [4]:
-
-
-
import json
-import pandas as pd
-
- -
-
-
- -
-
-
-
In [5]:
-
-
-
with open("../data/name_to_issn.json") as f:
-    mapping = json.load(f)
-
- -
-
-
- -
-
-
-
-

We have about 3M keys.

- -
-
-
-
-
-
In [7]:
-
-
-
len(mapping)
-
- -
-
-
- -
-
- - -
- -
Out[7]:
- - - - -
-
2929727
-
- -
- -
-
- -
-
-
-
In [21]:
-
-
-
df = pd.DataFrame(((k, len(v)) for k, v in mapping.items()), columns=["name", "issn_count"])
-
- -
-
-
- -
-
-
-
In [25]:
-
-
-
len(df)
-
- -
-
-
- -
-
- - -
- -
Out[25]:
- - - - -
-
2929727
-
- -
- -
-
- -
-
-
-
In [26]:
-
-
-
df.head()
-
- -
-
-
- -
-
- - -
- -
Out[26]:
- - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameissn_count
0Acta Orientalia.1
1Acta Orientalia (København)1
2˜The œpublishers weekly.1
3Publishers weekly1
4ASMT news1
-
-
- -
- -
-
- -
-
-
-
In [31]:
-
-
-
unique_name = df[df.issn_count == 1]
-
- -
-
-
- -
-
-
-
In [32]:
-
-
-
repeated_names = df[df.issn_count > 1]
-
- -
-
-
- -
-
-
-
In [34]:
-
-
-
len(repeated_names)
-
- -
-
-
- -
-
- - -
- -
Out[34]:
- - - - -
-
194241
-
- -
- -
-
- -
-
-
-
In [33]:
-
-
-
len(repeated_names) / len(df)
-
- -
-
-
- -
-
- - -
- -
Out[33]:
- - - - -
-
0.06630003409874026
-
- -
- -
-
- -
-
-
-
-

About 6% (or 194241) names are repeated.

- -
-
-
-
-
-
In [35]:
-
-
-
repeated_names.describe()
-
- -
-
-
- -
-
- - -
- -
Out[35]:
- - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issn_count
count194241.000000
mean3.197523
std25.081605
min2.000000
25%2.000000
50%2.000000
75%2.000000
max8980.000000
-
-
- -
- -
-
- -
-
-
-
-

Which name is shared by over 8000 ISSN?

- -
-
-
-
-
-
In [40]:
-
-
-
repeated_names.iloc[repeated_names.issn_count.argmax()] # Annual report.
-
- -
-
-
- -
-
- - -
- -
Out[40]:
- - - - -
-
name          Annual report.
-issn_count              8980
-Name: 45907, dtype: object
-
- -
- -
-
- -
-
-
-
-

It is the "Annual report."

- -
-
-
-
-
-
In [42]:
-
-
-
mapping["Annual report."][:10]
-
- -
-
-
- -
-
- - -
- -
Out[42]:
- - - - -
-
['0706-537X',
- '1186-7957',
- '2324-1926',
- '1445-9248',
- '0872-3982',
- '1714-1524',
- '1037-8812',
- '0225-0241',
- '1327-6344',
- '0702-7702']
-
- -
- -
-
- -
-
-
-
-

On average a repeated name will point to 3 ISSN. About 24k names point to more than 3 ISSN.

- -
-
-
-
-
-
In [45]:
-
-
-
len(repeated_names[repeated_names.issn_count > 3])
-
- -
-
-
- -
-
- - -
- -
Out[45]:
- - - - -
-
24107
-
- -
- -
-
- -
-
-
-
In [49]:
-
-
-
repeated_names[repeated_names.issn_count > 3].sample(n=10)
-
- -
-
-
- -
-
- - -
- -
Out[49]:
- - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameissn_count
322100Philosophica.17
183928Edad de oro.4
294309Horoskop.10
517039˜Le œGrand journal.11
1664616Caleidoscop şcolar.4
258430˜La œFeuille.34
309546˜The œWilson quarterly.4
795859Introductory research essay4
1470838Publicaciones del SEMYR.4
657041˜Le œKiosque.14
-
-
- -
- -
-
- -
-
-
-
In [50]:
-
-
-
mapping["Philosophica."]
-
- -
-
-
- -
-
- - -
- -
Out[50]:
- - - - -
-
['1285-9133',
- '1480-4670',
- '1487-5349',
- '1724-6598',
- '2183-0134',
- '2538-693X',
- '2610-8933',
- '2035-8326',
- '2295-9084',
- '1517-8889',
- '2249-5053',
- '2420-9198',
- '2654-9263',
- '2610-8925',
- '1158-9574',
- '0872-4784',
- '0379-8402']
-
- -
- -
-
- -
-
-
-
In [61]:
-
-
-
repeated_names[repeated_names.issn_count > 3].issn_count.hist(bins=20)
-
- -
-
-
- -
-
- - -
- -
Out[61]:
- - - - -
-
<AxesSubplot:>
-
- -
- -
- -
- - - - -
- -
- -
- -
-
- -
-
-
-
In [60]:
-
-
-
repeated_names[(repeated_names.issn_count > 3) & (repeated_names.issn_count < 50)].issn_count.hist(bins=10)
-
- -
-
-
- -
-
- - -
- -
Out[60]:
- - - - -
-
<AxesSubplot:>
-
- -
- -
- -
- - - - -
- -
- -
- -
-
- -
-
-
-
In [62]:
-
-
-
repeated_names[(repeated_names.issn_count > 3) & (repeated_names.issn_count < 20)].issn_count.hist(bins=10)
-
- -
-
-
- -
-
- - -
- -
Out[62]:
- - - - -
-
<AxesSubplot:>
-
- -
- -
- -
- - - - -
- -
- -
- -
-
- -
-
-
-
In [64]:
-
-
-
repeated_names[(repeated_names.issn_count > 3) & (repeated_names.issn_count < 8)].issn_count.hist()
-
- -
-
-
- -
-
- - -
- -
Out[64]:
- - - - -
-
<AxesSubplot:>
-
- -
- -
- -
- - - - -
- -
- -
- -
-
- -
-
-
-
In [70]:
-
-
-
repeated_names[repeated_names.issn_count > 1000].issn_count.hist(bins=10)
-
- -
-
-
- -
-
- - -
- -
Out[70]:
- - - - -
-
<AxesSubplot:>
-
- -
- -
- -
- - - - -
- -
- -
- -
-
- -
-
-
-
In [71]:
-
-
-
repeated_names[repeated_names.issn_count > 1000]
-
- -
-
-
- -
-
- - -
- -
Out[71]:
- - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameissn_count
3499Bulletin.2752
7632Newsletter.2715
8317Rapport.1050
23662Proceedings.1403
45839Annual report /1090
45907Annual report.8980
45964Annuaire.1260
47217Rapport annuel.2656
-
-
- -
- -
-
- -
-
-
-
In [72]:
-
-
-
repeated_names[repeated_names.issn_count > 500]
-
- -
-
-
- -
-
- - -
- -
Out[72]:
- - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameissn_count
102Bulletin d'information.693
3218Bulletin de liaison.510
3499Bulletin.2752
7632Newsletter.2715
8317Rapport.1050
23662Proceedings.1403
45794Report.743
45839Annual report /1090
45907Annual report.8980
45964Annuaire.1260
46370Jaarverslag.675
47142Rapport d'activité.660
47217Rapport annuel.2656
49289Jahresbericht.518
57558Annual report760
121599Alumni directory /511
128827Bulletin municipal.521
150529˜La œLettre.623
168933Local climatological data.613
269004Estimates.535
-
-
- -
- -
-
- -
-
-
-
In [75]:
-
-
-
repeated_names[repeated_names.issn_count > 200]
-
- -
-
-
- -
-
- - -
- -
Out[75]:
- - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameissn_count
102Bulletin d'information.693
2665Newsletter /259
3218Bulletin de liaison.510
3499Bulletin.2752
3926Boletín.216
.........
425644Rapport d'activité ...394
532500Relatório e contas.247
603144Bildung und Beruf regional.292
1006131Vies de famille.222
1110247Country risk service.271
-

66 rows × 2 columns

-
-
- -
- -
-
- -
-
-
-
In [76]:
-
-
-
repeated_names[repeated_names.issn_count > 100]
-
- -
-
-
- -
-
- - -
- -
Out[76]:
- - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameissn_count
102Bulletin d'information.693
2665Newsletter /259
3218Bulletin de liaison.510
3499Bulletin.2752
3926Boletín.216
.........
1306798Country commerce.120
1318569Bible studies for life.159
1796742LexisNexis practice guide.101
2628387Operational risk report.119
2650557Interempresas net.108
-

191 rows × 2 columns

-
-
- -
- -
-
- -
-
-
-
In [82]:
-
-
-
repeated_names
-
- -
-
-
- -
-
- - -
- -
Out[82]:
- - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameissn_count
5Activitas Nervosa Superior.2
11Library journal.2
23Acta cardiologica.2
26Actualidad económica.3
31Acta Ornithologica.3
.........
2929626Modern machine shop México.2
2929635Lecture notes in control and information scien...2
2929646Critical Studies in Dance Leadership and Inclu...2
2929691Nigerian Journal of Wildlife Management2
2929702Verzeichniss der Kunstwerke lebender Künstler,...2
-

194241 rows × 2 columns

-
-
- -
- -
-
- -
-
-
-
-

If a name matches a repeated name exactly or fuzzy matches to a repeated name and there is not other information available, the match status must be ambigious.

- -
-
-
-
-
-
In [ ]:
-
-
-
 
-
- -
-
-
- -
-
-
- - - - - - diff --git a/notebooks/Journal_Names.ipynb b/notebooks/Journal_Names.ipynb deleted file mode 100644 index 91da2a2..0000000 --- a/notebooks/Journal_Names.ipynb +++ /dev/null @@ -1,2032 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Journal Names\n", - "\n", - "Questions in the context of fuzzy matching.\n", - "\n", - "* How many journal names appear more than once?\n", - "* What is the average length of the duplicated names vs the unique names?\n", - "\n", - "\n", - "Input file is a single larger JSON, mapping names to issns.\n", - "```json\n", - "{\n", - " \"Acta Orientalia.\": [\n", - " \"0001-6438\"\n", - " ],\n", - " \"Acta Orientalia (København)\": [\n", - " \"0001-6438\"\n", - " ],\n", - "..\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [], - "source": [ - "with open(\"../data/name_to_issn.json\") as f:\n", - " mapping = json.load(f)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We have about 3M keys." - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2938859" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(mapping)" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame(((k, len(v)) for k, v in mapping.items()), columns=[\"name\", \"issn_count\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2938859" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(df)" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nameissn_count
0Acta Orientalia.1
1Acta Orientalia (København)1
2˜The œpublishers weekly.2
3Publishers weekly2
4ASMT news1
\n", - "
" - ], - "text/plain": [ - " name issn_count\n", - "0 Acta Orientalia. 1\n", - "1 Acta Orientalia (København) 1\n", - "2 ˜The œpublishers weekly. 2\n", - "3 Publishers weekly 2\n", - "4 ASMT news 1" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [], - "source": [ - "unique_name = df[df.issn_count == 1]" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [], - "source": [ - "repeated_names = df[df.issn_count > 1]" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "586466" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(repeated_names)" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.1995556779008452" - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(repeated_names) / len(df)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "About 19% (or 586466) names are repeated. " - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
issn_count
count586466.000000
mean2.443930
std15.260303
min2.000000
25%2.000000
50%2.000000
75%2.000000
max9520.000000
\n", - "
" - ], - "text/plain": [ - " issn_count\n", - "count 586466.000000\n", - "mean 2.443930\n", - "std 15.260303\n", - "min 2.000000\n", - "25% 2.000000\n", - "50% 2.000000\n", - "75% 2.000000\n", - "max 9520.000000" - ] - }, - "execution_count": 72, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "repeated_names.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Which name is shared most?" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "name Annual report.\n", - "issn_count 9520\n", - "Name: 45999, dtype: object" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "repeated_names.iloc[repeated_names.issn_count.argmax()] # Annual report." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is the \"Annual report.\"" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['1495-9097',\n", - " '2205-7919',\n", - " '1447-3836',\n", - " '1914-9220',\n", - " '0707-1515',\n", - " '1494-6149',\n", - " '2293-3174',\n", - " '1977-9046',\n", - " '0707-4298',\n", - " '2239-9674']" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mapping[\"Annual report.\"][:10]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "On average a repeated name will point to 3 ISSN. Over 20k names point to more than 3 ISSN." - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "27230" - ] - }, - "execution_count": 76, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(repeated_names[repeated_names.issn_count > 3])" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nameissn_count
1356987Karrier online6
1778072Curriculum express for parents.4
366690Annual Report ....7
878789Brilliant.4
37507Synthesis.35
942931Special Burda11
640073Belle.6
2666134Naše delo6
376646Opinion.12
918869Limited edition magazine.16
\n", - "
" - ], - "text/plain": [ - " name issn_count\n", - "1356987 Karrier online 6\n", - "1778072 Curriculum express for parents. 4\n", - "366690 Annual Report .... 7\n", - "878789 Brilliant. 4\n", - "37507 Synthesis. 35\n", - "942931 Special Burda 11\n", - "640073 Belle. 6\n", - "2666134 Naše delo 6\n", - "376646 Opinion. 12\n", - "918869 Limited edition magazine. 16" - ] - }, - "execution_count": 77, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "repeated_names[repeated_names.issn_count > 3].sample(n=10)" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['1158-9574',\n", - " '2183-0134',\n", - " '1517-8889',\n", - " '1480-4670',\n", - " '0379-8402',\n", - " '1724-6598',\n", - " '2035-8326',\n", - " '2249-5053',\n", - " '0872-4784',\n", - " '2610-8933',\n", - " '2610-8925',\n", - " '2654-9263',\n", - " '2420-9198',\n", - " '2538-693X',\n", - " '1487-5349',\n", - " '1285-9133',\n", - " '2655-5662',\n", - " '2295-9084']" - ] - }, - "execution_count": 78, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mapping[\"Philosophica.\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 79, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAASEklEQVR4nO3df6zddX3H8edrrWDFHxTZbljbrDU2S6pkgDfYRbPcyVIKLismxECIVGTWTEh0I5lF/8CJJLIM3SCKq9JZFhQY4tq4uq5jnBj/AAFllB9ir1BHmwJK+WExkdW998f5VI/lXnp7envv7T3PR3Jyvt/39/P98T7fe/u653u+9zZVhSRpsP3WdB+AJGn6GQaSJMNAkmQYSJIwDCRJwNzpPoB+nXjiibV48eK+1n3xxRc57rjjJveAjiL2b//2P7j933fffT+tqt8+sH7UhsHixYu59957+1q30+kwMjIyuQd0FLF/+7f/kek+jGmT5Mdj1b1MJEkyDCRJhoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkjuLfQD4c23Y9z/vX/ltf6+74zLsn+Wgkafr5zkCSZBhIkgwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkiQmEQZJFSe5M8nCSh5J8pNU/mWRXkvvb4+yedS5PMprk0SRn9tRXttpokrU99SVJ7m71W5IcM9mNSpLGN5F3BvuAy6pqGbAcuCTJsrbsc1V1SntsBmjLzgPeAqwEvpBkTpI5wOeBs4BlwPk927m6bevNwLPAxZPUnyRpAg4aBlW1u6q+16Z/BjwCLHiFVVYBN1fVL6rqcWAUOL09Rqvqsap6CbgZWJUkwLuA29r6G4Bz+uxHktSHQ/rMIMli4FTg7la6NMkDSdYnmd9qC4Anelbb2Wrj1d8IPFdV+w6oS5KmyIT/c5skrwW+Dny0ql5Icj1wJVDt+RrgA0fkKH99DGuANQBDQ0N0Op2+tjM0Dy47ed/BB46h333OJHv37p0VffTL/u1/kPsfz4TCIMmr6AbBTVV1O0BVPdWz/EvAN9vsLmBRz+oLW41x6s8AxyeZ294d9I7/DVW1DlgHMDw8XCMjIxM5/Je57qaNXLOtv//kbccF/e1zJul0OvT72s0G9m//g9z/eCZyN1GAG4BHquqzPfWTeoa9B3iwTW8CzktybJIlwFLgu8A9wNJ259AxdD9k3lRVBdwJnNvWXw1sPLy2JEmHYiI/Hr8DeB+wLcn9rfZxuncDnUL3MtEO4EMAVfVQkluBh+neiXRJVf0SIMmlwBZgDrC+qh5q2/sYcHOSTwPfpxs+kqQpctAwqKrvABlj0eZXWOcq4Kox6pvHWq+qHqN7t5EkaRr4G8iSJMNAkmQYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJCYQBkkWJbkzycNJHkrykVY/IcnWJNvb8/xWT5Jrk4wmeSDJaT3bWt3Gb0+yuqf+tiTb2jrXJsmRaFaSNLaJvDPYB1xWVcuA5cAlSZYBa4E7qmopcEebBzgLWNoea4DroRsewBXA24HTgSv2B0gb88Ge9VYefmuSpIk6aBhU1e6q+l6b/hnwCLAAWAVsaMM2AOe06VXAjdV1F3B8kpOAM4GtVbWnqp4FtgIr27LXV9VdVVXAjT3bkiRNgUP6zCDJYuBU4G5gqKp2t0VPAkNtegHwRM9qO1vtleo7x6hLkqbI3IkOTPJa4OvAR6vqhd7L+lVVSeoIHN+Bx7CG7qUnhoaG6HQ6fW1naB5cdvK+vtbtd58zyd69e2dFH/2yf/sf5P7HM6EwSPIqukFwU1Xd3spPJTmpqna3Sz1Pt/ouYFHP6gtbbRcwckC90+oLxxj/MlW1DlgHMDw8XCMjI2MNO6jrbtrINdsmnIO/YccF/e1zJul0OvT72s0G9m//g9z/eCZyN1GAG4BHquqzPYs2AfvvCFoNbOypX9juKloOPN8uJ20BViSZ3z44XgFsacteSLK87evCnm1JkqbARH48fgfwPmBbkvtb7ePAZ4Bbk1wM/Bh4b1u2GTgbGAV+DlwEUFV7klwJ3NPGfaqq9rTpDwNfAeYB32oPSdIUOWgYVNV3gPHu+z9jjPEFXDLOttYD68eo3wu89WDHIkk6MvwNZEmSYSBJMgwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kSEwiDJOuTPJ3kwZ7aJ5PsSnJ/e5zds+zyJKNJHk1yZk99ZauNJlnbU1+S5O5WvyXJMZPZoCTp4CbyzuArwMox6p+rqlPaYzNAkmXAecBb2jpfSDInyRzg88BZwDLg/DYW4Oq2rTcDzwIXH05DkqRDd9AwqKpvA3smuL1VwM1V9YuqehwYBU5vj9GqeqyqXgJuBlYlCfAu4La2/gbgnENrQZJ0uA7nM4NLkzzQLiPNb7UFwBM9Y3a22nj1NwLPVdW+A+qSpCk0t8/1rgeuBKo9XwN8YLIOajxJ1gBrAIaGhuh0On1tZ2geXHbyvoMPHEO/+5xJ9u7dOyv66Jf92/8g9z+evsKgqp7aP53kS8A32+wuYFHP0IWtxjj1Z4Djk8xt7w56x4+133XAOoDh4eEaGRnp5/C57qaNXLOtvxzccUF/+5xJOp0O/b52s4H92/8g9z+evi4TJTmpZ/Y9wP47jTYB5yU5NskSYCnwXeAeYGm7c+gYuh8yb6qqAu4Ezm3rrwY29nNMkqT+HfTH4yRfA0aAE5PsBK4ARpKcQvcy0Q7gQwBV9VCSW4GHgX3AJVX1y7adS4EtwBxgfVU91HbxMeDmJJ8Gvg/cMFnNSZIm5qBhUFXnj1Ee9x/sqroKuGqM+mZg8xj1x+jebSRJmib+BrIkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAksQEwiDJ+iRPJ3mwp3ZCkq1Jtrfn+a2eJNcmGU3yQJLTetZZ3cZvT7K6p/62JNvaOtcmyWQ3KUl6ZRN5Z/AVYOUBtbXAHVW1FLijzQOcBSxtjzXA9dAND+AK4O3A6cAV+wOkjflgz3oH7kuSdIQdNAyq6tvAngPKq4ANbXoDcE5P/cbqugs4PslJwJnA1qraU1XPAluBlW3Z66vqrqoq4MaebUmSpsjcPtcbqqrdbfpJYKhNLwCe6Bm3s9Veqb5zjPqYkqyh+46DoaEhOp1Ofwc/Dy47eV9f6/a7z5lk7969s6KPftm//Q9y/+PpNwx+paoqSU3GwUxgX+uAdQDDw8M1MjLS13auu2kj12zrr/UdF/S3z5mk0+nQ72s3G9i//Q9y/+Pp926ip9olHtrz062+C1jUM25hq71SfeEYdUnSFOo3DDYB++8IWg1s7Klf2O4qWg483y4nbQFWJJnfPjheAWxpy15IsrzdRXRhz7YkSVPkoNdKknwNGAFOTLKT7l1BnwFuTXIx8GPgvW34ZuBsYBT4OXARQFXtSXIlcE8b96mq2v+h9Ifp3rE0D/hWe0iSptBBw6Cqzh9n0RljjC3gknG2sx5YP0b9XuCtBzsOSdKR428gS5IMA0mSYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJI4zDBIsiPJtiT3J7m31U5IsjXJ9vY8v9WT5Noko0keSHJaz3ZWt/Hbk6w+vJYkSYdqMt4Z/HFVnVJVw21+LXBHVS0F7mjzAGcBS9tjDXA9dMMDuAJ4O3A6cMX+AJEkTY0jcZloFbChTW8Azump31hddwHHJzkJOBPYWlV7qupZYCuw8ggclyRpHHMPc/0C/iNJAf9YVeuAoara3ZY/CQy16QXAEz3r7my18eovk2QN3XcVDA0N0el0+jrooXlw2cn7+lq3333OJHv37p0VffTL/u1/kPsfz+GGwTuraleS3wG2JvlB78KqqhYUk6KFzTqA4eHhGhkZ6Ws71920kWu29df6jgv62+dM0ul06Pe1mw3s3/4Huf/xHNZloqra1Z6fBr5B95r/U+3yD+356TZ8F7CoZ/WFrTZeXZI0RfoOgyTHJXnd/mlgBfAgsAnYf0fQamBjm94EXNjuKloOPN8uJ20BViSZ3z44XtFqkqQpcjiXiYaAbyTZv52vVtW/J7kHuDXJxcCPgfe28ZuBs4FR4OfARQBVtSfJlcA9bdynqmrPYRyXJOkQ9R0GVfUY8Adj1J8BzhijXsAl42xrPbC+32ORJB0efwNZkmQYSJIMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAksQMCoMkK5M8mmQ0ydrpPh5JGiQzIgySzAE+D5wFLAPOT7Jseo9KkgbHjAgD4HRgtKoeq6qXgJuBVdN8TJI0MOZO9wE0C4AneuZ3Am8/cFCSNcCaNrs3yaN97u9E4Kf9rJir+9zjzNJ3/7OE/dv/IPf/e2MVZ0oYTEhVrQPWHe52ktxbVcOTcEhHJfu3f/sf3P7HM1MuE+0CFvXML2w1SdIUmClhcA+wNMmSJMcA5wGbpvmYJGlgzIjLRFW1L8mlwBZgDrC+qh46grs87EtNRzn7H2z2r5dJVU33MUiSptlMuUwkSZpGhoEkabDCYLb+yYski5LcmeThJA8l+Uirn5Bka5Lt7Xl+qyfJte11eCDJaT3bWt3Gb0+yerp66keSOUm+n+SbbX5Jkrtbn7e0mxNIcmybH23LF/ds4/JWfzTJmdPUyiFLcnyS25L8IMkjSf5wkM5/kr9sX/sPJvlaklcP0vmfFFU1EA+6H0z/CHgTcAzw38Cy6T6uSertJOC0Nv064Id0/6zH3wJrW30tcHWbPhv4FhBgOXB3q58APNae57fp+dPd3yG8Dn8FfBX4Zpu/FTivTX8R+Is2/WHgi236POCWNr2sfV0cCyxpXy9zpruvCfa+AfjzNn0McPygnH+6v7T6ODCv57y/f5DO/2Q8Bumdwaz9kxdVtbuqvtemfwY8QvcbZBXdfyRoz+e06VXAjdV1F3B8kpOAM4GtVbWnqp4FtgIrp66T/iVZCLwb+HKbD/Au4LY25MD+978utwFntPGrgJur6hdV9TgwSvfrZkZL8gbgj4AbAKrqpap6jgE6/3TvjJyXZC7wGmA3A3L+J8sghcFYf/JiwTQdyxHT3vKeCtwNDFXV7rboSWCoTY/3WhzNr9HfA38N/F+bfyPwXFXta/O9vfyqz7b8+Tb+aO1/CfAT4J/aZbIvJzmOATn/VbUL+Dvgf+iGwPPAfQzO+Z8UgxQGs16S1wJfBz5aVS/0Lqvu++BZeR9xkj8Fnq6q+6b7WKbJXOA04PqqOhV4ke5loV+Z5ed/Pt2f6pcAvwscx9HzjmbGGKQwmNV/8iLJq+gGwU1VdXsrP9Xe/tOen2718V6Lo/U1egfwZ0l20L389y7gH+he/tj/i5W9vfyqz7b8DcAzHL397wR2VtXdbf42uuEwKOf/T4DHq+onVfW/wO10vyYG5fxPikEKg1n7Jy/a9c4bgEeq6rM9izYB++8IWQ1s7Klf2O4qWQ483y4nbAFWJJnfftpa0WozWlVdXlULq2ox3fP6X1V1AXAncG4bdmD/+1+Xc9v4avXz2t0mS4ClwHenqI2+VdWTwBNJfr+VzgAeZkDOP93LQ8uTvKZ9L+zvfyDO/6SZ7k+wp/JB9y6KH9K9S+AT0308k9jXO+leAngAuL89zqZ7HfQOYDvwn8AJbXzo/mdCPwK2AcM92/oA3Q/ORoGLpru3Pl6LEX59N9Gb6H4zjwL/Ahzb6q9u86Nt+Zt61v9Ee10eBc6a7n4Ooe9TgHvb18C/0r0baGDOP/A3wA+AB4F/pntH0MCc/8l4+OcoJEkDdZlIkjQOw0CSZBhIkgwDSRKGgSQJw0CShGEgSQL+Hz8ESgUOWx2AAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "repeated_names[repeated_names.issn_count > 3].issn_count.hist(bins=20)" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 80, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAAW/UlEQVR4nO3df6zddZ3n8edr8ccaGBcQ56ZDcYuZ6gZkpsoNsNGdXGWFimaKG8NCWCnKWI2QaNLNbHUnwZUlYXZH3SXrMFuHhpJ1qGQQaQQXO11PdJKtUpTlpywFS2i3tJGieHXCbp33/nE+13tuve09Pef+svf5SE7O97y/n+/3fs6bXF79fs/33G+qCknS0vYPFnoCkqSFZxhIkgwDSZJhIEnCMJAkAa9Y6AkM6rTTTqsVK1Ys9DTmzM9//nNOPPHEhZ7GomAvprIfU9mPSf304sEHH/xxVb3+8PpvbBisWLGCnTt3LvQ05kyn02FsbGyhp7Eo2Iup7MdU9mNSP71I8ux0dU8TSZIMA0mSYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSSJ3+BvIA9jxYZ7F+Tn7r7pvQvycyVpJh4ZSJIMA0mSYSBJwjCQJGEYSJLoIwySnJHkW0keT/JYkk+0+qlJtiV5qj2f0upJcnOSXUkeTvK2nn2tbeOfSrK2p35ukkfaNjcnyVy8WUnS9Po5MjgErK+qs4ALgGuTnAVsALZX1Upge3sN8B5gZXusA26BbngA1wPnA+cB108ESBvzkZ7tVg//1iRJ/ZoxDKpqX1V9vy3/DHgCOB1YA2xuwzYDl7blNcDt1bUDODnJMuBiYFtVHayqF4FtwOq27rVVtaOqCri9Z1+SpHlwTF86S7ICeCvwXWCkqva1Vc8DI235dOC5ns32tNrR6numqU/389fRPdpgZGSETqdzLNP/lfXnHBpou2Edy3zHx8cHfn/HG3sxlf2Yyn5MGqYXfYdBkpOAu4BPVtVLvaf1q6qS1EAzOAZVtRHYCDA6OlqD3vf06oX6BvKVY32P9b6uk+zFVPZjKvsxaZhe9HU1UZJX0g2CL1fVV1t5fzvFQ3s+0Op7gTN6Nl/eakerL5+mLkmaJ/1cTRTgVuCJqvp8z6qtwMQVQWuBe3rqV7Wrii4AftpOJ90PXJTklPbB8UXA/W3dS0kuaD/rqp59SZLmQT+nid4OfBB4JMlDrfZp4CbgziTXAM8Cl7V19wGXALuAXwAfAqiqg0luAB5o4z5bVQfb8seB24DXAN9oD0nSPJkxDKrqb4EjXfd/4TTjC7j2CPvaBGyapr4TeMtMc5EkzQ2/gSxJMgwkSYaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSfR328tNSQ4kebSn9pUkD7XH7ok7oCVZkeTvetb9Rc825yZ5JMmuJDe3W1yS5NQk25I81Z5PmYP3KUk6in6ODG4DVvcWqupfVtWqqloF3AV8tWf10xPrqupjPfVbgI8AK9tjYp8bgO1VtRLY3l5LkubRjGFQVd8GDk63rv3r/jLgjqPtI8ky4LVVtaPdFvN24NK2eg2wuS1v7qlLkubJjPdAnsE/A/ZX1VM9tTOT/AB4CfiTqvoOcDqwp2fMnlYDGKmqfW35eWDkSD8syTpgHcDIyAidTmegSa8/59BA2w3rWOY7Pj4+8Ps73tiLqezHVPZj0jC9GDYMrmDqUcE+4A1V9UKSc4GvJTm7351VVSWpo6zfCGwEGB0drbGxsYEmffWGewfabli7rxzre2yn02HQ93e8sRdT2Y+p7MekYXoxcBgkeQXwL4BzJ2pV9TLwclt+MMnTwJuAvcDyns2XtxrA/iTLqmpfO510YNA5SZIGM8ylpf8c+GFV/er0T5LXJzmhLb+R7gfFz7TTQC8luaB9znAVcE/bbCuwti2v7alLkuZJP5eW3gH8T+DNSfYkuaatupxf/+D4D4CH26Wmfw18rKomPnz+OPCXwC7gaeAbrX4T8O4kT9ENmJsGfzuSpEHMeJqoqq44Qv3qaWp30b3UdLrxO4G3TFN/AbhwpnlIkuaO30CWJBkGkiTDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSfR3c5tNSQ4kebSn9pkke5M81B6X9Kz7VJJdSZ5McnFPfXWr7Uqyoad+ZpLvtvpXkrxqNt+gJGlm/RwZ3Aasnqb+hapa1R73ASQ5i+4d0M5u2/x5khParTC/CLwHOAu4oo0F+NO2r98FXgSuOfwHSZLm1oxhUFXfBg7ONK5ZA2ypqper6kd0b3F5Xnvsqqpnqur/AluANe1+yO+ie4tMgM3Apcf2FiRJw5rxtpdHcV2Sq4CdwPqqehE4HdjRM2ZPqwE8d1j9fOB1wE+q6tA0439NknXAOoCRkRE6nc5AE19/zqGZB82BY5nv+Pj4wO/veGMvprIfU9mPScP0YtAwuAW4Aaj2/DngwwPuq29VtRHYCDA6OlpjY2MD7efqDffO4qz6t/vKsb7HdjodBn1/xxt7MZX9mMp+TBqmFwOFQVXtn1hO8iXg6+3lXuCMnqHLW40j1F8ATk7yinZ00DtekjRPBrq0NMmynpfvByauNNoKXJ7k1UnOBFYC3wMeAFa2K4deRfdD5q1VVcC3gA+07dcC9wwyJ0nS4GY8MkhyBzAGnJZkD3A9MJZkFd3TRLuBjwJU1WNJ7gQeBw4B11bVL9t+rgPuB04ANlXVY+1H/BtgS5J/D/wAuHW23pwkqT8zhkFVXTFN+Yj/w66qG4Ebp6nfB9w3Tf0ZulcbSZIWiN9AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIk+giDJJuSHEjyaE/tPyb5YZKHk9yd5ORWX5Hk75I81B5/0bPNuUkeSbIryc1J0uqnJtmW5Kn2fMocvE9J0lH0c2RwG7D6sNo24C1V9XvA/wY+1bPu6apa1R4f66nfAnyE7n2RV/bscwOwvapWAtvba0nSPJoxDKrq28DBw2rfrKpD7eUOYPnR9pFkGfDaqtpRVQXcDlzaVq8BNrflzT11SdI8mfEeyH34MPCVntdnJvkB8BLwJ1X1HeB0YE/PmD2tBjBSVfva8vPAyJF+UJJ1wDqAkZEROp3OQBNef86hmQfNgWOZ7/j4+MDv73hjL6ayH1PZj0nD9GKoMEjyb4FDwJdbaR/whqp6Icm5wNeSnN3v/qqqktRR1m8ENgKMjo7W2NjYQPO+esO9A203rN1XjvU9ttPpMOj7O97Yi6nsx1T2Y9IwvRg4DJJcDbwPuLCd+qGqXgZebssPJnkaeBOwl6mnkpa3GsD+JMuqal87nXRg0DlJkgYz0KWlSVYDfwz8YVX9oqf++iQntOU30v2g+Jl2GuilJBe0q4iuAu5pm20F1rbltT11SdI8mfHIIMkdwBhwWpI9wPV0rx56NbCtXSG6o1059AfAZ5P8P+DvgY9V1cSHzx+ne2XSa4BvtAfATcCdSa4BngUum5V3Jknq24xhUFVXTFO+9Qhj7wLuOsK6ncBbpqm/AFw40zwkSXPHbyBLkgwDSZJhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRJ9hkGSTUkOJHm0p3Zqkm1JnmrPp7R6ktycZFeSh5O8rWebtW38U0nW9tTPTfJI2+bmdmtMSdI86ffI4DZg9WG1DcD2qloJbG+vAd5D997HK4F1wC3QDQ+6t8w8HzgPuH4iQNqYj/Rsd/jPkiTNob7CoKq+DRw8rLwG2NyWNwOX9tRvr64dwMlJlgEXA9uq6mBVvQhsA1a3da+tqh1VVcDtPfuSJM2DGe+BfBQjVbWvLT8PjLTl04HnesbtabWj1fdMU/81SdbRPdpgZGSETqcz0MTXn3NooO2GdSzzHR8fH/j9HW/sxVT2Yyr7MWmYXgwTBr9SVZWkZmNfM/ycjcBGgNHR0RobGxtoP1dvuHcWZ9W/3VeO9T220+kw6Ps73tiLqezHVPZj0jC9GOZqov3tFA/t+UCr7wXO6Bm3vNWOVl8+TV2SNE+GCYOtwMQVQWuBe3rqV7Wrii4AftpOJ90PXJTklPbB8UXA/W3dS0kuaFcRXdWzL0nSPOjrNFGSO4Ax4LQke+heFXQTcGeSa4Bngcva8PuAS4BdwC+ADwFU1cEkNwAPtHGfraqJD6U/TveKpdcA32gPSdI86SsMquqKI6y6cJqxBVx7hP1sAjZNU98JvKWfuUiSZp/fQJYkGQaSJMNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJDBEGSd6c5KGex0tJPpnkM0n29tQv6dnmU0l2JXkyycU99dWttivJhmHflCTp2PR1c5vpVNWTwCqAJCfQvW/x3XTvbPaFqvqz3vFJzgIuB84Gfgf4myRvaqu/CLwb2AM8kGRrVT0+6NwkScdm4DA4zIXA01X1bPc2xtNaA2ypqpeBHyXZBZzX1u2qqmcAkmxpYw0DSZons/WZweXAHT2vr0vycJJNSU5ptdOB53rG7Gm1I9UlSfMk3VsWD7GD5FXA/wHOrqr9SUaAHwMF3AAsq6oPJ/kvwI6q+m9tu1uZvPH96qr6o1b/IHB+VV03zc9aB6wDGBkZOXfLli0DzfmRvT8daLthnXP6P+p77Pj4OCeddNIczuY3h72Yyn5MZT8m9dOLd77znQ9W1ejh9dk4TfQe4PtVtR9g4hkgyZeAr7eXe4EzerZb3mocpT5FVW0ENgKMjo7W2NjYQBO+esO9A203rN1XjvU9ttPpMOj7O97Yi6nsx1T2Y9IwvZiN00RX0HOKKMmynnXvBx5ty1uBy5O8OsmZwErge8ADwMokZ7ajjMvbWEnSPBnqyCDJiXSvAvpoT/k/JFlF9zTR7ol1VfVYkjvpfjB8CLi2qn7Z9nMdcD9wArCpqh4bZl6SpGMzVBhU1c+B1x1W++BRxt8I3DhN/T7gvmHmIkkanN9AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkZiEMkuxO8kiSh5LsbLVTk2xL8lR7PqXVk+TmJLuSPJzkbT37WdvGP5Vk7bDzkiT1b7aODN5ZVauqarS93gBsr6qVwPb2GuA9dO99vBJYB9wC3fAArgfOB84Drp8IEEnS3Jur00RrgM1teTNwaU/99uraAZycZBlwMbCtqg5W1YvANmD1HM1NknSYoe6B3BTwzSQF/Neq2giMVNW+tv55YKQtnw4817PtnlY7Un2KJOvoHlEwMjJCp9MZaMLrzzk00HbDOpb5jo+PD/z+jjf2Yir7MZX9mDRML2YjDN5RVXuT/DawLckPe1dWVbWgGFoLmo0Ao6OjNTY2NtB+rt5w72xM55jtvnKs77GdTodB39/xxl5MZT+msh+ThunF0KeJqmpvez4A3E33nP/+dvqH9nygDd8LnNGz+fJWO1JdkjQPhgqDJCcm+a2JZeAi4FFgKzBxRdBa4J62vBW4ql1VdAHw03Y66X7goiSntA+OL2o1SdI8GPY00Qhwd5KJff1VVf33JA8Adya5BngWuKyNvw+4BNgF/AL4EEBVHUxyA/BAG/fZqjo45NwkSX0aKgyq6hng96epvwBcOE29gGuPsK9NwKZh5iNJGozfQJYkGQaSJMNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJIYIgyRnJPlWkseTPJbkE63+mSR7kzzUHpf0bPOpJLuSPJnk4p766lbblWTDcG9JknSshrnT2SFgfVV9v90H+cEk29q6L1TVn/UOTnIWcDlwNvA7wN8keVNb/UXg3cAe4IEkW6vq8SHmtiit2HBv32PXn3OIq49h/Ex23/TeWduXpOPPwGHQbmS/ry3/LMkTwOlH2WQNsKWqXgZ+lGQXcF5bt6vdQpMkW9rY4y4MJGmxGuoeyBOSrADeCnwXeDtwXZKrgJ10jx5epBsUO3o228NkeDx3WP38I/ycdcA6gJGRETqdzkDzXX/OoYG2m08jr5ndeQ7aq8VgfHz8N3r+s81+TGU/Jg3Ti6HDIMlJwF3AJ6vqpSS3ADcA1Z4/B3x42J8DUFUbgY0Ao6OjNTY2NtB+ZvP0y1xZf84hPvfIrGQ1ALuvHJu1fc23TqfDoP+tj0f2Yyr7MWmYXgz1f5skr6QbBF+uqq8CVNX+nvVfAr7eXu4FzujZfHmrcZS6JGkeDHM1UYBbgSeq6vM99WU9w94PPNqWtwKXJ3l1kjOBlcD3gAeAlUnOTPIquh8ybx10XpKkYzfMkcHbgQ8CjyR5qNU+DVyRZBXd00S7gY8CVNVjSe6k+8HwIeDaqvolQJLrgPuBE4BNVfXYEPOSJB2jYa4m+lsg06y67yjb3AjcOE39vqNtJ0maW34DWZJkGEiSDANJEoaBJAnDQJLELP05Ci1+x/JH8maTfyBP+s3gkYEkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEnC7xlojs3G9xvWn3NooLvT+R0HqX8eGUiSPDLQ8ctvXUv9WzRHBklWJ3kyya4kGxZ6PpK0lCyKI4MkJwBfBN4N7AEeSLK1qh5f2JlJx26uj0gG/QxlLnk09JtvUYQBcB6wq6qeAUiyBVhD937Jkha5hTolB4szHOfSXAVvqmpOdnxMk0g+AKyuqj9qrz8InF9V1x02bh2wrr18M/DkvE50fp0G/HihJ7FI2Iup7MdU9mNSP734x1X1+sOLi+XIoC9VtRHYuNDzmA9JdlbV6ELPYzGwF1PZj6nsx6RherFYPkDeC5zR83p5q0mS5sFiCYMHgJVJzkzyKuByYOsCz0mSloxFcZqoqg4luQ64HzgB2FRVjy3wtBbakjgd1id7MZX9mMp+TBq4F4viA2RJ0sJaLKeJJEkLyDCQJBkGCy3JpiQHkjzaUzs1ybYkT7XnUxZyjvMpyRlJvpXk8SSPJflEqy+5niT5h0m+l+R/tV78u1Y/M8l3259u+Uq76GLJSHJCkh8k+Xp7vWT7kWR3kkeSPJRkZ6sN9LtiGCy824DVh9U2ANuraiWwvb1eKg4B66vqLOAC4NokZ7E0e/Iy8K6q+n1gFbA6yQXAnwJfqKrfBV4Erlm4KS6ITwBP9Lxe6v14Z1Wt6vl+wUC/K4bBAquqbwMHDyuvATa35c3ApfM5p4VUVfuq6vtt+Wd0f+lPZwn2pLrG28tXtkcB7wL+utWXRC8mJFkOvBf4y/Y6LOF+HMFAvyuGweI0UlX72vLzwMhCTmahJFkBvBX4Lku0J+2UyEPAAWAb8DTwk6o61IbsoRuWS8V/Av4Y+Pv2+nUs7X4U8M0kD7Y/1wMD/q4siu8Z6MiqqpIsuet/k5wE3AV8sqpe6v4DsGsp9aSqfgmsSnIycDfwTxZ2RgsnyfuAA1X1YJKxBZ7OYvGOqtqb5LeBbUl+2LvyWH5XPDJYnPYnWQbQng8s8HzmVZJX0g2CL1fVV1t5Sfekqn4CfAv4p8DJSSb+IbeU/nTL24E/TLIb2EL39NB/Zun2g6ra254P0P3HwnkM+LtiGCxOW4G1bXktcM8CzmVetXPAtwJPVNXne1YtuZ4keX07IiDJa+je7+MJuqHwgTZsSfQCoKo+VVXLq2oF3T9Z8z+q6kqWaD+SnJjktyaWgYuARxnwd8VvIC+wJHcAY3T/9Ox+4Hrga8CdwBuAZ4HLqurwD5mPS0neAXwHeITJ88Kfpvu5wZLqSZLfo/sB4Al0/+F2Z1V9Nskb6f7L+FTgB8C/qqqXF26m86+dJvrXVfW+pdqP9r7vbi9fAfxVVd2Y5HUM8LtiGEiSPE0kSTIMJEkYBpIkDANJEoaBJAnDQJKEYSBJAv4/mF0rfaZ2dTEAAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "repeated_names[(repeated_names.issn_count > 3) & (repeated_names.issn_count < 50)].issn_count.hist(bins=10)" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAAWyklEQVR4nO3df7DddX3n8edryaIglYC0t26S2TA1tYNQXbgLdJ3tXqQDQR3DH9bBYUu0rJlt0Vo3swp2usyqzGCVpcJWdrKSBdoMkVK7ySiKGeSu05kGEX8Qfmi5ixGSRWINxI0/N+57/zifrIfLveTec27u+aZ5Pmbu3PP9fD/f73l9Q25e9/s933NIVSFJOrr9o1EHkCSNnmUgSbIMJEmWgSQJy0CSBCwZdYBBnXLKKbVy5cqBtv3BD37AS17ykoUNtMC6nrHr+aD7GbueD7qfsev5oHsZH3jggb+vql983oqqOiK/zjrrrBrUvffeO/C2i6XrGbuer6r7Gbuer6r7Gbuer6p7GYEv1wz/pnqZSJJkGUiSLANJEpaBJAnLQJKEZSBJwjKQJGEZSJKwDCRJHMEfRzGMHbv38bYrP7Poz7vz2jcs+nNK0lx4ZiBJsgwkSZaBJAnLQJKEZSBJwjKQJGEZSJKwDCRJzKEMkmxMsifJQzOsW5+kkpzSlpPkhiRTSR5Mcmbf3LVJHmtfa/vGz0qyo21zQ5Is1MFJkuZmLmcGtwCrpw8mWQFcADzRN3wRsKp9rQNuanNPBq4GzgHOBq5OclLb5ibgHX3bPe+5JEmH1yHLoKq+COydYdX1wHuB6htbA9zW/r/L24GlSV4OXAhsq6q9VfUMsA1Y3da9tKq2t/9R823AxUMdkSRp3gb6bKIka4DdVfX1aVd1lgFP9i3vamMvNL5rhvHZnncdvTMOxsbGmJycHCQ+Y8fB+jMODLTtMOaTd//+/QMf32Loej7ofsau54PuZ+x6PjgyMsIAZZDkeOD99C4RLaqq2gBsABgfH6+JiYmB9nPjpi1ct2PxP6Nv56UTc547OTnJoMe3GLqeD7qfsev5oPsZu54PjoyMMNjdRL8CnAp8PclOYDnwlSS/DOwGVvTNXd7GXmh8+QzjkqRFNO8yqKodVfVLVbWyqlbSu7RzZlV9B9gKXNbuKjoX2FdVTwF3AxckOam9cHwBcHdb9/0k57a7iC4DtizQsUmS5mgut5beDvwt8Moku5Jc/gLT7wIeB6aA/wr8PkBV7QU+CNzfvj7QxmhzPtG2+Z/AZwc7FEnSoA554byq3nqI9Sv7HhdwxSzzNgIbZxj/MnD6oXJIkg4f34EsSbIMJEmWgSQJy0CShGUgScIykCRhGUiSsAwkSVgGkiQsA0kSloEkCctAkoRlIEnCMpAkYRlIkrAMJElYBpIkLANJEpaBJIk5lEGSjUn2JHmob+wjSb6R5MEkf51kad+6q5JMJflmkgv7xle3sakkV/aNn5rkvjb+ySTHLuDxSZLmYC5nBrcAq6eNbQNOr6pfB/4OuAogyWnAJcCr2jYfT3JMkmOAPwMuAk4D3trmAnwYuL6qXgE8A1w+1BFJkubtkGVQVV8E9k4b+3xVHWiL24Hl7fEaYHNV/aSqvgVMAWe3r6mqeryqfgpsBtYkCfA64M62/a3AxcMdkiRpvpYswD5+F/hke7yMXjkctKuNATw5bfwc4GXAs33F0j//eZKsA9YBjI2NMTk5OVDgseNg/RkHDj1xgc0n7/79+wc+vsXQ9XzQ/Yxdzwfdz9j1fHBkZIQhyyDJHwEHgE0LE+eFVdUGYAPA+Ph4TUxMDLSfGzdt4bodC9GD87Pz0ok5z52cnGTQ41sMXc8H3c/Y9XzQ/YxdzwdHRkYYogySvA14I3B+VVUb3g2s6Ju2vI0xy/j3gKVJlrSzg/75kqRFMtCtpUlWA+8F3lRVP+xbtRW4JMmLkpwKrAK+BNwPrGp3Dh1L70Xmra1E7gXe3LZfC2wZ7FAkSYOay62ltwN/C7wyya4klwP/GfgFYFuSryX5LwBV9TBwB/AI8Dngiqr6Wfut/53A3cCjwB1tLsD7gH+XZIreawg3L+gRSpIO6ZCXiarqrTMMz/oPdlVdA1wzw/hdwF0zjD9O724jSdKI+A5kSZJlIEmyDCRJWAaSJCwDSRKWgSQJy0CShGUgScIykCRhGUiSsAwkSVgGkiQsA0kSloEkCctAkoRlIEnCMpAkYRlIkrAMJEnMoQySbEyyJ8lDfWMnJ9mW5LH2/aQ2niQ3JJlK8mCSM/u2WdvmP5Zkbd/4WUl2tG1uSJKFPkhJ0guby5nBLcDqaWNXAvdU1SrgnrYMcBGwqn2tA26CXnkAVwPnAGcDVx8skDbnHX3bTX8uSdJhdsgyqKovAnunDa8Bbm2PbwUu7hu/rXq2A0uTvBy4ENhWVXur6hlgG7C6rXtpVW2vqgJu69uXJGmRLBlwu7Gqeqo9/g4w1h4vA57sm7erjb3Q+K4ZxmeUZB29Mw7GxsaYnJwcLPxxsP6MAwNtO4z55N2/f//Ax7cYup4Pup+x6/mg+xm7ng+OjIwweBn8f1VVSWohwszhuTYAGwDGx8drYmJioP3cuGkL1+0Y+tDnbeelE3OeOzk5yaDHtxi6ng+6n7Hr+aD7GbueD46MjDD43URPt0s8tO972vhuYEXfvOVt7IXGl88wLklaRIOWwVbg4B1Ba4EtfeOXtbuKzgX2tctJdwMXJDmpvXB8AXB3W/f9JOe2u4gu69uXJGmRHPJaSZLbgQnglCS76N0VdC1wR5LLgW8Db2nT7wJeD0wBPwTeDlBVe5N8ELi/zftAVR18Ufr36d2xdBzw2fYlSVpEhyyDqnrrLKvOn2FuAVfMsp+NwMYZxr8MnH6oHJKkw8d3IEuSLANJkmUgScIykCRhGUiSsAwkSVgGkiQsA0kSloEkCctAkoRlIEnCMpAkYRlIkrAMJElYBpIkLANJEpaBJAnLQJLEkGWQ5D1JHk7yUJLbk7w4yalJ7ksyleSTSY5tc1/Ulqfa+pV9+7mqjX8zyYVDHpMkaZ4GLoMky4A/AMar6nTgGOAS4MPA9VX1CuAZ4PK2yeXAM238+jaPJKe17V4FrAY+nuSYQXNJkuZv2MtES4DjkiwBjgeeAl4H3NnW3wpc3B6vacu09ecnSRvfXFU/qapvAVPA2UPmkiTNw8BlUFW7gY8CT9ArgX3AA8CzVXWgTdsFLGuPlwFPtm0PtPkv6x+fYRtJ0iJYMuiGSU6i91v9qcCzwF/Su8xz2CRZB6wDGBsbY3JycqD9jB0H6884cOiJC2w+effv3z/w8S2GrueD7mfsej7ofsau54MjIyMMUQbAbwHfqqrvAiT5FPBaYGmSJe23/+XA7jZ/N7AC2NUuK50IfK9v/KD+bZ6jqjYAGwDGx8drYmJioOA3btrCdTuGOfTB7Lx0Ys5zJycnGfT4FkPX80H3M3Y9H3Q/Y9fzwZGREYZ7zeAJ4Nwkx7dr/+cDjwD3Am9uc9YCW9rjrW2Ztv4LVVVt/JJ2t9GpwCrgS0PkkiTN08C/HlfVfUnuBL4CHAC+Su+39s8Am5N8qI3d3Da5GfjzJFPAXnp3EFFVDye5g16RHACuqKqfDZpLkjR/Q10rqaqrgaunDT/ODHcDVdWPgd+eZT/XANcMk0WSNDjfgSxJsgwkSZaBJAnLQJKEZSBJwjKQJGEZSJKwDCRJWAaSJCwDSRKWgSQJy0CShGUgScIykCRhGUiSsAwkSVgGkiQsA0kSloEkiSHLIMnSJHcm+UaSR5P8RpKTk2xL8lj7flKbmyQ3JJlK8mCSM/v2s7bNfyzJ2mEPSpI0P8OeGXwM+FxV/RrwauBR4ErgnqpaBdzTlgEuAla1r3XATQBJTgauBs4BzgauPlggkqTFMXAZJDkR+E3gZoCq+mlVPQusAW5t024FLm6P1wC3Vc92YGmSlwMXAtuqam9VPQNsA1YPmkuSNH+pqsE2TF4DbAAeoXdW8ADwbmB3VS1tcwI8U1VLk3wauLaq/qatuwd4HzABvLiqPtTG/xj4UVV9dIbnXEfvrIKxsbGzNm/ePFD2PXv38fSPBtp0KGcsO3HOc/fv388JJ5xwGNMMp+v5oPsZu54Pup+x6/mgexnPO++8B6pqfPr4kiH2uQQ4E3hXVd2X5GP8/JIQAFVVSQZrmxlU1QZ6BcT4+HhNTEwMtJ8bN23huh3DHPpgdl46Mee5k5OTDHp8i6Hr+aD7GbueD7qfsev54MjICMO9ZrAL2FVV97XlO+mVw9Pt8g/t+562fjewom/75W1stnFJ0iIZuAyq6jvAk0le2YbOp3fJaCtw8I6gtcCW9ngrcFm7q+hcYF9VPQXcDVyQ5KT2wvEFbUyStEiGvVbyLmBTkmOBx4G30yuYO5JcDnwbeEubexfwemAK+GGbS1XtTfJB4P427wNVtXfIXJKkeRiqDKrqa8DzXoigd5YwfW4BV8yyn43AxmGySJIG5zuQJUmWgSTJMpAkYRlIkrAMJElYBpIkLANJEpaBJAnLQJKEZSBJwjKQJGEZSJKwDCRJWAaSJCwDSRKWgSQJy0CShGUgScIykCSxAGWQ5JgkX03y6bZ8apL7kkwl+WSSY9v4i9ryVFu/sm8fV7Xxbya5cNhMkqT5WYgzg3cDj/Ytfxi4vqpeATwDXN7GLweeaePXt3kkOQ24BHgVsBr4eJJjFiCXJGmOhiqDJMuBNwCfaMsBXgfc2abcClzcHq9py7T157f5a4DNVfWTqvoWMAWcPUwuSdL8LBly+z8F3gv8Qlt+GfBsVR1oy7uAZe3xMuBJgKo6kGRfm78M2N63z/5tniPJOmAdwNjYGJOTkwOFHjsO1p9x4NATF9h88u7fv3/g41sMXc8H3c/Y9XzQ/YxdzwdHRkYYogySvBHYU1UPJJlYsEQvoKo2ABsAxsfHa2JisKe9cdMWrtsxbA/O385LJ+Y8d3JykkGPbzF0PR90P2PX80H3M3Y9HxwZGWG4M4PXAm9K8nrgxcBLgY8BS5MsaWcHy4Hdbf5uYAWwK8kS4ETge33jB/VvI0laBAO/ZlBVV1XV8qpaSe8F4C9U1aXAvcCb27S1wJb2eGtbpq3/QlVVG7+k3W10KrAK+NKguSRJ83c4rpW8D9ic5EPAV4Gb2/jNwJ8nmQL20isQqurhJHcAjwAHgCuq6meHIZckaRYLUgZVNQlMtsePM8PdQFX1Y+C3Z9n+GuCahcgiSZo/34EsSbIMJEmWgSQJy0CShGUgScIykCRxeN5noA5aeeVnFnR/6884wNvmsM+d175hQZ9X0uHhmYEkyTKQJHmZaFHN51LNXC/DSNJC8MxAkmQZSJIsA0kSloEkCctAkoRlIEnCMpAkYRlIkrAMJEkMUQZJViS5N8kjSR5O8u42fnKSbUkea99PauNJckOSqSQPJjmzb19r2/zHkqwd/rAkSfMxzJnBAWB9VZ0GnAtckeQ04ErgnqpaBdzTlgEuAla1r3XATdArD+Bq4BzgbODqgwUiSVocA5dBVT1VVV9pj/838CiwDFgD3Nqm3Qpc3B6vAW6rnu3A0iQvBy4EtlXV3qp6BtgGrB40lyRp/lJVw+8kWQl8ETgdeKKqlrbxAM9U1dIknwauraq/aevuAd4HTAAvrqoPtfE/Bn5UVR+d4XnW0TurYGxs7KzNmzcPlHfP3n08/aOBNl00Y8fR6YxzzXfGshMPf5hZ7N+/nxNOOGFkz38oXc8H3c/Y9XzQvYznnXfeA1U1Pn186E8tTXIC8FfAH1bV93v//vdUVSUZvm1+vr8NwAaA8fHxmpiYGGg/N27awnU7uv2BrevPONDpjHPNt/PSicMfZhaTk5MM+ndkMXQ9H3Q/Y9fzwZGREYa8myjJP6ZXBJuq6lNt+Ol2+Yf2fU8b3w2s6Nt8eRubbVyStEiGuZsowM3Ao1X1n/pWbQUO3hG0FtjSN35Zu6voXGBfVT0F3A1ckOSk9sLxBW1MkrRIhrkO8Vrgd4AdSb7Wxt4PXAvckeRy4NvAW9q6u4DXA1PAD4G3A1TV3iQfBO5v8z5QVXuHyCVJmqeBy6C9EJxZVp8/w/wCrphlXxuBjYNmkSQNx3cgS5IsA0mSZSBJwjKQJLEAbzqTumrH7n287crPLPrz7rz2DYv+nNKwPDOQJFkGkiTLQJKEZSBJwheQdZitHMELuAetP2NkTy0dcTwzkCRZBpIkLxNJ/6As9GW59WccmNN7NXxvxZHPMwNJkmUgSfIykbTg5nqpZq6XYKTFYBlIOmKN6vOn4B/e6yReJpIkeWYgaXijenPhKN9YOKrLgYfrjKQzZwZJVif5ZpKpJFeOOo8kHU06UQZJjgH+DLgIOA14a5LTRptKko4enSgD4Gxgqqoer6qfApuBNSPOJElHjVTVqDOQ5M3A6qr6N235d4Bzquqd0+atA9a1xVcC3xzwKU8B/n7AbRdL1zN2PR90P2PX80H3M3Y9H3Qv4z+tql+cPnhEvYBcVRuADcPuJ8mXq2p8ASIdNl3P2PV80P2MXc8H3c/Y9XxwZGSE7lwm2g2s6Fte3sYkSYugK2VwP7AqyalJjgUuAbaOOJMkHTU6cZmoqg4keSdwN3AMsLGqHj6MTzn0paZF0PWMXc8H3c/Y9XzQ/YxdzwdHRsZuvIAsSRqtrlwmkiSNkGUgSTr6yiDJMUm+muTTo84ykyRLk9yZ5BtJHk3yG6PONF2S9yR5OMlDSW5P8uIR59mYZE+Sh/rGTk6yLclj7ftJHcz4kfbf+cEkf51k6Qgjzpixb936JJXklFFkaxlmzJfkXe3P8eEkfzKqfC3LTP+dX5Nke5KvJflykrNHmXE2R10ZAO8GHh11iBfwMeBzVfVrwKvpWNYky4A/AMar6nR6L/hfMtpU3AKsnjZ2JXBPVa0C7mnLo3QLz8+4DTi9qn4d+DvgqsUONc0tPD8jSVYAFwBPLHagaW5hWr4k59H7tIJXV9WrgI+OIFe/W3j+n+GfAP+xql4D/Ie23DlHVRkkWQ68AfjEqLPMJMmJwG8CNwNU1U+r6tmRhprZEuC4JEuA44H/NcowVfVFYO+04TXAre3xrcDFi5lpupkyVtXnq+pAW9xO7/01IzPLnyPA9cB7gZHebTJLvt8Drq2qn7Q5exY9WJ9ZMhbw0vb4REb88zKbo6oMgD+l95f6/444x2xOBb4L/Ld2KesTSV4y6lD9qmo3vd++ngCeAvZV1edHm2pGY1X1VHv8HWBslGHm4HeBz446xHRJ1gC7q+rro84yi18F/mWS+5L8jyT/fNSBZvCHwEeSPEnvZ2fUZ4AzOmrKIMkbgT1V9cCos7yAJcCZwE1V9c+AHzD6yxvP0a69r6FXXP8EeEmSfz3aVC+sevdPd/Ye6iR/BBwANo06S78kxwPvp3dpo6uWACcD5wL/HrgjSUYb6Xl+D3hPVa0A3kM78++ao6YMgNcCb0qyk96nor4uyV+MNtLz7AJ2VdV9bflOeuXQJb8FfKuqvltV/wf4FPAvRpxpJk8neTlA+z7SywezSfI24I3ApdW9N/38Cr3S/3r7uVkOfCXJL4801XPtAj5VPV+id9Y/she5Z7GW3s8JwF/S+5TmzjlqyqCqrqqq5VW1kt4Lnl+oqk79RltV3wGeTPLKNnQ+8MgII83kCeDcJMe338DOp2Mvcjdb6f0Q0r5vGWGWGSVZTe+y5Zuq6oejzjNdVe2oql+qqpXt52YXcGb7e9oV/x04DyDJrwLH0q1PCIXeawT/qj1+HfDYCLPMqhMfR6HneBewqX1G0+PA20ec5zmq6r4kdwJfoXdp46uM+O32SW4HJoBTkuwCrgaupXfJ4HLg28BbRpdw1oxXAS8CtrUrG9ur6t92KWNVdeaSxix/hhuBje1Wzp8Ca0d5hjVLxncAH2s3XPyYn38Mf6f4cRSSpKPnMpEkaXaWgSTJMpAkWQaSJCwDSRKWgSQJy0CSBPw/7n+kOOvFQywAAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "repeated_names[(repeated_names.issn_count > 3) & (repeated_names.issn_count < 20)].issn_count.hist(bins=10)" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 82, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAASuUlEQVR4nO3df4xd5X3n8fdncZMQaDAJ1Syy0RopVipSti0ZAVFW0RDvgoEII5VmiWgxiK73B02zK6SGrNRFSoJEpLBJQFtWVmBrsjQOddO1m9BQi2R2t39Ag5NsHKCIWWKKLYLT2DjrhCbr7nf/uI/Tm9HYvnPvnTt3Lu+XNJpznvOcc5/vfTTzmXPuuXdSVUiSXtv+wXIPQJK0/AwDSZJhIEkyDCRJGAaSJGDVcg+gX+ecc06tW7eur31/+MMfcsYZZwx3QMtkUmqZlDrAWsbRpNQBg9WyZ8+ev6mqX1ho24oNg3Xr1vHkk0/2te/s7CwzMzPDHdAymZRaJqUOsJZxNCl1wGC1JHnhRNu8TCRJMgwkSYaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJFbwO5AHsffAEW66/Usjf9x9d1098seUpF54ZiBJMgwkST2EQZIHkhxM8u2utjcn2Z3kufb97NaeJPckmUvyrSQXde2zufV/LsnmrvZ3JNnb9rknSYZdpCTp5Ho5M/gDYOO8ttuBx6pqPfBYWwe4EljfvrYA90EnPIA7gEuAi4E7jgdI6/Mvuvab/1iSpCV2yjCoqv8BHJrXvAnY1pa3Add2tT9YHY8Dq5OcC1wB7K6qQ1V1GNgNbGzb3lRVj1dVAQ92HUuSNCL93k00VVUvteXvAlNteQ3wYle//a3tZO37F2hfUJItdM44mJqaYnZ2tr/Bnw63XXisr30H0e94T+bo0aNLctxRm5Q6wFrG0aTUAUtXy8C3llZVJalhDKaHx9oKbAWYnp6ufv/Bw70P7eTuvaO/q3bfDTNDP+ak/NOOSakDrGUcTUodsHS19Hs30cvtEg/t+8HWfgA4r6vf2tZ2sva1C7RLkkao3zDYBRy/I2gzsLOr/cZ2V9GlwJF2OelR4PIkZ7cXji8HHm3bfpDk0nYX0Y1dx5Ikjcgpr5Uk+RwwA5yTZD+du4LuAh5OcgvwAvC+1v0R4CpgDvgRcDNAVR1K8lHga63fR6rq+IvS/4bOHUunA3/WviRJI3TKMKiq959g04YF+hZw6wmO8wDwwALtTwK/dKpxSJKWju9AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSQwYBkn+XZKnknw7yeeSvCHJ+UmeSDKX5PNJXtf6vr6tz7Xt67qO8+HW/mySKwasSZK0SH2HQZI1wO8A01X1S8BpwPXAx4FPVtVbgcPALW2XW4DDrf2TrR9JLmj7vR3YCPx+ktP6HZckafEGvUy0Cjg9ySrgjcBLwHuAHW37NuDatryprdO2b0iS1r69qn5cVd8B5oCLBxyXJGkRVvW7Y1UdSPIJ4K+BV4E/B/YAr1TVsdZtP7CmLa8BXmz7HktyBHhLa3+869Dd+/yMJFuALQBTU1PMzs72Nfap0+G2C4+duuOQ9Tvekzl69OiSHHfUJqUOsJZxNCl1wNLV0ncYJDmbzl/15wOvAH9E5zLPkqmqrcBWgOnp6ZqZmenrOPc+tJO79/Zdet/23TAz9GPOzs7S7/MwTialDrCWcTQpdcDS1TLIZaJ/Cnynqr5XVf8X+ALwLmB1u2wEsBY40JYPAOcBtO1nAd/vbl9gH0nSCAwSBn8NXJrkje3a/wbgaeCrwHWtz2ZgZ1ve1dZp279SVdXar293G50PrAf+coBxSZIWaZDXDJ5IsgP4OnAM+AadSzhfArYn+Vhru7/tcj/w2SRzwCE6dxBRVU8leZhOkBwDbq2qv+t3XJKkxRvownlV3QHcMa/5eRa4G6iq/hb49RMc507gzkHGIknqn+9AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSQwYBklWJ9mR5K+SPJPknUnenGR3kufa97Nb3yS5J8lckm8luajrOJtb/+eSbB60KEnS4gx6ZvBp4MtV9YvALwPPALcDj1XVeuCxtg5wJbC+fW0B7gNI8mbgDuAS4GLgjuMBIkkajb7DIMlZwLuB+wGq6idV9QqwCdjWum0Drm3Lm4AHq+NxYHWSc4ErgN1VdaiqDgO7gY39jkuStHipqv52TH4F2Ao8TeesYA/wQeBAVa1ufQIcrqrVSb4I3FVVf9G2PQZ8CJgB3lBVH2vtvwe8WlWfWOAxt9A5q2Bqauod27dv72vsBw8d4eVX+9p1IBeuOWvoxzx69Chnnnnm0I87apNSB1jLOJqUOmCwWi677LI9VTW90LZVA4xpFXAR8IGqeiLJp/n7S0IAVFUl6S9tFlBVW+kEENPT0zUzM9PXce59aCd37x2k9P7su2Fm6MecnZ2l3+dhnExKHWAt42hS6oClq2WQ1wz2A/ur6om2voNOOLzcLv/Qvh9s2w8A53Xtv7a1nahdkjQifYdBVX0XeDHJ21rTBjqXjHYBx+8I2gzsbMu7gBvbXUWXAkeq6iXgUeDyJGe3F44vb22SpBEZ9FrJB4CHkrwOeB64mU7APJzkFuAF4H2t7yPAVcAc8KPWl6o6lOSjwNdav49U1aEBxyVJWoSBwqCqvgks9GLEhgX6FnDrCY7zAPDAIGORJPXPdyBLkgwDSZJhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkScCq5R6AJtu627/UU7/bLjzGTT327dW+u64e6vGkSeaZgSTJMJAkGQaSJAwDSRKGgSQJw0CShGEgScIwkCQxhDBIclqSbyT5Yls/P8kTSeaSfD7J61r769v6XNu+rusYH27tzya5YtAxSZIWZxhnBh8Enula/zjwyap6K3AYuKW13wIcbu2fbP1IcgFwPfB2YCPw+0lOG8K4JEk9GigMkqwFrgY+09YDvAfY0bpsA65ty5vaOm37htZ/E7C9qn5cVd8B5oCLBxmXJGlxBv1sok8Bvwv8fFt/C/BKVR1r6/uBNW15DfAiQFUdS3Kk9V8DPN51zO59fkaSLcAWgKmpKWZnZ/sa9NTpnc/CGbV+x3syR48eXZLjDkuvz/NSzMlyPS/jPieLMSm1TEodsHS19B0GSd4LHKyqPUlmhjaik6iqrcBWgOnp6ZqZ6e9h731oJ3fvHf1n9O27YWbox5ydnaXf52EUev3wudsuPDb0OVmK57sX4z4nizEptUxKHbB0tQzy0/cu4JokVwFvAN4EfBpYnWRVOztYCxxo/Q8A5wH7k6wCzgK+39V+XPc+kqQR6Ps1g6r6cFWtrap1dF4A/kpV3QB8FbiuddsM7GzLu9o6bftXqqpa+/XtbqPzgfXAX/Y7LknS4i3FtZIPAduTfAz4BnB/a78f+GySOeAQnQChqp5K8jDwNHAMuLWq/m4JxiVJOoGhhEFVzQKzbfl5FrgbqKr+Fvj1E+x/J3DnMMYiSVo834EsSTIMJEmGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkszX86k7QM1t3+paEf87YLj3FTD8fdd9fVQ39sjZZnBpIkw0CSZBhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRIDhEGS85J8NcnTSZ5K8sHW/uYku5M8176f3dqT5J4kc0m+leSirmNtbv2fS7J58LIkSYsxyJnBMeC2qroAuBS4NckFwO3AY1W1HnisrQNcCaxvX1uA+6ATHsAdwCXAxcAdxwNEkjQafYdBVb1UVV9vy/8HeAZYA2wCtrVu24Br2/Im4MHqeBxYneRc4Apgd1UdqqrDwG5gY7/jkiQt3lBeM0iyDvhV4Algqqpeapu+C0y15TXAi1277W9tJ2qXJI1IqmqwAyRnAv8duLOqvpDklapa3bX9cFWdneSLwF1V9Ret/THgQ8AM8Iaq+lhr/z3g1ar6xAKPtYXOJSampqbesX379r7GfPDQEV5+ta9dB3LhmrOGfsyjR49y5plnDv24w7L3wJGe+k2dztDnZCme714s15z0+lwvRq/zslzPda/G/edkMQap5bLLLttTVdMLbRvofyAn+Tngj4GHquoLrfnlJOdW1UvtMtDB1n4AOK9r97Wt7QCdQOhun13o8apqK7AVYHp6umZmZhbqdkr3PrSTu/eO/t8/77thZujHnJ2dpd/nYRR6+f+50Plfu8Oek6V4vnuxXHPS63O9GL3Oy3I9170a95+TxViqWga5myjA/cAzVfUfuzbtAo7fEbQZ2NnVfmO7q+hS4Ei7nPQocHmSs9sLx5e3NknSiAzyp9i7gN8E9ib5Zmv798BdwMNJbgFeAN7Xtj0CXAXMAT8CbgaoqkNJPgp8rfX7SFUdGmBckqRF6jsM2rX/nGDzhgX6F3DrCY71APBAv2ORpFFbtwSX5XrxBxvPWJLj+g5kSZJhIEkyDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgSWKMwiDJxiTPJplLcvtyj0eSXkvGIgySnAb8J+BK4ALg/UkuWN5RSdJrx1iEAXAxMFdVz1fVT4DtwKZlHpMkvWakqpZ7DCS5DthYVb/V1n8TuKSqfntevy3Alrb6NuDZPh/yHOBv+tx33ExKLZNSB1jLOJqUOmCwWv5RVf3CQhtW9T+e0auqrcDWQY+T5Mmqmh7CkJbdpNQyKXWAtYyjSakDlq6WcblMdAA4r2t9bWuTJI3AuITB14D1Sc5P8jrgemDXMo9Jkl4zxuIyUVUdS/LbwKPAacADVfXUEj7kwJeaxsik1DIpdYC1jKNJqQOWqJaxeAFZkrS8xuUykSRpGRkGkqTJDoMkpyX5RpIvLrDt9Uk+3z7+4okk65ZhiD07RS03Jflekm+2r99ajjH2Ism+JHvbOJ9cYHuS3NPm5VtJLlqOcfaih1pmkhzpmpf/sBzj7EWS1Ul2JPmrJM8keee87StiXnqoY0XMSZK3dY3xm0l+kOTfzusz1DkZixeQl9AHgWeANy2w7RbgcFW9Ncn1wMeBfz7KwS3SyWoB+Pz8N+mNscuq6kRvmrkSWN++LgHua9/H1clqAfifVfXekY2mf58GvlxV17U7+t44b/tKmZdT1QErYE6q6lngV+CnH9dzAPiTed2GOicTe2aQZC1wNfCZE3TZBGxryzuADUkyirEtVg+1TJJNwIPV8TiwOsm5yz2oSZbkLODdwP0AVfWTqnplXrexn5ce61iJNgD/u6pemNc+1DmZ2DAAPgX8LvD/TrB9DfAidG5tBY4AbxnJyBbvU5y8FoBfa6eKO5Kcd5J+y62AP0+yp328yHw/nZdmf2sbR6eqBeCdSf5Xkj9L8vZRDm4Rzge+B/yXdinyM0nOmNdnJcxLL3XAypiTbtcDn1ugfahzMpFhkOS9wMGq2rPcYxlUj7X8KbCuqv4xsJu/P+MZR/+kqi6ic4p7a5J3L/eABnCqWr5O57Ngfhm4F/hvIx5fr1YBFwH3VdWvAj8EVuLHyPdSx0qZEwDapa5rgD9a6seayDAA3gVck2QfnU9AfU+S/zqvz08/AiPJKuAs4PujHGSPTllLVX2/qn7cVj8DvGO0Q+xdVR1o3w/SuQZ68bwuK+ajSU5VS1X9oKqOtuVHgJ9Lcs7IB3pq+4H9VfVEW99B55dqt5UwL6esYwXNyXFXAl+vqpcX2DbUOZnIMKiqD1fV2qpaR+cU6ytV9Rvzuu0CNrfl61qfsXsHXi+1zLtOeA2dF5rHTpIzkvz88WXgcuDb87rtAm5sd0pcChypqpdGPNRT6qWWJP/w+OtQSS6m8/M2dn9wVNV3gReTvK01bQCentdt7OellzpWypx0eT8LXyKCIc/JpN9N9DOSfAR4sqp20XmR6bNJ5oBDdH7RrhjzavmdJNcAx+jUctNyju0kpoA/aT+Lq4A/rKovJ/lXAFX1n4FHgKuAOeBHwM3LNNZT6aWW64B/neQY8Cpw/Tj+wdF8AHioXZZ4Hrh5hc7LqepYMXPS/sj4Z8C/7Gpbsjnx4ygkSZN5mUiStDiGgSTJMJAkGQaSJAwDSRKGgSQJw0CSBPx/YueE4LohpnIAAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "repeated_names[(repeated_names.issn_count > 3) & (repeated_names.issn_count < 8)].issn_count.hist()" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 83, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWoAAAD4CAYAAADFAawfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAANX0lEQVR4nO3dbYxc91XH8e/BmybFGzkJKSuzidhUqipFtSDxqCQKL3YNTdMkat/0haMCKQ9aCUQVnoRsISH1BSJFVQVIiMZqCxW03ZY0gcpWiUKbJaoELrttGjtxTNzENLFS3PDgdoMEBA4v5q69a2ZmZx9m5qz9/UijvQ//O3Pu0Z3fzt65147MRJJU1/eNugBJUm8GtSQVZ1BLUnEGtSQVZ1BLUnFjg3jS66+/Pqempgbx1EPz2muvsXPnzlGXUZb96c7e9GZ/OltcXHw1M9/Uad1AgnpqaoqFhYVBPPXQzM/PMz09PeoyyrI/3dmb3uxPZxHxT93WeepDkoozqCWpOINakoozqCWpOINakoozqCWpuL4uz4uI08D3gP8BXs/M1iCLkiRdsJ7rqGcy89WBVSJJ6shTH5JUXPTzHwdExIvAvwEJPJSZhzqMmQVmASYmJvbOzc1tqKBjZ85taLvN2jO5a9X80tIS4+PjI6llO7A/3dmb3uxPZzMzM4vdTiv3G9STmXkmIn4QeBz4QGY+2W18q9XKjd5CPnXgyIa226zTD96zat7bXHuzP93Zm97sT2cR0TWo+zr1kZlnmp9ngUeBt29deZKkXtYM6ojYGRFXL08DdwLHB12YJKmtn6s+JoBHI2J5/Kcz868HWpUk6bw1gzozXwB+ZAi1SJI68PI8SSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4voO6ojYERFfj4jDgyxIkrTaej5RPwCcGFQhkqTO+grqiLgBuAf42GDLkSRdLDJz7UERDwO/C1wN/EZm3tthzCwwCzAxMbF3bm5uQwUdO3NuQ9tt1p7JXavml5aWGB8fH0kt24H96c7e9GZ/OpuZmVnMzFandWNrbRwR9wJnM3MxIqa7jcvMQ8AhgFarldPTXYf29P4DRza03Wadft/0qvn5+Xk2ug+XA/vTnb3pzf6sXz+nPu4A3h0Rp4E5YF9E/PlAq5IknbdmUGfmwcy8ITOngP3AlzPzpwZemSQJ8DpqSSpvzXPUK2XmPDA/kEokSR35iVqSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJam4NYM6Iq6KiK9GxDci4pmI+OAwCpMktY31MeY/gX2ZuRQRVwBfiYgvZubfD7g2SRJ9BHVmJrDUzF7RPHKQRUmSLujrHHVE7IiIp4CzwOOZeXSgVUmSzov2B+Y+B0dcAzwKfCAzj1+0bhaYBZiYmNg7Nze3oYKOnTm3oe02a8/krlXzS0tLjI+PD+W1q+zzegyzP9uNvenN/nQ2MzOzmJmtTuvWFdQAEfHbwH9k5oe7jWm1WrmwsLC+KhtTB45saLvNOv3gPavm5+fnmZ6eHsprV9nn9Rhmf7Ybe9Ob/eksIroGdT9Xfbyp+SRNRLwReAfw3JZWKEnqqp+rPnYDn4yIHbSD/XOZeXiwZUmSlvVz1cfTwC1DqEWS1IF3JkpScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScWsGdUTcGBFPRMSzEfFMRDwwjMIkSW1jfYx5Hfj1zPxaRFwNLEbE45n57IBrkyTRxyfqzHwlM7/WTH8POAFMDrowSVJbZGb/gyOmgCeBt2Xmdy9aNwvMAkxMTOydm5vbUEHHzpzb0HabtWdy16r5paUlxsfHh/LaVfZ5PYbZn+3G3vRmfzqbmZlZzMxWp3V9B3VEjAN/C/xOZj7Sa2yr1cqFhYV1FwowdeDIhrbbrNMP3rNqfn5+nunp6aG8dpV9Xo9h9me7sTe92Z/OIqJrUPd11UdEXAF8HvjUWiEtSdpa/Vz1EcDHgROZ+ZHBlyRJWqmfT9R3AD8N7IuIp5rH3QOuS5LUWPPyvMz8ChBDqEWS1IF3JkpScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBW3ZlBHxCci4mxEHB9GQZKk1fr5RP2nwF0DrkOS1MWaQZ2ZTwL/OoRaJEkdRGauPShiCjicmW/rMWYWmAWYmJjYOzc3t6GCjp05t6HtNmvP5K5V80tLS4yPjw/ltavs83oMsz/bjb3pbRj92Y7vqZmZmcXMbHVat2VBvVKr1cqFhYV1Fbls6sCRDW23WacfvGfV/Pz8PNPT00N57Sr7vB7D7M92Y296G0Z/tuN7KiK6BrVXfUhScQa1JBXXz+V5nwH+DnhrRLwcET8/+LIkScvG1hqQmfcNoxBJUmee+pCk4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4voK6oi4KyJORsSpiDgw6KIkSResGdQRsQP4I+BdwM3AfRFx86ALkyS19fOJ+u3Aqcx8ITP/C5gD3jPYsiRJy8b6GDMJvLRi/mXgxy4eFBGzwGwzuxQRJzdf3vDEh/7fouuBV4dfyfB02Of1uOT7swn2prdLtj+bfE/9cLcV/QR1XzLzEHBoq55v1CJiITNbo66jKvvTnb3pzf6sXz+nPs4AN66Yv6FZJkkagn6C+h+At0TETRHxBmA/8IXBliVJWrbmqY/MfD0ifhl4DNgBfCIznxl4ZaN3yZzGGRD705296c3+rFNk5qhrkCT14J2JklScQS1JxV02QR0RN0bEExHxbEQ8ExEPNMuvi4jHI+L55ue1zfKIiD9sbpt/OiJuXfFc9zfjn4+I+0e1T1stInZExNcj4nAzf1NEHG168Nnmy2Qi4spm/lSzfmrFcxxslp+MiHeOaFcGIiKuiYiHI+K5iDgREbd7/FwQEb/avLeOR8RnIuIqj6EtkpmXxQPYDdzaTF8N/CPtW+J/DzjQLD8AfKiZvhv4IhDAbcDRZvl1wAvNz2ub6WtHvX9b1KNfAz4NHG7mPwfsb6Y/CvxiM/1LwEeb6f3AZ5vpm4FvAFcCNwHfBHaMer+2sD+fBH6hmX4DcI3Hz/neTAIvAm9ccey832Noi/o76gJGtuPwV8A7gJPA7mbZbuBkM/0QcN+K8Seb9fcBD61Yvmrcdn3Qvj7+S8A+4HATMK8CY83624HHmunHgNub6bFmXAAHgYMrnvP8uO3+AHY1QRQXLff4yfNB/VLzC2isOYbe6TG0NY/L5tTHSs2fWbcAR4GJzHylWfVtYKKZ7nTr/GSP5dvd7wO/CfxvM/8DwL9n5uvN/Mr9PN+DZv25Zvyl2htof7r7DvAnzemhj0XETjx+AMjMM8CHgW8Br9A+JhbxGNoSl11QR8Q48HngVzLzuyvXZftX+GV3vWJE3AuczczFUddS2BhwK/DHmXkL8BrtUx3nXa7HD0Bzbv49tH+h/RCwE7hrpEVdQi6roI6IK2iH9Kcy85Fm8T9HxO5m/W7gbLO8263zl+It9XcA746I07T/dcR9wB8A10TE8k1RK/fzfA+a9buAf+HS7M2yl4GXM/NoM/8w7eD2+Gn7SeDFzPxOZv438Ajt48pjaAtcNkEdEQF8HDiRmR9ZseoLwPI37/fTPne9vPxnmm/vbwPONX/iPgbcGRHXNp8i7myWbVuZeTAzb8jMKdpf7Hw5M98HPAG8txl2cW+We/beZnw2y/c33+jfBLwF+OqQdmOgMvPbwEsR8dZm0U8Az+Lxs+xbwG0R8f3Ne225Px5DW2HUJ8mH9QB+nPafpU8DTzWPu2mfF/sS8DzwN8B1zfig/R8mfBM4BrRWPNfPAaeax8+Oet+2uE/TXLjq48203ySngL8ArmyWX9XMn2rWv3nF9r/V9Owk8K5R788W9+ZHgYXmGPpL2ldtePxc2K8PAs8Bx4E/o33lhsfQFjy8hVySirtsTn1I0nZlUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBX3fwsmzuohajGeAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "repeated_names[repeated_names.issn_count > 1000].issn_count.hist(bins=10)" - ] - }, - { - "cell_type": "code", - "execution_count": 84, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nameissn_count
3511Bulletin.2797
7652Newsletter.2773
8338Rapport.1062
23716Proceedings.1565
45931Annual report /1382
45999Annual report.9520
46056Annuaire.1263
47310Rapport annuel.2811
72341Annual report1074
\n", - "
" - ], - "text/plain": [ - " name issn_count\n", - "3511 Bulletin. 2797\n", - "7652 Newsletter. 2773\n", - "8338 Rapport. 1062\n", - "23716 Proceedings. 1565\n", - "45931 Annual report / 1382\n", - "45999 Annual report. 9520\n", - "46056 Annuaire. 1263\n", - "47310 Rapport annuel. 2811\n", - "72341 Annual report 1074" - ] - }, - "execution_count": 84, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "repeated_names[repeated_names.issn_count > 1000]" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nameissn_count
103Bulletin d'information.696
3230Bulletin de liaison.512
3511Bulletin.2797
7652Newsletter.2773
8338Rapport.1062
23716Proceedings.1565
45886Report.764
45931Annual report /1382
45999Annual report.9520
46056Annuaire.1263
46462Jaarverslag.678
47235Rapport d'activité.690
47310Rapport annuel.2811
49388Jahresbericht.528
72341Annual report1074
121778Alumni directory /511
129027Bulletin municipal.521
150771˜La œLettre.630
169246Local climatological data.613
269569Estimates.680
\n", - "
" - ], - "text/plain": [ - " name issn_count\n", - "103 Bulletin d'information. 696\n", - "3230 Bulletin de liaison. 512\n", - "3511 Bulletin. 2797\n", - "7652 Newsletter. 2773\n", - "8338 Rapport. 1062\n", - "23716 Proceedings. 1565\n", - "45886 Report. 764\n", - "45931 Annual report / 1382\n", - "45999 Annual report. 9520\n", - "46056 Annuaire. 1263\n", - "46462 Jaarverslag. 678\n", - "47235 Rapport d'activité. 690\n", - "47310 Rapport annuel. 2811\n", - "49388 Jahresbericht. 528\n", - "72341 Annual report 1074\n", - "121778 Alumni directory / 511\n", - "129027 Bulletin municipal. 521\n", - "150771 ˜La œLettre. 630\n", - "169246 Local climatological data. 613\n", - "269569 Estimates. 680" - ] - }, - "execution_count": 85, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "repeated_names[repeated_names.issn_count > 500]" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nameissn_count
103Bulletin d'information.696
2676Newsletter /290
3230Bulletin de liaison.512
3511Bulletin.2797
3941Boletín.227
.........
534658Relatório e contas.248
606501Bildung und Beruf regional.292
1013518Vies de famille.222
1117647Country risk service.271
1236478Performance report for the period ending March...217
\n", - "

72 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " name issn_count\n", - "103 Bulletin d'information. 696\n", - "2676 Newsletter / 290\n", - "3230 Bulletin de liaison. 512\n", - "3511 Bulletin. 2797\n", - "3941 Boletín. 227\n", - "... ... ...\n", - "534658 Relatório e contas. 248\n", - "606501 Bildung und Beruf regional. 292\n", - "1013518 Vies de famille. 222\n", - "1117647 Country risk service. 271\n", - "1236478 Performance report for the period ending March... 217\n", - "\n", - "[72 rows x 2 columns]" - ] - }, - "execution_count": 86, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "repeated_names[repeated_names.issn_count > 200]" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nameissn_count
103Bulletin d'information.696
2676Newsletter /290
3230Bulletin de liaison.512
3511Bulletin.2797
3941Boletín.227
.........
1315194Country commerce.120
1327255Bible studies for life.159
1805527LexisNexis practice guide.110
2637306Operational risk report.119
2659477Interempresas net.115
\n", - "

204 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " name issn_count\n", - "103 Bulletin d'information. 696\n", - "2676 Newsletter / 290\n", - "3230 Bulletin de liaison. 512\n", - "3511 Bulletin. 2797\n", - "3941 Boletín. 227\n", - "... ... ...\n", - "1315194 Country commerce. 120\n", - "1327255 Bible studies for life. 159\n", - "1805527 LexisNexis practice guide. 110\n", - "2637306 Operational risk report. 119\n", - "2659477 Interempresas net. 115\n", - "\n", - "[204 rows x 2 columns]" - ] - }, - "execution_count": 87, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "repeated_names[repeated_names.issn_count > 100]" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nameissn_count
2˜The œpublishers weekly.2
3Publishers weekly2
6Activitas Nervosa Superior.2
12Library journal.2
24Acta cardiologica.2
.........
2938851AAPS introductions in the pharmaceutical scien...2
2938852AAPS introductions in the pharmaceutical scien...2
2938856Verzeichniss der Werke lebender Künstler auf d...2
2938857IEEE Advanced Information Management, Communic...2
2938858Products finishing México (Print)2
\n", - "

586466 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " name issn_count\n", - "2 ˜The œpublishers weekly. 2\n", - "3 Publishers weekly 2\n", - "6 Activitas Nervosa Superior. 2\n", - "12 Library journal. 2\n", - "24 Acta cardiologica. 2\n", - "... ... ...\n", - "2938851 AAPS introductions in the pharmaceutical scien... 2\n", - "2938852 AAPS introductions in the pharmaceutical scien... 2\n", - "2938856 Verzeichniss der Werke lebender Künstler auf d... 2\n", - "2938857 IEEE Advanced Information Management, Communic... 2\n", - "2938858 Products finishing México (Print) 2\n", - "\n", - "[586466 rows x 2 columns]" - ] - }, - "execution_count": 88, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "repeated_names" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If a name matches a repeated name exactly or fuzzy matches to a repeated name and there is not other information available, the match status must be ambigious." - ] - }, - { - "cell_type": "code", - "execution_count": 89, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['2735-9298', '2735-928X']" - ] - }, - "execution_count": 89, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mapping[\"Nigerian Journal of Wildlife Management\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "These are two ISSN refering to the same journal." - ] - }, - { - "cell_type": "code", - "execution_count": 90, - "metadata": {}, - "outputs": [], - "source": [ - "import requests" - ] - }, - { - "cell_type": "code", - "execution_count": 91, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'@graph': [{'@id': 'http://id.loc.gov/vocabulary/countries/nr',\n", - " 'label': 'Nigeria'},\n", - " {'@id': 'organization/ISSNCenter#_e',\n", - " '@type': 'http://schema.org/Organization'},\n", - " {'@id': 'resource/ISSN-L/2735-9298',\n", - " 'identifiedBy': 'resource/ISSN/2735-928X#ISSN-L'},\n", - " {'@id': 'resource/ISSN/2735-928X',\n", - " '@type': ['http://id.loc.gov/ontologies/bibframe/Work',\n", - " 'http://id.loc.gov/ontologies/bibframe/Instance',\n", - " 'http://schema.org/Periodical'],\n", - " 'identifiedBy': ['resource/ISSN/2735-928X#ISSN-L',\n", - " 'resource/ISSN/2735-928X#ISSN',\n", - " 'resource/ISSN/2735-928X#KeyTitle'],\n", - " 'mainTitle': 'Nigerian Journal of Wildlife Management',\n", - " 'otherPhysicalFormat': 'resource/ISSN/2735-9298',\n", - " 'title': 'resource/ISSN/2735-928X#KeyTitle',\n", - " 'format': 'vocabularies/medium#Online',\n", - " 'identifier': '2735-928X',\n", - " 'isFormatOf': 'resource/ISSN/2735-9298',\n", - " 'type': 'http://marc21rdf.info/terms/formofmaterial#a',\n", - " 'http://purl.org/ontology/bibo/issn': '2735-928X',\n", - " 'isPartOf': 'resource/ISSN-L/2735-9298',\n", - " 'issn': '2735-928X',\n", - " 'name': ['Nigerian JOurnal of Wildlife Management (Ondo. Online)',\n", - " 'Nigerian Journal of Wildlife Management'],\n", - " 'publication': 'resource/ISSN/2735-928X#ReferencePublicationEvent',\n", - " 'url': 'http://www.wildlifesociety.ng.org/'},\n", - " {'@id': 'resource/ISSN/2735-928X#ISSN',\n", - " '@type': 'http://id.loc.gov/ontologies/bibframe/Issn',\n", - " 'status': 'vocabularies/IdentifierStatus#Valid',\n", - " 'value': '2735-928X'},\n", - " {'@id': 'resource/ISSN/2735-928X#ISSN-L',\n", - " '@type': 'http://id.loc.gov/ontologies/bibframe/IssnL',\n", - " 'status': 'vocabularies/IdentifierStatus#Valid',\n", - " 'value': '2735-9298'},\n", - " {'@id': 'resource/ISSN/2735-928X#KeyTitle',\n", - " '@type': ['http://id.loc.gov/ontologies/bibframe/Identifier',\n", - " 'http://id.loc.gov/ontologies/bibframe/KeyTitle'],\n", - " 'value': 'Nigerian JOurnal of Wildlife Management (Ondo. Online)'},\n", - " {'@id': 'resource/ISSN/2735-928X#Record',\n", - " '@type': 'http://schema.org/CreativeWork',\n", - " 'status': 'vocabularies/RecordStatus#Register',\n", - " 'modified': '20200808163600.0',\n", - " 'mainEntity': 'resource/ISSN/2735-928X',\n", - " 'wasAttributedTo': 'organization/ISSNCenter#_e'},\n", - " {'@id': 'resource/ISSN/2735-928X#ReferencePublicationEvent',\n", - " '@type': 'http://schema.org/PublicationEvent',\n", - " 'location': 'http://id.loc.gov/vocabulary/countries/nr'}],\n", - " '@context': {'status': {'@id': 'http://id.loc.gov/ontologies/bibframe/status',\n", - " '@type': '@id'},\n", - " 'value': {'@id': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#value'},\n", - " 'publication': {'@id': 'http://schema.org/publication', '@type': '@id'},\n", - " 'mainTitle': {'@id': 'http://id.loc.gov/ontologies/bibframe/mainTitle'},\n", - " 'title': {'@id': 'http://id.loc.gov/ontologies/bibframe/title',\n", - " '@type': '@id'},\n", - " 'name': {'@id': 'http://schema.org/name'},\n", - " 'issn': {'@id': 'http://schema.org/issn'},\n", - " 'format': {'@id': 'http://purl.org/dc/elements/1.1/format', '@type': '@id'},\n", - " 'url': {'@id': 'http://schema.org/url'},\n", - " 'identifiedBy': {'@id': 'http://id.loc.gov/ontologies/bibframe/identifiedBy',\n", - " '@type': '@id'},\n", - " 'otherPhysicalFormat': {'@id': 'http://id.loc.gov/ontologies/bibframe/otherPhysicalFormat',\n", - " '@type': '@id'},\n", - " 'isPartOf': {'@id': 'http://schema.org/isPartOf', '@type': '@id'},\n", - " 'type': {'@id': 'http://purl.org/dc/terms/type', '@type': '@id'},\n", - " 'identifier': {'@id': 'http://purl.org/dc/elements/1.1/identifier'},\n", - " 'isFormatOf': {'@id': 'http://purl.org/dc/terms/isFormatOf', '@type': '@id'},\n", - " 'wasAttributedTo': {'@id': 'http://www.w3.org/ns/prov#wasAttributedTo',\n", - " '@type': '@id'},\n", - " 'mainEntity': {'@id': 'http://schema.org/mainEntity', '@type': '@id'},\n", - " 'modified': {'@id': 'http://purl.org/dc/terms/modified',\n", - " '@type': 'http://www.w3.org/2001/XMLSchema#dateTime'},\n", - " 'location': {'@id': 'http://schema.org/location', '@type': '@id'},\n", - " 'label': {'@id': 'http://www.w3.org/2000/01/rdf-schema#label'}}}" - ] - }, - "execution_count": 91, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "requests.get(\"https://portal.issn.org/resource/ISSN/2735-928X?format=json\").json()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Interestingly, most (80%) journal do not seem to have the distinction between electronic and print. But it may be that names are not used consistently." - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.8004443220991548" - ] - }, - "execution_count": 94, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(unique_name) / len(df)" - ] - }, - { - "cell_type": "code", - "execution_count": 96, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nameissn_count
1848393Aux petits bonheurs des enfants1
1147789Statistik om boliger ... i Aalborg Kommune.1
2789832Mur (Regensburg)1
35201Boletín de la Sociedad Vasco-Navarra de pediat...1
1955109Cucina piatti unici.1
1039ABD1
2595157Tribuna quinzenal de Mataró1
416131Japan high tech review1
2885449Revista cósmica calavera.1
343232First days.1
\n", - "
" - ], - "text/plain": [ - " name issn_count\n", - "1848393 Aux petits bonheurs des enfants 1\n", - "1147789 Statistik om boliger ... i Aalborg Kommune. 1\n", - "2789832 Mur (Regensburg) 1\n", - "35201 Boletín de la Sociedad Vasco-Navarra de pediat... 1\n", - "1955109 Cucina piatti unici. 1\n", - "1039 ABD 1\n", - "2595157 Tribuna quinzenal de Mataró 1\n", - "416131 Japan high tech review 1\n", - "2885449 Revista cósmica calavera. 1\n", - "343232 First days. 1" - ] - }, - "execution_count": 96, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "unique_name.sample(n=10)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Example \"Fieldiana\"\n", - "\n", - "* \"Fieldiana. Anthropology\"\n", - "* https://www.jstor.org/journal/fieldianaanthro" - ] - }, - { - "cell_type": "code", - "execution_count": 97, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['2162-4321', '0071-4739']" - ] - }, - "execution_count": 97, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mapping[\"Fieldiana. Anthropology\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "However, jstor reports two ISSN: INSSP: 0071-4739, EISSN: 2162-4321 - but ISSN.org does not know about it?" - ] - }, - { - "cell_type": "code", - "execution_count": 98, - "metadata": {}, - "outputs": [], - "source": [ - "unique_issn = set([item for v in mapping.values() for item in v])" - ] - }, - { - "cell_type": "code", - "execution_count": 99, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 99, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\"2162-4321\" in unique_issn" - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fieldiana. ['2162-4267', '2162-4321', '2162-4291', '0096-2651', '0015-0746', '0097-3572', '2163-7105', '0071-4739', '0015-0754', '2158-5520', '0096-0438', '2162-4348']\n", - "Fieldiana. Anthropology ['2162-4321', '0071-4739']\n", - "Fieldiana. Anthropology (Online) ['2162-4321', '0071-4739']\n" - ] - } - ], - "source": [ - "for k, v in mapping.items():\n", - " if \"2162-4321\" in v:\n", - " print(k, v)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As an example: when given a string like \"Fieldiana\" we would return ambiguous. But \"Fieldiana. (Online)\" might be matched to '2162-4321'. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Other Examples" - ] - }, - { - "cell_type": "code", - "execution_count": 101, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nameissn_count
571163Afhandling.2
1800364Advances in e-business research series (Online)2
2476171Journal of Automatic Control (Online)2
2756754Gerencia de riesgos y seguros.2
2799421˜The œskinny (Scotland ed. Online)2
1254438Accessible news (Print)2
2177530Biomathematical and biomechanical modeling of ...2
2093431˜Le œJura socialiste (Saint-Claude)2
2752857European Journal of Formal Sciences and Engine...2
389738Austral journal of veterinary sciences (Online)2
\n", - "
" - ], - "text/plain": [ - " name issn_count\n", - "571163 Afhandling. 2\n", - "1800364 Advances in e-business research series (Online) 2\n", - "2476171 Journal of Automatic Control (Online) 2\n", - "2756754 Gerencia de riesgos y seguros. 2\n", - "2799421 ˜The œskinny (Scotland ed. Online) 2\n", - "1254438 Accessible news (Print) 2\n", - "2177530 Biomathematical and biomechanical modeling of ... 2\n", - "2093431 ˜Le œJura socialiste (Saint-Claude) 2\n", - "2752857 European Journal of Formal Sciences and Engine... 2\n", - "389738 Austral journal of veterinary sciences (Online) 2" - ] - }, - "execution_count": 101, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "repeated_names.sample(n=10)" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['1608-3318', '1070-3284']" - ] - }, - "execution_count": 102, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mapping[\"Russian journal of coordination chemistry.\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A new question: How many journals are listed under different names, yet still refer to the same journal?" - ] - }, - { - "cell_type": "code", - "execution_count": 103, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['0733-2289']" - ] - }, - "execution_count": 103, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mapping[\"San Bernardino County popular street atlas\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Journal of ..." - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nameissn_count
983Journal of vocational behavior.2
984Journal of vocational behavior (Print)2
1213Journal of American Concrete Institute.1
1214Journal of the American Concrete Institute1
1317Journal of the American Dental Hygienists' Ass...1
.........
2938472Journal of surgical procedures and case reports1
2938580Journal of Epidermiological Society of Nigeria...2
2938581Journal of Epidermiological Society of Nigeria2
2938795Journal of Practicing Teachers1
2938796Journal of Practicing Teachers (Uyo. Online)1
\n", - "

40257 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " name issn_count\n", - "983 Journal of vocational behavior. 2\n", - "984 Journal of vocational behavior (Print) 2\n", - "1213 Journal of American Concrete Institute. 1\n", - "1214 Journal of the American Concrete Institute 1\n", - "1317 Journal of the American Dental Hygienists' Ass... 1\n", - "... ... ...\n", - "2938472 Journal of surgical procedures and case reports 1\n", - "2938580 Journal of Epidermiological Society of Nigeria... 2\n", - "2938581 Journal of Epidermiological Society of Nigeria 2\n", - "2938795 Journal of Practicing Teachers 1\n", - "2938796 Journal of Practicing Teachers (Uyo. Online) 1\n", - "\n", - "[40257 rows x 2 columns]" - ] - }, - "execution_count": 104, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[df.name.str.startswith(\"Journal of\")]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.8" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/Journal_Names.pdf b/notebooks/Journal_Names.pdf deleted file mode 100644 index 04f392b..0000000 Binary files a/notebooks/Journal_Names.pdf and /dev/null differ diff --git a/notebooks/Journal_Names.zip b/notebooks/Journal_Names.zip deleted file mode 100644 index 00e96a3..0000000 Binary files a/notebooks/Journal_Names.zip and /dev/null differ -- cgit v1.2.3