aboutsummaryrefslogtreecommitdiffstats
path: root/notebooks/Journal_Names.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'notebooks/Journal_Names.ipynb')
-rw-r--r--notebooks/Journal_Names.ipynb2032
1 files changed, 0 insertions, 2032 deletions
diff --git a/notebooks/Journal_Names.ipynb b/notebooks/Journal_Names.ipynb
deleted file mode 100644
index 91da2a2..0000000
--- a/notebooks/Journal_Names.ipynb
+++ /dev/null
@@ -1,2032 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Journal Names\n",
- "\n",
- "Questions in the context of fuzzy matching.\n",
- "\n",
- "* How many journal names appear more than once, i.e. map to more than one ISSN?\n",
- "* What is the average length of the duplicated names vs the unique names?\n",
- "\n",
- "\n",
- "The input file is a single large JSON file, mapping journal names to lists of ISSNs.\n",
- "```json\n",
- "{\n",
- " \"Acta Orientalia.\": [\n",
- " \"0001-6438\"\n",
- " ],\n",
- " \"Acta Orientalia (København)\": [\n",
- " \"0001-6438\"\n",
- " ],\n",
- "..\n",
- "```"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 62,
- "metadata": {},
- "outputs": [],
- "source": [
- "import json\n",
- "import pandas as pd"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 63,
- "metadata": {},
- "outputs": [],
- "source": [
- "with open(\"../data/name_to_issn.json\") as f:\n",
- " mapping = json.load(f)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We have about 3M keys."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 64,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "2938859"
- ]
- },
- "execution_count": 64,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "len(mapping)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 65,
- "metadata": {},
- "outputs": [],
- "source": [
- "df = pd.DataFrame(((k, len(v)) for k, v in mapping.items()), columns=[\"name\", \"issn_count\"])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 66,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "2938859"
- ]
- },
- "execution_count": 66,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "len(df)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 67,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>name</th>\n",
- " <th>issn_count</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>Acta Orientalia.</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>Acta Orientalia (København)</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>˜The œpublishers weekly.</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>Publishers weekly</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>ASMT news</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " name issn_count\n",
- "0 Acta Orientalia. 1\n",
- "1 Acta Orientalia (København) 1\n",
- "2 ˜The œpublishers weekly. 2\n",
- "3 Publishers weekly 2\n",
- "4 ASMT news 1"
- ]
- },
- "execution_count": 67,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 68,
- "metadata": {},
- "outputs": [],
- "source": [
- "unique_name = df[df.issn_count == 1]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 69,
- "metadata": {},
- "outputs": [],
- "source": [
- "repeated_names = df[df.issn_count > 1]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 70,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "586466"
- ]
- },
- "execution_count": 70,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "len(repeated_names)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 71,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.1995556779008452"
- ]
- },
- "execution_count": 71,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "len(repeated_names) / len(df)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "About 20% (586466 of 2938859) of the names are repeated, i.e. they map to more than one ISSN."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 72,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>issn_count</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>count</th>\n",
- " <td>586466.000000</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>mean</th>\n",
- " <td>2.443930</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>std</th>\n",
- " <td>15.260303</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>min</th>\n",
- " <td>2.000000</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>25%</th>\n",
- " <td>2.000000</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>50%</th>\n",
- " <td>2.000000</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>75%</th>\n",
- " <td>2.000000</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>max</th>\n",
- " <td>9520.000000</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " issn_count\n",
- "count 586466.000000\n",
- "mean 2.443930\n",
- "std 15.260303\n",
- "min 2.000000\n",
- "25% 2.000000\n",
- "50% 2.000000\n",
- "75% 2.000000\n",
- "max 9520.000000"
- ]
- },
- "execution_count": 72,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "repeated_names.describe()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Which name is shared most?"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 73,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "name Annual report.\n",
- "issn_count 9520\n",
- "Name: 45999, dtype: object"
- ]
- },
- "execution_count": 73,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "repeated_names.iloc[repeated_names.issn_count.argmax()] # Annual report."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The most shared name is \"Annual report.\", which maps to 9520 ISSNs."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 74,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['1495-9097',\n",
- " '2205-7919',\n",
- " '1447-3836',\n",
- " '1914-9220',\n",
- " '0707-1515',\n",
- " '1494-6149',\n",
- " '2293-3174',\n",
- " '1977-9046',\n",
- " '0707-4298',\n",
- " '2239-9674']"
- ]
- },
- "execution_count": 74,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "mapping[\"Annual report.\"][:10]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "On average a repeated name points to about 2.4 ISSNs (the median is 2). Over 27k names point to more than 3 ISSNs."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 76,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "27230"
- ]
- },
- "execution_count": 76,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "len(repeated_names[repeated_names.issn_count > 3])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 77,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>name</th>\n",
- " <th>issn_count</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>1356987</th>\n",
- " <td>Karrier online</td>\n",
- " <td>6</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1778072</th>\n",
- " <td>Curriculum express for parents.</td>\n",
- " <td>4</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>366690</th>\n",
- " <td>Annual Report ....</td>\n",
- " <td>7</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>878789</th>\n",
- " <td>Brilliant.</td>\n",
- " <td>4</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>37507</th>\n",
- " <td>Synthesis.</td>\n",
- " <td>35</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>942931</th>\n",
- " <td>Special Burda</td>\n",
- " <td>11</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>640073</th>\n",
- " <td>Belle.</td>\n",
- " <td>6</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2666134</th>\n",
- " <td>Naše delo</td>\n",
- " <td>6</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>376646</th>\n",
- " <td>Opinion.</td>\n",
- " <td>12</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>918869</th>\n",
- " <td>Limited edition magazine.</td>\n",
- " <td>16</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " name issn_count\n",
- "1356987 Karrier online 6\n",
- "1778072 Curriculum express for parents. 4\n",
- "366690 Annual Report .... 7\n",
- "878789 Brilliant. 4\n",
- "37507 Synthesis. 35\n",
- "942931 Special Burda 11\n",
- "640073 Belle. 6\n",
- "2666134 Naše delo 6\n",
- "376646 Opinion. 12\n",
- "918869 Limited edition magazine. 16"
- ]
- },
- "execution_count": 77,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "repeated_names[repeated_names.issn_count > 3].sample(n=10)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 78,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['1158-9574',\n",
- " '2183-0134',\n",
- " '1517-8889',\n",
- " '1480-4670',\n",
- " '0379-8402',\n",
- " '1724-6598',\n",
- " '2035-8326',\n",
- " '2249-5053',\n",
- " '0872-4784',\n",
- " '2610-8933',\n",
- " '2610-8925',\n",
- " '2654-9263',\n",
- " '2420-9198',\n",
- " '2538-693X',\n",
- " '1487-5349',\n",
- " '1285-9133',\n",
- " '2655-5662',\n",
- " '2295-9084']"
- ]
- },
- "execution_count": 78,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "mapping[\"Philosophica.\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 79,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "<AxesSubplot:>"
- ]
- },
- "execution_count": 79,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAASEklEQVR4nO3df6zddX3H8edrrWDFHxTZbljbrDU2S6pkgDfYRbPcyVIKLismxECIVGTWTEh0I5lF/8CJJLIM3SCKq9JZFhQY4tq4uq5jnBj/AAFllB9ir1BHmwJK+WExkdW998f5VI/lXnp7envv7T3PR3Jyvt/39/P98T7fe/u653u+9zZVhSRpsP3WdB+AJGn6GQaSJMNAkmQYSJIwDCRJwNzpPoB+nXjiibV48eK+1n3xxRc57rjjJveAjiL2b//2P7j933fffT+tqt8+sH7UhsHixYu59957+1q30+kwMjIyuQd0FLF/+7f/kek+jGmT5Mdj1b1MJEkyDCRJhoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkjuLfQD4c23Y9z/vX/ltf6+74zLsn+Wgkafr5zkCSZBhIkgwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkiQmEQZJFSe5M8nCSh5J8pNU/mWRXkvvb4+yedS5PMprk0SRn9tRXttpokrU99SVJ7m71W5IcM9mNSpLGN5F3BvuAy6pqGbAcuCTJsrbsc1V1SntsBmjLzgPeAqwEvpBkTpI5wOeBs4BlwPk927m6bevNwLPAxZPUnyRpAg4aBlW1u6q+16Z/BjwCLHiFVVYBN1fVL6rqcWAUOL09Rqvqsap6CbgZWJUkwLuA29r6G4Bz+uxHktSHQ/rMIMli4FTg7la6NMkDSdYnmd9qC4Anelbb2Wrj1d8IPFdV+w6oS5KmyIT/c5skrwW+Dny0ql5Icj1wJVDt+RrgA0fkKH99DGuANQBDQ0N0Op2+tjM0Dy47ed/BB46h333OJHv37p0VffTL/u1/kPsfz4TCIMmr6AbBTVV1O0BVPdWz/EvAN9vsLmBRz+oLW41x6s8AxyeZ294d9I7/DVW1DlgHMDw8XCMjIxM5/Je57qaNXLOtv//kbccF/e1zJul0OvT72s0G9m//g9z/eCZyN1GAG4BHquqzPfWTeoa9B3iwTW8CzktybJIlwFLgu8A9wNJ259AxdD9k3lRVBdwJnNvWXw1sPLy2JEmHYiI/Hr8DeB+wLcn9rfZxuncDnUL3MtEO4EMAVfVQkluBh+neiXRJVf0SIMmlwBZgDrC+qh5q2/sYcHOSTwPfpxs+kqQpctAwqKrvABlj0eZXWOcq4Kox6pvHWq+qHqN7t5EkaRr4G8iSJMNAkmQYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJCYQBkkWJbkzycNJHkrykVY/IcnWJNvb8/xWT5Jrk4wmeSDJaT3bWt3Gb0+yuqf+tiTb2jrXJsmRaFaSNLaJvDPYB1xWVcuA5cAlSZYBa4E7qmopcEebBzgLWNoea4DroRsewBXA24HTgSv2B0gb88Ge9VYefmuSpIk6aBhU1e6q+l6b/hnwCLAAWAVsaMM2AOe06VXAjdV1F3B8kpOAM4GtVbWnqp4FtgIr27LXV9VdVVXAjT3bkiRNgUP6zCDJYuBU4G5gqKp2t0VPAkNtegHwRM9qO1vtleo7x6hLkqbI3IkOTPJa4OvAR6vqhd7L+lVVSeoIHN+Bx7CG7qUnhoaG6HQ6fW1naB5cdvK+vtbtd58zyd69e2dFH/2yf/sf5P7HM6EwSPIqukFwU1Xd3spPJTmpqna3Sz1Pt/ouYFHP6gtbbRcwckC90+oLxxj/MlW1DlgH
MDw8XCMjI2MNO6jrbtrINdsmnIO/YccF/e1zJul0OvT72s0G9m//g9z/eCZyN1GAG4BHquqzPYs2AfvvCFoNbOypX9juKloOPN8uJ20BViSZ3z44XgFsacteSLK87evCnm1JkqbARH48fgfwPmBbkvtb7ePAZ4Bbk1wM/Bh4b1u2GTgbGAV+DlwEUFV7klwJ3NPGfaqq9rTpDwNfAeYB32oPSdIUOWgYVNV3gPHu+z9jjPEFXDLOttYD68eo3wu89WDHIkk6MvwNZEmSYSBJMgwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kSEwiDJOuTPJ3kwZ7aJ5PsSnJ/e5zds+zyJKNJHk1yZk99ZauNJlnbU1+S5O5WvyXJMZPZoCTp4CbyzuArwMox6p+rqlPaYzNAkmXAecBb2jpfSDInyRzg88BZwDLg/DYW4Oq2rTcDzwIXH05DkqRDd9AwqKpvA3smuL1VwM1V9YuqehwYBU5vj9GqeqyqXgJuBlYlCfAu4La2/gbgnENrQZJ0uA7nM4NLkzzQLiPNb7UFwBM9Y3a22nj1NwLPVdW+A+qSpCk0t8/1rgeuBKo9XwN8YLIOajxJ1gBrAIaGhuh0On1tZ2geXHbyvoMPHEO/+5xJ9u7dOyv66Jf92/8g9z+evsKgqp7aP53kS8A32+wuYFHP0IWtxjj1Z4Djk8xt7w56x4+133XAOoDh4eEaGRnp5/C57qaNXLOtvxzccUF/+5xJOp0O/b52s4H92/8g9z+evi4TJTmpZ/Y9wP47jTYB5yU5NskSYCnwXeAeYGm7c+gYuh8yb6qqAu4Ezm3rrwY29nNMkqT+HfTH4yRfA0aAE5PsBK4ARpKcQvcy0Q7gQwBV9VCSW4GHgX3AJVX1y7adS4EtwBxgfVU91HbxMeDmJJ8Gvg/cMFnNSZIm5qBhUFXnj1Ee9x/sqroKuGqM+mZg8xj1x+jebSRJmib+BrIkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAksQEwiDJ+iRPJ3mwp3ZCkq1Jtrfn+a2eJNcmGU3yQJLTetZZ3cZvT7K6p/62JNvaOtcmyWQ3KUl6ZRN5Z/AVYOUBtbXAHVW1FLijzQOcBSxtjzXA9dAND+AK4O3A6cAV+wOkjflgz3oH7kuSdIQdNAyq6tvAngPKq4ANbXoDcE5P/cbqugs4PslJwJnA1qraU1XPAluBlW3Z66vqrqoq4MaebUmSpsjcPtcbqqrdbfpJYKhNLwCe6Bm3s9Veqb5zjPqYkqyh+46DoaEhOp1Ofwc/Dy47eV9f6/a7z5lk7969s6KPftm//Q9y/+PpNwx+paoqSU3GwUxgX+uAdQDDw8M1MjLS13auu2kj12zrr/UdF/S3z5mk0+nQ72s3G9i//Q9y/+Pp926ip9olHtrz062+C1jUM25hq71SfeEYdUnSFOo3DDYB++8IWg1s7Klf2O4qWg483y4nbQFWJJnfPjheAWxpy15IsrzdRXRhz7YkSVPkoNdKknwNGAFOTLKT7l1BnwFuTXIx8GPgvW34ZuBsYBT4OXARQFXtSXIlcE8b96mq2v+h9Ifp3rE0D/hWe0iSptBBw6Cqzh9n0RljjC3gknG2sx5YP0b9XuCtBzsOSdKR428gS5IMA0mSYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEY
SJI4zDBIsiPJtiT3J7m31U5IsjXJ9vY8v9WT5Noko0keSHJaz3ZWt/Hbk6w+vJYkSYdqMt4Z/HFVnVJVw21+LXBHVS0F7mjzAGcBS9tjDXA9dMMDuAJ4O3A6cMX+AJEkTY0jcZloFbChTW8Azump31hddwHHJzkJOBPYWlV7qupZYCuw8ggclyRpHHMPc/0C/iNJAf9YVeuAoara3ZY/CQy16QXAEz3r7my18eovk2QN3XcVDA0N0el0+jrooXlw2cn7+lq3333OJHv37p0VffTL/u1/kPsfz+GGwTuraleS3wG2JvlB78KqqhYUk6KFzTqA4eHhGhkZ6Ws71920kWu29df6jgv62+dM0ul06Pe1mw3s3/4Huf/xHNZloqra1Z6fBr5B95r/U+3yD+356TZ8F7CoZ/WFrTZeXZI0RfoOgyTHJXnd/mlgBfAgsAnYf0fQamBjm94EXNjuKloOPN8uJ20BViSZ3z44XtFqkqQpcjiXiYaAbyTZv52vVtW/J7kHuDXJxcCPgfe28ZuBs4FR4OfARQBVtSfJlcA9bdynqmrPYRyXJOkQ9R0GVfUY8Adj1J8BzhijXsAl42xrPbC+32ORJB0efwNZkmQYSJIMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAksQMCoMkK5M8mmQ0ydrpPh5JGiQzIgySzAE+D5wFLAPOT7Jseo9KkgbHjAgD4HRgtKoeq6qXgJuBVdN8TJI0MOZO9wE0C4AneuZ3Am8/cFCSNcCaNrs3yaN97u9E4Kf9rJir+9zjzNJ3/7OE/dv/IPf/e2MVZ0oYTEhVrQPWHe52ktxbVcOTcEhHJfu3f/sf3P7HM1MuE+0CFvXML2w1SdIUmClhcA+wNMmSJMcA5wGbpvmYJGlgzIjLRFW1L8mlwBZgDrC+qh46grs87EtNRzn7H2z2r5dJVU33MUiSptlMuUwkSZpGhoEkabDCYLb+yYski5LcmeThJA8l+Uirn5Bka5Lt7Xl+qyfJte11eCDJaT3bWt3Gb0+yerp66keSOUm+n+SbbX5Jkrtbn7e0mxNIcmybH23LF/ds4/JWfzTJmdPUyiFLcnyS25L8IMkjSf5wkM5/kr9sX/sPJvlaklcP0vmfFFU1EA+6H0z/CHgTcAzw38Cy6T6uSertJOC0Nv064Id0/6zH3wJrW30tcHWbPhv4FhBgOXB3q58APNae57fp+dPd3yG8Dn8FfBX4Zpu/FTivTX8R+Is2/WHgi236POCWNr2sfV0cCyxpXy9zpruvCfa+AfjzNn0McPygnH+6v7T6ODCv57y/f5DO/2Q8Bumdwaz9kxdVtbuqvtemfwY8QvcbZBXdfyRoz+e06VXAjdV1F3B8kpOAM4GtVbWnqp4FtgIrp66T/iVZCLwb+HKbD/Au4LY25MD+978utwFntPGrgJur6hdV9TgwSvfrZkZL8gbgj4AbAKrqpap6jgE6/3TvjJyXZC7wGmA3A3L+J8sghcFYf/JiwTQdyxHT3vKeCtwNDFXV7rboSWCoTY/3WhzNr9HfA38N/F+bfyPwXFXta/O9vfyqz7b8+Tb+aO1/CfAT4J/aZbIvJzmOATn/VbUL+Dvgf+iGwPPAfQzO+Z8UgxQGs16S1wJfBz5aVS/0Lqvu++BZeR9xkj8Fnq6q+6b7WKbJXOA04PqqOhV4ke5loV+Z5ed/Pt2f6pcAvwscx9HzjmbGGKQwmNV/8iLJq+gGwU1VdXsrP9Xe/tOen2718V6Lo/U1egfwZ0l20L389y7gH+he/tj/i5W9vfyqz7b8DcAzHL397wR2VtXdbf42uuEwKOf/T4DHq+onVfW/wO10vyYG5fxPikEKg1n7Jy/a9c4bgEeq6rM9izYB++8IWQ1s7Klf2O4qWQ483y4n
bAFWJJnfftpa0WozWlVdXlULq2ox3fP6X1V1AXAncG4bdmD/+1+Xc9v4avXz2t0mS4ClwHenqI2+VdWTwBNJfr+VzgAeZkDOP93LQ8uTvKZ9L+zvfyDO/6SZ7k+wp/JB9y6KH9K9S+AT0308k9jXO+leAngAuL89zqZ7HfQOYDvwn8AJbXzo/mdCPwK2AcM92/oA3Q/ORoGLpru3Pl6LEX59N9Gb6H4zjwL/Ahzb6q9u86Nt+Zt61v9Ee10eBc6a7n4Ooe9TgHvb18C/0r0baGDOP/A3wA+AB4F/pntH0MCc/8l4+OcoJEkDdZlIkjQOw0CSZBhIkgwDSRKGgSQJw0CShGEgSQL+Hz8ESgUOWx2AAAAAAElFTkSuQmCC\n",
- "text/plain": [
- "<Figure size 432x288 with 1 Axes>"
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "repeated_names[repeated_names.issn_count > 3].issn_count.hist(bins=20)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 80,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "<AxesSubplot:>"
- ]
- },
- "execution_count": 80,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAAW/UlEQVR4nO3df6zddZ3n8edr8ccaGBcQ56ZDcYuZ6gZkpsoNsNGdXGWFimaKG8NCWCnKWI2QaNLNbHUnwZUlYXZH3SXrMFuHhpJ1qGQQaQQXO11PdJKtUpTlpywFS2i3tJGieHXCbp33/nE+13tuve09Pef+svf5SE7O97y/n+/3fs6bXF79fs/33G+qCknS0vYPFnoCkqSFZxhIkgwDSZJhIEnCMJAkAa9Y6AkM6rTTTqsVK1Ys9DTmzM9//nNOPPHEhZ7GomAvprIfU9mPSf304sEHH/xxVb3+8PpvbBisWLGCnTt3LvQ05kyn02FsbGyhp7Eo2Iup7MdU9mNSP71I8ux0dU8TSZIMA0mSYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSSJ3+BvIA9jxYZ7F+Tn7r7pvQvycyVpJh4ZSJIMA0mSYSBJwjCQJGEYSJLoIwySnJHkW0keT/JYkk+0+qlJtiV5qj2f0upJcnOSXUkeTvK2nn2tbeOfSrK2p35ukkfaNjcnyVy8WUnS9Po5MjgErK+qs4ALgGuTnAVsALZX1Upge3sN8B5gZXusA26BbngA1wPnA+cB108ESBvzkZ7tVg//1iRJ/ZoxDKpqX1V9vy3/DHgCOB1YA2xuwzYDl7blNcDt1bUDODnJMuBiYFtVHayqF4FtwOq27rVVtaOqCri9Z1+SpHlwTF86S7ICeCvwXWCkqva1Vc8DI235dOC5ns32tNrR6numqU/389fRPdpgZGSETqdzLNP/lfXnHBpou2Edy3zHx8cHfn/HG3sxlf2Yyn5MGqYXfYdBkpOAu4BPVtVLvaf1q6qS1EAzOAZVtRHYCDA6OlqD3vf06oX6BvKVY32P9b6uk+zFVPZjKvsxaZhe9HU1UZJX0g2CL1fVV1t5fzvFQ3s+0Op7gTN6Nl/eakerL5+mLkmaJ/1cTRTgVuCJqvp8z6qtwMQVQWuBe3rqV7Wrii4AftpOJ90PXJTklPbB8UXA/W3dS0kuaD/rqp59SZLmQT+nid4OfBB4JMlDrfZp4CbgziTXAM8Cl7V19wGXALuAXwAfAqiqg0luAB5o4z5bVQfb8seB24DXAN9oD0nSPJkxDKrqb4EjXfd/4TTjC7j2CPvaBGyapr4TeMtMc5EkzQ2/gSxJMgwkSYaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSfR328tNSQ4kebSn9pUkD7XH7ok7oCVZkeTvetb9Rc825yZ5JMmuJDe3W1yS5NQk25I81Z5PmYP3KUk6in6ODG4DVvcWqupfVtWqqloF3AV8tWf10xPrqupjPfVbgI8AK9tjYp8bgO1VtRLY3l5LkubRjGFQVd8GDk63rv3r/jLgjqPtI8ky4LVVtaPdFvN24NK2eg2wuS1v7qlLkubJjPdAnsE/A/ZX1VM9tTOT/AB4CfiTqvoOcDqwp2fMnlYDGKmqfW35eWDkSD8syTpgHcDIyAidTmegSa8/59BA2w3rWOY7Pj4+8Ps73tiLqezHVPZj0jC9GDYMrmDqUcE+4A1V9UKSc4GvJTm7351VVSWpo6zfCGwEGB0drbGxsYEmffWGewfabli7rxzre2yn02HQ93e8sRdT2Y+p7MekYXoxcBgkeQXwL4BzJ2pV9TLwclt+MMnTwJuAvcDyns2XtxrA/iTLqmpfO510YNA5SZIGM8ylpf8c+GFV/er0T5LXJzmhLb+R7gfFz7TTQC8luaB9znAVcE/bbCuwti2v7alLkuZJP5eW3gH8T+DNSfYkuaatupxf
/+D4D4CH26Wmfw18rKomPnz+OPCXwC7gaeAbrX4T8O4kT9ENmJsGfzuSpEHMeJqoqq44Qv3qaWp30b3UdLrxO4G3TFN/AbhwpnlIkuaO30CWJBkGkiTDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSfR3c5tNSQ4kebSn9pkke5M81B6X9Kz7VJJdSZ5McnFPfXWr7Uqyoad+ZpLvtvpXkrxqNt+gJGlm/RwZ3Aasnqb+hapa1R73ASQ5i+4d0M5u2/x5khParTC/CLwHOAu4oo0F+NO2r98FXgSuOfwHSZLm1oxhUFXfBg7ONK5ZA2ypqper6kd0b3F5Xnvsqqpnqur/AluANe1+yO+ie4tMgM3Apcf2FiRJw5rxtpdHcV2Sq4CdwPqqehE4HdjRM2ZPqwE8d1j9fOB1wE+q6tA0439NknXAOoCRkRE6nc5AE19/zqGZB82BY5nv+Pj4wO/veGMvprIfU9mPScP0YtAwuAW4Aaj2/DngwwPuq29VtRHYCDA6OlpjY2MD7efqDffO4qz6t/vKsb7HdjodBn1/xxt7MZX9mMp+TBqmFwOFQVXtn1hO8iXg6+3lXuCMnqHLW40j1F8ATk7yinZ00DtekjRPBrq0NMmynpfvByauNNoKXJ7k1UnOBFYC3wMeAFa2K4deRfdD5q1VVcC3gA+07dcC9wwyJ0nS4GY8MkhyBzAGnJZkD3A9MJZkFd3TRLuBjwJU1WNJ7gQeBw4B11bVL9t+rgPuB04ANlXVY+1H/BtgS5J/D/wAuHW23pwkqT8zhkFVXTFN+Yj/w66qG4Ebp6nfB9w3Tf0ZulcbSZIWiN9AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIk+giDJJuSHEjyaE/tPyb5YZKHk9yd5ORWX5Hk75I81B5/0bPNuUkeSbIryc1J0uqnJtmW5Kn2fMocvE9J0lH0c2RwG7D6sNo24C1V9XvA/wY+1bPu6apa1R4f66nfAnyE7n2RV/bscwOwvapWAtvba0nSPJoxDKrq28DBw2rfrKpD7eUOYPnR9pFkGfDaqtpRVQXcDlzaVq8BNrflzT11SdI8mfEeyH34MPCVntdnJvkB8BLwJ1X1HeB0YE/PmD2tBjBSVfva8vPAyJF+UJJ1wDqAkZEROp3OQBNef86hmQfNgWOZ7/j4+MDv73hjL6ayH1PZj0nD9GKoMEjyb4FDwJdbaR/whqp6Icm5wNeSnN3v/qqqktRR1m8ENgKMjo7W2NjYQPO+esO9A203rN1XjvU9ttPpMOj7O97Yi6nsx1T2Y9IwvRg4DJJcDbwPuLCd+qGqXgZebssPJnkaeBOwl6mnkpa3GsD+JMuqal87nXRg0DlJkgYz0KWlSVYDfwz8YVX9oqf++iQntOU30v2g+Jl2GuilJBe0q4iuAu5pm20F1rbltT11SdI8mfHIIMkdwBhwWpI9wPV0rx56NbCtXSG6o1059AfAZ5P8P+DvgY9V1cSHzx+ne2XSa4BvtAfATcCdSa4BngUum5V3Jknq24xhUFVXTFO+9Qhj7wLuOsK6ncBbpqm/AFw40zwkSXPHbyBLkgwDSZJhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRJ9hkGSTUkOJHm0p3Zqkm1JnmrPp7R6ktycZFeSh5O8rWebtW38U0nW9tTPTfJI2+bmdmtMSdI86ffI4DZg9WG1DcD2qloJbG+vAd5D997HK4F1wC3QDQ+6t8w8HzgPuH4iQNqYj/Rsd/jPkiTNob7CoKq+DRw8rLwG2NyWNwOX9tRvr64dwMlJlgEXA9uq6mBVvQhsA1a3da+tqh1VVcDtPfuSJM2DGe+BfBQjVbWvLT8PjLTl04HnesbtabWj1fdMU/81SdbRPdpg
ZGSETqcz0MTXn3NooO2GdSzzHR8fH/j9HW/sxVT2Yyr7MWmYXgwTBr9SVZWkZmNfM/ycjcBGgNHR0RobGxtoP1dvuHcWZ9W/3VeO9T220+kw6Ps73tiLqezHVPZj0jC9GOZqov3tFA/t+UCr7wXO6Bm3vNWOVl8+TV2SNE+GCYOtwMQVQWuBe3rqV7Wrii4AftpOJ90PXJTklPbB8UXA/W3dS0kuaFcRXdWzL0nSPOjrNFGSO4Ax4LQke+heFXQTcGeSa4Bngcva8PuAS4BdwC+ADwFU1cEkNwAPtHGfraqJD6U/TveKpdcA32gPSdI86SsMquqKI6y6cJqxBVx7hP1sAjZNU98JvKWfuUiSZp/fQJYkGQaSJMNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJDBEGSd6c5KGex0tJPpnkM0n29tQv6dnmU0l2JXkyycU99dWttivJhmHflCTp2PR1c5vpVNWTwCqAJCfQvW/x3XTvbPaFqvqz3vFJzgIuB84Gfgf4myRvaqu/CLwb2AM8kGRrVT0+6NwkScdm4DA4zIXA01X1bPc2xtNaA2ypqpeBHyXZBZzX1u2qqmcAkmxpYw0DSZons/WZweXAHT2vr0vycJJNSU5ptdOB53rG7Gm1I9UlSfMk3VsWD7GD5FXA/wHOrqr9SUaAHwMF3AAsq6oPJ/kvwI6q+m9tu1uZvPH96qr6o1b/IHB+VV03zc9aB6wDGBkZOXfLli0DzfmRvT8daLthnXP6P+p77Pj4OCeddNIczuY3h72Yyn5MZT8m9dOLd77znQ9W1ejh9dk4TfQe4PtVtR9g4hkgyZeAr7eXe4EzerZb3mocpT5FVW0ENgKMjo7W2NjYQBO+esO9A203rN1XjvU9ttPpMOj7O97Yi6nsx1T2Y9IwvZiN00RX0HOKKMmynnXvBx5ty1uBy5O8OsmZwErge8ADwMokZ7ajjMvbWEnSPBnqyCDJiXSvAvpoT/k/JFlF9zTR7ol1VfVYkjvpfjB8CLi2qn7Z9nMdcD9wArCpqh4bZl6SpGMzVBhU1c+B1x1W++BRxt8I3DhN/T7gvmHmIkkanN9AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkZiEMkuxO8kiSh5LsbLVTk2xL8lR7PqXVk+TmJLuSPJzkbT37WdvGP5Vk7bDzkiT1b7aODN5ZVauqarS93gBsr6qVwPb2GuA9dO99vBJYB9wC3fAArgfOB84Drp8IEEnS3Jur00RrgM1teTNwaU/99uraAZycZBlwMbCtqg5W1YvANmD1HM1NknSYoe6B3BTwzSQF/Neq2giMVNW+tv55YKQtnw4817PtnlY7Un2KJOvoHlEwMjJCp9MZaMLrzzk00HbDOpb5jo+PD/z+jjf2Yir7MZX9mDRML2YjDN5RVXuT/DawLckPe1dWVbWgGFoLmo0Ao6OjNTY2NtB+rt5w72xM55jtvnKs77GdTodB39/xxl5MZT+msh+ThunF0KeJqmpvez4A3E33nP/+dvqH9nygDd8LnNGz+fJWO1JdkjQPhgqDJCcm+a2JZeAi4FFgKzBxRdBa4J62vBW4ql1VdAHw03Y66X7goiSntA+OL2o1SdI8GPY00Qhwd5KJff1VVf33JA8Adya5BngWuKyNvw+4BNgF/AL4EEBVHUxyA/BAG/fZqjo45NwkSX0aKgyq6hng96epvwBcOE29gGuPsK9NwKZh5iNJGozfQJYkGQaSJMNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJIYIgyRnJPlWkseTPJbkE63+mSR7kzzUHpf0bPOpJLuSPJnk4p766lbblWTDcG9JknSshrnT2SFgfVV9v90H+cEk29q6L1TVn/UOTnIWcDlw
NvA7wN8keVNb/UXg3cAe4IEkW6vq8SHmtiit2HBv32PXn3OIq49h/Ex23/TeWduXpOPPwGHQbmS/ry3/LMkTwOlH2WQNsKWqXgZ+lGQXcF5bt6vdQpMkW9rY4y4MJGmxGuoeyBOSrADeCnwXeDtwXZKrgJ10jx5epBsUO3o228NkeDx3WP38I/ycdcA6gJGRETqdzkDzXX/OoYG2m08jr5ndeQ7aq8VgfHz8N3r+s81+TGU/Jg3Ti6HDIMlJwF3AJ6vqpSS3ADcA1Z4/B3x42J8DUFUbgY0Ao6OjNTY2NtB+ZvP0y1xZf84hPvfIrGQ1ALuvHJu1fc23TqfDoP+tj0f2Yyr7MWmYXgz1f5skr6QbBF+uqq8CVNX+nvVfAr7eXu4FzujZfHmrcZS6JGkeDHM1UYBbgSeq6vM99WU9w94PPNqWtwKXJ3l1kjOBlcD3gAeAlUnOTPIquh8ybx10XpKkYzfMkcHbgQ8CjyR5qNU+DVyRZBXd00S7gY8CVNVjSe6k+8HwIeDaqvolQJLrgPuBE4BNVfXYEPOSJB2jYa4m+lsg06y67yjb3AjcOE39vqNtJ0maW34DWZJkGEiSDANJEoaBJAnDQJLELP05Ci1+x/JH8maTfyBP+s3gkYEkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEnC7xlojs3G9xvWn3NooLvT+R0HqX8eGUiSPDLQ8ctvXUv9WzRHBklWJ3kyya4kGxZ6PpK0lCyKI4MkJwBfBN4N7AEeSLK1qh5f2JlJx26uj0gG/QxlLnk09JtvUYQBcB6wq6qeAUiyBVhD937Jkha5hTolB4szHOfSXAVvqmpOdnxMk0g+AKyuqj9qrz8InF9V1x02bh2wrr18M/DkvE50fp0G/HihJ7FI2Iup7MdU9mNSP734x1X1+sOLi+XIoC9VtRHYuNDzmA9JdlbV6ELPYzGwF1PZj6nsx6RherFYPkDeC5zR83p5q0mS5sFiCYMHgJVJzkzyKuByYOsCz0mSloxFcZqoqg4luQ64HzgB2FRVjy3wtBbakjgd1id7MZX9mMp+TBq4F4viA2RJ0sJaLKeJJEkLyDCQJBkGCy3JpiQHkjzaUzs1ybYkT7XnUxZyjvMpyRlJvpXk8SSPJflEqy+5niT5h0m+l+R/tV78u1Y/M8l3259u+Uq76GLJSHJCkh8k+Xp7vWT7kWR3kkeSPJRkZ6sN9LtiGCy824DVh9U2ANuraiWwvb1eKg4B66vqLOAC4NokZ7E0e/Iy8K6q+n1gFbA6yQXAnwJfqKrfBV4Erlm4KS6ITwBP9Lxe6v14Z1Wt6vl+wUC/K4bBAquqbwMHDyuvATa35c3ApfM5p4VUVfuq6vtt+Wd0f+lPZwn2pLrG28tXtkcB7wL+utWXRC8mJFkOvBf4y/Y6LOF+HMFAvyuGweI0UlX72vLzwMhCTmahJFkBvBX4Lku0J+2UyEPAAWAb8DTwk6o61IbsoRuWS8V/Av4Y+Pv2+nUs7X4U8M0kD7Y/1wMD/q4siu8Z6MiqqpIsuet/k5wE3AV8sqpe6v4DsGsp9aSqfgmsSnIycDfwTxZ2RgsnyfuAA1X1YJKxBZ7OYvGOqtqb5LeBbUl+2LvyWH5XPDJYnPYnWQbQng8s8HzmVZJX0g2CL1fVV1t5Sfekqn4CfAv4p8DJSSb+IbeU/nTL24E/TLIb2EL39NB/Zun2g6ra254P0P3HwnkM+LtiGCxOW4G1bXktcM8CzmVetXPAtwJPVNXne1YtuZ4keX07IiDJa+je7+MJuqHwgTZsSfQCoKo+VVXLq2oF3T9Z8z+q6kqWaD+SnJjktyaWgYuARxnwd8VvIC+wJHcAY3T/9Ox+4Hrga8CdwBuAZ4HLqurwD5mPS0neAXwHeITJ88Kfpvu5wZLqSZLfo/sB4Al0/+F2Z1V9Nskb6f7L+FTgB8C/qqqXF26m86+dJvrXVfW+pdqP9r7vbi9fAfxVVd2Y5HUM8LtiGEiSPE0kSTIM
JEkYBpIkDANJEoaBJAnDQJKEYSBJAv4/mF0rfaZ2dTEAAAAASUVORK5CYII=\n",
- "text/plain": [
- "<Figure size 432x288 with 1 Axes>"
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "repeated_names[(repeated_names.issn_count > 3) & (repeated_names.issn_count < 50)].issn_count.hist(bins=10)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 81,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "<AxesSubplot:>"
- ]
- },
- "execution_count": 81,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAAWyklEQVR4nO3df7DddX3n8edryaIglYC0t26S2TA1tYNQXbgLdJ3tXqQDQR3DH9bBYUu0rJlt0Vo3swp2usyqzGCVpcJWdrKSBdoMkVK7ySiKGeSu05kGEX8Qfmi5ixGSRWINxI0/N+57/zifrIfLveTec27u+aZ5Pmbu3PP9fD/f73l9Q25e9/s933NIVSFJOrr9o1EHkCSNnmUgSbIMJEmWgSQJy0CSBCwZdYBBnXLKKbVy5cqBtv3BD37AS17ykoUNtMC6nrHr+aD7GbueD7qfsev5oHsZH3jggb+vql983oqqOiK/zjrrrBrUvffeO/C2i6XrGbuer6r7Gbuer6r7Gbuer6p7GYEv1wz/pnqZSJJkGUiSLANJEpaBJAnLQJKEZSBJwjKQJGEZSJKwDCRJHMEfRzGMHbv38bYrP7Poz7vz2jcs+nNK0lx4ZiBJsgwkSZaBJAnLQJKEZSBJwjKQJGEZSJKwDCRJzKEMkmxMsifJQzOsW5+kkpzSlpPkhiRTSR5Mcmbf3LVJHmtfa/vGz0qyo21zQ5Is1MFJkuZmLmcGtwCrpw8mWQFcADzRN3wRsKp9rQNuanNPBq4GzgHOBq5OclLb5ibgHX3bPe+5JEmH1yHLoKq+COydYdX1wHuB6htbA9zW/r/L24GlSV4OXAhsq6q9VfUMsA1Y3da9tKq2t/9R823AxUMdkSRp3gb6bKIka4DdVfX1aVd1lgFP9i3vamMvNL5rhvHZnncdvTMOxsbGmJycHCQ+Y8fB+jMODLTtMOaTd//+/QMf32Loej7ofsau54PuZ+x6PjgyMsIAZZDkeOD99C4RLaqq2gBsABgfH6+JiYmB9nPjpi1ct2PxP6Nv56UTc547OTnJoMe3GLqeD7qfsev5oPsZu54PjoyMMNjdRL8CnAp8PclOYDnwlSS/DOwGVvTNXd7GXmh8+QzjkqRFNO8yqKodVfVLVbWyqlbSu7RzZlV9B9gKXNbuKjoX2FdVTwF3AxckOam9cHwBcHdb9/0k57a7iC4DtizQsUmS5mgut5beDvwt8Moku5Jc/gLT7wIeB6aA/wr8PkBV7QU+CNzfvj7QxmhzPtG2+Z/AZwc7FEnSoA554byq3nqI9Sv7HhdwxSzzNgIbZxj/MnD6oXJIkg4f34EsSbIMJEmWgSQJy0CShGUgScIykCRhGUiSsAwkSVgGkiQsA0kSloEkCctAkoRlIEnCMpAkYRlIkrAMJElYBpIkLANJEpaBJIk5lEGSjUn2JHmob+wjSb6R5MEkf51kad+6q5JMJflmkgv7xle3sakkV/aNn5rkvjb+ySTHLuDxSZLmYC5nBrcAq6eNbQNOr6pfB/4OuAogyWnAJcCr2jYfT3JMkmOAPwMuAk4D3trmAnwYuL6qXgE8A1w+1BFJkubtkGVQVV8E9k4b+3xVHWiL24Hl7fEaYHNV/aSqvgVMAWe3r6mqeryqfgpsBtYkCfA64M62/a3AxcMdkiRpvpYswD5+F/hke7yMXjkctKuNATw5bfwc4GXAs33F0j//eZKsA9YBjI2NMTk5OVDgseNg/RkHDj1xgc0n7/79+wc+vsXQ9XzQ/Yxdzwfdz9j1fHBkZIQhyyDJHwEHgE0LE+eFVdUGYAPA+Ph4TUxMDLSfGzdt4bodC9GD87Pz0ok5z52cnGTQ41sMXc8H3c/Y9XzQ/YxdzwdHRkYYogySvA14I3B+VVUb3g2s6Ju2vI0xy/j3gKVJlrSzg/75kqRFMtCtpUlWA+8F3lRVP+xbtRW4JMmLkpwKrAK+BNwPrGp3Dh1L70Xmra1E7gXe3LZfC2wZ
7FAkSYOay62ltwN/C7wyya4klwP/GfgFYFuSryX5LwBV9TBwB/AI8Dngiqr6Wfut/53A3cCjwB1tLsD7gH+XZIreawg3L+gRSpIO6ZCXiarqrTMMz/oPdlVdA1wzw/hdwF0zjD9O724jSdKI+A5kSZJlIEmyDCRJWAaSJCwDSRKWgSQJy0CShGUgScIykCRhGUiSsAwkSVgGkiQsA0kSloEkCctAkoRlIEnCMpAkYRlIkrAMJEnMoQySbEyyJ8lDfWMnJ9mW5LH2/aQ2niQ3JJlK8mCSM/u2WdvmP5Zkbd/4WUl2tG1uSJKFPkhJ0guby5nBLcDqaWNXAvdU1SrgnrYMcBGwqn2tA26CXnkAVwPnAGcDVx8skDbnHX3bTX8uSdJhdsgyqKovAnunDa8Bbm2PbwUu7hu/rXq2A0uTvBy4ENhWVXur6hlgG7C6rXtpVW2vqgJu69uXJGmRLBlwu7Gqeqo9/g4w1h4vA57sm7erjb3Q+K4ZxmeUZB29Mw7GxsaYnJwcLPxxsP6MAwNtO4z55N2/f//Ax7cYup4Pup+x6/mg+xm7ng+OjIwweBn8f1VVSWohwszhuTYAGwDGx8drYmJioP3cuGkL1+0Y+tDnbeelE3OeOzk5yaDHtxi6ng+6n7Hr+aD7GbueD46MjDD43URPt0s8tO972vhuYEXfvOVt7IXGl88wLklaRIOWwVbg4B1Ba4EtfeOXtbuKzgX2tctJdwMXJDmpvXB8AXB3W/f9JOe2u4gu69uXJGmRHPJaSZLbgQnglCS76N0VdC1wR5LLgW8Db2nT7wJeD0wBPwTeDlBVe5N8ELi/zftAVR18Ufr36d2xdBzw2fYlSVpEhyyDqnrrLKvOn2FuAVfMsp+NwMYZxr8MnH6oHJKkw8d3IEuSLANJkmUgScIykCRhGUiSsAwkSVgGkiQsA0kSloEkCctAkoRlIEnCMpAkYRlIkrAMJElYBpIkLANJEpaBJAnLQJLEkGWQ5D1JHk7yUJLbk7w4yalJ7ksyleSTSY5tc1/Ulqfa+pV9+7mqjX8zyYVDHpMkaZ4GLoMky4A/AMar6nTgGOAS4MPA9VX1CuAZ4PK2yeXAM238+jaPJKe17V4FrAY+nuSYQXNJkuZv2MtES4DjkiwBjgeeAl4H3NnW3wpc3B6vacu09ecnSRvfXFU/qapvAVPA2UPmkiTNw8BlUFW7gY8CT9ArgX3AA8CzVXWgTdsFLGuPlwFPtm0PtPkv6x+fYRtJ0iJYMuiGSU6i91v9qcCzwF/Su8xz2CRZB6wDGBsbY3JycqD9jB0H6884cOiJC2w+effv3z/w8S2GrueD7mfsej7ofsau54MjIyMMUQbAbwHfqqrvAiT5FPBaYGmSJe23/+XA7jZ/N7AC2NUuK50IfK9v/KD+bZ6jqjYAGwDGx8drYmJioOA3btrCdTuGOfTB7Lx0Ys5zJycnGfT4FkPX80H3M3Y9H3Q/Y9fzwZGREYZ7zeAJ4Nwkx7dr/+cDjwD3Am9uc9YCW9rjrW2Ztv4LVVVt/JJ2t9GpwCrgS0PkkiTN08C/HlfVfUnuBL4CHAC+Su+39s8Am5N8qI3d3Da5GfjzJFPAXnp3EFFVDye5g16RHACuqKqfDZpLkjR/Q10rqaqrgaunDT/ODHcDVdWPgd+eZT/XANcMk0WSNDjfgSxJsgwkSZaBJAnLQJKEZSBJwjKQJGEZSJKwDCRJWAaSJCwDSRKWgSQJy0CShGUgScIykCRhGUiSsAwkSVgGkiQsA0kSloEkiSHLIMnSJHcm+UaSR5P8RpKTk2xL8lj7flKbmyQ3JJlK8mCSM/v2s7bNfyzJ2mEPSpI0P8OeGXwM+FxV/RrwauBR4ErgnqpaBdzTlgEuAla1r3XATQBJTgauBs4BzgauPlggkqTFMXAZJDkR+E3gZoCq+mlVPQusAW5t024FLm6P1wC3Vc92YGmSlwMXAtuqam9VPQNsA1YPmkuSNH+pqsE2TF4DbAAeoXdW
8ADwbmB3VS1tcwI8U1VLk3wauLaq/qatuwd4HzABvLiqPtTG/xj4UVV9dIbnXEfvrIKxsbGzNm/ePFD2PXv38fSPBtp0KGcsO3HOc/fv388JJ5xwGNMMp+v5oPsZu54Pup+x6/mgexnPO++8B6pqfPr4kiH2uQQ4E3hXVd2X5GP8/JIQAFVVSQZrmxlU1QZ6BcT4+HhNTEwMtJ8bN23huh3DHPpgdl46Mee5k5OTDHp8i6Hr+aD7GbueD7qfsev54MjICMO9ZrAL2FVV97XlO+mVw9Pt8g/t+562fjewom/75W1stnFJ0iIZuAyq6jvAk0le2YbOp3fJaCtw8I6gtcCW9ngrcFm7q+hcYF9VPQXcDVyQ5KT2wvEFbUyStEiGvVbyLmBTkmOBx4G30yuYO5JcDnwbeEubexfwemAK+GGbS1XtTfJB4P427wNVtXfIXJKkeRiqDKrqa8DzXoigd5YwfW4BV8yyn43AxmGySJIG5zuQJUmWgSTJMpAkYRlIkrAMJElYBpIkLANJEpaBJAnLQJKEZSBJwjKQJGEZSJKwDCRJWAaSJCwDSRKWgSQJy0CShGUgScIykCSxAGWQ5JgkX03y6bZ8apL7kkwl+WSSY9v4i9ryVFu/sm8fV7Xxbya5cNhMkqT5WYgzg3cDj/Ytfxi4vqpeATwDXN7GLweeaePXt3kkOQ24BHgVsBr4eJJjFiCXJGmOhiqDJMuBNwCfaMsBXgfc2abcClzcHq9py7T157f5a4DNVfWTqvoWMAWcPUwuSdL8LBly+z8F3gv8Qlt+GfBsVR1oy7uAZe3xMuBJgKo6kGRfm78M2N63z/5tniPJOmAdwNjYGJOTkwOFHjsO1p9x4NATF9h88u7fv3/g41sMXc8H3c/Y9XzQ/YxdzwdHRkYYogySvBHYU1UPJJlYsEQvoKo2ABsAxsfHa2JisKe9cdMWrtsxbA/O385LJ+Y8d3JykkGPbzF0PR90P2PX80H3M3Y9HxwZGWG4M4PXAm9K8nrgxcBLgY8BS5MsaWcHy4Hdbf5uYAWwK8kS4ETge33jB/VvI0laBAO/ZlBVV1XV8qpaSe8F4C9U1aXAvcCb27S1wJb2eGtbpq3/QlVVG7+k3W10KrAK+NKguSRJ83c4rpW8D9ic5EPAV4Gb2/jNwJ8nmQL20isQqurhJHcAjwAHgCuq6meHIZckaRYLUgZVNQlMtsePM8PdQFX1Y+C3Z9n+GuCahcgiSZo/34EsSbIMJEmWgSQJy0CShGUgScIykCRxeN5noA5aeeVnFnR/6884wNvmsM+d175hQZ9X0uHhmYEkyTKQJHmZaFHN51LNXC/DSNJC8MxAkmQZSJIsA0kSloEkCctAkoRlIEnCMpAkYRlIkrAMJEkMUQZJViS5N8kjSR5O8u42fnKSbUkea99PauNJckOSqSQPJjmzb19r2/zHkqwd/rAkSfMxzJnBAWB9VZ0GnAtckeQ04ErgnqpaBdzTlgEuAla1r3XATdArD+Bq4BzgbODqgwUiSVocA5dBVT1VVV9pj/838CiwDFgD3Nqm3Qpc3B6vAW6rnu3A0iQvBy4EtlXV3qp6BtgGrB40lyRp/lJVw+8kWQl8ETgdeKKqlrbxAM9U1dIknwauraq/aevuAd4HTAAvrqoPtfE/Bn5UVR+d4XnW0TurYGxs7KzNmzcPlHfP3n08/aOBNl00Y8fR6YxzzXfGshMPf5hZ7N+/nxNOOGFkz38oXc8H3c/Y9XzQvYznnXfeA1U1Pn186E8tTXIC8FfAH1bV93v//vdUVSUZvm1+vr8NwAaA8fHxmpiYGGg/N27awnU7uv2BrevPONDpjHPNt/PSicMfZhaTk5MM+ndkMXQ9H3Q/Y9fzwZGREYa8myjJP6ZXBJuq6lNt+Ol2+Yf2fU8b3w2s6Nt8eRubbVyStEiGuZsowM3Ao1X1n/pWbQUO3hG0FtjSN35Zu6voXGBfVT0F3A1ckOSk9sLxBW1MkrRIhrkO8Vrg
d4AdSb7Wxt4PXAvckeRy4NvAW9q6u4DXA1PAD4G3A1TV3iQfBO5v8z5QVXuHyCVJmqeBy6C9EJxZVp8/w/wCrphlXxuBjYNmkSQNx3cgS5IsA0mSZSBJwjKQJLEAbzqTumrH7n287crPLPrz7rz2DYv+nNKwPDOQJFkGkiTLQJKEZSBJwheQdZitHMELuAetP2NkTy0dcTwzkCRZBpIkLxNJ/6As9GW59WccmNN7NXxvxZHPMwNJkmUgSfIykbTg5nqpZq6XYKTFYBlIOmKN6vOn4B/e6yReJpIkeWYgaXijenPhKN9YOKrLgYfrjKQzZwZJVif5ZpKpJFeOOo8kHU06UQZJjgH+DLgIOA14a5LTRptKko4enSgD4Gxgqqoer6qfApuBNSPOJElHjVTVqDOQ5M3A6qr6N235d4Bzquqd0+atA9a1xVcC3xzwKU8B/n7AbRdL1zN2PR90P2PX80H3M3Y9H3Qv4z+tql+cPnhEvYBcVRuADcPuJ8mXq2p8ASIdNl3P2PV80P2MXc8H3c/Y9XxwZGSE7lwm2g2s6Fte3sYkSYugK2VwP7AqyalJjgUuAbaOOJMkHTU6cZmoqg4keSdwN3AMsLGqHj6MTzn0paZF0PWMXc8H3c/Y9XzQ/YxdzwdHRsZuvIAsSRqtrlwmkiSNkGUgSTr6yiDJMUm+muTTo84ykyRLk9yZ5BtJHk3yG6PONF2S9yR5OMlDSW5P8uIR59mYZE+Sh/rGTk6yLclj7ftJHcz4kfbf+cEkf51k6Qgjzpixb936JJXklFFkaxlmzJfkXe3P8eEkfzKqfC3LTP+dX5Nke5KvJflykrNHmXE2R10ZAO8GHh11iBfwMeBzVfVrwKvpWNYky4A/AMar6nR6L/hfMtpU3AKsnjZ2JXBPVa0C7mnLo3QLz8+4DTi9qn4d+DvgqsUONc0tPD8jSVYAFwBPLHagaW5hWr4k59H7tIJXV9WrgI+OIFe/W3j+n+GfAP+xql4D/Ie23DlHVRkkWQ68AfjEqLPMJMmJwG8CNwNU1U+r6tmRhprZEuC4JEuA44H/NcowVfVFYO+04TXAre3xrcDFi5lpupkyVtXnq+pAW9xO7/01IzPLnyPA9cB7gZHebTJLvt8Drq2qn7Q5exY9WJ9ZMhbw0vb4REb88zKbo6oMgD+l95f6/444x2xOBb4L/Ld2KesTSV4y6lD9qmo3vd++ngCeAvZV1edHm2pGY1X1VHv8HWBslGHm4HeBz446xHRJ1gC7q+rro84yi18F/mWS+5L8jyT/fNSBZvCHwEeSPEnvZ2fUZ4AzOmrKIMkbgT1V9cCos7yAJcCZwE1V9c+AHzD6yxvP0a69r6FXXP8EeEmSfz3aVC+sevdPd/Ye6iR/BBwANo06S78kxwPvp3dpo6uWACcD5wL/HrgjSUYb6Xl+D3hPVa0A3kM78++ao6YMgNcCb0qyk96nor4uyV+MNtLz7AJ2VdV9bflOeuXQJb8FfKuqvltV/wf4FPAvRpxpJk8neTlA+z7SywezSfI24I3ApdW9N/38Cr3S/3r7uVkOfCXJL4801XPtAj5VPV+id9Y/she5Z7GW3s8JwF/S+5TmzjlqyqCqrqqq5VW1kt4Lnl+oqk79RltV3wGeTPLKNnQ+8MgII83kCeDcJMe338DOp2Mvcjdb6f0Q0r5vGWGWGSVZTe+y5Zuq6oejzjNdVe2oql+qqpXt52YXcGb7e9oV/x04DyDJrwLH0q1PCIXeawT/qj1+HfDYCLPMqhMfR6HneBewqX1G0+PA20ec5zmq6r4kdwJfoXdp46uM+O32SW4HJoBTkuwCrgaupXfJ4HLg28BbRpdw1oxXAS8CtrUrG9ur6t92KWNVdeaSxix/hhuBje1Wzp8Ca0d5hjVLxncAH2s3XPyYn38Mf6f4cRSSpKPnMpEkaXaWgSTJMpAkWQaSJCwDSRKWgSQJy0CSBPw/7n+kOOvFQywAAAAASUVORK5CYII=\n",
- "text/plain": [
- "<Figure size 432x288 with 1 Axes>"
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "repeated_names[(repeated_names.issn_count > 3) & (repeated_names.issn_count < 20)].issn_count.hist(bins=10)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 82,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "<AxesSubplot:>"
- ]
- },
- "execution_count": 82,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAASuUlEQVR4nO3df4xd5X3n8fdncZMQaDAJ1Syy0RopVipSti0ZAVFW0RDvgoEII5VmiWgxiK73B02zK6SGrNRFSoJEpLBJQFtWVmBrsjQOddO1m9BQi2R2t39Ag5NsHKCIWWKKLYLT2DjrhCbr7nf/uI/Tm9HYvnPvnTt3Lu+XNJpznvOcc5/vfTTzmXPuuXdSVUiSXtv+wXIPQJK0/AwDSZJhIEkyDCRJGAaSJGDVcg+gX+ecc06tW7eur31/+MMfcsYZZwx3QMtkUmqZlDrAWsbRpNQBg9WyZ8+ev6mqX1ho24oNg3Xr1vHkk0/2te/s7CwzMzPDHdAymZRaJqUOsJZxNCl1wGC1JHnhRNu8TCRJMgwkSYaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJFbwO5AHsffAEW66/Usjf9x9d1098seUpF54ZiBJMgwkST2EQZIHkhxM8u2utjcn2Z3kufb97NaeJPckmUvyrSQXde2zufV/LsnmrvZ3JNnb9rknSYZdpCTp5Ho5M/gDYOO8ttuBx6pqPfBYWwe4EljfvrYA90EnPIA7gEuAi4E7jgdI6/Mvuvab/1iSpCV2yjCoqv8BHJrXvAnY1pa3Add2tT9YHY8Dq5OcC1wB7K6qQ1V1GNgNbGzb3lRVj1dVAQ92HUuSNCL93k00VVUvteXvAlNteQ3wYle//a3tZO37F2hfUJItdM44mJqaYnZ2tr/Bnw63XXisr30H0e94T+bo0aNLctxRm5Q6wFrG0aTUAUtXy8C3llZVJalhDKaHx9oKbAWYnp6ufv/Bw70P7eTuvaO/q3bfDTNDP+ak/NOOSakDrGUcTUodsHS19Hs30cvtEg/t+8HWfgA4r6vf2tZ2sva1C7RLkkao3zDYBRy/I2gzsLOr/cZ2V9GlwJF2OelR4PIkZ7cXji8HHm3bfpDk0nYX0Y1dx5Ikjcgpr5Uk+RwwA5yTZD+du4LuAh5OcgvwAvC+1v0R4CpgDvgRcDNAVR1K8lHga63fR6rq+IvS/4bOHUunA3/WviRJI3TKMKiq959g04YF+hZw6wmO8wDwwALtTwK/dKpxSJKWju9AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSQwYBkn+XZKnknw7yeeSvCHJ+UmeSDKX5PNJXtf6vr6tz7Xt67qO8+HW/mySKwasSZK0SH2HQZI1wO8A01X1S8BpwPXAx4FPVtVbgcPALW2XW4DDrf2TrR9JLmj7vR3YCPx+ktP6HZckafEGvUy0Cjg9ySrgjcBLwHuAHW37NuDatryprdO2b0iS1r69qn5cVd8B5oCLBxyXJGkRVvW7Y1UdSPIJ4K+BV4E/B/YAr1TVsdZtP7CmLa8BXmz7HktyBHhLa3+869Dd+/yMJFuALQBTU1PMzs72Nfap0+G2C4+duuOQ9Tvekzl69OiSHHfUJqUOsJZxNCl1wNLV0ncYJDmbzl/15wOvAH9E5zLPkqmqrcBWgOnp6ZqZmenrOPc+tJO79/Zdet/23TAz9GPOzs7S7/MwTialDrCWcTQpdcDS1TLIZaJ/Cnynqr5XVf8X+ALwLmB1u2wEsBY40JYPAOcBtO1nAd/vbl9gH0nSCAwSBn8NXJrkje3a/wbgaeCrwHWtz2ZgZ1ve1dZp279SVdXar293G50PrAf+coBxSZIWaZDXDJ5IsgP4OnAM
+AadSzhfArYn+Vhru7/tcj/w2SRzwCE6dxBRVU8leZhOkBwDbq2qv+t3XJKkxRvownlV3QHcMa/5eRa4G6iq/hb49RMc507gzkHGIknqn+9AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSQwYBklWJ9mR5K+SPJPknUnenGR3kufa97Nb3yS5J8lckm8luajrOJtb/+eSbB60KEnS4gx6ZvBp4MtV9YvALwPPALcDj1XVeuCxtg5wJbC+fW0B7gNI8mbgDuAS4GLgjuMBIkkajb7DIMlZwLuB+wGq6idV9QqwCdjWum0Drm3Lm4AHq+NxYHWSc4ErgN1VdaiqDgO7gY39jkuStHipqv52TH4F2Ao8TeesYA/wQeBAVa1ufQIcrqrVSb4I3FVVf9G2PQZ8CJgB3lBVH2vtvwe8WlWfWOAxt9A5q2Bqauod27dv72vsBw8d4eVX+9p1IBeuOWvoxzx69Chnnnnm0I87apNSB1jLOJqUOmCwWi677LI9VTW90LZVA4xpFXAR8IGqeiLJp/n7S0IAVFUl6S9tFlBVW+kEENPT0zUzM9PXce59aCd37x2k9P7su2Fm6MecnZ2l3+dhnExKHWAt42hS6oClq2WQ1wz2A/ur6om2voNOOLzcLv/Qvh9s2w8A53Xtv7a1nahdkjQifYdBVX0XeDHJ21rTBjqXjHYBx+8I2gzsbMu7gBvbXUWXAkeq6iXgUeDyJGe3F44vb22SpBEZ9FrJB4CHkrwOeB64mU7APJzkFuAF4H2t7yPAVcAc8KPWl6o6lOSjwNdav49U1aEBxyVJWoSBwqCqvgks9GLEhgX6FnDrCY7zAPDAIGORJPXPdyBLkgwDSZJhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkScCq5R6AJtu627/UU7/bLjzGTT327dW+u64e6vGkSeaZgSTJMJAkGQaSJAwDSRKGgSQJw0CShGEgScIwkCQxhDBIclqSbyT5Yls/P8kTSeaSfD7J61r769v6XNu+rusYH27tzya5YtAxSZIWZxhnBh8Enula/zjwyap6K3AYuKW13wIcbu2fbP1IcgFwPfB2YCPw+0lOG8K4JEk9GigMkqwFrgY+09YDvAfY0bpsA65ty5vaOm37htZ/E7C9qn5cVd8B5oCLBxmXJGlxBv1sok8Bvwv8fFt/C/BKVR1r6/uBNW15DfAiQFUdS3Kk9V8DPN51zO59fkaSLcAWgKmpKWZnZ/sa9NTpnc/CGbV+x3syR48eXZLjDkuvz/NSzMlyPS/jPieLMSm1TEodsHS19B0GSd4LHKyqPUlmhjaik6iqrcBWgOnp6ZqZ6e9h731oJ3fvHf1n9O27YWbox5ydnaXf52EUev3wudsuPDb0OVmK57sX4z4nizEptUxKHbB0tQzy0/cu4JokVwFvAN4EfBpYnWRVOztYCxxo/Q8A5wH7k6wCzgK+39V+XPc+kqQR6Ps1g6r6cFWtrap1dF4A/kpV3QB8FbiuddsM7GzLu9o6bftXqqpa+/XtbqPzgfXAX/Y7LknS4i3FtZIPAduTfAz4BnB/a78f+GySOeAQnQChqp5K8jDwNHAMuLWq/m4JxiVJOoGhhEFVzQKzbfl5FrgbqKr+Fvj1E+x/J3DnMMYiSVo834EsSTIMJEmGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkszX86k7QM1t3+paEf87YLj3FTD8fdd9fVQ39sjZZnBpIkw0CSZBhIkjAMJEkYBpIkDANJEoaBJAnDQJKE
YSBJwjCQJGEYSJIwDCRJGAaSJAwDSRIDhEGS85J8NcnTSZ5K8sHW/uYku5M8176f3dqT5J4kc0m+leSirmNtbv2fS7J58LIkSYsxyJnBMeC2qroAuBS4NckFwO3AY1W1HnisrQNcCaxvX1uA+6ATHsAdwCXAxcAdxwNEkjQafYdBVb1UVV9vy/8HeAZYA2wCtrVu24Br2/Im4MHqeBxYneRc4Apgd1UdqqrDwG5gY7/jkiQt3lBeM0iyDvhV4Algqqpeapu+C0y15TXAi1277W9tJ2qXJI1IqmqwAyRnAv8duLOqvpDklapa3bX9cFWdneSLwF1V9Ret/THgQ8AM8Iaq+lhr/z3g1ar6xAKPtYXOJSampqbesX379r7GfPDQEV5+ta9dB3LhmrOGfsyjR49y5plnDv24w7L3wJGe+k2dztDnZCme714s15z0+lwvRq/zslzPda/G/edkMQap5bLLLttTVdMLbRvofyAn+Tngj4GHquoLrfnlJOdW1UvtMtDB1n4AOK9r97Wt7QCdQOhun13o8apqK7AVYHp6umZmZhbqdkr3PrSTu/eO/t8/77thZujHnJ2dpd/nYRR6+f+50Plfu8Oek6V4vnuxXHPS63O9GL3Oy3I9170a95+TxViqWga5myjA/cAzVfUfuzbtAo7fEbQZ2NnVfmO7q+hS4Ei7nPQocHmSs9sLx5e3NknSiAzyp9i7gN8E9ib5Zmv798BdwMNJbgFeAN7Xtj0CXAXMAT8CbgaoqkNJPgp8rfX7SFUdGmBckqRF6jsM2rX/nGDzhgX6F3DrCY71APBAv2ORpFFbtwSX5XrxBxvPWJLj+g5kSZJhIEkyDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgSWKMwiDJxiTPJplLcvtyj0eSXkvGIgySnAb8J+BK4ALg/UkuWN5RSdJrx1iEAXAxMFdVz1fVT4DtwKZlHpMkvWakqpZ7DCS5DthYVb/V1n8TuKSqfntevy3Alrb6NuDZPh/yHOBv+tx33ExKLZNSB1jLOJqUOmCwWv5RVf3CQhtW9T+e0auqrcDWQY+T5Mmqmh7CkJbdpNQyKXWAtYyjSakDlq6WcblMdAA4r2t9bWuTJI3AuITB14D1Sc5P8jrgemDXMo9Jkl4zxuIyUVUdS/LbwKPAacADVfXUEj7kwJeaxsik1DIpdYC1jKNJqQOWqJaxeAFZkrS8xuUykSRpGRkGkqTJDoMkpyX5RpIvLrDt9Uk+3z7+4okk65ZhiD07RS03Jflekm+2r99ajjH2Ism+JHvbOJ9cYHuS3NPm5VtJLlqOcfaih1pmkhzpmpf/sBzj7EWS1Ul2JPmrJM8keee87StiXnqoY0XMSZK3dY3xm0l+kOTfzusz1DkZixeQl9AHgWeANy2w7RbgcFW9Ncn1wMeBfz7KwS3SyWoB+Pz8N+mNscuq6kRvmrkSWN++LgHua9/H1clqAfifVfXekY2mf58GvlxV17U7+t44b/tKmZdT1QErYE6q6lngV+CnH9dzAPiTed2GOicTe2aQZC1wNfCZE3TZBGxryzuADUkyirEtVg+1TJJNwIPV8TiwOsm5yz2oSZbkLODdwP0AVfWTqnplXrexn5ce61iJNgD/u6pemNc+1DmZ2DAAPgX8LvD/TrB9DfAidG5tBY4AbxnJyBbvU5y8FoBfa6eKO5Kcd5J+y62AP0+yp328yHw/nZdmf2sbR6eqBeCdSf5Xkj9L8vZRDm4Rzge+B/yXdinyM0nOmNdnJcxLL3XAypiTbtcDn1ugfahzMpFhkOS9wMGq2rPcYxlUj7X8KbCuqv4xsJu/P+MZR/+kqi6ic4p7a5J3L/eABnCqWr5O57Ngfhm4F/hvIx5fr1YBFwH3
VdWvAj8EVuLHyPdSx0qZEwDapa5rgD9a6seayDAA3gVck2QfnU9AfU+S/zqvz08/AiPJKuAs4PujHGSPTllLVX2/qn7cVj8DvGO0Q+xdVR1o3w/SuQZ68bwuK+ajSU5VS1X9oKqOtuVHgJ9Lcs7IB3pq+4H9VfVEW99B55dqt5UwL6esYwXNyXFXAl+vqpcX2DbUOZnIMKiqD1fV2qpaR+cU6ytV9Rvzuu0CNrfl61qfsXsHXi+1zLtOeA2dF5rHTpIzkvz88WXgcuDb87rtAm5sd0pcChypqpdGPNRT6qWWJP/w+OtQSS6m8/M2dn9wVNV3gReTvK01bQCentdt7OellzpWypx0eT8LXyKCIc/JpN9N9DOSfAR4sqp20XmR6bNJ5oBDdH7RrhjzavmdJNcAx+jUctNyju0kpoA/aT+Lq4A/rKovJ/lXAFX1n4FHgKuAOeBHwM3LNNZT6aWW64B/neQY8Cpw/Tj+wdF8AHioXZZ4Hrh5hc7LqepYMXPS/sj4Z8C/7Gpbsjnx4ygkSZN5mUiStDiGgSTJMJAkGQaSJAwDSRKGgSQJw0CSBPx/YueE4LohpnIAAAAASUVORK5CYII=\n",
- "text/plain": [
- "<Figure size 432x288 with 1 Axes>"
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "repeated_names[(repeated_names.issn_count > 3) & (repeated_names.issn_count < 8)].issn_count.hist()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 83,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "<AxesSubplot:>"
- ]
- },
- "execution_count": 83,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWoAAAD4CAYAAADFAawfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAANX0lEQVR4nO3dbYxc91XH8e/BmybFGzkJKSuzidhUqipFtSDxqCQKL3YNTdMkat/0haMCKQ9aCUQVnoRsISH1BSJFVQVIiMZqCxW03ZY0gcpWiUKbJaoELrttGjtxTNzENLFS3PDgdoMEBA4v5q69a2ZmZx9m5qz9/UijvQ//O3Pu0Z3fzt65147MRJJU1/eNugBJUm8GtSQVZ1BLUnEGtSQVZ1BLUnFjg3jS66+/Pqempgbx1EPz2muvsXPnzlGXUZb96c7e9GZ/OltcXHw1M9/Uad1AgnpqaoqFhYVBPPXQzM/PMz09PeoyyrI/3dmb3uxPZxHxT93WeepDkoozqCWpOINakoozqCWpOINakoozqCWpuL4uz4uI08D3gP8BXs/M1iCLkiRdsJ7rqGcy89WBVSJJ6shTH5JUXPTzHwdExIvAvwEJPJSZhzqMmQVmASYmJvbOzc1tqKBjZ85taLvN2jO5a9X80tIS4+PjI6llO7A/3dmb3uxPZzMzM4vdTiv3G9STmXkmIn4QeBz4QGY+2W18q9XKjd5CPnXgyIa226zTD96zat7bXHuzP93Zm97sT2cR0TWo+zr1kZlnmp9ngUeBt29deZKkXtYM6ojYGRFXL08DdwLHB12YJKmtn6s+JoBHI2J5/Kcz868HWpUk6bw1gzozXwB+ZAi1SJI68PI8SSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4voO6ojYERFfj4jDgyxIkrTaej5RPwCcGFQhkqTO+grqiLgBuAf42GDLkSRdLDJz7UERDwO/C1wN/EZm3tthzCwwCzAxMbF3bm5uQwUdO3NuQ9tt1p7JXavml5aWGB8fH0kt24H96c7e9GZ/OpuZmVnMzFandWNrbRwR9wJnM3MxIqa7jcvMQ8AhgFarldPTXYf29P4DRza03Wadft/0qvn5+Xk2ug+XA/vTnb3pzf6sXz+nPu4A3h0Rp4E5YF9E/PlAq5IknbdmUGfmwcy8ITOngP3AlzPzpwZemSQJ8DpqSSpvzXPUK2XmPDA/kEokSR35iVqSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJak4g1qSijOoJam4NYM6Iq6KiK9GxDci4pmI+OAwCpMktY31MeY/gX2ZuRQRVwBfiYgvZubfD7g2SRJ9BHVmJrDUzF7RPHKQRUmSLujrHHVE7IiIp4CzwOOZeXSgVUmSzov2B+Y+B0dcAzwKfCAzj1+0bhaYBZiYmNg7Nze3oYKOnTm3oe02a8/krlXzS0tLjI+PD+W1q+zzegyzP9uNvenN/nQ2MzOzmJmtTuvWFdQAEfHbwH9k5oe7jWm1WrmwsLC+KhtTB45saLvNOv3gPavm5+fnmZ6eHsprV9nn9Rhmf7Ybe9Ob/eksIroGdT9Xfbyp+SRNRLwReAfw3JZWKEnqqp+rPnYDn4yIHbSD/XOZeXiwZUmSlvVz1cfTwC1DqEWS1IF3JkpScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVn
UEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScWsGdUTcGBFPRMSzEfFMRDwwjMIkSW1jfYx5Hfj1zPxaRFwNLEbE45n57IBrkyTRxyfqzHwlM7/WTH8POAFMDrowSVJbZGb/gyOmgCeBt2Xmdy9aNwvMAkxMTOydm5vbUEHHzpzb0HabtWdy16r5paUlxsfHh/LaVfZ5PYbZn+3G3vRmfzqbmZlZzMxWp3V9B3VEjAN/C/xOZj7Sa2yr1cqFhYV1FwowdeDIhrbbrNMP3rNqfn5+nunp6aG8dpV9Xo9h9me7sTe92Z/OIqJrUPd11UdEXAF8HvjUWiEtSdpa/Vz1EcDHgROZ+ZHBlyRJWqmfT9R3AD8N7IuIp5rH3QOuS5LUWPPyvMz8ChBDqEWS1IF3JkpScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBW3ZlBHxCci4mxEHB9GQZKk1fr5RP2nwF0DrkOS1MWaQZ2ZTwL/OoRaJEkdRGauPShiCjicmW/rMWYWmAWYmJjYOzc3t6GCjp05t6HtNmvP5K5V80tLS4yPjw/ltavs83oMsz/bjb3pbRj92Y7vqZmZmcXMbHVat2VBvVKr1cqFhYV1Fbls6sCRDW23WacfvGfV/Pz8PNPT00N57Sr7vB7D7M92Y296G0Z/tuN7KiK6BrVXfUhScQa1JBXXz+V5nwH+DnhrRLwcET8/+LIkScvG1hqQmfcNoxBJUmee+pCk4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4gxqSSrOoJak4voK6oi4KyJORsSpiDgw6KIkSResGdQRsQP4I+BdwM3AfRFx86ALkyS19fOJ+u3Aqcx8ITP/C5gD3jPYsiRJy8b6GDMJvLRi/mXgxy4eFBGzwGwzuxQRJzdf3vDEh/7fouuBV4dfyfB02Of1uOT7swn2prdLtj+bfE/9cLcV/QR1XzLzEHBoq55v1CJiITNbo66jKvvTnb3pzf6sXz+nPs4AN66Yv6FZJkkagn6C+h+At0TETRHxBmA/8IXBliVJWrbmqY/MfD0ifhl4DNgBfCIznxl4ZaN3yZzGGRD705296c3+rFNk5qhrkCT14J2JklScQS1JxV02QR0RN0bEExHxbEQ8ExEPNMuvi4jHI+L55ue1zfKIiD9sbpt/OiJuXfFc9zfjn4+I+0e1T1stInZExNcj4nAzf1NEHG168Nnmy2Qi4spm/lSzfmrFcxxslp+MiHeOaFcGIiKuiYiHI+K5iDgREbd7/FwQEb/avLeOR8RnIuIqj6EtkpmXxQPYDdzaTF8N/CPtW+J/DzjQLD8AfKiZvhv4IhDAbcDRZvl1wAvNz2ub6WtHvX9b1KNfAz4NHG7mPwfsb6Y/CvxiM/1LwEeb6f3AZ5vpm4FvAFcCNwHfBHaMer+2sD+fBH6hmX4DcI3Hz/neTAIvAm9ccey832Noi/o76gJGtuPwV8A7gJPA7mbZbuBkM/0QcN+K8Seb9fcBD61Yvmrcdn3Qvj7+S8A+4HATMK8CY83624HHmunHgNub6bFmXAAHgYMrnvP8uO3+AHY1QRQXLff4yfNB/VLzC2isOYbe
6TG0NY/L5tTHSs2fWbcAR4GJzHylWfVtYKKZ7nTr/GSP5dvd7wO/CfxvM/8DwL9n5uvN/Mr9PN+DZv25Zvyl2htof7r7DvAnzemhj0XETjx+AMjMM8CHgW8Br9A+JhbxGNoSl11QR8Q48HngVzLzuyvXZftX+GV3vWJE3AuczczFUddS2BhwK/DHmXkL8BrtUx3nXa7HD0Bzbv49tH+h/RCwE7hrpEVdQi6roI6IK2iH9Kcy85Fm8T9HxO5m/W7gbLO8263zl+It9XcA746I07T/dcR9wB8A10TE8k1RK/fzfA+a9buAf+HS7M2yl4GXM/NoM/8w7eD2+Gn7SeDFzPxOZv438Ajt48pjaAtcNkEdEQF8HDiRmR9ZseoLwPI37/fTPne9vPxnmm/vbwPONX/iPgbcGRHXNp8i7myWbVuZeTAzb8jMKdpf7Hw5M98HPAG8txl2cW+We/beZnw2y/c33+jfBLwF+OqQdmOgMvPbwEsR8dZm0U8Az+Lxs+xbwG0R8f3Ne225Px5DW2HUJ8mH9QB+nPafpU8DTzWPu2mfF/sS8DzwN8B1zfig/R8mfBM4BrRWPNfPAaeax8+Oet+2uE/TXLjq48203ySngL8ArmyWX9XMn2rWv3nF9r/V9Owk8K5R788W9+ZHgYXmGPpL2ldtePxc2K8PAs8Bx4E/o33lhsfQFjy8hVySirtsTn1I0nZlUEtScQa1JBVnUEtScQa1JBVnUEtScQa1JBX3fwsmzuohajGeAAAAAElFTkSuQmCC\n",
- "text/plain": [
- "<Figure size 432x288 with 1 Axes>"
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "repeated_names[repeated_names.issn_count > 1000].issn_count.hist(bins=10)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 84,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>name</th>\n",
- " <th>issn_count</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>3511</th>\n",
- " <td>Bulletin.</td>\n",
- " <td>2797</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>7652</th>\n",
- " <td>Newsletter.</td>\n",
- " <td>2773</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8338</th>\n",
- " <td>Rapport.</td>\n",
- " <td>1062</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>23716</th>\n",
- " <td>Proceedings.</td>\n",
- " <td>1565</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>45931</th>\n",
- " <td>Annual report /</td>\n",
- " <td>1382</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>45999</th>\n",
- " <td>Annual report.</td>\n",
- " <td>9520</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>46056</th>\n",
- " <td>Annuaire.</td>\n",
- " <td>1263</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>47310</th>\n",
- " <td>Rapport annuel.</td>\n",
- " <td>2811</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>72341</th>\n",
- " <td>Annual report</td>\n",
- " <td>1074</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " name issn_count\n",
- "3511 Bulletin. 2797\n",
- "7652 Newsletter. 2773\n",
- "8338 Rapport. 1062\n",
- "23716 Proceedings. 1565\n",
- "45931 Annual report / 1382\n",
- "45999 Annual report. 9520\n",
- "46056 Annuaire. 1263\n",
- "47310 Rapport annuel. 2811\n",
- "72341 Annual report 1074"
- ]
- },
- "execution_count": 84,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "repeated_names[repeated_names.issn_count > 1000]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 85,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>name</th>\n",
- " <th>issn_count</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>103</th>\n",
- " <td>Bulletin d'information.</td>\n",
- " <td>696</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3230</th>\n",
- " <td>Bulletin de liaison.</td>\n",
- " <td>512</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3511</th>\n",
- " <td>Bulletin.</td>\n",
- " <td>2797</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>7652</th>\n",
- " <td>Newsletter.</td>\n",
- " <td>2773</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8338</th>\n",
- " <td>Rapport.</td>\n",
- " <td>1062</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>23716</th>\n",
- " <td>Proceedings.</td>\n",
- " <td>1565</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>45886</th>\n",
- " <td>Report.</td>\n",
- " <td>764</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>45931</th>\n",
- " <td>Annual report /</td>\n",
- " <td>1382</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>45999</th>\n",
- " <td>Annual report.</td>\n",
- " <td>9520</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>46056</th>\n",
- " <td>Annuaire.</td>\n",
- " <td>1263</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>46462</th>\n",
- " <td>Jaarverslag.</td>\n",
- " <td>678</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>47235</th>\n",
- " <td>Rapport d'activité.</td>\n",
- " <td>690</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>47310</th>\n",
- " <td>Rapport annuel.</td>\n",
- " <td>2811</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>49388</th>\n",
- " <td>Jahresbericht.</td>\n",
- " <td>528</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>72341</th>\n",
- " <td>Annual report</td>\n",
- " <td>1074</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>121778</th>\n",
- " <td>Alumni directory /</td>\n",
- " <td>511</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>129027</th>\n",
- " <td>Bulletin municipal.</td>\n",
- " <td>521</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>150771</th>\n",
- " <td>˜La œLettre.</td>\n",
- " <td>630</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>169246</th>\n",
- " <td>Local climatological data.</td>\n",
- " <td>613</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>269569</th>\n",
- " <td>Estimates.</td>\n",
- " <td>680</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " name issn_count\n",
- "103 Bulletin d'information. 696\n",
- "3230 Bulletin de liaison. 512\n",
- "3511 Bulletin. 2797\n",
- "7652 Newsletter. 2773\n",
- "8338 Rapport. 1062\n",
- "23716 Proceedings. 1565\n",
- "45886 Report. 764\n",
- "45931 Annual report / 1382\n",
- "45999 Annual report. 9520\n",
- "46056 Annuaire. 1263\n",
- "46462 Jaarverslag. 678\n",
- "47235 Rapport d'activité. 690\n",
- "47310 Rapport annuel. 2811\n",
- "49388 Jahresbericht. 528\n",
- "72341 Annual report 1074\n",
- "121778 Alumni directory / 511\n",
- "129027 Bulletin municipal. 521\n",
- "150771 ˜La œLettre. 630\n",
- "169246 Local climatological data. 613\n",
- "269569 Estimates. 680"
- ]
- },
- "execution_count": 85,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "repeated_names[repeated_names.issn_count > 500]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 86,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>name</th>\n",
- " <th>issn_count</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>103</th>\n",
- " <td>Bulletin d'information.</td>\n",
- " <td>696</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2676</th>\n",
- " <td>Newsletter /</td>\n",
- " <td>290</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3230</th>\n",
- " <td>Bulletin de liaison.</td>\n",
- " <td>512</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3511</th>\n",
- " <td>Bulletin.</td>\n",
- " <td>2797</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3941</th>\n",
- " <td>Boletín.</td>\n",
- " <td>227</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>534658</th>\n",
- " <td>Relatório e contas.</td>\n",
- " <td>248</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>606501</th>\n",
- " <td>Bildung und Beruf regional.</td>\n",
- " <td>292</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1013518</th>\n",
- " <td>Vies de famille.</td>\n",
- " <td>222</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1117647</th>\n",
- " <td>Country risk service.</td>\n",
- " <td>271</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1236478</th>\n",
- " <td>Performance report for the period ending March...</td>\n",
- " <td>217</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>72 rows × 2 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " name issn_count\n",
- "103 Bulletin d'information. 696\n",
- "2676 Newsletter / 290\n",
- "3230 Bulletin de liaison. 512\n",
- "3511 Bulletin. 2797\n",
- "3941 Boletín. 227\n",
- "... ... ...\n",
- "534658 Relatório e contas. 248\n",
- "606501 Bildung und Beruf regional. 292\n",
- "1013518 Vies de famille. 222\n",
- "1117647 Country risk service. 271\n",
- "1236478 Performance report for the period ending March... 217\n",
- "\n",
- "[72 rows x 2 columns]"
- ]
- },
- "execution_count": 86,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "repeated_names[repeated_names.issn_count > 200]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 87,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>name</th>\n",
- " <th>issn_count</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>103</th>\n",
- " <td>Bulletin d'information.</td>\n",
- " <td>696</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2676</th>\n",
- " <td>Newsletter /</td>\n",
- " <td>290</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3230</th>\n",
- " <td>Bulletin de liaison.</td>\n",
- " <td>512</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3511</th>\n",
- " <td>Bulletin.</td>\n",
- " <td>2797</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3941</th>\n",
- " <td>Boletín.</td>\n",
- " <td>227</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1315194</th>\n",
- " <td>Country commerce.</td>\n",
- " <td>120</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1327255</th>\n",
- " <td>Bible studies for life.</td>\n",
- " <td>159</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1805527</th>\n",
- " <td>LexisNexis practice guide.</td>\n",
- " <td>110</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2637306</th>\n",
- " <td>Operational risk report.</td>\n",
- " <td>119</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2659477</th>\n",
- " <td>Interempresas net.</td>\n",
- " <td>115</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>204 rows × 2 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " name issn_count\n",
- "103 Bulletin d'information. 696\n",
- "2676 Newsletter / 290\n",
- "3230 Bulletin de liaison. 512\n",
- "3511 Bulletin. 2797\n",
- "3941 Boletín. 227\n",
- "... ... ...\n",
- "1315194 Country commerce. 120\n",
- "1327255 Bible studies for life. 159\n",
- "1805527 LexisNexis practice guide. 110\n",
- "2637306 Operational risk report. 119\n",
- "2659477 Interempresas net. 115\n",
- "\n",
- "[204 rows x 2 columns]"
- ]
- },
- "execution_count": 87,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "repeated_names[repeated_names.issn_count > 100]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 88,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>name</th>\n",
- " <th>issn_count</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>˜The œpublishers weekly.</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>Publishers weekly</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>6</th>\n",
- " <td>Activitas Nervosa Superior.</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>12</th>\n",
- " <td>Library journal.</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>24</th>\n",
- " <td>Acta cardiologica.</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2938851</th>\n",
- " <td>AAPS introductions in the pharmaceutical scien...</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2938852</th>\n",
- " <td>AAPS introductions in the pharmaceutical scien...</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2938856</th>\n",
- " <td>Verzeichniss der Werke lebender Künstler auf d...</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2938857</th>\n",
- " <td>IEEE Advanced Information Management, Communic...</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2938858</th>\n",
- " <td>Products finishing México (Print)</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>586466 rows × 2 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " name issn_count\n",
- "2 ˜The œpublishers weekly. 2\n",
- "3 Publishers weekly 2\n",
- "6 Activitas Nervosa Superior. 2\n",
- "12 Library journal. 2\n",
- "24 Acta cardiologica. 2\n",
- "... ... ...\n",
- "2938851 AAPS introductions in the pharmaceutical scien... 2\n",
- "2938852 AAPS introductions in the pharmaceutical scien... 2\n",
- "2938856 Verzeichniss der Werke lebender Künstler auf d... 2\n",
- "2938857 IEEE Advanced Information Management, Communic... 2\n",
- "2938858 Products finishing México (Print) 2\n",
- "\n",
- "[586466 rows x 2 columns]"
- ]
- },
- "execution_count": 88,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "repeated_names"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "If a name matches a repeated name exactly, or fuzzy-matches a repeated name, and no other information is available, the match status must be ambiguous."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 89,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['2735-9298', '2735-928X']"
- ]
- },
- "execution_count": 89,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "mapping[\"Nigerian Journal of Wildlife Management\"]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "These are two ISSNs referring to the same journal."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 90,
- "metadata": {},
- "outputs": [],
- "source": [
- "import requests"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 91,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{'@graph': [{'@id': 'http://id.loc.gov/vocabulary/countries/nr',\n",
- " 'label': 'Nigeria'},\n",
- " {'@id': 'organization/ISSNCenter#_e',\n",
- " '@type': 'http://schema.org/Organization'},\n",
- " {'@id': 'resource/ISSN-L/2735-9298',\n",
- " 'identifiedBy': 'resource/ISSN/2735-928X#ISSN-L'},\n",
- " {'@id': 'resource/ISSN/2735-928X',\n",
- " '@type': ['http://id.loc.gov/ontologies/bibframe/Work',\n",
- " 'http://id.loc.gov/ontologies/bibframe/Instance',\n",
- " 'http://schema.org/Periodical'],\n",
- " 'identifiedBy': ['resource/ISSN/2735-928X#ISSN-L',\n",
- " 'resource/ISSN/2735-928X#ISSN',\n",
- " 'resource/ISSN/2735-928X#KeyTitle'],\n",
- " 'mainTitle': 'Nigerian Journal of Wildlife Management',\n",
- " 'otherPhysicalFormat': 'resource/ISSN/2735-9298',\n",
- " 'title': 'resource/ISSN/2735-928X#KeyTitle',\n",
- " 'format': 'vocabularies/medium#Online',\n",
- " 'identifier': '2735-928X',\n",
- " 'isFormatOf': 'resource/ISSN/2735-9298',\n",
- " 'type': 'http://marc21rdf.info/terms/formofmaterial#a',\n",
- " 'http://purl.org/ontology/bibo/issn': '2735-928X',\n",
- " 'isPartOf': 'resource/ISSN-L/2735-9298',\n",
- " 'issn': '2735-928X',\n",
- " 'name': ['Nigerian JOurnal of Wildlife Management (Ondo. Online)',\n",
- " 'Nigerian Journal of Wildlife Management'],\n",
- " 'publication': 'resource/ISSN/2735-928X#ReferencePublicationEvent',\n",
- " 'url': 'http://www.wildlifesociety.ng.org/'},\n",
- " {'@id': 'resource/ISSN/2735-928X#ISSN',\n",
- " '@type': 'http://id.loc.gov/ontologies/bibframe/Issn',\n",
- " 'status': 'vocabularies/IdentifierStatus#Valid',\n",
- " 'value': '2735-928X'},\n",
- " {'@id': 'resource/ISSN/2735-928X#ISSN-L',\n",
- " '@type': 'http://id.loc.gov/ontologies/bibframe/IssnL',\n",
- " 'status': 'vocabularies/IdentifierStatus#Valid',\n",
- " 'value': '2735-9298'},\n",
- " {'@id': 'resource/ISSN/2735-928X#KeyTitle',\n",
- " '@type': ['http://id.loc.gov/ontologies/bibframe/Identifier',\n",
- " 'http://id.loc.gov/ontologies/bibframe/KeyTitle'],\n",
- " 'value': 'Nigerian JOurnal of Wildlife Management (Ondo. Online)'},\n",
- " {'@id': 'resource/ISSN/2735-928X#Record',\n",
- " '@type': 'http://schema.org/CreativeWork',\n",
- " 'status': 'vocabularies/RecordStatus#Register',\n",
- " 'modified': '20200808163600.0',\n",
- " 'mainEntity': 'resource/ISSN/2735-928X',\n",
- " 'wasAttributedTo': 'organization/ISSNCenter#_e'},\n",
- " {'@id': 'resource/ISSN/2735-928X#ReferencePublicationEvent',\n",
- " '@type': 'http://schema.org/PublicationEvent',\n",
- " 'location': 'http://id.loc.gov/vocabulary/countries/nr'}],\n",
- " '@context': {'status': {'@id': 'http://id.loc.gov/ontologies/bibframe/status',\n",
- " '@type': '@id'},\n",
- " 'value': {'@id': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#value'},\n",
- " 'publication': {'@id': 'http://schema.org/publication', '@type': '@id'},\n",
- " 'mainTitle': {'@id': 'http://id.loc.gov/ontologies/bibframe/mainTitle'},\n",
- " 'title': {'@id': 'http://id.loc.gov/ontologies/bibframe/title',\n",
- " '@type': '@id'},\n",
- " 'name': {'@id': 'http://schema.org/name'},\n",
- " 'issn': {'@id': 'http://schema.org/issn'},\n",
- " 'format': {'@id': 'http://purl.org/dc/elements/1.1/format', '@type': '@id'},\n",
- " 'url': {'@id': 'http://schema.org/url'},\n",
- " 'identifiedBy': {'@id': 'http://id.loc.gov/ontologies/bibframe/identifiedBy',\n",
- " '@type': '@id'},\n",
- " 'otherPhysicalFormat': {'@id': 'http://id.loc.gov/ontologies/bibframe/otherPhysicalFormat',\n",
- " '@type': '@id'},\n",
- " 'isPartOf': {'@id': 'http://schema.org/isPartOf', '@type': '@id'},\n",
- " 'type': {'@id': 'http://purl.org/dc/terms/type', '@type': '@id'},\n",
- " 'identifier': {'@id': 'http://purl.org/dc/elements/1.1/identifier'},\n",
- " 'isFormatOf': {'@id': 'http://purl.org/dc/terms/isFormatOf', '@type': '@id'},\n",
- " 'wasAttributedTo': {'@id': 'http://www.w3.org/ns/prov#wasAttributedTo',\n",
- " '@type': '@id'},\n",
- " 'mainEntity': {'@id': 'http://schema.org/mainEntity', '@type': '@id'},\n",
- " 'modified': {'@id': 'http://purl.org/dc/terms/modified',\n",
- " '@type': 'http://www.w3.org/2001/XMLSchema#dateTime'},\n",
- " 'location': {'@id': 'http://schema.org/location', '@type': '@id'},\n",
- " 'label': {'@id': 'http://www.w3.org/2000/01/rdf-schema#label'}}}"
- ]
- },
- "execution_count": 91,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "requests.get(\"https://portal.issn.org/resource/ISSN/2735-928X?format=json\").json()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Interestingly, most (80%) journals do not seem to have the distinction between electronic and print. But it may be that names are not used consistently."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 94,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.8004443220991548"
- ]
- },
- "execution_count": 94,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "len(unique_name) / len(df)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 96,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>name</th>\n",
- " <th>issn_count</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>1848393</th>\n",
- " <td>Aux petits bonheurs des enfants</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1147789</th>\n",
- " <td>Statistik om boliger ... i Aalborg Kommune.</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2789832</th>\n",
- " <td>Mur (Regensburg)</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>35201</th>\n",
- " <td>Boletín de la Sociedad Vasco-Navarra de pediat...</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1955109</th>\n",
- " <td>Cucina piatti unici.</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1039</th>\n",
- " <td>ABD</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2595157</th>\n",
- " <td>Tribuna quinzenal de Mataró</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>416131</th>\n",
- " <td>Japan high tech review</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2885449</th>\n",
- " <td>Revista cósmica calavera.</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>343232</th>\n",
- " <td>First days.</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " name issn_count\n",
- "1848393 Aux petits bonheurs des enfants 1\n",
- "1147789 Statistik om boliger ... i Aalborg Kommune. 1\n",
- "2789832 Mur (Regensburg) 1\n",
- "35201 Boletín de la Sociedad Vasco-Navarra de pediat... 1\n",
- "1955109 Cucina piatti unici. 1\n",
- "1039 ABD 1\n",
- "2595157 Tribuna quinzenal de Mataró 1\n",
- "416131 Japan high tech review 1\n",
- "2885449 Revista cósmica calavera. 1\n",
- "343232 First days. 1"
- ]
- },
- "execution_count": 96,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "unique_name.sample(n=10)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Example \"Fieldiana\"\n",
- "\n",
- "* \"Fieldiana. Anthropology\"\n",
- "* https://www.jstor.org/journal/fieldianaanthro"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 97,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['2162-4321', '0071-4739']"
- ]
- },
- "execution_count": 97,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "mapping[\"Fieldiana. Anthropology\"]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "However, JSTOR reports two ISSNs: ISSN (print): 0071-4739, EISSN: 2162-4321 - but ISSN.org does not know about it?"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 98,
- "metadata": {},
- "outputs": [],
- "source": [
- "unique_issn = set([item for v in mapping.values() for item in v])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 99,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 99,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "\"2162-4321\" in unique_issn"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 100,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fieldiana. ['2162-4267', '2162-4321', '2162-4291', '0096-2651', '0015-0746', '0097-3572', '2163-7105', '0071-4739', '0015-0754', '2158-5520', '0096-0438', '2162-4348']\n",
- "Fieldiana. Anthropology ['2162-4321', '0071-4739']\n",
- "Fieldiana. Anthropology (Online) ['2162-4321', '0071-4739']\n"
- ]
- }
- ],
- "source": [
- "for k, v in mapping.items():\n",
- " if \"2162-4321\" in v:\n",
- " print(k, v)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "As an example: when given a string like \"Fieldiana\" we would report the match as ambiguous. But \"Fieldiana. (Online)\" might be matched to '2162-4321'."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Other Examples"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 101,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>name</th>\n",
- " <th>issn_count</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>571163</th>\n",
- " <td>Afhandling.</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1800364</th>\n",
- " <td>Advances in e-business research series (Online)</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2476171</th>\n",
- " <td>Journal of Automatic Control (Online)</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2756754</th>\n",
- " <td>Gerencia de riesgos y seguros.</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2799421</th>\n",
- " <td>˜The œskinny (Scotland ed. Online)</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1254438</th>\n",
- " <td>Accessible news (Print)</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2177530</th>\n",
- " <td>Biomathematical and biomechanical modeling of ...</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2093431</th>\n",
- " <td>˜Le œJura socialiste (Saint-Claude)</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2752857</th>\n",
- " <td>European Journal of Formal Sciences and Engine...</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>389738</th>\n",
- " <td>Austral journal of veterinary sciences (Online)</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " name issn_count\n",
- "571163 Afhandling. 2\n",
- "1800364 Advances in e-business research series (Online) 2\n",
- "2476171 Journal of Automatic Control (Online) 2\n",
- "2756754 Gerencia de riesgos y seguros. 2\n",
- "2799421 ˜The œskinny (Scotland ed. Online) 2\n",
- "1254438 Accessible news (Print) 2\n",
- "2177530 Biomathematical and biomechanical modeling of ... 2\n",
- "2093431 ˜Le œJura socialiste (Saint-Claude) 2\n",
- "2752857 European Journal of Formal Sciences and Engine... 2\n",
- "389738 Austral journal of veterinary sciences (Online) 2"
- ]
- },
- "execution_count": 101,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "repeated_names.sample(n=10)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 102,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['1608-3318', '1070-3284']"
- ]
- },
- "execution_count": 102,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "mapping[\"Russian journal of coordination chemistry.\"]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "A new question: How many journals are listed under different names, yet still refer to the same journal?"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 103,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['0733-2289']"
- ]
- },
- "execution_count": 103,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "mapping[\"San Bernardino County popular street atlas\"]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Journal of ..."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 104,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>name</th>\n",
- " <th>issn_count</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>983</th>\n",
- " <td>Journal of vocational behavior.</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>984</th>\n",
- " <td>Journal of vocational behavior (Print)</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1213</th>\n",
- " <td>Journal of American Concrete Institute.</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1214</th>\n",
- " <td>Journal of the American Concrete Institute</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1317</th>\n",
- " <td>Journal of the American Dental Hygienists' Ass...</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2938472</th>\n",
- " <td>Journal of surgical procedures and case reports</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2938580</th>\n",
- " <td>Journal of Epidermiological Society of Nigeria...</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2938581</th>\n",
- " <td>Journal of Epidermiological Society of Nigeria</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2938795</th>\n",
- " <td>Journal of Practicing Teachers</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2938796</th>\n",
- " <td>Journal of Practicing Teachers (Uyo. Online)</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>40257 rows × 2 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " name issn_count\n",
- "983 Journal of vocational behavior. 2\n",
- "984 Journal of vocational behavior (Print) 2\n",
- "1213 Journal of American Concrete Institute. 1\n",
- "1214 Journal of the American Concrete Institute 1\n",
- "1317 Journal of the American Dental Hygienists' Ass... 1\n",
- "... ... ...\n",
- "2938472 Journal of surgical procedures and case reports 1\n",
- "2938580 Journal of Epidermiological Society of Nigeria... 2\n",
- "2938581 Journal of Epidermiological Society of Nigeria 2\n",
- "2938795 Journal of Practicing Teachers 1\n",
- "2938796 Journal of Practicing Teachers (Uyo. Online) 1\n",
- "\n",
- "[40257 rows x 2 columns]"
- ]
- },
- "execution_count": 104,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df[df.name.str.startswith(\"Journal of\")]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.8"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}