aboutsummaryrefslogtreecommitdiffstats
path: root/docs/TR-20210808100000-IA-WDS-REFCAT
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2021-08-18 17:49:04 -0700
committerBryan Newbold <bnewbold@archive.org>2021-08-18 17:49:04 -0700
commit8fb454839903ffd539438df90a79904322e25da5 (patch)
tree9d64c0901fdc853c63930d474deff1c18245da4d /docs/TR-20210808100000-IA-WDS-REFCAT
parent9422fc5314ce6d1ba64cf32b0b88c9b76e96a0bd (diff)
downloadrefcat-8fb454839903ffd539438df90a79904322e25da5.tar.gz
refcat-8fb454839903ffd539438df90a79904322e25da5.zip
report: title; fix field name; capitalization
Diffstat (limited to 'docs/TR-20210808100000-IA-WDS-REFCAT')
-rw-r--r--docs/TR-20210808100000-IA-WDS-REFCAT/main.tex8
1 files changed, 4 insertions, 4 deletions
diff --git a/docs/TR-20210808100000-IA-WDS-REFCAT/main.tex b/docs/TR-20210808100000-IA-WDS-REFCAT/main.tex
index 76f1456..21917fd 100644
--- a/docs/TR-20210808100000-IA-WDS-REFCAT/main.tex
+++ b/docs/TR-20210808100000-IA-WDS-REFCAT/main.tex
@@ -18,7 +18,7 @@
\begin{document}
-\title{Fatcat Reference Dataset}
+\title{REFCAT: The Fatcat Citation Graph}
\author{Martin Czygan \\
\\
@@ -117,7 +117,7 @@ citations is not expected to shrink in the future.
We release the first version of the \emph{refcat} dataset in a format used
internally for storage and to serve queries (and which we call \emph{biblioref}
or \emph{bref} for short). The dataset includes metadata from fatcat, the
-Open Library Project and inbound links from the English Wikipedia. The fatcat
+Open Library project and inbound links from the English Wikipedia. The fatcat
project itself aggregates data from a variety of open data sources, such as
Crossref\citep{crossref}, PubMed\citep{canese2013pubmed},
DataCite\citep{brase2009datacite}, DOAJ\citep{doaj}, dblp\citep{ley2002dblp} and others,
@@ -196,7 +196,7 @@ Table~\ref{table:fields}.
\toprule
\textbf{Fields} & \textbf{Percentage} \\
\midrule
- \multicolumn{1}{l}{CN $\cdot$ RN $\cdot$ P $\cdot$ T $\cdot$ U $\cdot$ V $\cdot$ Y} & 14\% \\
+ \multicolumn{1}{l}{CN $\cdot$ CRN $\cdot$ P $\cdot$ T $\cdot$ U $\cdot$ V $\cdot$ Y} & 14\% \\
\multicolumn{1}{l}{\textbf{DOI}} & 14\% \\
\multicolumn{1}{l}{CN $\cdot$ CRN $\cdot$ IS $\cdot$ P $\cdot$ T $\cdot$ U $\cdot$ V $\cdot$ Y} & 5\% \\
\multicolumn{1}{l}{CN $\cdot$ CRN $\cdot$ \textbf{DOI} $\cdot$ U $\cdot$ V $\cdot$ Y} & 4\% \\
@@ -225,7 +225,7 @@ our target schema or perform
additional operations such as deduplication or fusion of matched and unmatched references.
The key derivation can be exact (via an identifier like DOI, PMID, etc.) or
-based on a value normalization, like slugifying a title string. For identifier
+based on a value normalization, like ``slugifying'' a title string. For identifier
based matches we can generate the target schema directly. For fuzzy matching
candidates, we pass possible match pairs through a verification procedure,
which is implemented for \emph{release entity}\footnote{\url{https://guide.fatcat.wiki/entity_release.html}.} pairs. This procedure is a