docs: output structure table stub

author: Martin Czygan <martin.czygan@gmail.com> 2021-09-10 09:47:51 +0200
committer: Martin Czygan <martin.czygan@gmail.com> 2021-09-10 09:47:51 +0200
commit: a23c867858792d84747e2fd9f93a0cd13251b40f (patch)
tree: 92fa8cf97b9dd14f1d94567d4ec21338ef8d93f1
parent: 72c2f524e68b74645ea549466d4215d1b675063e (diff)
download: refcat-a23c867858792d84747e2fd9f93a0cd13251b40f.tar.gz
refcat-a23c867858792d84747e2fd9f93a0cd13251b40f.zip
2 files changed, 27 insertions, 1 deletions
diff --git a/docs/TR-20210808100000-IA-WDS-REFCAT/main.pdf b/docs/TR-20210808100000-IA-WDS-REFCAT/main.pdf
index 0ba95d9..e1f367a 100644
--- a/docs/TR-20210808100000-IA-WDS-REFCAT/main.pdf
+++ b/docs/TR-20210808100000-IA-WDS-REFCAT/main.pdf
diff --git a/docs/TR-20210808100000-IA-WDS-REFCAT/main.tex b/docs/TR-20210808100000-IA-WDS-REFCAT/main.tex
index ae93e47..b729d48 100644
--- a/docs/TR-20210808100000-IA-WDS-REFCAT/main.tex
+++ b/docs/TR-20210808100000-IA-WDS-REFCAT/main.tex
@@ -215,6 +215,32 @@ in~Table~\ref{table:cocicmp}.
 % TODO: some more numbers on the structure
 
 
+* doi-to-doi
+* only source doi
+* only target doi
+* paper-to-book (OL)
+* wikipedia-to-paper (WI)
+
+\begin{table}[]
+	\begin{center}
+		\begin{tabular}{ll}
+			\toprule
+			\textbf{Class}      & \textbf{Count} \\
+			\midrule
+			total               &            \\
+			doi-doi             &            \\
+			source-only doi     &            \\
+			target-only doi     &            \\
+			edge w/o doi        &            \\
+			target-open-library &            \\
+			source-wikipedia    &            \\
+		\end{tabular}
+		\vspace*{2mm}
+		\caption{Output structure, e.g.\ edges between documents that both have a doi (doi-doi).}
+		\label{table:structure}
+	\end{center}
+\end{table}
+
 \section{System Design}
 
 \subsection{Constraints}
@@ -268,7 +294,7 @@ harvests and imports web accessible sources such as Crossref, Pubmed, Arxiv,
 Datacite, DOAJ, dblp and others into its catalog (as the source permits, data
 is processed continuously or in batches). Reference data from PDF documents has
 been extracted with GROBID\footnote{GROBID
-\href{https://github.com/kermitt2/grobid/releases/tag/0.5.5}{v0.5.5}}, with the
+	\href{https://github.com/kermitt2/grobid/releases/tag/0.5.5}{v0.5.5}}, with the
 TEI-XML results being cached locally in a key-value store accessible with an S3
 API. Archived PDF documents result from dedicated web-scale crawls of scholarly
 domains conducted with
author	Martin Czygan <martin.czygan@gmail.com>	2021-09-10 09:47:51 +0200
committer	Martin Czygan <martin.czygan@gmail.com>	2021-09-10 09:47:51 +0200
commit	a23c867858792d84747e2fd9f93a0cd13251b40f (patch)
tree	92fa8cf97b9dd14f1d94567d4ec21338ef8d93f1
parent	72c2f524e68b74645ea549466d4215d1b675063e (diff)
download	refcat-a23c867858792d84747e2fd9f93a0cd13251b40f.tar.gz refcat-a23c867858792d84747e2fd9f93a0cd13251b40f.zip