From f7f51098146db6c929a726dfb51bb84d5c50e833 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Wed, 13 Oct 2021 11:39:53 +0200 Subject: docs: make report adhere to arxiv conventions --- docs/TR-20210808100000-IA-WDS-REFCAT/Makefile | 2 +- docs/TR-20210808100000-IA-WDS-REFCAT/main.bib | 415 ++++++++++++++++++++++++ docs/TR-20210808100000-IA-WDS-REFCAT/main.pdf | Bin 130842 -> 130891 bytes docs/TR-20210808100000-IA-WDS-REFCAT/main.tex | 14 +- docs/TR-20210808100000-IA-WDS-REFCAT/refcat.zip | Bin 0 -> 44462 bytes docs/TR-20210808100000-IA-WDS-REFCAT/refs.bib | 415 ------------------------ 6 files changed, 426 insertions(+), 420 deletions(-) create mode 100644 docs/TR-20210808100000-IA-WDS-REFCAT/main.bib create mode 100644 docs/TR-20210808100000-IA-WDS-REFCAT/refcat.zip delete mode 100644 docs/TR-20210808100000-IA-WDS-REFCAT/refs.bib diff --git a/docs/TR-20210808100000-IA-WDS-REFCAT/Makefile b/docs/TR-20210808100000-IA-WDS-REFCAT/Makefile index 6eb4f3c..ccb2cd7 100644 --- a/docs/TR-20210808100000-IA-WDS-REFCAT/Makefile +++ b/docs/TR-20210808100000-IA-WDS-REFCAT/Makefile @@ -1,6 +1,6 @@ main.pdf: main.tex latexindent -w main.tex && rm -f main.bak* - latexindent -w refs.bib && rm -f refs.bak* + latexindent -w main.bib && rm -f main.bak* pdflatex main.tex bibtex main pdflatex main.tex diff --git a/docs/TR-20210808100000-IA-WDS-REFCAT/main.bib b/docs/TR-20210808100000-IA-WDS-REFCAT/main.bib new file mode 100644 index 0000000..d9ccb4f --- /dev/null +++ b/docs/TR-20210808100000-IA-WDS-REFCAT/main.bib @@ -0,0 +1,415 @@ +@inproceedings{kour2014real, + title={Real-time segmentation of on-line handwritten arabic script}, + author={Kour, George and Saabne, Raid}, + booktitle={Frontiers in Handwriting Recognition (ICFHR), 2014 14th International Conference on}, + pages={417--422}, + year={2014}, + organization={IEEE} +} + +@inproceedings{kour2014fast, + title={Fast classification of handwritten on-line Arabic characters}, + author={Kour, George and Saabne, Raid}, + booktitle={Soft Computing and Pattern Recognition (SoCPaR), 2014 6th International Conference of}, + pages={312--318}, + year={2014}, + organization={IEEE}, + doi={10.1109/SOCPAR.2014.7008025} +} + +@article{hadash2018estimate, + title={Estimate and Replace: A Novel Approach to Integrating Deep Neural Networks with Existing Applications}, + author={Hadash, Guy and Kermany, Einat and Carmeli, Boaz and Lavi, Ofer and Kour, George and Jacovi, Alon}, + journal={arXiv preprint arXiv:1804.09028}, + year={2018} +} + +@article{garfield1955citation, + title={Citation indexes for science}, + author={Garfield, Eugene}, + journal={Science}, + volume={122}, + number={3159}, + pages={108--111}, + year={1955}, + publisher={JSTOR} +} + +@inproceedings{lopez2009grobid, + title={GROBID: Combining automatic bibliographic data recognition and term extraction for scholarship publications}, + author={Lopez, Patrice}, + booktitle={International conference on theory and practice of digital libraries}, + pages={473--474}, + year={2009}, + organization={Springer} +} + +@article{garfield2007evolution, + title={The evolution of the science citation index}, + author={Garfield, Eugene}, + journal={International microbiology}, + volume={10}, + number={1}, + pages={65}, + year={2007} +} + +@article{shotton2013publishing, + title={Publishing: open citations}, + author={Shotton, David}, + journal={Nature News}, + volume={502}, + number={7471}, + pages={295}, + year={2013} +} + +@inproceedings{wu2019citeseerx, + title={CiteSeerX: 20 years of service to scholarly big data}, + author={Wu, Jian and Kim, Kunho and Giles, C Lee}, + booktitle={Proceedings of the Conference on Artificial Intelligence for Data Discovery and Reuse}, + pages={1--4}, + year={2019} +} + +@inproceedings{li2006citeseerx, + title={CiteSeerx: an architecture and web service design for an academic document search engine}, + author={Li, Huajing and Councill, Isaac and Lee, Wang-Chien and Giles, C Lee}, + booktitle={Proceedings of the 15th international conference on World Wide Web}, + pages={883--884}, + year={2006} +} + + +@inproceedings{sinha2015overview, + title={An overview of microsoft academic service (mas) and applications}, + author={Sinha, Arnab and Shen, Zhihong and Song, Yang and Ma, Hao and Eide, Darrin and Hsu, Bo-June and Wang, Kuansan}, + booktitle={Proceedings of the 24th international conference on world wide web}, + pages={243--246}, + year={2015} +} + +@inproceedings{ley2002dblp, + title={The DBLP computer science bibliography: Evolution, research issues, perspectives}, + author={Ley, Michael}, + booktitle={International symposium on string processing and information retrieval}, + pages={1--10}, + year={2002}, + organization={Springer} +} + + +@inproceedings{brase2009datacite, + title={DataCite-A global registration agency for research data}, + author={Brase, Jan}, + booktitle={2009 fourth international conference on cooperation and promotion of information resources in science and technology}, + pages={257--261}, + year={2009}, + organization={IEEE} +} + +@article{canese2013pubmed, + title={PubMed: the bibliographic database}, + author={Canese, Kathi and Weis, Sarah}, + journal={The NCBI Handbook}, + volume={2}, + pages={1}, + year={2013}, + publisher={National Center for Biotechnology Information (US)} +} + + +@article{shotton2018funders, + title={Funders should mandate open citations.}, + author={Shotton, David}, + journal={Nature}, + volume={553}, + number={7686}, + pages={129--130}, + year={2018}, + publisher={Nature Publishing Group} +} + +@article{hutchins2021tipping, + title={A tipping point for open citation data}, + author={Hutchins, B Ian}, + journal={Quantitative Science Studies}, + pages={1--5}, + year={2021} +} + +@article{silbert1970world, + title={The World's First Computerized Criminal-Justice Information-Sharing System-The New York State Identification and Intelligence System (NYSIIS)}, + author={Silbert, Jeffrey M}, + journal={Criminology}, + volume={8}, + pages={107}, + year={1970}, + publisher={HeinOnline} +} + +@article{peroni2020opencitations, + title={OpenCitations, an infrastructure organization for open scholarship}, + author={Peroni, Silvio and Shotton, David}, + journal={Quantitative Science Studies}, + volume={1}, + number={1}, + pages={428--444}, + year={2020}, + publisher={MIT Press One Rogers Street, Cambridge, MA 02142-1209, USA journals-info~…} +} + +@article{fricke2018semantic, + title={Semantic scholar}, + author={Fricke, Suzanne}, + journal={Journal of the Medical Library Association: JMLA}, + volume={106}, + number={1}, + pages={145}, + year={2018}, + publisher={Medical Library Association} +} + +@inproceedings{tang2016aminer, + title={AMiner: Toward understanding big scholar data}, + author={Tang, Jie}, + booktitle={Proceedings of the ninth ACM international conference on web search and data mining}, + pages={467--467}, + year={2016} +} + +@article{dean2010mapreduce, + title={MapReduce: a flexible data processing tool}, + author={Dean, Jeffrey and Ghemawat, Sanjay}, + journal={Communications of the ACM}, + volume={53}, + number={1}, + pages={72--77}, + year={2010}, + publisher={ACM New York, NY, USA} +} + +@article{collet2018zstandard, + title={Zstandard Compression and the application/zstd Media Type}, + author={Collet, Yann and Kucherawy, Murray}, + journal={RFC 8478}, + year={2018} +} + +@book{ortega2014academic, + title={Academic search engines: A quantitative outlook}, + author={Ortega, Jos{\'e} Luis}, + year={2014}, + publisher={Elsevier} +} + +@article{fedoryszak2014efficient, + title={Efficient blocking method for a large scale citation matching}, + author={Fedoryszak, Mateusz and Bolikowski, {\L}ukasz}, + journal={D-Lib Magazine}, + volume={20}, + number={11/12}, + year={2014}, + publisher={Corporation for National Research Initiatives} +} + +@inproceedings{fedoryszak2013large, + title={Large scale citation matching using Apache Hadoop}, + author={Fedoryszak, Mateusz and Tkaczyk, Dominika and Bolikowski, {\L}ukasz}, + booktitle={International Conference on Theory and Practice of Digital Libraries}, + pages={362--365}, + year={2013}, + organization={Springer} +} + +@article{hendricks2020crossref, + title={Crossref: The sustainable source of community-owned scholarly metadata}, + author={Hendricks, Ginny and Tkaczyk, Dominika and Lin, Jennifer and Feeney, Patricia}, + journal={Quantitative Science Studies}, + volume={1}, + number={1}, + pages={414--427}, + year={2020}, + publisher={MIT Press One Rogers Street, Cambridge, MA 02142-1209, USA journals-info~…} +} + +@inproceedings{tkaczyk2018machine, + title={Machine learning vs. rules and out-of-the-box vs. retrained: An evaluation of open-source bibliographic reference and citation parsers}, + author={Tkaczyk, Dominika and Collins, Andrew and Sheridan, Paraic and Beel, Joeran}, + booktitle={Proceedings of the 18th ACM/IEEE on joint conference on digital libraries}, + pages={99--108}, + year={2018} +} + +@article{jinha_2010, + title={Article 50 million: an estimate of the number of scholarly articles in existence}, + volume={23}, + DOI={10.1087/20100308}, + publisher={Wiley}, + author={Jinha}, + year={2010}, + month={Jul} +} + +@article{landhuis_2016, + title={Scientific literature: Information overload}, + volume={535}, + DOI={10.1038/nj7612-457a}, + number={7612}, + publisher={Springer Nature}, + author={Landhuis}, + year={2016}, + month={Jul} +} + +@article{khabsa_giles_2014, + title={The Number of Scholarly Documents on the Public Web}, + DOI={10.1371/journal.pone.0093949}, + publisher={Public Library of Science (PLoS)}, + author={Khabsa and Giles}, + editor={Zhang}, + year={2014}, + month={May} +} + +@inproceedings{councill2008parscit, + title={ParsCit: an Open-source CRF Reference String Parsing Package.}, + author={Councill, Isaac G and Giles, C Lee and Kan, Min-Yen}, + booktitle={LREC}, + volume={8}, + pages={661--667}, + year={2008} +} + +@article{lafferty2001conditional, + title={Conditional random fields: Probabilistic models for segmenting and labeling sequence data}, + author={Lafferty, John and McCallum, Andrew and Pereira, Fernando CN}, + year={2001} +} + + +@inproceedings{tkaczyk2014cermine, + title={Cermine--automatic extraction of metadata and references from scientific literature}, + author={Tkaczyk, Dominika and Szostek, Pawel and Dendek, Piotr Jan and Fedoryszak, Mateusz and Bolikowski, Lukasz}, + booktitle={2014 11th IAPR International Workshop on Document Analysis Systems}, + pages={217--221}, + year={2014}, + organization={IEEE} +} + + + +@inproceedings{hosseini2019excite, + title={EXCITE--A toolchain to extract, match and publish open literature references}, + author={Hosseini, Azam and Ghavimi, Behnam and Boukhers, Zeyd and Mayr, Philipp}, + booktitle={2019 ACM/IEEE Joint Conference on Digital Libraries (JCDL)}, + pages={432--433}, + year={2019}, + organization={IEEE} +} + + +@inproceedings{pasula2003identity, + title={Identity uncertainty and citation matching}, + author={Pasula, Hanna and Marthi, Bhaskara and Milch, Brian and Russell, Stuart J and Shpitser, Ilya}, + booktitle={Advances in neural information processing systems}, + pages={1425--1432}, + year={2003} +} + +@article{olensky2016evaluation, + title={Evaluation of the citation matching algorithms of CWTS and i FQ in comparison to the W eb of science}, + author={Olensky, Marlies and Schmidt, Marion and van Eck, Nees Jan}, + journal={Journal of the Association for Information Science and Technology}, + volume={67}, + number={10}, + pages={2550--2564}, + year={2016}, + publisher={Wiley Online Library} +} + +@article{mathiak2015challenges, + title={Challenges in matching dataset citation strings to datasets in social science}, + author={Mathiak, Brigitte and Boland, Katarina}, + journal={D-Lib Magazine}, + volume={21}, + number={1/2}, + pages={23--28}, + year={2015}, + publisher={Corporation for National Research Initiatives} +} + +@inproceedings{giles1998citeseer, + title={CiteSeer: An automatic citation indexing system}, + author={Giles, C Lee and Bollacker, Kurt D and Lawrence, Steve}, + booktitle={Proceedings of the third ACM conference on Digital libraries}, + pages={89--98}, + year={1998} +} + +@article{schulz2016use, + title={Use of application containers and workflows for genomic data analysis}, + author={Schulz, Wade L and Durant, Thomas JS and Siddon, Alexa J and Torres, Richard}, + journal={Journal of pathology informatics}, + volume={7}, + year={2016}, + publisher={Wolters Kluwer--Medknow Publications} +} + +@inproceedings{erdmann2017design, + title={Design and Execution of make-like, distributed Analyses based on Spotify’s Pipelining Package Luigi}, + author={Erdmann, M and Fischer, B and Fischer, R and Rieger, M}, + booktitle={Journal of Physics: Conference Series}, + volume={898}, + number={7}, + pages={072047}, + year={2017}, + organization={IOP Publishing} +} + +@misc{bernhardsson2018rouhani, + title={Rouhani A. spotify/luigi-GitHub}, + author={Bernhardsson, E and Freider, E}, + year={2018} +} + +@article{lampa2019scipipe, + title={SciPipe: A workflow library for agile development of complex and dynamic bioinformatics pipelines}, + author={Lampa, Samuel and Dahl{\"o}, Martin and Alvarsson, Jonathan and Spjuth, Ola}, + journal={GigaScience}, + volume={8}, + number={5}, + pages={giz044}, + year={2019}, + publisher={Oxford University Press} +} + +@article{czygan2014design, + title={Design and implementation of a library metadata management framework and its application in fuzzy data deduplication and data reconciliation with authority data}, + author={Czygan, Martin}, + journal={Informatik 2014}, + year={2014}, + publisher={Gesellschaft f{\"u}r Informatik eV} +} + +@article{mcilroy1971research, + title={A Research Unix reader: annotated excerpts from the Programmer’s Manual}, + author={McIlroy, M Douglas}, + year={1971}, + publisher={1971-1986} +} + +@dataset{harshdeep_singh_2020_3940692, + author = {Harshdeep Singh and + Robert West and + Giovanni Colavizza}, + title = {{Wikipedia Citations: A comprehensive dataset of + citations with identifiers extracted from English + Wikipedia}}, + month = jul, + year = 2020, + publisher = {Zenodo}, + version = {0.2}, + doi = {10.5281/zenodo.3940692}, + url = {https://doi.org/10.5281/zenodo.3940692} +} diff --git a/docs/TR-20210808100000-IA-WDS-REFCAT/main.pdf b/docs/TR-20210808100000-IA-WDS-REFCAT/main.pdf index f7cbf7a..d41c668 100644 Binary files a/docs/TR-20210808100000-IA-WDS-REFCAT/main.pdf and b/docs/TR-20210808100000-IA-WDS-REFCAT/main.pdf differ diff --git a/docs/TR-20210808100000-IA-WDS-REFCAT/main.tex b/docs/TR-20210808100000-IA-WDS-REFCAT/main.tex index 504cda7..ab14a23 100644 --- a/docs/TR-20210808100000-IA-WDS-REFCAT/main.tex +++ b/docs/TR-20210808100000-IA-WDS-REFCAT/main.tex @@ -1,7 +1,7 @@ \documentclass[hidelinks,10pt,twocolumn]{article} \usepackage{simpleConference} \usepackage[pdftex, - pdfauthor={Martin Czygan, Bryan Newbold}, + pdfauthor={Martin Czygan, Helge Holzmann, Bryan Newbold}, pdftitle={Refcat: The Internet Archive Scholar Citation Graph}, pdfsubject={Citation Graph}, pdfkeywords={Citation Graph, Scholarly Communications, Web Archiving}, @@ -35,13 +35,19 @@ \author{Martin Czygan \\ \\ Internet Archive \\ - San Francisco, California, USA \\ + San Francisco, CA, USA \\ martin@archive.org \\ \and + Helge Holzmann \\ + \\ + Internet Archive \\ + San Francisco, CA, USA \\ + helge@archive.org \\ + \and Bryan Newbold \\ \\ Internet Archive \\ - San Francisco, California, USA \\ + San Francisco, CA, USA \\ bnewbold@archive.org \\ \\ } @@ -557,5 +563,5 @@ more easily (see~Table~\ref{table:matches}). % \bibliographystyle{abbrv} \bibliographystyle{plainnat} -\bibliography{refs} +\bibliography{main} \end{document} diff --git a/docs/TR-20210808100000-IA-WDS-REFCAT/refcat.zip b/docs/TR-20210808100000-IA-WDS-REFCAT/refcat.zip new file mode 100644 index 0000000..30b7077 Binary files /dev/null and b/docs/TR-20210808100000-IA-WDS-REFCAT/refcat.zip differ diff --git a/docs/TR-20210808100000-IA-WDS-REFCAT/refs.bib b/docs/TR-20210808100000-IA-WDS-REFCAT/refs.bib deleted file mode 100644 index d9ccb4f..0000000 --- a/docs/TR-20210808100000-IA-WDS-REFCAT/refs.bib +++ /dev/null @@ -1,415 +0,0 @@ -@inproceedings{kour2014real, - title={Real-time segmentation of on-line handwritten arabic script}, - author={Kour, George and Saabne, Raid}, - booktitle={Frontiers in Handwriting Recognition (ICFHR), 2014 14th International Conference on}, - pages={417--422}, - year={2014}, - organization={IEEE} -} - -@inproceedings{kour2014fast, - title={Fast classification of handwritten on-line Arabic characters}, - author={Kour, George and Saabne, Raid}, - booktitle={Soft Computing and Pattern Recognition (SoCPaR), 2014 6th International Conference of}, - pages={312--318}, - year={2014}, - organization={IEEE}, - doi={10.1109/SOCPAR.2014.7008025} -} - -@article{hadash2018estimate, - title={Estimate and Replace: A Novel Approach to Integrating Deep Neural Networks with Existing Applications}, - author={Hadash, Guy and Kermany, Einat and Carmeli, Boaz and Lavi, Ofer and Kour, George and Jacovi, Alon}, - journal={arXiv preprint arXiv:1804.09028}, - year={2018} -} - -@article{garfield1955citation, - title={Citation indexes for science}, - author={Garfield, Eugene}, - journal={Science}, - volume={122}, - number={3159}, - pages={108--111}, - year={1955}, - publisher={JSTOR} -} - -@inproceedings{lopez2009grobid, - title={GROBID: Combining automatic bibliographic data recognition and term extraction for scholarship publications}, - author={Lopez, Patrice}, - booktitle={International conference on theory and practice of digital libraries}, - pages={473--474}, - year={2009}, - organization={Springer} -} - -@article{garfield2007evolution, - title={The evolution of the science citation index}, - author={Garfield, Eugene}, - journal={International microbiology}, - volume={10}, - number={1}, - pages={65}, - year={2007} -} - -@article{shotton2013publishing, - title={Publishing: open citations}, - author={Shotton, David}, - journal={Nature News}, - volume={502}, - number={7471}, - pages={295}, - year={2013} -} - -@inproceedings{wu2019citeseerx, - title={CiteSeerX: 20 years of service to scholarly big data}, - author={Wu, Jian and Kim, Kunho and Giles, C Lee}, - booktitle={Proceedings of the Conference on Artificial Intelligence for Data Discovery and Reuse}, - pages={1--4}, - year={2019} -} - -@inproceedings{li2006citeseerx, - title={CiteSeerx: an architecture and web service design for an academic document search engine}, - author={Li, Huajing and Councill, Isaac and Lee, Wang-Chien and Giles, C Lee}, - booktitle={Proceedings of the 15th international conference on World Wide Web}, - pages={883--884}, - year={2006} -} - - -@inproceedings{sinha2015overview, - title={An overview of microsoft academic service (mas) and applications}, - author={Sinha, Arnab and Shen, Zhihong and Song, Yang and Ma, Hao and Eide, Darrin and Hsu, Bo-June and Wang, Kuansan}, - booktitle={Proceedings of the 24th international conference on world wide web}, - pages={243--246}, - year={2015} -} - -@inproceedings{ley2002dblp, - title={The DBLP computer science bibliography: Evolution, research issues, perspectives}, - author={Ley, Michael}, - booktitle={International symposium on string processing and information retrieval}, - pages={1--10}, - year={2002}, - organization={Springer} -} - - -@inproceedings{brase2009datacite, - title={DataCite-A global registration agency for research data}, - author={Brase, Jan}, - booktitle={2009 fourth international conference on cooperation and promotion of information resources in science and technology}, - pages={257--261}, - year={2009}, - organization={IEEE} -} - -@article{canese2013pubmed, - title={PubMed: the bibliographic database}, - author={Canese, Kathi and Weis, Sarah}, - journal={The NCBI Handbook}, - volume={2}, - pages={1}, - year={2013}, - publisher={National Center for Biotechnology Information (US)} -} - - -@article{shotton2018funders, - title={Funders should mandate open citations.}, - author={Shotton, David}, - journal={Nature}, - volume={553}, - number={7686}, - pages={129--130}, - year={2018}, - publisher={Nature Publishing Group} -} - -@article{hutchins2021tipping, - title={A tipping point for open citation data}, - author={Hutchins, B Ian}, - journal={Quantitative Science Studies}, - pages={1--5}, - year={2021} -} - -@article{silbert1970world, - title={The World's First Computerized Criminal-Justice Information-Sharing System-The New York State Identification and Intelligence System (NYSIIS)}, - author={Silbert, Jeffrey M}, - journal={Criminology}, - volume={8}, - pages={107}, - year={1970}, - publisher={HeinOnline} -} - -@article{peroni2020opencitations, - title={OpenCitations, an infrastructure organization for open scholarship}, - author={Peroni, Silvio and Shotton, David}, - journal={Quantitative Science Studies}, - volume={1}, - number={1}, - pages={428--444}, - year={2020}, - publisher={MIT Press One Rogers Street, Cambridge, MA 02142-1209, USA journals-info~…} -} - -@article{fricke2018semantic, - title={Semantic scholar}, - author={Fricke, Suzanne}, - journal={Journal of the Medical Library Association: JMLA}, - volume={106}, - number={1}, - pages={145}, - year={2018}, - publisher={Medical Library Association} -} - -@inproceedings{tang2016aminer, - title={AMiner: Toward understanding big scholar data}, - author={Tang, Jie}, - booktitle={Proceedings of the ninth ACM international conference on web search and data mining}, - pages={467--467}, - year={2016} -} - -@article{dean2010mapreduce, - title={MapReduce: a flexible data processing tool}, - author={Dean, Jeffrey and Ghemawat, Sanjay}, - journal={Communications of the ACM}, - volume={53}, - number={1}, - pages={72--77}, - year={2010}, - publisher={ACM New York, NY, USA} -} - -@article{collet2018zstandard, - title={Zstandard Compression and the application/zstd Media Type}, - author={Collet, Yann and Kucherawy, Murray}, - journal={RFC 8478}, - year={2018} -} - -@book{ortega2014academic, - title={Academic search engines: A quantitative outlook}, - author={Ortega, Jos{\'e} Luis}, - year={2014}, - publisher={Elsevier} -} - -@article{fedoryszak2014efficient, - title={Efficient blocking method for a large scale citation matching}, - author={Fedoryszak, Mateusz and Bolikowski, {\L}ukasz}, - journal={D-Lib Magazine}, - volume={20}, - number={11/12}, - year={2014}, - publisher={Corporation for National Research Initiatives} -} - -@inproceedings{fedoryszak2013large, - title={Large scale citation matching using Apache Hadoop}, - author={Fedoryszak, Mateusz and Tkaczyk, Dominika and Bolikowski, {\L}ukasz}, - booktitle={International Conference on Theory and Practice of Digital Libraries}, - pages={362--365}, - year={2013}, - organization={Springer} -} - -@article{hendricks2020crossref, - title={Crossref: The sustainable source of community-owned scholarly metadata}, - author={Hendricks, Ginny and Tkaczyk, Dominika and Lin, Jennifer and Feeney, Patricia}, - journal={Quantitative Science Studies}, - volume={1}, - number={1}, - pages={414--427}, - year={2020}, - publisher={MIT Press One Rogers Street, Cambridge, MA 02142-1209, USA journals-info~…} -} - -@inproceedings{tkaczyk2018machine, - title={Machine learning vs. rules and out-of-the-box vs. retrained: An evaluation of open-source bibliographic reference and citation parsers}, - author={Tkaczyk, Dominika and Collins, Andrew and Sheridan, Paraic and Beel, Joeran}, - booktitle={Proceedings of the 18th ACM/IEEE on joint conference on digital libraries}, - pages={99--108}, - year={2018} -} - -@article{jinha_2010, - title={Article 50 million: an estimate of the number of scholarly articles in existence}, - volume={23}, - DOI={10.1087/20100308}, - publisher={Wiley}, - author={Jinha}, - year={2010}, - month={Jul} -} - -@article{landhuis_2016, - title={Scientific literature: Information overload}, - volume={535}, - DOI={10.1038/nj7612-457a}, - number={7612}, - publisher={Springer Nature}, - author={Landhuis}, - year={2016}, - month={Jul} -} - -@article{khabsa_giles_2014, - title={The Number of Scholarly Documents on the Public Web}, - DOI={10.1371/journal.pone.0093949}, - publisher={Public Library of Science (PLoS)}, - author={Khabsa and Giles}, - editor={Zhang}, - year={2014}, - month={May} -} - -@inproceedings{councill2008parscit, - title={ParsCit: an Open-source CRF Reference String Parsing Package.}, - author={Councill, Isaac G and Giles, C Lee and Kan, Min-Yen}, - booktitle={LREC}, - volume={8}, - pages={661--667}, - year={2008} -} - -@article{lafferty2001conditional, - title={Conditional random fields: Probabilistic models for segmenting and labeling sequence data}, - author={Lafferty, John and McCallum, Andrew and Pereira, Fernando CN}, - year={2001} -} - - -@inproceedings{tkaczyk2014cermine, - title={Cermine--automatic extraction of metadata and references from scientific literature}, - author={Tkaczyk, Dominika and Szostek, Pawel and Dendek, Piotr Jan and Fedoryszak, Mateusz and Bolikowski, Lukasz}, - booktitle={2014 11th IAPR International Workshop on Document Analysis Systems}, - pages={217--221}, - year={2014}, - organization={IEEE} -} - - - -@inproceedings{hosseini2019excite, - title={EXCITE--A toolchain to extract, match and publish open literature references}, - author={Hosseini, Azam and Ghavimi, Behnam and Boukhers, Zeyd and Mayr, Philipp}, - booktitle={2019 ACM/IEEE Joint Conference on Digital Libraries (JCDL)}, - pages={432--433}, - year={2019}, - organization={IEEE} -} - - -@inproceedings{pasula2003identity, - title={Identity uncertainty and citation matching}, - author={Pasula, Hanna and Marthi, Bhaskara and Milch, Brian and Russell, Stuart J and Shpitser, Ilya}, - booktitle={Advances in neural information processing systems}, - pages={1425--1432}, - year={2003} -} - -@article{olensky2016evaluation, - title={Evaluation of the citation matching algorithms of CWTS and i FQ in comparison to the W eb of science}, - author={Olensky, Marlies and Schmidt, Marion and van Eck, Nees Jan}, - journal={Journal of the Association for Information Science and Technology}, - volume={67}, - number={10}, - pages={2550--2564}, - year={2016}, - publisher={Wiley Online Library} -} - -@article{mathiak2015challenges, - title={Challenges in matching dataset citation strings to datasets in social science}, - author={Mathiak, Brigitte and Boland, Katarina}, - journal={D-Lib Magazine}, - volume={21}, - number={1/2}, - pages={23--28}, - year={2015}, - publisher={Corporation for National Research Initiatives} -} - -@inproceedings{giles1998citeseer, - title={CiteSeer: An automatic citation indexing system}, - author={Giles, C Lee and Bollacker, Kurt D and Lawrence, Steve}, - booktitle={Proceedings of the third ACM conference on Digital libraries}, - pages={89--98}, - year={1998} -} - -@article{schulz2016use, - title={Use of application containers and workflows for genomic data analysis}, - author={Schulz, Wade L and Durant, Thomas JS and Siddon, Alexa J and Torres, Richard}, - journal={Journal of pathology informatics}, - volume={7}, - year={2016}, - publisher={Wolters Kluwer--Medknow Publications} -} - -@inproceedings{erdmann2017design, - title={Design and Execution of make-like, distributed Analyses based on Spotify’s Pipelining Package Luigi}, - author={Erdmann, M and Fischer, B and Fischer, R and Rieger, M}, - booktitle={Journal of Physics: Conference Series}, - volume={898}, - number={7}, - pages={072047}, - year={2017}, - organization={IOP Publishing} -} - -@misc{bernhardsson2018rouhani, - title={Rouhani A. spotify/luigi-GitHub}, - author={Bernhardsson, E and Freider, E}, - year={2018} -} - -@article{lampa2019scipipe, - title={SciPipe: A workflow library for agile development of complex and dynamic bioinformatics pipelines}, - author={Lampa, Samuel and Dahl{\"o}, Martin and Alvarsson, Jonathan and Spjuth, Ola}, - journal={GigaScience}, - volume={8}, - number={5}, - pages={giz044}, - year={2019}, - publisher={Oxford University Press} -} - -@article{czygan2014design, - title={Design and implementation of a library metadata management framework and its application in fuzzy data deduplication and data reconciliation with authority data}, - author={Czygan, Martin}, - journal={Informatik 2014}, - year={2014}, - publisher={Gesellschaft f{\"u}r Informatik eV} -} - -@article{mcilroy1971research, - title={A Research Unix reader: annotated excerpts from the Programmer’s Manual}, - author={McIlroy, M Douglas}, - year={1971}, - publisher={1971-1986} -} - -@dataset{harshdeep_singh_2020_3940692, - author = {Harshdeep Singh and - Robert West and - Giovanni Colavizza}, - title = {{Wikipedia Citations: A comprehensive dataset of - citations with identifiers extracted from English - Wikipedia}}, - month = jul, - year = 2020, - publisher = {Zenodo}, - version = {0.2}, - doi = {10.5281/zenodo.3940692}, - url = {https://doi.org/10.5281/zenodo.3940692} -} -- cgit v1.2.3