% Bibliography database: one entry per work, one field per line.
% Conventions used throughout this file:
%   * citation keys are stable identifiers and must not be renamed;
%   * names are stored as "Last, First" and joined with " and ";
%   * only words that must keep their case (acronyms, proper nouns) are
%     brace-protected in titles, so styles remain free to recase the rest;
%   * non-ASCII characters are written as LaTeX escapes so the file also
%     works with classic 8-bit BibTeX.

% ---------------------------------------------------------------------------
% Handwriting recognition / deep learning integration
% ---------------------------------------------------------------------------

@inproceedings{kour2014real,
  author    = {Kour, George and Saabne, Raid},
  title     = {Real-time segmentation of on-line handwritten {Arabic} script},
  booktitle = {2014 14th International Conference on Frontiers in Handwriting Recognition ({ICFHR})},
  pages     = {417--422},
  year      = {2014},
  publisher = {IEEE},
}

@inproceedings{kour2014fast,
  author    = {Kour, George and Saabne, Raid},
  title     = {Fast classification of handwritten on-line {Arabic} characters},
  booktitle = {2014 6th International Conference of Soft Computing and Pattern Recognition ({SoCPaR})},
  pages     = {312--318},
  year      = {2014},
  publisher = {IEEE},
  doi       = {10.1109/SOCPAR.2014.7008025},
}

% NOTE(review): the arXiv identifier is kept in the journal field so that
% classic styles still print it; move it to eprint/archiveprefix if the
% project standardises on an eprint-aware style.
@article{hadash2018estimate,
  author  = {Hadash, Guy and Kermany, Einat and Carmeli, Boaz and Lavi, Ofer and Kour, George and Jacovi, Alon},
  title   = {Estimate and Replace: A Novel Approach to Integrating Deep Neural Networks with Existing Applications},
  journal = {arXiv preprint arXiv:1804.09028},
  year    = {2018},
}

% ---------------------------------------------------------------------------
% Citation indexes, scholarly databases and open citation data
% ---------------------------------------------------------------------------

@article{garfield1955citation,
  author    = {Garfield, Eugene},
  title     = {Citation indexes for science},
  journal   = {Science},
  volume    = {122},
  number    = {3159},
  pages     = {108--111},
  year      = {1955},
  publisher = {American Association for the Advancement of Science},
}

@inproceedings{lopez2009grobid,
  author    = {Lopez, Patrice},
  title     = {{GROBID}: Combining automatic bibliographic data recognition and term extraction for scholarship publications},
  booktitle = {International Conference on Theory and Practice of Digital Libraries},
  pages     = {473--474},
  year      = {2009},
  publisher = {Springer},
}

@article{garfield2007evolution,
  author  = {Garfield, Eugene},
  title   = {The evolution of the {Science Citation Index}},
  journal = {International Microbiology},
  volume  = {10},
  number  = {1},
  pages   = {65},
  year    = {2007},
}

@article{shotton2013publishing,
  author  = {Shotton, David},
  title   = {Publishing: open citations},
  journal = {Nature News},
  volume  = {502},
  number  = {7471},
  pages   = {295},
  year    = {2013},
}

@inproceedings{wu2019citeseerx,
  author    = {Wu, Jian and Kim, Kunho and Giles, C. Lee},
  title     = {{CiteSeerX}: 20 years of service to scholarly big data},
  booktitle = {Proceedings of the Conference on Artificial Intelligence for Data Discovery and Reuse},
  pages     = {1--4},
  year      = {2019},
}

@inproceedings{li2006citeseerx,
  author    = {Li, Huajing and Councill, Isaac and Lee, Wang-Chien and Giles, C. Lee},
  title     = {{CiteSeerx}: an architecture and web service design for an academic document search engine},
  booktitle = {Proceedings of the 15th International Conference on World Wide Web},
  pages     = {883--884},
  year      = {2006},
}

@inproceedings{sinha2015overview,
  author    = {Sinha, Arnab and Shen, Zhihong and Song, Yang and Ma, Hao and Eide, Darrin and Hsu, Bo-June and Wang, Kuansan},
  title     = {An overview of {Microsoft Academic Service} ({MAS}) and applications},
  booktitle = {Proceedings of the 24th International Conference on World Wide Web},
  pages     = {243--246},
  year      = {2015},
}

@inproceedings{ley2002dblp,
  author    = {Ley, Michael},
  title     = {The {DBLP} computer science bibliography: Evolution, research issues, perspectives},
  booktitle = {International Symposium on String Processing and Information Retrieval},
  pages     = {1--10},
  year      = {2002},
  publisher = {Springer},
}

@inproceedings{brase2009datacite,
  author    = {Brase, Jan},
  title     = {{DataCite} -- A global registration agency for research data},
  booktitle = {2009 Fourth International Conference on Cooperation and Promotion of Information Resources in Science and Technology},
  pages     = {257--261},
  year      = {2009},
  publisher = {IEEE},
}

% Chapter of an online handbook (second edition), hence incollection rather
% than a journal article.
@incollection{canese2013pubmed,
  author    = {Canese, Kathi and Weis, Sarah},
  title     = {{PubMed}: the bibliographic database},
  booktitle = {The {NCBI} Handbook},
  edition   = {Second},
  publisher = {National Center for Biotechnology Information (US)},
  address   = {Bethesda, MD},
  year      = {2013},
}

@article{shotton2018funders,
  author    = {Shotton, David},
  title     = {Funders should mandate open citations},
  journal   = {Nature},
  volume    = {553},
  number    = {7686},
  pages     = {129--130},
  year      = {2018},
  publisher = {Nature Publishing Group},
}

@article{hutchins2021tipping,
  author  = {Hutchins, B. Ian},
  title   = {A tipping point for open citation data},
  journal = {Quantitative Science Studies},
  pages   = {1--5},
  year    = {2021},
}

@article{silbert1970world,
  author    = {Silbert, Jeffrey M.},
  title     = {The World's First Computerized Criminal-Justice Information-Sharing System -- The {New York State Identification and Intelligence System} ({NYSIIS})},
  journal   = {Criminology},
  volume    = {8},
  pages     = {107},
  year      = {1970},
  publisher = {HeinOnline},
}

@article{peroni2020opencitations,
  author    = {Peroni, Silvio and Shotton, David},
  title     = {{OpenCitations}, an infrastructure organization for open scholarship},
  journal   = {Quantitative Science Studies},
  volume    = {1},
  number    = {1},
  pages     = {428--444},
  year      = {2020},
  publisher = {MIT Press},
}

@article{fricke2018semantic,
  author    = {Fricke, Suzanne},
  title     = {{Semantic Scholar}},
  journal   = {Journal of the Medical Library Association: JMLA},
  volume    = {106},
  number    = {1},
  pages     = {145},
  year      = {2018},
  publisher = {Medical Library Association},
}

@inproceedings{tang2016aminer,
  author    = {Tang, Jie},
  title     = {{AMiner}: Toward understanding big scholar data},
  booktitle = {Proceedings of the Ninth {ACM} International Conference on Web Search and Data Mining},
  pages     = {467},
  year      = {2016},
}

% ---------------------------------------------------------------------------
% Data processing tools and formats
% ---------------------------------------------------------------------------

@article{dean2010mapreduce,
  author    = {Dean, Jeffrey and Ghemawat, Sanjay},
  title     = {{MapReduce}: a flexible data processing tool},
  journal   = {Communications of the ACM},
  volume    = {53},
  number    = {1},
  pages     = {72--77},
  year      = {2010},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

% An RFC is a numbered report, not a journal article.
@techreport{collet2018zstandard,
  author      = {Collet, Yann and Kucherawy, Murray},
  title       = {{Zstandard} Compression and the application/zstd Media Type},
  type        = {RFC},
  number      = {8478},
  institution = {RFC Editor},
  year        = {2018},
  month       = oct,
  doi         = {10.17487/RFC8478},
  url         = {https://www.rfc-editor.org/rfc/rfc8478},
}

@book{ortega2014academic,
  author    = {Ortega, Jos{\'e} Luis},
  title     = {Academic search engines: A quantitative outlook},
  year      = {2014},
  publisher = {Elsevier},
}

% ---------------------------------------------------------------------------
% Citation matching, reference parsing and metadata extraction
% ---------------------------------------------------------------------------

@article{fedoryszak2014efficient,
  author    = {Fedoryszak, Mateusz and Bolikowski, {\L}ukasz},
  title     = {Efficient blocking method for a large scale citation matching},
  journal   = {D-Lib Magazine},
  volume    = {20},
  number    = {11/12},
  year      = {2014},
  publisher = {Corporation for National Research Initiatives},
}

@inproceedings{fedoryszak2013large,
  author    = {Fedoryszak, Mateusz and Tkaczyk, Dominika and Bolikowski, {\L}ukasz},
  title     = {Large scale citation matching using {Apache Hadoop}},
  booktitle = {International Conference on Theory and Practice of Digital Libraries},
  pages     = {362--365},
  year      = {2013},
  publisher = {Springer},
}

@article{hendricks2020crossref,
  author    = {Hendricks, Ginny and Tkaczyk, Dominika and Lin, Jennifer and Feeney, Patricia},
  title     = {{Crossref}: The sustainable source of community-owned scholarly metadata},
  journal   = {Quantitative Science Studies},
  volume    = {1},
  number    = {1},
  pages     = {414--427},
  year      = {2020},
  publisher = {MIT Press},
}

@inproceedings{tkaczyk2018machine,
  author    = {Tkaczyk, Dominika and Collins, Andrew and Sheridan, Paraic and Beel, Joeran},
  title     = {Machine learning vs. rules and out-of-the-box vs. retrained: An evaluation of open-source bibliographic reference and citation parsers},
  booktitle = {Proceedings of the 18th {ACM/IEEE} on Joint Conference on Digital Libraries},
  pages     = {99--108},
  year      = {2018},
}

% The next three entries came from a DOI export that lacked given names and
% the journal; the journal data below should be re-checked against the DOI
% record if these entries are ever edited again.
@article{jinha_2010,
  author    = {Jinha, Arif E.},
  title     = {Article 50 million: an estimate of the number of scholarly articles in existence},
  journal   = {Learned Publishing},
  volume    = {23},
  number    = {3},
  pages     = {258--263},
  year      = {2010},
  month     = jul,
  publisher = {Wiley},
  doi       = {10.1087/20100308},
}

@article{landhuis_2016,
  author    = {Landhuis, Esther},
  title     = {Scientific literature: Information overload},
  journal   = {Nature},
  volume    = {535},
  number    = {7612},
  pages     = {457--458},
  year      = {2016},
  month     = jul,
  publisher = {Springer Nature},
  doi       = {10.1038/nj7612-457a},
}

@article{khabsa_giles_2014,
  author    = {Khabsa, Madian and Giles, C. Lee},
  editor    = {Zhang},
  title     = {The Number of Scholarly Documents on the Public Web},
  journal   = {{PLoS ONE}},
  volume    = {9},
  number    = {5},
  pages     = {e93949},
  year      = {2014},
  month     = may,
  publisher = {Public Library of Science (PLoS)},
  doi       = {10.1371/journal.pone.0093949},
}

@inproceedings{councill2008parscit,
  author    = {Councill, Isaac G. and Giles, C. Lee and Kan, Min-Yen},
  title     = {{ParsCit}: an Open-source {CRF} Reference String Parsing Package},
  booktitle = {{LREC}},
  volume    = {8},
  pages     = {661--667},
  year      = {2008},
}

% Published in the ICML 2001 proceedings, not in a journal.
@inproceedings{lafferty2001conditional,
  author    = {Lafferty, John and McCallum, Andrew and Pereira, Fernando C. N.},
  title     = {Conditional random fields: Probabilistic models for segmenting and labeling sequence data},
  booktitle = {Proceedings of the Eighteenth International Conference on Machine Learning ({ICML} 2001)},
  pages     = {282--289},
  year      = {2001},
}

@inproceedings{tkaczyk2014cermine,
  author    = {Tkaczyk, Dominika and Szostek, Pawe{\l} and Dendek, Piotr Jan and Fedoryszak, Mateusz and Bolikowski, {\L}ukasz},
  title     = {{CERMINE} -- automatic extraction of metadata and references from scientific literature},
  booktitle = {2014 11th {IAPR} International Workshop on Document Analysis Systems},
  pages     = {217--221},
  year      = {2014},
  publisher = {IEEE},
}

@inproceedings{hosseini2019excite,
  author    = {Hosseini, Azam and Ghavimi, Behnam and Boukhers, Zeyd and Mayr, Philipp},
  title     = {{EXCITE} -- A toolchain to extract, match and publish open literature references},
  booktitle = {2019 {ACM/IEEE} Joint Conference on Digital Libraries ({JCDL})},
  pages     = {432--433},
  year      = {2019},
  publisher = {IEEE},
}

@inproceedings{pasula2003identity,
  author    = {Pasula, Hanna and Marthi, Bhaskara and Milch, Brian and Russell, Stuart J. and Shpitser, Ilya},
  title     = {Identity uncertainty and citation matching},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1425--1432},
  year      = {2003},
}

@article{olensky2016evaluation,
  author    = {Olensky, Marlies and Schmidt, Marion and van Eck, Nees Jan},
  title     = {Evaluation of the citation matching algorithms of {CWTS} and {iFQ} in comparison to the {Web of Science}},
  journal   = {Journal of the Association for Information Science and Technology},
  volume    = {67},
  number    = {10},
  pages     = {2550--2564},
  year      = {2016},
  publisher = {Wiley Online Library},
}

@article{mathiak2015challenges,
  author    = {Mathiak, Brigitte and Boland, Katarina},
  title     = {Challenges in matching dataset citation strings to datasets in social science},
  journal   = {D-Lib Magazine},
  volume    = {21},
  number    = {1/2},
  pages     = {23--28},
  year      = {2015},
  publisher = {Corporation for National Research Initiatives},
}

@inproceedings{giles1998citeseer,
  author    = {Giles, C. Lee and Bollacker, Kurt D. and Lawrence, Steve},
  title     = {{CiteSeer}: An automatic citation indexing system},
  booktitle = {Proceedings of the Third {ACM} Conference on Digital Libraries},
  pages     = {89--98},
  year      = {1998},
}

% ---------------------------------------------------------------------------
% Workflow engines and pipeline tooling
% ---------------------------------------------------------------------------

@article{schulz2016use,
  author    = {Schulz, Wade L. and Durant, Thomas J. S. and Siddon, Alexa J. and Torres, Richard},
  title     = {Use of application containers and workflows for genomic data analysis},
  journal   = {Journal of Pathology Informatics},
  volume    = {7},
  year      = {2016},
  publisher = {Wolters Kluwer--Medknow Publications},
}

% Journal of Physics: Conference Series is a journal series with volume and
% issue numbers; the article type avoids the classic-BibTeX
% "volume and number" clash that the proceedings type would raise.
@article{erdmann2017design,
  author    = {Erdmann, M. and Fischer, B. and Fischer, R. and Rieger, M.},
  title     = {Design and Execution of make-like, distributed Analyses based on {Spotify's} Pipelining Package {Luigi}},
  journal   = {Journal of Physics: Conference Series},
  volume    = {898},
  number    = {7},
  pages     = {072047},
  year      = {2017},
  publisher = {IOP Publishing},
}

% Software repository. The key is historical and kept for existing citations;
% "Rouhani" is a co-author, not part of the title.
@misc{bernhardsson2018rouhani,
  author       = {Bernhardsson, E. and Freider, E. and Rouhani, A.},
  title        = {{spotify/luigi}},
  howpublished = {GitHub repository},
  url          = {https://github.com/spotify/luigi},
  year         = {2018},
}

@article{lampa2019scipipe,
  author    = {Lampa, Samuel and Dahl{\"o}, Martin and Alvarsson, Jonathan and Spjuth, Ola},
  title     = {{SciPipe}: A workflow library for agile development of complex and dynamic bioinformatics pipelines},
  journal   = {GigaScience},
  volume    = {8},
  number    = {5},
  pages     = {giz044},
  year      = {2019},
  publisher = {Oxford University Press},
}

% Contribution to the INFORMATIK 2014 conference proceedings.
@inproceedings{czygan2014design,
  author    = {Czygan, Martin},
  title     = {Design and implementation of a library metadata management framework and its application in fuzzy data deduplication and data reconciliation with authority data},
  booktitle = {Informatik 2014},
  year      = {2014},
  publisher = {Gesellschaft f{\"u}r Informatik e.V.},
}

% Technical report issued in 1987 covering the manuals of 1971--1986. The
% key still carries "1971" and is kept for existing citations.
@techreport{mcilroy1971research,
  author      = {McIlroy, M. Douglas},
  title       = {A Research {Unix} reader: annotated excerpts from the {Programmer's Manual}, 1971--1986},
  type        = {Computing Science Technical Report},
  number      = {139},
  institution = {AT\&T Bell Laboratories},
  address     = {Murray Hill, NJ},
  year        = {1987},
}

% ---------------------------------------------------------------------------
% Datasets
% ---------------------------------------------------------------------------

@dataset{harshdeep_singh_2020_3940692,
  author    = {Singh, Harshdeep and West, Robert and Colavizza, Giovanni},
  title     = {{Wikipedia} Citations: A comprehensive dataset of citations with identifiers extracted from {English} {Wikipedia}},
  month     = jul,
  year      = {2020},
  publisher = {Zenodo},
  version   = {0.2},
  doi       = {10.5281/zenodo.3940692},
  url       = {https://doi.org/10.5281/zenodo.3940692},
}