From d3fa74e941aa11f79cee2d0adcb5cbc70884ef48 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 29 Oct 2021 12:16:02 -0700 Subject: initial crossref-refs via GROBID helper routine --- python/grobid_tool.py | 21 +- python/sandcrawler/grobid.py | 125 +++++- .../crossref_api_work_978-3-030-64953-1_4.json | 1 + .../files/crossref_api_work_s1047951103000064.json | 1 + .../files/grobid_refs_978-3-030-64953-1_4.tei.xml | 66 +++ .../files/grobid_refs_s1047951103000064.tei.xml | 499 +++++++++++++++++++++ python/tests/test_grobid.py | 132 +++++- 7 files changed, 839 insertions(+), 6 deletions(-) create mode 100644 python/tests/files/crossref_api_work_978-3-030-64953-1_4.json create mode 100644 python/tests/files/crossref_api_work_s1047951103000064.json create mode 100644 python/tests/files/grobid_refs_978-3-030-64953-1_4.tei.xml create mode 100644 python/tests/files/grobid_refs_s1047951103000064.tei.xml diff --git a/python/grobid_tool.py b/python/grobid_tool.py index f99a78b..782bc13 100755 --- a/python/grobid_tool.py +++ b/python/grobid_tool.py @@ -15,6 +15,7 @@ import sys from grobid_tei_xml import parse_document_xml from sandcrawler import * +from sandcrawler.grobid import CrossrefRefsWorker def run_extract_json(args): @@ -84,6 +85,13 @@ def run_transform(args): print(json.dumps(out)) +def run_parse_crossref_refs(args): + grobid_client = GrobidClient(host_url=args.grobid_host) + worker = CrossrefRefsWorker(grobid_client, sink=args.sink) + pusher = JsonLinePusher(worker, args.json_file) + pusher.run() + + def main(): parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument( @@ -101,7 +109,7 @@ def main(): "-j", "--jobs", default=8, type=int, help="parallelism for batch CPU jobs" ) parser.add_argument( - "--grobid-host", default="http://grobid.qa.fatcat.wiki", help="GROBID API host/port" + "--grobid-host", default="https://grobid.qa.fatcat.wiki", help="GROBID API host/port" ) subparsers = parser.add_subparsers() @@ -133,6 +141,17 @@ def main(): sub_extract_zipfile.set_defaults(func=run_extract_zipfile) sub_extract_zipfile.add_argument("zip_file", help="zipfile with PDFs to extract", type=str) + sub_parse_crossref_refs = subparsers.add_parser( + "parse-crossref-refs", + help="reads Crossref metadata records, parses any unstructured refs with GROBID", + ) + sub_parse_crossref_refs.set_defaults(func=run_parse_crossref_refs) + sub_parse_crossref_refs.add_argument( + "json_file", + help="JSON-L file to process (or '-' for stdin)", + type=argparse.FileType("r"), + ) + sub_transform = subparsers.add_parser("transform") sub_transform.set_defaults(func=run_transform) sub_transform.add_argument( diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py index 37c4ea1..cdd2093 100644 --- a/python/sandcrawler/grobid.py +++ b/python/sandcrawler/grobid.py @@ -1,7 +1,8 @@ -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional +import sys import requests -from grobid_tei_xml import parse_document_xml +from grobid_tei_xml import GrobidBiblio, parse_citation_list_xml, parse_document_xml from .ia import WaybackClient from .misc import gen_file_metadata @@ -9,7 +10,7 @@ from .workers import SandcrawlerFetchWorker, SandcrawlerWorker class GrobidClient(object): - def __init__(self, host_url: str = "http://grobid.qa.fatcat.wiki", **kwargs): + def __init__(self, host_url: str = "https://grobid.qa.fatcat.wiki", **kwargs): self.host_url = host_url self.consolidate_mode = int(kwargs.get("consolidate_mode", 0)) @@ -67,6 +68,29 @@ class GrobidClient(object): info["error_msg"] = grobid_response.text[:10000] return info + def process_citation_list(self, unstructured_list: List[str]) -> List[GrobidBiblio]: + if not unstructured_list: + return [] + if len(unstructured_list) > 5000: + raise ValueError("more than 5,000 references in a batch is just too much") + + try: + grobid_response = requests.post( + self.host_url + "/api/processCitationList", + data={ + "citations": unstructured_list, + "consolidateCitations": 0, + "includeRawCitations": 1, + }, + timeout=30.0, + ) + except requests.Timeout as te: + # TODO: handle somehow? + raise te + + grobid_response.raise_for_status() + return parse_citation_list_xml(grobid_response.text) + def metadata(self, result: Dict[str, Any]) -> Optional[Dict[str, Any]]: if result["status"] != "success": return None @@ -90,6 +114,87 @@ class GrobidClient(object): meta[k] = tei_json[k] return meta + def should_parse_crossref_ref(self, ref: Dict[str, Any]) -> bool: + """ + Helper function to decide whether to run GROBID parsing on an crossref + reference. + + For example, if there is already a DOI in the ref metadata, could skip. + Or, if there is sufficient structured metadata, or only depending on + the source of the DOI linkage. + """ + if ref.get("DOI"): + return False + if len(ref.get("unstructured", "")) <= 6: + return False + + # TODO: what other combinations are enough to skip parsing? + if ( + ref.get("year") + and ref.get("author") + and (ref.get("article-title") or ref.get("series-title") or ref.get("volume-title")) + ): + return False + elif ref.get("year") and ref.get("author") and ref.get("journal-title"): + return False + elif ref.get("journal-title") and ref.get("volume") and ref.get("first-page"): + return False + + return True + + def crossref_refs(self, record: Dict[str, Any]) -> Dict[str, Any]: + """ + Given a complete Crossref metadata record, inspects the + + The returned dict is in the schema of the `grobid_refs` database table, + in dict form: + + source: 'crossref' + source_id: doi, as lower-case string + source_ts: Crossref indexed timestamp, if available + ('updated' is not set) + refs_json: list of dicts + """ + + # remove API wrapper around record, if necessary + if "message" in record and "DOI" not in record: + record = record["message"] + + ret = dict( + source="crossref", + source_id=record["DOI"].lower(), + source_ts=record["indexed"]["date-time"], + refs_json=[], + ) + all_refs = record.get("reference", []) + unstructured_refs = [] + for r in all_refs: + if not r.get("unstructured"): + continue + if not self.should_parse_crossref_ref(r): + continue + unstructured_refs.append(r) + if not unstructured_refs: + return ret + + # some reasonable cap on length of refs per work + if len(unstructured_refs) > 2000: + print( + f"truncatin very large reference list for doi:{record['DOI']} len:{len(unstructured_refs)}", + file=sys.stderr, + ) + unstructured_refs = unstructured_refs[:2000] + + refs = self.process_citation_list([r["unstructured"] for r in unstructured_refs]) + assert len(refs) == len(unstructured_refs) + refs_json = [] + for i in range(len(refs)): + refs[i].id = unstructured_refs[i].get("key") + assert refs[i].unstructured == unstructured_refs[i]["unstructured"] + refs_json.append(refs[i].to_dict()) + ret["refs_json"] = refs_json + return ret + class GrobidWorker(SandcrawlerFetchWorker): def __init__( @@ -97,7 +202,7 @@ class GrobidWorker(SandcrawlerFetchWorker): grobid_client: GrobidClient, wayback_client: Optional[WaybackClient] = None, sink: Optional[SandcrawlerWorker] = None, - **kwargs + **kwargs, ): super().__init__(wayback_client=wayback_client) self.grobid_client = grobid_client @@ -129,6 +234,18 @@ class GrobidWorker(SandcrawlerFetchWorker): return result +class CrossrefRefsWorker(SandcrawlerWorker): + def __init__( + self, grobid_client: GrobidClient, sink: Optional[SandcrawlerWorker] = None, **kwargs + ): + super().__init__(**kwargs) + self.grobid_client = grobid_client + self.sink = sink + + def process(self, record: Any, key: Optional[str] = None) -> Any: + return self.grobid_client.crossref_refs(record) + + class GrobidBlobWorker(SandcrawlerWorker): """ This is sort of like GrobidWorker, except it receives blobs directly, diff --git a/python/tests/files/crossref_api_work_978-3-030-64953-1_4.json b/python/tests/files/crossref_api_work_978-3-030-64953-1_4.json new file mode 100644 index 0000000..54d07db --- /dev/null +++ b/python/tests/files/crossref_api_work_978-3-030-64953-1_4.json @@ -0,0 +1 @@ +{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2021,5,10]],"date-time":"2021-05-10T22:08:45Z","timestamp":1620684525878},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-64953-1_4","type":"book-chapter","created":{"date-parts":[[2021,1,14]],"date-time":"2021-01-14T02:57:20Z","timestamp":1610593040000},"page":"53-71","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Mathematical Knowledge and Mathematical Objects"],"prefix":"10.1007","author":[{"given":"Lars-G\u00f6ran","family":"Johansson","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,1,14]]},"reference":[{"key":"4_CR12","doi-asserted-by":"publisher","volume-title":"Deflating existential consequence: A case for nominalism","author":"J Azzouni","year":"2004","unstructured":"Azzouni, J. (2004). Deflating existential consequence: A case for nominalism. New York: Oxford University Press.","DOI":"10.1093\/0195159888.001.0001"},{"key":"4_CR23","doi-asserted-by":"publisher","volume-title":"Foundations of constructive mathematics","author":"M Beeson","year":"1985","unstructured":"Beeson, M. (1985). Foundations of constructive mathematics. Berlin\/Heidelberg: Springer.","DOI":"10.1007\/978-3-642-68952-9"},{"issue":"2","key":"4_CR27","doi-asserted-by":"publisher","first-page":"176","DOI":"10.1093\/philmat\/11.2.176","volume":"11","author":"H Billinge","year":"2003","unstructured":"Billinge, H. (2003). Did bishop have a philosophy of mathematics? Philosophica Mathematica, 11(2), 176\u2013194.","journal-title":"Philosophica Mathematica"},{"key":"4_CR29","doi-asserted-by":"publisher","volume-title":"Constructive analysis","author":"E Bishop","year":"1985","unstructured":"Bishop, E., & Bridges, D. S. (1985). Constructive analysis. Berlin: Springer.","DOI":"10.1007\/978-3-642-61667-9"},{"key":"4_CR37","series-title":"In E. N. Zalta (Ed.), The Stanford encyclopedia of philosophy (spring 2014 ed.)","volume-title":"Nominalism in the philosophy of mathematics","author":"O Bueno","year":"2014","unstructured":"Bueno, O. (2014). Nominalism in the philosophy of mathematics. In E. N. Zalta (Ed.), The Stanford encyclopedia of philosophy (spring 2014 ed.). Metaphysics Research Lab, Stanford University."},{"key":"4_CR38","volume-title":"Grundlagen einer allgemeinen mannigfaltiglehre. ein mathematisch-philosophisher versuch in der leher de unendlichen","author":"G Cantor","year":"1883","unstructured":"Cantor, G. (1883). Grundlagen einer allgemeinen mannigfaltiglehre. ein mathematisch-philosophisher versuch in der leher de unendlichen. Leipzig: Teubner."},{"key":"4_CR60","volume-title":"The seas of language","author":"M Dummett","year":"1993","unstructured":"Dummett, M. (1993). The seas of language. Oxford: Clarendon Press."},{"key":"4_CR73","volume-title":"In the light of logic","author":"S Feferman","year":"1998","unstructured":"Feferman, S. (1998). In the light of logic. New York: Oxford University Press."},{"key":"4_CR74","doi-asserted-by":"publisher","first-page":"590","DOI":"10.1093\/0195148770.003.0019","volume-title":"The Oxford handbook of philosophy of mathematics and logic","author":"S Feferman","year":"2005","unstructured":"Feferman, S. (2005). Predicativity. In S. Shapiro (Ed.), The Oxford handbook of philosophy of mathematics and logic (pp. 590\u2013624). New York\/Oxford: Oxford University Press."},{"key":"4_CR77","volume-title":"Science without numbers: A defence of nominalism","author":"H H Field","year":"1980","unstructured":"Field, H. H. (1980). Science without numbers: A defence of nominalism. Oxford: Blackwell."},{"key":"4_CR88","volume-title":"Werke, volume 8","author":"C F Gauss","year":"2011","unstructured":"Gauss, C. F. (2011). Werke, volume 8. Cambridge: Cambridge University Press."},{"key":"4_CR93","unstructured":"Goodman, N. (1972). A world of individuals. In Problems and projects (pp. 155\u2013172). Bobs-Merrill company."},{"key":"4_CR103","volume-title":"Mathematics without numbers: Towards a modal-structural interpretation","author":"G Hellman","year":"1989","unstructured":"Hellman, G. (1989). Mathematics without numbers: Towards a modal-structural interpretation. Oxford: Clarendon Press."},{"key":"4_CR126","first-page":"201","volume-title":"Bertrand Russell. Philosopher of the century","author":"G Kreisel","year":"1967","unstructured":"Kreisel, G. (1967). Mathematical logic: What has it done for the philosophy of mathematics? In R. Shoenman (Ed.), Bertrand Russell. Philosopher of the century (pp. 201\u2013272). London: George Allen & Unwin."},{"key":"4_CR135","doi-asserted-by":"crossref","unstructured":"Lear, J. (1980). Aristotelian infinity. Proceedings of the Aristotelian Society, New Series, 80, 187\u2013210.","DOI":"10.1093\/aristotelian\/80.1.187"},{"key":"4_CR175","doi-asserted-by":"publisher","first-page":"63","DOI":"10.12775\/LLP.1998.004","volume":"6","author":"F Pataut","year":"1998","unstructured":"Pataut, F. (1998). Incompleteness, constructivism and truth. Logic and Logical Philosophy, 6, 63\u201376.","journal-title":"Logic and Logical Philosophy"},{"key":"4_CR180","first-page":"294","volume":"14","author":"H Poincar\u00e9","year":"1906","unstructured":"Poincar\u00e9, H. (1906). Les math\u00e9matiques et la logique. Revue de m\u00e9taphysique et de morale, 14, 294\u2013317.","journal-title":"Revue de m\u00e9taphysique et de morale"},{"key":"4_CR190","volume-title":"Word and object","author":"W V O Quine","year":"1960","unstructured":"Quine, W. V. O. (1960). Word and object. Cambridge, MA: MIT Press."},{"key":"4_CR193","unstructured":"Quine, W. V. O. (1976b). Implicit definition sustained. In The ways of paradox and other essays (2. enlarged and revised ed., pp. 133\u2013136). Cambridge, MA: Harvard University Press."},{"key":"4_CR197","first-page":"31","volume-title":"Theories and things","author":"W V O Quine","year":"1981","unstructured":"Quine, W. V. O. (1981c). What price bivalence? In Theories and things (pp. 31\u201337). Cambridge, MA: The Belknap Press of Harvard University Press."},{"issue":"1","key":"4_CR198","doi-asserted-by":"publisher","first-page":"5","DOI":"10.2307\/2026889","volume":"89","author":"WV O Quine","year":"1992","unstructured":"Quine, W.V. O. (1992). Structure and nature. The Journal of Philosophy, 89(1), 5\u20139.","journal-title":"The Journal of Philosophy"},{"key":"4_CR199","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1080\/014453401625669","volume":"25","author":"P Raatikainen","year":"2004","unstructured":"Raatikainen, P. (2004). Conceptions of truth in intuitionism. History and Philosophy of Logic, 25, 131\u2013145.","journal-title":"History and Philosophy of Logic"},{"key":"4_CR210","unstructured":"Russell, B. (1906). On some difficulties in the theory of transfinite numbers and order types. Proceedings of London Mathematical Society, 4, 29\u201353."},{"key":"4_CR212","volume-title":"Introduction to mathematical philosophy","author":"B Russell","year":"1919","unstructured":"Russell, B. (1919). Introduction to mathematical philosophy. London: Routledge."},{"key":"4_CR222","doi-asserted-by":"crossref","unstructured":"Schwarz, J. T. (2006(1966)). The pernicious influence of mathematics on science. In R. Hersch (Ed.), 18 unconventional essays on the nature of mathematics (Chap. 13, pp. 231\u2013235). New York: Springer.","DOI":"10.1007\/0-387-29831-2_13"},{"key":"4_CR233","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1007\/BF00247187","volume":"12","author":"G Sundholm","year":"1983","unstructured":"Sundholm, G. (1983). Constructions, proofs and the meaning of logical constants. Journal of Philosophical Logic, 12, 151\u2013172.","journal-title":"Journal of Philosophical Logic"},{"issue":"2","key":"4_CR235","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1007\/s10701-007-9186-9","volume":"38","author":"M Tegmark","year":"2008","unstructured":"Tegmark, M. (2008). The mathematical universe. Foundations of Physics, 38(2), 101\u2013150.","journal-title":"Foundations of Physics"},{"key":"4_CR262","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1016\/0010-0277(90)90003-3","volume":"36","author":"K Wynn","year":"1990","unstructured":"Wynn, K. (1990). Children\u2019s understanding of counting. Cognition, 36, 155\u2013193.","journal-title":"Cognition"}],"container-title":["Synthese Library","Empiricism and Philosophy of Physics"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-64953-1_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,1,14]],"date-time":"2021-01-14T03:00:39Z","timestamp":1610593239000},"score":1,"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":28,"URL":"http:\/\/dx.doi.org\/10.1007\/978-3-030-64953-1_4","relation":{},"ISSN":["0166-6991","2542-8292"],"issn-type":[{"value":"0166-6991","type":"print"},{"value":"2542-8292","type":"electronic"}],"published":{"date-parts":[[2021]]},"assertion":[{"value":"14 January 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}} \ No newline at end of file diff --git a/python/tests/files/crossref_api_work_s1047951103000064.json b/python/tests/files/crossref_api_work_s1047951103000064.json new file mode 100644 index 0000000..dfb795d --- /dev/null +++ b/python/tests/files/crossref_api_work_s1047951103000064.json @@ -0,0 +1 @@ +{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2021,6,10]],"date-time":"2021-06-10T05:35:02Z","timestamp":1623303302043},"reference-count":46,"publisher":"Cambridge University Press (CUP)","issue":"1","license":[{"start":{"date-parts":[[2005,4,18]],"date-time":"2005-04-18T00:00:00Z","timestamp":1113782400000},"content-version":"unspecified","delay-in-days":807,"URL":"https:\/\/www.cambridge.org\/core\/terms"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Cardiol Young"],"published-print":{"date-parts":[[2003,2]]},"abstract":"We designed a multi-hospital prospective study of children less than 12 years to determine the comparative clinical profile, severity of carditis, and outcome on follow up of patients suffering an initial and recurrent episodes of acute rheumatic fever. The study extended over a period of 3 years, with diagnosis based on the Jones criteria. We included 161 children in the study, 57 having only one episode and 104 with recurrent episodes. Those seen in the first episode were differentiated from those with recurrent episodes on the basis of the history. The severity of carditis was graded by clinical and echocardiographic means. In those suffering their first episode, carditis was significantly less frequent (61.4%) compared to those having recurrent episodes (96.2%). Arthritis was more marked in the first episode (61.4%) compared to recurrent episodes (36.5%). Chorea was also significantly higher in the first episode (15.8%) compared to recurrent episodes (3.8%). Sub-cutaneous nodules were more-or-less the same in those suffering the first (7%) as opposed to recurrent episodes (5.8%), but Erythema marginatum was more marked during the first episode (3.5%), being rare in recurrent episodes at 0.9%. Fever was recorded in approximately the same numbers in first (45.6%) and recurrent episodes (48.1%). Arthralgia, in contrast, was less frequent in first (21.1%) compared to recurrent episodes (32.7%). A history of sore throat was significantly increased amongst those suffering the first episode (54.4%) compared to recurrent episodes (21.2%). When we compared the severity of carditis in the first versus recurrent episodes, at the start of study mild carditis was found in 29.8% versus 10.6%, moderate carditis in 26.3% versus 53.8%, and severe carditis in 5.3% versus 31.8% of cases, respectively. At the end of study, 30.3% of patients suffering their first episode were completely cured of carditis, and all others showed significant improvement compared to those with recurrent episodes, where only 6.8% were cured, little improvement or deterioration being noted in the remainder of the patients. We conclude that the clinical profile of acute rheumatic fever, especially that of carditis, is milder in those suffering their first attack compared to those with recurrent episodes.<\/jats:p>","DOI":"10.1017\/s1047951103000064","type":"journal-article","created":{"date-parts":[[2005,4,18]],"date-time":"2005-04-18T11:49:54Z","timestamp":1113824994000},"page":"28-35","source":"Crossref","is-referenced-by-count":11,"title":["Clinical profile of acute rheumatic fever in Pakistan"],"prefix":"10.1017","volume":"13","author":[{"given":"Hasina Suleman","family":"Chagani","sequence":"first","affiliation":[]},{"given":"Kalimuddin","family":"Aziz","sequence":"additional","affiliation":[]}],"member":"56","published-online":{"date-parts":[[2005,4,18]]},"reference":[{"key":"S1047951103000064_ref010","doi-asserted-by":"crossref","unstructured":"Alan L , Bisno . Group A streptococcal infection and acute rheumatic fever. N Engl J Med 1991; 325: 783\u2013793.","DOI":"10.1056\/NEJM199109123251106"},{"key":"S1047951103000064_ref036","doi-asserted-by":"crossref","unstructured":"Abbasi AS , Hashmi JA , Robinson RD , Suraya S , Syed SA . Prevalence of heart disease in school children of Karachi. Am J Cardiol 1966; 18: 544\u2013547.","DOI":"10.1016\/0002-9149(66)90008-7"},{"key":"S1047951103000064_ref025","unstructured":"Strasser T , Dondong N , Elkholy A et al. The community control of rheumatic fever and rheumatic heart disease. Report of a WHO international co-operative project. Bull. WHO 1981; 59: 285\u2013294."},{"key":"S1047951103000064_ref013","unstructured":"Rahimtoola RJ , Rehman H . Acute rheumatic fever in children. JPMA 1972; 22: 185\u2013192."},{"key":"S1047951103000064_ref007","doi-asserted-by":"crossref","unstructured":"Okoroma EO , Ihenacho HNC , Anyanwu CH . Rheumatic fever in Nigerian children. A prospective study of 66 patients. Am J Dis Child 1981; 35: 236\u2013238.","DOI":"10.1001\/archpedi.1981.02130270028010"},{"key":"S1047951103000064_ref031","doi-asserted-by":"crossref","unstructured":"Gordis L . Effectiveness of comprehensive care program in preventing rheumatic fever. N Engl J Med 1973; 289: 331\u2013335.","DOI":"10.1056\/NEJM197308162890701"},{"key":"S1047951103000064_ref012","unstructured":"Ismail SA , El Amin A . Rheumatic fever in Sudanese children. Arab J Med 1983; 2: 21\u201324."},{"key":"S1047951103000064_ref026","doi-asserted-by":"crossref","unstructured":"Reale A , Colella C , Bruno AM . Mitral stenosis in childhood: Clinical and therapeutic aspects. Am Heart J 1963; 66: 15.","DOI":"10.1016\/0002-8703(63)90064-4"},{"key":"S1047951103000064_ref046","doi-asserted-by":"crossref","unstructured":"Aziz KU , Cheema L , Memon AD . Long-term observations of rheumatic carditis. Cardiol Young 1992; 2: 254\u2013260.","DOI":"10.1017\/S1047951100001001"},{"key":"S1047951103000064_ref041","unstructured":"Aziz KU . Incidence of heart disease in children at NICVD. JPMA 1984; 34: 300\u2013305."},{"key":"S1047951103000064_ref002","unstructured":"Cheadle WB . The various manifestations of rheumatic fever as exemplified in childhood and early life. Smith and Co., London, 1889."},{"key":"S1047951103000064_ref043","unstructured":"Community control of rheumatic heart disease in developing countries-I. A major public health problem. WHO Chron 1980; 34: 336\u2013345."},{"key":"S1047951103000064_ref037","unstructured":"Malik SM , Jaffrey S , Ahmed S , Zubeda Khanum : Prevalence of heart disease in school children of Islamabad. Pakistan Heart Journal 1981; 14: 2\u20136."},{"key":"S1047951103000064_ref029","doi-asserted-by":"crossref","unstructured":"Hassel TA , Stuart KL . Rheumatic fever prophylaxis. A three-year study. Br Med J 1972; 2: 39\u201340.","DOI":"10.1136\/bmj.2.5909.39"},{"key":"S1047951103000064_ref024","doi-asserted-by":"crossref","unstructured":"Sanyal SK , Berry AM , Duggal S , Hooja V , Ghosh S . Sequel of initial attack of acute rheumatic fever. A prospective 5-year follow-up study. Circulation 1982; 65: 375\u2013379.","DOI":"10.1161\/01.CIR.65.2.375"},{"key":"S1047951103000064_ref022","doi-asserted-by":"crossref","unstructured":"Brownell KD , Rese FB . Acute rheumatic fever in children. Incidence in Borough of New York city. JAMA. 1973; 224: 1593\u20131597.","DOI":"10.1001\/jama.1973.03220260015004"},{"key":"S1047951103000064_ref035","unstructured":"Watkins JH , Quinn JP . Rheumatic heart disease and overcrowding. Am J Public Health 1948; 38: 1071\u20131081."},{"key":"S1047951103000064_ref003","unstructured":"El-Sadr W , Taranta A . The spectrum and specter of rheumatic fever in 1980's. In: Clinical Immunology Up-Date. Edited by Franklin EC . Elsevier, New York, 1979, pp 183\u2013203."},{"key":"S1047951103000064_ref045","doi-asserted-by":"crossref","unstructured":"Markowitz M . Eradication of rheumatic fever. An unfulfilled hope. Circulation 1970; 41: 1077\u20131084.","DOI":"10.1161\/01.CIR.41.6.1077"},{"key":"S1047951103000064_ref005","unstructured":"Haig-Brown C . Tonsillitis in adolescent, Bailliere Tendoll and Cox, London 1886."},{"key":"S1047951103000064_ref017","unstructured":"Levine LI , Chapman SS , Guerra V , Cooper J , Krause RM . Studies on the transmission within the families of group A hemolytic streptococci. J Lab Clin Med 1966; 67: 483\u2013494."},{"key":"S1047951103000064_ref028","doi-asserted-by":"crossref","unstructured":"Ehmke DA , Stehbens JA , Young L . Two studies of compliance with daily prophylaxis in rheumatic fever patients in Iowa. Am J Public Health 1980; 70: 1189\u20131193.","DOI":"10.2105\/AJPH.70.11.1189"},{"key":"S1047951103000064_ref021","doi-asserted-by":"crossref","unstructured":"Ward C . The reappraisal of the clinical features in acute and chronic rheumatic heart disease. Etiology implications. Am Heart J 1979; 98: 298\u2013306.","DOI":"10.1016\/0002-8703(79)90040-1"},{"key":"S1047951103000064_ref009","doi-asserted-by":"crossref","unstructured":"Sanyal SK , Thaper MK , Ahmed SA , Hooja V , Tewari P . The initial attack of acute rheumatic fever during childhood in North India. A prospective study of the clinical profile. Circulation 1974; 49: 7\u201312.","DOI":"10.1161\/01.CIR.49.1.7"},{"key":"S1047951103000064_ref016","unstructured":"Strasser T . Rheumatic fever and rheumatic heart disease in the 1970's. WHO Chron. 1978; 32: 18\u201325."},{"key":"S1047951103000064_ref019","doi-asserted-by":"crossref","unstructured":"Bland EF , Jones TD . Rheumatic fever and rheumatic heart disease. A twenty-year report on 1000 patients followed since childhood. Circulation 1951; 4: 836\u2013843.","DOI":"10.1161\/01.CIR.4.6.836"},{"key":"S1047951103000064_ref042","doi-asserted-by":"crossref","unstructured":"Wood HF , McCarty M . Laboratory aids in the diagnosis of rheumatic fever and evaluation of disease activity. Am J Med 1954; 17: 768\u2013774.","DOI":"10.1016\/0002-9343(54)90221-1"},{"key":"S1047951103000064_ref020","doi-asserted-by":"crossref","unstructured":"Baldwin JS , Kerr JM , Kuttner AG , Doyle EF . Observation in rheumatic nodules over 30 years period. J Pediatr 1960; 56: 465\u2013470.","DOI":"10.1016\/S0022-3476(60)80358-7"},{"key":"S1047951103000064_ref004","doi-asserted-by":"crossref","unstructured":"Majeed HA , Khan N , Dabbagh M , Naidi K . Acute rheumatic fever during childhood in Kuwait: The mild nature of initial attack. Ann Trop Paediatr 1981; 1: 13\u201320.","DOI":"10.1080\/02724936.1981.11748053"},{"key":"S1047951103000064_ref001","unstructured":"Brittanica: Book of year 1991. Chicago, 1991."},{"key":"S1047951103000064_ref039","unstructured":"Talbot R . Pockets of rheumatic fever in developed world. XI World Congress of Cardiology. Manila 1990."},{"key":"S1047951103000064_ref040","doi-asserted-by":"crossref","unstructured":"Taranta A , Markowitz M . Rheumatic fever. A guide to its recognition, prevention and cure, with special reference to developing countries. M.T.P. Press Ltd., Boston, 1981.","DOI":"10.1007\/978-94-015-7171-5"},{"key":"S1047951103000064_ref032","unstructured":"Intersociety commission for heart disease and resources. Rheumatic fever and rheumatic heart disease study group. Prevention of rheumatic fever and rheumatic heart disease. Circulation 1970; 41: A1\u201315."},{"key":"S1047951103000064_ref014","unstructured":"Rahimtoola RJ , Shafqat H , Ramzan A . Acute rheumatic fever and rheumatic carditis in children. Pak Heart J 1980; 3: 2\u20139."},{"key":"S1047951103000064_ref011","doi-asserted-by":"crossref","unstructured":"Gharib R . Acute rheumatic fever in Shiraz, Iran. It's prevalence and characteristics in two socio-economic groups. Am J Dis Child 1969: 118: 694\u2013699.","DOI":"10.1001\/archpedi.1969.02100040696005"},{"key":"S1047951103000064_ref008","unstructured":"Padmavati S . Rheumatic fever and rheumatic heart disease in developing countries. Bull. WHO 1979; 56: 543\u2013550."},{"key":"S1047951103000064_ref033","doi-asserted-by":"crossref","unstructured":"Spagnuolo M , Pasternack B , Taranta A . Risk of rheumatic fever recurrences after streptococcal infections. Prospective study of clinical and social factors. N Engl J Med 1971; 285: 641\u2013647.","DOI":"10.1056\/NEJM197109162851201"},{"key":"S1047951103000064_ref038","unstructured":"Meyer RJ , Haggerty RJ . Streptococcal infections in families. Factors altering individual susceptibility. Pediatrics 1962; 29: 539\u2013549."},{"key":"S1047951103000064_ref023","doi-asserted-by":"crossref","unstructured":"Feinstein AR , Spagnuolo M . The clinical patterns of acute rheumatic fever; A reappraisal. Medicine 1962; 41: 279\u2013305.","DOI":"10.1097\/00005792-196212000-00001"},{"key":"S1047951103000064_ref018","unstructured":"Shanks RA . Collagen and connective tissue diseases. In: Forfar JA , Arneil CC (eds) Textbook of Pediatrics. Churchill Livingstone, Edinburgh, 1978: 1501\u20131515."},{"key":"S1047951103000064_ref027","unstructured":"Billoo AG , Abbasi AS , Sultana S , Desa L , Syed SA . Prophylaxis against recurrence of rheumatic fever. Pak Heart J 1968; 1: 8\u201314."},{"key":"S1047951103000064_ref034","unstructured":"Syed SA . Rheumatic heart disease. Pak Heart J 1972; 5: 14\u201316."},{"key":"S1047951103000064_ref044","unstructured":"Community control of rheumatic heart disease in developing countries-II. Strategies for prevention and control. WHO Chron 1980; 34: 389\u2013395."},{"key":"S1047951103000064_ref006","unstructured":"Joshi MK , Kandoth PW , Barve RJ , Kamat JR . Rheumatic fever: Clinical profile of 339 cases with long term follow-up. Indian pediatr 1983; 20: 849\u2013853."},{"key":"S1047951103000064_ref030","unstructured":"Koshi G , Benjamin V , Cherian G . Rheumatic fever and rheumatic heart disease in rural south Indian children. Bull WHO 1981; 59: 599\u2013603."},{"key":"S1047951103000064_ref015","doi-asserted-by":"crossref","unstructured":"Robinson RD , Sultana S , Abbasi AS et al. Acute rheumatic fever in Karachi, Pakistan. Am J Cardiol 1966; 8: 548\u2013551.","DOI":"10.1016\/0002-9149(66)90009-9"}],"container-title":["Cardiology in the Young"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.cambridge.org\/core\/services\/aop-cambridge-core\/content\/view\/S1047951103000064","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,6]],"date-time":"2020-04-06T22:32:57Z","timestamp":1586212377000},"score":1,"subtitle":[],"short-title":[],"issued":{"date-parts":[[2003,2]]},"references-count":46,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2003,2]]}},"alternative-id":["S1047951103000064"],"URL":"http:\/\/dx.doi.org\/10.1017\/s1047951103000064","relation":{},"ISSN":["1047-9511","1467-1107"],"issn-type":[{"value":"1047-9511","type":"print"},{"value":"1467-1107","type":"electronic"}],"subject":["Cardiology and Cardiovascular Medicine","General Medicine","Pediatrics, Perinatology, and Child Health"],"published":{"date-parts":[[2003,2]]}}} \ No newline at end of file diff --git a/python/tests/files/grobid_refs_978-3-030-64953-1_4.tei.xml b/python/tests/files/grobid_refs_978-3-030-64953-1_4.tei.xml new file mode 100644 index 0000000..b47f85b --- /dev/null +++ b/python/tests/files/grobid_refs_978-3-030-64953-1_4.tei.xml @@ -0,0 +1,66 @@ + + + + + + +
+ + + + A world of individuals + + NGoodman + + + + Problems and projects + + 1972 + + + + Goodman, N. (1972). A world of individuals. In Problems and projects (pp. 155–172). Bobs-Merrill company. + + + + + Implicit definition sustained + + WV OQuine + + + + The ways of paradox and other essays +
Cambridge, MA
+ + Harvard University Press + 1976b + + +
+ Quine, W. V. O. (1976b). Implicit definition sustained. In The ways of paradox and other essays (2. enlarged and revised ed., pp. 133–136). Cambridge, MA: Harvard University Press. +
+ + + + On some difficulties in the theory of transfinite numbers and order types + + BRussell + + + 1906 + Proceedings of London Mathematical Society + 4 + + + + Russell, B. (1906). On some difficulties in the theory of transfinite numbers and order types. Proceedings of London Mathematical Society, 4, 29–53. + + +
+
+
+
+
diff --git a/python/tests/files/grobid_refs_s1047951103000064.tei.xml b/python/tests/files/grobid_refs_s1047951103000064.tei.xml new file mode 100644 index 0000000..e0eae8a --- /dev/null +++ b/python/tests/files/grobid_refs_s1047951103000064.tei.xml @@ -0,0 +1,499 @@ + + + + + + +
+ + + + The community control of rheumatic fever and rheumatic heart disease + + TStrasser + + + NDondong + + + AElkholy + + + + Bull. WHO + + 59 + + 1981 + + + Report of a WHO international co-operative project + Strasser T , Dondong N , Elkholy A et al. The community control of rheumatic fever and rheumatic heart disease. Report of a WHO international co-operative project. Bull. WHO 1981; 59: 285–294. + + + + + Acute rheumatic fever in children + + RJRahimtoola + + + HRehman + + + + JPMA + + 22 + + 1972 + + + Rahimtoola RJ , Rehman H . Acute rheumatic fever in children. JPMA 1972; 22: 185–192. + + + + + Rheumatic fever in Sudanese children + + SAIsmail + + + AEl Amin + + + + Arab J Med + + 2 + + 1983 + + + Ismail SA , El Amin A . Rheumatic fever in Sudanese children. Arab J Med 1983; 2: 21–24. + + + + + Incidence of heart disease in children at NICVD + + KUAziz + + + + JPMA + + 34 + + 1984 + + + Aziz KU . Incidence of heart disease in children at NICVD. JPMA 1984; 34: 300–305. + + + + + The various manifestations of rheumatic fever as exemplified in childhood and early life + + WBCheadle + + + Smith and Co + 1889 + London + + + Cheadle WB . The various manifestations of rheumatic fever as exemplified in childhood and early life. Smith and Co., London, 1889. + + + + + Community control of rheumatic heart disease in developing countries-I. A major public health problem + + + WHO Chron + + 34 + + 1980 + + + Community control of rheumatic heart disease in developing countries-I. A major public health problem. WHO Chron 1980; 34: 336–345. + + + + + Prevalence of heart disease in school children of Islamabad + + SMMalik + + + SJaffrey + + + SAhmed + + + ZubedaKhanum + + + + Pakistan Heart Journal + + 14 + + 1981 + + + Malik SM , Jaffrey S , Ahmed S , Zubeda Khanum : Prevalence of heart disease in school children of Islamabad. Pakistan Heart Journal 1981; 14: 2–6. + + + + + Rheumatic heart disease and overcrowding + + JHWatkins + + + JPQuinn + + + + Am J Public Health + + 38 + + 1948 + + + Watkins JH , Quinn JP . Rheumatic heart disease and overcrowding. Am J Public Health 1948; 38: 1071–1081. + + + + + The spectrum and specter of rheumatic fever in 1980's + + WEl-Sadr + + + ATaranta + + + + Clinical Immunology Up-Date. Edited by Franklin EC + + + 1979 + Elsevier + + + El-Sadr W , Taranta A . The spectrum and specter of rheumatic fever in 1980's. In: Clinical Immunology Up-Date. Edited by Franklin EC . Elsevier, New York, 1979, pp 183–203. + + + + + Tonsillitis in adolescent, Bailliere Tendoll and Cox + + CHaig-Brown + + + 1886 + London + + + Haig-Brown C . Tonsillitis in adolescent, Bailliere Tendoll and Cox, London 1886. + + + + + Studies on the transmission within the families of group A hemolytic streptococci + + LILevine + + + SSChapman + + + VGuerra + + + JCooper + + + RMKrause + + + + J Lab Clin Med + + 67 + + 1966 + + + Levine LI , Chapman SS , Guerra V , Cooper J , Krause RM . Studies on the transmission within the families of group A hemolytic streptococci. J Lab Clin Med 1966; 67: 483–494. + + + + + Rheumatic fever and rheumatic heart disease in the 1970's. WHO Chron + + TStrasser + + + 1978 + 32 + + + + Strasser T . Rheumatic fever and rheumatic heart disease in the 1970's. WHO Chron. 1978; 32: 18–25. + + + + + Brittanica: Book of year 1991 + + 1991 + Chicago + + + Brittanica: Book of year 1991. Chicago, 1991. + + + + + Pockets of rheumatic fever in developed world. XI World Congress of Cardiology + + RTalbot + + + 1990 + Manila + + + Talbot R . Pockets of rheumatic fever in developed world. XI World Congress of Cardiology. Manila 1990. + + + + + Intersociety commission for heart disease and resources. Rheumatic fever and rheumatic heart disease study group. Prevention of rheumatic fever and rheumatic heart disease + + + Circulation + + 41 + + 1970 + + + Intersociety commission for heart disease and resources. Rheumatic fever and rheumatic heart disease study group. Prevention of rheumatic fever and rheumatic heart disease. Circulation 1970; 41: A1–15. + + + + + Acute rheumatic fever and rheumatic carditis in children + + RJRahimtoola + + + HShafqat + + + ARamzan + + + + Pak Heart J + + 3 + + 1980 + + + Rahimtoola RJ , Shafqat H , Ramzan A . Acute rheumatic fever and rheumatic carditis in children. Pak Heart J 1980; 3: 2–9. + + + + + Rheumatic fever and rheumatic heart disease in developing countries + + SPadmavati + + + + Bull. WHO + + 56 + + 1979 + + + Padmavati S . Rheumatic fever and rheumatic heart disease in developing countries. Bull. WHO 1979; 56: 543–550. + + + + + Streptococcal infections in families. Factors altering individual susceptibility + + RJMeyer + + + RJHaggerty + + + + Pediatrics + + 29 + + 1962 + + + Meyer RJ , Haggerty RJ . Streptococcal infections in families. Factors altering individual susceptibility. Pediatrics 1962; 29: 539–549. + + + + + Collagen and connective tissue diseases + + RAShanks + + + + Textbook of Pediatrics + + JAForfar + CCArneil + +
Edinburgh
+ + 1978 + + + + Churchill Livingstone + +
+ Shanks RA . Collagen and connective tissue diseases. In: Forfar JA , Arneil CC (eds) Textbook of Pediatrics. Churchill Livingstone, Edinburgh, 1978: 1501–1515. +
+ + + + Prophylaxis against recurrence of rheumatic fever + + AGBilloo + + + ASAbbasi + + + SSultana + + + LDesa + + + SASyed + + + + Pak Heart J + + 1 + + 1968 + + + Billoo AG , Abbasi AS , Sultana S , Desa L , Syed SA . Prophylaxis against recurrence of rheumatic fever. Pak Heart J 1968; 1: 8–14. + + + + + Rheumatic heart disease + + SASyed + + + + Pak Heart J + + 5 + + 1972 + + + Syed SA . Rheumatic heart disease. Pak Heart J 1972; 5: 14–16. + + + + + Community control of rheumatic heart disease in developing countries-II. Strategies for prevention and control + + + WHO Chron + + 34 + + 1980 + + + Community control of rheumatic heart disease in developing countries-II. Strategies for prevention and control. WHO Chron 1980; 34: 389–395. + + + + + Rheumatic fever: Clinical profile of 339 cases with long term follow-up + + MKJoshi + + + PWKandoth + + + RJBarve + + + JRKamat + + + + Indian pediatr + + 20 + + 1983 + + + Joshi MK , Kandoth PW , Barve RJ , Kamat JR . Rheumatic fever: Clinical profile of 339 cases with long term follow-up. Indian pediatr 1983; 20: 849–853. + + + + + Rheumatic fever and rheumatic heart disease in rural south Indian children + + GKoshi + + + VBenjamin + + + GCherian + + + + Bull WHO + + 59 + + 1981 + + + Koshi G , Benjamin V , Cherian G . Rheumatic fever and rheumatic heart disease in rural south Indian children. Bull WHO 1981; 59: 599–603. + + +
+
+
+
+
diff --git a/python/tests/test_grobid.py b/python/tests/test_grobid.py index c086d73..dce64bc 100644 --- a/python/tests/test_grobid.py +++ b/python/tests/test_grobid.py @@ -1,3 +1,4 @@ +import json import struct import pytest @@ -41,7 +42,10 @@ def test_grobid_503(grobid_client): @responses.activate -def test_grobid_success(grobid_client): +def test_grobid_success_iso_8859(grobid_client): + """ + This might have been the old GROBID behavior, with default encoding? Can't really remember. + """ responses.add( responses.POST, @@ -63,6 +67,27 @@ def test_grobid_success(grobid_client): assert resp["tei_xml"] == REAL_TEI_XML.decode("ISO-8859-1") +@responses.activate +def test_grobid_success(grobid_client): + + responses.add( + responses.POST, + "http://dummy-grobid/api/processFulltextDocument", + status=200, + body=REAL_TEI_XML, + content_type="application/xml; charset=UTF-8", + ) + + resp = grobid_client.process_fulltext(FAKE_PDF_BYTES) + + # grobid gets POST 1x times + assert len(responses.calls) == 1 + + assert resp["status_code"] == 200 + assert resp["status"] == "success" + assert resp["tei_xml"] == REAL_TEI_XML.decode("UTF-8") + + @responses.activate def test_grobid_worker_cdx(grobid_client, wayback_client): # noqa: F811 @@ -90,3 +115,108 @@ def test_grobid_worker_cdx(grobid_client, wayback_client): # noqa: F811 assert pusher_counts["pushed"] == worker.counts["total"] assert len(responses.calls) == worker.counts["total"] + + +@responses.activate +def test_grobid_refs_978(grobid_client): + + with open("tests/files/crossref_api_work_978-3-030-64953-1_4.json", "r") as f: + crossref_work = json.loads(f.read()) + + with open("tests/files/grobid_refs_978-3-030-64953-1_4.tei.xml", "rb") as f: + xml_bytes = f.read() + assert "\u2013".encode("utf-8") in xml_bytes + responses.add( + responses.POST, + "http://dummy-grobid/api/processCitationList", + status=200, + body=xml_bytes, + content_type="application/xml; charset=UTF-8", + ) + + refs_row = grobid_client.crossref_refs(crossref_work) + + # grobid gets POST 1x times + assert len(responses.calls) == 1 + + assert refs_row["source"] == "crossref" + assert refs_row["source_id"] == "10.1007/978-3-030-64953-1_4" + assert refs_row["source_ts"] == "2021-05-10T22:08:45Z" + refs = refs_row["refs_json"] + assert len(refs) == 3 + assert set([r["id"] for r in refs]) == set(["4_CR93", "4_CR193", "4_CR210"]) + + # test case of no references + crossref_work["message"]["reference"] = [] + refs_row = grobid_client.crossref_refs(crossref_work) + + assert refs_row["source"] == "crossref" + assert refs_row["source_id"] == "10.1007/978-3-030-64953-1_4" + assert refs_row["source_ts"] == "2021-05-10T22:08:45Z" + assert len(refs_row["refs_json"]) == 0 + + # test that 'message' works also + refs_row = grobid_client.crossref_refs(crossref_work["message"]) + assert refs_row["source"] == "crossref" + assert refs_row["source_id"] == "10.1007/978-3-030-64953-1_4" + assert refs_row["source_ts"] == "2021-05-10T22:08:45Z" + assert len(refs_row["refs_json"]) == 0 + + # grobid gets no additional POST from the above empty queries + assert len(responses.calls) == 1 + + +@responses.activate +def test_grobid_refs_s104(grobid_client): + + # test another file + with open("tests/files/crossref_api_work_s1047951103000064.json", "r") as f: + crossref_work = json.loads(f.read()) + + with open("tests/files/grobid_refs_s1047951103000064.tei.xml", "rb") as f: + responses.add( + responses.POST, + "http://dummy-grobid/api/processCitationList", + status=200, + body=f.read(), + content_type="application/xml; charset=UTF-8", + ) + + refs_row = grobid_client.crossref_refs(crossref_work) + + # GROBID gets one more POST + assert len(responses.calls) == 1 + + assert refs_row["source"] == "crossref" + assert refs_row["source_id"] == "10.1017/s1047951103000064" + assert refs_row["source_ts"] == "2021-06-10T05:35:02Z" + refs = refs_row["refs_json"] + assert len(refs) == 24 + assert set([r["id"] for r in refs]) == set( + [ + "S1047951103000064_ref025", + "S1047951103000064_ref013", + "S1047951103000064_ref012", + "S1047951103000064_ref041", + "S1047951103000064_ref002", + "S1047951103000064_ref043", + "S1047951103000064_ref037", + "S1047951103000064_ref035", + "S1047951103000064_ref003", + "S1047951103000064_ref005", + "S1047951103000064_ref017", + "S1047951103000064_ref016", + "S1047951103000064_ref001", + "S1047951103000064_ref039", + "S1047951103000064_ref032", + "S1047951103000064_ref014", + "S1047951103000064_ref008", + "S1047951103000064_ref038", + "S1047951103000064_ref018", + "S1047951103000064_ref027", + "S1047951103000064_ref034", + "S1047951103000064_ref044", + "S1047951103000064_ref006", + "S1047951103000064_ref030", + ] + ) -- cgit v1.2.3