From 3f85b3a35dff8dbcfa8bd8ae1390c9610e0f34a8 Mon Sep 17 00:00:00 2001
From: Martin Czygan
Date: Mon, 2 Aug 2021 15:55:27 +0200
Subject: update notes

---
 python/notes/coci_notes.md | 62 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

(limited to 'python')

diff --git a/python/notes/coci_notes.md b/python/notes/coci_notes.md
index 6d7a968..b6e2b0e 100644
--- a/python/notes/coci_notes.md
+++ b/python/notes/coci_notes.md
@@ -41,3 +41,65 @@ Example approach:
 * from fatcat db dump, extract source id and ext ids, sort by source ident
 * "zip together"
 
+Unify CSV files:
+
+```
+$ zstdcat -T0 6741422v9.csv.zst | wc -l
+759516506
+```
+
+Nomenclature:
+
+* citing = source
+* cited = target
+
+Example:
+
+```
+10.3846/16111699.2012.720591,10.1016/0024-6301(96)00041-6
+```
+
+> citing: 10.3846/16111699.2012.720591, https://fatcat.wiki/release/52znjflg2bdd5h2q2icu3zjhki
+> cited: 10.1016/0024-6301(96)00041-6, https://fatcat.wiki/release/mz6dkakhknd47h3skd7ttomwga
+
+```
+$ curl -s "localhost:9200/fatcat_ref_v02_20210716/_search?q=source_release_ident:52znjflg2bdd5h2q2icu3zjhki+AND+target_release_ident:mz6dkakhknd47h3skd7ttomwga" | jq .
+{
+  "took": 259,
+  "timed_out": false,
+  "_shards": {
+    "total": 6,
+    "successful": 6,
+    "skipped": 0,
+    "failed": 0
+  },
+  "hits": {
+    "total": {
+      "value": 1,
+      "relation": "eq"
+    },
+    "max_score": 32.16953,
+    "hits": [
+      {
+        "_index": "fatcat_ref_v02_20210716",
+        "_type": "_doc",
+        "_id": "52znjflg2bdd5h2q2icu3zjhki_2",
+        "_score": 32.16953,
+        "_source": {
+          "indexed_ts": "2021-07-10T12:04:57Z",
+          "match_provenance": "crossref",
+          "match_reason": "doi",
+          "match_status": "exact",
+          "ref_index": 2,
+          "ref_key": "cit0005",
+          "source_release_ident": "52znjflg2bdd5h2q2icu3zjhki",
+          "source_work_ident": "76yenkekovfh5bnvuxwvtvxy5q",
+          "source_year": "2014",
+          "target_release_ident": "mz6dkakhknd47h3skd7ttomwga",
+          "target_work_ident": "um37w3kdcnhqvnp5jeh3mvhumy"
+        }
+      }
+    ]
+  }
+}
+```
-- 
cgit v1.2.3