about summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r-- python/notes/coci_notes.md 62
1 file changed, 62 insertions, 0 deletions
diff --git a/python/notes/coci_notes.md b/python/notes/coci_notes.md
index 6d7a968..b6e2b0e 100644
--- a/python/notes/coci_notes.md
+++ b/python/notes/coci_notes.md
@@ -41,3 +41,65 @@ Example approach:
* from fatcat db dump, extract source id and ext ids, sort by source ident
* "zip together"
+Unify CSV files:
+
+```
+$ zstdcat -T0 6741422v9.csv.zst | wc -l
+759516506
+```
+
+Nomenclature:
+
+* citing = source
+* cited = target
+
+Example:
+
+```
+10.3846/16111699.2012.720591,10.1016/0024-6301(96)00041-6
+```
+
+> citing: 10.3846/16111699.2012.720591, https://fatcat.wiki/release/52znjflg2bdd5h2q2icu3zjhki
+> cited: 10.1016/0024-6301(96)00041-6, https://fatcat.wiki/release/mz6dkakhknd47h3skd7ttomwga
+
+```
+$ curl -s "localhost:9200/fatcat_ref_v02_20210716/_search?q=source_release_ident:52znjflg2bdd5h2q2icu3zjhki+AND+target_release_ident:mz6dkakhknd47h3skd7ttomwga" | jq .
+{
+  "took": 259,
+  "timed_out": false,
+  "_shards": {
+    "total": 6,
+    "successful": 6,
+    "skipped": 0,
+    "failed": 0
+  },
+  "hits": {
+    "total": {
+      "value": 1,
+      "relation": "eq"
+    },
+    "max_score": 32.16953,
+    "hits": [
+      {
+        "_index": "fatcat_ref_v02_20210716",
+        "_type": "_doc",
+        "_id": "52znjflg2bdd5h2q2icu3zjhki_2",
+        "_score": 32.16953,
+        "_source": {
+          "indexed_ts": "2021-07-10T12:04:57Z",
+          "match_provenance": "crossref",
+          "match_reason": "doi",
+          "match_status": "exact",
+          "ref_index": 2,
+          "ref_key": "cit0005",
+          "source_release_ident": "52znjflg2bdd5h2q2icu3zjhki",
+          "source_work_ident": "76yenkekovfh5bnvuxwvtvxy5q",
+          "source_year": "2014",
+          "target_release_ident": "mz6dkakhknd47h3skd7ttomwga",
+          "target_work_ident": "um37w3kdcnhqvnp5jeh3mvhumy"
+        }
+      }
+    ]
+  }
+}
+```