about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- python/refcat/tasks.py 3
-rw-r--r-- skate/cmd/skate-map/main.go 1
-rw-r--r-- skate/map.go 19
-rw-r--r-- skate/schema.go 2
4 files changed, 23 insertions, 2 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 573ec74..66175c8 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -448,8 +448,7 @@ class BrefDOITable(Refcat):
def run(self):
output = shellout("""
zstdcat -T0 {input} |
- parallel --block 10M -j 20 --pipe
- "jq -rc '[.source_release_ident, .target_release_ident, .source_doi, .target_doi] | @tsv'" |
+ skate-map -m bidt |
zstd -c -T0 > {output}
""",
input=self.input().path)
diff --git a/skate/cmd/skate-map/main.go b/skate/cmd/skate-map/main.go
index 6c61af0..57a1498 100644
--- a/skate/cmd/skate-map/main.go
+++ b/skate/cmd/skate-map/main.go
@@ -83,6 +83,7 @@ func main() {
"cdxu": skate.MapperCdxSummary,
"bref": skate.MapperBrefWork,
"rewo": skate.MapperReleaseWork,
+ "bidt": skate.MapperBrefIdentifierTable,
}
if *logFile != "" {
f, err := os.OpenFile(*logFile, os.O_CREATE|os.O_APPEND, 0644)
diff --git a/skate/map.go b/skate/map.go
index ca98186..ad62328 100644
--- a/skate/map.go
+++ b/skate/map.go
@@ -415,6 +415,25 @@ func MapperReleaseWork(p []byte) (fields [][]byte, err error) {
return [][]byte{[]byte(release.WorkID), p}, nil
}
+// MapperBrefIdentifierTable generates an id table from biblioref: it decodes p as one JSON document and returns four fields (source ident, target ident, source DOI, target DOI).
+func MapperBrefIdentifierTable(p []byte) (field [][]byte, err error) {
+ var bref struct { // only the four keys emitted below are decoded; any other keys in p are ignored
+ SourceReleaseIdent string `json:"source_release_ident,omitempty"` // omitempty only affects encoding; it has no effect when decoding
+ TargetReleaseIdent string `json:"target_release_ident,omitempty"`
+ SourceDOI string `json:"source_doi,omitempty"`
+ TargetDOI string `json:"target_doi,omitempty"`
+ }
+ if err := json.Unmarshal(p, &bref); err != nil { // this err shadows the named result; it is returned explicitly on the next line
+ return nil, err
+ }
+ return [][]byte{ // on success always four fields, in this order; a key absent from p yields an empty field
+ []byte(bref.SourceReleaseIdent),
+ []byte(bref.TargetReleaseIdent),
+ []byte(bref.SourceDOI),
+ []byte(bref.TargetDOI),
+ }, nil
+}
+
// sandcrawlerSlugify normalizes a string.
func sandcrawlerSlugify(s string) string {
slug := strings.ToLower(strings.TrimSpace(s))
diff --git a/skate/schema.go b/skate/schema.go
index 2cea15a..078556c 100644
--- a/skate/schema.go
+++ b/skate/schema.go
@@ -438,6 +438,8 @@ type BiblioRef struct {
MatchReason string `json:"match_reason,omitempty"`
TargetUnstructured string `json:"target_unstructured,omitempty"`
TargetCSL *CSL `json:"target_csl,omitempty"`
+ SourceDOI string `json:"source_doi,omitempty"`
+ TargetDOI string `json:"target_doi,omitempty"`
}
// CSL is a subset of citation style language schema.